Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
5668f217
Commit
5668f217
authored
Aug 21, 2018
by
张彦钊
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
delete print
parent
cd05757b
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
21 additions
and
23 deletions
+21
-23
precitDiaryLocal.py
local/precitDiaryLocal.py
+7
-7
predictDiary.py
predictDiary.py
+14
-15
userProfile.py
userProfile.py
+0
-1
No files found.
local/precitDiaryLocal.py
View file @
5668f217
...
...
@@ -41,7 +41,7 @@ def test_con_sql(device_id):
# 将device_id、city_id拼接到对应的城市热门日记表。注意:下面预测集特征顺序要与训练集保持一致
def
feature_en
(
x_list
,
device_id
):
data
=
pd
.
DataFrame
(
x_list
)
data
=
data
.
rename
(
columns
=
{
0
:
"
diary_
id"
})
data
=
data
.
rename
(
columns
=
{
0
:
"
c
id"
})
data
[
"device_id"
]
=
device_id
now
=
datetime
.
now
()
data
[
"hour"
]
=
now
.
hour
...
...
@@ -87,7 +87,7 @@ def predict(queue_name, x_list, device_id):
def
save_result
(
queue_name
,
x_list
):
score_df
=
pd
.
read_csv
(
"/Users/mac/utils/result/{0}_output.txt"
.
format
(
queue_name
),
header
=
None
)
score_df
=
score_df
.
rename
(
columns
=
{
0
:
"score"
})
score_df
[
"
diary_
id"
]
=
x_list
score_df
[
"
c
id"
]
=
x_list
merge_score
(
x_list
,
score_df
)
...
...
@@ -111,19 +111,19 @@ def merge_score(x_list, score_df):
def
update_dairy_queue
(
score_df
):
diary_id
=
score_df
[
"
diary_
id"
]
.
values
.
tolist
()
diary_id
=
score_df
[
"
c
id"
]
.
values
.
tolist
()
video_id
=
[]
x
=
1
while
x
<=
len
(
diary_id
):
video_id
.
append
(
diary_id
[
x
])
x
+=
5
not_video_id
=
list
(
set
(
diary_id
)
-
set
(
video_id
))
not_video_id_df
=
score_df
.
loc
[
score_df
[
"
diary_
id"
]
.
isin
(
not_video_id
)]
not_video_id_df
=
score_df
.
loc
[
score_df
[
"
c
id"
]
.
isin
(
not_video_id
)]
not_video_id_df
=
not_video_id_df
.
sort_values
(
by
=
"score"
,
ascending
=
False
)
video_id_df
=
score_df
.
loc
[
score_df
[
"
diary_
id"
]
.
isin
(
video_id
)]
video_id_df
=
score_df
.
loc
[
score_df
[
"
c
id"
]
.
isin
(
video_id
)]
video_id_df
=
video_id_df
.
sort_values
(
by
=
"score"
,
ascending
=
False
)
not_video_id
=
not_video_id_df
[
"
diary_
id"
]
.
values
.
tolist
()
video_id
=
video_id_df
[
"
diary_
id"
]
.
values
.
tolist
()
not_video_id
=
not_video_id_df
[
"
c
id"
]
.
values
.
tolist
()
video_id
=
video_id_df
[
"
c
id"
]
.
values
.
tolist
()
diary_id
=
not_video_id
i
=
1
for
j
in
video_id
:
...
...
predictDiary.py
View file @
5668f217
...
...
@@ -23,7 +23,6 @@ def feature_en(user_profile):
# 虽然预测y,但ffm转化需要y,并不影响预测结果
data
[
"y"
]
=
0
data
=
data
.
drop
(
"city_id"
,
axis
=
1
)
print
(
data
.
head
(
10
))
return
data
...
...
@@ -103,20 +102,20 @@ def multi_predict(predict_list,processes=12):
if
__name__
==
"__main__"
:
# TODO 如果耗时小于一分钟,下一次取到的device_id和上一次相同。还有一种情况,一个用户持续活跃,会被重复预测
#
while True:
#
empty,device_id_list = get_active_users()
#
if empty:
#
for eachFile in os.listdir("/tmp"):
#
if "xlearn" in eachFile:
#
os.remove("/tmp" + "/" + eachFile)
#
time.sleep(58)
#
else:
#
old_device_id_list = pd.read_csv(DIRECTORY_PATH + "data_set_device_id.csv")["device_id"].values.tolist()
#
# 求活跃用户和老用户的交集,也就是只预测老用户
#
predict_list = list(set(device_id_list) & set(old_device_id_list))
#
multi_predict(predict_list)
router
(
"358035085192742"
)
while
True
:
empty
,
device_id_list
=
get_active_users
()
if
empty
:
for
eachFile
in
os
.
listdir
(
"/tmp"
):
if
"xlearn"
in
eachFile
:
os
.
remove
(
"/tmp"
+
"/"
+
eachFile
)
time
.
sleep
(
58
)
else
:
old_device_id_list
=
pd
.
read_csv
(
DIRECTORY_PATH
+
"data_set_device_id.csv"
)[
"device_id"
]
.
values
.
tolist
()
# 求活跃用户和老用户的交集,也就是只预测老用户
predict_list
=
list
(
set
(
device_id_list
)
&
set
(
old_device_id_list
))
multi_predict
(
predict_list
)
#TODO 上线前把预测流程中的计时器、打印代码删掉或者注释,因为预测对性能要求高,能少一条代码语句就少一条
...
...
userProfile.py
View file @
5668f217
...
...
@@ -31,5 +31,4 @@ def fetch_user_profile(device_id):
user_profile_dict
=
{}
for
i
in
user_profile
.
columns
:
user_profile_dict
[
i
]
=
user_profile
.
loc
[
0
,
i
]
print
(
user_profile_dict
)
return
user_profile_dict
,
False
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment