Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
46db6c71
Commit
46db6c71
authored
Aug 27, 2018
by
高雅喆
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'master' of git.wanmeizhensuo.com:ML/ffm-baseline
update rate.csv
parents
abed2189
d1ddcfe6
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
17 additions
and
15 deletions
+17
-15
diaryQueueUpdate.py
diaryQueueUpdate.py
+16
-14
userProfile.py
userProfile.py
+1
-1
No files found.
diaryQueueUpdate.py
View file @
46db6c71
...
...
@@ -22,7 +22,8 @@ def get_video_id():
result
=
cursor
.
fetchall
()
df
=
pd
.
DataFrame
(
list
(
result
))
video_id
=
df
[
0
]
.
values
.
tolist
()
print
(
video_id
[:
10
])
print
(
"video_id候选集"
)
print
(
video_id
[:
80
])
db
.
close
()
return
video_id
...
...
@@ -110,14 +111,13 @@ def get_score(queue_arg):
return
score_df
def
update_dairy_queue
(
score_df
,
predict_score_df
):
def
update_dairy_queue
(
score_df
,
predict_score_df
,
total_video_id
):
diary_id
=
score_df
[
"cid"
]
.
values
.
tolist
()
video_id
=
[]
x
=
1
while
x
<
len
(
diary_id
):
video_id
.
append
(
diary_id
[
x
])
x
+=
5
video_id
=
list
(
set
(
diary_id
)
&
set
(
total_video_id
))
print
(
"video_id:"
)
print
(
video_id
)
if
len
(
video_id
)
>
0
:
not_video
=
list
(
set
(
diary_id
)
-
set
(
video_id
))
# 为了相加时cid能够匹配,先把cid变成索引
...
...
@@ -210,7 +210,7 @@ def get_queue(device_id, city_id,queue_name):
return
queue_list
def
pipe_line
(
queue_name
,
queue_arg
,
device_id
):
def
pipe_line
(
queue_name
,
queue_arg
,
device_id
,
total_video_id
):
predict
(
queue_name
,
queue_arg
,
device_id
)
predict_score_df
=
save_result
(
queue_name
,
queue_arg
,
device_id
)
score_df
=
get_score
(
queue_arg
)
...
...
@@ -221,18 +221,18 @@ def pipe_line(queue_name, queue_arg, device_id):
score_df
=
score_df
.
rename
(
columns
=
{
0
:
"score"
,
1
:
"cid"
})
print
(
"日记打分表"
)
print
(
score_df
.
head
(
1
))
diary_queue
=
update_dairy_queue
(
score_df
,
predict_score_df
)
diary_queue
=
update_dairy_queue
(
score_df
,
predict_score_df
,
total_video_id
)
return
diary_queue
def
user_update
(
device_id
,
city_id
,
queue_name
,
data_set_cid
):
def
user_update
(
device_id
,
city_id
,
queue_name
,
data_set_cid
,
total_video_id
):
queue_list
=
get_queue
(
device_id
,
city_id
,
queue_name
)
if
queue_list
:
queue_predict
=
list
(
set
(
queue_list
)
&
set
(
data_set_cid
))
queue_not_predict
=
list
(
set
(
queue_list
)
-
set
(
data_set_cid
))
queue_arg
=
[
queue_predict
,
queue_not_predict
,
queue_list
]
if
queue_predict
!=
[]:
diary_queue
=
pipe_line
(
queue_name
,
queue_arg
,
device_id
)
diary_queue
=
pipe_line
(
queue_name
,
queue_arg
,
device_id
,
total_video_id
)
if
diary_queue
:
update_sql_dairy_queue
(
queue_name
,
diary_queue
,
device_id
,
city_id
)
print
(
"更新结束"
)
...
...
@@ -243,22 +243,24 @@ def user_update(device_id, city_id, queue_name,data_set_cid):
else
:
print
(
"日记队列为空"
)
def
multi_proecess_update
(
device_id
,
city_id
,
data_set_cid
):
def
multi_proecess_update
(
device_id
,
city_id
,
data_set_cid
,
total_video_id
):
queue_name_list
=
[
"native_queue"
,
"nearby_queue"
,
"nation_queue"
,
"megacity_queue"
]
pool
=
Pool
(
4
)
for
queue_name
in
queue_name_list
:
pool
.
apply_async
(
user_update
,
(
device_id
,
city_id
,
queue_name
,
data_set_cid
,))
pool
.
apply_async
(
user_update
,
(
device_id
,
city_id
,
queue_name
,
data_set_cid
,
total_video_id
,
))
pool
.
close
()
pool
.
join
()
def
run
():
data_set_cid
=
pd
.
read_csv
(
DIRECTORY_PATH
+
"data_set_cid.csv"
)[
"cid"
]
.
values
.
tolist
()
total_video_id
=
get_video_id
()
device_city_list
=
get_active_users
()
for
device_city
in
device_city_list
:
start
=
time
.
time
()
multi_proecess_update
(
device_city
[
0
],
device_city
[
1
],
data_set_cid
)
multi_proecess_update
(
device_city
[
0
],
device_city
[
1
],
data_set_cid
,
total_video_id
)
end
=
time
.
time
()
print
(
"更新该用户队列耗时{}秒"
.
format
((
end
-
start
)))
print
(
"end"
)
...
...
userProfile.py
View file @
46db6c71
...
...
@@ -14,7 +14,7 @@ def get_active_users():
sql
=
"select device_id,city_id from user_active_time "
\
"where active_time <= '{}' and active_time >= '{}'"
.
format
(
now_end
,
now_start
)
df
=
con_sql
(
sql
)
return
((
"
358035085192742
"
,
"beijing"
),)
return
((
"
AB20292B-5D15-4C44-9429-1C2FF5ED26F6
"
,
"beijing"
),)
if
df
.
empty
:
print
(
"当下这一分钟没有活跃用户,不需要预测"
)
for
eachFile
in
os
.
listdir
(
"/tmp"
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment