Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
def62f9f
Commit
def62f9f
authored
Aug 29, 2018
by
高雅喆
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'master' of git.wanmeizhensuo.com:ML/ffm-baseline
add txt gitignore
parents
4b17aa5b
c5bcb8e4
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
64 additions
and
6 deletions
+64
-6
diary-training.py
diary-training.py
+2
-0
diaryQueueUpdate.py
diaryQueueUpdate.py
+38
-6
utils.py
utils.py
+24
-0
No files found.
diary-training.py
View file @
def62f9f
...
@@ -4,6 +4,8 @@ import pandas as pd
...
@@ -4,6 +4,8 @@ import pandas as pd
from
sklearn.utils
import
shuffle
from
sklearn.utils
import
shuffle
import
numpy
as
np
import
numpy
as
np
import
xlearn
as
xl
import
xlearn
as
xl
# 从数据库的表里获取数据,并转化成df格式
# 从数据库的表里获取数据,并转化成df格式
def
con_sql
(
sql
):
def
con_sql
(
sql
):
db
=
pymysql
.
connect
(
host
=
'10.66.157.22'
,
port
=
4000
,
user
=
'root'
,
passwd
=
'3SYz54LS9#^9sBvC'
,
db
=
'jerry_test'
)
db
=
pymysql
.
connect
(
host
=
'10.66.157.22'
,
port
=
4000
,
user
=
'root'
,
passwd
=
'3SYz54LS9#^9sBvC'
,
db
=
'jerry_test'
)
...
...
diaryQueueUpdate.py
View file @
def62f9f
...
@@ -170,7 +170,32 @@ def update_sql_dairy_queue(queue_name, diary_id,device_id, city_id):
...
@@ -170,7 +170,32 @@ def update_sql_dairy_queue(queue_name, diary_id,device_id, city_id):
cursor
.
execute
(
sql
)
cursor
.
execute
(
sql
)
db
.
commit
()
db
.
commit
()
db
.
close
()
db
.
close
()
print
(
"成功写入diaryid"
)
print
(
"成功写入diary_id"
)
def queue_compare(old_list, new_list):
    """Report how many diaries changed position between two queue versions.

    Args:
        old_list: Previous queue; each item is a string whose first six
            characters are a prefix (the original comment names "diary|")
            followed by an integer diary id.
        new_list: New queue of diary ids.
            NOTE(review): assumed to hold ints comparable with the parsed
            old ids -- confirm against the caller.

    Side effects:
        Prints the running total of updated queues, and, when at least one
        position changed, increments the module-level
        ``update_queue_numbers`` counter and prints a change summary.
    """
    global update_queue_numbers
    # Tolerate the counter not having been initialised at module level yet.
    updated_total = globals().get("update_queue_numbers", 0)
    update_queue_numbers = updated_total
    print("更新日记队列总数:{}".format(updated_total))

    # Strip the leading "diary|" prefix and keep the numeric id.
    old_ids = [int(item[6:]) for item in old_list]

    # Map each diary id to its position (the last occurrence wins).
    old_positions = {diary_id: pos for pos, diary_id in enumerate(old_ids)}
    new_positions = {diary_id: pos for pos, diary_id in enumerate(new_list)}

    # An id that is absent from the new queue counts as changed; .get()
    # avoids the KeyError the direct lookup raised in that case.
    changed = sum(
        1
        for diary_id, pos in old_positions.items()
        if new_positions.get(diary_id) != pos
    )

    if changed > 0:
        update_queue_numbers = updated_total + 1
        print("更新日记队列总数:{}".format(update_queue_numbers))
        # The precision argument belongs to round(), not to format().
        change_rate = round(changed / len(old_ids) * 100, 2)
        print("日记队列更新前日记总个数{},位置发生变化个数{},发生变化率{}%".format(
            len(old_ids), changed, change_rate))
def
get_queue
(
device_id
,
city_id
,
queue_name
):
def
get_queue
(
device_id
,
city_id
,
queue_name
):
...
@@ -218,6 +243,7 @@ def user_update(device_id, city_id, queue_name,data_set_cid,total_video_id):
...
@@ -218,6 +243,7 @@ def user_update(device_id, city_id, queue_name,data_set_cid,total_video_id):
diary_queue
=
pipe_line
(
queue_name
,
queue_arg
,
device_id
,
total_video_id
)
diary_queue
=
pipe_line
(
queue_name
,
queue_arg
,
device_id
,
total_video_id
)
if
diary_queue
:
if
diary_queue
:
update_sql_dairy_queue
(
queue_name
,
diary_queue
,
device_id
,
city_id
)
update_sql_dairy_queue
(
queue_name
,
diary_queue
,
device_id
,
city_id
)
queue_compare
(
queue_list
,
diary_queue
)
# print("更新结束")
# print("更新结束")
else
:
else
:
print
(
"获取的日记列表是空,所以不更新日记队列"
)
print
(
"获取的日记列表是空,所以不更新日记队列"
)
...
@@ -242,17 +268,23 @@ if __name__ == "__main__":
...
@@ -242,17 +268,23 @@ if __name__ == "__main__":
# 增加缓存日记视频列表
# 增加缓存日记视频列表
cache_video_id
=
[]
cache_video_id
=
[]
cache_device_city_list
=
[]
cache_device_city_list
=
[]
update_queue_numbers
=
0
while
True
:
while
True
:
data_set_cid
=
pd
.
read_csv
(
DIRECTORY_PATH
+
"data_set_cid.csv"
)[
"cid"
]
.
values
.
tolist
()
data_set_cid
=
pd
.
read_csv
(
DIRECTORY_PATH
+
"data_set_cid.csv"
)[
"cid"
]
.
values
.
tolist
()
total_video_id
=
get_video_id
(
cache_video_id
)
total_video_id
=
get_video_id
(
cache_video_id
)
cache_video_id
=
total_video_id
cache_video_id
=
total_video_id
device_city_list
=
get_active_users
()
device_city_list
=
get_active_users
()
# 过滤掉上次预测过的用户
print
(
"过滤前用户数:{}"
.
format
(
len
(
device_city_list
)))
device_city_list
=
list
(
set
(
device_city_list
)
-
set
(
cache_device_city_list
))
# 过滤掉5分钟内预测过的用户
cache_device_city_list
=
device_city_list
device_city_list
=
list
(
set
(
tuple
(
device_city_list
))
-
set
(
tuple
(
cache_device_city_list
)))
total_number
+=
len
(
device_city_list
)
print
(
"过滤后用户数:{}"
.
format
(
len
(
device_city_list
)))
print
(
"累计预测用户总数:{}"
.
format
(
total_number
))
print
(
"缓存视频个数:{}"
.
format
(
len
(
cache_device_city_list
)))
if
datetime
.
now
()
.
minute
%
5
==
0
:
cache_device_city_list
=
[]
if
device_city_list
!=
[]:
if
device_city_list
!=
[]:
cache_device_city_list
.
extend
(
device_city_list
)
total_number
+=
len
(
device_city_list
)
print
(
"累计预测用户总数:{}"
.
format
(
total_number
))
for
device_city
in
device_city_list
:
for
device_city
in
device_city_list
:
# start = time.time()
# start = time.time()
multi_proecess_update
(
device_city
[
0
],
device_city
[
1
],
data_set_cid
,
total_video_id
)
multi_proecess_update
(
device_city
[
0
],
device_city
[
1
],
data_set_cid
,
total_video_id
)
...
...
utils.py
View file @
def62f9f
...
@@ -48,6 +48,30 @@ def con_sql(sql):
...
@@ -48,6 +48,30 @@ def con_sql(sql):
return
df
return
df
def queue_compare(old_list, new_list):
    """Report how many diaries changed position between two queue versions.

    Args:
        old_list: Previous queue; each item is a string whose first six
            characters are a prefix (the original comment names "diary|")
            followed by an integer diary id.
        new_list: New queue of diary ids.
            NOTE(review): assumed to hold ints comparable with the parsed
            old ids -- confirm against the caller.

    Side effects:
        When at least one position changed, increments the module-level
        ``update_queue_numbers`` counter (created at 0 if missing) and
        prints the new total plus a change summary.
    """
    # Without this declaration the in-place increment below raised
    # UnboundLocalError, because the name was treated as a local.
    global update_queue_numbers

    # Strip the leading "diary|" prefix and keep the numeric id.
    old_ids = [int(item[6:]) for item in old_list]

    # Map each diary id to its position (the last occurrence wins).
    old_positions = {diary_id: pos for pos, diary_id in enumerate(old_ids)}
    new_positions = {diary_id: pos for pos, diary_id in enumerate(new_list)}

    # An id that is absent from the new queue counts as changed; .get()
    # avoids the KeyError the direct lookup raised in that case.
    changed = sum(
        1
        for diary_id, pos in old_positions.items()
        if new_positions.get(diary_id) != pos
    )

    if changed > 0:
        # Tolerate the counter not having been initialised at module level.
        update_queue_numbers = globals().get("update_queue_numbers", 0) + 1
        print("更新日记队列总数:{}".format(update_queue_numbers))
        # The precision argument belongs to round(), not to format().
        change_rate = round(changed / len(old_ids) * 100, 2)
        print("日记队列更新前日记总个数{},位置发生变化个数{},发生变化率{}%".format(
            len(old_ids), changed, change_rate))
def
move_file
():
def
move_file
():
import
os
import
os
for
eachFile
in
os
.
listdir
(
"/data2/models/train"
):
for
eachFile
in
os
.
listdir
(
"/data2/models/train"
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment