Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
b3cabea7
Commit
b3cabea7
authored
Aug 31, 2018
by
高雅喆
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'master' of git.wanmeizhensuo.com:ML/ffm-baseline
gray stat data to file
parents
bdc5743a
aaf2c17d
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
27 additions
and
43 deletions
+27
-43
diaryUpdateOnlineOffline.py
diaryUpdateOnlineOffline.py
+5
-7
userProfile.py
userProfile.py
+22
-12
utils.py
utils.py
+0
-24
No files found.
diaryUpdateOnlineOffline.py
View file @
b3cabea7
...
...
@@ -16,7 +16,6 @@ from config import *
import
socket
def
get_video_id
(
cache_video_id
):
if
flag
:
db
=
pymysql
.
connect
(
host
=
ONLINE_EAGLE_HOST
,
port
=
4000
,
user
=
'root'
,
passwd
=
'3SYz54LS9#^9sBvC'
,
db
=
'eagle'
)
...
...
@@ -283,17 +282,19 @@ if __name__ == "__main__":
flag
=
True
path
=
DIRECTORY_PATH
# 下面这个ip是本地电脑ip
if
socket
.
gethostbyname
(
socket
.
gethostname
())
==
'172.30.
5.84
'
:
if
socket
.
gethostbyname
(
socket
.
gethostname
())
==
'172.30.
8.160
'
:
flag
=
False
path
=
LOCAL_DIRCTORY
# 增加缓存日记视频列表
cache_video_id
=
[]
cache_device_city_list
=
[]
differ
=
0
while
True
:
data_set_cid
=
pd
.
read_csv
(
path
+
"data_set_cid.csv"
)[
"cid"
]
.
values
.
tolist
()
total_video_id
=
get_video_id
(
cache_video_id
)
cache_video_id
=
total_video_id
device_city_list
=
get_active_users
(
flag
,
path
)
start
=
time
.
time
()
device_city_list
=
get_active_users
(
flag
,
path
,
differ
)
# 过滤掉5分钟内预测过的用户
device_city_list
=
list
(
set
(
tuple
(
device_city_list
))
-
set
(
tuple
(
cache_device_city_list
)))
if
datetime
.
now
()
.
minute
%
5
==
0
:
...
...
@@ -301,11 +302,8 @@ if __name__ == "__main__":
if
device_city_list
!=
[]:
cache_device_city_list
.
extend
(
device_city_list
)
for
device_city
in
device_city_list
:
# start = time.time()
multi_proecess_update
(
device_city
[
0
],
device_city
[
1
],
data_set_cid
,
total_video_id
)
# end = time.time()
# print("更新该用户队列耗时{}秒".format((end - start)))
differ
=
time
.
time
()
-
start
...
...
userProfile.py
View file @
b3cabea7
...
...
@@ -5,32 +5,44 @@ import pandas as pd
import
os
import
time
import
pymysql
import
time
# 获取当下一分钟内活跃用户
def
get_active_users
(
flag
,
path
):
now
=
datetime
.
now
()
now_start
=
str
(
now
)[:
16
]
+
":00"
now_end
=
str
(
now
)[:
16
]
+
":59"
def
get_active_users
(
flag
,
path
,
differ
):
if
differ
==
0
:
end
=
time
.
time
()
start
=
time
.
time
()
-
60
elif
0
<
differ
<
10
:
time
.
sleep
(
30
)
differ
+=
30
end
=
time
.
time
()
start
=
end
-
differ
else
:
end
=
time
.
time
()
start
=
end
-
differ
end_datetime
=
str
(
datetime
.
fromtimestamp
(
end
))
start_datetime
=
str
(
datetime
.
fromtimestamp
(
start
))
sql
=
"select device_id,city_id from user_active_time "
\
"where active_time <= '{}' and active_time >= '{}'"
.
format
(
now_end
,
now_start
)
"where active_time <= '{}' and active_time >= '{}'"
.
format
(
end_datetime
,
start_datetime
)
if
flag
:
df
=
con_sql
(
sql
)
else
:
db
=
pymysql
.
connect
(
host
=
'192.168.15.12'
,
port
=
4000
,
user
=
'root'
,
db
=
'jerry_test'
)
sql
=
"select device_id,city_id from user_active_time"
#
sql = "select device_id,city_id from user_active_time"
cursor
=
db
.
cursor
()
cursor
.
execute
(
sql
)
result
=
cursor
.
fetchall
()
df
=
pd
.
DataFrame
(
list
(
result
))
.
dropna
()
db
.
close
()
if
df
.
empty
:
print
(
"当下这一分钟没有活跃用户,不需要预测"
)
time
.
sleep
(
56
)
print
(
"当下没有活跃用户数"
)
return
[]
else
:
# 统计活跃用户中尾号是6的用户数
else
:
temp_list
=
df
[
0
]
.
values
.
tolist
()
now
=
datetime
.
now
()
tail6_file_path
=
path
+
"{}tail6Unique.csv"
.
format
(
str
(
now
)[:
10
])
if
os
.
path
.
exists
(
tail6_file_path
):
# 尾号是6的活跃用户数
...
...
@@ -50,7 +62,6 @@ def get_active_users(flag,path):
df
=
df
.
loc
[
df
[
0
]
.
isin
(
old_device_id_list
)]
if
df
.
empty
:
print
(
"该列表是新用户,不需要预测"
)
time
.
sleep
(
56
)
return
[]
else
:
# TODO 正式上线后注释下面的只预测尾号是6的代码
...
...
@@ -62,7 +73,6 @@ def get_active_users(flag,path):
device_temp_list
))
if
predict_list
==
[]:
print
(
'没有尾号是6和目标用户'
)
time
.
sleep
(
56
)
return
[]
else
:
df
=
df
.
loc
[
df
[
0
]
.
isin
(
predict_list
)]
...
...
@@ -78,7 +88,7 @@ def get_active_users(flag,path):
all_predict_list
=
eval
(
pd
.
read_csv
(
predict_file_path
)
.
loc
[
0
,
"list"
])
else
:
all_predict_list
=
[]
all_predict_list
.
extend
(
device_
city_
list
)
all_predict_list
.
extend
(
device_list
)
if
all_predict_list
!=
[]:
df_predict
=
pd
.
DataFrame
({
"number"
:
[
len
(
set
(
all_predict_list
))],
"time"
:
[
str
(
now
)[:
16
]],
"list"
:
[
list
(
set
(
all_predict_list
))]})
...
...
utils.py
View file @
b3cabea7
...
...
@@ -48,30 +48,6 @@ def con_sql(sql):
return
df
def queue_compare(old_list, new_list):
    """Report how many diary ids changed position between two queues.

    Args:
        old_list: Previous queue. Each entry is a string whose first six
            characters (the "diary|" prefix) are dropped and whose
            remainder is parsed as an int.
        new_list: New queue of diary ids, compared by position against the
            ids parsed from ``old_list``.

    Returns:
        None. When at least one id moved (or is missing from ``new_list``),
        the module-level ``update_queue_numbers`` counter is incremented and
        two summary lines are printed.
    """
    # The counter lives at module level; without this declaration the
    # augmented assignment below would raise UnboundLocalError.
    global update_queue_numbers

    # Strip the leading "diary|" prefix.
    old_ids = [int(entry[6:]) for entry in old_list]

    # Map each id to its position (the last occurrence wins for duplicates).
    old_positions = {diary_id: idx for idx, diary_id in enumerate(old_ids)}
    new_positions = {diary_id: idx for idx, diary_id in enumerate(new_list)}

    # An id absent from the new queue counts as moved instead of raising
    # KeyError.
    changed = sum(
        1
        for diary_id, idx in old_positions.items()
        if new_positions.get(diary_id) != idx
    )

    # NOTE(review): the original indentation is not recoverable from the
    # source view; both prints are assumed to belong to this branch.
    if changed > 0:
        # Start from 0 if the module never initialised the counter.
        update_queue_numbers = globals().get("update_queue_numbers", 0) + 1
        print("更新日记队列总数:{}".format(update_queue_numbers))
        # Round the percentage to two decimals; the original passed the 2 to
        # format() instead of round().
        print("日记队列更新前日记总个数{},位置发生变化个数{},发生变化率{}%".format(
            len(old_ids), changed, round(changed / len(old_ids) * 100, 2)))
def
move_file
():
import
os
for
eachFile
in
os
.
listdir
(
"/data2/models/train"
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment