Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
605673e8
Commit
605673e8
authored
Aug 21, 2018
by
张彦钊
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update predictDiaryLocal file
parent
4758923b
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
93 additions
and
54 deletions
+93
-54
precitDIaryLocal.py
local/precitDIaryLocal.py
+93
-54
No files found.
local/precitDIaryLocal.py
View file @
605673e8
from
config
import
*
import
pandas
as
pd
import
pickle
import
pickle
import
xlearn
as
xl
import
xlearn
as
xl
from
userProfile
import
*
import
time
from
utils
import
*
from
utils
import
*
import
os
# 本地测试脚本
# 本地测试脚本
...
@@ -20,19 +16,20 @@ def test_con_sql(device_id):
...
@@ -20,19 +16,20 @@ def test_con_sql(device_id):
result
=
cursor
.
fetchall
()
result
=
cursor
.
fetchall
()
df
=
pd
.
DataFrame
(
list
(
result
))
df
=
pd
.
DataFrame
(
list
(
result
))
if
not
df
.
empty
:
if
not
df
.
empty
:
df
=
df
.
rename
(
columns
=
{
0
:
"native_queue"
,
1
:
"nearby_queue"
,
2
:
"nation_queue"
,
3
:
"megacity_queue"
})
df
=
df
.
rename
(
columns
=
{
0
:
"native_queue"
,
1
:
"nearby_queue"
,
2
:
"nation_queue"
,
3
:
"megacity_queue"
})
native_queue
=
df
.
loc
[
0
,
"native_queue"
]
.
split
(
","
)
native_queue_list
=
df
.
loc
[
0
,
"native_queue"
]
.
split
(
","
)
nearby_queue
=
df
.
loc
[
0
,
"nearby_queue"
]
.
split
(
","
)
nearby_queue_list
=
df
.
loc
[
0
,
"nearby_queue"
]
.
split
(
","
)
nation_queue
=
df
.
loc
[
0
,
"nation_queue"
]
.
split
(
","
)
nation_queue_list
=
df
.
loc
[
0
,
"nation_queue"
]
.
split
(
","
)
megacity_queue
=
df
.
loc
[
0
,
"megacity_queue"
]
.
split
(
","
)
megacity_queue_list
=
df
.
loc
[
0
,
"megacity_queue"
]
.
split
(
","
)
db
.
close
()
db
.
close
()
return
native_queue_list
,
nearby_queue_list
,
nation_queue_list
,
megacity_queue_list
return
native_queue
,
nearby_queue
,
nation_queue
,
megacity_queue
# 将device_id、city_id拼接到对应的城市热门日记表。注意:下面预测集特征顺序要与训练集保持一致
# 将device_id、city_id拼接到对应的城市热门日记表。注意:下面预测集特征顺序要与训练集保持一致
def
feature_en
(
x_list
,
device_id
):
def
feature_en
(
x_list
,
device_id
):
data
=
pd
.
DataFrame
(
x_list
)
data
=
pd
.
DataFrame
(
x_list
)
data
=
data
.
rename
(
columns
=
{
0
:
"diary_id"
})
data
=
data
.
rename
(
columns
=
{
0
:
"diary_id"
})
data
[
"device_id"
]
=
device_id
data
[
"device_id"
]
=
device_id
now
=
datetime
.
now
()
now
=
datetime
.
now
()
data
[
"hour"
]
=
now
.
hour
data
[
"hour"
]
=
now
.
hour
...
@@ -48,55 +45,99 @@ def feature_en(x_list,device_id):
...
@@ -48,55 +45,99 @@ def feature_en(x_list,device_id):
# Encode a feature table into ffm format with the pickled transformer
# (ffm.pkl) and dump the encoded rows to a timestamped csv for prediction.
def transform_ffm_format(df, device_id):
    """Transform *df* with the persisted ffm encoder and write it to disk.

    Returns the path of the csv file that was written, which the caller
    feeds to xlearn's setTest().
    """
    # NOTE(review): assumes the pickled object exposes .transform() and is
    # produced by the training pipeline — verify against the trainer script.
    with open("/Users/mac/utils/ffm.pkl", "rb") as pkl_file:
        ffm_format_pandas = pickle.load(pkl_file)

    encoded = ffm_format_pandas.transform(df)
    stamp = datetime.now().strftime("%Y-%m-%d-%H-%M")
    predict_file_name = "/Users/mac/utils/result/{0}_{1}.csv".format(device_id, stamp)
    encoded.to_csv(predict_file_name, index=False, header=None)
    print("成功将ffm预测文件写到本地")
    return predict_file_name
# 将模型加载,预测,把预测日记的概率值按照降序排序,存到一个表里
# Load the ffm model, score one queue's diary candidates, and hand the
# scores on for re-ranking.
def predict(queue_name, x_list, device_id):
    """Run the ffm model over the diary candidates of one queue.

    queue_name: queue column name ("native_queue", "nearby_queue", ...).
    x_list:     list of diary ids belonging to that queue.
    device_id:  device the prediction is personalised for.
    """
    # BUG FIX: feature_en is declared as feature_en(x_list, device_id); the
    # device_id argument was dropped here, which raises TypeError at runtime.
    data = feature_en(x_list, device_id)
    data_file_path = transform_ffm_format(data, device_id)
    ffm_model = xl.create_ffm()
    ffm_model.setTest(data_file_path)
    # Emit probabilities instead of raw scores.
    ffm_model.setSigmoid()
    ffm_model.predict("/Users/mac/utils/model.out",
                      "/Users/mac/utils/result/{0}_output.txt".format(queue_name))
    print("{}预测结束".format(queue_name))
    save_result(queue_name, x_list)
# 将预测结果与device_id 进行拼接,并按照概率降序排序
# Join the ffm scores back onto their diary ids and pass them on.
def save_result(queue_name, x_list):
    """Read the model output for *queue_name*, attach diary ids, and merge.

    Relies on the output file's row order matching the candidate order of
    x_list, so the ids can be assigned positionally.
    """
    output_path = "/Users/mac/utils/result/{0}_output.txt".format(queue_name)
    score_df = pd.read_csv(output_path, header=None)
    score_df = score_df.rename(columns={0: "score"})
    # Positional join: row i of the output corresponds to x_list[i].
    score_df["diary_id"] = x_list
    merge_score(x_list, score_df)
def merge_score(x_list, score_df):
    """Add each diary's offline feed score to *score_df* and trigger re-ranking.

    Looks every diary id up in biz_feed_diary_score; ids without a row
    default to score 0. The fetched score is added element-wise to the
    model score already present in score_df["score"].
    """
    # NOTE(review): credentials are hard-coded; move them to config.
    # NOTE(review): diary ids are string-formatted into the SQL — acceptable
    # only because x_list comes from our own queue table, but parameter
    # binding would be safer.
    db = pymysql.connect(host='rdsmaqevmuzj6jy.mysql.rds.aliyuncs.com', port=3306, user='work',
                         passwd='workwork', db='zhengxing_test')
    try:
        cursor = db.cursor()
        score_list = []
        for i in x_list:
            sql = "select score from biz_feed_diary_score where diary_id = '{}';".format(i)
            if cursor.execute(sql) != 0:
                # BUG FIX: fetchone() returns a 1-tuple; appending it unchanged
                # made score_list a mix of tuples and ints, which breaks the
                # element-wise addition below. Unwrap the scalar value.
                score_list.append(cursor.fetchone()[0])
            # 没有查到这个diary_id,默认score值是0
            else:
                score_list.append(0)
    finally:
        # Close the connection even if a query raises (was leaked on error).
        db.close()
    score_df["score"] = score_df["score"] + score_list
    update_dairy_queue(score_df)
def update_dairy_queue(score_df):
    """Re-rank a diary queue, keeping a video diary in every 5th slot.

    The incoming queue stores video diaries at 0-based positions 1, 6, 11, ...
    Video and non-video diaries are each sorted by "score" (descending), then
    the video diaries are spliced back in every 5 positions.

    score_df: DataFrame with columns "diary_id" and "score".
    Returns the re-ordered list of diary ids.
    """
    diary_id = score_df["diary_id"].values.tolist()
    # Collect the ids currently sitting in the video slots (index 1, 6, 11...).
    video_id = []
    x = 1
    # BUG FIX: the original condition was `x <= len(diary_id)`, which indexes
    # one past the end of the list (IndexError) whenever len(diary_id) % 5 == 1.
    while x < len(diary_id):
        video_id.append(diary_id[x])
        x += 5
    # Only used for membership in isin(), so set ordering is irrelevant.
    not_video_id = list(set(diary_id) - set(video_id))
    not_video_id_df = score_df.loc[score_df["diary_id"].isin(not_video_id)]
    not_video_id_df = not_video_id_df.sort_values(by="score", ascending=False)
    video_id_df = score_df.loc[score_df["diary_id"].isin(video_id)]
    video_id_df = video_id_df.sort_values(by="score", ascending=False)
    not_video_id = not_video_id_df["diary_id"].values.tolist()
    video_id = video_id_df["diary_id"].values.tolist()
    # Interleave: start from the sorted non-video list and splice a sorted
    # video diary back in at every 5th slot.
    diary_id = not_video_id
    i = 1
    for j in video_id:
        diary_id.insert(i, j)
        i += 5
    return diary_id
def update_sql_dairy_queue(queue_name, diary_id, device_id):
    """Persist a re-ranked diary queue for *device_id* back to MySQL.

    queue_name: column of device_diary_queue to update (e.g. "native_queue").
    diary_id:   new queue content written into that column.
    device_id:  row selector.
    """
    db = pymysql.connect(host='rdsmaqevmuzj6jy.mysql.rds.aliyuncs.com', port=3306, user='work',
                         passwd='workwork', db='doris_test')
    try:
        cursor = db.cursor()
        # BUG FIX: the column name was wrapped in single quotes, which MySQL
        # parses as a string literal, not an identifier — the UPDATE was a
        # syntax error. Identifiers must be backtick-quoted.
        # NOTE(review): values are still string-formatted in (matching the
        # original serialization of diary_id); parameter binding would be
        # safer if inputs can ever be untrusted.
        sql = "update device_diary_queue set `{}`='{}' where device_id = '{}'".format(
            queue_name, diary_id, device_id)
        cursor.execute(sql)
        # BUG FIX: pymysql does not autocommit by default; without commit()
        # the update was silently rolled back when the connection closed.
        db.commit()
    finally:
        db.close()
# 获取数据库表更新时间
# Fetch the last modification time of the device_diary_queue table.
def get_update_time():
    """Return UPDATE_TIME of doris_test.device_diary_queue from information_schema.

    May return None: MySQL only tracks UPDATE_TIME for some storage engines.
    """
    db = pymysql.connect(host='rdsmaqevmuzj6jy.mysql.rds.aliyuncs.com', port=3306, user='work',
                         passwd='workwork', db='doris_test')
    try:
        cursor = db.cursor()
        sql = "SELECT `UPDATE_TIME` FROM `information_schema`.`TABLES` " \
              "WHERE `information_schema`.`TABLES`.`TABLE_SCHEMA` = 'doris_test' " \
              "AND `information_schema`.`TABLES`.`TABLE_NAME` = 'device_diary_queue';"
        cursor.execute(sql)
        update_time = cursor.fetchone()[0]
        return update_time
    finally:
        # BUG FIX: the connection was never closed, leaking one connection
        # per call.
        db.close()
# 多进程预测
# NOTE(review): despite the comment above ("multi-process prediction"), the
# queues are processed serially here — confirm whether parallelism is planned.
if __name__ == "__main__":
    # 数据库没有更新时间字段,下面的代码不能使用
    # sql_update_time_start = get_update_time()
    native_queue_list, nearby_queue_list, nation_queue_list, megacity_queue_list = \
        test_con_sql("device_id")
    name_dict = {"native_queue": native_queue_list, "nearby_queue": nearby_queue_list,
                 "nation_queue": nation_queue_list, "megacity_queue": megacity_queue_list}
    for key in name_dict.keys():
        # BUG FIX: the device id was misspelled "devcie_id", so predictions
        # were produced for a non-existent device.
        # NOTE(review): predict() has no return statement, so diary_id is
        # None here and an empty queue gets written — confirm intent.
        diary_id = predict(key, name_dict[key], "device_id")
        sql_update_time_end = get_update_time()
        # 数据库没有更新时间字段,下面的代码不能使用
        # if sql_update_time_start == sql_update_time_end:
        update_sql_dairy_queue(key, diary_id, "device_id")
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment