Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
dbfe7ffc
Commit
dbfe7ffc
authored
Aug 21, 2018
by
高雅喆
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'master' of git.wanmeizhensuo.com:ML/ffm-baseline
update output format
parents
a99efd7d
4758923b
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
128 additions
and
0 deletions
+128
-0
precitDIaryLocal.py
local/precitDIaryLocal.py
+128
-0
No files found.
local/precitDIaryLocal.py
0 → 100644
View file @
dbfe7ffc
from
config
import
*
import
pandas
as
pd
import
pickle
import
xlearn
as
xl
from
userProfile
import
*
import
time
from
utils
import
*
import
os
# Local test script.
# Fetch queue data from the test TiDB database table and convert it to df format.
def test_con_sql(device_id):
    """Fetch the four diary queues for *device_id* from ``device_diary_queue``.

    Returns a 4-tuple of lists ``(native, nearby, nation, megacity)``, each the
    comma-split contents of the corresponding queue column.  When no row exists
    for the device, four empty lists are returned (the original code left the
    list variables unbound in that case and raised NameError at ``return``).
    """
    # NOTE(review): pymysql is not imported explicitly in this file; it is
    # presumably re-exported by one of the star imports — confirm.
    db = pymysql.connect(host='rdsmaqevmuzj6jy.mysql.rds.aliyuncs.com', port=3306,
                         user='work', passwd='workwork', db='doris_test')
    # Fix: pre-initialize so an empty result set yields empty lists instead of
    # an unbound-name error.
    native_queue_list = []
    nearby_queue_list = []
    nation_queue_list = []
    megacity_queue_list = []
    try:
        cursor = db.cursor()
        # Fix: parameterized query instead of str.format — avoids SQL injection
        # and quoting bugs in device ids.
        sql = ("select native_queue,nearby_queue,nation_queue,megacity_queue "
               "from device_diary_queue where device_id = %s;")
        cursor.execute(sql, (device_id,))
        result = cursor.fetchall()
        df = pd.DataFrame(list(result))
        if not df.empty:
            df = df.rename(columns={0: "native_queue", 1: "nearby_queue",
                                    2: "nation_queue", 3: "megacity_queue"})
            native_queue_list = df.loc[0, "native_queue"].split(",")
            nearby_queue_list = df.loc[0, "nearby_queue"].split(",")
            nation_queue_list = df.loc[0, "nation_queue"].split(",")
            megacity_queue_list = df.loc[0, "megacity_queue"].split(",")
    finally:
        # Fix: always release the connection, even if the query raises.
        db.close()
    return native_queue_list, nearby_queue_list, nation_queue_list, megacity_queue_list
# Attach device_id (and time features) to the city hot-diary queue.
# NOTE: the prediction-set feature order must stay in sync with the training set.
def feature_en(x_list, device_id):
    """Build the prediction feature frame for one device's diary queue.

    ``x_list`` is a list of diary ids; the result has columns
    diary_id / device_id / hour / minute / y, with hour and minute encoded
    as categoricals and y fixed to 0 (required by the ffm converter but
    ignored at prediction time).
    """
    frame = pd.DataFrame(x_list).rename(columns={0: "diary_id"})
    frame["device_id"] = device_id

    stamp = datetime.now()
    # Midnight / on-the-hour values are remapped (0 -> 24, 0 -> 60) to match
    # the training-set encoding.
    frame["hour"] = stamp.hour if stamp.hour != 0 else 24
    frame["minute"] = stamp.minute if stamp.minute != 0 else 60
    frame["hour"] = frame["hour"].astype("category")
    frame["minute"] = frame["minute"].astype("category")

    # A label column is required by the ffm format conversion; its value does
    # not influence the prediction output.
    frame["y"] = 0
    return frame
# Load ffm.pkl and convert the frame built above into ffm format.
def transform_ffm_format(df, device_id):
    """Encode *df* with the pickled ffm transformer and dump it to a CSV.

    Returns the path of the written file, which embeds the device id and a
    minute-resolution timestamp.
    """
    # NOTE(review): paths here are hard-coded while other helpers build paths
    # from DIRECTORY_PATH — confirm whether they should agree.
    with open("/Users/mac/utils/ffm.pkl", "rb") as pkl_file:
        encoder = pickle.load(pkl_file)

    transformed = encoder.transform(df)
    stamp = datetime.now().strftime("%Y-%m-%d-%H-%M")
    out_path = "/Users/mac/utils/result/{0}_{1}.csv".format(device_id, stamp)
    transformed.to_csv(out_path, index=False, header=None)
    print("成功将ffm预测文件写到本地")  # wrote the ffm prediction file to local disk
    return out_path
# Load the model, predict, and persist diary probabilities sorted descending.
def predict(queue_name, x_list, device_id):
    """Run the ffm model over one diary queue and save the ranked candidates.

    ``queue_name`` names the queue (used in the output file name),
    ``x_list`` is that queue's diary-id list, ``device_id`` identifies the
    device being scored.
    """
    # Fix: feature_en requires device_id; the original call feature_en(x_list)
    # raised TypeError (missing required positional argument).
    instance = feature_en(x_list, device_id)
    instance_file_path = transform_ffm_format(instance, device_id)

    ffm_model = xl.create_ffm()
    ffm_model.setTest(instance_file_path)
    ffm_model.setSigmoid()  # output probabilities, not raw scores
    ffm_model.predict("/Users/mac/utils/model.out",
                      "/Users/mac/utils/result/{0}_output.txt".format(queue_name))
    print("{}预测结束".format(queue_name))

    # Fix: user_profile was undefined in this scope (NameError). Downstream
    # code only reads user_profile['device_id'], so a minimal mapping suffices.
    # TODO(review): confirm whether a full profile (see the commented-out
    # router() further down) was intended here instead.
    predict_save_to_local({"device_id": device_id}, instance)
# Join the prediction probabilities with the diary ids, sorted by probability.
def wrapper_result(user_profile, instance):
    """Attach diary ids to the model's output probabilities.

    Reads the per-device output file, sorts by probability descending and
    returns the top 50 rows (columns: prob, cid).
    """
    proba = pd.read_csv(
        DIRECTORY_PATH + "result/{0}_output.txt".format(user_profile['device_id']),
        header=None)
    proba = proba.rename(columns={0: "prob"})
    # Fix: feature_en() names the id column "diary_id", not "cid"; the original
    # instance['cid'] raised KeyError when called from predict(). Fall back so
    # both column spellings work (rows align on the shared RangeIndex).
    id_col = "cid" if "cid" in instance.columns else "diary_id"
    proba["cid"] = instance[id_col]
    proba = proba.sort_values(by="prob", ascending=False)
    proba = proba.head(50)
    return proba
# Persist the ranked prediction candidate set to local disk.
def predict_save_to_local(user_profile, instance):
    """Build diary URLs for the top candidates and write them out as a CSV feed."""
    ranked = wrapper_result(user_profile, instance)

    def _diary_url(cid):
        # Characters after position 6 of the cid form the diary id —
        # TODO(review): confirm the 6-char prefix against real cid values.
        return "http://m.igengmei.com/diary_book/" + str(cid[6:]) + '/'

    ranked.loc[:, "url"] = ranked["cid"].apply(_diary_url)
    feed_path = DIRECTORY_PATH + "result/feed_{}".format(user_profile['device_id'])
    ranked.to_csv(feed_path, index=False)
    print("成功将预测候选集保存到本地")  # saved the prediction candidate set locally
# def router(device_id):
# user_profile, not_exist = fetch_user_profile(device_id)
# if not_exist:
# print('Sorry, we don\'t have you.')
# else:
# predict(user_profile)
# 多进程预测
# def multi_predict(predict_list,processes=12):
# pool = Pool(processes)
# for device_id in predict_list:
# start = time.time()
# pool.apply_async(router, (device_id,))
# end = time.time()
# print("该用户{}预测耗时{}秒".format(device_id, (end - start)))
#
# pool.close()
# pool.join()
if __name__ == "__main__":
    # NOTE(review): "device_id" is a literal placeholder (as in the original)
    # — substitute a real device id when running this local test.
    device_id = "device_id"
    queues = test_con_sql(device_id)
    queue_names = ("native_queue", "nearby_queue", "nation_queue", "megacity_queue")
    # Score each of the four queues in the same order the tuple is returned.
    for queue_name, queue_list in zip(queue_names, queues):
        predict(queue_name, queue_list, device_id)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment