ML / ffm-baseline · Commits · c9ffad87

Commit c9ffad87, authored Aug 08, 2018 by 张彦钊

    fix bugs

parent f57de602

Showing 4 changed files with 32 additions and 27 deletions (+32 −27)
config.py            +5   −0
diaryTraining.py     +6   −7
processData.py       +1   −1
userDiaryPredict.py  +20  −19
config.py

@@ -6,5 +6,10 @@ DATA_START_DATE = '2018-07-05'
 DATA_END_DATE = '2018-08-06'
 MODEL_VERSION = ''
+lr = 0.03
+l2_lambda = 0.002
+# processData.py
+# diaryTraining.py
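With the learning rate and L2 penalty now defined once in config.py, the training and prediction scripts can pull them from there instead of hard-coding the values. A minimal sketch of that import pattern, assuming config.py is on the import path; the actual import lines are not shown in this diff:

    # Assumed usage; DIRECTORY_PATH and the date constants also live in config.py.
    from config import DIRECTORY_PATH, DATA_START_DATE, DATA_END_DATE, lr, l2_lambda

    model_path = DIRECTORY_PATH + "model_{0}-{1}_lr{2}_lambda{3}.out".format(
        DATA_START_DATE, DATA_END_DATE, lr, l2_lambda)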
diaryTraining.py

@@ -6,22 +6,21 @@ print("Start training")
 ffm_model = xl.create_ffm()
 ffm_model.setTrain(DIRECTORY_PATH + "train{0}-{1}.csv".format(DATA_START_DATE, VALIDATION_DATE))
 ffm_model.setValidate(DIRECTORY_PATH + "validation{0}.csv".format(VALIDATION_DATE))
-lr = 0.03
-l2_lambda = 0.002
 param = {'task': 'binary', 'lr': lr, 'lambda': l2_lambda, 'metric': 'auc'}
 ffm_model.fit(param, DIRECTORY_PATH + "model_{0}-{1}_lr{2}_lambda{3}.out".format(DATA_START_DATE, DATA_END_DATE, lr, l2_lambda))
 print("predicting")
 ffm_model.setTest(DIRECTORY_PATH + "test{0}.csv".format(TEST_DATE))
 ffm_model.setSigmoid()
-ffm_model.predict(DIRECTORY_PATH + "model_{0}-{1}_lr{2}_lambda{3}.out".format(DATA_START_DATE, DATA_END_DATE, "0.03", "0.002"),
-                  DIRECTORY_PATH + "testset{0}_output_model_{1}-{2}_lr{3}_lambda{4}.txt".format(TEST_DATE, DATA_START_DATE, DATA_END_DATE, "0.03", "0.002"))
+ffm_model.predict(DIRECTORY_PATH + "model_{0}-{1}_lr{2}_lambda{3}.out".format(DATA_START_DATE, DATA_END_DATE, lr, l2_lambda),
+                  DIRECTORY_PATH + "testset{0}_output_model_{1}-{2}_lr{3}_lambda{4}.txt".format(TEST_DATE, DATA_START_DATE, DATA_END_DATE, lr, l2_lambda))
 print('---------------candidates--------------')
 get_eachCityDiaryTop2000()
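For context, the calls in diaryTraining.py follow the standard xlearn FFM workflow: create a model, point it at the train, validation and test files, fit with a parameter dict, then write sigmoid-scaled predictions to a text file. A minimal, self-contained sketch of that workflow; the file names here are placeholders, not the project's DIRECTORY_PATH-based paths:

    import xlearn as xl

    ffm_model = xl.create_ffm()
    ffm_model.setTrain("./train.ffm")          # training set in libffm format
    ffm_model.setValidate("./validation.ffm")  # validation set for AUC reporting

    param = {'task': 'binary', 'lr': 0.03, 'lambda': 0.002, 'metric': 'auc'}
    ffm_model.fit(param, "./model.out")        # trains and saves the model file

    ffm_model.setTest("./test.ffm")
    ffm_model.setSigmoid()                     # map raw scores to (0, 1) probabilities
    ffm_model.predict("./model.out", "./output.txt")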
processData.py

@@ -53,7 +53,7 @@ def feature_en():
     print(data.head(2))
     # persist the candidate cids
-    data_set_cid = data[["cid"]].unique()
+    data_set_cid = data["cid"].unique()
     cid_df = pd.DataFrame()
     cid_df['cid'] = data_set_cid
     print("data_set_cid :")
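The one-line change here fixes an AttributeError: data[["cid"]] selects a one-column DataFrame, and DataFrame has no .unique() method, whereas data["cid"] is a Series, which does. A quick illustration with a throwaway frame (the values are made up for the example):

    import pandas as pd

    df = pd.DataFrame({"cid": [3, 3, 7, 7, 9]})

    # df[["cid"]] is a DataFrame -> no .unique(); calling it raises AttributeError.
    # df["cid"] is a Series -> .unique() returns the distinct values as an ndarray.
    data_set_cid = df["cid"].unique()
    print(data_set_cid)  # [3 7 9]

    cid_df = pd.DataFrame()
    cid_df["cid"] = data_set_cid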
userDiaryPredict.py

@@ -13,6 +13,7 @@ from userProfile import fetch_user_profile
 def device_id_merge(user_profile):
     file_name = DIRECTORY_PATH + "diaryTestSet/{0}DiaryTop2000.csv".format(user_profile['city_id'])
     data = pd.read_csv(file_name)
     data["device_id"] = user_profile['device_id']
     now = datetime.datetime.now()
     data["hour"] = now.hour

@@ -22,49 +23,49 @@ def device_id_merge(user_profile):
     data["hour"] = data["hour"].astype("category")
     data["minute"] = data["minute"].astype("category")
     data["y"] = 0
     data = data.drop("city_id", axis=1)
     print(data.head(2))
     return data

 # Load ffm.pkl and convert the table above into FFM format
-def transform_ffm_format(ffm_format_pandas, df, device_id):
-    data = ffm_format_pandas.transform(df)
-    now = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M")
+def transform_ffm_format(df, device_id):
+    file_path = DIRECTORY_PATH + "ffm_{0}_{1}.pkl".format(DATA_START_DATE, DATA_END_DATE)
+    with open(file_path, "rb") as f:
+        ffm_format_pandas = pickle.load(f)
+    data = ffm_format_pandas.transform(df)
+    now = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M")
     predict_file_name = DIRECTORY_PATH + "diaryPredictSet/{0}_{1}DiaryTop2000.csv".format(device_id, now)
     data.to_csv(predict_file_name)
     user_instance_file_path = ''
     return user_instance_file_path

 # Load the model, run prediction, sort the predicted diary probabilities in descending order, and save them to a table
 def predict(user_profile):
     ffm_model = xl.create_ffm()
-    user_instance_file_path = device_id_merge(device_id)
+    user_instance = device_id_merge(user_profile)
+    user_instance_file_path = transform_ffm_format(user_instance)
     ffm_model.setTest(user_instance_file_path)
-    ffm_model.predict(DIRECTORY_PATH + MODEL_VERSION, "./{0}_output.txt".format(device_id))
+    ffm_model.predict(DIRECTORY_PATH + "model_{0}-{1}_lr{2}_lambda{3}.out".format(DATA_START_DATE, DATA_END_DATE, lr, l2_lambda),
+                      DIRECTORY_PATH + "/{0}_output.txt".format(user_profile['device_id']))

 def router(device_id):
     user_profile, is_exist = fetch_user_profile(device_id)
     file_path = DIRECTORY_PATH + "ffm_{0}_{1}.pkl".format(DATA_START_DATE, DATA_END_DATE)
     with open(file_path, "rb") as f:
         ffm_format_pandas = pickle.load(f)
     if is_exist:
-        predict()
+        predict(user_profile)
     else:
-        pass  # do something
+        print('Sorry, we don\'t have you')

 if __name__ == "__main__":
     router(device_id='358035085192742')
     # predict some real device_ids
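The main change in this file is that transform_ffm_format now loads the pickled FFM encoder itself instead of expecting the caller to hand it in. A minimal sketch of that save/load round trip; DummyEncoder and the file names below are stand-ins for illustration, not the project's ffm_format_pandas object or paths:

    import pickle
    import pandas as pd

    class DummyEncoder:
        """Stand-in for the pickled encoder; the real one emits FFM-formatted rows."""
        def fit(self, df):
            self.columns = list(df.columns)
            return self

        def transform(self, df):
            # Here we just reorder columns; the real transform builds field:feature:value strings.
            return df[self.columns]

    # At training time: fit once and persist the encoder next to the model.
    train_df = pd.DataFrame({"cid": [1, 2], "hour": [9, 17]})
    encoder = DummyEncoder().fit(train_df)
    with open("ffm_encoder.pkl", "wb") as f:
        pickle.dump(encoder, f)

    # At prediction time (what transform_ffm_format now does): reload and transform.
    with open("ffm_encoder.pkl", "rb") as f:
        encoder = pickle.load(f)
    user_instances = encoder.transform(pd.DataFrame({"cid": [3], "hour": [12]}))
    user_instances.to_csv("user_instances.csv")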