Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
34cb81cc
Commit
34cb81cc
authored
Aug 27, 2018
by
张彦钊
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
修改cid、deviceid、pkl、model保存路径
parent
d934e2c4
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
6 additions
and
5 deletions
+6
-5
diaryQueueUpdate.py
diaryQueueUpdate.py
+1
-0
diaryTraining.py
diaryTraining.py
+2
-2
processData.py
processData.py
+3
-3
No files found.
diaryQueueUpdate.py
View file @
34cb81cc
...
...
@@ -24,6 +24,7 @@ def get_video_id():
db
.
close
()
return
video_id
# 将device_id、city_id拼接到对应的城市热门日记表。注意:下面预测集特征顺序要与训练集保持一致
def
feature_en
(
x_list
,
device_id
):
data
=
pd
.
DataFrame
(
x_list
)
...
...
diaryTraining.py
View file @
34cb81cc
...
...
@@ -10,11 +10,11 @@ def train():
# log保存路径,如果不加这个参数,日志默认保存在/tmp路径下,不符合规范
param
=
{
'task'
:
'binary'
,
'lr'
:
lr
,
'lambda'
:
l2_lambda
,
'metric'
:
'auc'
,
"log"
:
"/data2/models/result"
}
ffm_model
.
fit
(
param
,
DIRECTORY_PATH
+
"model.out"
)
ffm_model
.
fit
(
param
,
DIRECTORY_PATH
+
"
train/
model.out"
)
print
(
"predicting"
)
ffm_model
.
setTest
(
DIRECTORY_PATH
+
"test_ffm_data.csv"
)
ffm_model
.
setSigmoid
()
ffm_model
.
predict
(
DIRECTORY_PATH
+
"model.out"
,
DIRECTORY_PATH
+
"test_set_predict_output.txt"
)
ffm_model
.
predict
(
DIRECTORY_PATH
+
"
train/
model.out"
,
DIRECTORY_PATH
+
"test_set_predict_output.txt"
)
processData.py
View file @
34cb81cc
...
...
@@ -52,14 +52,14 @@ def feature_en(data_start_date, data_end_date, validation_date, test_date):
data_set_cid
=
data
[
"cid"
]
.
unique
()
cid_df
=
pd
.
DataFrame
()
cid_df
[
'cid'
]
=
data_set_cid
cid_df
.
to_csv
(
DIRECTORY_PATH
+
"data_set_cid.csv"
,
index
=
False
)
cid_df
.
to_csv
(
DIRECTORY_PATH
+
"
train/
data_set_cid.csv"
,
index
=
False
)
print
(
"成功保存data_set_cid"
)
# 将device_id 保存,目的是为了判断预测的device_id是否在这个集合里,如果不在,不需要预测
data_set_device_id
=
data
[
"device_id"
]
.
unique
()
device_id_df
=
pd
.
DataFrame
()
device_id_df
[
'device_id'
]
=
data_set_device_id
device_id_df
.
to_csv
(
DIRECTORY_PATH
+
"data_set_device_id.csv"
,
index
=
False
)
device_id_df
.
to_csv
(
DIRECTORY_PATH
+
"
train/
data_set_device_id.csv"
,
index
=
False
)
print
(
"成功保存data_set_device_id"
)
return
data
,
test_number
,
validation_number
...
...
@@ -69,7 +69,7 @@ def ffm_transform(data, test_number, validation_number):
start
=
time
.
time
()
ffm_train
=
multiFFMFormatPandas
()
data
=
ffm_train
.
fit_transform
(
data
,
y
=
'y'
,
n
=
50000
,
processes
=
8
)
with
open
(
DIRECTORY_PATH
+
"ffm.pkl"
,
"wb"
)
as
f
:
with
open
(
DIRECTORY_PATH
+
"
train/
ffm.pkl"
,
"wb"
)
as
f
:
pickle
.
dump
(
ffm_train
,
f
)
print
(
"done transform ffm"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment