Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
586d9007
Commit
586d9007
authored
Sep 01, 2018
by
张彦钊
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
修改配置文件文件路径
parent
d2a3d31c
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
6 additions
and
5 deletions
+6
-5
dataProcess.py
dataProcess.py
+1
-1
diaryTraining.py
diaryTraining.py
+1
-1
train.py
train.py
+2
-2
utils.py
utils.py
+2
-1
No files found.
dataProcess.py
View file @
586d9007
...
...
@@ -69,7 +69,7 @@ def ffm_transform(data, test_number, validation_number):
start
=
time
.
time
()
ffm_train
=
multiFFMFormatPandas
()
# 服务器内存空闲的时候,可以下面的4改成6。4比较稳定,如果服务器内存被其他程序占用较多的时候,用6可能因为分配不到内存,脚本挂掉。
data
=
ffm_train
.
fit_transform
(
data
,
y
=
'y'
,
n
=
50000
,
processes
=
4
)
data
=
ffm_train
.
fit_transform
(
data
,
y
=
'y'
,
n
=
50000
,
processes
=
8
)
with
open
(
DIRECTORY_PATH
+
"train/ffm.pkl"
,
"wb"
)
as
f
:
pickle
.
dump
(
ffm_train
,
f
)
...
...
diaryTraining.py
View file @
586d9007
...
...
@@ -8,7 +8,7 @@ def train():
ffm_model
.
setTrain
(
DIRECTORY_PATH
+
"train_ffm_data.csv"
)
ffm_model
.
setValidate
(
DIRECTORY_PATH
+
"validation_ffm_data.csv"
)
# log保存路径,如果不加这个参数,日志默认保存在/temp路径下,不符合规范
param
=
{
'task'
:
'binary'
,
'lr'
:
lr
,
'lambda'
:
l2_lambda
,
'metric'
:
'auc'
,
"log"
:
"/data2/models/
result"
}
param
=
{
'task'
:
'binary'
,
'lr'
:
lr
,
'lambda'
:
l2_lambda
,
'metric'
:
'auc'
,
"log"
:
DIRECTORY_PATH
+
"
result"
}
ffm_model
.
fit
(
param
,
DIRECTORY_PATH
+
"train/model.out"
)
...
...
train.py
View file @
586d9007
...
...
@@ -17,9 +17,9 @@ if __name__ == "__main__":
train
()
end_train
=
time
.
time
()
print
(
"训练模型耗时{}分"
.
format
((
end_train
-
start_train
)
/
60
))
#
move_file()
move_file
()
#TODO 如果用自己写的keepProcess文件守护进程,下面在这个函数里删掉重新启动进程那行代码,因为可能会多启动一次进程
#
restart_process()
restart_process
()
...
...
utils.py
View file @
586d9007
...
...
@@ -9,6 +9,7 @@ from sklearn.metrics import auc
from
multiprocessing
import
Pool
import
os
import
signal
from
config
import
*
def
get_date
():
...
...
@@ -50,7 +51,7 @@ def con_sql(sql):
def
move_file
():
import
os
for
eachFile
in
os
.
listdir
(
"/data2/models/train"
):
os
.
rename
(
"/data2/models/train"
+
"/"
+
eachFile
,
"/data2/models/"
+
eachFile
)
os
.
rename
(
DIRECTORY_PATH
+
"train"
+
"/"
+
eachFile
,
DIRECTORY_PATH
+
eachFile
)
print
(
"成功将文件剪切到对应路径"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment