Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
22196732
Commit
22196732
authored
Sep 01, 2018
by
张彦钊
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
修改配置文件文件路径
parent
586d9007
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
9 additions
and
7 deletions
+9
-7
dataProcess.py
dataProcess.py
+1
-2
train.py
train.py
+1
-1
utils.py
utils.py
+7
-4
No files found.
dataProcess.py
View file @
22196732
...
...
@@ -68,8 +68,7 @@ def ffm_transform(data, test_number, validation_number):
print
(
"Start ffm transform"
)
start
=
time
.
time
()
ffm_train
=
multiFFMFormatPandas
()
# 服务器内存空闲的时候,可以下面的4改成6。4比较稳定,如果服务器内存被其他程序占用较多的时候,用6可能因为分配不到内存,脚本挂掉。
data
=
ffm_train
.
fit_transform
(
data
,
y
=
'y'
,
n
=
50000
,
processes
=
8
)
data
=
ffm_train
.
fit_transform
(
data
,
y
=
'y'
,
n
=
50000
,
processes
=
20
)
with
open
(
DIRECTORY_PATH
+
"train/ffm.pkl"
,
"wb"
)
as
f
:
pickle
.
dump
(
ffm_train
,
f
)
...
...
train.py
View file @
22196732
...
...
@@ -19,7 +19,7 @@ if __name__ == "__main__":
print
(
"训练模型耗时{}分"
.
format
((
end_train
-
start_train
)
/
60
))
move_file
()
#TODO 如果用自己写的keepProcess文件守护进程,下面在这个函数里删掉重新启动进程那行代码,因为可能会多启动一次进程
restart_process
()
# restart_process()
...
...
utils.py
View file @
22196732
...
...
@@ -18,9 +18,12 @@ def get_date():
month
=
now
.
month
day
=
now
.
day
date
=
datetime
(
year
,
month
,
day
)
data_start_date
=
(
date
-
timedelta
(
days
=
3
))
.
strftime
(
"%Y-%m-%d"
)
data_end_date
=
(
date
-
timedelta
(
days
=
1
))
.
strftime
(
"%Y-%m-%d"
)
validation_date
=
(
date
-
timedelta
(
days
=
2
))
.
strftime
(
"%Y-%m-%d"
)
data_start_date
=
"2018-07-15"
data_end_date
=
"2018-08-30"
validation_date
=
"2018-08-29"
# data_start_date = (date - timedelta(days=3)).strftime("%Y-%m-%d")
# data_end_date = (date - timedelta(days=1)).strftime("%Y-%m-%d")
# validation_date = (date - timedelta(days=2)).strftime("%Y-%m-%d")
# 验证集和测试集的日期必须相差一天,否则切割数据集时会报错
test_date
=
data_end_date
print
(
"data_start_date,data_end_date,validation_date,test_date:"
)
...
...
@@ -50,7 +53,7 @@ def con_sql(sql):
def
move_file
():
import
os
for
eachFile
in
os
.
listdir
(
"/data2/models/train"
):
for
eachFile
in
os
.
listdir
(
DIRECTORY_PATH
+
"train"
):
os
.
rename
(
DIRECTORY_PATH
+
"train"
+
"/"
+
eachFile
,
DIRECTORY_PATH
+
eachFile
)
print
(
"成功将文件剪切到对应路径"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment