Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
cb34d9d3
Commit
cb34d9d3
authored
Aug 29, 2018
by
张彦钊
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
change ffm process
parent
c5bcb8e4
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
37 additions
and
10 deletions
+37
-10
config.py
config.py
+30
-4
diaryUpdateOnlineOffline.py
diaryUpdateOnlineOffline.py
+0
-0
processData.py
processData.py
+2
-2
userProfile.py
userProfile.py
+5
-4
No files found.
config.py
View file @
cb34d9d3
DIRECTORY_PATH
=
'/data2/models/'
DIRECTORY_PATH
=
'/data2/models/'
# 测试日期一定要大于验证日期,因为切割数据集的代码是这样设置的
# 测试日期一定要大于验证日期,因为切割数据集的代码是这样设置的
# VALIDATION_DATE = '2018-08-05'
# VALIDATION_DATE = '2018-08-05'
...
@@ -13,5 +11,33 @@ MODEL_VERSION = ''
...
@@ -13,5 +11,33 @@ MODEL_VERSION = ''
lr
=
0.03
lr
=
0.03
l2_lambda
=
0.002
l2_lambda
=
0.002
# processData.py
#线上日记视频对应的ip
# diaryTraining.py
ONLINE_EAGLE_HOST
=
'10.66.157.22'
# 测试日记视频所在的ip
LOCAL_EAGLE_HOST
=
"192.168.15.12"
# 本地地址
LOCAL_DIRCTORY
=
"/Users/mac/utils/"
# # 线下pkl
# "/Users/mac/utils/ffm.pkl"
# #线下预测文件
# "/Users/mac/utils/result/{0}.csv".format(queue_name)
# # 线下模型、预测产出文件
# "/Users/mac/utils/model.out",
# "/Users/mac/utils/result/{0}_output.txt".format(queue_name)
#
# # 线下日记队列
# host='rdsmaqevmuzj6jy.mysql.rds.aliyuncs.com', port=3306, user='work',
# passwd='workwork', db='doris_test'
# select native_queue from device_diary_queue where device_id = '{}' and city_id = '{}';".for
# update device_diary_queue set {}='{}' where device_id = '{}' and city_id = '{}'".format\
# (queue_name,id_str,device_id, city_id)
#
# # 线下日记打分表
# host='rdsmaqevmuzj6jy.mysql.rds.aliyuncs.com', port=3306, user='work',
# passwd='workwork', db='zhengxing_test'
# "select score,diary_id from biz_feed_diary_score where diary_id in {};".format(diary_list)
diaryUpdateOnlineOffline.py
0 → 100644
View file @
cb34d9d3
This diff is collapsed.
Click to expand it.
processData.py
View file @
cb34d9d3
...
@@ -68,8 +68,8 @@ def ffm_transform(data, test_number, validation_number):
...
@@ -68,8 +68,8 @@ def ffm_transform(data, test_number, validation_number):
print
(
"Start ffm transform"
)
print
(
"Start ffm transform"
)
start
=
time
.
time
()
start
=
time
.
time
()
ffm_train
=
multiFFMFormatPandas
()
ffm_train
=
multiFFMFormatPandas
()
# 服务器内存空闲的时候,可以把下面的 6 改成 8。6 比较稳定;如果服务器内存占用较多,用 8 可能因为分配不到内存导致脚本挂掉。
# 服务器内存空闲的时候,可以把下面的 4 改成 6。4 比较稳定;如果服务器内存被其他程序占用较多,用 6 可能因为分配不到内存导致脚本挂掉。
data
=
ffm_train
.
fit_transform
(
data
,
y
=
'y'
,
n
=
50000
,
processes
=
6
)
data
=
ffm_train
.
fit_transform
(
data
,
y
=
'y'
,
n
=
50000
,
processes
=
4
)
with
open
(
DIRECTORY_PATH
+
"train/ffm.pkl"
,
"wb"
)
as
f
:
with
open
(
DIRECTORY_PATH
+
"train/ffm.pkl"
,
"wb"
)
as
f
:
pickle
.
dump
(
ffm_train
,
f
)
pickle
.
dump
(
ffm_train
,
f
)
...
...
userProfile.py
View file @
cb34d9d3
...
@@ -7,18 +7,19 @@ import time
...
@@ -7,18 +7,19 @@ import time
# 获取当下一分钟内活跃用户
# 获取当下一分钟内活跃用户
def
get_active_users
():
def
get_active_users
(
flag
):
now
=
datetime
.
now
()
now
=
datetime
.
now
()
now_start
=
str
(
now
)[:
16
]
+
":00"
now_start
=
str
(
now
)[:
16
]
+
":00"
now_end
=
str
(
now
)[:
16
]
+
":59"
now_end
=
str
(
now
)[:
16
]
+
":59"
sql
=
"select device_id,city_id from user_active_time "
\
sql
=
"select device_id,city_id from user_active_time "
\
"where active_time <= '{}' and active_time >= '{}'"
.
format
(
now_end
,
now_start
)
"where active_time <= '{}' and active_time >= '{}'"
.
format
(
now_end
,
now_start
)
if
flag
:
df
=
con_sql
(
sql
)
df
=
con_sql
(
sql
)
else
:
pass
# TODO: flag 为假时的数据源待确认(问一下亚男);如果没有现成的表,需要造表、造数据。注意:此分支目前没有给 df 赋值。
if
df
.
empty
:
if
df
.
empty
:
print
(
"当下这一分钟没有活跃用户,不需要预测"
)
print
(
"当下这一分钟没有活跃用户,不需要预测"
)
for
eachFile
in
os
.
listdir
(
"/tmp"
):
if
"xlearn"
in
eachFile
:
os
.
remove
(
"/tmp"
+
"/"
+
eachFile
)
time
.
sleep
(
56
)
time
.
sleep
(
56
)
return
[]
return
[]
else
:
else
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment