Commit cb34d9d3 authored by 张彦钊

change ffm process

parent c5bcb8e4
DIRECTORY_PATH = '/data2/models/' DIRECTORY_PATH = '/data2/models/'
# 测试日期一定要大于验证日期,因为切割数据集的代码是这样设置的 # 测试日期一定要大于验证日期,因为切割数据集的代码是这样设置的
# VALIDATION_DATE = '2018-08-05' # VALIDATION_DATE = '2018-08-05'
...@@ -13,5 +11,33 @@ MODEL_VERSION = '' ...@@ -13,5 +11,33 @@ MODEL_VERSION = ''
lr = 0.03 lr = 0.03
l2_lambda = 0.002 l2_lambda = 0.002
# processData.py #线上日记视频对应的ip
# diaryTraining.py ONLINE_EAGLE_HOST = '10.66.157.22'
# 测试日记视频所在的ip
LOCAL_EAGLE_HOST = "192.168.15.12"
# 本地地址
LOCAL_DIRCTORY = "/Users/mac/utils/"
# # 线下pkl
# "/Users/mac/utils/ffm.pkl"
# #线下预测文件
# "/Users/mac/utils/result/{0}.csv".format(queue_name)
# # 线下模型、预测产出文件
# "/Users/mac/utils/model.out",
# "/Users/mac/utils/result/{0}_output.txt".format(queue_name)
#
# # 线下日记队列
# host='rdsmaqevmuzj6jy.mysql.rds.aliyuncs.com', port=3306, user='work',
# passwd='workwork', db='doris_test'
# select native_queue from device_diary_queue where device_id = '{}' and city_id = '{}';".for
# update device_diary_queue set {}='{}' where device_id = '{}' and city_id = '{}'".format\
# (queue_name,id_str,device_id, city_id)
#
# # 线下日记打分表
# host='rdsmaqevmuzj6jy.mysql.rds.aliyuncs.com', port=3306, user='work',
# passwd='workwork', db='zhengxing_test'
# "select score,diary_id from biz_feed_diary_score where diary_id in {};".format(diary_list)
This diff is collapsed.
...@@ -68,8 +68,8 @@ def ffm_transform(data, test_number, validation_number): ...@@ -68,8 +68,8 @@ def ffm_transform(data, test_number, validation_number):
print("Start ffm transform") print("Start ffm transform")
start = time.time() start = time.time()
ffm_train = multiFFMFormatPandas() ffm_train = multiFFMFormatPandas()
# 服务器内存空闲的时候,可以下面的6改成8。6比较稳定,如果服务器内存占用较多的时候,用8可能因为分配不到内存,脚本挂掉。 # 服务器内存空闲的时候,可以下面的4改成6。4比较稳定,如果服务器内存被其他程序占用较多的时候,用6可能因为分配不到内存,脚本挂掉。
data = ffm_train.fit_transform(data, y='y',n=50000,processes=6) data = ffm_train.fit_transform(data, y='y',n=50000,processes=4)
with open(DIRECTORY_PATH+"train/ffm.pkl", "wb") as f: with open(DIRECTORY_PATH+"train/ffm.pkl", "wb") as f:
pickle.dump(ffm_train, f) pickle.dump(ffm_train, f)
......
...@@ -7,18 +7,19 @@ import time ...@@ -7,18 +7,19 @@ import time
# 获取当下一分钟内活跃用户 # 获取当下一分钟内活跃用户
def get_active_users(): def get_active_users(flag):
now = datetime.now() now = datetime.now()
now_start = str(now)[:16] + ":00" now_start = str(now)[:16] + ":00"
now_end = str(now)[:16] + ":59" now_end = str(now)[:16] + ":59"
sql = "select device_id,city_id from user_active_time " \ sql = "select device_id,city_id from user_active_time " \
"where active_time <= '{}' and active_time >= '{}'".format(now_end,now_start) "where active_time <= '{}' and active_time >= '{}'".format(now_end,now_start)
if flag:
df = con_sql(sql) df = con_sql(sql)
else:
pass
# df = 问一下亚男,如果没有,造表,造数据
if df.empty: if df.empty:
print("当下这一分钟没有活跃用户,不需要预测") print("当下这一分钟没有活跃用户,不需要预测")
for eachFile in os.listdir("/tmp"):
if "xlearn" in eachFile:
os.remove("/tmp" + "/" + eachFile)
time.sleep(56) time.sleep(56)
return [] return []
else: else:
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment