Commit dbfe7ffc authored by 高雅喆's avatar 高雅喆

Merge branch 'master' of git.wanmeizhensuo.com:ML/ffm-baseline

update output format
parents a99efd7d 4758923b
from config import *
import pandas as pd
import pickle
import xlearn as xl
from userProfile import *
import time
from utils import *
import os
# 本地测试脚本
# 从测试Tidb数据库的表里获取数据,并转化成df格式
def test_con_sql(device_id):
    """Fetch the four diary queues for *device_id* from the test TiDB.

    Reads device_diary_queue and returns the tuple
    (native, nearby, nation, megacity), each a list of diary-id strings.
    Returns four empty lists when the device has no row.
    """
    db = pymysql.connect(host='rdsmaqevmuzj6jy.mysql.rds.aliyuncs.com', port=3306, user='work',
                         passwd='workwork', db='doris_test')
    # Pre-bind the results so an empty result set returns empty lists instead
    # of raising NameError (the original only bound these inside the if).
    native_queue_list = []
    nearby_queue_list = []
    nation_queue_list = []
    megacity_queue_list = []
    try:
        cursor = db.cursor()
        # Parameterized query instead of str.format: device_id comes from the
        # caller, so avoid SQL injection / quoting bugs.
        sql = "select native_queue,nearby_queue,nation_queue,megacity_queue " \
              "from device_diary_queue where device_id = %s;"
        cursor.execute(sql, (device_id,))
        result = cursor.fetchall()
        df = pd.DataFrame(list(result))
        if not df.empty:
            df = df.rename(columns={0: "native_queue", 1: "nearby_queue",
                                    2: "nation_queue", 3: "megacity_queue"})
            # Queues are stored as comma-separated diary-id strings.
            native_queue_list = df.loc[0, "native_queue"].split(",")
            nearby_queue_list = df.loc[0, "nearby_queue"].split(",")
            nation_queue_list = df.loc[0, "nation_queue"].split(",")
            megacity_queue_list = df.loc[0, "megacity_queue"].split(",")
    finally:
        # Close unconditionally; the original leaked the connection on an
        # empty result or on any exception.
        db.close()
    return native_queue_list, nearby_queue_list, nation_queue_list, megacity_queue_list
# 将device_id、city_id拼接到对应的城市热门日记表。注意:下面预测集特征顺序要与训练集保持一致
def feature_en(x_list, device_id):
    """Build the prediction feature frame for one diary queue.

    Column order must match the training set: diary_id, device_id, hour,
    minute, y.  Hour 0 / minute 0 are remapped to 24 / 60 (all rows share the
    same timestamp, taken once at call time).
    """
    now = datetime.now()
    data = pd.DataFrame({"diary_id": x_list})
    data["device_id"] = device_id
    # Every row gets the same clock reading; remap the zero values up front.
    data["hour"] = 24 if now.hour == 0 else now.hour
    data["minute"] = 60 if now.minute == 0 else now.minute
    data["hour"] = data["hour"].astype("category")
    data["minute"] = data["minute"].astype("category")
    # The ffm encoder needs a label column even at predict time; the value
    # does not influence the predictions.
    data["y"] = 0
    return data
# 把ffm.pkl load进来,将上面的表转化为ffm格式
def transform_ffm_format(df, device_id):
    """Convert *df* to libffm text format and write it to a local csv.

    Loads the fitted encoder persisted in ffm.pkl, transforms the frame, and
    returns the path of the timestamped prediction file it wrote.
    """
    with open("/Users/mac/utils/ffm.pkl", "rb") as pkl_file:
        encoder = pickle.load(pkl_file)
    ffm_rows = encoder.transform(df)
    stamp = datetime.now().strftime("%Y-%m-%d-%H-%M")
    predict_file_name = "/Users/mac/utils/result/{0}_{1}.csv".format(device_id, stamp)
    # libffm input has no header and no index column.
    ffm_rows.to_csv(predict_file_name, header=None, index=False)
    print("成功将ffm预测文件写到本地")
    return predict_file_name
# 将模型加载,预测,把预测日记的概率值按照降序排序,存到一个表里
def predict(queue_name, x_list, device_id):
    """Score one diary queue with the ffm model and persist the ranked feed.

    Builds the feature frame, converts it to libffm format, runs the model,
    and hands the sigmoid outputs to predict_save_to_local.
    """
    # BUG FIX: feature_en takes (x_list, device_id); the original called it
    # with one argument, raising TypeError on every invocation.
    instance = feature_en(x_list, device_id)
    instance_file_path = transform_ffm_format(instance, device_id)
    ffm_model = xl.create_ffm()
    ffm_model.setTest(instance_file_path)
    ffm_model.setSigmoid()
    ffm_model.predict("/Users/mac/utils/model.out",
                      "/Users/mac/utils/result/{0}_output.txt".format(queue_name))
    print("{}预测结束".format(queue_name))
    # BUG FIX: user_profile was never defined here (NameError). wrapper_result
    # only reads user_profile['device_id'] to locate "<id>_output.txt", and
    # the output above is keyed by queue_name, so pass that.
    predict_save_to_local({"device_id": queue_name}, instance)
# 将预测结果与device_id 进行拼接,并按照概率降序排序
def wrapper_result(user_profile, instance):
    """Join predicted probabilities with their diary ids.

    Reads the model output file for user_profile['device_id'], attaches the
    ids from *instance*, and returns the top 50 rows by probability
    (descending) with columns "prob" and "cid".
    """
    proba = pd.read_csv(DIRECTORY_PATH +
                        "result/{0}_output.txt".format(user_profile['device_id']), header=None)
    proba = proba.rename(columns={0: "prob"})
    # BUG FIX: feature_en names the id column "diary_id" (renamed from "cid"
    # in the output-format update), so instance['cid'] raised KeyError.  Keep
    # the joined column named "cid" so predict_save_to_local still works.
    proba["cid"] = instance['diary_id']
    proba = proba.sort_values(by="prob", ascending=False)
    proba = proba.head(50)
    return proba
# 预测候选集保存到本地
def predict_save_to_local(user_profile, instance):
    """Rank the candidate diaries and dump the feed, with urls, to a local csv."""
    ranked = wrapper_result(user_profile, instance)

    def to_url(cid):
        # The first 6 characters of cid are a prefix before the numeric
        # diary id — presumably a type tag; TODO confirm prefix length.
        return "http://m.igengmei.com/diary_book/" + str(cid[6:]) + '/'

    ranked.loc[:, "url"] = ranked["cid"].apply(to_url)
    ranked.to_csv(DIRECTORY_PATH + "result/feed_{}".format(user_profile['device_id']), index=False)
    print("成功将预测候选集保存到本地")
# def router(device_id):
# user_profile, not_exist = fetch_user_profile(device_id)
# if not_exist:
# print('Sorry, we don\'t have you.')
# else:
# predict(user_profile)
# 多进程预测
# def multi_predict(predict_list,processes=12):
# pool = Pool(processes)
# for device_id in predict_list:
# start = time.time()
# pool.apply_async(router, (device_id,))
# end = time.time()
# print("该用户{}预测耗时{}秒".format(device_id, (end - start)))
#
# pool.close()
# pool.join()
if __name__ == "__main__":
    # Local smoke test: pull the four queues for a hard-coded device id and
    # run one prediction pass per queue.
    queues = test_con_sql("device_id")
    queue_names = ("native_queue", "nearby_queue", "nation_queue", "megacity_queue")
    for queue_name, queue_list in zip(queue_names, queues):
        predict(queue_name, queue_list, "device_id")
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment