Commit 17f114c6 authored by 张彦钊's avatar 张彦钊

add codes in test file

parent eeae786a
import pymysql
import pandas as pd
from datetime import datetime
import time
from config import *
import pickle
import xlearn as xl
def con_sql(sql):
db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
......@@ -31,6 +35,104 @@ def get_active_users():
# 为了debug supervisor,修改了下面的return参数
return "0",device_id_list
def fetch_user_profile(device_id):
sql = "select device_id,city_id from data_feed_click where device_id = '{0}' limit 1".format(device_id)
user_profile = con_sql(sql)
if user_profile.empty:
print("没有获取到该用户对应的city_id")
# 为了debug supervisor,修改了下面的return参数
return {1: 2}, 1
else:
user_profile = user_profile.rename(columns={0: "device_id", 1: "city_id"})
print("成功获取该用户对应的city_id")
user_profile_dict = {}
for i in user_profile.columns:
user_profile_dict[i] = user_profile.loc[0, i]
# 为了debug supervisor,修改了下面的return参数
return user_profile_dict, "0"
def feature_en(user_profile):
file_name = DIRECTORY_PATH + "diaryTestSet/{0}DiaryTop3000.csv".format(user_profile['city_id'])
data = pd.read_csv(file_name)
data["device_id"] = user_profile['device_id']
now = datetime.now()
data["hour"] = now.hour
data["minute"] = now.minute
data.loc[data["hour"] == 0, ["hour"]] = 24
data.loc[data["minute"] == 0, ["minute"]] = 60
data["hour"] = data["hour"].astype("category")
data["minute"] = data["minute"].astype("category")
# 虽然预测y,但ffm转化需要y,并不影响预测结果
data["y"] = 0
data = data.drop("city_id", axis=1)
print(data.head(1))
print("特征工程处理结束")
return data
def transform_ffm_format(df, device_id):
file_path = DIRECTORY_PATH + "ffm_{0}_{1}.pkl".format(DATA_START_DATE, DATA_END_DATE)
with open(file_path, "rb") as f:
ffm_format_pandas = pickle.load(f)
data = ffm_format_pandas.transform(df)
now = datetime.now().strftime("%Y-%m-%d-%H-%M")
print("ffm格式转化结束")
predict_file_name = DIRECTORY_PATH + "result/{0}_{1}DiaryTop3000.csv".format(device_id, now)
data.to_csv(predict_file_name, index=False,header=None)
print("ffm写到服务器")
return predict_file_name
def wrapper_result(user_profile, instance):
proba = pd.read_csv(DIRECTORY_PATH +
"result/{0}_output.txt".format(user_profile['device_id']), header=None)
proba = proba.rename(columns={0: "prob"})
proba["cid"] = instance['cid']
proba = proba.sort_values(by="prob", ascending=False)
proba = proba.head(50)
return proba
def predict_save_to_local(user_profile, instance):
proba = wrapper_result(user_profile, instance)
proba.loc[:, "url"] = proba["cid"].apply(lambda x: "http://m.igengmei.com/diary_book/" + str(x[6:]) + '/')
proba.to_csv(DIRECTORY_PATH + "result/feed_{}".format(user_profile['device_id']), index=False)
print("成功将预测候选集保存到本地")
def predict(user_profile):
instance = feature_en(user_profile)
instance_file_path = transform_ffm_format(instance, user_profile["device_id"])
ffm_model = xl.create_ffm()
ffm_model.setTest(instance_file_path)
ffm_model.setSigmoid()
ffm_model.predict(DIRECTORY_PATH + "model_{0}-{1}_lr{2}_lambda{3}.out".format(DATA_START_DATE,
DATA_END_DATE, lr, l2_lambda),
DIRECTORY_PATH + "result/{0}_output.txt".format(user_profile['device_id']))
print("预测结束")
predict_save_to_local(user_profile, instance)
def router(device_id):
user_profile, not_exist = fetch_user_profile(device_id)
if not_exist==1:
print('Sorry, we don\'t have you.')
else:
predict(user_profile)
if __name__ == "__main__":
while True:
get_active_users()
start = time.time()
empty,device_id_list = get_active_users()
if empty==1:
time.sleep(60)
else:
old_device_id_list = pd.read_csv(DIRECTORY_PATH + "data_set_device_id.csv")["device_id"].values.tolist()
for device_id in device_id_list:
if device_id in old_device_id_list:
router(device_id)
else:
print("该用户不是老用户,不能预测")
end = time.time()
time_cost = (end - start)
print("耗时{}秒".format(time_cost))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment