Commit 5668f217 authored by 张彦钊

delete print

parent cd05757b
......@@ -41,7 +41,7 @@ def test_con_sql(device_id):
# 将device_id、city_id拼接到对应的城市热门日记表。注意:下面预测集特征顺序要与训练集保持一致
def feature_en(x_list, device_id):
data = pd.DataFrame(x_list)
data = data.rename(columns={0: "diary_id"})
data = data.rename(columns={0: "cid"})
data["device_id"] = device_id
now = datetime.now()
data["hour"] = now.hour
......@@ -87,7 +87,7 @@ def predict(queue_name, x_list, device_id):
def save_result(queue_name, x_list):
score_df = pd.read_csv("/Users/mac/utils/result/{0}_output.txt".format(queue_name), header=None)
score_df = score_df.rename(columns={0: "score"})
score_df["diary_id"] = x_list
score_df["cid"] = x_list
merge_score(x_list, score_df)
......@@ -111,19 +111,19 @@ def merge_score(x_list, score_df):
def update_dairy_queue(score_df):
diary_id = score_df["diary_id"].values.tolist()
diary_id = score_df["cid"].values.tolist()
video_id = []
x = 1
while x <= len(diary_id):
video_id.append(diary_id[x])
x += 5
not_video_id = list(set(diary_id) - set(video_id))
not_video_id_df = score_df.loc[score_df["diary_id"].isin(not_video_id)]
not_video_id_df = score_df.loc[score_df["cid"].isin(not_video_id)]
not_video_id_df = not_video_id_df.sort_values(by="score", ascending=False)
video_id_df = score_df.loc[score_df["diary_id"].isin(video_id)]
video_id_df = score_df.loc[score_df["cid"].isin(video_id)]
video_id_df = video_id_df.sort_values(by="score", ascending=False)
not_video_id = not_video_id_df["diary_id"].values.tolist()
video_id = video_id_df["diary_id"].values.tolist()
not_video_id = not_video_id_df["cid"].values.tolist()
video_id = video_id_df["cid"].values.tolist()
diary_id = not_video_id
i = 1
for j in video_id:
......
......@@ -23,7 +23,6 @@ def feature_en(user_profile):
# 虽然预测y,但ffm转化需要y,并不影响预测结果
data["y"] = 0
data = data.drop("city_id", axis=1)
print(data.head(10))
return data
......@@ -103,20 +102,20 @@ def multi_predict(predict_list,processes=12):
if __name__ == "__main__":
# TODO 如果耗时小于一分钟,下一次取到的device_id和上一次相同。还有一种情况,一个用户持续活跃,会被重复预测
# while True:
# empty,device_id_list = get_active_users()
# if empty:
# for eachFile in os.listdir("/tmp"):
# if "xlearn" in eachFile:
# os.remove("/tmp" + "/" + eachFile)
# time.sleep(58)
# else:
# old_device_id_list = pd.read_csv(DIRECTORY_PATH + "data_set_device_id.csv")["device_id"].values.tolist()
# # 求活跃用户和老用户的交集,也就是只预测老用户
# predict_list = list(set(device_id_list) & set(old_device_id_list))
# multi_predict(predict_list)
router("358035085192742")
while True:
empty,device_id_list = get_active_users()
if empty:
for eachFile in os.listdir("/tmp"):
if "xlearn" in eachFile:
os.remove("/tmp" + "/" + eachFile)
time.sleep(58)
else:
old_device_id_list = pd.read_csv(DIRECTORY_PATH + "data_set_device_id.csv")["device_id"].values.tolist()
# 求活跃用户和老用户的交集,也就是只预测老用户
predict_list = list(set(device_id_list) & set(old_device_id_list))
multi_predict(predict_list)
#TODO 上线前把预测流程中的计时器、打印代码删掉或者注释,因为预测对性能要求高,能少一条代码语句就少一条
......
......@@ -31,5 +31,4 @@ def fetch_user_profile(device_id):
user_profile_dict = {}
for i in user_profile.columns:
user_profile_dict[i] = user_profile.loc[0, i]
print(user_profile_dict)
return user_profile_dict, False
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment