Commit 5668f217 authored by 张彦钊

delete print

parent cd05757b
...@@ -41,7 +41,7 @@ def test_con_sql(device_id): ...@@ -41,7 +41,7 @@ def test_con_sql(device_id):
# 将device_id、city_id拼接到对应的城市热门日记表。注意:下面预测集特征顺序要与训练集保持一致 # 将device_id、city_id拼接到对应的城市热门日记表。注意:下面预测集特征顺序要与训练集保持一致
def feature_en(x_list, device_id): def feature_en(x_list, device_id):
data = pd.DataFrame(x_list) data = pd.DataFrame(x_list)
data = data.rename(columns={0: "diary_id"}) data = data.rename(columns={0: "cid"})
data["device_id"] = device_id data["device_id"] = device_id
now = datetime.now() now = datetime.now()
data["hour"] = now.hour data["hour"] = now.hour
...@@ -87,7 +87,7 @@ def predict(queue_name, x_list, device_id): ...@@ -87,7 +87,7 @@ def predict(queue_name, x_list, device_id):
def save_result(queue_name, x_list): def save_result(queue_name, x_list):
score_df = pd.read_csv("/Users/mac/utils/result/{0}_output.txt".format(queue_name), header=None) score_df = pd.read_csv("/Users/mac/utils/result/{0}_output.txt".format(queue_name), header=None)
score_df = score_df.rename(columns={0: "score"}) score_df = score_df.rename(columns={0: "score"})
score_df["diary_id"] = x_list score_df["cid"] = x_list
merge_score(x_list, score_df) merge_score(x_list, score_df)
...@@ -111,19 +111,19 @@ def merge_score(x_list, score_df): ...@@ -111,19 +111,19 @@ def merge_score(x_list, score_df):
def update_dairy_queue(score_df): def update_dairy_queue(score_df):
diary_id = score_df["diary_id"].values.tolist() diary_id = score_df["cid"].values.tolist()
video_id = [] video_id = []
x = 1 x = 1
while x <= len(diary_id): while x <= len(diary_id):
video_id.append(diary_id[x]) video_id.append(diary_id[x])
x += 5 x += 5
not_video_id = list(set(diary_id) - set(video_id)) not_video_id = list(set(diary_id) - set(video_id))
not_video_id_df = score_df.loc[score_df["diary_id"].isin(not_video_id)] not_video_id_df = score_df.loc[score_df["cid"].isin(not_video_id)]
not_video_id_df = not_video_id_df.sort_values(by="score", ascending=False) not_video_id_df = not_video_id_df.sort_values(by="score", ascending=False)
video_id_df = score_df.loc[score_df["diary_id"].isin(video_id)] video_id_df = score_df.loc[score_df["cid"].isin(video_id)]
video_id_df = video_id_df.sort_values(by="score", ascending=False) video_id_df = video_id_df.sort_values(by="score", ascending=False)
not_video_id = not_video_id_df["diary_id"].values.tolist() not_video_id = not_video_id_df["cid"].values.tolist()
video_id = video_id_df["diary_id"].values.tolist() video_id = video_id_df["cid"].values.tolist()
diary_id = not_video_id diary_id = not_video_id
i = 1 i = 1
for j in video_id: for j in video_id:
......
...@@ -23,7 +23,6 @@ def feature_en(user_profile): ...@@ -23,7 +23,6 @@ def feature_en(user_profile):
# 虽然预测y,但ffm转化需要y,并不影响预测结果 # 虽然预测y,但ffm转化需要y,并不影响预测结果
data["y"] = 0 data["y"] = 0
data = data.drop("city_id", axis=1) data = data.drop("city_id", axis=1)
print(data.head(10))
return data return data
...@@ -103,20 +102,20 @@ def multi_predict(predict_list,processes=12): ...@@ -103,20 +102,20 @@ def multi_predict(predict_list,processes=12):
if __name__ == "__main__": if __name__ == "__main__":
# TODO 如果耗时小于一分钟,下一次取到的device_id和上一次相同。还有一种情况,一个用户持续活跃,会被重复预测 # TODO 如果耗时小于一分钟,下一次取到的device_id和上一次相同。还有一种情况,一个用户持续活跃,会被重复预测
# while True: while True:
# empty,device_id_list = get_active_users() empty,device_id_list = get_active_users()
# if empty: if empty:
# for eachFile in os.listdir("/tmp"): for eachFile in os.listdir("/tmp"):
# if "xlearn" in eachFile: if "xlearn" in eachFile:
# os.remove("/tmp" + "/" + eachFile) os.remove("/tmp" + "/" + eachFile)
# time.sleep(58) time.sleep(58)
# else: else:
# old_device_id_list = pd.read_csv(DIRECTORY_PATH + "data_set_device_id.csv")["device_id"].values.tolist() old_device_id_list = pd.read_csv(DIRECTORY_PATH + "data_set_device_id.csv")["device_id"].values.tolist()
# # 求活跃用户和老用户的交集,也就是只预测老用户 # 求活跃用户和老用户的交集,也就是只预测老用户
# predict_list = list(set(device_id_list) & set(old_device_id_list)) predict_list = list(set(device_id_list) & set(old_device_id_list))
# multi_predict(predict_list) multi_predict(predict_list)
router("358035085192742")
#TODO 上线前把预测流程中的计时器、打印代码删掉或者注释,因为预测对性能要求高,能少一条代码语句就少一条 #TODO 上线前把预测流程中的计时器、打印代码删掉或者注释,因为预测对性能要求高,能少一条代码语句就少一条
......
...@@ -31,5 +31,4 @@ def fetch_user_profile(device_id): ...@@ -31,5 +31,4 @@ def fetch_user_profile(device_id):
user_profile_dict = {} user_profile_dict = {}
for i in user_profile.columns: for i in user_profile.columns:
user_profile_dict[i] = user_profile.loc[0, i] user_profile_dict[i] = user_profile.loc[0, i]
print(user_profile_dict)
return user_profile_dict, False return user_profile_dict, False
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment