Commit 71e5100b authored by 张彦钊's avatar 张彦钊

update diaryqueue file

parent d1ddcfe6
......@@ -137,8 +137,7 @@ def update_dairy_queue(score_df,predict_score_df,total_video_id):
i = 1
for j in video_id:
new_queue.insert(i, j)
# TODO 下面的3是测试用的,如果上线后,把3改成5
i += 3
i += 5
print("分数合并成功")
return new_queue
......@@ -253,24 +252,23 @@ def multi_proecess_update(device_id, city_id, data_set_cid,total_video_id):
pool.join()
def run():
data_set_cid = pd.read_csv(DIRECTORY_PATH + "data_set_cid.csv")["cid"].values.tolist()
total_video_id = get_video_id()
device_city_list = get_active_users()
for device_city in device_city_list:
start = time.time()
multi_proecess_update(device_city[0], device_city[1], data_set_cid,total_video_id)
end = time.time()
print("更新该用户队列耗时{}秒".format((end-start)))
print("end")
if __name__ == "__main__":
warnings.filterwarnings("ignore")
# todo 正式上线后把下面while True的代码加上
# while True:
run()
total_number = 0
while True:
data_set_cid = pd.read_csv(DIRECTORY_PATH + "data_set_cid.csv")["cid"].values.tolist()
total_video_id = get_video_id()
device_city_list = get_active_users()
total_number += len(device_city_list)
if device_city_list != []:
for device_city in device_city_list:
start = time.time()
multi_proecess_update(device_city[0], device_city[1], data_set_cid, total_video_id)
end = time.time()
print("更新该用户队列耗时{}秒".format((end - start)))
print("累计预测用户总数:{}".format(total_number))
# # TODO 上线后把预测用户改成多进程预测
......
......@@ -14,15 +14,13 @@ def get_active_users():
sql = "select device_id,city_id from user_active_time " \
"where active_time <= '{}' and active_time >= '{}'".format(now_end,now_start)
df = con_sql(sql)
return (("AB20292B-5D15-4C44-9429-1C2FF5ED26F6","beijing"),)
if df.empty:
print("当下这一分钟没有活跃用户,不需要预测")
for eachFile in os.listdir("/tmp"):
if "xlearn" in eachFile:
os.remove("/tmp" + "/" + eachFile)
time.sleep(58)
return False
time.sleep(56)
return []
else:
df = df.rename(columns={0: "device_id", 1: "city_id"})
old_device_id_list = pd.read_csv(DIRECTORY_PATH + "data_set_device_id.csv")["device_id"].values.tolist()
......@@ -30,20 +28,25 @@ def get_active_users():
df = df.loc[df["device_id"].isin(old_device_id_list)]
if df.empty:
print("该列表是新用户,不需要预测")
return []
else:
# TODO 正式上线后注释下面的只预测尾号是6的代码
# 只预测尾号是6的ID,这块是测试要求的
# device_temp_list = df["device_id"].values.tolist()
# predict_list = list(filter(lambda x: str(x)[-1] == "6", device_temp_list))
# df = df.loc[df["device_id"].isin(predict_list)]
# TODO 上线后把下面的temp删掉
# 把刘潇的id加进去
df = pd.DataFrame({"device_id":["358035085192742"],"city_id":["beijing"]})
device_list = df["device_id"].values.tolist()
city_list = df["city_id"].values.tolist()
device_city_list = list(zip(device_list, city_list))
print("当下这一分钟预测用户数量:{}".format(len(device_city_list)))
return device_city_list
device_temp_list = df["device_id"].values.tolist()
predict_list = list(filter(lambda x: str(x)[-1] == "6", device_temp_list))
if predict_list == []:
print('没有尾号是6的用户')
return []
else:
df = df.loc[df["device_id"].isin(predict_list)]
device_list = df["device_id"].values.tolist()
city_list = df["city_id"].values.tolist()
device_city_list = list(zip(device_list, city_list))
# TODO 上线后把刘潇的id删除
# 把刘潇的id加进去
device_city_list.append(("358035085192742", "beijing"))
print("当下这一分钟预测用户数量:{}".format(len(device_city_list)))
return device_city_list
def fetch_user_profile(device_id):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment