Commit 1fa8a7fd authored by 张彦钊

fix user lost bug

parent 4b43b998
...@@ -16,7 +16,6 @@ from config import * ...@@ -16,7 +16,6 @@ from config import *
import socket import socket
def get_video_id(cache_video_id): def get_video_id(cache_video_id):
if flag: if flag:
db = pymysql.connect(host=ONLINE_EAGLE_HOST, port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='eagle') db = pymysql.connect(host=ONLINE_EAGLE_HOST, port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='eagle')
...@@ -283,17 +282,19 @@ if __name__ == "__main__": ...@@ -283,17 +282,19 @@ if __name__ == "__main__":
flag = True flag = True
path = DIRECTORY_PATH path = DIRECTORY_PATH
# 下面这个ip是本地电脑ip # 下面这个ip是本地电脑ip
if socket.gethostbyname(socket.gethostname()) == '172.30.5.84': if socket.gethostbyname(socket.gethostname()) == '172.30.8.160':
flag = False flag = False
path = LOCAL_DIRCTORY path = LOCAL_DIRCTORY
# 增加缓存日记视频列表 # 增加缓存日记视频列表
cache_video_id = [] cache_video_id = []
cache_device_city_list = [] cache_device_city_list = []
differ = 0
while True: while True:
data_set_cid = pd.read_csv(path + "data_set_cid.csv")["cid"].values.tolist() data_set_cid = pd.read_csv(path + "data_set_cid.csv")["cid"].values.tolist()
total_video_id = get_video_id(cache_video_id) total_video_id = get_video_id(cache_video_id)
cache_video_id = total_video_id cache_video_id = total_video_id
device_city_list = get_active_users(flag,path) start = time.time()
device_city_list = get_active_users(flag,path,differ)
# 过滤掉5分钟内预测过的用户 # 过滤掉5分钟内预测过的用户
device_city_list = list(set(tuple(device_city_list))-set(tuple(cache_device_city_list))) device_city_list = list(set(tuple(device_city_list))-set(tuple(cache_device_city_list)))
if datetime.now().minute % 5 == 0: if datetime.now().minute % 5 == 0:
...@@ -301,11 +302,8 @@ if __name__ == "__main__": ...@@ -301,11 +302,8 @@ if __name__ == "__main__":
if device_city_list != []: if device_city_list != []:
cache_device_city_list.extend(device_city_list) cache_device_city_list.extend(device_city_list)
for device_city in device_city_list: for device_city in device_city_list:
# start = time.time()
multi_proecess_update(device_city[0], device_city[1], data_set_cid, total_video_id) multi_proecess_update(device_city[0], device_city[1], data_set_cid, total_video_id)
# end = time.time() differ = time.time()-start
# print("更新该用户队列耗时{}秒".format((end - start)))
......
...@@ -5,32 +5,44 @@ import pandas as pd ...@@ -5,32 +5,44 @@ import pandas as pd
import os import os
import time import time
import pymysql import pymysql
import time
# 获取当下一分钟内活跃用户 # 获取当下一分钟内活跃用户
def get_active_users(flag,path): def get_active_users(flag,path,differ):
now = datetime.now() if differ == 0:
now_start = str(now)[:16] + ":00" end = time.time()
now_end = str(now)[:16] + ":59" start = time.time()-60
elif 0 < differ < 10:
time.sleep(30)
differ += 30
end = time.time()
start = end - differ
else:
end = time.time()
start = end - differ
end_datetime = str(datetime.fromtimestamp(end))
start_datetime = str(datetime.fromtimestamp(start))
sql = "select device_id,city_id from user_active_time " \ sql = "select device_id,city_id from user_active_time " \
"where active_time <= '{}' and active_time >= '{}'".format(now_end,now_start) "where active_time <= '{}' and active_time >= '{}'".format(end_datetime,start_datetime)
if flag: if flag:
df = con_sql(sql) df = con_sql(sql)
else: else:
db = pymysql.connect(host='192.168.15.12', port=4000, user='root', db='jerry_test') db = pymysql.connect(host='192.168.15.12', port=4000, user='root', db='jerry_test')
sql = "select device_id,city_id from user_active_time" # sql = "select device_id,city_id from user_active_time"
cursor = db.cursor() cursor = db.cursor()
cursor.execute(sql) cursor.execute(sql)
result = cursor.fetchall() result = cursor.fetchall()
df = pd.DataFrame(list(result)).dropna() df = pd.DataFrame(list(result)).dropna()
db.close() db.close()
if df.empty: if df.empty:
print("当下这一分钟没有活跃用户,不需要预测") print("当下没有活跃用户数")
time.sleep(56)
return [] return []
else:
# 统计活跃用户中尾号是6的用户数 # 统计活跃用户中尾号是6的用户数
else:
temp_list = df[0].values.tolist() temp_list = df[0].values.tolist()
now = datetime.now()
tail6_file_path = path + "{}tail6Unique.csv".format(str(now)[:10]) tail6_file_path = path + "{}tail6Unique.csv".format(str(now)[:10])
if os.path.exists(tail6_file_path): if os.path.exists(tail6_file_path):
# 尾号是6的活跃用户数 # 尾号是6的活跃用户数
...@@ -50,7 +62,6 @@ def get_active_users(flag,path): ...@@ -50,7 +62,6 @@ def get_active_users(flag,path):
df = df.loc[df[0].isin(old_device_id_list)] df = df.loc[df[0].isin(old_device_id_list)]
if df.empty: if df.empty:
print("该列表是新用户,不需要预测") print("该列表是新用户,不需要预测")
time.sleep(56)
return [] return []
else: else:
# TODO 正式上线后注释下面的只预测尾号是6的代码 # TODO 正式上线后注释下面的只预测尾号是6的代码
...@@ -62,7 +73,6 @@ def get_active_users(flag,path): ...@@ -62,7 +73,6 @@ def get_active_users(flag,path):
device_temp_list)) device_temp_list))
if predict_list == []: if predict_list == []:
print('没有尾号是6和目标用户') print('没有尾号是6和目标用户')
time.sleep(56)
return [] return []
else: else:
df = df.loc[df[0].isin(predict_list)] df = df.loc[df[0].isin(predict_list)]
...@@ -78,7 +88,7 @@ def get_active_users(flag,path): ...@@ -78,7 +88,7 @@ def get_active_users(flag,path):
all_predict_list = eval(pd.read_csv(predict_file_path).loc[0, "list"]) all_predict_list = eval(pd.read_csv(predict_file_path).loc[0, "list"])
else: else:
all_predict_list = [] all_predict_list = []
all_predict_list.extend(device_city_list) all_predict_list.extend(device_list)
if all_predict_list != []: if all_predict_list != []:
df_predict = pd.DataFrame({"number": [len(set(all_predict_list))], "time": [str(now)[:16]], df_predict = pd.DataFrame({"number": [len(set(all_predict_list))], "time": [str(now)[:16]],
"list": [list(set(all_predict_list))]}) "list": [list(set(all_predict_list))]})
......
...@@ -48,30 +48,6 @@ def con_sql(sql): ...@@ -48,30 +48,6 @@ def con_sql(sql):
return df return df
def queue_compare(old_list, new_list):
    """Report how many diary ids changed position between two queues.

    Args:
        old_list: previous queue as strings carrying a six-character
            prefix (e.g. ``"diary|123"``); the prefix is stripped and the
            remainder is parsed as an int.
        new_list: new queue as a sequence of ids comparable to the parsed
            ints from ``old_list``.

    Returns:
        None. When at least one id moved (or is missing from ``new_list``),
        the module-level counter ``update_queue_numbers`` is incremented and
        two summary lines are printed.

    Raises:
        ValueError: if an entry of ``old_list`` has no integer after the
            six-character prefix.
    """
    # The counter lives at module level; without this declaration the
    # augmented assignment below would raise UnboundLocalError.
    global update_queue_numbers

    # Strip the leading "diary|" prefix.
    old_ids = [int(entry[6:]) for entry in old_list]

    # Map each id to its index; for duplicates the last index wins, which
    # matches dict(zip(ids, range(len(ids)))).
    old_positions = {diary_id: idx for idx, diary_id in enumerate(old_ids)}
    new_positions = {diary_id: idx for idx, diary_id in enumerate(new_list)}

    # An id absent from the new queue counts as moved instead of raising
    # KeyError (dict.get returns None, which never equals an index).
    changed = sum(
        1
        for diary_id, idx in old_positions.items()
        if new_positions.get(diary_id) != idx
    )

    if changed > 0:
        try:
            update_queue_numbers += 1
        except NameError:
            # Counter not initialised yet by the enclosing module.
            update_queue_numbers = 1
        print("更新日记队列总数:{}".format(update_queue_numbers))
        # changed > 0 implies old_ids is non-empty, so the division is safe.
        # Rounding to two decimals is done inside round(); previously the 2
        # was passed to format() and silently ignored.
        print("日记队列更新前日记总个数{},位置发生变化个数{},发生变化率{}%".format(
            len(old_ids), changed, round(changed / len(old_ids) * 100, 2)))
def move_file(): def move_file():
import os import os
for eachFile in os.listdir("/data2/models/train"): for eachFile in os.listdir("/data2/models/train"):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment