Commit 1fa8a7fd authored by 张彦钊

fix user lost bug

parent 4b43b998
......@@ -16,7 +16,6 @@ from config import *
import socket
def get_video_id(cache_video_id):
if flag:
db = pymysql.connect(host=ONLINE_EAGLE_HOST, port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='eagle')
......@@ -283,17 +282,19 @@ if __name__ == "__main__":
flag = True
path = DIRECTORY_PATH
# 下面这个ip是本地电脑ip
if socket.gethostbyname(socket.gethostname()) == '172.30.5.84':
if socket.gethostbyname(socket.gethostname()) == '172.30.8.160':
flag = False
path = LOCAL_DIRCTORY
# 增加缓存日记视频列表
cache_video_id = []
cache_device_city_list = []
differ = 0
while True:
data_set_cid = pd.read_csv(path + "data_set_cid.csv")["cid"].values.tolist()
total_video_id = get_video_id(cache_video_id)
cache_video_id = total_video_id
device_city_list = get_active_users(flag,path)
start = time.time()
device_city_list = get_active_users(flag,path,differ)
# 过滤掉5分钟内预测过的用户
device_city_list = list(set(tuple(device_city_list))-set(tuple(cache_device_city_list)))
if datetime.now().minute % 5 == 0:
......@@ -301,11 +302,8 @@ if __name__ == "__main__":
if device_city_list != []:
cache_device_city_list.extend(device_city_list)
for device_city in device_city_list:
# start = time.time()
multi_proecess_update(device_city[0], device_city[1], data_set_cid, total_video_id)
# end = time.time()
# print("更新该用户队列耗时{}秒".format((end - start)))
differ = time.time()-start
......
......@@ -5,32 +5,44 @@ import pandas as pd
import os
import time
import pymysql
import time
# 获取当下一分钟内活跃用户
def get_active_users(flag,path):
now = datetime.now()
now_start = str(now)[:16] + ":00"
now_end = str(now)[:16] + ":59"
def get_active_users(flag,path,differ):
if differ == 0:
end = time.time()
start = time.time()-60
elif 0 < differ < 10:
time.sleep(30)
differ += 30
end = time.time()
start = end - differ
else:
end = time.time()
start = end - differ
end_datetime = str(datetime.fromtimestamp(end))
start_datetime = str(datetime.fromtimestamp(start))
sql = "select device_id,city_id from user_active_time " \
"where active_time <= '{}' and active_time >= '{}'".format(now_end,now_start)
"where active_time <= '{}' and active_time >= '{}'".format(end_datetime,start_datetime)
if flag:
df = con_sql(sql)
else:
db = pymysql.connect(host='192.168.15.12', port=4000, user='root', db='jerry_test')
sql = "select device_id,city_id from user_active_time"
# sql = "select device_id,city_id from user_active_time"
cursor = db.cursor()
cursor.execute(sql)
result = cursor.fetchall()
df = pd.DataFrame(list(result)).dropna()
db.close()
if df.empty:
print("当下这一分钟没有活跃用户,不需要预测")
time.sleep(56)
print("当下没有活跃用户数")
return []
else:
# 统计活跃用户中尾号是6的用户数
else:
temp_list = df[0].values.tolist()
now = datetime.now()
tail6_file_path = path + "{}tail6Unique.csv".format(str(now)[:10])
if os.path.exists(tail6_file_path):
# 尾号是6的活跃用户数
......@@ -50,7 +62,6 @@ def get_active_users(flag,path):
df = df.loc[df[0].isin(old_device_id_list)]
if df.empty:
print("该列表是新用户,不需要预测")
time.sleep(56)
return []
else:
# TODO 正式上线后注释下面的只预测尾号是6的代码
......@@ -62,7 +73,6 @@ def get_active_users(flag,path):
device_temp_list))
if predict_list == []:
print('没有尾号是6和目标用户')
time.sleep(56)
return []
else:
df = df.loc[df[0].isin(predict_list)]
......@@ -78,7 +88,7 @@ def get_active_users(flag,path):
all_predict_list = eval(pd.read_csv(predict_file_path).loc[0, "list"])
else:
all_predict_list = []
all_predict_list.extend(device_city_list)
all_predict_list.extend(device_list)
if all_predict_list != []:
df_predict = pd.DataFrame({"number": [len(set(all_predict_list))], "time": [str(now)[:16]],
"list": [list(set(all_predict_list))]})
......
......@@ -48,30 +48,6 @@ def con_sql(sql):
return df
def queue_compare(old_list, new_list):
    """Report how many diaries changed position between two queue snapshots.

    Args:
        old_list: Previous queue; each item is a string whose first six
            characters (the "diary|" prefix) are followed by an integer id,
            e.g. "diary|123".
        new_list: New queue as plain integer diary ids.

    Returns:
        None. When at least one diary sits at a different position (or is
        missing from ``new_list``), the module-level counter
        ``update_queue_numbers`` is incremented and two summary lines are
        printed.
    """
    # The counter lives at module level; without this declaration the
    # augmented assignment below would raise UnboundLocalError.
    global update_queue_numbers

    # Drop the leading "diary|" so both queues hold comparable integer ids.
    old_ids = [int(item[6:]) for item in old_list]

    # Map each id to its index; for duplicated ids the last index wins.
    old_positions = {diary_id: pos for pos, diary_id in enumerate(old_ids)}
    new_positions = {diary_id: pos for pos, diary_id in enumerate(new_list)}

    # An id missing from the new queue counts as moved instead of raising
    # KeyError (``.get`` yields None, which never equals an index).
    changed = sum(
        1
        for diary_id, pos in old_positions.items()
        if new_positions.get(diary_id) != pos
    )

    if changed > 0:
        # Start from 0 if the module never initialised the counter.
        update_queue_numbers = globals().get("update_queue_numbers", 0) + 1
        print("更新日记队列总数:{}".format(update_queue_numbers))
        # The rate is rounded to two decimals; ``2`` belongs to round().
        print("日记队列更新前日记总个数{},位置发生变化个数{},发生变化率{}%".format(
            len(old_ids), changed, round(changed / len(old_ids) * 100, 2)))
def move_file():
import os
for eachFile in os.listdir("/data2/models/train"):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment