diff --git a/diary-training.py b/diary-training.py index a7851f6ce382bd127914417ea693eaee6f0636dd..2b050260f11893b2324e6f5b2981d4afd2f4c468 100644 --- a/diary-training.py +++ b/diary-training.py @@ -4,6 +4,8 @@ import pandas as pd from sklearn.utils import shuffle import numpy as np import xlearn as xl + + # 从数æ®åº“的表里获å–æ•°æ®ï¼Œå¹¶è½¬åŒ–æˆdfæ ¼å¼ def con_sql(sql): db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test') diff --git a/diaryQueueUpdate.py b/diaryQueueUpdate.py index 60d83c6370289d6d082268690a5019192e683653..827dab2ec3461a9a40e947ca7906bbfa41c84d06 100644 --- a/diaryQueueUpdate.py +++ b/diaryQueueUpdate.py @@ -170,7 +170,32 @@ def update_sql_dairy_queue(queue_name, diary_id,device_id, city_id): cursor.execute(sql) db.commit() db.close() - print("æˆåŠŸå†™å…¥diaryid") + print("æˆåŠŸå†™å…¥diary_id") + + +def queue_compare(old_list, new_list): + global update_queue_numbers + print("更新日记队列总数:{}".format(update_queue_numbers)) + # 去掉å‰é¢çš„"diary|" + old_list = list(map(lambda x: int(x[6:]),old_list)) + # print("旧表å‰å个") + # print(old_list[:10]) + # print("新表å‰å个") + # print(new_list[:10]) + temp = list(range(len(old_list))) + x_dict = dict(zip(old_list, temp)) + temp = list(range(len(new_list))) + y_dict = dict(zip(new_list, temp)) + i = 0 + for key in x_dict.keys(): + if x_dict[key] != y_dict.get(key): + i += 1 + + if i >0: + update_queue_numbers += 1 + print("更新日记队列总数:{}".format(update_queue_numbers)) + print("日记队列更新å‰æ—¥è®°æ€»ä¸ªæ•°{},ä½ç½®å‘生å˜åŒ–个数{},å‘生å˜åŒ–率{}%".format(len(old_list), i, + round(i / len(old_list) * 100, 2)) def get_queue(device_id, city_id,queue_name): @@ -218,6 +243,7 @@ def user_update(device_id, city_id, queue_name,data_set_cid,total_video_id): diary_queue = pipe_line(queue_name, queue_arg, device_id,total_video_id) if diary_queue: update_sql_dairy_queue(queue_name, diary_queue, device_id, city_id) + queue_compare(queue_list,diary_queue) # print("更新结æŸ") else: 
print("获å–的日记列表是空,所以ä¸æ›´æ–°æ—¥è®°é˜Ÿåˆ—") @@ -242,17 +268,23 @@ if __name__ == "__main__": # å¢žåŠ ç¼“å˜æ—¥è®°è§†é¢‘列表 cache_video_id = [] cache_device_city_list = [] + update_queue_numbers = 0 while True: data_set_cid = pd.read_csv(DIRECTORY_PATH + "data_set_cid.csv")["cid"].values.tolist() total_video_id = get_video_id(cache_video_id) cache_video_id = total_video_id device_city_list = get_active_users() - # 过滤掉上次预测过的用户 - device_city_list = list(set(device_city_list)-set(cache_device_city_list)) - cache_device_city_list = device_city_list - total_number += len(device_city_list) - print("累计预测用户总数:{}".format(total_number)) + print("过滤å‰ç”¨æˆ·æ•°ï¼š{}".format(len(device_city_list))) + # 过滤掉5分钟内预测过的用户 + device_city_list = list(set(tuple(device_city_list))-set(tuple(cache_device_city_list))) + print("过滤åŽç”¨æˆ·æ•°ï¼š{}".format(len(device_city_list))) + print("缓å˜è§†é¢‘个数:{}".format(len(cache_device_city_list))) + if datetime.now().minute % 5 == 0: + cache_device_city_list = [] if device_city_list != []: + cache_device_city_list.extend(device_city_list) + total_number += len(device_city_list) + print("累计预测用户总数:{}".format(total_number)) for device_city in device_city_list: # start = time.time() multi_proecess_update(device_city[0], device_city[1], data_set_cid, total_video_id) diff --git a/utils.py b/utils.py index 11076750803fff4a182a7ba3c995033404e126d8..778ea1a5c722f78655a0e010cba4ee29a425133a 100644 --- a/utils.py +++ b/utils.py @@ -48,6 +48,30 @@ def con_sql(sql): return df +def queue_compare(old_list, new_list): + # 去掉å‰é¢çš„"diary|" + old_list = list(map(lambda x: int(x[6:]),old_list)) + # print("旧表å‰å个") + # print(old_list[:10]) + # print("新表å‰å个") + # print(new_list[:10]) + + temp = list(range(len(old_list))) + x_dict = dict(zip(old_list, temp)) + temp = list(range(len(new_list))) + y_dict = dict(zip(new_list, temp)) + i = 0 + for key in x_dict.keys(): + if x_dict[key] != y_dict[key]: + i += 1 + + if i >0: + update_queue_numbers += 1 + 
print("更新日记队列总数:{}".format(update_queue_numbers)) + print("日记队列更新å‰æ—¥è®°æ€»ä¸ªæ•°{},ä½ç½®å‘生å˜åŒ–个数{},å‘生å˜åŒ–率{}%".format(len(old_list), i, + round(i / len(old_list) * 100, 2)) + + + def move_file(): + import os + for eachFile in os.listdir("/data2/models/train"):