Commit def62f9f authored by 高雅喆's avatar 高雅喆

Merge branch 'master' of git.wanmeizhensuo.com:ML/ffm-baseline

add txt gitignore
parents 4b17aa5b c5bcb8e4
...@@ -4,6 +4,8 @@ import pandas as pd ...@@ -4,6 +4,8 @@ import pandas as pd
from sklearn.utils import shuffle from sklearn.utils import shuffle
import numpy as np import numpy as np
import xlearn as xl import xlearn as xl
# 从数据库的表里获取数据,并转化成df格式 # 从数据库的表里获取数据,并转化成df格式
def con_sql(sql): def con_sql(sql):
db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test') db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
......
...@@ -170,7 +170,32 @@ def update_sql_dairy_queue(queue_name, diary_id,device_id, city_id): ...@@ -170,7 +170,32 @@ def update_sql_dairy_queue(queue_name, diary_id,device_id, city_id):
cursor.execute(sql) cursor.execute(sql)
db.commit() db.commit()
db.close() db.close()
print("成功写入diaryid") print("成功写入diary_id")
def queue_compare(old_list, new_list):
    """Report how many diaries changed position between two diary queues.

    Args:
        old_list: Previous queue; each item is a string whose first six
            characters (the "diary|" prefix) are stripped and the remainder
            parsed as an int diary id.
        new_list: New queue of diary ids, compared against the ints parsed
            from ``old_list`` (presumably ints as well; confirm with caller).

    Side effects:
        Prints the running total of updated queues. When at least one diary
        moved, increments the module-level ``update_queue_numbers`` counter
        and prints the number and percentage of moved diaries.

    A diary that is present in the old queue but missing from the new one is
    counted as having changed position (previously this raised ``KeyError``).
    """
    global update_queue_numbers
    print("更新日记队列总数:{}".format(update_queue_numbers))
    # Strip the leading "diary|" prefix so ids compare as ints.
    old_ids = [int(item[6:]) for item in old_list]
    # Map each diary id to its position; on duplicates the last position wins.
    old_positions = {diary_id: pos for pos, diary_id in enumerate(old_ids)}
    new_positions = {diary_id: pos for pos, diary_id in enumerate(new_list)}
    # .get() returns None for ids dropped from the new queue, which never
    # equals a position and is therefore counted as a change.
    changed = sum(
        1
        for diary_id, pos in old_positions.items()
        if new_positions.get(diary_id) != pos
    )
    if changed > 0:
        update_queue_numbers += 1
        print("更新日记队列总数:{}".format(update_queue_numbers))
        # changed > 0 implies old_ids is non-empty, so the division is safe.
        # The precision argument belongs to round(), not to format().
        print("日记队列更新前日记总个数{},位置发生变化个数{},发生变化率{}%".format(
            len(old_ids), changed, round(changed / len(old_ids) * 100, 2)))
def get_queue(device_id, city_id,queue_name): def get_queue(device_id, city_id,queue_name):
...@@ -218,6 +243,7 @@ def user_update(device_id, city_id, queue_name,data_set_cid,total_video_id): ...@@ -218,6 +243,7 @@ def user_update(device_id, city_id, queue_name,data_set_cid,total_video_id):
diary_queue = pipe_line(queue_name, queue_arg, device_id,total_video_id) diary_queue = pipe_line(queue_name, queue_arg, device_id,total_video_id)
if diary_queue: if diary_queue:
update_sql_dairy_queue(queue_name, diary_queue, device_id, city_id) update_sql_dairy_queue(queue_name, diary_queue, device_id, city_id)
queue_compare(queue_list,diary_queue)
# print("更新结束") # print("更新结束")
else: else:
print("获取的日记列表是空,所以不更新日记队列") print("获取的日记列表是空,所以不更新日记队列")
...@@ -242,17 +268,23 @@ if __name__ == "__main__": ...@@ -242,17 +268,23 @@ if __name__ == "__main__":
# 增加缓存日记视频列表 # 增加缓存日记视频列表
cache_video_id = [] cache_video_id = []
cache_device_city_list = [] cache_device_city_list = []
update_queue_numbers = 0
while True: while True:
data_set_cid = pd.read_csv(DIRECTORY_PATH + "data_set_cid.csv")["cid"].values.tolist() data_set_cid = pd.read_csv(DIRECTORY_PATH + "data_set_cid.csv")["cid"].values.tolist()
total_video_id = get_video_id(cache_video_id) total_video_id = get_video_id(cache_video_id)
cache_video_id = total_video_id cache_video_id = total_video_id
device_city_list = get_active_users() device_city_list = get_active_users()
# 过滤掉上次预测过的用户 print("过滤前用户数:{}".format(len(device_city_list)))
device_city_list = list(set(device_city_list)-set(cache_device_city_list)) # 过滤掉5分钟内预测过的用户
cache_device_city_list = device_city_list device_city_list = list(set(tuple(device_city_list))-set(tuple(cache_device_city_list)))
total_number += len(device_city_list) print("过滤后用户数:{}".format(len(device_city_list)))
print("累计预测用户总数:{}".format(total_number)) print("缓存视频个数:{}".format(len(cache_device_city_list)))
if datetime.now().minute % 5 == 0:
cache_device_city_list = []
if device_city_list != []: if device_city_list != []:
cache_device_city_list.extend(device_city_list)
total_number += len(device_city_list)
print("累计预测用户总数:{}".format(total_number))
for device_city in device_city_list: for device_city in device_city_list:
# start = time.time() # start = time.time()
multi_proecess_update(device_city[0], device_city[1], data_set_cid, total_video_id) multi_proecess_update(device_city[0], device_city[1], data_set_cid, total_video_id)
......
...@@ -48,6 +48,30 @@ def con_sql(sql): ...@@ -48,6 +48,30 @@ def con_sql(sql):
return df return df
def queue_compare(old_list, new_list):
    """Report how many diaries changed position between two diary queues.

    Args:
        old_list: Previous queue; each item is a string whose first six
            characters (the "diary|" prefix) are stripped and the remainder
            parsed as an int diary id.
        new_list: New queue of diary ids, compared against the ints parsed
            from ``old_list`` (presumably ints as well; confirm with caller).

    Side effects:
        When at least one diary moved, increments the module-level
        ``update_queue_numbers`` counter (starting it at 0 if it has not
        been defined yet) and prints the number and percentage of moved
        diaries.

    A diary that is present in the old queue but missing from the new one is
    counted as having changed position (previously this raised ``KeyError``).
    """
    # Without this declaration the augmented assignment below made the name
    # local and raised UnboundLocalError.
    global update_queue_numbers
    # Strip the leading "diary|" prefix so ids compare as ints.
    old_ids = [int(item[6:]) for item in old_list]
    # Map each diary id to its position; on duplicates the last position wins.
    old_positions = {diary_id: pos for pos, diary_id in enumerate(old_ids)}
    new_positions = {diary_id: pos for pos, diary_id in enumerate(new_list)}
    # .get() returns None for ids dropped from the new queue, which never
    # equals a position and is therefore counted as a change.
    changed = sum(
        1
        for diary_id, pos in old_positions.items()
        if new_positions.get(diary_id) != pos
    )
    if changed > 0:
        # NOTE(review): no module-level initialisation of the counter is
        # visible in this file, so fall back to 0 when it is missing.
        update_queue_numbers = globals().get("update_queue_numbers", 0) + 1
        print("更新日记队列总数:{}".format(update_queue_numbers))
        # changed > 0 implies old_ids is non-empty, so the division is safe.
        # The precision argument belongs to round(), not to format().
        print("日记队列更新前日记总个数{},位置发生变化个数{},发生变化率{}%".format(
            len(old_ids), changed, round(changed / len(old_ids) * 100, 2)))
def move_file(): def move_file():
import os import os
for eachFile in os.listdir("/data2/models/train"): for eachFile in os.listdir("/data2/models/train"):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment