from utils import con_sql from datetime import datetime from config import * import pandas as pd import os import time import pymysql import time # 统计尾号6的活跃用户数 def unique_user_count(file_path, temp_list, now): if os.path.exists(file_path): # 尾号是6的活跃用户数 tail_6_list = eval(pd.read_csv(file_path).loc[0, "list"]) else: tail_6_list = [] tail_6_list.extend(list(filter(lambda x: (str(x)[-1] == "6"), temp_list))) if tail_6_list != []: df_tail_6 = pd.DataFrame({"number": [len(set(tail_6_list))], "time": [str(now)[:16]], "list": [list(set(tail_6_list))]}) df_tail_6.to_csv(file_path, index=None) print("截止现在尾号是6的独立活跃数:{}".format(len(set(tail_6_list)))) # 统计预测过的独立用户数 def predict_user_count(predict_file_path,device_list,now): if os.path.exists(predict_file_path): # 预测过尾号是6的用户数 all_predict_list = eval(pd.read_csv(predict_file_path).loc[0, "list"]) else: all_predict_list = [] all_predict_list.extend(device_list) if all_predict_list != []: df_predict = pd.DataFrame({"number": [len(set(all_predict_list))], "time": [str(now)[:16]], "list": [list(set(all_predict_list))]}) df_predict.to_csv(predict_file_path, index=None) print("截止现在预测过尾号是6的独立活跃数:{}".format(len(set(all_predict_list)))) # 获取当下一分钟内活跃用户 def get_active_users(flag,path,differ): if differ == 0: end = time.time() start = time.time()-60 elif 0 < differ < 10: time.sleep(30) differ += 30 end = time.time() start = end - differ else: end = time.time() start = end - differ end_datetime = str(datetime.fromtimestamp(end)) start_datetime = str(datetime.fromtimestamp(start)) if flag: sql = "select device_id,city_id from user_active_time " \ "where active_time <= '{}' and active_time >= '{}'".format(end_datetime, start_datetime) db = pymysql.connect(host=ACTIVE_USER_DB_ONLINE["host"], port=ACTIVE_USER_DB_ONLINE["port"], user=ACTIVE_USER_DB_ONLINE["user"], passwd=ACTIVE_USER_DB_ONLINE["passwd"], db=ACTIVE_USER_DB_ONLINE["db"]) df = con_sql(db,sql) else: db = pymysql.connect(host=ACTIVE_USER_DB_LOCAL["host"], port=ACTIVE_USER_DB_LOCAL["port"], user=ACTIVE_USER_DB_LOCAL["user"], db=ACTIVE_USER_DB_LOCAL["db"]) sql = "select device_id,city_id from user_active_time" df = con_sql(db, sql) if df.empty: print("当下没有活跃用户数") return [] # 统计活跃用户中尾号是6的用户数 else: temp_list = df[0].values.tolist() now = datetime.now() tail6_file_path = path + "{}tail6Unique.csv".format(str(now)[:10]) unique_user_count(tail6_file_path, temp_list, now) # if os.path.exists(tail6_file_path): # # 尾号是6的活跃用户数 # tail_6_list = eval(pd.read_csv(tail6_file_path).loc[0, "list"]) # else: # tail_6_list = [] # # tail_6_list.extend(list(filter(lambda x: (str(x)[-1] == "6"), temp_list))) # if tail_6_list != []: # df_tail_6 = pd.DataFrame({"number": [len(set(tail_6_list))], "time": [str(now)[:16]], # "list": [list(set(tail_6_list))]}) # df_tail_6.to_csv(tail6_file_path, index=None) # # print("截止现在尾号是6的独立活跃数:{}".format(len(set(tail_6_list)))) old_device_id_list = pd.read_csv(path + "data_set_device_id.csv")["device_id"].values.tolist() # 求活跃用户和老用户的交集,也就是只预测老用户 df = df.loc[df[0].isin(old_device_id_list)] if df.empty: print("该列表是新用户,不需要预测") return [] else: # TODO 正式上线后注释下面的只预测尾号是6的代码 # 只预测尾号是6的ID,这块是测试要求的 device_temp_list = df[0].values.tolist() predict_list = list(filter(lambda x: (str(x)[-1] == "6") or (str(x)=="358035085192742") or str(x)=="AB20292B-5D15-4C44-9429-1C2FF5ED26F6", device_temp_list)) if predict_list == []: print('没有尾号是6和目标用户') return [] else: df = df.loc[df[0].isin(predict_list)] device_list = df[0].values.tolist() city_list = df[1].values.tolist() device_city_list = list(zip(device_list, city_list)) print("当下这一分钟预测用户数量:{}".format(len(device_city_list))) #统计尾号6的预测用户 predict_file_path = path + "{}predictTail6Unique.csv".format(str(now)[:10]) predict_user_count(predict_file_path,device_list,now) # if os.path.exists(predict_file_path): # # 预测过尾号是6的用户数 # all_predict_list = eval(pd.read_csv(predict_file_path).loc[0, "list"]) # else: # all_predict_list = [] # all_predict_list.extend(device_list) # if all_predict_list != []: # df_predict = pd.DataFrame({"number": [len(set(all_predict_list))], "time": [str(now)[:16]], # "list": [list(set(all_predict_list))]}) # df_predict.to_csv(predict_file_path, index=None) return device_city_list def fetch_user_profile(device_id): db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test') sql = "select device_id,city_id from data_feed_click where device_id = '{0}' limit 1".format(device_id) user_profile = con_sql(db,sql) if user_profile.empty: print("没有获取到该用户对应的city_id") return None,True else: user_profile = user_profile.rename(columns={0:"device_id",1:"city_id"}) user_profile_dict = {} for i in user_profile.columns: user_profile_dict[i] = user_profile.loc[0, i] return user_profile_dict, False