"""Look up which devices were active in the current minute and fetch a device's profile row."""

# NOTE: the import order is kept exactly as it was. `from config import *`
# sits in the middle of the list, so any name it exports could shadow the
# imports above it; reordering might silently change which object wins.
from utils import con_sql
from datetime import datetime
from config import *
import pandas as pd
import os
import time


def get_active_users():
    """Return the (device_id, city_id) pairs to run prediction for.

    The function queries ``user_active_time`` for rows whose ``active_time``
    falls inside the current wall-clock minute, but then unconditionally
    returns a hard-coded one-element tuple, so the query result is never
    used and all the logic after that ``return`` is unreachable.

    NOTE(review): the early return looks like a debugging stub -- confirm
    whether it is still wanted before removing it.

    Returns:
        As written: always
        ``(("AB20292B-5D15-4C44-9429-1C2FF5ED26F6", "beijing"),)``.

        The unreachable code below the stub would instead return ``False``
        when the query yields no rows, ``None`` when none of the active
        devices appear in ``data_set_device_id.csv``, and otherwise a list
        of ``(device_id, city_id)`` tuples.
    """
    now = datetime.now()
    # str(datetime) starts with "YYYY-MM-DD HH:MM"; the first 16 characters
    # therefore identify the current minute.
    minute_prefix = str(now)[:16]
    window_start = minute_prefix + ":00"
    window_end = minute_prefix + ":59"

    sql = (
        "select device_id,city_id from user_active_time "
        "where active_time <= '{}' and active_time >= '{}'"
    ).format(window_end, window_start)
    df = con_sql(sql)

    # Hard-coded result; nothing below this line ever executes.
    return (("AB20292B-5D15-4C44-9429-1C2FF5ED26F6", "beijing"),)

    # ------------------------------------------------------------------
    # Unreachable while the stub above is in place.
    # ------------------------------------------------------------------
    if df.empty:
        print("当下这一分钟没有活跃用户,不需要预测")
        # Remove every entry under /tmp whose name contains "xlearn".
        tmp_dir = "/tmp"
        for entry in os.listdir(tmp_dir):
            if "xlearn" in entry:
                os.remove(os.path.join(tmp_dir, entry))
        # Sleep for 58 seconds before reporting "nothing to do".
        time.sleep(58)
        return False

    # presumably con_sql returns a DataFrame with integer column labels
    # 0 and 1 -- TODO confirm against utils.con_sql.
    df = df.rename(columns={0: "device_id", 1: "city_id"})
    known_device_ids = pd.read_csv(
        DIRECTORY_PATH + "data_set_device_id.csv"
    )["device_id"].values.tolist()

    # Intersect active users with previously seen ("old") users, i.e. only
    # old users are predicted for.
    df = df.loc[df["device_id"].isin(known_device_ids)]
    if df.empty:
        print("该列表是新用户,不需要预测")
        return None

    # TODO: once officially launched, keep the "only IDs ending in 6" filter
    # below commented out. Restricting to IDs ending in 6 was a testing
    # requirement.
    # device_temp_list = df["device_id"].values.tolist()
    # predict_list = list(filter(lambda x: str(x)[-1] == "6", device_temp_list))
    # df = df.loc[df["device_id"].isin(predict_list)]

    # TODO: delete the temporary override below after launch.
    # The original note says this "adds" Liu Xiao's id, but the assignment
    # replaces the filtered frame entirely with this single row.
    df = pd.DataFrame({"device_id": ["358035085192742"], "city_id": ["beijing"]})

    device_city_list = list(
        zip(df["device_id"].values.tolist(), df["city_id"].values.tolist())
    )
    print("当下这一分钟预测用户数量:{}".format(len(device_city_list)))
    return device_city_list


def fetch_user_profile(device_id):
    """Fetch one ``data_feed_click`` row for ``device_id`` as a dict.

    Args:
        device_id: Value interpolated into the ``where device_id = '...'``
            clause of the query.

    Returns:
        A 2-tuple. ``(None, True)`` when the query returns no rows;
        otherwise ``(profile, False)`` where ``profile`` maps each column
        name (``"device_id"``, ``"city_id"`` after renaming) to the value in
        the row labelled ``0``.
    """
    # NOTE(review): device_id is formatted straight into the SQL text. If it
    # can ever come from an untrusted source this is injectable; con_sql's
    # signature is not visible here, so parameterisation is left to a
    # follow-up -- confirm.
    sql = "select device_id,city_id from data_feed_click where device_id = '{0}' limit 1".format(device_id)
    profile_df = con_sql(sql)

    if profile_df.empty:
        print("没有获取到该用户对应的city_id")
        return None, True

    # presumably the result columns are labelled 0 and 1 -- TODO confirm.
    profile_df = profile_df.rename(columns={0: "device_id", 1: "city_id"})
    profile = {
        column: profile_df.loc[0, column] for column in profile_df.columns
    }
    return profile, False