userProfile.py

from utils import con_sql
from datetime import datetime
from config import *
import pandas as pd
import os
import time
import pymysql
import time


# 获取当下一分钟内活跃用户
def get_active_users(flag,path,differ):
    if differ == 0:
        end = time.time()
        start = time.time()-60
    elif 0 < differ < 10:
        time.sleep(30)
        differ += 30
        end = time.time()
        start = end - differ
    else:
        end = time.time()
        start = end - differ
    end_datetime = str(datetime.fromtimestamp(end))
    start_datetime = str(datetime.fromtimestamp(start))
    sql = "select device_id,city_id from user_active_time " \
          "where active_time <= '{}' and active_time >= '{}'".format(end_datetime,start_datetime)
    if flag:
        df = con_sql(sql)
    else:
        db = pymysql.connect(host='192.168.15.12', port=4000, user='root', db='jerry_test')
        sql = "select device_id,city_id from user_active_time"
        cursor = db.cursor()
        cursor.execute(sql)
        result = cursor.fetchall()
        df = pd.DataFrame(list(result)).dropna()
        db.close()

    if df.empty:
        print("当下没有活跃用户数")
        return []
    # 统计活跃用户中尾号是6的用户数
    else:
        temp_list = df[0].values.tolist()
        now = datetime.now()
        tail6_file_path = path + "{}tail6Unique.csv".format(str(now)[:10])
        if os.path.exists(tail6_file_path):
            # 尾号是6的活跃用户数
            tail_6_list = eval(pd.read_csv(tail6_file_path).loc[0, "list"])
        else:
            tail_6_list = []

        tail_6_list.extend(list(filter(lambda x: (str(x)[-1] == "6"), temp_list)))
        if tail_6_list != []:
            df_tail_6 = pd.DataFrame({"number": [len(set(tail_6_list))], "time": [str(now)[:16]],
                                      "list": [list(set(tail_6_list))]})
            df_tail_6.to_csv(tail6_file_path, index=None)

        print("截止现在尾号是6的独立活跃数：{}".format(len(set(tail_6_list))))
        old_device_id_list = pd.read_csv(path + "data_set_device_id.csv")["device_id"].values.tolist()
        # 求活跃用户和老用户的交集，也就是只预测老用户
        df = df.loc[df[0].isin(old_device_id_list)]
        if df.empty:
            print("该列表是新用户，不需要预测")
            return []
        else:
            # TODO 正式上线后注释下面的只预测尾号是6的代码
            # 只预测尾号是6的ID，这块是测试要求的
            device_temp_list = df[0].values.tolist()

            predict_list = list(filter(lambda x: (str(x)[-1] == "6") or (str(x)=="358035085192742")
                                                 or str(x)=="AB20292B-5D15-4C44-9429-1C2FF5ED26F6",
                                       device_temp_list))
            if predict_list == []:
                print('没有尾号是6和目标用户')
                return []
            else:
                df = df.loc[df[0].isin(predict_list)]
                device_list = df[0].values.tolist()
                city_list = df[1].values.tolist()
                device_city_list = list(zip(device_list, city_list))
                print("当下这一分钟预测用户数量：{}".format(len(device_city_list)))

                #统计尾号6的预测用户
                predict_file_path = path + "{}predictTail6Unique.csv".format(str(now)[:10])
                if os.path.exists(predict_file_path):
                    # 预测过尾号是6的用户数
                    all_predict_list = eval(pd.read_csv(predict_file_path).loc[0, "list"])
                else:
                    all_predict_list = []
                all_predict_list.extend(device_list)
                if all_predict_list != []:
                    df_predict = pd.DataFrame({"number": [len(set(all_predict_list))], "time": [str(now)[:16]],
                                               "list": [list(set(all_predict_list))]})
                    df_predict.to_csv(predict_file_path, index=None)

                return device_city_list


def fetch_user_profile(device_id):
        sql = "select device_id,city_id from data_feed_click where device_id = '{0}' limit 1".format(device_id)
        user_profile = con_sql(sql)
        if user_profile.empty:
            print("没有获取到该用户对应的city_id")
            return None,True
        else:
            user_profile = user_profile.rename(columns={0:"device_id",1:"city_id"})
            user_profile_dict = {}
            for i in user_profile.columns:
                user_profile_dict[i] = user_profile.loc[0, i]
            return user_profile_dict, False