userProfile.py 6.49 KB
from utils import con_sql
from datetime import datetime
from config import *
import pandas as pd
import os
import time
import pymysql
import time


# 统计尾号6的活跃用户数
def unique_user_count(file_path, temp_list, now):
    if os.path.exists(file_path):
        # 尾号是6的活跃用户数
        tail_6_list = eval(pd.read_csv(file_path).loc[0, "list"])
    else:
        tail_6_list = []

    tail_6_list.extend(list(filter(lambda x: (str(x)[-1] == "6"), temp_list)))
    if tail_6_list != []:
        df_tail_6 = pd.DataFrame({"number": [len(set(tail_6_list))], "time": [str(now)[:16]],
                                  "list": [list(set(tail_6_list))]})
        df_tail_6.to_csv(file_path, index=None)
    print("截止现在尾号是6的独立活跃数:{}".format(len(set(tail_6_list))))


# 统计预测过的独立用户数
def predict_user_count(predict_file_path,device_list,now):
    if os.path.exists(predict_file_path):
        # 预测过尾号是6的用户数
        all_predict_list = eval(pd.read_csv(predict_file_path).loc[0, "list"])
    else:
        all_predict_list = []
    all_predict_list.extend(device_list)
    if all_predict_list != []:
        df_predict = pd.DataFrame({"number": [len(set(all_predict_list))], "time": [str(now)[:16]],
                                   "list": [list(set(all_predict_list))]})
        df_predict.to_csv(predict_file_path, index=None)
    print("截止现在预测过尾号是6的独立活跃数:{}".format(len(set(all_predict_list))))


# 获取当下一分钟内活跃用户
def get_active_users(flag,path,differ):
    if differ == 0:
        end = time.time()
        start = time.time()-60
    elif 0 < differ < 10:
        time.sleep(30)
        differ += 30
        end = time.time()
        start = end - differ
    else:
        end = time.time()
        start = end - differ
    end_datetime = str(datetime.fromtimestamp(end))
    start_datetime = str(datetime.fromtimestamp(start))
    if flag:
        sql = "select device_id,city_id from user_active_time " \
              "where active_time <= '{}' and active_time >= '{}'".format(end_datetime, start_datetime)
        db = pymysql.connect(host=ACTIVE_USER_DB_ONLINE["host"], port=ACTIVE_USER_DB_ONLINE["port"],
                             user=ACTIVE_USER_DB_ONLINE["user"], passwd=ACTIVE_USER_DB_ONLINE["passwd"],
                             db=ACTIVE_USER_DB_ONLINE["db"])
        df = con_sql(db,sql)
    else:
        db = pymysql.connect(host=ACTIVE_USER_DB_LOCAL["host"], port=ACTIVE_USER_DB_LOCAL["port"],
                             user=ACTIVE_USER_DB_LOCAL["user"], db=ACTIVE_USER_DB_LOCAL["db"])
        sql = "select device_id,city_id from user_active_time"
        df = con_sql(db, sql)

    if df.empty:
        print("当下没有活跃用户数")
        return []
    # 统计活跃用户中尾号是6的用户数
    else:
        temp_list = df[0].values.tolist()
        now = datetime.now()
        tail6_file_path = path + "{}tail6Unique.csv".format(str(now)[:10])
        unique_user_count(tail6_file_path, temp_list, now)
        # if os.path.exists(tail6_file_path):
        #     # 尾号是6的活跃用户数
        #     tail_6_list = eval(pd.read_csv(tail6_file_path).loc[0, "list"])
        # else:
        #     tail_6_list = []
        #
        # tail_6_list.extend(list(filter(lambda x: (str(x)[-1] == "6"), temp_list)))
        # if tail_6_list != []:
        #     df_tail_6 = pd.DataFrame({"number": [len(set(tail_6_list))], "time": [str(now)[:16]],
        #                               "list": [list(set(tail_6_list))]})
        #     df_tail_6.to_csv(tail6_file_path, index=None)
        #
        # print("截止现在尾号是6的独立活跃数:{}".format(len(set(tail_6_list))))
        old_device_id_list = pd.read_csv(path + "data_set_device_id.csv")["device_id"].values.tolist()
        # 求活跃用户和老用户的交集,也就是只预测老用户
        df = df.loc[df[0].isin(old_device_id_list)]
        if df.empty:
            print("该列表是新用户,不需要预测")
            return []
        else:
            # TODO 正式上线后注释下面的只预测尾号是6的代码
            # 只预测尾号是6的ID,这块是测试要求的
            device_temp_list = df[0].values.tolist()

            predict_list = list(filter(lambda x: (str(x)[-1] == "6") or (str(x)=="358035085192742")
                                                 or str(x)=="AB20292B-5D15-4C44-9429-1C2FF5ED26F6",
                                       device_temp_list))
            if predict_list == []:
                print('没有尾号是6和目标用户')
                return []
            else:
                df = df.loc[df[0].isin(predict_list)]
                device_list = df[0].values.tolist()
                city_list = df[1].values.tolist()
                device_city_list = list(zip(device_list, city_list))
                print("当下这一分钟预测用户数量:{}".format(len(device_city_list)))

                #统计尾号6的预测用户
                predict_file_path = path + "{}predictTail6Unique.csv".format(str(now)[:10])
                predict_user_count(predict_file_path,device_list,now)
                # if os.path.exists(predict_file_path):
                #     # 预测过尾号是6的用户数
                #     all_predict_list = eval(pd.read_csv(predict_file_path).loc[0, "list"])
                # else:
                #     all_predict_list = []
                # all_predict_list.extend(device_list)
                # if all_predict_list != []:
                #     df_predict = pd.DataFrame({"number": [len(set(all_predict_list))], "time": [str(now)[:16]],
                #                                "list": [list(set(all_predict_list))]})
                #     df_predict.to_csv(predict_file_path, index=None)

                return device_city_list


def fetch_user_profile(device_id):
        db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
        sql = "select device_id,city_id from data_feed_click where device_id = '{0}' limit 1".format(device_id)
        user_profile = con_sql(db,sql)
        if user_profile.empty:
            print("没有获取到该用户对应的city_id")
            return None,True
        else:
            user_profile = user_profile.rename(columns={0:"device_id",1:"city_id"})
            user_profile_dict = {}
            for i in user_profile.columns:
                user_profile_dict[i] = user_profile.loc[0, i]
            return user_profile_dict, False