userProfile.py 6.49 KB
Newer Older
1
from utils import con_sql
2
from datetime import datetime
3 4 5 6
from config import *
import pandas as pd
import os
import time
张彦钊's avatar
张彦钊 committed
7
import pymysql
张彦钊's avatar
张彦钊 committed
8
import time
9 10


张彦钊's avatar
张彦钊 committed
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41
def unique_user_count(file_path, temp_list, now):
    """Accumulate the distinct active device IDs ending in "6" into a one-row CSV.

    The CSV at ``file_path`` holds a single row with three columns:
    ``number`` (count of distinct IDs), ``time`` (``str(now)`` truncated to
    the minute) and ``list`` (the Python-literal text of the ID list).  IDs
    already stored in the file are merged with the matching IDs from
    ``temp_list`` and the file is rewritten.  Nothing is written when the
    merged list is empty.  The running distinct count is printed.

    Args:
        file_path: Path of the CSV file to read (if it exists) and overwrite.
        temp_list: Iterable of device IDs; only those whose string form ends
            in "6" are kept.
        now: Timestamp of this run; the first 16 characters of ``str(now)``
            are stored in the ``time`` column.

    Raises:
        ValueError: If the stored ``list`` cell is not a plain Python literal.
        SyntaxError: If the stored ``list`` cell is not parseable at all.
    """
    # Local import so this function does not depend on the module's import block.
    import ast

    if os.path.exists(file_path):
        stored = pd.read_csv(file_path).loc[0, "list"]
        # literal_eval only accepts literals, so text in the file can never
        # be executed as code (the previous eval() would run anything).
        tail_6_list = list(ast.literal_eval(stored))
    else:
        tail_6_list = []

    # endswith() is safe for an empty ID, unlike indexing with [-1].
    tail_6_list.extend(x for x in temp_list if str(x).endswith("6"))

    # De-duplicate while keeping first-seen order so the stored list is stable.
    unique_ids = list(dict.fromkeys(tail_6_list))
    if unique_ids:
        df_tail_6 = pd.DataFrame({"number": [len(unique_ids)], "time": [str(now)[:16]],
                                  "list": [unique_ids]})
        df_tail_6.to_csv(file_path, index=False)
    print("截止现在尾号是6的独立活跃数:{}".format(len(unique_ids)))


def predict_user_count(predict_file_path,device_list,now):
    """Accumulate the distinct device IDs that have been predicted into a one-row CSV.

    The CSV at ``predict_file_path`` holds a single row with three columns:
    ``number`` (count of distinct IDs), ``time`` (``str(now)`` truncated to
    the minute) and ``list`` (the Python-literal text of the ID list).  IDs
    already stored in the file are merged with every ID in ``device_list``
    (no filtering is applied here) and the file is rewritten.  Nothing is
    written when the merged list is empty.  The running distinct count is
    printed.

    Args:
        predict_file_path: Path of the CSV file to read (if it exists) and
            overwrite.
        device_list: Iterable of device IDs predicted in this run.
        now: Timestamp of this run; the first 16 characters of ``str(now)``
            are stored in the ``time`` column.

    Raises:
        ValueError: If the stored ``list`` cell is not a plain Python literal.
        SyntaxError: If the stored ``list`` cell is not parseable at all.
    """
    # Local import so this function does not depend on the module's import block.
    import ast

    if os.path.exists(predict_file_path):
        stored = pd.read_csv(predict_file_path).loc[0, "list"]
        # literal_eval only accepts literals, so text in the file can never
        # be executed as code (the previous eval() would run anything).
        all_predict_list = list(ast.literal_eval(stored))
    else:
        all_predict_list = []
    all_predict_list.extend(device_list)

    # De-duplicate while keeping first-seen order so the stored list is stable.
    unique_ids = list(dict.fromkeys(all_predict_list))
    if unique_ids:
        df_predict = pd.DataFrame({"number": [len(unique_ids)], "time": [str(now)[:16]],
                                   "list": [unique_ids]})
        df_predict.to_csv(predict_file_path, index=False)
    print("截止现在预测过尾号是6的独立活跃数:{}".format(len(unique_ids)))


42
# 获取当下一分钟内活跃用户
张彦钊's avatar
张彦钊 committed
43 44 45 46 47 48 49 50 51 52 53 54 55 56
def get_active_users(flag,path,differ):
    """Return ``(device_id, city_id)`` pairs of active, already-known users to predict.

    Steps, in order:
      1. Build a time window ending now.  ``differ == 0`` means the last 60
         seconds; ``0 < differ < 10`` sleeps 30 seconds and widens the window
         by 30 seconds; any other value uses ``differ`` seconds as-is.
      2. Query ``user_active_time``: the online database restricted to the
         window when ``flag`` is truthy, otherwise the local database with no
         time restriction.
      3. Record the active IDs ending in "6" via ``unique_user_count``.
      4. Keep only devices listed in ``data_set_device_id.csv``.
      5. Keep only IDs ending in "6" plus two hard-coded target devices,
         record them via ``predict_user_count`` and return them.

    Assumes ``con_sql`` returns a DataFrame with positional columns ``0``
    (device_id) and ``1`` (city_id) -- TODO confirm against ``utils.con_sql``.

    Args:
        flag: Truthy to use ``ACTIVE_USER_DB_ONLINE``, falsy to use
            ``ACTIVE_USER_DB_LOCAL``.
        path: String prefix that is concatenated directly with the CSV file
            names read and written here.
        differ: Window length selector in seconds (see step 1).

    Returns:
        A list of ``(device_id, city_id)`` tuples, or ``[]`` when there is
        nothing to predict.
    """
    # Step 1: time window.
    if differ == 0:
        end = time.time()
        start = time.time() - 60
    else:
        if 0 < differ < 10:
            time.sleep(30)
            differ += 30
        end = time.time()
        start = end - differ
    end_datetime = str(datetime.fromtimestamp(end))
    start_datetime = str(datetime.fromtimestamp(start))

    # Step 2: fetch the active users.
    if flag:
        online = ACTIVE_USER_DB_ONLINE
        sql = ("select device_id,city_id from user_active_time "
               "where active_time <= '{}' and active_time >= '{}'").format(end_datetime, start_datetime)
        db = pymysql.connect(host=online["host"], port=online["port"],
                             user=online["user"], passwd=online["passwd"],
                             db=online["db"])
    else:
        local = ACTIVE_USER_DB_LOCAL
        db = pymysql.connect(host=local["host"], port=local["port"],
                             user=local["user"], db=local["db"])
        sql = "select device_id,city_id from user_active_time"
    df = con_sql(db, sql)

    if df.empty:
        print("当下没有活跃用户数")
        return []

    # Step 3: count the active users whose ID ends in "6".
    active_ids = df[0].values.tolist()
    now = datetime.now()
    tail6_file_path = path + "{}tail6Unique.csv".format(str(now)[:10])
    unique_user_count(tail6_file_path, active_ids, now)

    # Step 4: intersect active users with known (old) users; only old users
    # are predicted.
    old_device_id_list = pd.read_csv(path + "data_set_device_id.csv")["device_id"].values.tolist()
    df = df.loc[df[0].isin(old_device_id_list)]
    if df.empty:
        print("该列表是新用户,不需要预测")
        return []

    # TODO: after the official launch, disable the tail-6-only filter below.
    # Only IDs ending in "6" are predicted; this restriction was requested
    # for testing.  Two specific target devices are always let through.
    target_ids = ("358035085192742", "AB20292B-5D15-4C44-9429-1C2FF5ED26F6")
    predict_list = [x for x in df[0].values.tolist()
                    if str(x)[-1] == "6" or str(x) in target_ids]
    if not predict_list:
        print('没有尾号是6和目标用户')
        return []

    # Step 5: build the result and record the predicted users.
    df = df.loc[df[0].isin(predict_list)]
    device_list = df[0].values.tolist()
    city_list = df[1].values.tolist()
    device_city_list = list(zip(device_list, city_list))
    print("当下这一分钟预测用户数量:{}".format(len(device_city_list)))

    predict_file_path = path + "{}predictTail6Unique.csv".format(str(now)[:10])
    predict_user_count(predict_file_path, device_list, now)

    return device_city_list
131 132 133


def fetch_user_profile(device_id):
    """Fetch one ``data_feed_click`` row for ``device_id`` and return it as a dict.

    Args:
        device_id: Device identifier interpolated into the SQL ``where`` clause.

    Returns:
        A ``(profile, missing)`` tuple.  When the query result is empty,
        ``(None, True)``.  Otherwise ``profile`` maps each column name to the
        value in row 0 (columns ``0`` and ``1`` are renamed to ``device_id``
        and ``city_id``) and ``missing`` is ``False``.
    """
    # NOTE(review): the connection credentials are hard-coded here and the
    # query is built by string formatting; consider moving the credentials to
    # config and escaping/parameterizing device_id if it can come from
    # untrusted input.
    db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
    sql = "select device_id,city_id from data_feed_click where device_id = '{0}' limit 1".format(device_id)
    user_profile = con_sql(db, sql)

    if user_profile.empty:
        print("没有获取到该用户对应的city_id")
        return None, True

    # con_sql results appear to use positional column labels -- give them names.
    named = user_profile.rename(columns={0: "device_id", 1: "city_id"})
    return {col: named.loc[0, col] for col in named.columns}, False