1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
from utils import con_sql
from datetime import datetime
from config import *
import pandas as pd
import os
import time
import pymysql
import time
# 统计尾号6的活跃用户数
def unique_user_count(file_path, temp_list, now):
if os.path.exists(file_path):
# 尾号是6的活跃用户数
tail_6_list = eval(pd.read_csv(file_path).loc[0, "list"])
else:
tail_6_list = []
tail_6_list.extend(list(filter(lambda x: (str(x)[-1] == "6"), temp_list)))
if tail_6_list != []:
df_tail_6 = pd.DataFrame({"number": [len(set(tail_6_list))], "time": [str(now)[:16]],
"list": [list(set(tail_6_list))]})
df_tail_6.to_csv(file_path, index=None)
print("截止现在尾号是6的独立活跃数:{}".format(len(set(tail_6_list))))
# 统计预测过的独立用户数
def predict_user_count(predict_file_path,device_list,now):
if os.path.exists(predict_file_path):
# 预测过尾号是6的用户数
all_predict_list = eval(pd.read_csv(predict_file_path).loc[0, "list"])
else:
all_predict_list = []
all_predict_list.extend(device_list)
if all_predict_list != []:
df_predict = pd.DataFrame({"number": [len(set(all_predict_list))], "time": [str(now)[:16]],
"list": [list(set(all_predict_list))]})
df_predict.to_csv(predict_file_path, index=None)
print("截止现在预测过尾号是6的独立活跃数:{}".format(len(set(all_predict_list))))
# 获取当下一分钟内活跃用户
def get_active_users(flag,path,differ):
if differ == 0:
end = time.time()
start = time.time()-60
elif 0 < differ < 10:
time.sleep(30)
differ += 30
end = time.time()
start = end - differ
else:
end = time.time()
start = end - differ
end_datetime = str(datetime.fromtimestamp(end))
start_datetime = str(datetime.fromtimestamp(start))
if flag:
sql = "select device_id,city_id from user_active_time " \
"where active_time <= '{}' and active_time >= '{}'".format(end_datetime, start_datetime)
db = pymysql.connect(host=ACTIVE_USER_DB_ONLINE["host"], port=ACTIVE_USER_DB_ONLINE["port"],
user=ACTIVE_USER_DB_ONLINE["user"], passwd=ACTIVE_USER_DB_ONLINE["passwd"],
db=ACTIVE_USER_DB_ONLINE["db"])
df = con_sql(db,sql)
else:
db = pymysql.connect(host=ACTIVE_USER_DB_LOCAL["host"], port=ACTIVE_USER_DB_LOCAL["port"],
user=ACTIVE_USER_DB_LOCAL["user"], db=ACTIVE_USER_DB_LOCAL["db"])
sql = "select device_id,city_id from user_active_time"
df = con_sql(db, sql)
if df.empty:
print("当下没有活跃用户数")
return []
# 统计活跃用户中尾号是6的用户数
else:
temp_list = df[0].values.tolist()
now = datetime.now()
tail6_file_path = path + "{}tail6Unique.csv".format(str(now)[:10])
unique_user_count(tail6_file_path, temp_list, now)
# if os.path.exists(tail6_file_path):
# # 尾号是6的活跃用户数
# tail_6_list = eval(pd.read_csv(tail6_file_path).loc[0, "list"])
# else:
# tail_6_list = []
#
# tail_6_list.extend(list(filter(lambda x: (str(x)[-1] == "6"), temp_list)))
# if tail_6_list != []:
# df_tail_6 = pd.DataFrame({"number": [len(set(tail_6_list))], "time": [str(now)[:16]],
# "list": [list(set(tail_6_list))]})
# df_tail_6.to_csv(tail6_file_path, index=None)
#
# print("截止现在尾号是6的独立活跃数:{}".format(len(set(tail_6_list))))
old_device_id_list = pd.read_csv(path + "data_set_device_id.csv")["device_id"].values.tolist()
# 求活跃用户和老用户的交集,也就是只预测老用户
df = df.loc[df[0].isin(old_device_id_list)]
if df.empty:
print("该列表是新用户,不需要预测")
return []
else:
# TODO 正式上线后注释下面的只预测尾号是6的代码
# 只预测尾号是6的ID,这块是测试要求的
device_temp_list = df[0].values.tolist()
predict_list = list(filter(lambda x: (str(x)[-1] == "6") or (str(x)=="358035085192742")
or str(x)=="AB20292B-5D15-4C44-9429-1C2FF5ED26F6",
device_temp_list))
if predict_list == []:
print('没有尾号是6和目标用户')
return []
else:
df = df.loc[df[0].isin(predict_list)]
device_list = df[0].values.tolist()
city_list = df[1].values.tolist()
device_city_list = list(zip(device_list, city_list))
print("当下这一分钟预测用户数量:{}".format(len(device_city_list)))
#统计尾号6的预测用户
predict_file_path = path + "{}predictTail6Unique.csv".format(str(now)[:10])
predict_user_count(predict_file_path,device_list,now)
# if os.path.exists(predict_file_path):
# # 预测过尾号是6的用户数
# all_predict_list = eval(pd.read_csv(predict_file_path).loc[0, "list"])
# else:
# all_predict_list = []
# all_predict_list.extend(device_list)
# if all_predict_list != []:
# df_predict = pd.DataFrame({"number": [len(set(all_predict_list))], "time": [str(now)[:16]],
# "list": [list(set(all_predict_list))]})
# df_predict.to_csv(predict_file_path, index=None)
return device_city_list
def fetch_user_profile(device_id):
db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
sql = "select device_id,city_id from data_feed_click where device_id = '{0}' limit 1".format(device_id)
user_profile = con_sql(db,sql)
if user_profile.empty:
print("没有获取到该用户对应的city_id")
return None,True
else:
user_profile = user_profile.rename(columns={0:"device_id",1:"city_id"})
user_profile_dict = {}
for i in user_profile.columns:
user_profile_dict[i] = user_profile.loc[0, i]
return user_profile_dict, False