1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
# -*- coding: UTF-8 -*-
from utils import con_sql,get_yesterday_date,get_between_day
import time
OUTPUT_PATH = "/data2/models/eda/gray_stat/"
class GrayStat(object):
def __init__(self, cid_type, uid_type, platform,ndays=get_yesterday_date()):
"""
cid_type : diary,answer,question
uid_type : 8:_8结尾;6:_6结尾;6|8:_6或者_8结尾;^68:不是6或者8结尾的
platform : ios;android
ndays : '2018-08-30'....
"""
self.cid_type = cid_type
self.uid_type = uid_type
if platform == "ios":
self.platform = "='App Store'"
else:
self.platform = "!='App Store'"
self.ndays = ndays
def get_uid_count(self):
sql = "select count(distinct(device_id)) from data_feed_click \
where stat_date='{0}' \
and cid_type='{3}' \
and device_id regexp '[{1}]$' \
and device_type{2}".format(self.ndays,self.uid_type,\
self.platform.replace(' ',''),self.cid_type)
uid_count = con_sql(sql)[0][0]
return uid_count
def get_uid_clk_times(self):
sql = "select count(device_id) from data_feed_click \
where stat_date='{0}' \
and cid_type='{3}' \
and device_id regexp '[{1}]$' \
and device_type{2}".format(self.ndays,self.uid_type,\
self.platform.replace(' ',''),self.cid_type)
uid_clk_times = con_sql(sql)[0][0]
return uid_clk_times
def get_uid_imp_times(self):
sql = "select count(device_id) from data_feed_exposure \
where stat_date='{0}' \
and cid_type='{4}' \
and device_id regexp '[{1}]$' \
and device_type{2} \
and device_id in (select device_id from data_feed_click \
where stat_date='{0}' \
and cid_type='{4}' \
and device_id regexp '[{1}]$' \
and device_type{3})".format(self.ndays,self.uid_type,self.platform,\
self.platform.replace(' ',''),self.cid_type)
uid_imp_times = con_sql(sql)[0][0]
return uid_imp_times
if __name__ == '__main__':
# date_list = get_between_day('2018-08-20','2018-08-29')
# for my_date in date_list:
print("开始获取ffm中的灰度非灰度比例")
start = time.time()
#1.ffm中的灰度非灰度(ios和安卓一样):
# 灰度:_6
# 非灰度:_8
result1 = []
#1.1获取ios和android平台的数据
platforms = ['ios','android']
grays = ['6','8']
for platform in platforms:
for gray in grays:
g_class = GrayStat('diary',gray,platform)
uid_count = g_class.get_uid_count()
uid_clk_times = g_class.get_uid_clk_times()
uid_imp_times = g_class.get_uid_imp_times()
uid_clk_rate = round(uid_clk_times/uid_imp_times,4) if uid_imp_times != 0 else 0
result1.append([g_class.ndays,g_class.cid_type,platform,gray,uid_count,\
uid_clk_times,uid_imp_times,uid_clk_rate])
result1.append([g_class.ndays,g_class.cid_type,platform,'all',\
result1[-1][4]+result1[-2][4],\
result1[-1][5]+result1[-2][5],\
result1[-1][6]+result1[-2][6],\
round((result1[-1][5]+result1[-2][5])/(result1[-1][6]+result1[-2][6]),4)]) if (result1[-1][6]+result1[-2][6]) != 0 else 0
#1.2获取所有平台的数据
labels = ['6','8','all']
for i in range(3):
result1.append([g_class.ndays,g_class.cid_type,'all',labels[i],\
result1[i][4]+result1[i+3][4],\
result1[i][5]+result1[i+3][5],\
result1[i][6]+result1[i+3][6],\
round((result1[i][5]+result1[i+3][5])/(result1[i][6]+result1[i+3][6]),4)]) if (result1[i][6]+result1[i+3][6]) !=0 else 0
#1.3把一天所有的数据存入文件
output1 = OUTPUT_PATH + "gray_ffm.csv"
with open(output1,'a+') as f:
for line in result1:
line = [str(i) for i in line]
str_line = ','.join(line) + '\n'
f.write(str_line)
#2.实际中的灰度非灰度:
# ios:
# 灰度:6|8
#非灰度:^68
# android:
# 灰度:0|1|2|6|8
#非灰度:^01268
print("开始获取实际中的灰度非灰度比例")
result2 = []
#2.1获取ios和android平台的数据
platforms = ['ios','android']
for platform in platforms:
#TODO 对于(安卓灰度放到 0 1 2 6 8;iOS灰度保持 6 8)问题,做一个id判断即可
if platform == 'ios':
grays = ['6|8','^68']
else:
grays = ['0|1|2|6|8','^01268']
for gray in grays:
g_class = GrayStat('diary',gray,platform)
uid_count = g_class.get_uid_count()
uid_clk_times = g_class.get_uid_clk_times()
uid_imp_times = g_class.get_uid_imp_times()
uid_clk_rate = round(uid_clk_times/uid_imp_times,4) if uid_imp_times != 0 else 0
result2.append([g_class.ndays,g_class.cid_type,platform,gray,uid_count,\
uid_clk_times,uid_imp_times,uid_clk_rate])
result2.append([g_class.ndays,g_class.cid_type,platform,'all',\
result2[-1][4]+result2[-2][4],\
result2[-1][5]+result2[-2][5],\
result2[-1][6]+result2[-2][6],\
round((result2[-1][5]+result2[-2][5])/(result2[-1][6]+result2[-2][6]),4)]) if (result2[-1][6]+result2[-2][6]) != 0 else 0
#2.2获取所有平台的数据
labels = ['gray','not gray','all']
for i in range(3):
result2.append([g_class.ndays,g_class.cid_type,'all',labels[i],\
result2[i][4]+result2[i+3][4],\
result2[i][5]+result2[i+3][5],\
result2[i][6]+result2[i+3][6],\
round((result2[i][5]+result2[i+3][5])/(result2[i][6]+result2[i+3][6]),4)]) if (result2[i][6]+result2[i+3][6]) !=0 else 0
#2.3把一天所有的数据写入文件
output2 = OUTPUT_PATH + "gray_all.csv"
with open(output2,'a+') as f:
for line in result2:
line = [str(i) for i in line]
str_line = ','.join(line) + '\n'
f.write(str_line)
end = time.time()
print("程序执行时间:{}s".format(end-start))