1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# -*- coding: UTF-8 -*-
from utils import con_sql,get_yesterday_date,get_between_day
import time
import sys
class GrayStat(object):
def __init__(self, cid_type, uid_type, ndays=get_yesterday_date()):
"""
cid_type : diary,answer,question
uid_type : 8:_8结尾;6:_6结尾;6|8:_6或者_8结尾;^68:不是6或者8结尾的
ndays : '2018-08-30'....
"""
self.cid_type = cid_type
self.uid_type = uid_type
self.ndays = ndays
def get_uid_count(self):
sql = "select count(distinct(device_id)) from data_feed_click \
where stat_date='{0}' \
and (cid_type='{1}' or cid_type='diary_video') \
and device_id regexp '[{2}]$' \
and device_id in \
(select device_id \
from {3}) \
and device_id not in (select distinct(device_id) from jerry_test.bl_device_list) \
and device_id not in (select distinct(device_id) from jerry_prod.blacklist)".format(self.ndays,\
self.cid_type,self.uid_type,table_name)
uid_count = con_sql(sql)[0][0]
return uid_count
def get_uid_clk_times(self):
sql = "select count(device_id) from data_feed_click \
where stat_date='{0}' \
and (cid_type='{1}' or cid_type='diary_video') \
and device_id regexp '[{2}]$' \
and device_id in \
(select device_id \
from {3}) \
and device_id not in (select distinct(device_id) from jerry_test.bl_device_list) \
and device_id not in (select distinct(device_id) from jerry_prod.blacklist)".format(self.ndays,\
self.cid_type,self.uid_type,table_name)
uid_clk_times = con_sql(sql)[0][0]
return uid_clk_times
def get_uid_imp_times(self):
sql = "select count(device_id) from data_feed_exposure \
where stat_date='{0}' \
and cid_type='{1}' \
and device_id regexp '[{2}]$' \
and device_id in \
(select device_id \
from {3}) \
and device_id not in (select distinct(device_id) from jerry_test.bl_device_list) \
and device_id not in (select distinct(device_id) from jerry_prod.blacklist)".format(self.ndays,\
self.cid_type,self.uid_type,table_name)
uid_imp_times = con_sql(sql)[0][0]
return uid_imp_times
class AllStat(object):
def __init__(self, cid_type, uid_type, ndays=get_yesterday_date()):
"""
cid_type : diary,answer,question
uid_type : 8:_8结尾;6:_6结尾;6|8:_6或者_8结尾;^68:不是6或者8结尾的
ndays : '2018-08-30'....
"""
self.cid_type = cid_type
self.uid_type = uid_type
self.ndays = ndays
def get_uid_count(self):
sql = "select count(distinct(device_id)) from data_feed_click \
where stat_date='{0}' \
and (cid_type='{1}' or cid_type='diary_video') \
and device_id regexp '[{2}]$' \
and device_id not in (select distinct(device_id) from jerry_test.bl_device_list) \
and device_id not in (select distinct(device_id) from jerry_prod.blacklist)".format(self.ndays,\
self.cid_type,self.uid_type)
uid_count = con_sql(sql)[0][0]
return uid_count
def get_uid_clk_times(self):
sql = "select count(device_id) from data_feed_click \
where stat_date='{0}' \
and (cid_type='{1}' or cid_type='diary_video') \
and device_id regexp '[{2}]$' \
and device_id not in (select distinct(device_id) from jerry_test.bl_device_list) \
and device_id not in (select distinct(device_id) from jerry_prod.blacklist)".format(self.ndays,\
self.cid_type,self.uid_type)
uid_clk_times = con_sql(sql)[0][0]
return uid_clk_times
def get_uid_imp_times(self):
sql = "select count(device_id) from data_feed_exposure \
where stat_date='{0}' \
and cid_type='{1}' \
and device_id regexp '[{2}]$' \
and device_id not in (select distinct(device_id) from jerry_test.bl_device_list) \
and device_id not in (select distinct(device_id) from jerry_prod.blacklist)".format(self.ndays,\
self.cid_type,self.uid_type)
uid_imp_times = con_sql(sql)[0][0]
return uid_imp_times
def main():
output = OUTPUT_PATH + "ctr.csv"
with open(output,"a+") as f:
print("stat" + " " + get_yesterday_date())
g_class = GrayStat("diary",num)
a_class = AllStat("diary",num)
line1 = str(g_class.get_uid_count())+","+str(g_class.get_uid_imp_times())+","+str(g_class.get_uid_clk_times())
line2 = str(a_class.get_uid_count())+","+str(a_class.get_uid_imp_times())+","+str(a_class.get_uid_clk_times())
g_ctr = g_class.get_uid_clk_times()/g_class.get_uid_imp_times()
a_ctr = a_class.get_uid_clk_times()/a_class.get_uid_imp_times()
growth_rate = (g_ctr-a_ctr)/a_ctr
coverage_rate = g_class.get_uid_count()/a_class.get_uid_count()
line = get_yesterday_date() + "," + str(round(g_ctr*100,2))+'%' + "," + str(round(a_ctr*100,2))+'%' + "," + \
str(round(growth_rate*100,2))+'%' + "," + line1 + "," + line2 + "," + str(round(coverage_rate*100,2)) + "%" + "\n"
f.write(line)
if __name__ == '__main__':
OUTPUT_PATH = "/data2/models/eda/gray_stat/"
num = sys.argv[1]
if num == "3|4":
OUTPUT_PATH = "/data2/models/eda/gray_stat/3|4"
table_name = "jerry_prod.nd_device_cid_similarity_matrix_tmp"
elif num == "5|6":
OUTPUT_PATH = "/data2/models/eda/gray_stat/5|6"
table_name = "eagle.ffm_diary_queue_temp"
elif num == "7|8":
OUTPUT_PATH = "/data2/models/eda/gray_stat/7|8"
table_name = "eagle.search_queue"
else:
print("参数必须是 '3|4' 或 '5|6' 或 '7|8'")
print("程序异常停止,请重新输入参数运行")
sys.exit()
main()