1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# -*- coding: UTF-8 -*-
import time
import pymysql
import datetime
def con_sql(sql):
#从数据库的表里获取数据
"""
:type sql : str
:rtype : tuple
"""
db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_prod')
cursor = db.cursor()
cursor.execute(sql)
result = cursor.fetchall()
db.close()
return result
def get_yesterday_date():
#自动获取昨天的日期,如"2018-08-08"
"""
:rtype : str
"""
today = datetime.date.today()
yesterday = today - datetime.timedelta(days=1)
yesterday = yesterday.strftime("%Y-%m-%d")
return yesterday
OUTPUT_PATH = "/data2/models/eda/node2vec/"
class GrayStat(object):
def __init__(self, cid_type, uid_type, ndays=get_yesterday_date()):
"""
cid_type : diary,answer,question
uid_type : 8:_8结尾;6:_6结尾;6|8:_6或者_8结尾;^68:不是6或者8结尾的
ndays : '2018-08-30'....
"""
self.cid_type = cid_type
self.uid_type = uid_type
self.ndays = ndays
def get_uid_count(self):
sql = "select count(distinct(device_id)) from data_feed_click \
where stat_date='{0}' \
and (cid_type='{1}' or cid_type='diary_video') \
and device_id regexp '[{2}]$' \
and device_id in \
(select device_id \
from nd_device_cid_similarity_matrix_tmp) \
and device_id not in (select distinct(device_id) from jerry_test.bl_device_list) \
and device_id not in (select distinct(device_id) from jerry_prod.blacklist)".format(self.ndays,\
self.cid_type,self.uid_type)
uid_count = con_sql(sql)[0][0]
return uid_count
def get_uid_clk_times(self):
sql = "select count(device_id) from data_feed_click \
where stat_date='{0}' \
and (cid_type='{1}' or cid_type='diary_video') \
and device_id regexp '[{2}]$' \
and device_id in \
(select device_id \
from nd_device_cid_similarity_matrix_tmp) \
and device_id not in (select distinct(device_id) from jerry_test.bl_device_list) \
and device_id not in (select distinct(device_id) from jerry_prod.blacklist)".format(self.ndays,\
self.cid_type,self.uid_type)
uid_clk_times = con_sql(sql)[0][0]
return uid_clk_times
def get_uid_imp_times(self):
sql = "select count(device_id) from data_feed_exposure \
where stat_date='{0}' \
and cid_type='{1}' \
and device_id regexp '[{2}]$' \
and device_id in \
(select device_id \
from nd_device_cid_similarity_matrix_tmp) \
and device_id not in (select distinct(device_id) from jerry_test.bl_device_list) \
and device_id not in (select distinct(device_id) from jerry_prod.blacklist)".format(self.ndays,\
self.cid_type,self.uid_type)
uid_imp_times = con_sql(sql)[0][0]
return uid_imp_times
class AllStat(object):
def __init__(self, cid_type, uid_type, ndays=get_yesterday_date()):
"""
cid_type : diary,answer,question
uid_type : 8:_8结尾;6:_6结尾;6|8:_6或者_8结尾;^68:不是6或者8结尾的
ndays : '2018-08-30'....
"""
self.cid_type = cid_type
self.uid_type = uid_type
self.ndays = ndays
def get_uid_count(self):
sql = "select count(distinct(device_id)) from data_feed_click \
where stat_date='{0}' \
and (cid_type='{1}' or cid_type='diary_video') \
and device_id regexp '[{2}]$' \
and device_id not in (select distinct(device_id) from jerry_test.bl_device_list) \
and device_id not in (select distinct(device_id) from jerry_prod.blacklist)".format(self.ndays,\
self.cid_type,self.uid_type)
uid_count = con_sql(sql)[0][0]
return uid_count
def get_uid_clk_times(self):
sql = "select count(device_id) from data_feed_click \
where stat_date='{0}' \
and (cid_type='{1}' or cid_type='diary_video') \
and device_id regexp '[{2}]$' \
and device_id not in (select distinct(device_id) from jerry_test.bl_device_list) \
and device_id not in (select distinct(device_id) from jerry_prod.blacklist)".format(self.ndays,\
self.cid_type,self.uid_type)
uid_clk_times = con_sql(sql)[0][0]
return uid_clk_times
def get_uid_imp_times(self):
sql = "select count(device_id) from data_feed_exposure \
where stat_date='{0}' \
and cid_type='{1}' \
and device_id regexp '[{2}]$' \
and device_id not in (select distinct(device_id) from jerry_test.bl_device_list) \
and device_id not in (select distinct(device_id) from jerry_prod.blacklist)".format(self.ndays,\
self.cid_type,self.uid_type)
uid_imp_times = con_sql(sql)[0][0]
return uid_imp_times
def main():
output = OUTPUT_PATH + "nd_ctr.csv"
with open(output,"a+") as f:
print("stat" + " " + get_yesterday_date())
g_class = GrayStat("diary","3|4")
a_class = AllStat("diary","3|4")
line1 = str(g_class.get_uid_count())+","+str(g_class.get_uid_imp_times())+","+str(g_class.get_uid_clk_times())
line2 = str(a_class.get_uid_count())+","+str(a_class.get_uid_imp_times())+","+str(a_class.get_uid_clk_times())
g_ctr = g_class.get_uid_clk_times()/g_class.get_uid_imp_times()
a_ctr = a_class.get_uid_clk_times()/a_class.get_uid_imp_times()
growth_rate = (g_ctr-a_ctr)/a_ctr
coverage_rate = g_class.get_uid_count()/a_class.get_uid_count()
line = get_yesterday_date() + "," + str(round(g_ctr*100,2))+'%' + "," + str(round(a_ctr*100,2))+'%' + "," + \
str(round(growth_rate*100,2))+'%' + "," + line1 + "," + line2 + "," + str(round(coverage_rate*100,2)) + "%" + "\n"
f.write(line)
if __name__ == '__main__':
main()