1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# -*- coding: UTF-8 -*-
import pymysql
import datetime
def con_sql(sql):
# 从数据库的表里获取数据
db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
cursor = db.cursor()
cursor.execute(sql)
result = cursor.fetchall()
db.close()
return result
def tuple2dict(tuple_result):
#把sql结果从tuple格式转换成dict格式
dict_result = {}
for i in range(len(tuple_result)):
dict_result[tuple_result[i][0]] = tuple_result[i][1]
return dict_result
def result2file(result_lst,fpath):
with open(fpath,'w') as f:
tplt = "{0:\u3000<4}\t{1:\u3000<12}\t{2:\u3000^6}\t{3:\u3000^6}\t{4:\u3000<8}\t{5:\u3000^15}\n"
f.write("Top 100 Question\n")
f.write("=================================================================\n")
f.write(tplt.format("平台","question_id","点击数","曝光数","点击率","question链接"))
for i in result_lst:
for j in i:
f.write(tplt.format(j[0],j[1],j[2],j[3],j[4],j[5]))
f.write("=================================================================\n")
if i != result_lst[-1]:
f.write(tplt.format("平台","question_id","点击数","曝光数","点击率","question链接"))
f.write("\n\n")
#1 获取昨天所有平台的top100question
#1.1 获取昨天所有平台的top100点击数的question
def get_all_question_count_by_click():
sql = "select cid,count(cid) from data_feed_click where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and cid_type='question' group by cid order by count(cid) desc"
all_question_count_by_click = con_sql(sql)
all_question_count_by_click = tuple2dict(all_question_count_by_click)
return all_question_count_by_click
#1.2 获取昨天所有平台的top100曝光数的question
def get_all_question_count_by_imp():
sql = "select cid,count(cid) from data_feed_exposure where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and cid_type='question' group by cid order by count(cid) desc"
all_question_count_by_imp = con_sql(sql)
all_question_count_by_imp = tuple2dict(all_question_count_by_imp)
return all_question_count_by_imp
#1.3 获取昨天所有平台的top100点击率的question
def get_all_top100_question_rate_by_ctr(all_question_count_by_click,all_question_count_by_imp):
all_top100_question_rate_by_ctr = []
if all_question_count_by_imp == {}:
for i in all_question_count_by_click:
url = "http://m.igengmei.com/question/" + i[i.index('|')+1:] + '/'
all_top100_question_rate_by_ctr.append(("所有",i,all_question_count_by_click[i],0,0,url))
all_top100_question_rate_by_ctr.sort(key=lambda x:x[2],reverse=True)
return all_top100_question_rate_by_ctr[:100] if len(all_top100_question_rate_by_ctr) > 100 else all_top100_question_rate_by_ctr
else:
for i in all_question_count_by_click:
if i in all_question_count_by_imp.keys() and all_question_count_by_click[i]>2:
url = "http://m.igengmei.com/question/" + i[i.index('|')+1:] + '/'
all_top100_question_rate_by_ctr.append(("所有",i,all_question_count_by_click[i],all_question_count_by_imp[i], round(all_question_count_by_click[i]/all_question_count_by_imp[i],4),url))
all_top100_question_rate_by_ctr.sort(key=lambda x:x[4],reverse=True)
return all_top100_question_rate_by_ctr[:100] if len(all_top100_question_rate_by_ctr) > 100 else all_top100_question_rate_by_ctr
#2 获取昨天ios平台的top100question
#2.1 获取昨天ios平台的top100点击数的question
def get_ios_question_count_by_click():
sql = "select cid,count(cid) from data_feed_click where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and device_type='AppStore' and cid_type='question' group by cid order by count(cid) desc"
ios_question_count_by_click = con_sql(sql)
ios_question_count_by_click = tuple2dict(ios_question_count_by_click)
return ios_question_count_by_click
#2.2 获取昨天ios平台的top100曝光数的question
def get_ios_question_count_by_imp():
sql = "select cid,count(cid) from data_feed_exposure where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and device_type='App Store' and cid_type='question' group by cid order by count(cid) desc"
ios_question_count_by_imp = con_sql(sql)
ios_question_count_by_imp = tuple2dict(ios_question_count_by_imp)
return ios_question_count_by_imp
#2.3 获取昨天ios平台的top100点击率的question
def get_ios_top100_question_rate_by_ctr(ios_question_count_by_click,ios_question_count_by_imp):
ios_top100_question_rate_by_ctr = []
if ios_question_count_by_imp == {}:
for i in ios_question_count_by_click:
url = "http://m.igengmei.com/question/" + i[i.index('|')+1:] + '/'
ios_top100_question_rate_by_ctr.append(("苹果",i,ios_question_count_by_click[i],0,0,url))
ios_top100_question_rate_by_ctr.sort(key=lambda x:x[2],reverse=True)
return ios_top100_question_rate_by_ctr[:100] if len(ios_top100_question_rate_by_ctr) > 100 else ios_top100_question_rate_by_ctr
else:
for i in ios_question_count_by_click:
if i in ios_question_count_by_imp.keys() and ios_question_count_by_click[i]>2:
url = "http://m.igengmei.com/question/" + i[i.index('|')+1:] + '/'
ios_top100_question_rate_by_ctr.append(("苹果",i,ios_question_count_by_click[i],ios_question_count_by_imp[i], round(ios_question_count_by_click[i]/ios_question_count_by_imp[i],4),url))
ios_top100_question_rate_by_ctr.sort(key=lambda x:x[4],reverse=True)
return ios_top100_question_rate_by_ctr[:100] if len(ios_top100_question_rate_by_ctr) > 100 else ios_top100_question_rate_by_ctr
#3 获取昨天安卓平台的top100question
#3.1 获取昨天安卓平台的top100点击数的question
def get_android_question_rate_by_click():
sql = "select cid,count(cid) from data_feed_click where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and device_type!='AppStore' and cid_type='question' group by cid order by count(cid) desc"
android_question_count_by_click = con_sql(sql)
android_question_count_by_click = tuple2dict(android_question_count_by_click)
return android_question_count_by_click
#3.2 获取昨天安卓平台的top100曝光数的question
def get_android_question_rate_by_imp():
sql = "select cid,count(cid) from data_feed_exposure where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and device_type!='App Store' and cid_type='question' group by cid order by count(cid) desc"
android_question_count_by_imp = con_sql(sql)
android_question_count_by_imp = tuple2dict(android_question_count_by_imp)
return android_question_count_by_imp
#3.3 获取昨天安卓平台的top100点击率的question
def get_android_top100_question_rate_by_ctr(android_question_count_by_click,android_question_count_by_imp):
android_top100_question_rate_by_ctr = []
if android_question_count_by_imp == {}:
for i in android_question_count_by_click:
url = "http://m.igengmei.com/question/" + i[i.index('|')+1:] + '/'
android_top100_question_rate_by_ctr.append(("安卓",i,android_question_count_by_click[i],0,0,url))
android_top100_question_rate_by_ctr.sort(key=lambda x:x[2],reverse=True)
return android_top100_question_rate_by_ctr[:100] if len(android_top100_question_rate_by_ctr) > 100 else android_top100_question_rate_by_ctr
else:
for i in android_question_count_by_click:
if i in android_question_count_by_imp.keys() and android_question_count_by_click[i]>2:
url = "http://m.igengmei.com/question/" + i[i.index('|')+1:] + '/'
android_top100_question_rate_by_ctr.append(("安卓",i,android_question_count_by_click[i],android_question_count_by_imp[i],round(android_question_count_by_click[i]/android_question_count_by_imp[i],4),url))
android_top100_question_rate_by_ctr.sort(key=lambda x:x[4],reverse=True)
return android_top100_question_rate_by_ctr[:100] if len(android_top100_question_rate_by_ctr) > 100 else android_top100_question_rate_by_ctr
if __name__ == "__main__":
print("开始获取top100点击率的question...")
all_question_count_by_click = get_all_question_count_by_click()
all_question_count_by_imp = get_all_question_count_by_imp()
all_top100_question_rate_by_ctr = get_all_top100_question_rate_by_ctr(all_question_count_by_click,all_question_count_by_imp)
print("5.1已获得所有平台的top100点击率的question")
ios_question_count_by_click = get_ios_question_count_by_click()
ios_question_count_by_imp = get_ios_question_count_by_imp()
ios_top100_question_rate_by_ctr = get_ios_top100_question_rate_by_ctr(ios_question_count_by_click,ios_question_count_by_imp)
print("5.2已获得ios平台的top100点击率的question")
android_question_count_by_click = get_android_question_rate_by_click()
android_question_count_by_imp = get_android_question_rate_by_imp()
android_top100_question_rate_by_ctr = get_android_top100_question_rate_by_ctr(android_question_count_by_click,android_question_count_by_imp)
print("5.3已获得安卓平台的top100点击率的question")
result_lst = [all_top100_question_rate_by_ctr,ios_top100_question_rate_by_ctr,android_top100_question_rate_by_ctr]
today = datetime.date.today()
yesterday = today - datetime.timedelta(days=1)
yesterday = yesterday.strftime("%Y%m%d")
output_path = "/data2/models/eda/recommended_indexs/5top100_ctr_question_%s.txt" % yesterday
result2file(result_lst,output_path)
print("已将top100点击率的question存入文件")