1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# -*- coding: UTF-8 -*-
import pymysql
import datetime
def con_sql(sql):
# 从数据库的表里获取数据
db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
cursor = db.cursor()
cursor.execute(sql)
result = cursor.fetchall()
db.close()
return result
def tuple2dict(tuple_result):
#把sql结果从tuple格式转换成dict格式
dict_result = {}
for i in range(len(tuple_result)):
dict_result[tuple_result[i][0]] = tuple_result[i][1]
return dict_result
def result2file(result_lst,fpath):
with open(fpath,'w') as f:
tplt = "{0:\u3000<4}\t{1:\u3000<12}\t{2:\u3000^6}\t{3:\u3000^6}\t{4:\u3000<8}\t{5:\u3000^15}\n"
f.write("Top 100 Answer\n")
f.write("=================================================================\n")
f.write(tplt.format("平台","answer_id","点击数","曝光数","点击率","answer链接"))
for i in result_lst:
for j in i:
f.write(tplt.format(j[0],j[1],j[2],j[3],j[4],j[5]))
f.write("=================================================================\n")
if i != result_lst[-1]:
f.write(tplt.format("平台","answer_id","点击数","曝光数","点击率","answer链接"))
f.write("\n\n")
#1 获取昨天所有平台的top100answer
#1.1 获取昨天所有平台的top100点击数的answer
def get_all_answer_count_by_click():
sql = "select cid,count(cid) from data_feed_click where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and cid_type='answer' group by cid order by count(cid) desc"
all_answer_count_by_click = con_sql(sql)
all_answer_count_by_click = tuple2dict(all_answer_count_by_click)
return all_answer_count_by_click
#1.2 获取昨天所有平台的top100曝光数的answer
def get_all_answer_count_by_imp():
sql = "select cid,count(cid) from data_feed_exposure where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and cid_type='answer' group by cid order by count(cid) desc"
all_answer_count_by_imp = con_sql(sql)
all_answer_count_by_imp = tuple2dict(all_answer_count_by_imp)
return all_answer_count_by_imp
#1.3 获取昨天所有平台的top100点击率的answer
def get_all_top100_answer_rate_by_ctr(all_answer_count_by_click,all_answer_count_by_imp):
all_top100_answer_rate_by_ctr = []
for i in all_answer_count_by_click:
if i in all_answer_count_by_imp.keys() and all_answer_count_by_click[i]>2:
url = "http://m.igengmei.com/answer/" + i[i.index('|')+1:] + '/'
all_top100_answer_rate_by_ctr.append(("所有",i,all_answer_count_by_click[i],all_answer_count_by_imp[i], round(all_answer_count_by_click[i]/all_answer_count_by_imp[i],4),url))
all_top100_answer_rate_by_ctr.sort(key=lambda x:x[4],reverse=True)
return all_top100_answer_rate_by_ctr[:100] if len(all_top100_answer_rate_by_ctr) > 100 else all_top100_answer_rate_by_ctr
#2 获取昨天ios平台的top100answer
#2.1 获取昨天ios平台的top100点击数的answer
def get_ios_answer_count_by_click():
sql = "select cid,count(cid) from data_feed_click where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and device_type='AppStore' and cid_type='answer' group by cid order by count(cid) desc"
ios_answer_count_by_click = con_sql(sql)
ios_answer_count_by_click = tuple2dict(ios_answer_count_by_click)
return ios_answer_count_by_click
#2.2 获取昨天ios平台的top100曝光数的answer
def get_ios_answer_count_by_imp():
sql = "select cid,count(cid) from data_feed_exposure where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and device_type='App Store' and cid_type='answer' group by cid order by count(cid) desc"
ios_answer_count_by_imp = con_sql(sql)
ios_answer_count_by_imp = tuple2dict(ios_answer_count_by_imp)
return ios_answer_count_by_imp
#2.3 获取昨天ios平台的top100点击率的answer
def get_ios_top100_answer_rate_by_ctr(ios_answer_count_by_click,ios_answer_count_by_imp):
ios_top100_answer_rate_by_ctr = []
for i in ios_answer_count_by_click:
if i in ios_answer_count_by_imp.keys() and ios_answer_count_by_click[i]>2:
url = "http://m.igengmei.com/answer/" + i[i.index('|')+1:] + '/'
ios_top100_answer_rate_by_ctr.append(("苹果",i,ios_answer_count_by_click[i],ios_answer_count_by_imp[i], round(ios_answer_count_by_click[i]/ios_answer_count_by_imp[i],4),url))
ios_top100_answer_rate_by_ctr.sort(key=lambda x:x[4],reverse=True)
return ios_top100_answer_rate_by_ctr[:100] if len(ios_top100_answer_rate_by_ctr) > 100 else ios_top100_answer_rate_by_ctr
#3 获取昨天安卓平台的top100answer
#3.1 获取昨天安卓平台的top100点击数的answer
def get_android_answer_rate_by_click():
sql = "select cid,count(cid) from data_feed_click where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and device_type!='AppStore' and cid_type='answer' group by cid order by count(cid) desc"
android_answer_count_by_click = con_sql(sql)
android_answer_count_by_click = tuple2dict(android_answer_count_by_click)
return android_answer_count_by_click
#3.2 获取昨天安卓平台的top100曝光数的answer
def get_android_answer_rate_by_imp():
sql = "select cid,count(cid) from data_feed_exposure where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and device_type!='App Store' and cid_type='answer' group by cid order by count(cid) desc"
android_answer_count_by_imp = con_sql(sql)
android_answer_count_by_imp = tuple2dict(android_answer_count_by_imp)
return android_answer_count_by_imp
#3.3 获取昨天安卓平台的top100点击率的answer
def get_android_top100_answer_rate_by_ctr(android_answer_count_by_click,android_answer_count_by_imp):
android_top100_answer_rate_by_ctr = []
for i in android_answer_count_by_click:
if i in android_answer_count_by_imp.keys() and android_answer_count_by_click[i]>2:
url = "http://m.igengmei.com/answer/" + i[i.index('|')+1:] + '/'
android_top100_answer_rate_by_ctr.append(("安卓",i,android_answer_count_by_click[i],android_answer_count_by_imp[i],round(android_answer_count_by_click[i]/android_answer_count_by_imp[i],4),url))
android_top100_answer_rate_by_ctr.sort(key=lambda x:x[4],reverse=True)
return android_top100_answer_rate_by_ctr[:100] if len(android_top100_answer_rate_by_ctr) > 100 else android_top100_answer_rate_by_ctr
if __name__ == "__main__":
print("开始获取top100点击率的answer...")
all_answer_count_by_click = get_all_answer_count_by_click()
all_answer_count_by_imp = get_all_answer_count_by_imp()
all_top100_answer_rate_by_ctr = get_all_top100_answer_rate_by_ctr(all_answer_count_by_click,all_answer_count_by_imp)
print("4.1已获得所有平台的top100点击率的answer")
ios_answer_count_by_click = get_ios_answer_count_by_click()
ios_answer_count_by_imp = get_ios_answer_count_by_imp()
ios_top100_answer_rate_by_ctr = get_ios_top100_answer_rate_by_ctr(ios_answer_count_by_click,ios_answer_count_by_imp)
print("4.2已获得ios平台的top100点击率的answer")
android_answer_count_by_click = get_android_answer_rate_by_click()
android_answer_count_by_imp = get_android_answer_rate_by_imp()
android_top100_answer_rate_by_ctr = get_android_top100_answer_rate_by_ctr(android_answer_count_by_click,android_answer_count_by_imp)
print("4.3已获得安卓平台的top100点击率的answer")
result_lst = [all_top100_answer_rate_by_ctr,ios_top100_answer_rate_by_ctr,android_top100_answer_rate_by_ctr]
today = datetime.date.today()
yesterday = today - datetime.timedelta(days=1)
yesterday = yesterday.strftime("%Y%m%d")
output_path = "/data2/models/eda/recommended_indexs/4top100_ctr_answer_%s.txt" % yesterday
result2file(result_lst,output_path)
print("已将top100点击率的answer存入文件")