# topFeatures.py
# -*- coding: UTF-8 -*-
from utils import con_sql,tuple2dict,get_yesterday_date
from config import DIRECTORY_PATH


class TopFeatures(object):
	"""
	Rank feed content of one cid_type by clicks, impressions or click-through
	rate for the date returned by get_yesterday_date(), optionally restricted
	to one device platform, and dump the rankings to a text report.

	Result rows are tuples of
	(platform label, cid, clicks, impressions, ctr, url).
	"""

	def __init__(self, platform, cid_type, top_n=-1):
		"""
		platform : 'all';'ios';'android' (anything else behaves like 'all')
		cid_type : 'diary';'answer';'question'...
		top_n : maximum number of rows to return; a negative value (the
		        default) or None means "no limit"
		"""
		# self.platform holds a SQL fragment that is appended directly after
		# the column name "device_type" in the queries below.
		if platform == "ios":
			self.platform = "='App Store'"
		elif platform == "android":
			self.platform = "!='App Store'"
		else:
			self.platform = " is not null"
		self.cid_type = cid_type
		self.top_n = top_n

	def get_click_times(self):
		"""
		Query data_feed_click for per-cid click counts.

		rtype : dict (whatever tuple2dict builds from the query rows;
		        get_result uses it as a cid -> count mapping)
		"""
		# For the two App Store filters the space inside the literal is
		# stripped, so this query compares against 'AppStore'.
		# NOTE(review): presumably the click table stores the value without
		# a space while the exposure table keeps it -- confirm against data.
		if self.platform.endswith("Store'"):
			device_filter = self.platform.replace(' ', '')
		else:
			device_filter = self.platform
		# NOTE(review): values are interpolated straight into the SQL text;
		# cid_type must only ever come from trusted code.
		sql = (
			"select cid,count(cid) from data_feed_click "
			"where stat_date = '{0}' "
			"and device_type{1} and cid_type='{2}' "
			"group by cid "
			"order by count(cid) desc"
		).format(get_yesterday_date(), device_filter, self.cid_type)
		return tuple2dict(con_sql(sql))

	def get_impression_times(self):
		"""
		Query data_feed_exposure for per-cid impression counts.

		rtype : dict (whatever tuple2dict builds from the query rows;
		        get_result uses it as a cid -> count mapping)
		"""
		# NOTE(review): values are interpolated straight into the SQL text;
		# cid_type must only ever come from trusted code.
		sql = (
			"select cid,count(cid) from data_feed_exposure "
			"where stat_date = '{0}' "
			"and device_type{1} and cid_type='{2}' "
			"group by cid order by count(cid) desc"
		).format(get_yesterday_date(), self.platform, self.cid_type)
		return tuple2dict(con_sql(sql))

	def _content_url(self, cid):
		"""Build the mobile-site URL for a cid of the form '<prefix>|<id>'."""
		# str.index is kept on purpose: a cid without '|' raises ValueError
		# instead of silently producing a wrong link.
		content_id = cid[cid.index('|') + 1:]
		# Diaries live under /diary_book/; every other type uses its own name.
		segment = "diary_book" if self.cid_type == "diary" else self.cid_type
		return "http://m.igengmei.com/{0}/{1}/".format(segment, content_id)

	def _limit(self, rows):
		"""Cut rows down to top_n; a negative or None top_n keeps every row."""
		if self.top_n is None:
			return rows
		limit = int(self.top_n)
		# A negative limit used as a slice bound would drop rows from the
		# END of the ranking (top_n=-1 lost the last row), so treat it as
		# "unlimited" instead.
		if limit < 0:
			return rows
		return rows[:limit]

	def get_result(self, platform, clk_n=2, result_types="ctr"):
		"""
		platform : display label stored in every row, e.g. "所有" (all),
		           "苹果" (Apple), "安卓" (Android); not used for filtering
		clk_n : when ranking by CTR, only cids with clicks > clk_n are kept
		result_types : sort key, one of "clk", "imp", "ctr"
		rtype : list of (platform, cid, clicks, impressions, ctr, url),
		        sorted descending by the chosen column and cut to top_n

		Falls back to the click ranking when there are no impressions, and
		to the impression ranking when there are no clicks.
		"""
		clk = self.get_click_times()
		imp = self.get_impression_times()

		if not imp or result_types == "clk":
			# Top N by clicks; impressions and ctr are reported as 0.
			rows = [(platform, cid, clk[cid], 0, 0, self._content_url(cid))
					for cid in clk]
			sort_col = 2
		elif not clk or result_types == "imp":
			# Top N by impressions; clicks and ctr are reported as 0.
			rows = [(platform, cid, 0, imp[cid], 0, self._content_url(cid))
					for cid in imp]
			sort_col = 3
		else:
			# Top N by ctr, restricted to cids present in both tables whose
			# click count exceeds the clk_n threshold.
			rows = [(platform, cid, clk[cid], imp[cid],
					 round(clk[cid] / imp[cid], 4), self._content_url(cid))
					for cid in clk
					if cid in imp and clk[cid] > clk_n]
			sort_col = 4

		rows.sort(key=lambda row: row[sort_col], reverse=True)
		return self._limit(rows)

	def result2file(self, result_lst, fpath):
		"""
		result_lst : [all_result,ios_result,android_result], each a list of
		             rows as returned by get_result
		fpath : output filename (overwritten)
		rtype : none

		Writes a title line, then one table section per entry of result_lst;
		sections are closed by a separator and every section starts with the
		column header.
		"""
		# U+3000 (ideographic space) is the fill character so that columns
		# containing CJK text stay visually aligned.
		tplt = "{0:\u3000<4}\t{1:\u3000<12}\t{2:\u3000^6}\t{3:\u3000^6}\t{4:\u3000<8}\t{5:\u3000^15}\n"
		sep = "=================================================================\n"
		header = tplt.format("平台", "{0}_id".format(self.cid_type), "点击数", "曝光数", "点击率", "{0}链接".format(self.cid_type))
		last_index = len(result_lst) - 1

		# Explicit UTF-8: the report contains Chinese text and must not depend
		# on the locale's default encoding.
		with open(fpath, 'w', encoding="utf-8") as f:
			f.write("Top {0} {1}\n".format(self.top_n, self.cid_type))
			f.write(sep)
			f.write(header)
			for index, rows in enumerate(result_lst):
				for row in rows:
					f.write(tplt.format(row[0], row[1], row[2], row[3], row[4], row[5]))
				f.write(sep)
				# Compare positions, not values: an earlier section that
				# happens to equal the last one (e.g. both empty) must still
				# be followed by the header of the next section.
				if index != last_index:
					f.write(header)
			f.write("\n\n")




def main():
	"""
	Export the top-100 CTR report for each content type.

	For every (cid_type, click threshold) pair below, the CTR ranking is
	computed for all platforms, iOS and Android, and the three rankings are
	written into one file named
	DIRECTORY_PATH + "top100_ctr_<cid_type>_<date without dashes>.txt",
	where the date comes from get_yesterday_date().
	"""
	top_n = 100
	# (cid_type, clk_n): items need more than clk_n clicks to be ranked.
	report_specs = (("diary", 4), ("answer", 2), ("question", 2))
	# (platform key understood by TopFeatures, label shown in the report);
	# the order here is the order of the sections in the output file.
	platforms = (("all", "所有"), ("ios", "苹果"), ("android", "安卓"))

	for cid_type, clk_n in report_specs:
		result_lst = []
		extractors = []
		for platform_key, label in platforms:
			extractor = TopFeatures(platform_key, cid_type, top_n)
			extractors.append(extractor)
			result_lst.append(extractor.get_result(label, clk_n, "ctr"))

		output_path = DIRECTORY_PATH + "top100_ctr_{0}_{1}.txt".format(
			cid_type, get_yesterday_date().replace('-', ''))
		# result2file only reads top_n and cid_type, which are identical on
		# all three extractors; the "all" instance is used as before.
		extractors[0].result2file(result_lst, output_path)
		print("已获取 Top {0} 特征".format(cid_type))
159

高雅喆's avatar
高雅喆 committed
160 161 162 163

# Run the export only when this file is executed as a script, not on import.
if __name__ == '__main__':
	main()