# -*- coding: UTF-8 -*-
from utils import get_yesterday_date
from config import DIRECTORY_PATH
from cidRate import CidRate
from clkCidUidRate import ClkCidUidRate
from topFeatures import TopFeatures
from func import *
# Kept after the wildcard import on purpose: a name exported by `func`
# can then never shadow the standard-library `time` module.
import time


start = time.time()

# Each entry is (platform key given to the data classes/functions,
# platform label given to their getter method).
_PLATFORMS = (("all", "所有"), ("ios", "苹果"), ("android", "安卓"))

print("开始获取比例特征数据...")

# 1.0 question impression share (= question impressions / total cid impressions)
question_imp_rate_result = [
	CidRate(key, "question").get_cid_imp_rate(label) for key, label in _PLATFORMS
]
question_imp_rate_all, question_imp_rate_ios, question_imp_rate_android = question_imp_rate_result
print("已获取question曝光占比")

# 1.1 answer impression share (= answer impressions / total cid impressions)
answer_imp_rate_result = [
	CidRate(key, "answer").get_cid_imp_rate(label) for key, label in _PLATFORMS
]
answer_imp_rate_all, answer_imp_rate_ios, answer_imp_rate_android = answer_imp_rate_result
print("已获取answer曝光占比")

# 1.2 diary impression share (= diary impressions / total cid impressions)
diary_imp_rate_result = [
	CidRate(key, "diary").get_cid_imp_rate(label) for key, label in _PLATFORMS
]
diary_imp_rate_all, diary_imp_rate_ios, diary_imp_rate_android = diary_imp_rate_result
print("已获取diary曝光占比")

# 1.3 active-user click-through rate (= active-user clicks / active-user impressions)
# NOTE(review): the report text written by result2file() describes this metric as
# clicks / impressions of users who clicked - confirm which definition
# get_activate_uid_ctr() actually implements.
activate_uid_ctr_result = [get_activate_uid_ctr(key) for key, _label in _PLATFORMS]
activate_uid_ctr_all, activate_uid_ctr_ios, activate_uid_ctr_android = activate_uid_ctr_result
print("已获取活跃用户点击率")

# 1.4 average daily impressions per active user
# (= active-user impressions / distinct active users); split by region, not platform
activate_uid_imp_all = get_activate_uid_imp_times("all")
activate_uid_imp_beijing = get_activate_uid_imp_times("beijing")
activate_uid_imp_result = [activate_uid_imp_all, activate_uid_imp_beijing]
print("已获取活跃用户平均每天曝光次数")

# 1.5 share of users who clicked an answer (= users clicking answer / users shown answer)
click_answer_result = [
	ClkCidUidRate(key, "answer").get_clk_cid_uid_rate(label) for key, label in _PLATFORMS
]
click_answer_all, click_answer_ios, click_answer_android = click_answer_result
print("已获取点击answer用户占比")

# 1.6 share of users who clicked a question (= users clicking question / users shown question)
click_question_result = [
	ClkCidUidRate(key, "question").get_clk_cid_uid_rate(label) for key, label in _PLATFORMS
]
click_question_all, click_question_ios, click_question_android = click_question_result
print("已获取点击question用户占比")

# 1.7 share of users who clicked a diary (= users clicking diary / users shown diary)
click_diary_result = [
	ClkCidUidRate(key, "diary").get_clk_cid_uid_rate(label) for key, label in _PLATFORMS
]
click_diary_all, click_diary_ios, click_diary_android = click_diary_result
print("已获取点击diary用户占比")

# 1.8 share of users with any click (= users with a click / users with an impression)
click_everything_result = [
	ClkCidUidRate(key, "everything").get_clk_cid_uid_rate(label) for key, label in _PLATFORMS
]
click_everything_all, click_everything_ios, click_everything_android = click_everything_result
print("已获取有点击用户占比")

# 1.9 distribution of no-click users (= no-click users ∩ activated users / activated users)
# Note: the numbers in the (] buckets are days counted back from the current time.
# This metric is currently disabled; the original code is kept verbatim for reference.
# NOTE(review): the except branch suggests the query used to fail with
# "GC life time is shorter than transaction duration" - confirm before re-enabling.
# try:
# 	click_zero_uid_detail_all = get_click_zero_uid_rate_detail("all")
# 	click_zero_uid_detail_all["platform"] = "所有"
# 	click_zero_uid_detail_ios = get_click_zero_uid_rate_detail("ios")
# 	click_zero_uid_detail_ios["platform"] = "苹果"
# 	click_zero_uid_detail_android = get_click_zero_uid_rate_detail("android")
# 	click_zero_uid_detail_android["platform"] = "安卓"
# 	click_zero_uid_detail_result = [click_zero_uid_detail_all,click_zero_uid_detail_ios,click_zero_uid_detail_android]
# 	print("已获取无点击用户数激活日期分布")
# except:
# 	click_zero_uid_detail_result = []
# 	print("GC life time is shorter than transaction duration")


#==========================================================================================


print("开始获取Top特征数据...")

# 2.1 click-count distribution (first column: clicks per user; second column: distinct users)
click_times_to_count_uid = get_click_times_to_count_uid()
print("已获取用户点击次数分布")

# 2.2 Top 100 diary (sorted by ctr)
top_diary_result = [
	TopFeatures(key, "diary", 100).get_result(label, 4, "ctr") for key, label in _PLATFORMS
]
top_diary_all, top_diary_ios, top_diary_android = top_diary_result
print("已获取 Top diary 特征")

# 2.3 Top 100 answer (sorted by ctr)
top_answer_result = [
	TopFeatures(key, "answer", 100).get_result(label, 2, "ctr") for key, label in _PLATFORMS
]
top_answer_all, top_answer_ios, top_answer_android = top_answer_result
print("已获取 Top answer 特征")

# 2.4 Top 100 question (sorted by ctr, matching the "ctr" argument passed below)
top_question_result = [
	TopFeatures(key, "question", 100).get_result(label, 2, "ctr") for key, label in _PLATFORMS
]
top_question_all, top_question_ios, top_question_android = top_question_result
print("已获取 Top question 特征")
print("done")


end = time.time()
print('程序执行时间: {}s'.format(end-start))


def result2file():
	"""Write the daily feature report as a text file.

	The file is DIRECTORY_PATH + "result_<date>.txt", where <date> is the
	string returned by get_yesterday_date() with every '-' removed. The
	report consists of an overview, the ratio tables 1.0-1.8 and the Top
	tables 2.1-2.4, all read from the module-level ``*_result`` lists and
	``click_times_to_count_uid``. An existing file is overwritten.
	"""
	# Fetch the date once so the file name and the report heading always agree.
	report_date = get_yesterday_date().replace('-', '')
	output_path = DIRECTORY_PATH + "result_{}.txt".format(report_date)

	# (title line, header cells, rows, render 4th column as a percentage?)
	rate_tables = (
		("#1.0question曝光占比(=question被曝光数/总cid被曝光数)\n",
			("平台", "question被曝光数", "总cid被曝光数", "question被曝光数占比"),
			question_imp_rate_result, True),
		("#1.1answer曝光占比(=answer被曝光数/总cid被曝光数)\n",
			("平台", "answer被曝光数", "总cid被曝光数", "answer被曝光数占比"),
			answer_imp_rate_result, True),
		("#1.2diary曝光占比(=diary被曝光数/总cid被曝光数)\n",
			("平台", "diary被曝光数", "总cid被曝光数", "diary被曝光数占比"),
			diary_imp_rate_result, True),
		("#1.3活跃用户点击率(=有点击用户点击次数/有点击用户曝光次数)\n",
			("平台", "active用户点击次数", "active用户曝光次数", "active用户点击率"),
			activate_uid_ctr_result, True),
		# 1.4 is an average count, so its last column is written as-is.
		("#1.4活跃用户平均每天曝光次数(=活跃用户曝光次数/独立活跃用户数)\n",
			("地区", "active独立用户数", "active用户曝光次数", "activate用户平均曝光数"),
			activate_uid_imp_result, False),
		("#1.5点击answer用户占比(=点击answer用户数/曝光answer用户数)\n",
			("平台", "点击answer用户数", "曝光answer用户数", "点击answer用户占比"),
			click_answer_result, True),
		("#1.6点击question用户占比(=点击question用户数/曝光question用户数)\n",
			("平台", "点击question用户数", "曝光question用户数", "点击question用户占比"),
			click_question_result, True),
		("#1.7点击diary用户占比(=点击diary用户数/曝光diary用户数)\n",
			("平台", "点击diary用户数", "曝光diary用户数", "点击diary用户占比"),
			click_diary_result, True),
		("#1.8有点击用户占比(=有点击用户数/有曝光用户数)\n",
			("平台", "have点击用户数", "have曝光用户数", "have点击用户占比"),
			click_everything_result, True),
	)
	# (title line, id column label, link column label, one row list per platform)
	top_tables = (
		("2.2 Top 100 Diary\n", "diary_id", "diary链接", top_diary_result),
		("2.3 Top 100 Answer\n", "answer_id", "answer链接", top_answer_result),
		("2.4 Top 100 Question\n", "question_id", "question链接", top_question_result),
	)

	# The report contains Chinese text and U+3000 padding, so the encoding is
	# pinned instead of depending on the process locale (a C/POSIX locale
	# would otherwise make the write fail with UnicodeEncodeError).
	with open(output_path, 'w', encoding='utf-8') as f:
		f.write(_REPORT_OVERVIEW.format(report_date))
		f.write("#1. 比例特征\n")
		f.write(_REPORT_RULE)
		for index, (title, header, rows, as_percent) in enumerate(rate_tables):
			if index:
				# Blank line between consecutive tables, none after the last one.
				f.write('\n')
			_write_rate_table(f, title, header, rows, as_percent)
		# Table 1.9 is disabled together with its query in the script above;
		# the original writer is kept verbatim for reference.
		# f.write('\n')
		# if click_zero_uid_detail_result != []:
		# 	f.write("#1.9无点击用户数分布(=无点击用户∩激活用户 / 激活用户数)   #注意:(]里面的数字指的是距离当前时间的天数\n")
		# 	f.write("平台"+'\t\t'+"0-7"+'\t\t'+"7-14"+'\t\t'+ \
		# 		"14-30"+'\t\t'+"30-60"+'\t\t'+"60-90"+'\t\t'+"90+"+'\n')
		# 	for i in click_zero_uid_detail_result:
		# 		f.write(i["platform"]+'\t\t'+\
		# 			"{}%".format(round(i["0-7"]*100,2))+'\t\t'+\
		# 			"{}%".format(round(i["7-14"]*100,2))+'\t\t'+\
		# 			"{}%".format(round(i["14-30"]*100,2))+'\t\t'+\
		# 			"{}%".format(round(i["30-60"]*100,2))+'\t\t'+\
		# 			"{}%".format(round(i["60-90"]*100,2))+'\t\t'+\
		# 			"{}%".format(round(i["90+"]*100,2))+'\n')
		f.write('\n\n\n')

		f.write("#2. Top特征\n")
		f.write(_REPORT_RULE)
		f.write("2.1 用户点击次数分布(第一列:用户点击次数;第二列:独立用户数量)\n")
		count_tplt = "{0:^10}\t{1:^10}\n"
		f.write(count_tplt.format("click_times", "count_uid"))
		for row in click_times_to_count_uid:
			f.write(count_tplt.format(row[0], row[1]))
		f.write("\n\n")
		for title, id_label, link_label, groups in top_tables:
			_write_top_table(f, title, id_label, link_label, groups)


# Horizontal rule written under section and table titles.
_REPORT_RULE = "=================================================================\n"

# Overview written at the top of the report; the single {} receives the date.
# Tabs and newlines are spelled as escapes so the layout cannot be altered by
# an editor that rewrites whitespace.
_REPORT_OVERVIEW = (
	"数据日期:{}\n"
	"内容概览:以下所有数据都是昨天一天的首页的\n"
	"说明:\n"
	"\t(1)红色标记的为比较重要的特征\n"
	"\t(2)[A,+B]格式说明:A表示该特征在当天的数值,+B/-B表示该数值相对于昨天的差值\n"
	"1. 比例特征\n"
	"\t1.0 question曝光占比(=question被曝光数/总cid被曝光数) [,]\n"
	"\t1.1 answer曝光占比(=answer被曝光数/总cid被曝光数) [,]\n"
	"\t1.2 diary曝光占比(=diary被曝光数/总cid被曝光数) [,]\n"
	"\t1.3 活跃用户点击率(=有点击用户点击次数/有点击用户曝光次数) [,]\n"
	"\t1.4 活跃用户平均每天曝光次数(=活跃用户曝光次数/独立活跃用户数) [,]\n"
	"\t1.5 点击answer用户占比(=点击answer用户数/曝光answer用户数) [,]\n"
	"\t1.6 点击question用户占比(=点击question用户数/曝光question用户数) [,]\n"
	"\t1.7 点击diary用户占比(=点击diary用户数/曝光diary用户数) [,]\n"
	"\t1.8 有点击用户占比(=有点击用户数/有曝光用户数) [,]\n"
	"2.Top特征\n"
	"\t2.1 用户点击次数分布(第一列:用户点击次数;第二列:独立用户数量)\n"
	"\t2.2 Top 100 diary (sorted by ctr)\n"
	"\t2.3 Top 100 Answer (sorted by ctr)\n"
	"\t2.4 Top 100 Question (sorted by ctr)\n"
	"\n"
	"\n"
	"\n"
	"具体内容:以下所有数据都是昨天一天的首页的\n"
)


def _percent(ratio):
	"""Return ratio * 100 rounded to two decimals with a trailing '%', e.g. 0.5 -> '50.0%'."""
	return "{}%".format(round(ratio * 100, 2))


def _write_rate_table(f, title, header, rows, as_percent=True):
	"""Write one four-column ratio table to the open text file f.

	Writes the title line, then the header cells, then one line per entry of
	rows using fields 0-3 of the entry. When as_percent is true, field 3 is
	rendered through _percent(); otherwise it is written unchanged.
	"""
	tplt = "{0:\u3000<6}\t{1:\u3000<15}\t{2:\u3000<15}\t{3:\u3000<15}\n"
	f.write(title)
	f.write(tplt.format(*header))
	for row in rows:
		last_cell = _percent(row[3]) if as_percent else row[3]
		f.write(tplt.format(row[0], row[1], row[2], last_cell))


def _write_top_table(f, title, id_label, link_label, groups):
	"""Write one Top table to the open text file f.

	groups holds one row list per platform. Every group is followed by a rule
	line, and the column header is repeated before each group except that it
	is not written again after the last one. Two empty lines close the table.
	"""
	tplt = "{0:\u3000<4}\t{1:\u3000<12}\t{2:\u3000^6}\t{3:\u3000^6}\t{4:\u3000<8}\t{5:\u3000^15}\n"
	header = tplt.format("平台", id_label, "点击数", "曝光数", "点击率", link_label)
	f.write(title)
	f.write(_REPORT_RULE)
	f.write(header)
	last_index = len(groups) - 1
	for index, group in enumerate(groups):
		for row in group:
			f.write(tplt.format(row[0], row[1], row[2], row[3], _percent(row[4]), row[5]))
		f.write(_REPORT_RULE)
		# Decide by position, not by comparing group contents: two platforms
		# with equal (for example empty) results must still get their header.
		if index != last_index:
			f.write(header)
	f.write("\n\n")


def rate2file():
	"""Append one line with yesterday's ratio values to DIRECTORY_PATH + "rate.csv".

	The line starts with the string returned by get_yesterday_date() with every
	'-' removed, followed by element [3] of each entry of these module-level
	result lists, in this order: answer / diary impression share, active-user
	ctr, active-user impressions, answer / diary / any-content click share,
	question impression share, question click share. No header row is written.
	"""
	# Column order is part of the CSV layout - do not reorder.
	metric_groups = (
		answer_imp_rate_result,
		diary_imp_rate_result,
		activate_uid_ctr_result,
		activate_uid_imp_result,
		click_answer_result,
		click_diary_result,
		click_everything_result,
		question_imp_rate_result,
		click_question_result,
	)
	fields = [get_yesterday_date().replace('-', '')]
	fields.extend(str(entry[3]) for group in metric_groups for entry in group)

	output_path = DIRECTORY_PATH + "rate.csv"
	# Append mode: every run adds one line and keeps the earlier ones.
	with open(output_path, 'a') as f:
		f.write(','.join(fields) + '\n')


if __name__ == '__main__':
	# Write the text report first, then append the day's line to rate.csv.
	result2file()
	rate2file()