ctr.py 5.2 KB
Newer Older
王志伟's avatar
王志伟 committed
1
# -*- coding: UTF-8 -*-
高雅喆's avatar
高雅喆 committed
2 3
from utils import con_sql,get_yesterday_date,get_between_day
import time
高雅喆's avatar
高雅喆 committed
4 5
import sys

高雅喆's avatar
高雅喆 committed
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24

class GrayStat(object):
	def __init__(self, cid_type, uid_type, ndays=get_yesterday_date()):
		"""
		cid_type : diary,answer,question
		uid_type : 8:_8结尾;6:_6结尾;6|8:_6或者_8结尾;^68:不是6或者8结尾的
		ndays : '2018-08-30'....
		"""
		self.cid_type = cid_type
		self.uid_type = uid_type
		self.ndays = ndays

	def get_uid_count(self):
		sql = "select count(distinct(device_id)) from data_feed_click \
			where stat_date='{0}' \
			and (cid_type='{1}' or cid_type='diary_video') \
			and device_id regexp '[{2}]$' \
			and device_id in \
				(select device_id \
高雅喆's avatar
高雅喆 committed
25
				from {3}) \
26 27
			and device_id not in (select distinct(device_id) from jerry_test.bl_device_list) \
			and device_id not in (select distinct(device_id) from jerry_prod.blacklist)".format(self.ndays,\
高雅喆's avatar
高雅喆 committed
28
				self.cid_type,self.uid_type,table_name)
高雅喆's avatar
高雅喆 committed
29 30 31 32 33 34 35 36 37 38
		uid_count = con_sql(sql)[0][0]
		return uid_count

	def get_uid_clk_times(self):
		sql = "select count(device_id) from data_feed_click \
			where stat_date='{0}' \
			and (cid_type='{1}' or cid_type='diary_video') \
			and device_id regexp '[{2}]$' \
			and device_id in \
				(select device_id \
高雅喆's avatar
高雅喆 committed
39
				from {3}) \
40 41
			and device_id not in (select distinct(device_id) from jerry_test.bl_device_list) \
			and device_id not in (select distinct(device_id) from jerry_prod.blacklist)".format(self.ndays,\
高雅喆's avatar
高雅喆 committed
42
				self.cid_type,self.uid_type,table_name)
高雅喆's avatar
高雅喆 committed
43 44 45 46 47 48 49 50 51 52
		uid_clk_times = con_sql(sql)[0][0]
		return uid_clk_times

	def get_uid_imp_times(self):
		sql = "select count(device_id) from data_feed_exposure \
			where stat_date='{0}' \
			and cid_type='{1}' \
			and device_id regexp '[{2}]$' \
			and device_id in \
				(select device_id \
高雅喆's avatar
高雅喆 committed
53
				from {3}) \
54 55
			and device_id not in (select distinct(device_id) from jerry_test.bl_device_list) \
			and device_id not in (select distinct(device_id) from jerry_prod.blacklist)".format(self.ndays,\
高雅喆's avatar
高雅喆 committed
56
				self.cid_type,self.uid_type,table_name)
高雅喆's avatar
高雅喆 committed
57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76
		uid_imp_times = con_sql(sql)[0][0]
		return uid_imp_times



class AllStat(object):
	def __init__(self, cid_type, uid_type, ndays=get_yesterday_date()):
		"""
		cid_type : diary,answer,question
		uid_type : 8:_8结尾;6:_6结尾;6|8:_6或者_8结尾;^68:不是6或者8结尾的
		ndays : '2018-08-30'....
		"""
		self.cid_type = cid_type
		self.uid_type = uid_type
		self.ndays = ndays

	def get_uid_count(self):
		sql = "select count(distinct(device_id)) from data_feed_click \
			where stat_date='{0}' \
			and (cid_type='{1}' or cid_type='diary_video') \
77 78 79 80
			and device_id regexp '[{2}]$' \
			and device_id not in (select distinct(device_id) from jerry_test.bl_device_list) \
			and device_id not in (select distinct(device_id) from jerry_prod.blacklist)".format(self.ndays,\
				self.cid_type,self.uid_type)
高雅喆's avatar
高雅喆 committed
81 82 83 84 85 86 87
		uid_count = con_sql(sql)[0][0]
		return uid_count

	def get_uid_clk_times(self):
		sql = "select count(device_id) from data_feed_click \
			where stat_date='{0}' \
			and (cid_type='{1}' or cid_type='diary_video') \
88 89 90 91
			and device_id regexp '[{2}]$' \
			and device_id not in (select distinct(device_id) from jerry_test.bl_device_list) \
			and device_id not in (select distinct(device_id) from jerry_prod.blacklist)".format(self.ndays,\
				self.cid_type,self.uid_type)
高雅喆's avatar
高雅喆 committed
92 93 94 95 96 97 98
		uid_clk_times = con_sql(sql)[0][0]
		return uid_clk_times

	def get_uid_imp_times(self):
		sql = "select count(device_id) from data_feed_exposure \
			where stat_date='{0}' \
			and cid_type='{1}' \
99 100 101 102
			and device_id regexp '[{2}]$' \
			and device_id not in (select distinct(device_id) from jerry_test.bl_device_list) \
			and device_id not in (select distinct(device_id) from jerry_prod.blacklist)".format(self.ndays,\
				self.cid_type,self.uid_type)
高雅喆's avatar
高雅喆 committed
103 104 105 106 107 108
		uid_imp_times = con_sql(sql)[0][0]
		return uid_imp_times


def main():
	output = OUTPUT_PATH + "ctr.csv"
高雅喆's avatar
高雅喆 committed
109 110
	with open(output,"a+") as f:
		print("stat" + "  " + get_yesterday_date())
张彦钊's avatar
张彦钊 committed
111 112
		g_class = GrayStat("diary",num)
		a_class = AllStat("diary",num)
高雅喆's avatar
高雅喆 committed
113 114
		line1 = str(g_class.get_uid_count())+","+str(g_class.get_uid_imp_times())+","+str(g_class.get_uid_clk_times())
		line2 = str(a_class.get_uid_count())+","+str(a_class.get_uid_imp_times())+","+str(a_class.get_uid_clk_times())
高雅喆's avatar
高雅喆 committed
115 116 117
		g_ctr = g_class.get_uid_clk_times()/g_class.get_uid_imp_times()
		a_ctr = a_class.get_uid_clk_times()/a_class.get_uid_imp_times()
		growth_rate = (g_ctr-a_ctr)/a_ctr
高雅喆's avatar
高雅喆 committed
118 119 120
		coverage_rate = g_class.get_uid_count()/a_class.get_uid_count()
		line = get_yesterday_date() + "," + str(round(g_ctr*100,2))+'%' + "," + str(round(a_ctr*100,2))+'%' + "," + \
		str(round(growth_rate*100,2))+'%' + "," + line1 + "," + line2 + "," + str(round(coverage_rate*100,2)) + "%" + "\n"
高雅喆's avatar
高雅喆 committed
121
		f.write(line)
高雅喆's avatar
高雅喆 committed
122 123 124


if __name__ == '__main__':
张彦钊's avatar
张彦钊 committed
125 126 127 128 129 130 131 132 133 134 135 136 137 138 139
	OUTPUT_PATH = "/data2/models/eda/gray_stat/"
	num = sys.argv[1]
	if num == "3|4":
		OUTPUT_PATH = "/data2/models/eda/gray_stat/3|4"
		table_name = "jerry_prod.nd_device_cid_similarity_matrix_tmp"
	elif num == "5|6":
		OUTPUT_PATH = "/data2/models/eda/gray_stat/5|6"
		table_name = "eagle.ffm_diary_queue_temp"
	elif num == "7|8":
		OUTPUT_PATH = "/data2/models/eda/gray_stat/7|8"
		table_name = "eagle.search_queue"
	else:
		print("参数必须是 '3|4' 或 '5|6' 或 '7|8'")
		print("程序异常停止,请重新输入参数运行")
		sys.exit()
高雅喆's avatar
高雅喆 committed
140
	main()