add all ctr.py

735de249 · 高雅喆 · 0c08df5e · 735de249 · 735de249
Commit 735de249 authored Oct 15, 2018 by 高雅喆
Hide whitespace changes
Inline Side-by-side

Showing with 126 additions and 0 deletions

ctr.py eda/gray_stat/ctr.py +0 -0

node2vec_ctr.py node2vec_ctr.py +126 -0

No files found.
--- a/eda/gray_stat/node2vec_ctr.py
+++ b/eda/gray_stat/node2vec_ctr.py
--- a/node2vec_ctr.py
+++ b/node2vec_ctr.py
+# -*- coding: UTF-8 -*-
+from utils import con_sql,get_yesterday_date,get_between_day
+import time
+# Directory prefix that main() joins with "ctr.csv" to build the report path.
+OUTPUT_PATH = "/data2/models/eda/node2vec/"
+class GrayStat(object):
+	"""Click and exposure counters for one stat_date, limited to devices listed in
+	nd_device_cid_similarity_matrix_tmp.
+
+	Every query also excludes devices found in jerry_test.bl_device_list and
+	jerry_prod.blacklist.
+	"""
+	def __init__(self, cid_type, uid_type, ndays=None):
+		"""
+		cid_type : content type to count, e.g. diary, answer, question
+		uid_type : characters interpolated into the regexp class '[...]$' that is
+			matched against device_id. 8: ends with _8; 6: ends with _6;
+			6|8: ends with _6 or _8; ^68: does not end with 6 or 8
+		ndays : stat_date string such as '2018-08-30'; when omitted (None) the value
+			of get_yesterday_date() at construction time is used
+		"""
+		self.cid_type = cid_type
+		self.uid_type = uid_type
+		# Resolve the default per instance. A call placed in the signature is evaluated
+		# only once, when the class is defined, and would go stale afterwards.
+		self.ndays = get_yesterday_date() if ndays is None else ndays
+	def _scalar(self, sql):
+		"""Fill {0}/{1}/{2} with ndays/cid_type/uid_type, run the query through con_sql
+		and return the first column of the first row."""
+		# NOTE(review): values are spliced into the SQL text; only pass trusted,
+		# code-defined cid_type/uid_type/ndays values.
+		return con_sql(sql.format(self.ndays, self.cid_type, self.uid_type))[0][0]
+	def get_uid_count(self):
+		"""Return count(distinct device_id) from data_feed_click for the stat date."""
+		# Inside '[...]' a '|' is a literal character, so "3|4" matches ids ending in 3, 4 or '|'.
+		sql = "select count(distinct(device_id)) from data_feed_click \
+			where stat_date='{0}' \
+			and (cid_type='{1}' or cid_type='diary_video') \
+			and device_id regexp '[{2}]$' \
+			and device_id in \
+				(select device_id \
+				from nd_device_cid_similarity_matrix_tmp) \
+			and device_id not in (select distinct(device_id) from jerry_test.bl_device_list) \
+			and device_id not in (select distinct(device_id) from jerry_prod.blacklist)"
+		return self._scalar(sql)
+	def get_uid_clk_times(self):
+		"""Return count(device_id) from data_feed_click for the stat date (one per click row)."""
+		sql = "select count(device_id) from data_feed_click \
+			where stat_date='{0}' \
+			and (cid_type='{1}' or cid_type='diary_video') \
+			and device_id regexp '[{2}]$' \
+			and device_id in \
+				(select device_id \
+				from nd_device_cid_similarity_matrix_tmp) \
+			and device_id not in (select distinct(device_id) from jerry_test.bl_device_list) \
+			and device_id not in (select distinct(device_id) from jerry_prod.blacklist)"
+		return self._scalar(sql)
+	def get_uid_imp_times(self):
+		"""Return count(device_id) from data_feed_exposure for the stat date."""
+		# NOTE(review): the click queries also accept cid_type='diary_video' but this
+		# exposure query does not - confirm the asymmetry is intended.
+		sql = "select count(device_id) from data_feed_exposure \
+			where stat_date='{0}' \
+			and cid_type='{1}' \
+			and device_id regexp '[{2}]$' \
+			and device_id in \
+				(select device_id \
+				from nd_device_cid_similarity_matrix_tmp) \
+			and device_id not in (select distinct(device_id) from jerry_test.bl_device_list) \
+			and device_id not in (select distinct(device_id) from jerry_prod.blacklist)"
+		return self._scalar(sql)
+class AllStat(object):
+	"""Click and exposure counters for one stat_date over all matching devices.
+
+	Every query excludes devices found in jerry_test.bl_device_list and
+	jerry_prod.blacklist; no other device filter is applied beyond the
+	device_id suffix pattern.
+	"""
+	def __init__(self, cid_type, uid_type, ndays=None):
+		"""
+		cid_type : content type to count, e.g. diary, answer, question
+		uid_type : characters interpolated into the regexp class '[...]$' that is
+			matched against device_id. 8: ends with _8; 6: ends with _6;
+			6|8: ends with _6 or _8; ^68: does not end with 6 or 8
+		ndays : stat_date string such as '2018-08-30'; when omitted (None) the value
+			of get_yesterday_date() at construction time is used
+		"""
+		self.cid_type = cid_type
+		self.uid_type = uid_type
+		# Resolve the default per instance. A call placed in the signature is evaluated
+		# only once, when the class is defined, and would go stale afterwards.
+		self.ndays = get_yesterday_date() if ndays is None else ndays
+	def _scalar(self, sql):
+		"""Fill {0}/{1}/{2} with ndays/cid_type/uid_type, run the query through con_sql
+		and return the first column of the first row."""
+		# NOTE(review): values are spliced into the SQL text; only pass trusted,
+		# code-defined cid_type/uid_type/ndays values.
+		return con_sql(sql.format(self.ndays, self.cid_type, self.uid_type))[0][0]
+	def get_uid_count(self):
+		"""Return count(distinct device_id) from data_feed_click for the stat date."""
+		# Inside '[...]' a '|' is a literal character, so "3|4" matches ids ending in 3, 4 or '|'.
+		sql = "select count(distinct(device_id)) from data_feed_click \
+			where stat_date='{0}' \
+			and (cid_type='{1}' or cid_type='diary_video') \
+			and device_id regexp '[{2}]$' \
+			and device_id not in (select distinct(device_id) from jerry_test.bl_device_list) \
+			and device_id not in (select distinct(device_id) from jerry_prod.blacklist)"
+		return self._scalar(sql)
+	def get_uid_clk_times(self):
+		"""Return count(device_id) from data_feed_click for the stat date (one per click row)."""
+		sql = "select count(device_id) from data_feed_click \
+			where stat_date='{0}' \
+			and (cid_type='{1}' or cid_type='diary_video') \
+			and device_id regexp '[{2}]$' \
+			and device_id not in (select distinct(device_id) from jerry_test.bl_device_list) \
+			and device_id not in (select distinct(device_id) from jerry_prod.blacklist)"
+		return self._scalar(sql)
+	def get_uid_imp_times(self):
+		"""Return count(device_id) from data_feed_exposure for the stat date."""
+		# NOTE(review): the click queries also accept cid_type='diary_video' but this
+		# exposure query does not - confirm the asymmetry is intended.
+		sql = "select count(device_id) from data_feed_exposure \
+			where stat_date='{0}' \
+			and cid_type='{1}' \
+			and device_id regexp '[{2}]$' \
+			and device_id not in (select distinct(device_id) from jerry_test.bl_device_list) \
+			and device_id not in (select distinct(device_id) from jerry_prod.blacklist)"
+		return self._scalar(sql)
+def _ctr_ratio(numerator, denominator):
+	"""Return numerator/denominator as a float, or None when the denominator is zero."""
+	if not denominator:
+		return None
+	# float() keeps this a true division under Python 2 as well as Python 3.
+	return float(numerator) / float(denominator)
+def _as_percent(ratio):
+	"""Format a ratio as a percentage rounded to two decimals; 'N/A' when it is None."""
+	if ratio is None:
+		return "N/A"
+	return str(round(ratio*100,2))+'%'
+def main():
+	"""Append one tab-separated row of CTR statistics to OUTPUT_PATH + "ctr.csv".
+
+	Columns: date, GrayStat CTR, AllStat CTR, relative growth of GrayStat over
+	AllStat, then uid count / exposure count / click count for GrayStat and the
+	same three counts for AllStat. A percentage column holds 'N/A' when its
+	denominator is zero.
+	"""
+	# Read the date once and hand it to both classes so every query and the
+	# written row refer to the same day.
+	stat_date = get_yesterday_date()
+	print("stat" + "  " + stat_date)
+	g_class = GrayStat("diary","3|4",stat_date)
+	a_class = AllStat("diary","3|4",stat_date)
+	# Run each query exactly once; the same numbers feed both the raw columns
+	# and the CTR computation.
+	g_uid = g_class.get_uid_count()
+	g_imp = g_class.get_uid_imp_times()
+	g_clk = g_class.get_uid_clk_times()
+	a_uid = a_class.get_uid_count()
+	a_imp = a_class.get_uid_imp_times()
+	a_clk = a_class.get_uid_clk_times()
+	g_ctr = _ctr_ratio(g_clk, g_imp)
+	a_ctr = _ctr_ratio(a_clk, a_imp)
+	if g_ctr is None or a_ctr is None:
+		growth_rate = None
+	else:
+		# Also None when the baseline CTR itself is zero.
+		growth_rate = _ctr_ratio(g_ctr - a_ctr, a_ctr)
+	fields = [
+		stat_date,
+		_as_percent(g_ctr),
+		_as_percent(a_ctr),
+		_as_percent(growth_rate),
+		str(g_uid), str(g_imp), str(g_clk),
+		str(a_uid), str(a_imp), str(a_clk),
+	]
+	with open(OUTPUT_PATH + "ctr.csv","a") as f:
+		f.write("\t".join(fields) + "\n")
+# Run the report only when this file is executed as a script, not when it is imported.
+if __name__ == '__main__':
+	main()
\ No newline at end of file