# -*- coding: UTF-8 -*-
"""Daily CTR comparison between a gray-release device group and all devices.

Run as a script with one argument ('3|4', '5|6' or '7|8').  The argument
selects the device-id suffix group, the table listing the gray-release
devices, and the output path prefix.  One CSV row is appended per run.
"""
import sys
import time

from utils import con_sql,get_yesterday_date,get_between_day


# Devices present in either blacklist table are excluded from every statistic.
_BLACKLIST_FILTER = (
    "and device_id not in (select distinct(device_id) from jerry_test.bl_device_list) "
    "and device_id not in (select distinct(device_id) from jerry_prod.blacklist)"
)

# Script argument -> (output path prefix, table listing the gray devices).
# NOTE(review): main() appends "ctr.csv" directly to the prefix, so the file
# ends up as e.g. ".../gray_stat/3|4ctr.csv".  Kept as-is so existing output
# files keep being appended to; confirm whether a separator was intended.
_GRAY_CONFIG = {
    "3|4": ("/data2/models/eda/gray_stat/3|4",
            "jerry_prod.nd_device_cid_similarity_matrix_tmp"),
    "5|6": ("/data2/models/eda/gray_stat/5|6",
            "eagle.ffm_diary_queue_temp"),
    "7|8": ("/data2/models/eda/gray_stat/7|8",
            "eagle.search_queue"),
}


class _FeedStat(object):
    """Shared query logic for the click / exposure statistics.

    NOTE: the SQL is assembled by string interpolation because con_sql is
    called with a single SQL string here.  Only pass trusted values for
    cid_type, uid_type, ndays (and table_name in GrayStat).
    """

    def __init__(self, cid_type, uid_type, ndays=None):
        """
        cid_type : content type to count, e.g. diary, answer, question
        uid_type : body of the regexp character class matched against the
                   last character of device_id ("device_id regexp '[...]$'"):
                   '8' ends with 8; '6' ends with 6; '6|8' ends with 6 or 8
                   (the '|' is itself a member of the class); '^68' does not
                   end with 6 or 8
        ndays    : stat_date to query, e.g. '2018-08-30'.  Defaults to
                   get_yesterday_date(), evaluated when the instance is
                   created rather than once at import time.
        """
        self.cid_type = cid_type
        self.uid_type = uid_type
        self.ndays = get_yesterday_date() if ndays is None else ndays

    def _device_scope(self):
        """Return an extra 'and ...' clause restricting devices ('' = none)."""
        return ""

    def _click_cid_filter(self):
        # Click statistics always include 'diary_video' next to cid_type.
        return "and (cid_type='{0}' or cid_type='diary_video')".format(self.cid_type)

    def _exposure_cid_filter(self):
        # NOTE(review): unlike the click filter, exposures do not include
        # 'diary_video' -- confirm this asymmetry is intended.
        return "and cid_type='{0}'".format(self.cid_type)

    def _count(self, counted, feed_table, cid_filter):
        """Run 'select count(<counted>) from <feed_table>' with all filters."""
        clauses = [
            "select count({0}) from {1}".format(counted, feed_table),
            "where stat_date='{0}'".format(self.ndays),
            cid_filter,
            "and device_id regexp '[{0}]$'".format(self.uid_type),
            self._device_scope(),
            _BLACKLIST_FILTER,
        ]
        sql = " ".join(clause for clause in clauses if clause)
        return con_sql(sql)[0][0]

    def get_uid_count(self):
        """Return the number of distinct devices with at least one click."""
        return self._count("distinct(device_id)", "data_feed_click",
                           self._click_cid_filter())

    def get_uid_clk_times(self):
        """Return the total number of click rows."""
        return self._count("device_id", "data_feed_click",
                           self._click_cid_filter())

    def get_uid_imp_times(self):
        """Return the total number of exposure rows."""
        return self._count("device_id", "data_feed_exposure",
                           self._exposure_cid_filter())


class GrayStat(_FeedStat):
    """Statistics restricted to devices listed in the gray-release table."""

    def __init__(self, cid_type, uid_type, ndays=None, table_name=None):
        """
        cid_type, uid_type, ndays : see _FeedStat.__init__
        table_name : table whose device_id column lists the gray devices.
                     When omitted, the module-level ``table_name`` set by the
                     script entry point is used (the original behaviour).
        """
        super(GrayStat, self).__init__(cid_type, uid_type, ndays)
        self.gray_table = table_name

    def _device_scope(self):
        gray_table = self.gray_table
        if gray_table is None:
            # Fall back to the global assigned in the __main__ block.
            gray_table = table_name
        return "and device_id in (select device_id from {0})".format(gray_table)


class AllStat(_FeedStat):
    """Statistics over all devices matching uid_type (no gray-table filter)."""


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator as a float, or 0.0 for a zero divisor."""
    if not denominator:
        return 0.0
    return float(numerator) / float(denominator)


def _percent(ratio):
    """Format a ratio as a percentage rounded to two decimals, e.g. '12.34%'."""
    return str(round(ratio * 100, 2)) + "%"


def main():
    """Query gray vs. overall statistics and append one CSV row.

    Reads the module globals OUTPUT_PATH, num and table_name, which the
    __main__ block sets from the command-line argument.

    Row layout: date, gray CTR, overall CTR, CTR growth rate,
    gray (uid count, impressions, clicks),
    overall (uid count, impressions, clicks), coverage rate.
    """
    output = OUTPUT_PATH + "ctr.csv"
    # Resolve the date once so the row label and the queried date agree.
    stat_date = get_yesterday_date()
    print("stat" + " " + stat_date)

    gray = GrayStat("diary", num, stat_date, table_name=table_name)
    overall = AllStat("diary", num, stat_date)

    # Each statistic is queried exactly once; the same values feed both the
    # raw count columns and the derived rates.
    g_count = gray.get_uid_count()
    g_imp = gray.get_uid_imp_times()
    g_clk = gray.get_uid_clk_times()
    a_count = overall.get_uid_count()
    a_imp = overall.get_uid_imp_times()
    a_clk = overall.get_uid_clk_times()

    g_ctr = _safe_ratio(g_clk, g_imp)
    a_ctr = _safe_ratio(a_clk, a_imp)
    growth_rate = _safe_ratio(g_ctr - a_ctr, a_ctr)
    coverage_rate = _safe_ratio(g_count, a_count)

    fields = [
        stat_date,
        _percent(g_ctr),
        _percent(a_ctr),
        _percent(growth_rate),
        str(g_count), str(g_imp), str(g_clk),
        str(a_count), str(a_imp), str(a_clk),
        _percent(coverage_rate),
    ]
    # Open the file only once the row is complete, so a failed query does not
    # create or touch the output file.
    with open(output, "a") as f:
        f.write(",".join(fields) + "\n")


if __name__ == '__main__':
    OUTPUT_PATH = "/data2/models/eda/gray_stat/"
    # A missing argument is reported like an invalid one instead of raising
    # IndexError.
    num = sys.argv[1] if len(sys.argv) > 1 else None
    if num not in _GRAY_CONFIG:
        print("参数必须是 '3|4' 或 '5|6' 或 '7|8'")
        print("程序异常停止,请重新输入参数运行")
        # Non-zero status so schedulers / shells can detect the failure.
        sys.exit(1)
    OUTPUT_PATH, table_name = _GRAY_CONFIG[num]
    main()