# -*- coding: UTF-8 -*- from utils import con_sql,get_yesterday_date,get_between_day import time OUTPUT_PATH = "/data2/models/eda/gray_stat/" class GrayStat(object): def __init__(self, cid_type, uid_type, platform,ndays=get_yesterday_date()): """ cid_type : diary,answer,question uid_type : 8:_8结尾;6:_6结尾;6|8:_6或者_8结尾;^68:不是6或者8结尾的 platform : ios;android ndays : '2018-08-30'.... """ self.cid_type = cid_type self.uid_type = uid_type if platform == "ios": self.platform = "='App Store'" else: self.platform = "!='App Store'" self.ndays = ndays def get_uid_count(self): sql = "select count(distinct(device_id)) from data_feed_click \ where stat_date='{0}' \ and cid_type='{3}' \ and device_id regexp '[{1}]$' \ and device_type{2}".format(self.ndays,self.uid_type,\ self.platform.replace(' ',''),self.cid_type) uid_count = con_sql(sql)[0][0] return uid_count def get_uid_clk_times(self): sql = "select count(device_id) from data_feed_click \ where stat_date='{0}' \ and cid_type='{3}' \ and device_id regexp '[{1}]$' \ and device_type{2}".format(self.ndays,self.uid_type,\ self.platform.replace(' ',''),self.cid_type) uid_clk_times = con_sql(sql)[0][0] return uid_clk_times def get_uid_imp_times(self): sql = "select count(device_id) from data_feed_exposure \ where stat_date='{0}' \ and cid_type='{4}' \ and device_id regexp '[{1}]$' \ and device_type{2} \ and device_id in (select device_id from data_feed_click \ where stat_date='{0}' \ and cid_type='{4}' \ and device_id regexp '[{1}]$' \ and device_type{3})".format(self.ndays,self.uid_type,self.platform,\ self.platform.replace(' ',''),self.cid_type) uid_imp_times = con_sql(sql)[0][0] return uid_imp_times if __name__ == '__main__': # date_list = get_between_day('2018-08-20','2018-08-29') # for my_date in date_list: print("开始获取ffm中的灰度非灰度比例") start = time.time() #1.ffm中的灰度非灰度(ios和安卓一样): # 灰度:_6 # 非灰度:_8 result1 = [] #1.1获取ios和android平台的数据 platforms = ['ios','android'] grays = ['6','8'] for platform in platforms: for gray in grays: g_class = GrayStat('diary',gray,platform) uid_count = g_class.get_uid_count() uid_clk_times = g_class.get_uid_clk_times() uid_imp_times = g_class.get_uid_imp_times() uid_clk_rate = round(uid_clk_times/uid_imp_times,4) if uid_imp_times != 0 else 0 result1.append([g_class.ndays,g_class.cid_type,platform,gray,uid_count,\ uid_clk_times,uid_imp_times,uid_clk_rate]) result1.append([g_class.ndays,g_class.cid_type,platform,'all',\ result1[-1][4]+result1[-2][4],\ result1[-1][5]+result1[-2][5],\ result1[-1][6]+result1[-2][6],\ round((result1[-1][5]+result1[-2][5])/(result1[-1][6]+result1[-2][6]),4)]) if (result1[-1][6]+result1[-2][6]) != 0 else 0 #1.2获取所有平台的数据 labels = ['6','8','all'] for i in range(3): result1.append([g_class.ndays,g_class.cid_type,'all',labels[i],\ result1[i][4]+result1[i+3][4],\ result1[i][5]+result1[i+3][5],\ result1[i][6]+result1[i+3][6],\ round((result1[i][5]+result1[i+3][5])/(result1[i][6]+result1[i+3][6]),4)]) if (result1[i][6]+result1[i+3][6]) !=0 else 0 #1.3把一天所有的数据存入文件 output1 = OUTPUT_PATH + "gray_ffm.csv" with open(output1,'a+') as f: for line in result1: line = [str(i) for i in line] str_line = ','.join(line) + '\n' f.write(str_line) #2.实际中的灰度非灰度: # ios: # 灰度:6|8 #非灰度:^68 # android: # 灰度:0|1|2|6|8 #非灰度:^01268 print("开始获取实际中的灰度非灰度比例") result2 = [] #2.1获取ios和android平台的数据 platforms = ['ios','android'] for platform in platforms: #TODO 对于(安卓灰度放到 0 1 2 6 8;iOS灰度保持 6 8)问题,做一个id判断即可 if platform == 'ios': grays = ['6|8','^68'] else: grays = ['0|1|2|6|8','^01268'] for gray in grays: g_class = GrayStat('diary',gray,platform) uid_count = g_class.get_uid_count() uid_clk_times = g_class.get_uid_clk_times() uid_imp_times = g_class.get_uid_imp_times() uid_clk_rate = round(uid_clk_times/uid_imp_times,4) if uid_imp_times != 0 else 0 result2.append([g_class.ndays,g_class.cid_type,platform,gray,uid_count,\ uid_clk_times,uid_imp_times,uid_clk_rate]) result2.append([g_class.ndays,g_class.cid_type,platform,'all',\ result2[-1][4]+result2[-2][4],\ result2[-1][5]+result2[-2][5],\ result2[-1][6]+result2[-2][6],\ round((result2[-1][5]+result2[-2][5])/(result2[-1][6]+result2[-2][6]),4)]) if (result2[-1][6]+result2[-2][6]) != 0 else 0 #2.2获取所有平台的数据 labels = ['gray','not gray','all'] for i in range(3): result2.append([g_class.ndays,g_class.cid_type,'all',labels[i],\ result2[i][4]+result2[i+3][4],\ result2[i][5]+result2[i+3][5],\ result2[i][6]+result2[i+3][6],\ round((result2[i][5]+result2[i+3][5])/(result2[i][6]+result2[i+3][6]),4)]) if (result2[i][6]+result2[i+3][6]) !=0 else 0 #2.3把一天所有的数据写入文件 output2 = OUTPUT_PATH + "gray_all.csv" with open(output2,'a+') as f: for line in result2: line = [str(i) for i in line] str_line = ','.join(line) + '\n' f.write(str_line) end = time.time() print("程序执行时间:{}s".format(end-start))