# -*- coding: UTF-8 -*-
"""Daily click-through report for devices whose id ends in 5 or 6.

When run as a script this module:

1. computes yesterday's date,
2. loads the device blacklist from ``jerry_prod.blacklist``,
3. loads the experiment cohort from ``eagle.ffm_diary_queue_temp``
   (device ids ending in 5 or 6, minus the blacklist),
4. counts the cohort's diary clicks and exposures for that date,
5. counts that date's active tail-5/6 devices and how many of them are in
   the cohort,
6. appends one CSV row with all six numbers to ``DIRECTORY_PATH + "56ctr.csv"``.
"""
import datetime
from contextlib import contextmanager

import pandas as pd

# NOTE(review): database credentials are hard-coded in source control.
# They are kept here to preserve behaviour; move them to configuration or
# environment variables and rotate the password.
DB_HOST = '10.66.157.22'
DB_PORT = 4000
DB_USER = 'root'
DB_PASSWORD = '3SYz54LS9#^9sBvC'

# Directory (with trailing slash) that receives the CSV report.
DIRECTORY_PATH = "/data/ffm/"

# The original pattern was '[5|6]$', a character class that also matches a
# literal '|'. The intent (see the printed labels) is "ends in 5 or 6".
_TAIL_56_PATTERN = "[56]$"


@contextmanager
def _db_cursor(db_name):
    """Yield a cursor on ``db_name``; always close cursor and connection."""
    # Imported lazily so the module can be imported (e.g. for tooling or
    # tests of the pure helpers) on machines without the MySQL driver.
    import pymysql

    connection = pymysql.connect(host=DB_HOST, port=DB_PORT, user=DB_USER,
                                 passwd=DB_PASSWORD, db=db_name)
    try:
        cursor = connection.cursor()
        try:
            yield cursor
        finally:
            cursor.close()
    finally:
        connection.close()


def _first_column(rows):
    """Return the first column of ``rows`` as a list (``[]`` if no rows)."""
    # Building a DataFrame from an empty result has no column 0 and would
    # raise KeyError, so short-circuit that case.
    if not rows:
        return []
    return pd.DataFrame(list(rows))[0].values.tolist()


def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator``, or 0.0 when the denominator is 0."""
    return numerator / denominator if denominator else 0.0


def get_yesterday_date():
    """Print and return yesterday's local date formatted as ``YYYY-MM-DD``."""
    yesterday = datetime.date.today() - datetime.timedelta(days=1)
    yesterday = yesterday.strftime("%Y-%m-%d")
    print(yesterday)
    return yesterday


def get_black_user():
    """Return the list of distinct blacklisted device ids."""
    with _db_cursor('jerry_prod') as cursor:
        cursor.execute("select distinct device_id from blacklist")
        return _first_column(cursor.fetchall())


def get_data(black_users=None):
    """Return the experiment cohort as a tuple of device ids.

    The cohort is every distinct device id in ``ffm_diary_queue_temp`` that
    ends in 5 or 6, with blacklisted ids removed.

    :param black_users: iterable of device ids to exclude; defaults to the
        module-level ``black`` set up by the script entry point.
    """
    if black_users is None:
        black_users = black
    sql = ("select distinct device_id from ffm_diary_queue_temp "
           "where device_id regexp '{}'".format(_TAIL_56_PATTERN))
    with _db_cursor('eagle') as cursor:
        cursor.execute(sql)
        devices = _first_column(cursor.fetchall())
    return tuple(set(devices) - set(black_users))


def ctr_all(stat_date=None, black_users=None, device_ids=None):
    """Report how many active tail-5/6 devices belong to the cohort.

    :param stat_date: ``YYYY-MM-DD`` date to query; defaults to the
        module-level ``date``.
    :param black_users: device ids to exclude; defaults to the module-level
        ``black``.
    :param device_ids: the experiment cohort; defaults to the module-level
        ``device_id``.
    :return: ``(active_count, covered_count, coverage)`` where ``coverage``
        is ``covered_count / active_count`` formatted with six decimals
        (``"0.000000"`` when there are no active devices).
    """
    if stat_date is None:
        stat_date = date
    if black_users is None:
        black_users = black
    if device_ids is None:
        device_ids = device_id

    sql_active = ("select distinct device_id from data_feed_exposure "
                  "where cid_type = 'diary' "
                  "and device_id regexp '{}' "
                  "and stat_date = %s;".format(_TAIL_56_PATTERN))
    with _db_cursor('jerry_prod') as cursor:
        cursor.execute(sql_active, (stat_date,))
        active = _first_column(cursor.fetchall())

    tail56 = set(active) - set(black_users)
    print("当天尾号5或6活跃用户总数:")
    print(len(tail56))
    cover = len(tail56 & set(device_ids))
    print("当天尾号5或6活跃用户覆盖数:")
    print(cover)
    cover_percent = format(_safe_ratio(cover, len(tail56)), ".6f")
    print("当天尾号5或6活跃用户覆盖率:")
    print(cover_percent)
    return len(tail56), cover, cover_percent


def ctr(stat_date=None, device_ids=None):
    """Count the cohort's diary clicks and exposures and their ratio.

    :param stat_date: ``YYYY-MM-DD`` date to query; defaults to the
        module-level ``date``.
    :param device_ids: the experiment cohort; defaults to the module-level
        ``device_id``.
    :return: ``(clicks, exposures, rate)`` where ``rate`` is
        ``clicks / exposures`` formatted with six decimals (``"0.000000"``
        when there are no exposures).
    """
    if stat_date is None:
        stat_date = date
    if device_ids is None:
        device_ids = device_id
    device_ids = tuple(device_ids)

    if device_ids:
        # One placeholder per id: the driver does the quoting, and one-element
        # cohorts no longer produce the invalid "in ('x',)" tuple repr.
        placeholders = ", ".join(["%s"] * len(device_ids))
        params = (stat_date,) + device_ids
        sql_click = ("select count(cid) from data_feed_click "
                     "where (cid_type = 'diary' or cid_type = 'diary_video') "
                     "and stat_date = %s "
                     "and device_id in ({});".format(placeholders))
        sql_exp = ("select count(cid) from data_feed_exposure "
                   "where cid_type = 'diary' "
                   "and stat_date = %s "
                   "and device_id in ({});".format(placeholders))
        with _db_cursor('jerry_prod') as cursor:
            cursor.execute(sql_click, params)
            click = cursor.fetchone()[0]
            cursor.execute(sql_exp, params)
            exp = cursor.fetchone()[0]
    else:
        # "in ()" is not valid SQL; an empty cohort has no clicks/exposures.
        click = 0
        exp = 0

    rate = _safe_ratio(click, exp)
    print("实验用户点击数:"+str(click))
    print("实验用户曝光数:"+str(exp))
    print("实验用户点击率:"+str(rate))
    return click, exp, format(rate, ".6f")


def rate2file():
    """Append one CSV row with the day's numbers to ``56ctr.csv``.

    Reads the module-level ``date``, ``temp_data`` (result of :func:`ctr`)
    and ``data`` (result of :func:`ctr_all`). Row layout: date without
    dashes, then the three ``temp_data`` values, then the three ``data``
    values.
    """
    output_path = DIRECTORY_PATH + "56ctr.csv"
    fields = [date.replace('-', ''),
              temp_data[0], temp_data[1], temp_data[2],
              data[0], data[1], data[2]]
    with open(output_path, 'a+') as f:
        f.write(','.join(str(field) for field in fields) + '\n')


if __name__ == "__main__":
    # These module-level names are the defaults read by the functions above.
    date = get_yesterday_date()
    black = get_black_user()
    device_id = get_data()
    temp_data = ctr()
    data = ctr_all()
    rate2file()