# -*- coding: UTF-8 -*-
"""Daily click-through-rate (CTR) report for the diary feed.

When run as a script this module:

1. computes yesterday's date,
2. loads the experiment device ids (ids ending in 7 or 8) from
   ``eagle.search_queue``,
3. counts diary clicks/exposures in ``jerry_prod`` for those devices and for
   every device whose id ends in 7 or 8,
4. appends one CSV row with the results to ``search_ctr.csv`` under
   ``DIRECTORY_PATH``.

The worker functions read the module-level names ``date``, ``device_id``,
``temp_data`` and ``data`` that the ``__main__`` section assigns, so they
must be called in the order used there.
"""
import datetime

import pandas as pd  # noqa: F401 - no longer used below; kept so the file's import set is unchanged
import pymysql

DIRECTORY_PATH = "/data2/ffm/"

# NOTE(review): connection settings (including the password) are hard-coded
# in source exactly as before; they should move to configuration or a secret
# store.
_DB_HOST = '10.66.157.22'
_DB_PORT = 4000
_DB_USER = 'root'
_DB_PASSWORD = '3SYz54LS9#^9sBvC'

# Matches device ids whose last character is 7 or 8.  The previous pattern
# '[7|8]$' was a character class that also matched a trailing literal '|'.
_TAIL_CLAUSE = "device_id regexp '[78]$'"


def _connect(db_name):
    """Open a new connection to database ``db_name`` on the shared host."""
    return pymysql.connect(host=_DB_HOST, port=_DB_PORT, user=_DB_USER,
                           passwd=_DB_PASSWORD, db=db_name)


def _first_column(rows):
    """Return the first column of ``rows`` as a tuple (empty if no rows)."""
    return tuple(row[0] for row in rows)


def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator`` as a float, or 0.0 if the denominator is 0."""
    if not denominator:
        return 0.0
    return float(numerator) / denominator


def _count_diary_rows(cursor, table, stat_date, device_clause, device_args=()):
    """Count diary rows in ``table`` for ``stat_date`` restricted by ``device_clause``.

    ``table`` and ``device_clause`` are trusted SQL fragments built inside
    this module; ``stat_date`` and ``device_args`` are sent as query
    parameters so the driver performs the quoting.
    """
    sql = ("select count(cid) from {} "
           "where cid_type = 'diary' and stat_date = %s "
           "and {}").format(table, device_clause)
    cursor.execute(sql, (stat_date,) + tuple(device_args))
    return cursor.fetchone()[0]


def get_yesterday_date():
    """Print and return yesterday's date formatted as ``YYYY-MM-DD``."""
    yesterday = datetime.date.today() - datetime.timedelta(days=1)
    formatted = yesterday.strftime("%Y-%m-%d")
    print(formatted)
    return formatted


def get_data():
    """Return the experiment device ids as a tuple.

    Selects every ``device_id`` in ``eagle.search_queue`` whose id ends in 7
    or 8, prints the first two ids, and returns all of them.  An empty table
    yields an empty tuple.
    """
    conn = _connect('eagle')
    try:
        with conn.cursor() as cursor:
            cursor.execute(
                "select device_id from search_queue where " + _TAIL_CLAUSE)
            rows = cursor.fetchall()
    finally:
        # Always release the connection, even if the query fails.
        conn.close()
    ids = _first_column(rows)
    print(ids[0:2])
    return ids


def ctr_all():
    """Compute coverage and CTR for all devices whose id ends in 7 or 8.

    Reads the module-level ``date`` (statistics day) and ``device_id``
    (experiment device ids).

    Returns:
        A 6-tuple ``(active, cover, cover_percent, click, exp, ctr)``:
        number of distinct devices with a diary exposure that day, how many
        of those are experiment devices, ``cover`` divided by the number of
        distinct experiment devices, diary click count, diary exposure count,
        and ``click / exp``.  Ratios are 0.0 when their denominator is 0.
    """
    conn = _connect('jerry_prod')
    try:
        with conn.cursor() as cursor:
            cursor.execute(
                "select distinct device_id from data_feed_exposure "
                "where cid_type = 'diary' "
                "and " + _TAIL_CLAUSE + " and stat_date = %s",
                (date,))
            active_ids = _first_column(cursor.fetchall())
            click = _count_diary_rows(
                cursor, 'data_feed_click', date, _TAIL_CLAUSE)
            exp = _count_diary_rows(
                cursor, 'data_feed_exposure', date, _TAIL_CLAUSE)
    finally:
        conn.close()

    experiment_ids = set(device_id)
    cover = len(experiment_ids & set(active_ids))
    cover_percent = _safe_ratio(cover, len(experiment_ids))
    rate = _safe_ratio(click, exp)

    print("当天尾号7或8活跃用户数:")
    print(len(active_ids))
    print("当天尾号7或8活跃用户覆盖数:")
    print(cover)
    print("当天尾号7或8活跃用户覆盖率:")
    print(cover_percent)
    print("点击数:" + str(click))
    print("曝光数:" + str(exp))
    print("点击率:" + str(rate))
    return len(active_ids), cover, cover_percent, click, exp, rate


def ctr():
    """Compute diary clicks, exposures and CTR for the experiment devices.

    Reads the module-level ``date`` and ``device_id``.

    Returns:
        A 3-tuple ``(click, exp, ctr)``.  With no experiment devices the
        database is not queried and both counts are 0; ``ctr`` is 0.0
        whenever ``exp`` is 0.
    """
    ids = tuple(device_id)
    if ids:
        # One placeholder per id: valid SQL for any non-empty id list
        # (including a single id) and quoting is left to the driver.
        in_clause = "device_id in ({})".format(",".join(["%s"] * len(ids)))
        conn = _connect('jerry_prod')
        try:
            with conn.cursor() as cursor:
                click = _count_diary_rows(
                    cursor, 'data_feed_click', date, in_clause, ids)
                exp = _count_diary_rows(
                    cursor, 'data_feed_exposure', date, in_clause, ids)
        finally:
            conn.close()
    else:
        # "in ()" is not valid SQL; an empty id set trivially matches nothing.
        click = exp = 0

    rate = _safe_ratio(click, exp)
    print("实验用户点击数:" + str(click))
    print("实验用户曝光数:" + str(exp))
    print("实验用户点击率:" + str(rate))
    return click, exp, rate


def rate2file():
    """Append one CSV row with the day's statistics to ``search_ctr.csv``.

    Reads the module-level ``date``, ``temp_data`` (result of ``ctr``) and
    ``data`` (result of ``ctr_all``).  Row layout: the date with dashes
    removed, the three ``temp_data`` values, then the six ``data`` values.
    """
    output_path = DIRECTORY_PATH + "search_ctr.csv"
    fields = [date.replace('-', '')]
    fields.extend(temp_data[i] for i in range(3))
    fields.extend(data[i] for i in range(6))
    line = ",".join(str(value) for value in fields) + '\n'
    with open(output_path, 'a') as f:
        f.write(line)


if __name__ == "__main__":
    # These names are deliberately module-level: the functions above read them.
    date = get_yesterday_date()
    device_id = get_data()
    temp_data = ctr()
    data = ctr_all()
    rate2file()