import pandas as pd import pymysql from datetime import datetime from datetime import timedelta def get_tail8(): sql = "select distinct device_id from data_feed_click \ where stat_date='{}' \ and cid_type='{}' \ and device_id regexp '8$';".format(stat_date,cid_type) db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test') cursor = db.cursor() print("开始获取") cursor.execute(sql) print("成功获取") result = cursor.fetchall() db.close() user = pd.DataFrame(list(result))[0].values.tolist() user = tuple(user) print("尾号是8的用户个数") print(len(user)) return user def get_ctr(user_tuple): sql = "select count(device_id) from data_feed_click \ where stat_date='{}' \ and cid_type='{}' \ and device_id in {}".format(stat_date, cid_type, user_tuple) db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test') cursor = db.cursor() print("开始获取") cursor.execute(sql) click = cursor.fetchall()[0][0] print(click) sql = "select count(device_id) from data_feed_exposure \ where stat_date='{}' \ and cid_type='{}' \ and device_id in {}".format(stat_date, cid_type, user_tuple) cursor = db.cursor() print("开始获取") cursor.execute(sql) exp = cursor.fetchall()[0][0] db.close() print(exp) print(click / exp) def get_tail6(): df = pd.read_csv(path+"{}predictTail6Unique.csv".format(stat_date)) pre_list = tuple(eval(df.loc[0,"list"])) print(len(pre_list)) print(pre_list[:2]) db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test') sql = "select distinct device_id from data_feed_click \ where stat_date='{}' \ and cid_type='{}' \ and device_id in {}".format(stat_date,cid_type,pre_list) cursor = db.cursor() print("开始获取") cursor.execute(sql) print("成功获取") result = cursor.fetchall() db.close() print(pd.DataFrame(list(result)).empty) user = pd.DataFrame(list(result))[0].values.tolist() user = tuple(user) print("用户个数") print(len(user)) return user if __name__ == "__main__": path = "/data/models/" cid_type = "diary" now = datetime.now() year = now.year month = now.month day = now.day stat_date = datetime(year, month, day) stat_date = (stat_date - timedelta(days=1)).strftime("%Y-%m-%d") print(stat_date) tail6 = get_tail6() get_ctr(tail6) tail8 = get_tail8() get_ctr(tail8)