import pandas as pd import pymysql from datetime import datetime from datetime import timedelta def get_city(): sql = "select distinct city_id from data_feed_exposure where stat_date >= '2018-10-01' order by city_id" db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_prod') cursor = db.cursor() print("开始获取") cursor.execute(sql) print("成功获取") result = cursor.fetchall() db.close() user = pd.DataFrame(list(result))[0].values.tolist() print(user) sql = "select distinct name from api_tag where tag_type = 4" db = pymysql.connect(host='rdsfewzdmf0jfjp9un8xj.mysql.rds.aliyuncs.com', port=3306, user='work', passwd='BJQaT9VzDcuPBqkd', db='zhengxing') cursor = db.cursor() print("开始获取") cursor.execute(sql) print("成功获取") result = cursor.fetchall() db.close() user = pd.DataFrame(list(result))[0].values.tolist() print(user) # def get_tail8(): # sql = "select distinct device_id from data_feed_click \ # where stat_date='{}' \ # and cid_type='{}' \ # and device_id regexp '8$';".format(stat_date,cid_type) # db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test') # cursor = db.cursor() # print("开始获取") # cursor.execute(sql) # print("成功获取") # result = cursor.fetchall() # db.close() # user = pd.DataFrame(list(result))[0].values.tolist() # user = tuple(user) # print("尾号是8的用户个数") # print(len(user)) # return user # def get_ctr(user_tuple): # sql = "select count(device_id) from data_feed_click \ # where stat_date='{}' \ # and cid_type='{}' \ # and device_id in {}".format(stat_date, cid_type, user_tuple) # db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test') # cursor = db.cursor() # print("开始获取") # cursor.execute(sql) # click = cursor.fetchall()[0][0] # print(click) # # sql = "select count(device_id) from data_feed_exposure \ # where stat_date='{}' \ # and cid_type='{}' \ # and device_id in {}".format(stat_date, cid_type, user_tuple) # cursor = db.cursor() # print("开始获取") # cursor.execute(sql) # exp = cursor.fetchall()[0][0] # db.close() # print(exp) # print(click / exp) # def get_tail6(): # df = pd.read_csv(path+"{}predictTail6Unique.csv".format(stat_date)) # pre_list = tuple(eval(df.loc[0,"list"])) # print(len(pre_list)) # print(pre_list[:2]) # db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test') # sql = "select distinct device_id from data_feed_click \ # where stat_date='{}' \ # and cid_type='{}' \ # and device_id in {}".format(stat_date,cid_type,pre_list) # cursor = db.cursor() # print("开始获取") # cursor.execute(sql) # print("成功获取") # result = cursor.fetchall() # db.close() # print(pd.DataFrame(list(result)).empty) # user = pd.DataFrame(list(result))[0].values.tolist() # user = tuple(user) # print("用户个数") # print(len(user)) # return user if __name__ == "__main__": get_city() # path = "/data/models/" # cid_type = "diary" # now = datetime.now() # year = now.year # month = now.month # day = now.day # stat_date = datetime(year, month, day) # stat_date = (stat_date - timedelta(days=1)).strftime("%Y-%m-%d") # print(stat_date) # tail6 = get_tail6() # get_ctr(tail6) # tail8 = get_tail8() # get_ctr(tail8)