# -*- coding: UTF-8 -*-
"""Append yesterday's diary click-through rates, bucketed by device-id tail, to a CSV."""

import datetime
import time

import pymysql


def con_sql(sql, params=None):
    """
    Run a query against the database and return all fetched rows.

    The connection and cursor are always released, even when the query fails.

    :type sql : str
    :param params : optional values bound to ``%s`` placeholders in ``sql``
                    by the driver; when omitted the statement is sent as-is.
    :type params : tuple or list or dict or None
    :rtype : tuple
    """
    # NOTE(review): credentials are hard-coded in source; consider moving them
    # to configuration or environment variables.
    db = pymysql.connect(host='10.66.157.22', port=4000, user='root',
                         passwd='3SYz54LS9#^9sBvC', db='jerry_prod')
    try:
        with db.cursor() as cursor:
            cursor.execute(sql, params)
            return cursor.fetchall()
    finally:
        db.close()


def get_yesterday_date():
    """
    Return yesterday's date (local time) formatted like "2018-08-08".

    :rtype : str
    """
    yesterday = datetime.date.today() - datetime.timedelta(days=1)
    return yesterday.strftime("%Y-%m-%d")


OUTPUT_PATH = "/data2/models/eda/node2vec/"


class AllStat(object):
    """Daily click / exposure counters for one content type and device-id tail."""

    # Devices listed in either blacklist table are excluded from every count.
    _BLACKLIST_FILTER = (
        "and device_id not in (select distinct(device_id) from jerry_test.bl_device_list) "
        "and device_id not in (select distinct(device_id) from jerry_prod.blacklist)"
    )

    # NOTE(review): click queries also count 'diary_video' rows while the
    # exposure query does not; preserved as in the original -- confirm intent.
    _CLICK_CID_FILTER = "(cid_type=%s or cid_type='diary_video')"
    _EXPOSURE_CID_FILTER = "cid_type=%s"

    def __init__(self, cid_type, uid_type, ndays=None):
        """
        cid_type : diary, answer, question
        uid_type : characters placed inside the regexp class ``[...]$`` that the
                   device_id must match, e.g. "8" (ends with 8), "6" (ends with
                   6), "6|8" (ends with 6 or 8; a trailing '|' also matches),
                   "^68" (does not end with 6 or 8)
        ndays    : stat date such as '2018-08-30'; defaults to yesterday,
                   resolved when the instance is created (not at import time)
        """
        self.cid_type = cid_type
        self.uid_type = uid_type
        self.ndays = get_yesterday_date() if ndays is None else ndays

    def _count(self, count_expr, table, cid_filter):
        """Run one count query; only the trusted SQL fragments are formatted in."""
        sql = (
            "select {count_expr} from {table} "
            "where stat_date=%s "
            "and {cid_filter} "
            "and device_id regexp %s "
            "{blacklist}"
        ).format(count_expr=count_expr, table=table, cid_filter=cid_filter,
                 blacklist=self._BLACKLIST_FILTER)
        # Values are bound by the driver, in placeholder order.
        params = (self.ndays, self.cid_type, "[{0}]$".format(self.uid_type))
        return con_sql(sql, params)[0][0]

    def get_uid_count(self):
        """Return the number of distinct devices with a click on the stat date."""
        return self._count("count(distinct(device_id))", "data_feed_click",
                           self._CLICK_CID_FILTER)

    def get_uid_clk_times(self):
        """Return the number of click rows on the stat date."""
        return self._count("count(device_id)", "data_feed_click",
                           self._CLICK_CID_FILTER)

    def get_uid_imp_times(self):
        """Return the number of exposure rows on the stat date."""
        return self._count("count(device_id)", "data_feed_exposure",
                           self._EXPOSURE_CID_FILTER)


def _ctr(clicks, impressions):
    """Return clicks / impressions rounded to 4 places; 0.0 when there are no impressions."""
    if not impressions:
        return 0.0
    # float() keeps true division on interpreters where int / int truncates.
    return round(float(clicks) / impressions, 4)


def main():
    """Compute the diary CTR per device-id tail bucket and append one CSV row."""
    output = OUTPUT_PATH + "all_ctr.csv"
    # Resolve the date once so the queried date and the written date agree.
    stat_date = get_yesterday_date()
    tail_nu = ["1|2", "3|4", "5|6", "7|8", "0-9"]
    result_ctr = []
    for tail in tail_nu:
        stat = AllStat("diary", tail, stat_date)
        result_ctr.append(str(_ctr(stat.get_uid_clk_times(),
                                   stat.get_uid_imp_times())))
    line = stat_date + ',' + ','.join(result_ctr) + '\n'
    # Open the file only after all queries succeeded.
    with open(output, "a") as f:
        f.write(line)


if __name__ == '__main__':
    main()