from utils import con_sql import datetime import time import pymysql def fetch_data(start_date, end_date): # 获取点击表里的device_id db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test') sql = "select distinct device_id from data_feed_click" click_device_id = con_sql(db,sql)[0].values.tolist() print("成功获取点击表里的device_id") # 获取点击表里的数据 sql = "select cid,device_id,time,stat_date from data_feed_click " \ "where stat_date >= '{0}' and stat_date <= '{1}'".format(start_date, end_date) # 因为上面的db已经关了,需要再写一遍 db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test') click = con_sql(db,sql) click = click.rename(columns={0: "cid", 1: "device_id", 2: "time_date", 3: "stat_date"}) print("成功获取点击表里的数据") # 从time特征中抽取hour click["hour"] = click["time_date"].apply(lambda x: datetime.datetime.fromtimestamp(x).hour) click["minute"] = click["time_date"].apply(lambda x: datetime.datetime.fromtimestamp(x).minute) click = click.drop("time_date", axis=1) # 获取曝光表里的数据 sql = "select cid,device_id,time,stat_date from data_feed_exposure " \ "where stat_date >= '{0}' and stat_date <= '{1}'".format(start_date, end_date) start = time.time() # 因为上面的db已经关了,需要再写一遍 db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test') exposure = con_sql(db,sql) end = time.time() print("获取曝光表耗时{}分".format((end-start)/60)) exposure = exposure.rename(columns={0: "cid", 1: "device_id", 2: "time_date", 3: "stat_date"}) print("成功获取曝光表里的数据") # 从time特征中抽取hour exposure["hour"] = exposure["time_date"].apply(lambda x: datetime.datetime.fromtimestamp(x).hour) exposure["minute"] = exposure["time_date"].apply(lambda x: datetime.datetime.fromtimestamp(x).minute) exposure = exposure.drop("time_date", axis=1) return exposure, click, click_device_id