# prepareData.py (1.73 KB)
from utils import con_sql
import datetime


# Names given to the positional (0..3) columns of the result of the
# "cid,device_id,time,stat_date" feed queries.
_FEED_COLUMNS = {0: "cid", 1: "device_id", 2: "time_date", 3: "stat_date"}


def _query_feed_table(table, start_date, end_date):
    """Query ``table`` for rows within the stat_date range and name the columns."""
    # NOTE(review): the dates are interpolated straight into the SQL text.
    # If they can ever come from untrusted input, switch to a parameterized
    # query -- whether con_sql supports bound parameters is not visible here.
    sql = "select cid,device_id,time,stat_date from {0} " \
          "where stat_date >= '{1}' and stat_date <= '{2}'".format(table, start_date, end_date)
    return con_sql(sql).rename(columns=_FEED_COLUMNS)


def _split_time_column(frame):
    """Replace the ``time_date`` column with ``hour`` and ``minute`` columns."""
    # Convert each timestamp once and reuse the result for both fields.
    # datetime.fromtimestamp interprets the value in the local timezone.
    moments = frame["time_date"].apply(lambda ts: datetime.datetime.fromtimestamp(ts))
    frame["hour"] = moments.apply(lambda moment: moment.hour)
    frame["minute"] = moments.apply(lambda moment: moment.minute)
    return frame.drop("time_date", axis=1)


def fetch_data(start_date, end_date):
    """Load click and exposure feed data between two stat_date bounds.

    Progress messages and small previews of the fetched data are printed
    along the way.

    Args:
        start_date: Inclusive lower bound, compared against ``stat_date``.
        end_date: Inclusive upper bound, compared against ``stat_date``.

    Returns:
        A tuple ``(exposure, click, click_device_id)``. ``exposure`` and
        ``click`` hold the columns ``cid``, ``device_id``, ``stat_date``,
        ``hour`` and ``minute``; ``click_device_id`` is a list of the distinct
        device ids found in the click table (not restricted to the date range).
        Presumably ``con_sql`` returns pandas DataFrames with integer column
        labels -- confirm against ``utils.con_sql``.
    """
    # Distinct device ids over the whole click table (no date filter).
    sql = "select distinct device_id from data_feed_click"
    click_device_id = con_sql(sql)[0].values.tolist()
    print("成功获取点击表里的device_id")

    # Click rows within the requested date range.
    click = _query_feed_table("data_feed_click", start_date, end_date)
    print(click.head(5))
    print("成功获取点击表里的数据")
    # Derive hour and minute from the raw timestamp column.
    click = _split_time_column(click)
    print("点击表数据预览")
    print(click.head(2))

    # Exposure rows within the requested date range.
    exposure = _query_feed_table("data_feed_exposure", start_date, end_date)
    print("成功获取曝光表里的数据")
    # Derive hour and minute from the raw timestamp column.
    exposure = _split_time_column(exposure)
    print("曝光表数据预览")
    print(exposure.head(2))

    return exposure, click, click_device_id