# ctr_search.py 3.46 KB
# -*- coding: UTF-8 -*-
import pymysql
import datetime
import pandas as pd

# Directory that receives the CSV report. The trailing slash is required
# because the file name is appended with plain string concatenation.
DIRECTORY_PATH = "/data2/ffm/"

def get_yesterday_date():
    """Return yesterday's local date formatted as ``YYYY-MM-DD``.

    The formatted date is also printed to stdout before it is returned.
    """
    one_day = datetime.timedelta(days=1)
    formatted = (datetime.date.today() - one_day).strftime("%Y-%m-%d")
    print(formatted)
    return formatted


def get_data():
    """Fetch the device ids in ``search_queue`` whose id ends in 7 or 8.

    Prints the first two ids to stdout as a quick sanity check.

    Returns:
        tuple: the ``device_id`` column values in the order the database
        returned them; an empty tuple when no row matches.
    """
    # NOTE(review): the connection credentials are hard-coded; they should be
    # loaded from configuration or the environment instead of source control.
    conn2db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='eagle')
    try:
        with conn2db.cursor() as cursor:
            # "[78]$" is the character class for a trailing 7 or 8; the
            # earlier "[7|8]$" also matched ids ending in a literal "|".
            sql = "select device_id from search_queue where device_id regexp '[78]$'"
            cursor.execute(sql)
            result = cursor.fetchall()
    finally:
        # Release the connection even when the query raises.
        conn2db.close()

    # Read the single selected column straight from the rows. Going through a
    # DataFrame raised KeyError when the query returned no rows.
    device_id = tuple(row[0] for row in result)
    print(device_id[0:2])

    return device_id


def ctr_all():
    """Compute diary-feed coverage and click-through stats for tail-7/8 devices.

    Reads two module-level globals assigned by the ``__main__`` block:
    ``date`` (the stat date string) and ``device_id`` (the experiment device
    ids). Every figure is printed to stdout as it is computed.

    Returns:
        tuple: ``(active_count, cover, cover_percent, click, exp, ctr)`` where
        ``active_count`` is the number of distinct active devices ending in
        7 or 8, ``cover`` is how many of them are also in ``device_id``,
        ``cover_percent`` is ``cover`` divided by the number of distinct
        ``device_id`` entries, and ``click`` / ``exp`` / ``ctr`` are the click
        count, exposure count and their ratio. Ratios are ``0.0`` when the
        denominator is zero.
    """
    # "[78]$" is the character class for a trailing 7 or 8; the earlier
    # "[7|8]$" also matched a trailing literal "|". The date is passed as a
    # bound parameter, and each fragment ends with a space so the
    # concatenated SQL keeps its keywords separated.
    sql_active = ("select distinct device_id from data_feed_exposure "
                  "where cid_type = 'diary' "
                  "and device_id regexp '[78]$' and stat_date = %s")
    sql_click = ("select count(cid) from data_feed_click "
                 "where cid_type = 'diary' "
                 "and stat_date = %s and device_id regexp '[78]$'")
    sql_exp = ("select count(cid) from data_feed_exposure "
               "where cid_type = 'diary' and stat_date = %s and "
               "device_id regexp '[78]$'")

    # NOTE(review): the connection credentials are hard-coded; they should be
    # loaded from configuration or the environment instead of source control.
    db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_prod')
    try:
        with db.cursor() as cursor:
            cursor.execute(sql_active, (date,))
            # Plain column extraction: an empty result yields an empty tuple
            # instead of the KeyError the DataFrame round-trip raised.
            active_ids = tuple(row[0] for row in cursor.fetchall())

            cursor.execute(sql_click, (date,))
            click = cursor.fetchone()[0]

            cursor.execute(sql_exp, (date,))
            exp = cursor.fetchone()[0]
    finally:
        # Release the connection even when a query raises.
        db.close()

    experiment_ids = set(device_id)
    cover = len(experiment_ids & set(active_ids))
    # Guard both ratios so a day without data does not abort the report.
    cover_percent = cover / len(experiment_ids) if experiment_ids else 0.0
    click_rate = click / exp if exp else 0.0

    print("当天尾号7或8活跃用户数:")
    print(len(active_ids))
    print("当天尾号7或8活跃用户覆盖数:")
    print(cover)
    print("当天尾号7或8活跃用户覆盖率:")
    print(cover_percent)
    print("点击数:" + str(click))
    print("曝光数:" + str(exp))
    print("点击率:" + str(click_rate))
    return len(active_ids), cover, cover_percent, click, exp, click_rate


def ctr():
    """Compute diary-feed clicks, exposures and CTR for the experiment devices.

    Reads two module-level globals assigned by the ``__main__`` block:
    ``date`` (the stat date string) and ``device_id`` (the experiment device
    ids). The three figures are printed to stdout.

    Returns:
        tuple: ``(click, exp, ctr)``. ``ctr`` is ``0.0`` when there are no
        exposures; all three are zero when ``device_id`` is empty.
    """
    ids = tuple(device_id)
    if not ids:
        # "in ()" is not valid SQL, and with no devices there is nothing to
        # count, so skip the database entirely.
        click, exp = 0, 0
    else:
        # One bound placeholder per id. Formatting the Python tuple into the
        # SQL text produced "('x',)" for a single id (a syntax error) and
        # relied on repr() quoting matching SQL quoting.
        placeholders = ", ".join(["%s"] * len(ids))
        sql_click = ("select count(cid) from data_feed_click "
                     "where cid_type = 'diary' "
                     "and stat_date = %s and device_id in ({})".format(placeholders))
        sql_exp = ("select count(cid) from data_feed_exposure "
                   "where cid_type = 'diary' and stat_date = %s and "
                   "device_id in ({})".format(placeholders))
        params = (date,) + ids

        # NOTE(review): the connection credentials are hard-coded; they should
        # be loaded from configuration or the environment instead.
        db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_prod')
        try:
            with db.cursor() as cursor:
                cursor.execute(sql_click, params)
                click = cursor.fetchone()[0]
                cursor.execute(sql_exp, params)
                exp = cursor.fetchone()[0]
        finally:
            # Release the connection even when a query raises.
            db.close()

    # Avoid ZeroDivisionError on a day without exposures.
    click_rate = click / exp if exp else 0.0

    print("实验用户点击数:"+str(click))
    print("实验用户曝光数:"+str(exp))
    print("实验用户点击率:"+str(click_rate))

    return click, exp, click_rate


def rate2file():
    """Append one comma-separated row of statistics to ``search_ctr.csv``.

    Reads the module-level globals ``date``, ``temp_data`` and ``data``.
    The row holds the date with its dashes removed, then the first three
    entries of ``temp_data``, then the first six entries of ``data``.
    """
    csv_path = DIRECTORY_PATH + "search_ctr.csv"
    with open(csv_path, 'a+') as out:
        fields = [date.replace('-', '')]
        # Index explicitly so a too-short tuple still raises IndexError.
        fields.extend(str(temp_data[i]) for i in range(3))
        fields.extend(str(data[i]) for i in range(6))
        out.write(",".join(fields) + '\n')

if __name__ == "__main__":
    # The assignments below create module-level globals that the functions
    # in this file read directly, so both the names and the order matter.
    # `date` is read by ctr(), ctr_all() and rate2file().
    date = get_yesterday_date()
    # `device_id` is read by ctr() and ctr_all().
    device_id = get_data()
    # `temp_data` (experiment-device stats) is read by rate2file().
    temp_data = ctr()
    # `data` (all tail-7/8 device stats) is read by rate2file().
    data = ctr_all()
    rate2file()