# Recommendation_strategy_all.py
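# This file computes the CTR over all users after merging the three strategies: ffm, node2vec and search recommendation.
# Users whose device_id ends in 1 are the control group.
# Daily new users could not be excluded from this table.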


# -*- coding: UTF-8 -*-
from utils import con_sql,get_yesterday_date,get_between_day
import time
import sys

OUTPUT_PATH = "/data2/models/eda/gray_stat/"

def get_imp_count_all(stragety,sta_date):
    """Return the diary exposure count for one device-id suffix on one day.

    Counts ``cid_id`` rows in ``jerry_prod.data_feed_exposure`` whose
    ``cid_type`` is 'diary', whose ``device_id`` matches the regular
    expression ``<stragety>$`` and whose ``stat_date`` equals ``sta_date``.
    Devices listed in ``jerry_test.bl_device_list`` or
    ``jerry_prod.blacklist`` are excluded (per the original comment these
    are the organization users and the blacklisted users).

    :param stragety: regex fragment matched against the end of device_id,
        e.g. ``'1'`` or ``'[1|2]'``.
    :param sta_date: value compared against the ``stat_date`` column.
    :return: first column of the first row returned by ``con_sql``.
    """
    # Both arguments are interpolated verbatim into the SQL text.
    template = (
        "select count(cid_id) from jerry_prod.data_feed_exposure where "
        "cid_type = 'diary' "
        "and device_id regexp'{}$' "
        "and device_id not in (select distinct(device_id) from jerry_test.bl_device_list) "
        "and device_id not in (select device_id from jerry_prod.blacklist) and stat_date='{}'"
    )
    query = template.format(stragety, sta_date)
    rows = con_sql(query)
    return rows[0][0]

def get_clk_count_all(stragety,sta_date):
    """Return the diary click count for one device-id suffix on one day.

    Counts ``cid_id`` rows in ``jerry_prod.data_feed_click`` whose
    ``cid_type`` is 'diary' or 'diary_video', whose ``device_id`` matches
    the regular expression ``<stragety>$`` and whose ``stat_date`` equals
    ``sta_date``. Devices listed in ``jerry_test.bl_device_list`` or
    ``jerry_prod.blacklist`` are excluded.

    :param stragety: regex fragment matched against the end of device_id,
        e.g. ``'1'`` or ``'[1|2]'``.
    :param sta_date: value compared against the ``stat_date`` column.
    :return: first column of the first row returned by ``con_sql``.
    """
    # Both arguments are interpolated verbatim into the SQL text.
    template = (
        "select count(cid_id) from jerry_prod.data_feed_click "
        "where (cid_type = 'diary' or cid_type = 'diary_video') "
        "and device_id regexp'{}$'  "
        "and device_id not in (select device_id from jerry_test.bl_device_list) "
        "and device_id not in (select device_id from jerry_prod.blacklist) "
        "and stat_date='{}'"
    )
    query = template.format(stragety, sta_date)
    rows = con_sql(query)
    return rows[0][0]


def compute_ctr(clicks, impressions):
    """Return ``clicks / impressions`` rounded to 6 decimals.

    Returns 0.0 when ``impressions`` is zero (or otherwise falsy) so that a
    bucket without any exposure on a given day does not abort the whole run
    with ZeroDivisionError.
    """
    if not impressions:
        return 0.0
    return round(clicks / impressions, 6)


def build_csv_row(my_date, pair_clk, pair_imp, odd_clk, odd_imp):
    """Build one line of recommendation.csv for a single day.

    :param my_date: the day the counts belong to (written as first column).
    :param pair_clk: click counts for the suffix pairs 1|2, 3|4, 5|6, 7|8.
    :param pair_imp: exposure counts for the same suffix pairs.
    :param odd_clk: click counts for the odd suffixes 1, 3, 5, 7.
    :param odd_imp: exposure counts for the odd suffixes 1, 3, 5, 7.
    :return: a comma-separated line terminated by a newline.

    Column order: date, pair clicks, pair exposures, odd clicks,
    odd exposures, even clicks, even exposures, pair CTRs, then the
    per-suffix CTRs interleaved as 1, 2, 3, 4, 5, 6, 7, 8.
    """
    # The even-suffix numbers are never queried directly; they are what is
    # left of each pair once the odd suffix has been subtracted.
    even_clk = [pair - odd for pair, odd in zip(pair_clk, odd_clk)]
    even_imp = [pair - odd for pair, odd in zip(pair_imp, odd_imp)]

    pair_ctr = [compute_ctr(c, i) for c, i in zip(pair_clk, pair_imp)]
    odd_ctr = [compute_ctr(c, i) for c, i in zip(odd_clk, odd_imp)]
    even_ctr = [compute_ctr(c, i) for c, i in zip(even_clk, even_imp)]

    single_ctr = []
    for odd, even in zip(odd_ctr, even_ctr):
        single_ctr.extend([odd, even])

    fields = [my_date]
    for group in (pair_clk, pair_imp, odd_clk, odd_imp,
                  even_clk, even_imp, pair_ctr, single_ctr):
        fields.extend(group)
    return ','.join(str(field) for field in fields) + '\n'


if __name__ == '__main__':

    if len(sys.argv) != 2:
        print("usage: python {} date".format(sys.argv[0]))
        # Without a start date there is nothing to do; previously execution
        # fell through and crashed on sys.argv[1].
        sys.exit(1)

    date1 = sys.argv[1]
    yesterday = get_yesterday_date()
    date_list = get_between_day(date1, yesterday)
    # Regex fragments matched against the end of device_id. Inside a
    # character class '|' is a literal, so '[1|2]' means "1, | or 2".
    stragety_list = ['[1|2]', '[3|4]', '[5|6]', '[7|8]']
    stragety_l = ['1', '3', '5', '7']
    output_path = OUTPUT_PATH + "recommendation.csv"
    start_time = time.time()

    for my_date in date_list:
        print ("开始获取{}数据".format(my_date))

        result1_clk = []
        result1_imp = []
        for suffix in stragety_list:
            result1_clk.append(get_clk_count_all(suffix, my_date))
            result1_imp.append(get_imp_count_all(suffix, my_date))

        # NOTE(review): the original called get_clk_count / get_imp_count,
        # which are neither defined nor imported in this file (NameError).
        # The *_all helpers accept any suffix regex, so they are used for
        # the single-digit suffixes as well - confirm this matches the
        # intended query.
        result2_clk = []
        result2_imp = []
        for suffix in stragety_l:
            result2_clk.append(get_clk_count_all(suffix, my_date))
            result2_imp.append(get_imp_count_all(suffix, my_date))

        # Progress output: clicks, exposures and CTR of the first pair,
        # followed by the CTR of the second pair.
        print(result1_clk[0])
        print(result1_imp[0])
        print(compute_ctr(result1_clk[0], result1_imp[0]))
        print(compute_ctr(result1_clk[1], result1_imp[1]))

        line = build_csv_row(my_date, result1_clk, result1_imp,
                             result2_clk, result2_imp)
        # Append per day so that finished days survive a later failure.
        with open(output_path, 'a') as f:
            f.write(line)

    end_time = time.time()
    print("程序执行时间:{}s".format(end_time - start_time))