Commit 2c37208a authored by 赵晨

Merge remote-tracking branch 'origin/master'

parents ed2cb21f 4fd9eed9
# -*- coding: UTF-8 -*- # -*- coding: UTF-8 -*-
import pymysql from utils import con_sql,get_yesterday_date,get_between_day
import datetime import time
import pandas as pd
DIRECTORY_PATH="/data2/ffm/" OUTPUT_PATH = "/data2/models/eda/node2vec/"
def get_yesterday_date():
#自动获取昨天的日期,如"2018-08-08" class GrayStat(object):
def __init__(self, cid_type, uid_type, ndays=get_yesterday_date()):
"""
cid_type : diary,answer,question
uid_type : 8:_8结尾;6:_6结尾;6|8:_6或者_8结尾;^68:不是6或者8结尾的
ndays : '2018-08-30'....
""" """
:rtype : str self.cid_type = cid_type
self.uid_type = uid_type
self.ndays = ndays
def get_uid_count(self):
    """Count the distinct devices that clicked on the configured day.

    The query runs against ``data_feed_click`` and keeps rows where:

    * ``stat_date`` equals ``self.ndays``;
    * ``cid_type`` equals ``self.cid_type`` or the literal ``diary_video``;
    * ``device_id`` matches the regexp ``[<self.uid_type>]$``;
    * ``device_id`` appears in ``nd_device_cid_similarity_matrix_tmp``
      for the same ``stat_date``;
    * ``device_id`` is in neither ``jerry_test.bl_device_list`` nor
      ``jerry_prod.blacklist``.

    Returns:
        The first column of the first row returned by ``con_sql``.
    """
    # NOTE(review): uid_type is placed inside a regexp character class, so a
    # value such as "3|4" also lets a literal "|" match -- confirm acceptable.
    template = (
        "select count(distinct(device_id)) from data_feed_click "
        "where stat_date='{0}' "
        "and (cid_type='{1}' or cid_type='diary_video') "
        "and device_id regexp '[{2}]$' "
        "and device_id in "
        "(select device_id "
        "from nd_device_cid_similarity_matrix_tmp "
        "where stat_date='{0}') "
        "and device_id not in (select distinct(device_id) from jerry_test.bl_device_list) "
        "and device_id not in (select distinct(device_id) from jerry_prod.blacklist)"
    )
    query = template.format(self.ndays, self.cid_type, self.uid_type)
    rows = con_sql(query)
    return rows[0][0]
def get_uid_clk_times(self):
    """Count the click rows recorded on the configured day.

    The query runs against ``data_feed_click`` and counts ``device_id``
    values (not distinct) for rows where:

    * ``stat_date`` equals ``self.ndays``;
    * ``cid_type`` equals ``self.cid_type`` or the literal ``diary_video``;
    * ``device_id`` matches the regexp ``[<self.uid_type>]$``;
    * ``device_id`` appears in ``nd_device_cid_similarity_matrix_tmp``
      for the same ``stat_date``;
    * ``device_id`` is in neither ``jerry_test.bl_device_list`` nor
      ``jerry_prod.blacklist``.

    Returns:
        The first column of the first row returned by ``con_sql``.
    """
    template = (
        "select count(device_id) from data_feed_click "
        "where stat_date='{0}' "
        "and (cid_type='{1}' or cid_type='diary_video') "
        "and device_id regexp '[{2}]$' "
        "and device_id in "
        "(select device_id "
        "from nd_device_cid_similarity_matrix_tmp "
        "where stat_date='{0}') "
        "and device_id not in (select distinct(device_id) from jerry_test.bl_device_list) "
        "and device_id not in (select distinct(device_id) from jerry_prod.blacklist)"
    )
    query = template.format(self.ndays, self.cid_type, self.uid_type)
    rows = con_sql(query)
    return rows[0][0]
def get_uid_imp_times(self):
    """Count the exposure rows recorded on the configured day.

    The query runs against ``data_feed_exposure`` and counts ``device_id``
    values (not distinct) for rows where:

    * ``stat_date`` equals ``self.ndays``;
    * ``cid_type`` equals ``self.cid_type``;
    * ``device_id`` matches the regexp ``[<self.uid_type>]$``;
    * ``device_id`` appears in ``nd_device_cid_similarity_matrix_tmp``
      for the same ``stat_date``;
    * ``device_id`` is in neither ``jerry_test.bl_device_list`` nor
      ``jerry_prod.blacklist``.

    Returns:
        The first column of the first row returned by ``con_sql``.
    """
    # NOTE(review): the click queries also accept cid_type='diary_video';
    # this exposure query does not -- confirm the asymmetry is intended.
    template = (
        "select count(device_id) from data_feed_exposure "
        "where stat_date='{0}' "
        "and cid_type='{1}' "
        "and device_id regexp '[{2}]$' "
        "and device_id in "
        "(select device_id "
        "from nd_device_cid_similarity_matrix_tmp "
        "where stat_date='{0}') "
        "and device_id not in (select distinct(device_id) from jerry_test.bl_device_list) "
        "and device_id not in (select distinct(device_id) from jerry_prod.blacklist)"
    )
    query = template.format(self.ndays, self.cid_type, self.uid_type)
    rows = con_sql(query)
    return rows[0][0]
class AllStat(object):
def __init__(self, cid_type, uid_type, ndays=get_yesterday_date()):
""" """
today = datetime.date.today() cid_type : diary,answer,question
yesterday = today - datetime.timedelta(days=1) uid_type : 8:_8结尾;6:_6结尾;6|8:_6或者_8结尾;^68:不是6或者8结尾的
yesterday = yesterday.strftime("%Y-%m-%d") ndays : '2018-08-30'....
print(yesterday) """
return yesterday self.cid_type = cid_type
#today = datetime.date.today().strftime("%Y%m%d") self.uid_type = uid_type
#return today self.ndays = ndays
def get_uid_count(self):
def get_data(): sql = "select count(distinct(device_id)) from data_feed_click \
conn2db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_prod') where stat_date='{0}' \
cursor = conn2db.cursor() and (cid_type='{1}' or cid_type='diary_video') \
sql = "select device_id from nd_device_cid_similarity_matrix where device_id regexp '[3|4]$'" and device_id regexp '[{2}]$' \
cursor.execute(sql) and device_id not in (select distinct(device_id) from jerry_test.bl_device_list) \
result = cursor.fetchall() and device_id not in (select distinct(device_id) from jerry_prod.blacklist)".format(self.ndays,\
device_id = tuple(pd.DataFrame(list(result))[0].values.tolist()) self.cid_type,self.uid_type)
cursor.close() uid_count = con_sql(sql)[0][0]
return device_id return uid_count
def get_uid_clk_times(self):
def ctr(date): sql = "select count(device_id) from data_feed_click \
device_id = get_data() where stat_date='{0}' \
sql_click = "select count(cid) from data_feed_click " \ and (cid_type='{1}' or cid_type='diary_video') \
"where cid_type = 'diary' " \ and device_id regexp '[{2}]$' \
"and stat_date = '{}' and device_id in {};".format(date,device_id) and device_id not in (select distinct(device_id) from jerry_test.bl_device_list) \
db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_prod') and device_id not in (select distinct(device_id) from jerry_prod.blacklist)".format(self.ndays,\
cursor = db.cursor() self.cid_type,self.uid_type)
cursor.execute(sql_click) uid_clk_times = con_sql(sql)[0][0]
click = cursor.fetchone()[0] return uid_clk_times
print("点击数:"+str(click))
sql_exp = "select count(cid) from data_feed_exposure " \ def get_uid_imp_times(self):
"where cid_type = 'diary' and stat_date = '{}' and " \ sql = "select count(device_id) from data_feed_exposure \
"device_id in {}".format(date,device_id) where stat_date='{0}' \
cursor.execute(sql_exp) and cid_type='{1}' \
exp = cursor.fetchone()[0] and device_id regexp '[{2}]$' \
print("曝光数:"+str(exp)) and device_id not in (select distinct(device_id) from jerry_test.bl_device_list) \
if exp != 0: and device_id not in (select distinct(device_id) from jerry_prod.blacklist)".format(self.ndays,\
print("点击率:"+str(click/exp)) self.cid_type,self.uid_type)
uid_imp_times = con_sql(sql)[0][0]
return click,exp,click/exp return uid_imp_times
def rate2file():
output_path = DIRECTORY_PATH + "node2vec_ctr.csv" def main():
with open(output_path,'a+') as f: date_list = get_between_day('2018-10-11','2018-10-14')
line = get_yesterday_date().replace('-', '')+','+str(temp_data[0])+','+str(temp_data[1])+','+str(temp_data[2])+'\n' output = OUTPUT_PATH + "ctr.csv"
result = []
for my_date in date_list:
print("stat" + " " + my_date)
g_class = GrayStat("diary","3|4",my_date)
a_class = AllStat("diary","3|4",my_date)
line1 = str(g_class.get_uid_count())+"\t"+str(g_class.get_uid_imp_times())+"\t"+str(g_class.get_uid_clk_times())
line2 = str(a_class.get_uid_count())+"\t"+str(a_class.get_uid_imp_times())+"\t"+str(a_class.get_uid_clk_times())
g_ctr = g_class.get_uid_clk_times()/g_class.get_uid_imp_times()
a_ctr = a_class.get_uid_clk_times()/a_class.get_uid_imp_times()
growth_rate = (g_ctr-a_ctr)/a_ctr
line = my_date + "\t" + str(round(g_ctr*100,2))+'%' + "\t" + str(round(a_ctr*100,2))+'%' + "\t" + \
str(round(growth_rate*100,2))+'%' + "\t" + line1 + "\t" + line2 + "\n"
result.append(line)
with open(output,"a+") as f:
for line in result:
f.write(line) f.write(line)
if __name__ == "__main__":
#ctr(date) if __name__ == '__main__':
date = get_yesterday_date() main()
temp_data = ctr(date) \ No newline at end of file
rate2file()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment