Commit f0ee3a34 authored by 高雅喆

add node2vec/stat/

parent f781cf48
# -*- coding: UTF-8 -*-
import time
import pymysql
import datetime
def con_sql(sql):
    """Run ``sql`` against the ``jerry_prod`` database and return all rows.

    A new connection is opened for every call and closed before returning.

    :param sql: SQL statement, handed to the cursor unchanged.
    :type sql: str
    :return: every row produced by the statement.
    :rtype: tuple
    """
    # NOTE(review): host and credentials are hard-coded in source; consider
    # loading them from configuration or the environment instead.
    db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_prod')
    try:
        # The cursor context manager closes the cursor on exit.
        with db.cursor() as cursor:
            cursor.execute(sql)
            return cursor.fetchall()
    finally:
        # Close the connection even when execute/fetchall raises, so a
        # failing query does not leak it.
        db.close()
def get_yesterday_date():
    """Return yesterday's local date as a string such as "2018-08-08".

    :rtype: str
    """
    one_day = datetime.timedelta(days=1)
    return (datetime.date.today() - one_day).strftime("%Y-%m-%d")
# Directory prefix for generated output; main() appends "ctr.csv" to it.
OUTPUT_PATH = "/data2/models/eda/node2vec/"
class GrayStat(object):
    """Click/exposure counts for devices in ``nd_device_cid_similarity_matrix_tmp``.

    Every query is limited to a single ``stat_date``, to device ids whose
    last character matches ``uid_type``, and to devices that appear in
    ``nd_device_cid_similarity_matrix_tmp``. Devices listed in
    ``jerry_test.bl_device_list`` or ``jerry_prod.blacklist`` are excluded.
    """

    # One template for all three statistics; only the counted expression,
    # the source table and the cid_type filter differ between them.
    # NOTE(review): values are interpolated straight into the SQL text, so
    # cid_type, uid_type and ndays must come from trusted code, never from
    # user input.
    _SQL_TEMPLATE = (
        "select {counted} from {table} "
        "where stat_date='{ndays}' "
        "and {cid_filter} "
        "and device_id regexp '[{uid_type}]$' "
        "and device_id in "
        "(select device_id "
        "from nd_device_cid_similarity_matrix_tmp) "
        "and device_id not in (select distinct(device_id) from jerry_test.bl_device_list) "
        "and device_id not in (select distinct(device_id) from jerry_prod.blacklist)"
    )

    def __init__(self, cid_type, uid_type, ndays=None):
        """
        :param cid_type: content type to count, e.g. diary, answer, question.
        :param uid_type: characters placed inside the ``[...]$`` pattern that
            is matched against the end of ``device_id``: ``8`` ends with 8;
            ``6`` ends with 6; ``6|8`` ends with 6 or 8; ``^68`` ends with
            neither 6 nor 8.
        :param ndays: ``stat_date`` to query, e.g. '2018-08-30'. When omitted,
            yesterday's date is used, resolved when the instance is created.
        """
        self.cid_type = cid_type
        self.uid_type = uid_type
        # Resolve the default per instance: a call placed in the signature is
        # evaluated once at definition time and goes stale afterwards.
        self.ndays = get_yesterday_date() if ndays is None else ndays

    def _build_sql(self, counted, table, with_diary_video):
        """Return the SQL text counting ``counted`` over ``table``."""
        if with_diary_video:
            cid_filter = "(cid_type='{0}' or cid_type='diary_video')".format(self.cid_type)
        else:
            cid_filter = "cid_type='{0}'".format(self.cid_type)
        return self._SQL_TEMPLATE.format(
            counted=counted,
            table=table,
            ndays=self.ndays,
            cid_filter=cid_filter,
            uid_type=self.uid_type,
        )

    def _fetch_count(self, counted, table, with_diary_video):
        """Run the count query and return its single scalar result."""
        return con_sql(self._build_sql(counted, table, with_diary_video))[0][0]

    def get_uid_count(self):
        """Return the number of distinct devices in ``data_feed_click``."""
        return self._fetch_count("count(distinct(device_id))", "data_feed_click", True)

    def get_uid_clk_times(self):
        """Return the number of matching rows in ``data_feed_click``."""
        # Click queries also accept cid_type 'diary_video'; the exposure
        # query below does not.
        return self._fetch_count("count(device_id)", "data_feed_click", True)

    def get_uid_imp_times(self):
        """Return the number of matching rows in ``data_feed_exposure``."""
        return self._fetch_count("count(device_id)", "data_feed_exposure", False)
class AllStat(object):
    """Click/exposure counts over all non-blacklisted devices.

    Every query is limited to a single ``stat_date`` and to device ids whose
    last character matches ``uid_type``. Devices listed in
    ``jerry_test.bl_device_list`` or ``jerry_prod.blacklist`` are excluded.
    """

    # One template for all three statistics; only the counted expression,
    # the source table and the cid_type filter differ between them.
    # NOTE(review): values are interpolated straight into the SQL text, so
    # cid_type, uid_type and ndays must come from trusted code, never from
    # user input.
    _SQL_TEMPLATE = (
        "select {counted} from {table} "
        "where stat_date='{ndays}' "
        "and {cid_filter} "
        "and device_id regexp '[{uid_type}]$' "
        "and device_id not in (select distinct(device_id) from jerry_test.bl_device_list) "
        "and device_id not in (select distinct(device_id) from jerry_prod.blacklist)"
    )

    def __init__(self, cid_type, uid_type, ndays=None):
        """
        :param cid_type: content type to count, e.g. diary, answer, question.
        :param uid_type: characters placed inside the ``[...]$`` pattern that
            is matched against the end of ``device_id``: ``8`` ends with 8;
            ``6`` ends with 6; ``6|8`` ends with 6 or 8; ``^68`` ends with
            neither 6 nor 8.
        :param ndays: ``stat_date`` to query, e.g. '2018-08-30'. When omitted,
            yesterday's date is used, resolved when the instance is created.
        """
        self.cid_type = cid_type
        self.uid_type = uid_type
        # Resolve the default per instance: a call placed in the signature is
        # evaluated once at definition time and goes stale afterwards.
        self.ndays = get_yesterday_date() if ndays is None else ndays

    def _build_sql(self, counted, table, with_diary_video):
        """Return the SQL text counting ``counted`` over ``table``."""
        if with_diary_video:
            cid_filter = "(cid_type='{0}' or cid_type='diary_video')".format(self.cid_type)
        else:
            cid_filter = "cid_type='{0}'".format(self.cid_type)
        return self._SQL_TEMPLATE.format(
            counted=counted,
            table=table,
            ndays=self.ndays,
            cid_filter=cid_filter,
            uid_type=self.uid_type,
        )

    def _fetch_count(self, counted, table, with_diary_video):
        """Run the count query and return its single scalar result."""
        return con_sql(self._build_sql(counted, table, with_diary_video))[0][0]

    def get_uid_count(self):
        """Return the number of distinct devices in ``data_feed_click``."""
        return self._fetch_count("count(distinct(device_id))", "data_feed_click", True)

    def get_uid_clk_times(self):
        """Return the number of matching rows in ``data_feed_click``."""
        # Click queries also accept cid_type 'diary_video'; the exposure
        # query below does not.
        return self._fetch_count("count(device_id)", "data_feed_click", True)

    def get_uid_imp_times(self):
        """Return the number of matching rows in ``data_feed_exposure``."""
        return self._fetch_count("count(device_id)", "data_feed_exposure", False)
def _ratio(numerator, denominator):
    """Return ``numerator / denominator`` as a float, or 0.0 when the denominator is 0."""
    if not denominator:
        return 0.0
    return float(numerator) / denominator


def _percent(value):
    """Format a fraction as a percentage string rounded to two decimals."""
    return str(round(value * 100, 2)) + "%"


def main():
    """Append yesterday's CTR comparison row to ``ctr.csv`` under OUTPUT_PATH.

    Row layout: date, GrayStat CTR, AllStat CTR, relative CTR growth,
    GrayStat device count / impressions / clicks, AllStat device count /
    impressions / clicks, device coverage (GrayStat devices / AllStat devices).
    Ratios whose denominator is 0 are written as 0.0%.
    """
    output = OUTPUT_PATH + "ctr.csv"
    stat_date = get_yesterday_date()
    print("stat" + " " + stat_date)

    # Pass the date explicitly so the queries and the CSV row always agree.
    g_class = GrayStat("diary", "3|4", stat_date)
    a_class = AllStat("diary", "3|4", stat_date)

    # Run each query exactly once and reuse the results below.
    g_uids = g_class.get_uid_count()
    g_imps = g_class.get_uid_imp_times()
    g_clks = g_class.get_uid_clk_times()
    a_uids = a_class.get_uid_count()
    a_imps = a_class.get_uid_imp_times()
    a_clks = a_class.get_uid_clk_times()

    g_ctr = _ratio(g_clks, g_imps)
    a_ctr = _ratio(a_clks, a_imps)
    growth_rate = _ratio(g_ctr - a_ctr, a_ctr)
    coverage_rate = _ratio(g_uids, a_uids)

    fields = [
        stat_date,
        _percent(g_ctr),
        _percent(a_ctr),
        _percent(growth_rate),
        str(g_uids), str(g_imps), str(g_clks),
        str(a_uids), str(a_imps), str(a_clks),
        _percent(coverage_rate),
    ]
    # Open the file only after all queries succeeded, so a database failure
    # does not create or touch the output file.
    with open(output, "a+") as f:
        f.write(",".join(fields) + "\n")
# Run the statistics job only when this file is executed as a script.
if __name__ == '__main__':
    main()
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment