node2vec_ctr.py 5.27 KB
Newer Older
高雅喆's avatar
高雅喆 committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135
# -*- coding: UTF-8 -*-
import time
import pymysql
import datetime

def con_sql(sql):
	#从数据库的表里获取数据
	"""
	:type sql : str
	:rtype : tuple
	"""
	db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_prod')
	cursor = db.cursor()
	cursor.execute(sql)
	result = cursor.fetchall()
	db.close()
	return result

def get_yesterday_date():
	#自动获取昨天的日期,如"2018-08-08"
	"""
	:rtype : str
	"""
	today = datetime.date.today()
	yesterday = today - datetime.timedelta(days=1)
	yesterday = yesterday.strftime("%Y-%m-%d")
	return yesterday




OUTPUT_PATH = "/data2/models/eda/node2vec/"


class GrayStat(object):
	def __init__(self, cid_type, uid_type, ndays=get_yesterday_date()):
		"""
		cid_type : diary,answer,question
		uid_type : 8:_8结尾;6:_6结尾;6|8:_6或者_8结尾;^68:不是6或者8结尾的
		ndays : '2018-08-30'....
		"""
		self.cid_type = cid_type
		self.uid_type = uid_type
		self.ndays = ndays

	def get_uid_count(self):
		sql = "select count(distinct(device_id)) from data_feed_click \
			where stat_date='{0}' \
			and (cid_type='{1}' or cid_type='diary_video') \
			and device_id regexp '[{2}]$' \
			and device_id in \
				(select device_id \
				from nd_device_cid_similarity_matrix_tmp) \
			and device_id not in (select distinct(device_id) from jerry_test.bl_device_list) \
			and device_id not in (select distinct(device_id) from jerry_prod.blacklist)".format(self.ndays,\
				self.cid_type,self.uid_type)
		uid_count = con_sql(sql)[0][0]
		return uid_count

	def get_uid_clk_times(self):
		sql = "select count(device_id) from data_feed_click \
			where stat_date='{0}' \
			and (cid_type='{1}' or cid_type='diary_video') \
			and device_id regexp '[{2}]$' \
			and device_id in \
				(select device_id \
				from nd_device_cid_similarity_matrix_tmp) \
			and device_id not in (select distinct(device_id) from jerry_test.bl_device_list) \
			and device_id not in (select distinct(device_id) from jerry_prod.blacklist)".format(self.ndays,\
				self.cid_type,self.uid_type)
		uid_clk_times = con_sql(sql)[0][0]
		return uid_clk_times

	def get_uid_imp_times(self):
		sql = "select count(device_id) from data_feed_exposure \
			where stat_date='{0}' \
			and cid_type='{1}' \
			and device_id regexp '[{2}]$' \
			and device_id in \
				(select device_id \
				from nd_device_cid_similarity_matrix_tmp) \
			and device_id not in (select distinct(device_id) from jerry_test.bl_device_list) \
			and device_id not in (select distinct(device_id) from jerry_prod.blacklist)".format(self.ndays,\
				self.cid_type,self.uid_type)
		uid_imp_times = con_sql(sql)[0][0]
		return uid_imp_times



class AllStat(object):
	def __init__(self, cid_type, uid_type, ndays=get_yesterday_date()):
		"""
		cid_type : diary,answer,question
		uid_type : 8:_8结尾;6:_6结尾;6|8:_6或者_8结尾;^68:不是6或者8结尾的
		ndays : '2018-08-30'....
		"""
		self.cid_type = cid_type
		self.uid_type = uid_type
		self.ndays = ndays

	def get_uid_count(self):
		sql = "select count(distinct(device_id)) from data_feed_click \
			where stat_date='{0}' \
			and (cid_type='{1}' or cid_type='diary_video') \
			and device_id regexp '[{2}]$' \
			and device_id not in (select distinct(device_id) from jerry_test.bl_device_list) \
			and device_id not in (select distinct(device_id) from jerry_prod.blacklist)".format(self.ndays,\
				self.cid_type,self.uid_type)
		uid_count = con_sql(sql)[0][0]
		return uid_count

	def get_uid_clk_times(self):
		sql = "select count(device_id) from data_feed_click \
			where stat_date='{0}' \
			and (cid_type='{1}' or cid_type='diary_video') \
			and device_id regexp '[{2}]$' \
			and device_id not in (select distinct(device_id) from jerry_test.bl_device_list) \
			and device_id not in (select distinct(device_id) from jerry_prod.blacklist)".format(self.ndays,\
				self.cid_type,self.uid_type)
		uid_clk_times = con_sql(sql)[0][0]
		return uid_clk_times

	def get_uid_imp_times(self):
		sql = "select count(device_id) from data_feed_exposure \
			where stat_date='{0}' \
			and cid_type='{1}' \
			and device_id regexp '[{2}]$' \
			and device_id not in (select distinct(device_id) from jerry_test.bl_device_list) \
			and device_id not in (select distinct(device_id) from jerry_prod.blacklist)".format(self.ndays,\
				self.cid_type,self.uid_type)
		uid_imp_times = con_sql(sql)[0][0]
		return uid_imp_times


def main():
136
	output = OUTPUT_PATH + "nd_ctr.csv"
高雅喆's avatar
高雅喆 committed
137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153
	with open(output,"a+") as f:
		print("stat" + "  " + get_yesterday_date())
		g_class = GrayStat("diary","3|4")
		a_class = AllStat("diary","3|4")
		line1 = str(g_class.get_uid_count())+","+str(g_class.get_uid_imp_times())+","+str(g_class.get_uid_clk_times())
		line2 = str(a_class.get_uid_count())+","+str(a_class.get_uid_imp_times())+","+str(a_class.get_uid_clk_times())
		g_ctr = g_class.get_uid_clk_times()/g_class.get_uid_imp_times()
		a_ctr = a_class.get_uid_clk_times()/a_class.get_uid_imp_times()
		growth_rate = (g_ctr-a_ctr)/a_ctr
		coverage_rate = g_class.get_uid_count()/a_class.get_uid_count()
		line = get_yesterday_date() + "," + str(round(g_ctr*100,2))+'%' + "," + str(round(a_ctr*100,2))+'%' + "," + \
		str(round(growth_rate*100,2))+'%' + "," + line1 + "," + line2 + "," + str(round(coverage_rate*100,2)) + "%" + "\n"
		f.write(line)


if __name__ == '__main__':
	main()