# all_ctr.py
# -*- coding: UTF-8 -*-
import time
import pymysql
import datetime

def con_sql(sql):
	"""Execute one SQL statement on the jerry_prod database and return all rows.

	A new connection is opened for every call and is released again before
	returning, including when executing the statement or fetching rows fails.

	:type sql : str
	:param sql : complete SQL statement to execute
	:rtype : tuple
	:return : every row produced by the statement (``cursor.fetchall()``)
	"""
	# NOTE(review): host and credentials are hard-coded in source; consider
	# moving them to configuration or environment variables.
	db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_prod')
	try:
		cursor = db.cursor()
		try:
			cursor.execute(sql)
			return cursor.fetchall()
		finally:
			cursor.close()
	finally:
		# Skip close() when a failed query already tore the connection down,
		# so the original error is not replaced by an "Already closed" error.
		if db.open:
			db.close()

def get_yesterday_date():
	"""Return yesterday's date (relative to the local system date) as text.

	The result is formatted as year-month-day, e.g. "2018-08-08".

	:rtype : str
	"""
	one_day = datetime.timedelta(days=1)
	return (datetime.date.today() - one_day).strftime("%Y-%m-%d")




# Directory prefix for the output file; the file name is appended directly,
# so the trailing slash is required.
OUTPUT_PATH = "/data2/models/eda/node2vec/"


class AllStat(object):
	"""Click and exposure counts for one content type and one device-id tail group on one day.

	Every query is restricted to rows whose ``stat_date`` equals ``ndays`` and
	whose ``device_id`` ends in a character matched by ``[uid_type]``, and it
	excludes devices listed in ``jerry_test.bl_device_list`` or
	``jerry_prod.blacklist``.
	"""

	def __init__(self, cid_type, uid_type, ndays=None):
		"""
		cid_type : diary,answer,question
		uid_type : body of a regex character class applied to the end of device_id,
			e.g. 8: ends with 8; 6: ends with 6; 6|8: ends with 6 or 8;
			^68: does not end with 6 or 8
		ndays : stat date such as '2018-08-30'; defaults to yesterday, resolved
			when the instance is created (not when the class is defined)
		"""
		# NOTE(review): the value is placed inside [...], where '|' is a literal
		# character, so "6|8" would also match ids ending in '|' - confirm harmless.
		self.cid_type = cid_type
		self.uid_type = uid_type
		# Resolve the default per instance so a long-lived process does not keep
		# using the date that was current at import time.
		self.ndays = get_yesterday_date() if ndays is None else ndays

	def get_uid_count(self):
		"""Return the number of distinct devices with a click row in data_feed_click.

		Rows with cid_type equal to ``self.cid_type`` or 'diary_video' are counted.
		"""
		# NOTE(review): values are interpolated into the SQL text unescaped;
		# only pass trusted cid_type / uid_type / ndays values.
		sql = "select count(distinct(device_id)) from data_feed_click \
			where stat_date='{0}' \
			and (cid_type='{1}' or cid_type='diary_video') \
			and device_id regexp '[{2}]$' \
			and device_id not in (select distinct(device_id) from jerry_test.bl_device_list) \
			and device_id not in (select distinct(device_id) from jerry_prod.blacklist)".format(self.ndays,\
				self.cid_type,self.uid_type)
		# The aggregate query yields one row with one column.
		uid_count = con_sql(sql)[0][0]
		return uid_count

	def get_uid_clk_times(self):
		"""Return the number of click rows in data_feed_click.

		Rows with cid_type equal to ``self.cid_type`` or 'diary_video' are counted.
		"""
		sql = "select count(device_id) from data_feed_click \
			where stat_date='{0}' \
			and (cid_type='{1}' or cid_type='diary_video') \
			and device_id regexp '[{2}]$' \
			and device_id not in (select distinct(device_id) from jerry_test.bl_device_list) \
			and device_id not in (select distinct(device_id) from jerry_prod.blacklist)".format(self.ndays,\
				self.cid_type,self.uid_type)
		# The aggregate query yields one row with one column.
		uid_clk_times = con_sql(sql)[0][0]
		return uid_clk_times

	def get_uid_imp_times(self):
		"""Return the number of exposure rows in data_feed_exposure.

		Only rows with cid_type equal to ``self.cid_type`` are counted.
		"""
		# NOTE(review): unlike the click queries, 'diary_video' is not included
		# here - confirm this asymmetry is intentional.
		sql = "select count(device_id) from data_feed_exposure \
			where stat_date='{0}' \
			and cid_type='{1}' \
			and device_id regexp '[{2}]$' \
			and device_id not in (select distinct(device_id) from jerry_test.bl_device_list) \
			and device_id not in (select distinct(device_id) from jerry_prod.blacklist)".format(self.ndays,\
				self.cid_type,self.uid_type)
		# The aggregate query yields one row with one column.
		uid_imp_times = con_sql(sql)[0][0]
		return uid_imp_times




def main():
	"""Append one CSV row of yesterday's diary click-through rates to all_ctr.csv.

	The row holds the stat date followed by one CTR per device-id tail group,
	in the order of ``tail_nu``. Each CTR is click rows / exposure rows as
	reported by AllStat, rounded to 4 decimal places. A group without any
	exposure rows is written as 0.0 instead of raising ZeroDivisionError.
	"""
	output = OUTPUT_PATH + "all_ctr.csv"
	# Resolve the date once so the queries and the CSV row always agree,
	# even if the run crosses midnight.
	stat_date = get_yesterday_date()
	tail_nu = ["1|2","3|4","5|6","7|8","0-9"]
	result_ctr = []
	for tail in tail_nu:
		stat = AllStat("diary", tail, stat_date)
		clicks = stat.get_uid_clk_times()
		impressions = stat.get_uid_imp_times()
		if impressions:
			# float() forces true division on Python 2 as well.
			ctr = round(float(clicks) / impressions, 4)
		else:
			ctr = 0.0
		result_ctr.append(str(ctr))
	line = stat_date + ',' + ','.join(result_ctr) + '\n'
	# Open the file only after every query succeeded.
	with open(output, "a+") as f:
		f.write(line)
# Committed by 高雅喆.

# Run the report only when this file is executed as a script, not on import.
if __name__ == '__main__':
	main()
# End of all_ctr.py.