add a test dir in eda

652bfcb4 · 高雅喆 · 634de7a7 · 652bfcb4 · 652bfcb4 · 652bfcb4
Commit 652bfcb4 authored Aug 09, 2018 by 高雅喆
Showing with 177 additions and 1 deletion

getTop100Diary.py eda/recommended_indexs/code/getTop100Diary.py +0 -1

config.py eda/test/config.py +2 -0

getTopFeatures.py eda/test/getTopFeatures.py +138 -0

utils.py eda/test/utils.py +37 -0

No files found.
--- a/eda/recommended_indexs/code/getTop100Diary.py
+++ b/eda/recommended_indexs/code/getTop100Diary.py
@@ -21,7 +21,6 @@ def tuple2dict(tuple_result):
 def result2file(result_lst,fpath):
 	with open(fpath,'w') as f:
 		tplt = "{0:\u3000<4}\t{1:\u3000<12}\t{2:\u3000^6}\t{3:\u3000^6}\t{4:\u3000<8}\t{5:\u3000^15}\n"
-		tplt = "{0:<6}\t{1:<10}\t{2:^10}\t{3:^10}\t{4:^10}\t{5:<10}\n"
 		f.write("Top 100 diary\n")
 		f.write("=================================================================\n")
 		f.write(tplt.format("平台","diary_id","点击数","曝光数","点击率","diary链接"))

--- a/eda/test/config.py
+++ b/eda/test/config.py
+# Directory prefix for generated report files. It must keep its trailing "/"
+# because the output filename is appended by plain string concatenation.
+DIRECTORY_PATH = "/data2/models/eda/recommended_indexs/"
\ No newline at end of file
--- a/eda/test/getTopFeatures.py
+++ b/eda/test/getTopFeatures.py
+# -*- coding: UTF-8 -*-
+from utils import con_sql,tuple2dict,get_yesterday_date
+from config import DIRECTORY_PATH
+
+
+class TopFeatures(object):
+	"""Rank feed content ids of one ``cid_type`` by clicks, impressions or CTR.
+
+	Counts are read from the ``data_feed_click`` and ``data_feed_exposure``
+	tables for the single day that lies ``ndays`` days before the current
+	date, filtered by device platform.
+	"""
+
+	_URL_ROOT = "http://m.igengmei.com/"
+	_ROW_TEMPLATE = "{0:\u3000<4}\t{1:\u3000<12}\t{2:\u3000^6}\t{3:\u3000^6}\t{4:\u3000<8}\t{5:\u3000^15}\n"
+	_SEPARATOR = "=================================================================\n"
+
+	def __init__(self, ndays, platform, cid_type, top_n=-1):
+		"""
+		ndays : day offset before today that is queried (1 = yesterday)
+		platform : 'ios' or 'android'; any other value (e.g. 'all') matches
+			every row whose device_type is not null
+		cid_type : 'diary';'answer';'question'...
+		top_n : maximum number of rows in a result; a negative value or
+			None means "no limit"
+		"""
+		self.ndays = ndays
+		# The platform is stored as a ready-made SQL predicate suffix that is
+		# appended directly after the ``device_type`` column name.
+		if platform == "ios":
+			self.platform = "='AppStore'"
+		elif platform == "android":
+			self.platform = "!='AppStore'"
+		else:
+			self.platform = " is not null"
+		self.cid_type = cid_type
+		self.top_n = top_n
+
+	def _has_spaced_exposure_name(self):
+		"""Return True when cid_type has 'e' as its second-to-last character.
+
+		Such types (e.g. 'answer') are queried with a different spelling in
+		the exposure table than in the click table.
+		"""
+		return len(self.cid_type) >= 2 and self.cid_type[-2] == 'e'
+
+	def _build_count_sql(self, table, cid_type):
+		"""Return the per-cid count query for ``table`` and ``cid_type``."""
+		# NOTE(review): values are interpolated into the SQL text because
+		# con_sql() only accepts a finished string; do not pass untrusted
+		# input as ndays or cid_type.
+		return (
+			"select cid,count(cid) from {table} "
+			"where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{ndays} day) "
+			"and device_type{platform} and cid_type='{cid_type}' "
+			"group by cid order by count(cid) desc"
+		).format(table=table, ndays=self.ndays, platform=self.platform, cid_type=cid_type)
+
+	def get_click_times(self):
+		"""Return a dict mapping cid -> click count from ``data_feed_click``."""
+		cid_type = self.cid_type
+		if self._has_spaced_exposure_name():
+			# The click table uses the name without any spaces.
+			cid_type = cid_type.replace(' ', '')
+		# A local value is used instead of rewriting self.cid_type so that the
+		# result does not depend on which query method was called first.
+		return tuple2dict(con_sql(self._build_count_sql("data_feed_click", cid_type)))
+
+	def get_impression_times(self):
+		"""Return a dict mapping cid -> exposure count from ``data_feed_exposure``."""
+		cid_type = self.cid_type
+		if self._has_spaced_exposure_name():
+			compact = cid_type.replace(' ', '')
+			# NOTE(review): the previous code sliced ``[:-6]`` twice, which threw
+			# the type name away; inserting a space before the last six
+			# characters ('answer' -> ' answer') is assumed to be the intent -
+			# confirm against the values stored in data_feed_exposure.
+			cid_type = compact[:-6] + ' ' + compact[-6:]
+		return tuple2dict(con_sql(self._build_count_sql("data_feed_exposure", cid_type)))
+
+	def _build_url(self, label, cid):
+		"""Return the mobile-site URL for ``cid`` (expected form '<prefix>|<id>')."""
+		_, found, tail = cid.partition('|')
+		# Fall back to the whole cid when it carries no '|' separator.
+		item_id = tail if found else cid
+		section = "diary_book" if label == "diary" else label
+		return self._URL_ROOT + section + '/' + item_id + '/'
+
+	def _limit(self, rows):
+		"""Apply ``top_n`` to ``rows``; None or a negative top_n keeps every row."""
+		if self.top_n is None:
+			return rows
+		limit = int(self.top_n)
+		# A plain rows[:-1] would silently drop the last row for the default -1.
+		return rows if limit < 0 else rows[:limit]
+
+	def get_result(self, result_types="ctr", clk=None, imp=None, clk_n=2):
+		"""
+		Build the ranked rows ``(cid_type, cid, clicks, impressions, ctr, url)``.
+
+		result_types : "clk";"imp";"ctr"
+		clk : dict of cid -> click count
+		imp : dict of cid -> impression count
+		clk_n : for the ctr ranking, only cids with more than clk_n clicks are kept
+		rtype : list, sorted descending by the selected metric and cut to top_n
+
+		When ``imp`` is empty the click ranking is returned, and when ``clk``
+		is empty the impression ranking is returned, whatever result_types says.
+		"""
+		clk = clk or {}
+		imp = imp or {}
+		label = self.cid_type.strip()
+
+		if not imp or result_types == "clk":
+			# Top-N by click count.
+			rows = [(label, cid, count, 0, 0, self._build_url(label, cid)) for cid, count in clk.items()]
+			sort_column = 2
+		elif not clk or result_types == "imp":
+			# Top-N by impression count.
+			rows = [(label, cid, 0, count, 0, self._build_url(label, cid)) for cid, count in imp.items()]
+			sort_column = 3
+		else:
+			# Top-N by click-through rate.
+			rows = []
+			for cid, clicks in clk.items():
+				shown = imp.get(cid)
+				# Skip cids without impressions (avoids dividing by zero) and
+				# cids below the click threshold.
+				if shown and clicks > clk_n:
+					rows.append((label, cid, clicks, shown, round(clicks / shown, 4), self._build_url(label, cid)))
+			sort_column = 4
+
+		rows.sort(key=lambda row: row[sort_column], reverse=True)
+		return self._limit(rows)
+
+	def result2file(self, result_lst, fpath):
+		"""
+		Write the ranked sections to a text report.
+
+		result_lst : [all,ios,android] - one list of result rows per section
+		fpath : output filename
+		rtype : none
+		"""
+		header = self._ROW_TEMPLATE.format(
+			"平台", "{}_id".format(self.cid_type), "点击数", "曝光数", "点击率", "{}链接".format(self.cid_type))
+		last_index = len(result_lst) - 1
+		# The report contains Chinese text, so the encoding is fixed instead of
+		# depending on the platform default.
+		with open(fpath, 'w', encoding='utf-8') as f:
+			f.write("Top {0} {1}\n".format(self.top_n, self.cid_type))
+			f.write(self._SEPARATOR)
+			f.write(header)
+			for index, section in enumerate(result_lst):
+				for row in section:
+					f.write(self._ROW_TEMPLATE.format(row[0], row[1], row[2], row[3], row[4], row[5]))
+				f.write(self._SEPARATOR)
+				# Compare positions, not values: two sections with equal
+				# content must still be separated by a header.
+				if index != last_index:
+					f.write(header)
+			f.write("\n\n")
+
+
+
+
+def main():
+	"""Write yesterday's top-100 diary CTR report for all, iOS and Android devices."""
+	result_lst = []
+	top_diary = None
+	# One section per platform, in the order the report expects.
+	for platform in ("all", "ios", "android"):
+		# top_n is a constructor argument; get_result() has no such parameter.
+		top_diary = TopFeatures(1, platform, "diary", top_n=100)
+		clk_times = top_diary.get_click_times()
+		imp_times = top_diary.get_impression_times()
+		result_lst.append(top_diary.get_result("ctr", clk_times, imp_times))
+
+	# Fill the date placeholder so every run gets its own dated report file.
+	output_path = DIRECTORY_PATH + ("5top100_ctr_diary_%s.txt" % get_yesterday_date())
+	top_diary.result2file(result_lst, output_path)
+
+
+
+if __name__ == '__main__':
+	main()
+		
\ No newline at end of file
--- a/eda/test/utils.py
+++ b/eda/test/utils.py
+# -*- coding: UTF-8 -*-
+import pymysql
+import datetime
+
+def con_sql(sql):
+	"""Run ``sql`` against the ``jerry_test`` database and return all fetched rows.
+
+	:type sql : str
+	:rtype : tuple
+	"""
+	# NOTE(review): the credentials are hard-coded in source control; they
+	# should be loaded from configuration or the environment, and this
+	# password should be rotated.
+	db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
+	try:
+		cursor = db.cursor()
+		try:
+			cursor.execute(sql)
+			return cursor.fetchall()
+		finally:
+			cursor.close()
+	finally:
+		# Always release the connection, even when the query raises.
+		db.close()
+
+def tuple2dict(tuple_result):
+	"""Convert SQL result rows into a dict keyed by each row's first column.
+
+	The second column becomes the value and any further columns are ignored.
+	When a key occurs more than once, the last row wins. Any iterable of
+	indexable rows is accepted, not only a tuple of tuples.
+
+	:type tuple_result : tuple
+	:rtype : dict
+	"""
+	return {row[0]: row[1] for row in tuple_result}
+
+def get_yesterday_date():
+	"""Return yesterday's local date formatted as YYYYMMDD, e.g. "20180808".
+
+	:rtype : str
+	"""
+	one_day = datetime.timedelta(days=1)
+	return (datetime.date.today() - one_day).strftime("%Y%m%d")