# -*- coding: UTF-8 -*-
from utils import con_sql,tuple2dict,get_yesterday_date
from config import DIRECTORY_PATH


class TopFeatures(object):
	"""Rank feed content ids by clicks, impressions or click-through rate.

	Counts are read from the ``data_feed_click`` and ``data_feed_exposure``
	tables for the single day that lies ``ndays`` days before the current date.
	"""

	# Query shared by the click and impression look-ups; only the table differs.
	# NOTE(review): the values are interpolated straight into the SQL text, so
	# they must come from trusted code, never from user input.
	_COUNT_SQL = (
		"select cid,count(cid) from {table} "
		"where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{ndays} day) "
		"and device_type{platform} and cid_type='{cid_type}' "
		"group by cid order by count(cid) desc"
	)

	def __init__(self, ndays, platform, cid_type, top_n=-1):
		"""
		ndays : how many days back to query (1, 2, 3, ...)
		platform : 'ios', 'android'; any other value (e.g. 'all') matches every
			row whose device_type is not null
		cid_type : 'diary'; 'answer'; 'question'...
		top_n : maximum number of rows in the result; a negative value
			(the default) means "no limit"
		"""
		self.ndays = ndays
		# self.platform holds the SQL comparison appended to ``device_type``.
		if platform == "ios":
			self.platform = "='AppStore'"
		elif platform == "android":
			self.platform = "!='AppStore'"
		else:
			self.platform = " is not null"
		self.cid_type = cid_type
		self.top_n = top_n

	def _count_by_cid(self, table):
		"""Run the per-cid count query against ``table`` and return tuple2dict's result."""
		sql = self._COUNT_SQL.format(
			table=table,
			ndays=self.ndays,
			platform=self.platform,
			cid_type=self.cid_type,
		)
		return tuple2dict(con_sql(sql))

	def get_click_times(self):
		"""Return the per-cid click counts (rtype : dict)."""
		# NOTE(review): when the second-to-last character of cid_type is 'e',
		# every space is removed and the change is stored on the instance.
		# The purpose of this rule is not visible in this file - confirm.
		if len(self.cid_type) >= 2 and self.cid_type[-2] == 'e':
			self.cid_type = self.cid_type.replace(' ', '')
		return self._count_by_cid("data_feed_click")

	def get_impression_times(self):
		"""Return the per-cid impression counts (rtype : dict)."""
		# NOTE(review): when the second-to-last character of cid_type is 'e',
		# cid_type is replaced by "<prefix> <prefix>" where prefix is everything
		# but the last six characters, and the change is stored on the instance.
		# Kept exactly as before because the intent is unclear - confirm whether
		# the second slice was meant to be a suffix.
		if len(self.cid_type) >= 2 and self.cid_type[-2] == 'e':
			prefix = self.cid_type[:-6]
			self.cid_type = prefix + ' ' + prefix
		return self._count_by_cid("data_feed_exposure")

	def _build_url(self, cid):
		"""Build the mobile-site link for ``cid``."""
		# The id used in the link is the part after the first '|'; an id
		# without a '|' is used whole instead of raising ValueError.
		item_id = cid.split('|', 1)[-1]
		if self.cid_type == "diary":
			return "http://m.igengmei.com/diary_book/" + item_id + '/'
		return "http://m.igengmei.com/{0}/".format(self.cid_type) + item_id + '/'

	def _limit(self, rows):
		"""Apply ``top_n`` to the sorted rows; a negative ``top_n`` keeps them all."""
		limit = int(self.top_n)
		# A plain rows[:-1] would silently drop the last row for the default -1.
		if limit < 0:
			return rows
		return rows[:limit]

	def get_result(self, clk, imp, clk_n=2, result_types="ctr"):
		"""
		Build the ranked rows for one platform.

		result_types : "clk";"imp";"ctr"
		clk : dict mapping cid -> click count
		imp : dict mapping cid -> impression count
		clk_n : in "ctr" mode, only cids with more than clk_n clicks are kept
		rtype : list of (cid_type, cid, clicks, impressions, ctr, url) tuples,
			sorted in descending order of the selected metric
		"""
		label = self.cid_type.strip()
		if not imp or result_types == "clk":
			# Top N by clicks (also the fallback when there are no impressions).
			rows = [(label, cid, clk[cid], 0, 0, self._build_url(cid)) for cid in clk]
			sort_col = 2
		elif not clk or result_types == "imp":
			# Top N by impressions (also the fallback when there are no clicks).
			rows = [(label, cid, 0, imp[cid], 0, self._build_url(cid)) for cid in imp]
			sort_col = 3
		else:
			# Top N by click-through rate.
			rows = []
			for cid, clicks in clk.items():
				shown = imp.get(cid)
				# Skip cids that are missing from imp or have zero impressions
				# (the rate is undefined), and cids with too few clicks.
				if shown and clicks > clk_n:
					rows.append((label, cid, clicks, shown, round(clicks / shown, 4), self._build_url(cid)))
			sort_col = 4
		rows.sort(key=lambda row: row[sort_col], reverse=True)
		return self._limit(rows)

	def result2file(self, result_lst, fpath):
		"""
		Write the ranked rows to a text report.

		result_lst : [all,ios,android] - one list of result rows per section
		fpath : output filename
		rtype : none
		"""
		tplt = "{0:\u3000<4}\t{1:\u3000<12}\t{2:\u3000^6}\t{3:\u3000^6}\t{4:\u3000<8}\t{5:\u3000^15}\n"
		sep = "=================================================================\n"
		# NOTE(review): the first column is labelled as the platform, but the
		# rows built by get_result carry cid_type there - confirm which is meant.
		header = tplt.format("平台","{}_id".format(self.cid_type),"点击数","曝光数","点击率","{}链接".format(self.cid_type))
		last = len(result_lst) - 1
		# The report contains non-ASCII text, so the encoding is fixed instead of
		# depending on the platform default.
		with open(fpath, 'w', encoding='utf-8') as f:
			f.write("Top {0} {1}\n".format(self.top_n,self.cid_type))
			f.write(sep)
			f.write(header)
			for pos, group in enumerate(result_lst):
				for row in group:
					f.write(tplt.format(row[0],row[1],row[2],row[3],row[4],row[5]))
				f.write(sep)
				# Repeat the header before every section except after the last
				# one; checked by position because two sections can be equal.
				if pos != last:
					f.write(header)
			f.write("\n\n")


def main():
	"""Write the top-100 diary CTR report for all platforms, iOS and Android."""
	result_lst = []
	# One TopFeatures instance per platform, so each section is built from its
	# own device_type filter instead of repeating the "all" figures.
	for platform in ("all", "ios", "android"):
		top_diary = TopFeatures(1, platform, "diary", top_n=100)
		clk_times = top_diary.get_click_times()
		imp_times = top_diary.get_impression_times()
		# get_result takes (clk, imp, clk_n, result_types); the row limit is
		# the instance's top_n, not an argument of get_result.
		result_lst.append(top_diary.get_result(clk_times, imp_times, result_types="ctr"))

	# NOTE(review): assumes get_yesterday_date() takes no arguments and returns
	# a value suitable for a filename - confirm against utils.
	output_path = DIRECTORY_PATH + "5top100_ctr_diary_%s.txt" % get_yesterday_date()
	top_diary.result2file(result_lst, output_path)


# Build the report only when this file is run as a script, not on import.
if __name__ == "__main__":
	main()