# -*- coding: UTF-8 -*-
import pymysql
import datetime

def con_sql(sql):
	# 从数据库的表里获取数据
    db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
    cursor = db.cursor()
    cursor.execute(sql)
    result = cursor.fetchall()
    db.close()
    return result

def tuple2dict(tuple_result):
	#把sql结果从tuple格式转换成dict格式
	dict_result = {}
	for i in range(len(tuple_result)):
		dict_result[tuple_result[i][0]] = tuple_result[i][1]
	return dict_result

def result2file(result_lst,fpath):
	with open(fpath,'w') as f:
		tplt = "{0:\u3000<4}\t{1:\u3000<12}\t{2:\u3000^6}\t{3:\u3000^6}\t{4:\u3000<8}\t{5:\u3000^15}\n"
		f.write("2.4 Top 100 Question(曝光表里cid类型没有question，因此下面的曝光数和点击率都为0)\n")
		f.write("=================================================================\n")
		f.write(tplt.format("平台","question_id","点击数","曝光数","点击率","question链接"))
		for i in result_lst:
			for j in i:
				f.write(tplt.format(j[0],j[1],j[2],j[3],j[4],j[5]))
			f.write("=================================================================\n")
			if i != result_lst[-1]:
				f.write(tplt.format("平台","question_id","点击数","曝光数","点击率","question链接"))
		f.write("\n\n")



#1 获取昨天所有平台的top100question
#1.1 获取昨天所有平台的top100点击数的question
def get_all_question_count_by_click():
	sql = "select cid,count(cid) from data_feed_click where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and cid_type='question' group by cid order by count(cid) desc"
	all_question_count_by_click = con_sql(sql)
	all_question_count_by_click = tuple2dict(all_question_count_by_click)
	return all_question_count_by_click

#1.2 获取昨天所有平台的top100曝光数的question
def get_all_question_count_by_imp():
	sql = "select cid,count(cid) from data_feed_exposure where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and cid_type='question' group by cid order by count(cid) desc"
	all_question_count_by_imp = con_sql(sql)
	all_question_count_by_imp = tuple2dict(all_question_count_by_imp)
	return all_question_count_by_imp
#1.3 获取昨天所有平台的top100点击率的question
def get_all_top100_question_rate_by_ctr(all_question_count_by_click,all_question_count_by_imp):
	all_top100_question_rate_by_ctr = []
	if all_question_count_by_imp == {}:
		for i in all_question_count_by_click:
			url = "http://m.igengmei.com/question/" + i[i.index('|')+1:] + '/'
			all_top100_question_rate_by_ctr.append(("所有",i,all_question_count_by_click[i],0,0,url))
		all_top100_question_rate_by_ctr.sort(key=lambda x:x[2],reverse=True)
		return all_top100_question_rate_by_ctr[:100] if len(all_top100_question_rate_by_ctr) > 100 else all_top100_question_rate_by_ctr
	else:
		for i in all_question_count_by_click:
			if i in all_question_count_by_imp.keys() and all_question_count_by_click[i]>2:
				url = "http://m.igengmei.com/question/" + i[i.index('|')+1:] + '/'
				all_top100_question_rate_by_ctr.append(("所有",i,all_question_count_by_click[i],all_question_count_by_imp[i], round(all_question_count_by_click[i]/all_question_count_by_imp[i],4),url))
		all_top100_question_rate_by_ctr.sort(key=lambda x:x[4],reverse=True)
		return all_top100_question_rate_by_ctr[:100] if len(all_top100_question_rate_by_ctr) > 100 else all_top100_question_rate_by_ctr


#2 获取昨天ios平台的top100question
#2.1 获取昨天ios平台的top100点击数的question
def get_ios_question_count_by_click():
	sql = "select cid,count(cid) from data_feed_click where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and device_type='AppStore' and cid_type='question' group by cid order by count(cid) desc"
	ios_question_count_by_click = con_sql(sql)
	ios_question_count_by_click = tuple2dict(ios_question_count_by_click)
	return ios_question_count_by_click
#2.2 获取昨天ios平台的top100曝光数的question
def get_ios_question_count_by_imp():
	sql = "select cid,count(cid) from data_feed_exposure where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and device_type='App Store' and cid_type='question' group by cid order by count(cid) desc"
	ios_question_count_by_imp = con_sql(sql)
	ios_question_count_by_imp = tuple2dict(ios_question_count_by_imp)
	return ios_question_count_by_imp
#2.3 获取昨天ios平台的top100点击率的question
def get_ios_top100_question_rate_by_ctr(ios_question_count_by_click,ios_question_count_by_imp):
	ios_top100_question_rate_by_ctr = []
	if ios_question_count_by_imp == {}:
		for i in ios_question_count_by_click:
			url = "http://m.igengmei.com/question/" + i[i.index('|')+1:] + '/'
			ios_top100_question_rate_by_ctr.append(("苹果",i,ios_question_count_by_click[i],0,0,url))
		ios_top100_question_rate_by_ctr.sort(key=lambda	x:x[2],reverse=True)
		return ios_top100_question_rate_by_ctr[:100] if len(ios_top100_question_rate_by_ctr) > 100 else ios_top100_question_rate_by_ctr
	else:
		for i in ios_question_count_by_click:
			if i in ios_question_count_by_imp.keys() and ios_question_count_by_click[i]>2:
				url = "http://m.igengmei.com/question/" + i[i.index('|')+1:] + '/'
				ios_top100_question_rate_by_ctr.append(("苹果",i,ios_question_count_by_click[i],ios_question_count_by_imp[i], round(ios_question_count_by_click[i]/ios_question_count_by_imp[i],4),url))
		ios_top100_question_rate_by_ctr.sort(key=lambda x:x[4],reverse=True)
		return ios_top100_question_rate_by_ctr[:100] if len(ios_top100_question_rate_by_ctr) > 100 else ios_top100_question_rate_by_ctr


#3 获取昨天安卓平台的top100question
#3.1 获取昨天安卓平台的top100点击数的question
def get_android_question_rate_by_click():
    sql = "select cid,count(cid) from data_feed_click where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and device_type!='AppStore' and cid_type='question' group by cid order by count(cid) desc"
    android_question_count_by_click = con_sql(sql)
    android_question_count_by_click = tuple2dict(android_question_count_by_click)
    return android_question_count_by_click
#3.2 获取昨天安卓平台的top100曝光数的question
def get_android_question_rate_by_imp():
    sql = "select cid,count(cid) from data_feed_exposure where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and device_type!='App Store' and cid_type='question' group by cid order by count(cid) desc"
    android_question_count_by_imp = con_sql(sql)
    android_question_count_by_imp = tuple2dict(android_question_count_by_imp)
    return android_question_count_by_imp
#3.3 获取昨天安卓平台的top100点击率的question
def get_android_top100_question_rate_by_ctr(android_question_count_by_click,android_question_count_by_imp):
    android_top100_question_rate_by_ctr = []
    if android_question_count_by_imp == {}:
    	for i in android_question_count_by_click:
    		url = "http://m.igengmei.com/question/" + i[i.index('|')+1:] + '/'
    		android_top100_question_rate_by_ctr.append(("安卓",i,android_question_count_by_click[i],0,0,url))
    	android_top100_question_rate_by_ctr.sort(key=lambda x:x[2],reverse=True)
    	return android_top100_question_rate_by_ctr[:100] if len(android_top100_question_rate_by_ctr) > 100 else android_top100_question_rate_by_ctr
    else:
	    for i in android_question_count_by_click:
	        if i in android_question_count_by_imp.keys() and android_question_count_by_click[i]>2:
	            url = "http://m.igengmei.com/question/" + i[i.index('|')+1:] + '/'
	            android_top100_question_rate_by_ctr.append(("安卓",i,android_question_count_by_click[i],android_question_count_by_imp[i],round(android_question_count_by_click[i]/android_question_count_by_imp[i],4),url))
	    android_top100_question_rate_by_ctr.sort(key=lambda x:x[4],reverse=True)
	    return android_top100_question_rate_by_ctr[:100] if len(android_top100_question_rate_by_ctr) > 100 else android_top100_question_rate_by_ctr

if __name__ == "__main__":
	all_question_count_by_click = get_all_question_count_by_click()
	all_question_count_by_imp = get_all_question_count_by_imp()
	all_top100_question_rate_by_ctr = get_all_top100_question_rate_by_ctr(all_question_count_by_click,all_question_count_by_imp)

	ios_question_count_by_click = get_ios_question_count_by_click()
	ios_question_count_by_imp = get_ios_question_count_by_imp()
	ios_top100_question_rate_by_ctr = get_ios_top100_question_rate_by_ctr(ios_question_count_by_click,ios_question_count_by_imp)

	android_question_count_by_click = get_android_question_rate_by_click()
	android_question_count_by_imp = get_android_question_rate_by_imp()
	android_top100_question_rate_by_ctr = get_android_top100_question_rate_by_ctr(android_question_count_by_click,android_question_count_by_imp)

	result_lst = [all_top100_question_rate_by_ctr,ios_top100_question_rate_by_ctr,android_top100_question_rate_by_ctr]
	today = datetime.date.today()
	yesterday = today - datetime.timedelta(days=1)
	yesterday = yesterday.strftime("%Y%m%d")
	output_path = "/data2/models/eda/recommended_indexs/5top100_ctr_question_%s.txt" % yesterday
	result2file(result_lst,output_path)
	print("2.4已将top100点击率的question存入文件")
	print("已完成所有Top特征提取")
