getTop100Answer.py 7.59 KB
# -*- coding: UTF-8 -*-
import pymysql
import datetime

def con_sql(sql):
	# 从数据库的表里获取数据
    db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
    cursor = db.cursor()
    cursor.execute(sql)
    result = cursor.fetchall()
    db.close()
    return result

def tuple2dict(tuple_result):
	#把sql结果从tuple格式转换成dict格式
	dict_result = {}
	for i in range(len(tuple_result)):
		dict_result[tuple_result[i][0]] = tuple_result[i][1]
	return dict_result

def result2file(result_lst,fpath):
	with open(fpath,'w') as f:
		tplt = "{0:\u3000<4}\t{1:\u3000<12}\t{2:\u3000^6}\t{3:\u3000^6}\t{4:\u3000<8}\t{5:\u3000^15}\n"
		f.write("Top 100 Answer\n")
		f.write("=================================================================\n")
		f.write(tplt.format("平台","answer_id","点击数","曝光数","点击率","answer链接"))
		for i in result_lst:
			for j in i:
				f.write(tplt.format(j[0],j[1],j[2],j[3],j[4],j[5]))
			f.write("=================================================================\n")
			if i != result_lst[-1]:
				f.write(tplt.format("平台","answer_id","点击数","曝光数","点击率","answer链接"))
		f.write("\n\n")



#1 获取昨天所有平台的top100answer
#1.1 获取昨天所有平台的top100点击数的answer
def get_all_answer_count_by_click():
	sql = "select cid,count(cid) from data_feed_click where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and cid_type='answer' group by cid order by count(cid) desc"
	all_answer_count_by_click = con_sql(sql)
	all_answer_count_by_click = tuple2dict(all_answer_count_by_click)
	return all_answer_count_by_click

#1.2 获取昨天所有平台的top100曝光数的answer
def get_all_answer_count_by_imp():
	sql = "select cid,count(cid) from data_feed_exposure where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and cid_type='answer' group by cid order by count(cid) desc"
	all_answer_count_by_imp = con_sql(sql)
	all_answer_count_by_imp = tuple2dict(all_answer_count_by_imp)
	return all_answer_count_by_imp
#1.3 获取昨天所有平台的top100点击率的answer
def get_all_top100_answer_rate_by_ctr(all_answer_count_by_click,all_answer_count_by_imp):
	all_top100_answer_rate_by_ctr = []
	for i in all_answer_count_by_click:
		if i in all_answer_count_by_imp.keys() and all_answer_count_by_click[i]>2:
			url = "http://m.igengmei.com/answer/" + i[i.index('|')+1:] + '/'
			all_top100_answer_rate_by_ctr.append(("所有",i,all_answer_count_by_click[i],all_answer_count_by_imp[i], round(all_answer_count_by_click[i]/all_answer_count_by_imp[i],4),url))
	all_top100_answer_rate_by_ctr.sort(key=lambda x:x[4],reverse=True)
	return all_top100_answer_rate_by_ctr[:100] if len(all_top100_answer_rate_by_ctr) > 100 else all_top100_answer_rate_by_ctr


#2 获取昨天ios平台的top100answer
#2.1 获取昨天ios平台的top100点击数的answer
def get_ios_answer_count_by_click():
	sql = "select cid,count(cid) from data_feed_click where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and device_type='AppStore' and cid_type='answer' group by cid order by count(cid) desc"
	ios_answer_count_by_click = con_sql(sql)
	ios_answer_count_by_click = tuple2dict(ios_answer_count_by_click)
	return ios_answer_count_by_click
#2.2 获取昨天ios平台的top100曝光数的answer
def get_ios_answer_count_by_imp():
	sql = "select cid,count(cid) from data_feed_exposure where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and device_type='App Store' and cid_type='answer' group by cid order by count(cid) desc"
	ios_answer_count_by_imp = con_sql(sql)
	ios_answer_count_by_imp = tuple2dict(ios_answer_count_by_imp)
	return ios_answer_count_by_imp
#2.3 获取昨天ios平台的top100点击率的answer
def get_ios_top100_answer_rate_by_ctr(ios_answer_count_by_click,ios_answer_count_by_imp):
	ios_top100_answer_rate_by_ctr = []
	for i in ios_answer_count_by_click:
		if i in ios_answer_count_by_imp.keys() and ios_answer_count_by_click[i]>2:
			url = "http://m.igengmei.com/answer/" + i[i.index('|')+1:] + '/'
			ios_top100_answer_rate_by_ctr.append(("苹果",i,ios_answer_count_by_click[i],ios_answer_count_by_imp[i], round(ios_answer_count_by_click[i]/ios_answer_count_by_imp[i],4),url))
	ios_top100_answer_rate_by_ctr.sort(key=lambda x:x[4],reverse=True)
	return ios_top100_answer_rate_by_ctr[:100] if len(ios_top100_answer_rate_by_ctr) > 100 else ios_top100_answer_rate_by_ctr


#3 获取昨天安卓平台的top100answer
#3.1 获取昨天安卓平台的top100点击数的answer
def get_android_answer_rate_by_click():
    sql = "select cid,count(cid) from data_feed_click where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and device_type!='AppStore' and cid_type='answer' group by cid order by count(cid) desc"
    android_answer_count_by_click = con_sql(sql)
    android_answer_count_by_click = tuple2dict(android_answer_count_by_click)
    return android_answer_count_by_click
#3.2 获取昨天安卓平台的top100曝光数的answer
def get_android_answer_rate_by_imp():
    sql = "select cid,count(cid) from data_feed_exposure where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and device_type!='App Store' and cid_type='answer' group by cid order by count(cid) desc"
    android_answer_count_by_imp = con_sql(sql)
    android_answer_count_by_imp = tuple2dict(android_answer_count_by_imp)
    return android_answer_count_by_imp
#3.3 获取昨天安卓平台的top100点击率的answer
def get_android_top100_answer_rate_by_ctr(android_answer_count_by_click,android_answer_count_by_imp):
    android_top100_answer_rate_by_ctr = []
    for i in android_answer_count_by_click:
        if i in android_answer_count_by_imp.keys() and android_answer_count_by_click[i]>2:
            url = "http://m.igengmei.com/answer/" + i[i.index('|')+1:] + '/'
            android_top100_answer_rate_by_ctr.append(("安卓",i,android_answer_count_by_click[i],android_answer_count_by_imp[i],round(android_answer_count_by_click[i]/android_answer_count_by_imp[i],4),url))
    android_top100_answer_rate_by_ctr.sort(key=lambda x:x[4],reverse=True)
    return android_top100_answer_rate_by_ctr[:100] if len(android_top100_answer_rate_by_ctr) > 100 else android_top100_answer_rate_by_ctr

if __name__ == "__main__":
	print("开始获取top100点击率的answer...")
	all_answer_count_by_click = get_all_answer_count_by_click()
	all_answer_count_by_imp = get_all_answer_count_by_imp()
	all_top100_answer_rate_by_ctr = get_all_top100_answer_rate_by_ctr(all_answer_count_by_click,all_answer_count_by_imp)
	print("4.1已获得所有平台的top100点击率的answer")

	ios_answer_count_by_click = get_ios_answer_count_by_click()
	ios_answer_count_by_imp = get_ios_answer_count_by_imp()
	ios_top100_answer_rate_by_ctr = get_ios_top100_answer_rate_by_ctr(ios_answer_count_by_click,ios_answer_count_by_imp)
	print("4.2已获得ios平台的top100点击率的answer")

	android_answer_count_by_click = get_android_answer_rate_by_click()
	android_answer_count_by_imp = get_android_answer_rate_by_imp()
	android_top100_answer_rate_by_ctr = get_android_top100_answer_rate_by_ctr(android_answer_count_by_click,android_answer_count_by_imp)
	print("4.3已获得安卓平台的top100点击率的answer")

	result_lst = [all_top100_answer_rate_by_ctr,ios_top100_answer_rate_by_ctr,android_top100_answer_rate_by_ctr]
	today = datetime.date.today()
	yesterday = today - datetime.timedelta(days=1)
	yesterday = yesterday.strftime("%Y%m%d")
	output_path = "/data2/models/eda/recommended_indexs/4top100_ctr_answer_%s.txt" % yesterday
	result2file(result_lst,output_path)
	print("已将top100点击率的answer存入文件")