# -*- coding: UTF-8 -*- import pymysql import datetime def con_sql(sql): # 从数据库的表里获取数据 db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test') cursor = db.cursor() cursor.execute(sql) result = cursor.fetchall() db.close() return result def tuple2dict(tuple_result): #把sql结果从tuple格式转换成dict格式 dict_result = {} for i in range(len(tuple_result)): dict_result[tuple_result[i][0]] = tuple_result[i][1] return dict_result def result2file(result_lst,fpath): with open(fpath,'w') as f: tplt = "{0:\u3000<4}\t{1:\u3000<12}\t{2:\u3000^6}\t{3:\u3000^6}\t{4:\u3000<8}\t{5:\u3000^15}\n" f.write("2.4 Top 100 Question\n") f.write("=================================================================\n") f.write(tplt.format("平台","question_id","点击数","曝光数","点击率","question链接")) for i in result_lst: for j in i: f.write(tplt.format(j[0],j[1],j[2],j[3],j[4],j[5])) f.write("=================================================================\n") if i != result_lst[-1]: f.write(tplt.format("平台","question_id","点击数","曝光数","点击率","question链接")) f.write("\n\n") #1 获取昨天所有平台的top100question #1.1 获取昨天所有平台的top100点击数的question def get_all_question_count_by_click(): sql = "select cid,count(cid) from data_feed_click where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and cid_type='question' group by cid order by count(cid) desc" all_question_count_by_click = con_sql(sql) all_question_count_by_click = tuple2dict(all_question_count_by_click) return all_question_count_by_click #1.2 获取昨天所有平台的top100曝光数的question def get_all_question_count_by_imp(): sql = "select cid,count(cid) from data_feed_exposure where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and cid_type='question' group by cid order by count(cid) desc" all_question_count_by_imp = con_sql(sql) all_question_count_by_imp = tuple2dict(all_question_count_by_imp) return all_question_count_by_imp #1.3 获取昨天所有平台的top100点击率的question def get_all_top100_question_rate_by_ctr(all_question_count_by_click,all_question_count_by_imp): all_top100_question_rate_by_ctr = [] if all_question_count_by_imp == {}: for i in all_question_count_by_click: url = "http://m.igengmei.com/question/" + i[i.index('|')+1:] + '/' all_top100_question_rate_by_ctr.append(("所有",i,all_question_count_by_click[i],0,0,url)) all_top100_question_rate_by_ctr.sort(key=lambda x:x[2],reverse=True) return all_top100_question_rate_by_ctr[:100] if len(all_top100_question_rate_by_ctr) > 100 else all_top100_question_rate_by_ctr else: for i in all_question_count_by_click: if i in all_question_count_by_imp.keys() and all_question_count_by_click[i]>2: url = "http://m.igengmei.com/question/" + i[i.index('|')+1:] + '/' all_top100_question_rate_by_ctr.append(("所有",i,all_question_count_by_click[i],all_question_count_by_imp[i], round(all_question_count_by_click[i]/all_question_count_by_imp[i],4),url)) all_top100_question_rate_by_ctr.sort(key=lambda x:x[4],reverse=True) return all_top100_question_rate_by_ctr[:100] if len(all_top100_question_rate_by_ctr) > 100 else all_top100_question_rate_by_ctr #2 获取昨天ios平台的top100question #2.1 获取昨天ios平台的top100点击数的question def get_ios_question_count_by_click(): sql = "select cid,count(cid) from data_feed_click where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and device_type='AppStore' and cid_type='question' group by cid order by count(cid) desc" ios_question_count_by_click = con_sql(sql) ios_question_count_by_click = tuple2dict(ios_question_count_by_click) return ios_question_count_by_click #2.2 获取昨天ios平台的top100曝光数的question def get_ios_question_count_by_imp(): sql = "select cid,count(cid) from data_feed_exposure where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and device_type='App Store' and cid_type='question' group by cid order by count(cid) desc" ios_question_count_by_imp = con_sql(sql) ios_question_count_by_imp = tuple2dict(ios_question_count_by_imp) return ios_question_count_by_imp #2.3 获取昨天ios平台的top100点击率的question def get_ios_top100_question_rate_by_ctr(ios_question_count_by_click,ios_question_count_by_imp): ios_top100_question_rate_by_ctr = [] if ios_question_count_by_imp == {}: for i in ios_question_count_by_click: url = "http://m.igengmei.com/question/" + i[i.index('|')+1:] + '/' ios_top100_question_rate_by_ctr.append(("苹果",i,ios_question_count_by_click[i],0,0,url)) ios_top100_question_rate_by_ctr.sort(key=lambda x:x[2],reverse=True) return ios_top100_question_rate_by_ctr[:100] if len(ios_top100_question_rate_by_ctr) > 100 else ios_top100_question_rate_by_ctr else: for i in ios_question_count_by_click: if i in ios_question_count_by_imp.keys() and ios_question_count_by_click[i]>2: url = "http://m.igengmei.com/question/" + i[i.index('|')+1:] + '/' ios_top100_question_rate_by_ctr.append(("苹果",i,ios_question_count_by_click[i],ios_question_count_by_imp[i], round(ios_question_count_by_click[i]/ios_question_count_by_imp[i],4),url)) ios_top100_question_rate_by_ctr.sort(key=lambda x:x[4],reverse=True) return ios_top100_question_rate_by_ctr[:100] if len(ios_top100_question_rate_by_ctr) > 100 else ios_top100_question_rate_by_ctr #3 获取昨天安卓平台的top100question #3.1 获取昨天安卓平台的top100点击数的question def get_android_question_rate_by_click(): sql = "select cid,count(cid) from data_feed_click where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and device_type!='AppStore' and cid_type='question' group by cid order by count(cid) desc" android_question_count_by_click = con_sql(sql) android_question_count_by_click = tuple2dict(android_question_count_by_click) return android_question_count_by_click #3.2 获取昨天安卓平台的top100曝光数的question def get_android_question_rate_by_imp(): sql = "select cid,count(cid) from data_feed_exposure where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and device_type!='App Store' and cid_type='question' group by cid order by count(cid) desc" android_question_count_by_imp = con_sql(sql) android_question_count_by_imp = tuple2dict(android_question_count_by_imp) return android_question_count_by_imp #3.3 获取昨天安卓平台的top100点击率的question def get_android_top100_question_rate_by_ctr(android_question_count_by_click,android_question_count_by_imp): android_top100_question_rate_by_ctr = [] if android_question_count_by_imp == {}: for i in android_question_count_by_click: url = "http://m.igengmei.com/question/" + i[i.index('|')+1:] + '/' android_top100_question_rate_by_ctr.append(("安卓",i,android_question_count_by_click[i],0,0,url)) android_top100_question_rate_by_ctr.sort(key=lambda x:x[2],reverse=True) return android_top100_question_rate_by_ctr[:100] if len(android_top100_question_rate_by_ctr) > 100 else android_top100_question_rate_by_ctr else: for i in android_question_count_by_click: if i in android_question_count_by_imp.keys() and android_question_count_by_click[i]>2: url = "http://m.igengmei.com/question/" + i[i.index('|')+1:] + '/' android_top100_question_rate_by_ctr.append(("安卓",i,android_question_count_by_click[i],android_question_count_by_imp[i],round(android_question_count_by_click[i]/android_question_count_by_imp[i],4),url)) android_top100_question_rate_by_ctr.sort(key=lambda x:x[4],reverse=True) return android_top100_question_rate_by_ctr[:100] if len(android_top100_question_rate_by_ctr) > 100 else android_top100_question_rate_by_ctr if __name__ == "__main__": all_question_count_by_click = get_all_question_count_by_click() all_question_count_by_imp = get_all_question_count_by_imp() all_top100_question_rate_by_ctr = get_all_top100_question_rate_by_ctr(all_question_count_by_click,all_question_count_by_imp) ios_question_count_by_click = get_ios_question_count_by_click() ios_question_count_by_imp = get_ios_question_count_by_imp() ios_top100_question_rate_by_ctr = get_ios_top100_question_rate_by_ctr(ios_question_count_by_click,ios_question_count_by_imp) android_question_count_by_click = get_android_question_rate_by_click() android_question_count_by_imp = get_android_question_rate_by_imp() android_top100_question_rate_by_ctr = get_android_top100_question_rate_by_ctr(android_question_count_by_click,android_question_count_by_imp) result_lst = [all_top100_question_rate_by_ctr,ios_top100_question_rate_by_ctr,android_top100_question_rate_by_ctr] today = datetime.date.today() yesterday = today - datetime.timedelta(days=1) yesterday = yesterday.strftime("%Y%m%d") output_path = "/data2/models/eda/recommended_indexs/5top100_ctr_question_%s.txt" % yesterday result2file(result_lst,output_path) print("2.4已将top100点击率的question存入文件") print("已完成所有Top特征提取")