# -*- coding: UTF-8 -*- import pymysql import datetime def con_sql(sql): # 从数据库的表里获取数据 db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test') cursor = db.cursor() cursor.execute(sql) result = cursor.fetchall() db.close() return result def tuple2dict(tuple_result): #把sql结果从tuple格式转换成dict格式 dict_result = {} for i in range(len(tuple_result)): dict_result[tuple_result[i][0]] = tuple_result[i][1] return dict_result def result2file(result_lst,fpath): with open(fpath,'w') as f: tplt = "{0:\u3000<4}\t{1:\u3000<12}\t{2:\u3000^6}\t{3:\u3000^6}\t{4:\u3000<8}\t{5:\u3000^15}\n" f.write("2.2 Top 100 diary\n") f.write("=================================================================\n") f.write(tplt.format("平台","diary_id","点击数","曝光数","点击率","diary链接")) for i in result_lst: for j in i: f.write(tplt.format(j[0],j[1],j[2],j[3],j[4],j[5])) f.write("=================================================================\n") if i != result_lst[-1]: f.write(tplt.format("平台","diary_id","点击数","曝光数","点击率","diary链接")) f.write("\n\n") #1 获取昨天所有平台的top100diary((sorted by ctr)) #1.1 获取昨天所有平台的diary的点击数 def get_all_diary_count_by_click(): sql = "select cid,count(cid) from data_feed_click where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and cid_type='diary' group by cid order by count(cid) desc" all_diary_count_by_click = con_sql(sql) all_diary_count_by_click = tuple2dict(all_diary_count_by_click) return all_diary_count_by_click #1.2 获取昨天所有平台的diary的曝光数 def get_all_diary_count_by_imp(): sql = "select cid,count(cid) from data_feed_exposure where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and cid_type='diary' group by cid order by count(cid) desc" all_diary_count_by_imp = con_sql(sql) all_diary_count_by_imp = tuple2dict(all_diary_count_by_imp) return all_diary_count_by_imp #1.3 获取昨天所有平台的top100点击率的diary def get_all_top100_diary_rate_by_ctr(all_diary_count_by_click,all_diary_count_by_imp): all_top100_diary_rate_by_ctr = [] for i in all_diary_count_by_click: if i in all_diary_count_by_imp.keys() and all_diary_count_by_click[i] > 4: url = "http://m.igengmei.com/diary_book/" + i[i.index('|')+1:] + '/' all_top100_diary_rate_by_ctr.append(("所有",i,all_diary_count_by_click[i],all_diary_count_by_imp[i], round(all_diary_count_by_click[i]/all_diary_count_by_imp[i],4),url)) all_top100_diary_rate_by_ctr.sort(key=lambda x:x[4],reverse=True) return all_top100_diary_rate_by_ctr[:100] if len(all_top100_diary_rate_by_ctr) > 100 else all_top100_diary_rate_by_ctr #2 获取昨天ios平台的top100diary(sorted by ctr) #2.1 获取昨天ios平台的diary的点击数 def get_ios_diary_count_by_click(): sql = "select cid,count(cid) from data_feed_click where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and device_type='AppStore' and cid_type='diary' group by cid order by count(cid) desc" ios_diary_count_by_click = con_sql(sql) ios_diary_count_by_click = tuple2dict(ios_diary_count_by_click) return ios_diary_count_by_click #2.2 获取昨天ios平台的diary的曝光数 def get_ios_diary_count_by_imp(): sql = "select cid,count(cid) from data_feed_exposure where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and device_type='App Store' and cid_type='diary' group by cid order by count(cid) desc" ios_diary_count_by_imp = con_sql(sql) ios_diary_count_by_imp = tuple2dict(ios_diary_count_by_imp) return ios_diary_count_by_imp #2.3 获取昨天ios平台的top00点击率的diary def get_ios_top100_diary_rate_by_ctr(ios_top100_diary_count_by_click,ios_top100_diary_count_by_imp): ios_top100_diary_rate_by_ctr = [] for i in ios_diary_count_by_click: if i in ios_diary_count_by_imp.keys() and ios_diary_count_by_click[i] > 4: url = "http://m.igengmei.com/diary_book/" + i[i.index('|')+1:] + '/' ios_top100_diary_rate_by_ctr.append(("苹果",i,ios_diary_count_by_click[i],ios_diary_count_by_imp[i], round(ios_diary_count_by_click[i]/ios_diary_count_by_imp[i],4),url)) ios_top100_diary_rate_by_ctr.sort(key=lambda x:x[4],reverse=True) return ios_top100_diary_rate_by_ctr[:100] if len(ios_top100_diary_rate_by_ctr) > 100 else ios_top100_diary_rate_by_ctr #3 获取昨天安卓平台的top100diary(sorted by ctr) #3.1 获取昨天安卓平台的diary的点击数 def get_android_diary_rate_by_click(): sql = "select cid,count(cid) from data_feed_click where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and device_type!='AppStore' and cid_type='diary' group by cid order by count(cid) desc" android_diary_count_by_click = con_sql(sql) android_diary_count_by_click = tuple2dict(android_diary_count_by_click) return android_diary_count_by_click #3.2 获取昨天安卓平台的diary的曝光数 def get_android_diary_rate_by_imp(): sql = "select cid,count(cid) from data_feed_exposure where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and device_type!='App Store' and cid_type='diary' group by cid order by count(cid) desc" android_diary_count_by_imp = con_sql(sql) android_diary_count_by_imp = tuple2dict(android_diary_count_by_imp) return android_diary_count_by_imp #3.3 获取昨天安卓平台的top100点击率的diary def get_android_top100_diary_rate_by_ctr(android_top100_diary_count_by_click,android_top100_diary_count_by_imp): android_top100_diary_rate_by_ctr = [] for i in android_diary_count_by_click: if i in android_diary_count_by_imp.keys() and android_diary_count_by_click[i] > 4: url = "http://m.igengmei.com/diary_book/" + i[i.index('|')+1:] + '/' android_top100_diary_rate_by_ctr.append(("安卓",i,android_diary_count_by_click[i],android_diary_count_by_imp[i], round(android_diary_count_by_click[i]/android_diary_count_by_imp[i],4),url)) android_top100_diary_rate_by_ctr.sort(key=lambda x:x[4],reverse=True) return android_top100_diary_rate_by_ctr[:100] if len(android_top100_diary_rate_by_ctr) > 100 else android_top100_diary_rate_by_ctr if __name__ == "__main__": all_diary_count_by_click = get_all_diary_count_by_click() all_diary_count_by_imp = get_all_diary_count_by_imp() all_top100_diary_rate_by_ctr = get_all_top100_diary_rate_by_ctr(all_diary_count_by_click,all_diary_count_by_imp) ios_diary_count_by_click = get_ios_diary_count_by_click() ios_diary_count_by_imp = get_ios_diary_count_by_imp() ios_top100_diary_rate_by_ctr = get_ios_top100_diary_rate_by_ctr(ios_diary_count_by_click,ios_diary_count_by_imp) android_diary_count_by_click = get_android_diary_rate_by_click() android_diary_count_by_imp = get_android_diary_rate_by_imp() android_top100_diary_rate_by_ctr = get_android_top100_diary_rate_by_ctr(android_diary_count_by_click,android_diary_count_by_imp) result_lst = [all_top100_diary_rate_by_ctr,ios_top100_diary_rate_by_ctr,android_top100_diary_rate_by_ctr] today = datetime.date.today() yesterday = today - datetime.timedelta(days=1) yesterday = yesterday.strftime("%Y%m%d") output_path = "/data2/models/eda/recommended_indexs/3top100_ctr_diary_%s.txt" % yesterday result2file(result_lst,output_path) print("2.2已将top100点击率的diary存入文件")