Commit 31f72a07 authored by 张彦钊

新增实时预测

parents 9caf9b4e aa7d043a
...@@ -21,7 +21,7 @@ def get_all_click_one_rate(): ...@@ -21,7 +21,7 @@ def get_all_click_one_rate():
impression_one_count = con_sql(sql) impression_one_count = con_sql(sql)
impression_one_count = impression_one_count[0][0] impression_one_count = impression_one_count[0][0]
all_click_one_rate = click_one_count / impression_one_count all_click_one_rate = click_one_count / impression_one_count
return ["all",click_one_count,impression_one_count,round(all_click_one_rate,4)] return ["所有",click_one_count,impression_one_count,round(all_click_one_rate,4)]
...@@ -35,7 +35,7 @@ def get_ios_click_one_rate(): ...@@ -35,7 +35,7 @@ def get_ios_click_one_rate():
impression_one_count = con_sql(sql) impression_one_count = con_sql(sql)
impression_one_count = impression_one_count[0][0] impression_one_count = impression_one_count[0][0]
ios_click_one_rate = click_one_count / impression_one_count ios_click_one_rate = click_one_count / impression_one_count
return ["ios",click_one_count,impression_one_count,round(ios_click_one_rate,4)] return ["苹果",click_one_count,impression_one_count,round(ios_click_one_rate,4)]
#3 获取安卓平台的有点击用户点击率 #3 获取安卓平台的有点击用户点击率
...@@ -47,7 +47,7 @@ def get_android_click_one_rate(): ...@@ -47,7 +47,7 @@ def get_android_click_one_rate():
impression_one_count = con_sql(sql) impression_one_count = con_sql(sql)
impression_one_count = impression_one_count[0][0] impression_one_count = impression_one_count[0][0]
android_click_one_rate = click_one_count / impression_one_count android_click_one_rate = click_one_count / impression_one_count
return ["android",click_one_count,impression_one_count,round(android_click_one_rate,4)] return ["安卓",click_one_count,impression_one_count,round(android_click_one_rate,4)]
......
...@@ -21,7 +21,7 @@ def get_all_answer_imp_rate(): ...@@ -21,7 +21,7 @@ def get_all_answer_imp_rate():
all_imp_count = con_sql(sql) all_imp_count = con_sql(sql)
all_imp_count = all_imp_count[0][0] all_imp_count = all_imp_count[0][0]
all_answer_imp_rate = answer_imp_count / all_imp_count all_answer_imp_rate = answer_imp_count / all_imp_count
return ["all",answer_imp_count,all_imp_count,round(all_answer_imp_rate,4)] return ["所有",answer_imp_count,all_imp_count,round(all_answer_imp_rate,4)]
#2 获取ios平台的问答曝光占比 #2 获取ios平台的问答曝光占比
...@@ -33,7 +33,7 @@ def get_ios_answer_imp_rate(): ...@@ -33,7 +33,7 @@ def get_ios_answer_imp_rate():
all_imp_count = con_sql(sql) all_imp_count = con_sql(sql)
all_imp_count = all_imp_count[0][0] all_imp_count = all_imp_count[0][0]
ios_answer_imp_rate = answer_imp_count / all_imp_count ios_answer_imp_rate = answer_imp_count / all_imp_count
return ["ios",answer_imp_count,all_imp_count,round(ios_answer_imp_rate,4)] return ["苹果",answer_imp_count,all_imp_count,round(ios_answer_imp_rate,4)]
#3 获取安卓平台的问答曝光占比 #3 获取安卓平台的问答曝光占比
...@@ -45,7 +45,7 @@ def get_android_answer_imp_rate(): ...@@ -45,7 +45,7 @@ def get_android_answer_imp_rate():
all_imp_count = con_sql(sql) all_imp_count = con_sql(sql)
all_imp_count = all_imp_count[0][0] all_imp_count = all_imp_count[0][0]
android_answer_imp_rate = answer_imp_count / all_imp_count android_answer_imp_rate = answer_imp_count / all_imp_count
return ["android",answer_imp_count,all_imp_count,round(android_answer_imp_rate,4)] return ["安卓",answer_imp_count,all_imp_count,round(android_answer_imp_rate,4)]
......
...@@ -21,7 +21,7 @@ def get_all_click_answer_rate(): ...@@ -21,7 +21,7 @@ def get_all_click_answer_rate():
impression_answer_count = con_sql(sql) impression_answer_count = con_sql(sql)
impression_answer_count = impression_answer_count[0][0] impression_answer_count = impression_answer_count[0][0]
all_click_answer_rate = click_answer_count / impression_answer_count all_click_answer_rate = click_answer_count / impression_answer_count
return ["all",click_answer_count,impression_answer_count,round(all_click_answer_rate,4)] return ["所有",click_answer_count,impression_answer_count,round(all_click_answer_rate,4)]
...@@ -34,7 +34,7 @@ def get_ios_click_answer_rate(): ...@@ -34,7 +34,7 @@ def get_ios_click_answer_rate():
impression_answer_count = con_sql(sql) impression_answer_count = con_sql(sql)
impression_answer_count = impression_answer_count[0][0] impression_answer_count = impression_answer_count[0][0]
ios_click_answer_rate = click_answer_count / impression_answer_count ios_click_answer_rate = click_answer_count / impression_answer_count
return ["ios",click_answer_count,impression_answer_count,round(ios_click_answer_rate,4)] return ["苹果",click_answer_count,impression_answer_count,round(ios_click_answer_rate,4)]
...@@ -48,7 +48,7 @@ def get_android_click_answer_rate(): ...@@ -48,7 +48,7 @@ def get_android_click_answer_rate():
impression_answer_count = con_sql(sql) impression_answer_count = con_sql(sql)
impression_answer_count = impression_answer_count[0][0] impression_answer_count = impression_answer_count[0][0]
android_click_answer_rate = click_answer_count / impression_answer_count android_click_answer_rate = click_answer_count / impression_answer_count
return ["android",click_answer_count,impression_answer_count,round(android_click_answer_rate,4)] return ["安卓",click_answer_count,impression_answer_count,round(android_click_answer_rate,4)]
......
...@@ -21,7 +21,7 @@ def get_all_click_diary_rate(): ...@@ -21,7 +21,7 @@ def get_all_click_diary_rate():
impression_diary_count = con_sql(sql) impression_diary_count = con_sql(sql)
impression_diary_count = impression_diary_count[0][0] impression_diary_count = impression_diary_count[0][0]
all_click_diary_rate = click_diary_count / impression_diary_count all_click_diary_rate = click_diary_count / impression_diary_count
return ["all",click_diary_count,impression_diary_count,round(all_click_diary_rate,4)] return ["所有",click_diary_count,impression_diary_count,round(all_click_diary_rate,4)]
...@@ -34,7 +34,7 @@ def get_ios_click_diary_rate(): ...@@ -34,7 +34,7 @@ def get_ios_click_diary_rate():
impression_diary_count = con_sql(sql) impression_diary_count = con_sql(sql)
impression_diary_count = impression_diary_count[0][0] impression_diary_count = impression_diary_count[0][0]
ios_click_diary_rate = click_diary_count / impression_diary_count ios_click_diary_rate = click_diary_count / impression_diary_count
return ["ios",click_diary_count,impression_diary_count,round(ios_click_diary_rate,4)] return ["苹果",click_diary_count,impression_diary_count,round(ios_click_diary_rate,4)]
#3 获取安卓平台的点击问答用户占比 #3 获取安卓平台的点击问答用户占比
...@@ -46,7 +46,7 @@ def get_android_click_diary_rate(): ...@@ -46,7 +46,7 @@ def get_android_click_diary_rate():
impression_diary_count = con_sql(sql) impression_diary_count = con_sql(sql)
impression_diary_count = impression_diary_count[0][0] impression_diary_count = impression_diary_count[0][0]
android_click_diary_rate = click_diary_count / impression_diary_count android_click_diary_rate = click_diary_count / impression_diary_count
return ["android",click_diary_count,impression_diary_count,round(android_click_diary_rate,4)] return ["安卓",click_diary_count,impression_diary_count,round(android_click_diary_rate,4)]
......
...@@ -31,12 +31,13 @@ def get_click_times_to_count_uid_df(): ...@@ -31,12 +31,13 @@ def get_click_times_to_count_uid_df():
def df2file(df,fpath): def df2file(df,fpath):
with open(fpath,"w") as f: with open(fpath,"w") as f:
tplt = "{0:^10}\t{1:^10}\n"
f.write("#2. Top特征\n") f.write("#2. Top特征\n")
f.write("=================================================================\n") f.write("=================================================================\n")
f.write("2.1用户点击次数分布(第一列:用户点击次数;第二列:独立用户数量)\n") f.write("2.1用户点击次数分布(第一列:用户点击次数;第二列:独立用户数量)\n")
f.write("click_times"+"\t"+"count_uid"+"\n") f.write(tplt.format("click_times","count_uid"))
for row in df.iterrows(): for row in df.iterrows():
line = str(row[1][0]) + "\t" + str(row[1][1]) + "\n" line = tplt.format(row[1][0],row[1][1])
f.write(line) f.write(line)
f.write("\n\n") f.write("\n\n")
......
...@@ -23,7 +23,7 @@ def get_all_click_zero_rate(): ...@@ -23,7 +23,7 @@ def get_all_click_zero_rate():
impression_zero_count = impression_zero_count[0][0] impression_zero_count = impression_zero_count[0][0]
click_zero_count = impression_zero_count-click_zero_count click_zero_count = impression_zero_count-click_zero_count
all_click_zero_rate = click_zero_count / impression_zero_count all_click_zero_rate = click_zero_count / impression_zero_count
return ["all",click_zero_count,impression_zero_count,round(all_click_zero_rate,4)] return ["所有",click_zero_count,impression_zero_count,round(all_click_zero_rate,4)]
...@@ -37,7 +37,7 @@ def get_ios_click_zero_rate(): ...@@ -37,7 +37,7 @@ def get_ios_click_zero_rate():
impression_zero_count = impression_zero_count[0][0] impression_zero_count = impression_zero_count[0][0]
click_zero_count = impression_zero_count-click_zero_count click_zero_count = impression_zero_count-click_zero_count
ios_click_zero_rate = click_zero_count / impression_zero_count ios_click_zero_rate = click_zero_count / impression_zero_count
return ["ios",click_zero_count,impression_zero_count,round(ios_click_zero_rate,4)] return ["苹果",click_zero_count,impression_zero_count,round(ios_click_zero_rate,4)]
#3 获取安卓平台的0点击用户占比 #3 获取安卓平台的0点击用户占比
...@@ -50,7 +50,7 @@ def get_android_click_zero_rate(): ...@@ -50,7 +50,7 @@ def get_android_click_zero_rate():
impression_zero_count = impression_zero_count[0][0] impression_zero_count = impression_zero_count[0][0]
click_zero_count = impression_zero_count-click_zero_count click_zero_count = impression_zero_count-click_zero_count
android_click_zero_rate = click_zero_count / impression_zero_count android_click_zero_rate = click_zero_count / impression_zero_count
return ["android",click_zero_count,impression_zero_count,round(android_click_zero_rate,4)] return ["安卓",click_zero_count,impression_zero_count,round(android_click_zero_rate,4)]
......
...@@ -15,78 +15,68 @@ def get_yesterday_date(): ...@@ -15,78 +15,68 @@ def get_yesterday_date():
def result2file(fpath): def result2file(fpath):
with open(fpath,'w') as f: with open(fpath,'w') as f:
tplt = "{0:\u3000<6}\t{1:\u3000<15}\t{2:\u3000<15}\t{3:\u3000<15}\n"
f.write("#注意:以下数据都是首页的\n") f.write("#注意:以下数据都是首页的\n")
f.write("#1. 比例特征\n") f.write("#1. 比例特征\n")
f.write("=================================================================\n") f.write("=================================================================\n")
f.write("#1.1answer曝光占比(=answer被曝光数/总cid被曝光数)\n") f.write("#1.1answer曝光占比(=answer被曝光数/总cid被曝光数)\n")
f.write("平台"+"\t"+"answer被曝光数"+"\t"+"总cid被曝光数"+"\t"+"answer被曝光数占比\n") f.write(tplt.format("平台","answer被曝光数","总cid被曝光数","answer被曝光数占比"))
all_answer_imp_rate = get_all_answer_imp_rate() all_answer_imp_rate = get_all_answer_imp_rate()
ios_answer_imp_rate = get_ios_answer_imp_rate() ios_answer_imp_rate = get_ios_answer_imp_rate()
android_answer_imp_rate = get_android_answer_imp_rate() android_answer_imp_rate = get_android_answer_imp_rate()
lst = [all_answer_imp_rate,ios_answer_imp_rate,android_answer_imp_rate] lst = [all_answer_imp_rate,ios_answer_imp_rate,android_answer_imp_rate]
for i in lst: for i in lst:
line = "" line = tplt.format(i[0],i[1],i[2],i[3])
for j in i:
line += str(j) + '\t'
line = line[:-1]+'\n'
f.write(line) f.write(line)
f.write('\n')
print("1.1已将answer曝光占比存入文件") print("1.1已将answer曝光占比存入文件")
f.write("#1.2有点击用户点击率(=有点击用户点击次数/有点击用户曝光次数)\n") f.write("#1.2活跃用户点击率(=活跃用户点击次数/活跃用户曝光次数)\n")
f.write("平台"+"\t"+"有点击用户点击次数"+"\t"+"有点击用户曝光次数"+"\t"+"有点击用户点击率\n") f.write(tplt.format("平台","active用户点击次数","active用户曝光次数","active用户点击率"))
all_click_one_rate = get_all_click_one_rate() all_click_one_rate = get_all_click_one_rate()
ios_click_one_rate = get_ios_click_one_rate() ios_click_one_rate = get_ios_click_one_rate()
android_click_one_rate = get_android_click_one_rate() android_click_one_rate = get_android_click_one_rate()
lst = [all_click_one_rate,ios_click_one_rate,android_click_one_rate] lst = [all_click_one_rate,ios_click_one_rate,android_click_one_rate]
for i in lst: for i in lst:
line = "" line = tplt.format(i[0],i[1],i[2],i[3])
for j in i:
line += str(j) + '\t'
line = line[:-1]+'\n'
f.write(line) f.write(line)
print("1.2已将有点击用户点击率存入文件") f.write('\n')
print("1.2已将活跃用户点击率存入文件")
f.write("#1.3点击answer用户占比(=点击answer用户数/曝光answer用户数)\n") f.write("#1.3点击answer用户占比(=点击answer用户数/曝光answer用户数)\n")
f.write("平台"+"\t"+"点击answer用户数"+"\t"+"曝光answer用户数"+"\t"+"点击answer用户占比\n") f.write(tplt.format("平台","点击answer用户数","曝光answer用户数","击answer用户占比"))
all_click_answer_rate = get_all_click_answer_rate() all_click_answer_rate = get_all_click_answer_rate()
ios_click_answer_rate = get_ios_click_answer_rate() ios_click_answer_rate = get_ios_click_answer_rate()
android_click_answer_rate = get_android_click_answer_rate() android_click_answer_rate = get_android_click_answer_rate()
lst = [all_click_answer_rate,ios_click_answer_rate,android_click_answer_rate] lst = [all_click_answer_rate,ios_click_answer_rate,android_click_answer_rate]
for i in lst: for i in lst:
line = "" line = tplt.format(i[0],i[1],i[2],i[3])
for j in i:
line += str(j) + '\t'
line = line[:-1]+'\n'
f.write(line) f.write(line)
f.write('\n')
print("1.3已将点击answer用户占比存入文件") print("1.3已将点击answer用户占比存入文件")
f.write("#1.4点击diary用户占比(=点击diary用户数/曝光diary用户数)\n") f.write("#1.4点击diary用户占比(=点击diary用户数/曝光diary用户数)\n")
f.write("平台"+"\t"+"点击diary用户数"+"\t"+"曝光diary用户数"+"\t"+"点击diary用户占比\n") f.write(tplt.format("平台","点击diary用户数","曝光diary用户数","击diary用户占比"))
all_click_diary_rate = get_all_click_diary_rate() all_click_diary_rate = get_all_click_diary_rate()
ios_click_diary_rate = get_ios_click_diary_rate() ios_click_diary_rate = get_ios_click_diary_rate()
android_click_diary_rate = get_android_click_diary_rate() android_click_diary_rate = get_android_click_diary_rate()
lst = [all_click_diary_rate,ios_click_diary_rate,android_click_diary_rate] lst = [all_click_diary_rate,ios_click_diary_rate,android_click_diary_rate]
for i in lst: for i in lst:
line = "" line = tplt.format(i[0],i[1],i[2],i[3])
for j in i:
line += str(j) + '\t'
line = line[:-1]+'\n'
f.write(line) f.write(line)
f.write('\n')
print("1.4已将点击diary用户占比存入文件") print("1.4已将点击diary用户占比存入文件")
f.write("#1.5无点击用户占比(=无点击用户数/有曝光用户数)\n") f.write("#1.5无点击用户占比(=无点击用户数/有曝光用户数)\n")
f.write("平台"+"\t"+"无点击用户数"+"\t"+"有曝光用户数"+"\t"+"无点击用户占比\n") f.write(tplt.format("平台","no点击用户数","have曝光用户数","no点击用户占比"))
all_click_zero_rate = get_all_click_zero_rate() all_click_zero_rate = get_all_click_zero_rate()
ios_click_zero_rate = get_ios_click_zero_rate() ios_click_zero_rate = get_ios_click_zero_rate()
android_click_zero_rate = get_android_click_zero_rate() android_click_zero_rate = get_android_click_zero_rate()
lst = [all_click_zero_rate,ios_click_zero_rate,android_click_zero_rate] lst = [all_click_zero_rate,ios_click_zero_rate,android_click_zero_rate]
for i in lst: for i in lst:
line = "" line = tplt.format(i[0],i[1],i[2],i[3])
for j in i:
line += str(j) + '\t'
line = line[:-1]+'\n'
f.write(line) f.write(line)
f.write('\n\n') f.write('\n')
print("1.5已将无点击用户占比存入文件") print("1.5已将无点击用户占比存入文件")
......
...@@ -20,20 +20,16 @@ def tuple2dict(tuple_result): ...@@ -20,20 +20,16 @@ def tuple2dict(tuple_result):
def result2file(result_lst,fpath): def result2file(result_lst,fpath):
with open(fpath,'w') as f: with open(fpath,'w') as f:
header = "平台"+'\t'+"answer_id"+'\t'+"answer被点击数"+'\t'+"answer被曝光数"+'\t'+"answer被点击率"+'\t'+"answer链接"+'\n' tplt = "{0:\u3000<4}\t{1:\u3000<12}\t{2:\u3000^6}\t{3:\u3000^6}\t{4:\u3000<8}\t{5:\u3000^15}\n"
f.write("Top 100 Answer\n") f.write("Top 100 Answer\n")
f.write("=================================================================\n") f.write("=================================================================\n")
f.write(header) f.write(tplt.format("平台","answer_id","点击数","曝光数","点击率","answer链接"))
for i in result_lst: for i in result_lst:
for j in i: for j in i:
line = "" f.write(tplt.format(j[0],j[1],j[2],j[3],j[4],j[5]))
for k in j:
line += str(k) + '\t'
line = line[:-1] + '\n'
f.write(line)
f.write("=================================================================\n") f.write("=================================================================\n")
if i != result_lst[-1]: if i != result_lst[-1]:
f.write(header) f.write(tplt.format("平台","answer_id","点击数","曝光数","点击率","answer链接"))
f.write("\n\n") f.write("\n\n")
...@@ -58,7 +54,7 @@ def get_all_top100_answer_rate_by_ctr(all_answer_count_by_click,all_answer_count ...@@ -58,7 +54,7 @@ def get_all_top100_answer_rate_by_ctr(all_answer_count_by_click,all_answer_count
for i in all_answer_count_by_click: for i in all_answer_count_by_click:
if i in all_answer_count_by_imp.keys() and all_answer_count_by_click[i]>2: if i in all_answer_count_by_imp.keys() and all_answer_count_by_click[i]>2:
url = "http://m.igengmei.com/answer/" + i[i.index('|')+1:] + '/' url = "http://m.igengmei.com/answer/" + i[i.index('|')+1:] + '/'
all_top100_answer_rate_by_ctr.append(("all",i,all_answer_count_by_click[i],all_answer_count_by_imp[i], round(all_answer_count_by_click[i]/all_answer_count_by_imp[i],4),url)) all_top100_answer_rate_by_ctr.append(("所有",i,all_answer_count_by_click[i],all_answer_count_by_imp[i], round(all_answer_count_by_click[i]/all_answer_count_by_imp[i],4),url))
all_top100_answer_rate_by_ctr.sort(key=lambda x:x[4],reverse=True) all_top100_answer_rate_by_ctr.sort(key=lambda x:x[4],reverse=True)
return all_top100_answer_rate_by_ctr[:100] if len(all_top100_answer_rate_by_ctr) > 100 else all_top100_answer_rate_by_ctr return all_top100_answer_rate_by_ctr[:100] if len(all_top100_answer_rate_by_ctr) > 100 else all_top100_answer_rate_by_ctr
...@@ -82,7 +78,7 @@ def get_ios_top100_answer_rate_by_ctr(ios_answer_count_by_click,ios_answer_count ...@@ -82,7 +78,7 @@ def get_ios_top100_answer_rate_by_ctr(ios_answer_count_by_click,ios_answer_count
for i in ios_answer_count_by_click: for i in ios_answer_count_by_click:
if i in ios_answer_count_by_imp.keys() and ios_answer_count_by_click[i]>2: if i in ios_answer_count_by_imp.keys() and ios_answer_count_by_click[i]>2:
url = "http://m.igengmei.com/answer/" + i[i.index('|')+1:] + '/' url = "http://m.igengmei.com/answer/" + i[i.index('|')+1:] + '/'
ios_top100_answer_rate_by_ctr.append(("ios",i,ios_answer_count_by_click[i],ios_answer_count_by_imp[i], round(ios_answer_count_by_click[i]/ios_answer_count_by_imp[i],4),url)) ios_top100_answer_rate_by_ctr.append(("苹果",i,ios_answer_count_by_click[i],ios_answer_count_by_imp[i], round(ios_answer_count_by_click[i]/ios_answer_count_by_imp[i],4),url))
ios_top100_answer_rate_by_ctr.sort(key=lambda x:x[4],reverse=True) ios_top100_answer_rate_by_ctr.sort(key=lambda x:x[4],reverse=True)
return ios_top100_answer_rate_by_ctr[:100] if len(ios_top100_answer_rate_by_ctr) > 100 else ios_top100_answer_rate_by_ctr return ios_top100_answer_rate_by_ctr[:100] if len(ios_top100_answer_rate_by_ctr) > 100 else ios_top100_answer_rate_by_ctr
...@@ -106,7 +102,7 @@ def get_android_top100_answer_rate_by_ctr(android_answer_count_by_click,android_ ...@@ -106,7 +102,7 @@ def get_android_top100_answer_rate_by_ctr(android_answer_count_by_click,android_
for i in android_answer_count_by_click: for i in android_answer_count_by_click:
if i in android_answer_count_by_imp.keys() and android_answer_count_by_click[i]>2: if i in android_answer_count_by_imp.keys() and android_answer_count_by_click[i]>2:
url = "http://m.igengmei.com/answer/" + i[i.index('|')+1:] + '/' url = "http://m.igengmei.com/answer/" + i[i.index('|')+1:] + '/'
android_top100_answer_rate_by_ctr.append(("android",i,android_answer_count_by_click[i],android_answer_count_by_imp[i],round(android_answer_count_by_click[i]/android_answer_count_by_imp[i],4),url)) android_top100_answer_rate_by_ctr.append(("安卓",i,android_answer_count_by_click[i],android_answer_count_by_imp[i],round(android_answer_count_by_click[i]/android_answer_count_by_imp[i],4),url))
android_top100_answer_rate_by_ctr.sort(key=lambda x:x[4],reverse=True) android_top100_answer_rate_by_ctr.sort(key=lambda x:x[4],reverse=True)
return android_top100_answer_rate_by_ctr[:100] if len(android_top100_answer_rate_by_ctr) > 100 else android_top100_answer_rate_by_ctr return android_top100_answer_rate_by_ctr[:100] if len(android_top100_answer_rate_by_ctr) > 100 else android_top100_answer_rate_by_ctr
......
...@@ -20,20 +20,16 @@ def tuple2dict(tuple_result): ...@@ -20,20 +20,16 @@ def tuple2dict(tuple_result):
def result2file(result_lst,fpath): def result2file(result_lst,fpath):
with open(fpath,'w') as f: with open(fpath,'w') as f:
header = "平台"+'\t'+"diary_id"+'\t'+"diary被点击数"+'\t'+"diary被曝光数"+'\t'+"diary被点击率"+'\t'+"diary链接"+'\n' tplt = "{0:\u3000<4}\t{1:\u3000<12}\t{2:\u3000^6}\t{3:\u3000^6}\t{4:\u3000<8}\t{5:\u3000^15}\n"
f.write("Top 100 diary\n") f.write("Top 100 diary\n")
f.write("=================================================================\n") f.write("=================================================================\n")
f.write(header) f.write(tplt.format("平台","diary_id","点击数","曝光数","点击率","diary链接"))
for i in result_lst: for i in result_lst:
for j in i: for j in i:
line = "" f.write(tplt.format(j[0],j[1],j[2],j[3],j[4],j[5]))
for k in j:
line += str(k) + '\t'
line = line[:-1] + '\n'
f.write(line)
f.write("=================================================================\n") f.write("=================================================================\n")
if i != result_lst[-1]: if i != result_lst[-1]:
f.write(header) f.write(tplt.format("平台","diary_id","点击数","曝光数","点击率","diary链接"))
f.write("\n\n") f.write("\n\n")
...@@ -57,7 +53,7 @@ def get_all_top100_diary_rate_by_ctr(all_diary_count_by_click,all_diary_count_by ...@@ -57,7 +53,7 @@ def get_all_top100_diary_rate_by_ctr(all_diary_count_by_click,all_diary_count_by
for i in all_diary_count_by_click: for i in all_diary_count_by_click:
if i in all_diary_count_by_imp.keys() and all_diary_count_by_click[i] > 4: if i in all_diary_count_by_imp.keys() and all_diary_count_by_click[i] > 4:
url = "http://m.igengmei.com/diary_book/" + i[i.index('|')+1:] + '/' url = "http://m.igengmei.com/diary_book/" + i[i.index('|')+1:] + '/'
all_top100_diary_rate_by_ctr.append(("all",i,all_diary_count_by_click[i],all_diary_count_by_imp[i], round(all_diary_count_by_click[i]/all_diary_count_by_imp[i],4),url)) all_top100_diary_rate_by_ctr.append(("所有",i,all_diary_count_by_click[i],all_diary_count_by_imp[i], round(all_diary_count_by_click[i]/all_diary_count_by_imp[i],4),url))
all_top100_diary_rate_by_ctr.sort(key=lambda x:x[4],reverse=True) all_top100_diary_rate_by_ctr.sort(key=lambda x:x[4],reverse=True)
return all_top100_diary_rate_by_ctr[:100] if len(all_top100_diary_rate_by_ctr) > 100 else all_top100_diary_rate_by_ctr return all_top100_diary_rate_by_ctr[:100] if len(all_top100_diary_rate_by_ctr) > 100 else all_top100_diary_rate_by_ctr
...@@ -81,7 +77,7 @@ def get_ios_top100_diary_rate_by_ctr(ios_top100_diary_count_by_click,ios_top100_ ...@@ -81,7 +77,7 @@ def get_ios_top100_diary_rate_by_ctr(ios_top100_diary_count_by_click,ios_top100_
for i in ios_diary_count_by_click: for i in ios_diary_count_by_click:
if i in ios_diary_count_by_imp.keys() and ios_diary_count_by_click[i] > 4: if i in ios_diary_count_by_imp.keys() and ios_diary_count_by_click[i] > 4:
url = "http://m.igengmei.com/diary_book/" + i[i.index('|')+1:] + '/' url = "http://m.igengmei.com/diary_book/" + i[i.index('|')+1:] + '/'
ios_top100_diary_rate_by_ctr.append(("ios",i,ios_diary_count_by_click[i],ios_diary_count_by_imp[i], round(ios_diary_count_by_click[i]/ios_diary_count_by_imp[i],4),url)) ios_top100_diary_rate_by_ctr.append(("苹果",i,ios_diary_count_by_click[i],ios_diary_count_by_imp[i], round(ios_diary_count_by_click[i]/ios_diary_count_by_imp[i],4),url))
ios_top100_diary_rate_by_ctr.sort(key=lambda x:x[4],reverse=True) ios_top100_diary_rate_by_ctr.sort(key=lambda x:x[4],reverse=True)
return ios_top100_diary_rate_by_ctr[:100] if len(ios_top100_diary_rate_by_ctr) > 100 else ios_top100_diary_rate_by_ctr return ios_top100_diary_rate_by_ctr[:100] if len(ios_top100_diary_rate_by_ctr) > 100 else ios_top100_diary_rate_by_ctr
...@@ -105,7 +101,7 @@ def get_android_top100_diary_rate_by_ctr(android_top100_diary_count_by_click,and ...@@ -105,7 +101,7 @@ def get_android_top100_diary_rate_by_ctr(android_top100_diary_count_by_click,and
for i in android_diary_count_by_click: for i in android_diary_count_by_click:
if i in android_diary_count_by_imp.keys() and android_diary_count_by_click[i] > 4: if i in android_diary_count_by_imp.keys() and android_diary_count_by_click[i] > 4:
url = "http://m.igengmei.com/diary_book/" + i[i.index('|')+1:] + '/' url = "http://m.igengmei.com/diary_book/" + i[i.index('|')+1:] + '/'
android_top100_diary_rate_by_ctr.append(("android",i,android_diary_count_by_click[i],android_diary_count_by_imp[i], round(android_diary_count_by_click[i]/android_diary_count_by_imp[i],4),url)) android_top100_diary_rate_by_ctr.append(("安卓",i,android_diary_count_by_click[i],android_diary_count_by_imp[i], round(android_diary_count_by_click[i]/android_diary_count_by_imp[i],4),url))
android_top100_diary_rate_by_ctr.sort(key=lambda x:x[4],reverse=True) android_top100_diary_rate_by_ctr.sort(key=lambda x:x[4],reverse=True)
return android_top100_diary_rate_by_ctr[:100] if len(android_top100_diary_rate_by_ctr) > 100 else android_top100_diary_rate_by_ctr return android_top100_diary_rate_by_ctr[:100] if len(android_top100_diary_rate_by_ctr) > 100 else android_top100_diary_rate_by_ctr
......
...@@ -20,20 +20,16 @@ def tuple2dict(tuple_result): ...@@ -20,20 +20,16 @@ def tuple2dict(tuple_result):
def result2file(result_lst,fpath): def result2file(result_lst,fpath):
with open(fpath,'w') as f: with open(fpath,'w') as f:
header = "平台"+'\t'+"question_id"+'\t'+"question被点击数"+'\t'+"question被曝光数"+'\t'+"question被点击率"+'\t'+"question链接"+'\n' tplt = "{0:\u3000<4}\t{1:\u3000<12}\t{2:\u3000^6}\t{3:\u3000^6}\t{4:\u3000<8}\t{5:\u3000^15}\n"
f.write("Top 100 question\n") f.write("Top 100 Question\n")
f.write("=================================================================\n") f.write("=================================================================\n")
f.write(header) f.write(tplt.format("平台","question_id","点击数","曝光数","点击率","question链接"))
for i in result_lst: for i in result_lst:
for j in i: for j in i:
line = "" f.write(tplt.format(j[0],j[1],j[2],j[3],j[4],j[5]))
for k in j:
line += str(k) + '\t'
line = line[:-1] + '\n'
f.write(line)
f.write("=================================================================\n") f.write("=================================================================\n")
if i != result_lst[-1]: if i != result_lst[-1]:
f.write(header) f.write(tplt.format("平台","question_id","点击数","曝光数","点击率","question链接"))
f.write("\n\n") f.write("\n\n")
...@@ -58,14 +54,14 @@ def get_all_top100_question_rate_by_ctr(all_question_count_by_click,all_question ...@@ -58,14 +54,14 @@ def get_all_top100_question_rate_by_ctr(all_question_count_by_click,all_question
if all_question_count_by_imp == {}: if all_question_count_by_imp == {}:
for i in all_question_count_by_click: for i in all_question_count_by_click:
url = "http://m.igengmei.com/question/" + i[i.index('|')+1:] + '/' url = "http://m.igengmei.com/question/" + i[i.index('|')+1:] + '/'
all_top100_question_rate_by_ctr.append(("all",i,all_question_count_by_click[i],0,0,url)) all_top100_question_rate_by_ctr.append(("所有",i,all_question_count_by_click[i],0,0,url))
all_top100_question_rate_by_ctr.sort(key=lambda x:x[2],reverse=True) all_top100_question_rate_by_ctr.sort(key=lambda x:x[2],reverse=True)
return all_top100_question_rate_by_ctr[:100] if len(all_top100_question_rate_by_ctr) > 100 else all_top100_question_rate_by_ctr return all_top100_question_rate_by_ctr[:100] if len(all_top100_question_rate_by_ctr) > 100 else all_top100_question_rate_by_ctr
else: else:
for i in all_question_count_by_click: for i in all_question_count_by_click:
if i in all_question_count_by_imp.keys() and all_question_count_by_click[i]>2: if i in all_question_count_by_imp.keys() and all_question_count_by_click[i]>2:
url = "http://m.igengmei.com/question/" + i[i.index('|')+1:] + '/' url = "http://m.igengmei.com/question/" + i[i.index('|')+1:] + '/'
all_top100_question_rate_by_ctr.append(("all",i,all_question_count_by_click[i],all_question_count_by_imp[i], round(all_question_count_by_click[i]/all_question_count_by_imp[i],4),url)) all_top100_question_rate_by_ctr.append(("所有",i,all_question_count_by_click[i],all_question_count_by_imp[i], round(all_question_count_by_click[i]/all_question_count_by_imp[i],4),url))
all_top100_question_rate_by_ctr.sort(key=lambda x:x[4],reverse=True) all_top100_question_rate_by_ctr.sort(key=lambda x:x[4],reverse=True)
return all_top100_question_rate_by_ctr[:100] if len(all_top100_question_rate_by_ctr) > 100 else all_top100_question_rate_by_ctr return all_top100_question_rate_by_ctr[:100] if len(all_top100_question_rate_by_ctr) > 100 else all_top100_question_rate_by_ctr
...@@ -89,14 +85,14 @@ def get_ios_top100_question_rate_by_ctr(ios_question_count_by_click,ios_question ...@@ -89,14 +85,14 @@ def get_ios_top100_question_rate_by_ctr(ios_question_count_by_click,ios_question
if ios_question_count_by_imp == {}: if ios_question_count_by_imp == {}:
for i in ios_question_count_by_click: for i in ios_question_count_by_click:
url = "http://m.igengmei.com/question/" + i[i.index('|')+1:] + '/' url = "http://m.igengmei.com/question/" + i[i.index('|')+1:] + '/'
ios_top100_question_rate_by_ctr.append(("ios",i,ios_question_count_by_click[i],0,0,url)) ios_top100_question_rate_by_ctr.append(("苹果",i,ios_question_count_by_click[i],0,0,url))
ios_top100_question_rate_by_ctr.sort(key=lambda x:x[2],reverse=True) ios_top100_question_rate_by_ctr.sort(key=lambda x:x[2],reverse=True)
return ios_top100_question_rate_by_ctr[:100] if len(ios_top100_question_rate_by_ctr) > 100 else ios_top100_question_rate_by_ctr return ios_top100_question_rate_by_ctr[:100] if len(ios_top100_question_rate_by_ctr) > 100 else ios_top100_question_rate_by_ctr
else: else:
for i in ios_question_count_by_click: for i in ios_question_count_by_click:
if i in ios_question_count_by_imp.keys() and ios_question_count_by_click[i]>2: if i in ios_question_count_by_imp.keys() and ios_question_count_by_click[i]>2:
url = "http://m.igengmei.com/question/" + i[i.index('|')+1:] + '/' url = "http://m.igengmei.com/question/" + i[i.index('|')+1:] + '/'
ios_top100_question_rate_by_ctr.append(("ios",i,ios_question_count_by_click[i],ios_question_count_by_imp[i], round(ios_question_count_by_click[i]/ios_question_count_by_imp[i],4),url)) ios_top100_question_rate_by_ctr.append(("苹果",i,ios_question_count_by_click[i],ios_question_count_by_imp[i], round(ios_question_count_by_click[i]/ios_question_count_by_imp[i],4),url))
ios_top100_question_rate_by_ctr.sort(key=lambda x:x[4],reverse=True) ios_top100_question_rate_by_ctr.sort(key=lambda x:x[4],reverse=True)
return ios_top100_question_rate_by_ctr[:100] if len(ios_top100_question_rate_by_ctr) > 100 else ios_top100_question_rate_by_ctr return ios_top100_question_rate_by_ctr[:100] if len(ios_top100_question_rate_by_ctr) > 100 else ios_top100_question_rate_by_ctr
...@@ -120,14 +116,14 @@ def get_android_top100_question_rate_by_ctr(android_question_count_by_click,andr ...@@ -120,14 +116,14 @@ def get_android_top100_question_rate_by_ctr(android_question_count_by_click,andr
if android_question_count_by_imp == {}: if android_question_count_by_imp == {}:
for i in android_question_count_by_click: for i in android_question_count_by_click:
url = "http://m.igengmei.com/question/" + i[i.index('|')+1:] + '/' url = "http://m.igengmei.com/question/" + i[i.index('|')+1:] + '/'
android_top100_question_rate_by_ctr.append(("android",i,android_question_count_by_click[i],0,0,url)) android_top100_question_rate_by_ctr.append(("安卓",i,android_question_count_by_click[i],0,0,url))
android_top100_question_rate_by_ctr.sort(key=lambda x:x[2],reverse=True) android_top100_question_rate_by_ctr.sort(key=lambda x:x[2],reverse=True)
return android_top100_question_rate_by_ctr[:100] if len(android_top100_question_rate_by_ctr) > 100 else android_top100_question_rate_by_ctr return android_top100_question_rate_by_ctr[:100] if len(android_top100_question_rate_by_ctr) > 100 else android_top100_question_rate_by_ctr
else: else:
for i in android_question_count_by_click: for i in android_question_count_by_click:
if i in android_question_count_by_imp.keys() and android_question_count_by_click[i]>2: if i in android_question_count_by_imp.keys() and android_question_count_by_click[i]>2:
url = "http://m.igengmei.com/question/" + i[i.index('|')+1:] + '/' url = "http://m.igengmei.com/question/" + i[i.index('|')+1:] + '/'
android_top100_question_rate_by_ctr.append(("android",i,android_question_count_by_click[i],android_question_count_by_imp[i],round(android_question_count_by_click[i]/android_question_count_by_imp[i],4),url)) android_top100_question_rate_by_ctr.append(("安卓",i,android_question_count_by_click[i],android_question_count_by_imp[i],round(android_question_count_by_click[i]/android_question_count_by_imp[i],4),url))
android_top100_question_rate_by_ctr.sort(key=lambda x:x[4],reverse=True) android_top100_question_rate_by_ctr.sort(key=lambda x:x[4],reverse=True)
return android_top100_question_rate_by_ctr[:100] if len(android_top100_question_rate_by_ctr) > 100 else android_top100_question_rate_by_ctr return android_top100_question_rate_by_ctr[:100] if len(android_top100_question_rate_by_ctr) > 100 else android_top100_question_rate_by_ctr
......
...@@ -4,4 +4,4 @@ python getTop100Diary.py ...@@ -4,4 +4,4 @@ python getTop100Diary.py
python getTop100Answer.py python getTop100Answer.py
python getTop100Question.py python getTop100Question.py
dt=$(date -d last-day +%Y%m%d) dt=$(date -d last-day +%Y%m%d)
cat 1rate_features_$dt.txt 2click_times_to_count_uid_$dt.txt 3top100_ctr_diary_$dt.txt 4top100_ctr_answer_$dt.txt 5top100_ctr_question_$dt.txt > result_all_$dt.txt cat /data2/models/eda/recommended_indexs/1rate_features_$dt.txt /data2/models/eda/recommended_indexs/2click_times_to_count_uid_$dt.txt /data2/models/eda/recommended_indexs/3top100_ctr_diary_$dt.txt /data2/models/eda/recommended_indexs/4top100_ctr_answer_$dt.txt /data2/models/eda/recommended_indexs/5top100_ctr_question_$dt.txt > /data2/models/eda/recommended_indexs/result_all_$dt.txt
\ No newline at end of file \ No newline at end of file
# Base directory of the recommendation-index report files (the daily *.txt
# reports are concatenated from this same path by the accompanying shell script).
DIRECTORY_PATH="/data2/models/eda/recommended_indexs/"
\ No newline at end of file
# -*- coding: UTF-8 -*-
from utils import con_sql,tuple2dict,get_yesterday_date
from config import DIRECTORY_PATH
class TopFeatures(object):
    """Rank the feed items (cids) of one content type by clicks, impressions or CTR.

    Counts are read from the ``data_feed_click`` and ``data_feed_exposure``
    tables for a single day (``ndays`` days before the current date) and one
    platform filter. They can then be turned into a top-N list with
    ``get_result`` and written to a text report with ``result2file``.
    """

    def __init__(self, ndays, platform, cid_type, top_n=-1):
        """
        ndays : how many days before today the queried day lies (1;2;3;4..)
        platform : 'ios';'android'; any other value (e.g. 'all') only requires
                   device_type to be non-null
        cid_type : 'diary';'answer';'question'...
        top_n : the rows of the result; a negative value (the default) keeps
                every row
        """
        self.ndays = ndays
        # The platform is stored as the SQL fragment that is appended directly
        # after the `device_type` column name in the queries.
        if platform == "ios":
            self.platform = "='AppStore'"
        elif platform == "android":
            self.platform = "!='AppStore'"
        else:
            self.platform = " is not null"
        self.cid_type = cid_type
        self.top_n = top_n

    def _click_cid_type(self):
        """Return the cid_type value used against the click table."""
        cid_type = self.cid_type
        # Slicing ([-2:-1]) instead of indexing ([-2]) keeps very short
        # strings from raising IndexError.
        if cid_type[-2:-1] == 'e':
            cid_type = cid_type.replace(' ', '')
        return cid_type

    def _exposure_cid_type(self):
        """Return the cid_type value used against the exposure table."""
        cid_type = self._click_cid_type()
        if cid_type[-2:-1] == 'e':
            # NOTE(review): the original expression was
            # `cid_type[:-6] + ' ' + cid_type[:-6:]`, which drops the last six
            # characters and duplicates the prefix ("answer" -> " "). It looks
            # like a typo for inserting a space before the last six characters
            # ("answer" -> " answer") -- confirm against the cid_type values
            # actually stored in data_feed_exposure.
            cid_type = cid_type[:-6] + ' ' + cid_type[-6:]
        return cid_type

    def _build_sql(self, table, cid_type):
        """Return the per-cid count query for `table`, filtered by day, platform and cid_type."""
        # NOTE: the values are interpolated straight into the SQL text, so
        # ndays, platform and cid_type must come from trusted code only.
        return (
            "select cid,count(cid) from {table} "
            "where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day) "
            "and device_type{1} and cid_type='{2}' "
            "group by cid order by count(cid) desc"
        ).format(self.ndays, self.platform, cid_type, table=table)

    def _item_url(self, cid):
        """Return the mobile-site URL for `cid` (the part after the first '|' is the item id)."""
        cid_type = self.cid_type.strip()
        section = "diary_book" if cid_type == "diary" else cid_type
        # split(...)[-1] equals the text after the first '|' and falls back to
        # the whole cid when there is no '|' instead of raising ValueError.
        return "http://m.igengmei.com/{0}/".format(section) + cid.split('|', 1)[-1] + '/'

    def _limit(self, rows):
        """Apply top_n to an already sorted list; a negative top_n keeps everything."""
        top_n = int(self.top_n)
        # A plain rows[:-1] would silently drop the last row for the default -1.
        return rows if top_n < 0 else rows[:top_n]

    def get_click_times(self):
        """Query the click table and return the counts per cid.

        rtype : dict
        """
        sql = self._build_sql("data_feed_click", self._click_cid_type())
        return tuple2dict(con_sql(sql))

    def get_impression_times(self):
        """Query the exposure table and return the counts per cid.

        rtype : dict
        """
        sql = self._build_sql("data_feed_exposure", self._exposure_cid_type())
        return tuple2dict(con_sql(sql))

    def get_result(self, clk, imp, clk_n=2, result_types="ctr"):
        """Build the ranked rows (cid_type, cid, clicks, impressions, ctr, url).

        result_types : "clk";"imp";"ctr"
        clk : dict of click counts per cid
        imp : dict of impression counts per cid
        clk_n : click filter used for the CTR ranking; only cids with more
                than clk_n clicks are kept
        rtype : list, sorted descending by the selected measure and cut to top_n

        An empty `imp` forces the click ranking and an empty `clk` forces the
        impression ranking, whatever `result_types` says.
        """
        label = self.cid_type.strip()
        if not imp or result_types == "clk":
            # top N by clicks
            rows = [(label, cid, clk[cid], 0, 0, self._item_url(cid)) for cid in clk]
            sort_col = 2
        elif not clk or result_types == "imp":
            # top N by impressions
            rows = [(label, cid, 0, imp[cid], 0, self._item_url(cid)) for cid in imp]
            sort_col = 3
        else:
            # top N by click-through rate
            rows = [
                (label, cid, clk[cid], imp[cid],
                 round(clk[cid] / imp[cid], 4), self._item_url(cid))
                for cid in clk
                if cid in imp and clk[cid] > clk_n
            ]
            sort_col = 4
        rows.sort(key=lambda row: row[sort_col], reverse=True)
        return self._limit(rows)

    def result2file(self, result_lst, fpath):
        """Write the ranked rows of several platforms into one text report.

        result_lst : [all,ios,android] -- one list of get_result rows each
        fpath : output filename
        rtype : none
        """
        # The report contains Chinese text, so the encoding is fixed instead
        # of depending on the locale of the machine running the job.
        with open(fpath, 'w', encoding='utf-8') as f:
            tplt = "{0:\u3000<4}\t{1:\u3000<12}\t{2:\u3000^6}\t{3:\u3000^6}\t{4:\u3000<8}\t{5:\u3000^15}\n"
            f.write("Top {0} {1}\n".format(self.top_n,self.cid_type))
            sep = "=================================================================\n"
            header = tplt.format("平台","{}_id".format(self.cid_type),"点击数","曝光数","点击率","{}链接".format(self.cid_type))
            f.write(sep)
            f.write(header)
            last = len(result_lst) - 1
            for idx, rows in enumerate(result_lst):
                for row in rows:
                    f.write(tplt.format(row[0], row[1], row[2], row[3], row[4], row[5]))
                f.write(sep)
                # Compare by position: comparing the lists by value would skip
                # the header whenever a section equals the last one (e.g. both
                # are empty).
                if idx != last:
                    f.write(header)
            f.write("\n\n")
def main():
    """Write the top-100 diary CTR report for the all / ios / android platforms.

    For each platform the click and impression counts of the previous day are
    queried, ranked by click-through rate, and the three rankings are written
    into one file below DIRECTORY_PATH.
    """
    result_lst = []
    top_diary = None
    # One TopFeatures instance per platform: the platform filter is fixed at
    # construction time, so reusing the "all" instance would produce three
    # identical sections.
    for platform in ("all", "ios", "android"):
        top_diary = TopFeatures(1, platform, "diary", top_n=100)
        clk_diary_times = top_diary.get_click_times()
        imp_diary_times = top_diary.get_impression_times()
        # get_result takes (clk, imp, clk_n, result_types); the row limit is
        # the instance's top_n, not a keyword argument of get_result.
        result_lst.append(
            top_diary.get_result(clk_diary_times, imp_diary_times, result_types="ctr")
        )
    # NOTE(review): the sibling shell script concatenates a file named
    # "3top100_ctr_diary_<date>.txt"; confirm whether the "5" prefix is intended.
    output_path = DIRECTORY_PATH + ("5top100_ctr_diary_%s.txt" % get_yesterday_date())
    top_diary.result2file(result_lst, output_path)


if __name__ == '__main__':
    main()
\ No newline at end of file
# -*- coding: UTF-8 -*-
import pymysql
import datetime
def con_sql(sql):
    """Run one SQL statement against the database and return all fetched rows.

    A new connection is opened for every call and is closed again even when
    the statement fails.

    :type sql : str
    :rtype : tuple
    """
    # NOTE(review): host and credentials are hard-coded in source control;
    # they should be moved to configuration / environment and rotated.
    db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
    try:
        cursor = db.cursor()
        try:
            cursor.execute(sql)
            return cursor.fetchall()
        finally:
            cursor.close()
    finally:
        # Runs on success and on error, so a failing query no longer leaks
        # the connection.
        db.close()
def tuple2dict(tuple_result):
    """Convert SQL rows given as tuple(tuple, tuple, ...) into a dict.

    The first column of every row becomes the key and the second column the
    value; when a key occurs more than once the last row wins.

    :type tuple_result : tuple
    :rtype : dict
    """
    return {row[0]: row[1] for row in tuple_result}
def get_yesterday_date():
    """Return yesterday's date (relative to the local current date) as "YYYYMMDD", e.g. "20180808".

    :rtype : str
    """
    one_day = datetime.timedelta(days=1)
    return (datetime.date.today() - one_day).strftime("%Y%m%d")
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment