Commit 776804b0 authored by 高雅喆's avatar 高雅喆

Add EDA of recommended indexes

parent 724824ce
No preview for this file type
File added
import pymysql
def con_sql(sql):
    """Run ``sql`` against the ``jerry_test`` database and return all rows.

    Args:
        sql: Complete SQL statement; it is executed as-is, without parameters.

    Returns:
        The result of ``cursor.fetchall()`` for the statement.
    """
    # NOTE(review): host and root credentials are hard-coded; consider loading
    # them from configuration or the environment instead.
    db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
    try:
        cursor = db.cursor()
        cursor.execute(sql)
        return cursor.fetchall()
    finally:
        # Close the connection even when the query raises.
        db.close()
# 1. Click-through rate of users who clicked, all platforms.
def get_all_click_one_rate():
    """Build yesterday's click-through row for clicking users on all platforms.

    Clicks are yesterday's ``data_feed_click`` rows with a non-null
    ``device_type``; impressions are yesterday's ``data_feed_exposure`` rows
    whose ``device_id`` also appears in yesterday's clicks.

    Returns:
        list: ``["all", clicks, impressions, rate]`` where ``rate`` is
        ``clicks / impressions`` rounded to 4 decimals, or ``0.0`` when there
        are no matching impressions.
    """
    sql = "select count(device_id) from data_feed_click where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and device_type is not null"
    clicks = con_sql(sql)[0][0]
    sql = "select count(device_id) from data_feed_exposure where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and device_id in (select device_id from data_feed_click where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day))"
    impressions = con_sql(sql)[0][0]
    # Guard the division: a day without matching exposures would otherwise
    # raise ZeroDivisionError.
    rate = clicks / impressions if impressions else 0.0
    return ["all", clicks, impressions, round(rate, 4)]
# 2. Click-through rate of users who clicked, iOS.
def get_ios_click_one_rate():
    """Build yesterday's click-through row for clicking users on iOS.

    Clicks are yesterday's ``data_feed_click`` rows with
    ``device_type='AppStore'``; impressions are yesterday's
    ``data_feed_exposure`` rows with ``device_type='App Store'`` whose
    ``device_id`` appears among those clicks.

    Returns:
        list: ``["ios", clicks, impressions, rate]`` where ``rate`` is
        ``clicks / impressions`` rounded to 4 decimals, or ``0.0`` when there
        are no matching impressions.
    """
    sql = "select count(device_id) from data_feed_click where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and device_type='AppStore'"
    clicks = con_sql(sql)[0][0]
    sql = "select count(device_id) from data_feed_exposure where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and device_id in (select device_id from data_feed_click where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and device_type='AppStore') and device_type='App Store'"
    impressions = con_sql(sql)[0][0]
    # Guard the division: a day without matching exposures would otherwise
    # raise ZeroDivisionError.
    rate = clicks / impressions if impressions else 0.0
    return ["ios", clicks, impressions, round(rate, 4)]
# 3. Click-through rate of users who clicked, Android.
def get_android_click_one_rate():
    """Build yesterday's click-through row for clicking users on Android.

    Android is selected as every ``device_type`` other than the App Store
    label (``!='AppStore'`` in the click table, ``!='App Store'`` in the
    exposure table).

    Returns:
        list: ``["android", clicks, impressions, rate]`` where ``rate`` is
        ``clicks / impressions`` rounded to 4 decimals, or ``0.0`` when there
        are no matching impressions.
    """
    sql = "select count(device_id) from data_feed_click where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and device_type!='AppStore'"
    clicks = con_sql(sql)[0][0]
    sql = "select count(device_id) from data_feed_exposure where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and device_id in (select device_id from data_feed_click where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and device_type!='AppStore') and device_type!='App Store'"
    impressions = con_sql(sql)[0][0]
    # Guard the division: a day without matching exposures would otherwise
    # raise ZeroDivisionError.
    rate = clicks / impressions if impressions else 0.0
    return ["android", clicks, impressions, round(rate, 4)]
if __name__ == "__main__":
    # Standalone run: compute the three rows; they are neither printed nor saved.
    all_click_one_rate, ios_click_one_rate, android_click_one_rate = (
        get_all_click_one_rate(),
        get_ios_click_one_rate(),
        get_android_click_one_rate(),
    )
import pymysql
def con_sql(sql):
    """Run ``sql`` against the ``jerry_test`` database and return all rows.

    Args:
        sql: Complete SQL statement; it is executed as-is, without parameters.

    Returns:
        The result of ``cursor.fetchall()`` for the statement.
    """
    # NOTE(review): host and root credentials are hard-coded; consider loading
    # them from configuration or the environment instead.
    db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
    try:
        cursor = db.cursor()
        cursor.execute(sql)
        return cursor.fetchall()
    finally:
        # Close the connection even when the query raises.
        db.close()
# 1. Share of exposures that are answers, all platforms.
def get_all_answer_imp_rate():
    """Build yesterday's answer-exposure share row for all platforms.

    Both counts come from ``data_feed_exposure`` rows dated yesterday; the
    numerator keeps only rows with ``cid_type='answer'``.

    Returns:
        list: ``["all", answer_impressions, all_impressions, rate]`` where
        ``rate`` is the ratio rounded to 4 decimals, or ``0.0`` when there
        were no exposures at all.
    """
    sql = "select count(cid) from data_feed_exposure where cid_type='answer' and from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day)"
    answer_impressions = con_sql(sql)[0][0]
    sql = "select count(cid) from data_feed_exposure where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day)"
    all_impressions = con_sql(sql)[0][0]
    # Guard the division: a day without exposures would otherwise raise
    # ZeroDivisionError.
    rate = answer_impressions / all_impressions if all_impressions else 0.0
    return ["all", answer_impressions, all_impressions, round(rate, 4)]
# 2. Share of exposures that are answers, iOS.
def get_ios_answer_imp_rate():
    """Build yesterday's answer-exposure share row for iOS.

    Both counts come from yesterday's ``data_feed_exposure`` rows with
    ``device_type='App Store'``; the numerator keeps only
    ``cid_type='answer'``.

    Returns:
        list: ``["ios", answer_impressions, all_impressions, rate]`` where
        ``rate`` is the ratio rounded to 4 decimals, or ``0.0`` when there
        were no matching exposures.
    """
    sql = "select count(cid) from data_feed_exposure where cid_type='answer' and device_type='App Store' and from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day)"
    answer_impressions = con_sql(sql)[0][0]
    sql = "select count(cid) from data_feed_exposure where device_type='App Store' and from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day)"
    all_impressions = con_sql(sql)[0][0]
    # Guard the division: a day without exposures would otherwise raise
    # ZeroDivisionError.
    rate = answer_impressions / all_impressions if all_impressions else 0.0
    return ["ios", answer_impressions, all_impressions, round(rate, 4)]
# 3. Share of exposures that are answers, Android.
def get_android_answer_imp_rate():
    """Build yesterday's answer-exposure share row for Android.

    Both counts come from yesterday's ``data_feed_exposure`` rows with
    ``device_type!='App Store'``; the numerator keeps only
    ``cid_type='answer'``.

    Returns:
        list: ``["android", answer_impressions, all_impressions, rate]``
        where ``rate`` is the ratio rounded to 4 decimals, or ``0.0`` when
        there were no matching exposures.
    """
    sql = "select count(cid) from data_feed_exposure where cid_type='answer' and device_type!='App Store' and from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day)"
    answer_impressions = con_sql(sql)[0][0]
    sql = "select count(cid) from data_feed_exposure where device_type!='App Store' and from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day)"
    all_impressions = con_sql(sql)[0][0]
    # Guard the division: a day without exposures would otherwise raise
    # ZeroDivisionError.
    rate = answer_impressions / all_impressions if all_impressions else 0.0
    return ["android", answer_impressions, all_impressions, round(rate, 4)]
if __name__ == "__main__":
    # Standalone run: compute the three rows; they are neither printed nor saved.
    all_answer_imp_rate, ios_answer_imp_rate, android_answer_imp_rate = (
        get_all_answer_imp_rate(),
        get_ios_answer_imp_rate(),
        get_android_answer_imp_rate(),
    )
import pymysql
def con_sql(sql):
    """Run ``sql`` against the ``jerry_test`` database and return all rows.

    Args:
        sql: Complete SQL statement; it is executed as-is, without parameters.

    Returns:
        The result of ``cursor.fetchall()`` for the statement.
    """
    # NOTE(review): host and root credentials are hard-coded; consider loading
    # them from configuration or the environment instead.
    db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
    try:
        cursor = db.cursor()
        cursor.execute(sql)
        return cursor.fetchall()
    finally:
        # Close the connection even when the query raises.
        db.close()
# 1. Share of users who clicked an answer, all platforms.
def get_all_click_answer_rate():
    """Build yesterday's answer-clicking user share row for all platforms.

    The numerator counts distinct ``device_id`` values in yesterday's answer
    clicks; the denominator counts distinct ``device_id`` values in
    yesterday's answer exposures.

    Returns:
        list: ``["all", clicking_users, exposed_users, rate]`` where ``rate``
        is the ratio rounded to 4 decimals, or ``0.0`` when no device was
        exposed to an answer.
    """
    sql = "select count(distinct(device_id)) from data_feed_click where cid_type='answer' and from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day)"
    clicking_users = con_sql(sql)[0][0]
    sql = "select count(distinct(device_id)) from data_feed_exposure where cid_type='answer' and from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day)"
    exposed_users = con_sql(sql)[0][0]
    # Guard the division: a day without answer exposures would otherwise
    # raise ZeroDivisionError.
    rate = clicking_users / exposed_users if exposed_users else 0.0
    return ["all", clicking_users, exposed_users, round(rate, 4)]
# 2. Share of users who clicked an answer, iOS.
def get_ios_click_answer_rate():
    """Build yesterday's answer-clicking user share row for iOS.

    The numerator counts distinct devices in yesterday's answer clicks with
    ``device_type='AppStore'``; the denominator counts distinct devices in
    yesterday's answer exposures with ``device_type='App Store'``.

    Returns:
        list: ``["ios", clicking_users, exposed_users, rate]`` where ``rate``
        is the ratio rounded to 4 decimals, or ``0.0`` when no device was
        exposed to an answer.
    """
    sql = "select count(distinct(device_id)) from data_feed_click where cid_type='answer' and device_type='AppStore' and from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day)"
    clicking_users = con_sql(sql)[0][0]
    sql = "select count(distinct(device_id)) from data_feed_exposure where cid_type='answer' and device_type='App Store' and from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day)"
    exposed_users = con_sql(sql)[0][0]
    # Guard the division: a day without answer exposures would otherwise
    # raise ZeroDivisionError.
    rate = clicking_users / exposed_users if exposed_users else 0.0
    return ["ios", clicking_users, exposed_users, round(rate, 4)]
# 3. Share of users who clicked an answer, Android.
def get_android_click_answer_rate():
    """Build yesterday's answer-clicking user share row for Android.

    The numerator counts distinct devices in yesterday's answer clicks with
    ``device_type!='AppStore'``; the denominator counts distinct devices in
    yesterday's answer exposures with ``device_type!='App Store'``.

    Returns:
        list: ``["android", clicking_users, exposed_users, rate]`` where
        ``rate`` is the ratio rounded to 4 decimals, or ``0.0`` when no
        device was exposed to an answer.
    """
    sql = "select count(distinct(device_id)) from data_feed_click where cid_type='answer' and device_type!='AppStore' and from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day)"
    clicking_users = con_sql(sql)[0][0]
    sql = "select count(distinct(device_id)) from data_feed_exposure where cid_type='answer' and device_type!='App Store' and from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day)"
    exposed_users = con_sql(sql)[0][0]
    # Guard the division: a day without answer exposures would otherwise
    # raise ZeroDivisionError.
    rate = clicking_users / exposed_users if exposed_users else 0.0
    return ["android", clicking_users, exposed_users, round(rate, 4)]
if __name__ == "__main__":
    # Standalone run: compute the three rows; they are neither printed nor saved.
    all_click_answer_rate, ios_click_answer_rate, android_click_answer_rate = (
        get_all_click_answer_rate(),
        get_ios_click_answer_rate(),
        get_android_click_answer_rate(),
    )
import pymysql
def con_sql(sql):
    """Run ``sql`` against the ``jerry_test`` database and return all rows.

    Args:
        sql: Complete SQL statement; it is executed as-is, without parameters.

    Returns:
        The result of ``cursor.fetchall()`` for the statement.
    """
    # NOTE(review): host and root credentials are hard-coded; consider loading
    # them from configuration or the environment instead.
    db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
    try:
        cursor = db.cursor()
        cursor.execute(sql)
        return cursor.fetchall()
    finally:
        # Close the connection even when the query raises.
        db.close()
# 1. Share of users who clicked a diary, all platforms.
def get_all_click_diary_rate():
    """Build yesterday's diary-clicking user share row for all platforms.

    The numerator counts distinct ``device_id`` values in yesterday's diary
    clicks; the denominator counts distinct ``device_id`` values in
    yesterday's diary exposures.

    Returns:
        list: ``["all", clicking_users, exposed_users, rate]`` where ``rate``
        is the ratio rounded to 4 decimals, or ``0.0`` when no device was
        exposed to a diary.
    """
    sql = "select count(distinct(device_id)) from data_feed_click where cid_type='diary' and from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day)"
    clicking_users = con_sql(sql)[0][0]
    sql = "select count(distinct(device_id)) from data_feed_exposure where cid_type='diary' and from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day)"
    exposed_users = con_sql(sql)[0][0]
    # Guard the division: a day without diary exposures would otherwise
    # raise ZeroDivisionError.
    rate = clicking_users / exposed_users if exposed_users else 0.0
    return ["all", clicking_users, exposed_users, round(rate, 4)]
# 2. Share of users who clicked a diary, iOS.
def get_ios_click_diary_rate():
    """Build yesterday's diary-clicking user share row for iOS.

    The numerator counts distinct devices in yesterday's diary clicks with
    ``device_type='AppStore'``; the denominator counts distinct devices in
    yesterday's diary exposures with ``device_type='App Store'``.

    Returns:
        list: ``["ios", clicking_users, exposed_users, rate]`` where ``rate``
        is the ratio rounded to 4 decimals, or ``0.0`` when no device was
        exposed to a diary.
    """
    sql = "select count(distinct(device_id)) from data_feed_click where cid_type='diary' and device_type='AppStore' and from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day)"
    clicking_users = con_sql(sql)[0][0]
    sql = "select count(distinct(device_id)) from data_feed_exposure where cid_type='diary' and device_type='App Store' and from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day)"
    exposed_users = con_sql(sql)[0][0]
    # Guard the division: a day without diary exposures would otherwise
    # raise ZeroDivisionError.
    rate = clicking_users / exposed_users if exposed_users else 0.0
    return ["ios", clicking_users, exposed_users, round(rate, 4)]
# 3. Share of users who clicked a diary, Android.
def get_android_click_diary_rate():
    """Build yesterday's diary-clicking user share row for Android.

    The numerator counts distinct devices in yesterday's diary clicks with
    ``device_type!='AppStore'``; the denominator counts distinct devices in
    yesterday's diary exposures with ``device_type!='App Store'``.

    Returns:
        list: ``["android", clicking_users, exposed_users, rate]`` where
        ``rate`` is the ratio rounded to 4 decimals, or ``0.0`` when no
        device was exposed to a diary.
    """
    sql = "select count(distinct(device_id)) from data_feed_click where cid_type='diary' and device_type!='AppStore' and from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day)"
    clicking_users = con_sql(sql)[0][0]
    sql = "select count(distinct(device_id)) from data_feed_exposure where cid_type='diary' and device_type!='App Store' and from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day)"
    exposed_users = con_sql(sql)[0][0]
    # Guard the division: a day without diary exposures would otherwise
    # raise ZeroDivisionError.
    rate = clicking_users / exposed_users if exposed_users else 0.0
    return ["android", clicking_users, exposed_users, round(rate, 4)]
if __name__ == "__main__":
    # Standalone run: compute the three rows; they are neither printed nor saved.
    all_click_diary_rate, ios_click_diary_rate, android_click_diary_rate = (
        get_all_click_diary_rate(),
        get_ios_click_diary_rate(),
        get_android_click_diary_rate(),
    )
import pymysql
def con_sql(sql):
    """Run ``sql`` against the ``jerry_test`` database and return all rows.

    Args:
        sql: Complete SQL statement; it is executed as-is, without parameters.

    Returns:
        The result of ``cursor.fetchall()`` for the statement.
    """
    # NOTE(review): host and root credentials are hard-coded; consider loading
    # them from configuration or the environment instead.
    db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
    try:
        cursor = db.cursor()
        cursor.execute(sql)
        return cursor.fetchall()
    finally:
        # Close the connection even when the query raises.
        db.close()
# 1. Share of exposed users with zero clicks, all platforms.
def get_all_click_zero_rate():
    """Build yesterday's zero-click user share row for all platforms.

    Zero-click users are computed as distinct exposed devices minus distinct
    clicking devices, both taken from yesterday's rows.

    Returns:
        list: ``["all", zero_click_users, exposed_users, rate]`` where
        ``rate`` is the ratio rounded to 4 decimals, or ``0.0`` when no
        device was exposed.
    """
    sql = "select count(distinct(device_id)) from data_feed_click where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day)"
    clicking_users = con_sql(sql)[0][0]
    sql = "select count(distinct(device_id)) from data_feed_exposure where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day)"
    exposed_users = con_sql(sql)[0][0]
    # NOTE(review): the subtraction goes negative if some clicking devices
    # have no exposure row; confirm that cannot happen in the data.
    zero_click_users = exposed_users - clicking_users
    # Guard the division: a day without exposures would otherwise raise
    # ZeroDivisionError.
    rate = zero_click_users / exposed_users if exposed_users else 0.0
    return ["all", zero_click_users, exposed_users, round(rate, 4)]
# 2. Share of exposed users with zero clicks, iOS.
def get_ios_click_zero_rate():
    """Build yesterday's zero-click user share row for iOS.

    Zero-click users are computed as distinct exposed devices
    (``device_type='App Store'``) minus distinct clicking devices
    (``device_type='AppStore'``), both taken from yesterday's rows.

    Returns:
        list: ``["ios", zero_click_users, exposed_users, rate]`` where
        ``rate`` is the ratio rounded to 4 decimals, or ``0.0`` when no
        device was exposed.
    """
    sql = "select count(distinct(device_id)) from data_feed_click where device_type='AppStore' and from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day)"
    clicking_users = con_sql(sql)[0][0]
    sql = "select count(distinct(device_id)) from data_feed_exposure where device_type='App Store' and from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day)"
    exposed_users = con_sql(sql)[0][0]
    # NOTE(review): the subtraction goes negative if some clicking devices
    # have no exposure row; confirm that cannot happen in the data.
    zero_click_users = exposed_users - clicking_users
    # Guard the division: a day without exposures would otherwise raise
    # ZeroDivisionError.
    rate = zero_click_users / exposed_users if exposed_users else 0.0
    return ["ios", zero_click_users, exposed_users, round(rate, 4)]
# 3. Share of exposed users with zero clicks, Android.
def get_android_click_zero_rate():
    """Build yesterday's zero-click user share row for Android.

    Zero-click users are computed as distinct exposed devices
    (``device_type!='App Store'``) minus distinct clicking devices
    (``device_type!='AppStore'``), both taken from yesterday's rows.

    Returns:
        list: ``["android", zero_click_users, exposed_users, rate]`` where
        ``rate`` is the ratio rounded to 4 decimals, or ``0.0`` when no
        device was exposed.
    """
    sql = "select count(distinct(device_id)) from data_feed_click where device_type!='AppStore' and from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day)"
    clicking_users = con_sql(sql)[0][0]
    sql = "select count(distinct(device_id)) from data_feed_exposure where device_type!='App Store' and from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day)"
    exposed_users = con_sql(sql)[0][0]
    # NOTE(review): the subtraction goes negative if some clicking devices
    # have no exposure row; confirm that cannot happen in the data.
    zero_click_users = exposed_users - clicking_users
    # Guard the division: a day without exposures would otherwise raise
    # ZeroDivisionError.
    rate = zero_click_users / exposed_users if exposed_users else 0.0
    return ["android", zero_click_users, exposed_users, round(rate, 4)]
if __name__ == "__main__":
    # Standalone run: compute the three rows; they are neither printed nor saved.
    all_click_zero_rate, ios_click_zero_rate, android_click_zero_rate = (
        get_all_click_zero_rate(),
        get_ios_click_zero_rate(),
        get_android_click_zero_rate(),
    )
import datetime
from getAnswerImpRate import get_all_answer_imp_rate,get_ios_answer_imp_rate,get_android_answer_imp_rate
from getActivateUidCtr import get_all_click_one_rate,get_ios_click_one_rate,get_android_click_one_rate
from getClickAnswerUidRate import get_all_click_answer_rate,get_ios_click_answer_rate,get_android_click_answer_rate
from getClickDiaryUidRate import get_all_click_diary_rate,get_ios_click_diary_rate,get_android_click_diary_rate
from getClickZeroUidRate import get_all_click_zero_rate,get_ios_click_zero_rate,get_android_click_zero_rate
def get_yesterday_date(today=None):
    """Return the day before ``today`` formatted as ``YYYYMMDD``.

    Args:
        today: Optional ``datetime.date`` to count back from. Defaults to
            ``datetime.date.today()``, which matches the original behavior.

    Returns:
        str: The previous day's date, e.g. ``"20180228"``.
    """
    if today is None:
        today = datetime.date.today()
    return (today - datetime.timedelta(days=1)).strftime("%Y%m%d")
def _write_rate_section(f, title, header, getters, done_message):
    """Write one rate table (title, header, one row per getter), then log it."""
    f.write(title)
    f.write(header)
    # Run all queries for the section before writing any of its rows.
    rows = [getter() for getter in getters]
    for row in rows:
        f.write('\t'.join(str(value) for value in row) + '\n')
    print(done_message)


def result2file(fpath):
    """Compute every ratio feature and write the report to ``fpath``.

    The report has a short preamble followed by five tab-separated tables
    (sections 1.1 to 1.5), each with one row per platform getter. A progress
    message is printed after each table.

    Args:
        fpath: Destination path; the file is overwritten.
    """
    sections = [
        ("#1.1问答曝光占比(=问答被曝光数/总cid被曝光数)\n",
         "平台\t问答被曝光数\t总cid被曝光数\t问答被曝光数占比\n",
         (get_all_answer_imp_rate, get_ios_answer_imp_rate, get_android_answer_imp_rate),
         "1.1已将问答曝光占比存入文件"),
        ("#1.2有点击用户点击率(=有点击用户点击次数/有点击用户曝光次数)\n",
         "平台\t有点击用户点击次数\t有点击用户曝光次数\t有点击用户点击率\n",
         (get_all_click_one_rate, get_ios_click_one_rate, get_android_click_one_rate),
         "1.2已将有点击用户点击率存入文件"),
        ("#1.3点击问答用户占比(=点击问答用户数/曝光问答用户数)\n",
         "平台\t点击问答用户数\t曝光问答用户数\t点击问答用户占比\n",
         (get_all_click_answer_rate, get_ios_click_answer_rate, get_android_click_answer_rate),
         "1.3已将点击问答用户占比存入文件"),
        ("#1.4点击日记用户占比(=点击日记用户数/曝光日记用户数)\n",
         "平台\t点击日记用户数\t曝光日记用户数\t点击日记用户占比\n",
         (get_all_click_diary_rate, get_ios_click_diary_rate, get_android_click_diary_rate),
         "1.4已将点击日记用户占比存入文件"),
        ("#1.5无点击用户占比(=无点击用户数/有曝光用户数)\n",
         "平台\t无点击用户数\t有曝光用户数\t无点击用户占比\n",
         (get_all_click_zero_rate, get_ios_click_zero_rate, get_android_click_zero_rate),
         "1.5已将无点击用户占比存入文件"),
    ]
    # Explicit UTF-8: the report contains Chinese text, and the locale default
    # encoding (e.g. ASCII under a C/POSIX locale) may not be able to encode it.
    with open(fpath, 'w', encoding='utf-8') as f:
        f.write("#注意:以下数据都是首页的\n")
        f.write("#1. 比例特征\n")
        f.write("=================================================================\n")
        for title, header, getters, done_message in sections:
            _write_rate_section(f, title, header, getters, done_message)
def main():
    """Write all ratio features to a file named after yesterday's date."""
    report_path = "../data/rate_features_%s.txt" % get_yesterday_date()
    print("开始获取比例特征...")
    result2file(report_path)
    print("已将所有比例特征存入文件")


if __name__ == '__main__':
    main()
import pymysql
import datetime
def con_sql(sql):
    """Run ``sql`` against the ``jerry_test`` database and return all rows.

    Args:
        sql: Complete SQL statement; it is executed as-is, without parameters.

    Returns:
        The result of ``cursor.fetchall()`` for the statement.
    """
    # NOTE(review): host and root credentials are hard-coded; consider loading
    # them from configuration or the environment instead.
    db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
    try:
        cursor = db.cursor()
        cursor.execute(sql)
        return cursor.fetchall()
    finally:
        # Close the connection even when the query raises.
        db.close()
def tuple2dict(tuple_result):
    """Convert SQL result rows into a dict keyed by the first column.

    Args:
        tuple_result: Iterable of rows; only columns 0 and 1 are used.

    Returns:
        dict: ``{row[0]: row[1]}``; a later row overwrites an earlier one
        that has the same key.
    """
    return {row[0]: row[1] for row in tuple_result}
def result2file(result_lst,fpath):
    """Write the per-platform answer rankings to ``fpath`` as tab-separated text.

    Args:
        result_lst: List of platform groups; each group is a list of row
            tuples whose fields are written in order.
        fpath: Destination path; the file is overwritten.
    """
    header = '\t'.join(["平台", "问答id", "问答被点击数", "问答被曝光数", "问答被点击率", "问答链接"]) + '\n'
    # Explicit UTF-8: the header is Chinese text, and the locale default
    # encoding (e.g. ASCII under a C/POSIX locale) may not be able to encode it.
    with open(fpath, 'w', encoding='utf-8') as f:
        f.write(header)
        for group in result_lst:
            for row in group:
                f.write('\t'.join(str(field) for field in row) + '\n')
            # NOTE(review): the source's indentation was lost; the separator is
            # assumed to close each platform group - confirm.
            f.write("=================================================================\n")
# 1. Yesterday's answers, all platforms.
# 1.1 Click count of every answer clicked yesterday, all platforms.
def get_all_answer_count_by_click():
    """Return ``{cid: click_count}`` for answers clicked yesterday on all platforms."""
    query = "select cid,count(cid) from data_feed_click where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and cid_type='answer' group by cid order by count(cid) desc"
    return tuple2dict(con_sql(query))
# 1.2 Exposure count of every answer exposed yesterday, all platforms.
def get_all_answer_count_by_imp():
    """Return ``{cid: exposure_count}`` for answers exposed yesterday on all platforms."""
    query = "select cid,count(cid) from data_feed_exposure where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and cid_type='answer' group by cid order by count(cid) desc"
    return tuple2dict(con_sql(query))
# 1.3 Top 100 answers by click-through rate, all platforms.
def get_all_top100_answer_rate_by_ctr(all_answer_count_by_click,all_answer_count_by_imp):
    """Rank answers by click-through rate and keep at most the top 100.

    Only answers that appear in both mappings and have more than 2 clicks are
    ranked.

    Args:
        all_answer_count_by_click: ``{cid: click_count}``.
        all_answer_count_by_imp: ``{cid: exposure_count}``.

    Returns:
        list: Tuples ``("all", cid, clicks, exposures, ctr, url)`` sorted by
        ``ctr`` (rounded to 4 decimals) in descending order.

    Raises:
        ValueError: If a ranked ``cid`` contains no ``'|'``.
    """
    ranked = []
    for cid, clicks in all_answer_count_by_click.items():
        if cid not in all_answer_count_by_imp or clicks <= 2:
            continue
        exposures = all_answer_count_by_imp[cid]
        # The URL uses the part of the cid that follows the first '|'.
        url = "http://m.igengmei.com/answer/" + cid[cid.index('|')+1:] + '/'
        ranked.append(("all", cid, clicks, exposures, round(clicks / exposures, 4), url))
    ranked.sort(key=lambda row: row[4], reverse=True)
    return ranked[:100]
# 2. Yesterday's answers, iOS.
# 2.1 Click count of every answer clicked yesterday on iOS.
def get_ios_answer_count_by_click():
    """Return ``{cid: click_count}`` for yesterday's answer clicks with ``device_type='AppStore'``."""
    query = "select cid,count(cid) from data_feed_click where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and device_type='AppStore' and cid_type='answer' group by cid order by count(cid) desc"
    return tuple2dict(con_sql(query))
# 2.2 Exposure count of every answer exposed yesterday on iOS.
def get_ios_answer_count_by_imp():
    """Return ``{cid: exposure_count}`` for yesterday's answer exposures with ``device_type='App Store'``."""
    query = "select cid,count(cid) from data_feed_exposure where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and device_type='App Store' and cid_type='answer' group by cid order by count(cid) desc"
    return tuple2dict(con_sql(query))
# 2.3 Top 100 answers by click-through rate, iOS.
def get_ios_top100_answer_rate_by_ctr(ios_answer_count_by_click,ios_answer_count_by_imp):
    """Rank iOS answers by click-through rate and keep at most the top 100.

    Only answers that appear in both mappings and have more than 2 clicks are
    ranked.

    Args:
        ios_answer_count_by_click: ``{cid: click_count}``.
        ios_answer_count_by_imp: ``{cid: exposure_count}``.

    Returns:
        list: Tuples ``("ios", cid, clicks, exposures, ctr, url)`` sorted by
        ``ctr`` (rounded to 4 decimals) in descending order.

    Raises:
        ValueError: If a ranked ``cid`` contains no ``'|'``.
    """
    ranked = []
    for cid, clicks in ios_answer_count_by_click.items():
        if cid not in ios_answer_count_by_imp or clicks <= 2:
            continue
        exposures = ios_answer_count_by_imp[cid]
        # The URL uses the part of the cid that follows the first '|'.
        url = "http://m.igengmei.com/answer/" + cid[cid.index('|')+1:] + '/'
        ranked.append(("ios", cid, clicks, exposures, round(clicks / exposures, 4), url))
    ranked.sort(key=lambda row: row[4], reverse=True)
    return ranked[:100]
# 3. Yesterday's answers, Android.
# 3.1 Click count of every answer clicked yesterday on Android.
def get_android_answer_rate_by_click():
    """Return ``{cid: click_count}`` for yesterday's answer clicks with ``device_type!='AppStore'``."""
    query = "select cid,count(cid) from data_feed_click where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and device_type!='AppStore' and cid_type='answer' group by cid order by count(cid) desc"
    return tuple2dict(con_sql(query))
# 3.2 Exposure count of every answer exposed yesterday on Android.
def get_android_answer_rate_by_imp():
    """Return ``{cid: exposure_count}`` for yesterday's answer exposures with ``device_type!='App Store'``."""
    query = "select cid,count(cid) from data_feed_exposure where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and device_type!='App Store' and cid_type='answer' group by cid order by count(cid) desc"
    return tuple2dict(con_sql(query))
# 3.3 Top 100 answers by click-through rate, Android.
def get_android_top100_answer_rate_by_ctr(android_answer_count_by_click,android_answer_count_by_imp):
    """Rank Android answers by click-through rate and keep at most the top 100.

    Only answers that appear in both mappings and have more than 2 clicks are
    ranked.

    Args:
        android_answer_count_by_click: ``{cid: click_count}``.
        android_answer_count_by_imp: ``{cid: exposure_count}``.

    Returns:
        list: Tuples ``("android", cid, clicks, exposures, ctr, url)`` sorted
        by ``ctr`` (rounded to 4 decimals) in descending order.

    Raises:
        ValueError: If a ranked ``cid`` contains no ``'|'``.
    """
    ranked = []
    for cid, clicks in android_answer_count_by_click.items():
        if cid not in android_answer_count_by_imp or clicks <= 2:
            continue
        exposures = android_answer_count_by_imp[cid]
        # The URL uses the part of the cid that follows the first '|'.
        url = "http://m.igengmei.com/answer/" + cid[cid.index('|')+1:] + '/'
        ranked.append(("android", cid, clicks, exposures, round(clicks / exposures, 4), url))
    ranked.sort(key=lambda row: row[4], reverse=True)
    return ranked[:100]
if __name__ == "__main__":
    # Script entry point: rank answers per platform, then write one combined file.
    print("开始获取top100点击率的问答...")

    # All platforms.
    all_answer_count_by_click = get_all_answer_count_by_click()
    all_answer_count_by_imp = get_all_answer_count_by_imp()
    all_top100_answer_rate_by_ctr = get_all_top100_answer_rate_by_ctr(
        all_answer_count_by_click, all_answer_count_by_imp)
    print("3.1已获得所有平台的top100点击率的问答")

    # iOS.
    ios_answer_count_by_click = get_ios_answer_count_by_click()
    ios_answer_count_by_imp = get_ios_answer_count_by_imp()
    ios_top100_answer_rate_by_ctr = get_ios_top100_answer_rate_by_ctr(
        ios_answer_count_by_click, ios_answer_count_by_imp)
    print("3.2已获得ios平台的top100点击率的问答")

    # Android.
    android_answer_count_by_click = get_android_answer_rate_by_click()
    android_answer_count_by_imp = get_android_answer_rate_by_imp()
    android_top100_answer_rate_by_ctr = get_android_top100_answer_rate_by_ctr(
        android_answer_count_by_click, android_answer_count_by_imp)
    print("3.3已获得安卓平台的top100点击率的问答")

    result_lst = [
        all_top100_answer_rate_by_ctr,
        ios_top100_answer_rate_by_ctr,
        android_top100_answer_rate_by_ctr,
    ]
    # The output file is named after yesterday's date (YYYYMMDD).
    yesterday = (datetime.date.today() - datetime.timedelta(days=1)).strftime("%Y%m%d")
    output_path = "../data/top100_ctr_answer_%s.csv" % yesterday
    result2file(result_lst, output_path)
    print("已将top100点击率的问答存入文件")
import pymysql
import datetime
def con_sql(sql):
    """Run ``sql`` against the ``jerry_test`` database and return all rows.

    Args:
        sql: Complete SQL statement; it is executed as-is, without parameters.

    Returns:
        The result of ``cursor.fetchall()`` for the statement.
    """
    # NOTE(review): host and root credentials are hard-coded; consider loading
    # them from configuration or the environment instead.
    db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
    try:
        cursor = db.cursor()
        cursor.execute(sql)
        return cursor.fetchall()
    finally:
        # Close the connection even when the query raises.
        db.close()
def tuple2dict(tuple_result):
    """Convert SQL result rows into a dict keyed by the first column.

    Args:
        tuple_result: Iterable of rows; only columns 0 and 1 are used.

    Returns:
        dict: ``{row[0]: row[1]}``; a later row overwrites an earlier one
        that has the same key.
    """
    return {row[0]: row[1] for row in tuple_result}
def result2file(result_lst,fpath):
    """Write the per-platform diary rankings to ``fpath`` as tab-separated text.

    Args:
        result_lst: List of platform groups; each group is a list of row
            tuples whose fields are written in order.
        fpath: Destination path; the file is overwritten.
    """
    header = '\t'.join(["平台", "日记id", "日记被点击数", "日记被曝光数", "日记被点击率", "日记链接"]) + '\n'
    # Explicit UTF-8: the header is Chinese text, and the locale default
    # encoding (e.g. ASCII under a C/POSIX locale) may not be able to encode it.
    with open(fpath, 'w', encoding='utf-8') as f:
        f.write(header)
        for group in result_lst:
            for row in group:
                f.write('\t'.join(str(field) for field in row) + '\n')
            # NOTE(review): the source's indentation was lost; the separator is
            # assumed to close each platform group - confirm.
            f.write("=================================================================\n")
# 1. Yesterday's top 100 diaries on all platforms (sorted by CTR).
# 1.1 Click count of every diary clicked yesterday, all platforms.
def get_all_diary_count_by_click():
    """Return ``{cid: click_count}`` for diaries clicked yesterday on all platforms."""
    query = "select cid,count(cid) from data_feed_click where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and cid_type='diary' group by cid order by count(cid) desc"
    return tuple2dict(con_sql(query))
# 1.2 Exposure count of every diary exposed yesterday, all platforms.
def get_all_diary_count_by_imp():
    """Return ``{cid: exposure_count}`` for diaries exposed yesterday on all platforms."""
    query = "select cid,count(cid) from data_feed_exposure where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and cid_type='diary' group by cid order by count(cid) desc"
    return tuple2dict(con_sql(query))
# 1.3 Top 100 diaries by click-through rate, all platforms.
def get_all_top100_diary_rate_by_ctr(all_diary_count_by_click,all_diary_count_by_imp):
    """Rank diaries by click-through rate and keep at most the top 100.

    Only diaries that appear in both mappings and have more than 4 clicks are
    ranked.

    Args:
        all_diary_count_by_click: ``{cid: click_count}``.
        all_diary_count_by_imp: ``{cid: exposure_count}``.

    Returns:
        list: Tuples ``("all", cid, clicks, exposures, ctr, url)`` sorted by
        ``ctr`` (rounded to 4 decimals) in descending order.

    Raises:
        ValueError: If a ranked ``cid`` contains no ``'|'``.
    """
    ranked = []
    for cid, clicks in all_diary_count_by_click.items():
        if cid not in all_diary_count_by_imp or clicks <= 4:
            continue
        exposures = all_diary_count_by_imp[cid]
        # The URL uses the part of the cid that follows the first '|'.
        url = "http://m.igengmei.com/diary_book/" + cid[cid.index('|')+1:] + '/'
        ranked.append(("all", cid, clicks, exposures, round(clicks / exposures, 4), url))
    ranked.sort(key=lambda row: row[4], reverse=True)
    return ranked[:100]
# 2. Yesterday's top 100 diaries on iOS (sorted by CTR).
# 2.1 Click count of every diary clicked yesterday on iOS.
def get_ios_diary_count_by_click():
    """Return ``{cid: click_count}`` for yesterday's diary clicks with ``device_type='AppStore'``."""
    query = "select cid,count(cid) from data_feed_click where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and device_type='AppStore' and cid_type='diary' group by cid order by count(cid) desc"
    return tuple2dict(con_sql(query))
# 2.2 Exposure count of every diary exposed yesterday on iOS.
def get_ios_diary_count_by_imp():
    """Return ``{cid: exposure_count}`` for yesterday's diary exposures with ``device_type='App Store'``."""
    query = "select cid,count(cid) from data_feed_exposure where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and device_type='App Store' and cid_type='diary' group by cid order by count(cid) desc"
    return tuple2dict(con_sql(query))
# 2.3 Top 100 diaries by click-through rate, iOS.
def get_ios_top100_diary_rate_by_ctr(ios_top100_diary_count_by_click,ios_top100_diary_count_by_imp):
    """Rank iOS diaries by click-through rate and keep at most the top 100.

    Only diaries that appear in both mappings and have more than 4 clicks are
    ranked. The counts are read from the arguments; the previous body ignored
    them and read same-purpose module globals, which only exist when the file
    runs as a script.

    Args:
        ios_top100_diary_count_by_click: ``{cid: click_count}``.
        ios_top100_diary_count_by_imp: ``{cid: exposure_count}``.

    Returns:
        list: Tuples ``("ios", cid, clicks, exposures, ctr, url)`` sorted by
        ``ctr`` (rounded to 4 decimals) in descending order.

    Raises:
        ValueError: If a ranked ``cid`` contains no ``'|'``.
    """
    ranked = []
    for cid, clicks in ios_top100_diary_count_by_click.items():
        if cid not in ios_top100_diary_count_by_imp or clicks <= 4:
            continue
        exposures = ios_top100_diary_count_by_imp[cid]
        # The URL uses the part of the cid that follows the first '|'.
        url = "http://m.igengmei.com/diary_book/" + cid[cid.index('|')+1:] + '/'
        ranked.append(("ios", cid, clicks, exposures, round(clicks / exposures, 4), url))
    ranked.sort(key=lambda row: row[4], reverse=True)
    return ranked[:100]
# 3. Yesterday's top 100 diaries on Android (sorted by CTR).
# 3.1 Click count of every diary clicked yesterday on Android.
def get_android_diary_rate_by_click():
    """Return ``{cid: click_count}`` for yesterday's diary clicks with ``device_type!='AppStore'``."""
    query = "select cid,count(cid) from data_feed_click where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and device_type!='AppStore' and cid_type='diary' group by cid order by count(cid) desc"
    return tuple2dict(con_sql(query))
# 3.2 Exposure count of every diary exposed yesterday on Android.
def get_android_diary_rate_by_imp():
    """Return ``{cid: exposure_count}`` for yesterday's diary exposures with ``device_type!='App Store'``."""
    query = "select cid,count(cid) from data_feed_exposure where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) and device_type!='App Store' and cid_type='diary' group by cid order by count(cid) desc"
    return tuple2dict(con_sql(query))
# 3.3 Top 100 diaries by click-through rate, Android.
def get_android_top100_diary_rate_by_ctr(android_top100_diary_count_by_click,android_top100_diary_count_by_imp):
    """Rank Android diaries by click-through rate and keep at most the top 100.

    Only diaries that appear in both mappings and have more than 4 clicks are
    ranked. The counts are read from the arguments; the previous body ignored
    them and read same-purpose module globals, which only exist when the file
    runs as a script.

    Args:
        android_top100_diary_count_by_click: ``{cid: click_count}``.
        android_top100_diary_count_by_imp: ``{cid: exposure_count}``.

    Returns:
        list: Tuples ``("android", cid, clicks, exposures, ctr, url)`` sorted
        by ``ctr`` (rounded to 4 decimals) in descending order.

    Raises:
        ValueError: If a ranked ``cid`` contains no ``'|'``.
    """
    ranked = []
    for cid, clicks in android_top100_diary_count_by_click.items():
        if cid not in android_top100_diary_count_by_imp or clicks <= 4:
            continue
        exposures = android_top100_diary_count_by_imp[cid]
        # The URL uses the part of the cid that follows the first '|'.
        url = "http://m.igengmei.com/diary_book/" + cid[cid.index('|')+1:] + '/'
        ranked.append(("android", cid, clicks, exposures, round(clicks / exposures, 4), url))
    ranked.sort(key=lambda row: row[4], reverse=True)
    return ranked[:100]
if __name__ == "__main__":
    # Script entry point: rank diaries per platform, then write one combined file.
    print("开始获取top100点击率的日记...")

    # All platforms.
    all_diary_count_by_click = get_all_diary_count_by_click()
    all_diary_count_by_imp = get_all_diary_count_by_imp()
    all_top100_diary_rate_by_ctr = get_all_top100_diary_rate_by_ctr(
        all_diary_count_by_click, all_diary_count_by_imp)
    print("2.1已获得所有平台的top100点击率的日记")

    # iOS.
    ios_diary_count_by_click = get_ios_diary_count_by_click()
    ios_diary_count_by_imp = get_ios_diary_count_by_imp()
    ios_top100_diary_rate_by_ctr = get_ios_top100_diary_rate_by_ctr(
        ios_diary_count_by_click, ios_diary_count_by_imp)
    print("2.2已获得ios平台的top100点击率的日记")

    # Android.
    android_diary_count_by_click = get_android_diary_rate_by_click()
    android_diary_count_by_imp = get_android_diary_rate_by_imp()
    android_top100_diary_rate_by_ctr = get_android_top100_diary_rate_by_ctr(
        android_diary_count_by_click, android_diary_count_by_imp)
    print("2.3已获得安卓平台的top100点击率的日记")

    result_lst = [
        all_top100_diary_rate_by_ctr,
        ios_top100_diary_rate_by_ctr,
        android_top100_diary_rate_by_ctr,
    ]
    # The output file is named after yesterday's date (YYYYMMDD).
    yesterday = (datetime.date.today() - datetime.timedelta(days=1)).strftime("%Y%m%d")
    output_path = "../data/top100_ctr_diary_%s.csv" % yesterday
    result2file(result_lst, output_path)
    print("已将top100点击率的日记存入文件")
# Run getRate.py, then getTop100Diary.py, then getTop100Answer.py, one after
# another, using whichever `python` is on PATH.
python getRate.py
python getTop100Diary.py
python getTop100Answer.py
import pymysql
import datetime
def con_sql(sql):
    """Run ``sql`` against the ``jerry_test`` database and return all rows.

    Args:
        sql: Complete SQL statement; it is executed as-is, without parameters.

    Returns:
        The result of ``cursor.fetchall()`` for the statement.
    """
    # NOTE(review): host and root credentials are hard-coded; consider loading
    # them from configuration or the environment instead.
    db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
    try:
        cursor = db.cursor()
        cursor.execute(sql)
        return cursor.fetchall()
    finally:
        # Close the connection even when the query raises.
        db.close()
def get_yesterday_date(today=None):
    """Return the day before ``today`` formatted as ``YYYYMMDD``.

    Args:
        today: Optional ``datetime.date`` to count back from. Defaults to
            ``datetime.date.today()``, which matches the original behavior.

    Returns:
        str: The previous day's date, e.g. ``"20180228"``.
    """
    if today is None:
        today = datetime.date.today()
    return (today - datetime.timedelta(days=1)).strftime("%Y%m%d")
def get_uid_click_times():
    """Return ``(device_id, click_times)`` rows for yesterday, most clicks first."""
    query = "select device_id,count(cid_type) click_times from data_feed_click where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) group by device_id order by click_times desc"
    return con_sql(query)
def result2file(result,fpath):
    """Write ``result`` rows to ``fpath`` as a two-column tab-separated table.

    Args:
        result: Iterable of rows; columns 0 and 1 are written as
            ``device_id`` and ``click_times``.
        fpath: Destination path; the file is overwritten.
    """
    with open(fpath,'w') as out:
        out.write("device_id"+'\t'+"click_times"+'\n')
        for row in result:
            device_id = str(row[0])
            click_times = str(row[1])
            out.write(device_id+'\t'+click_times+'\n')
def main():
    """Fetch yesterday's per-device click counts and save them to a dated file."""
    print("开始获取用户点击次数表...")
    click_rows = get_uid_click_times()
    target_path = "../data/uid_click_times_%s.txt" % get_yesterday_date()
    result2file(click_rows, target_path)
    print("获取完成")


if __name__ == '__main__':
    main()
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment