Commit 4758923b authored by 张彦钊's avatar 张彦钊

add new file

parents 11eee554 c8a1b61c
......@@ -19,7 +19,11 @@ my_date5 = datetime.date.today() - datetime.timedelta(days=90)
my_tm5 = int(my_date5.strftime("%s"))
def get_rate_detail(platform):
def get_click_zero_uid_count(platform):
"""
platform : "ios","android","all"
rtype : dict
"""
if platform == "ios":
platform = "='App Store'"
elif platform == "android":
......@@ -126,26 +130,16 @@ def get_rate_detail(platform):
cursor.execute(sql)
result = cursor.fetchall()
db.close()
return result
def result2dict(result):
"""
result : tuple2
rtype : dict
"""
dct = {}
sum_count = 0
for i in result:
sum_count += i[1]
for i in result:
dct[i[0]] = "{}--{}%".format(i[1],round(i[1]/sum_count*100,2))
print("sum:{}".format(sum_count))
dct[i[0]] = i[1]
return dct
if __name__ == '__main__':
no_click_uid_detail_all = result2dict(get_rate_detail("all"))
no_click_uid_detail_ios = result2dict(get_rate_detail("ios"))
no_click_uid_detail_android = result2dict(get_rate_detail("android"))
no_click_uid_detail_all = get_click_zero_uid_count("all")
no_click_uid_detail_ios = get_click_zero_uid_count("ios")
no_click_uid_detail_android = get_click_zero_uid_count("android")
......@@ -27,7 +27,7 @@ def result2file(fpath):
1.5 点击answer用户占比(=点击answer用户数/曝光answer用户数)
1.6 点击diary用户占比(=点击diary用户数/曝光diary用户数)
1.7 无点击用户占比(=无点击用户数/有曝光用户数)
1.8 无点击用户数分布(根据激活日期和平台来分) #注意:(]里面的数字指的是距离当前时间的天数
1.8 无点击用户数分布(=无点击用户∩激活用户数 / 激活用户数) #注意:(]里面的数字指的是距离当前时间的天数
2.Top特征
2.1 用户点击次数分布(第一列:用户点击次数;第二列:独立用户数量)
2.2 Top 100 diary (sorted by ctr)
......
......@@ -19,7 +19,10 @@ my_date5 = datetime.date.today() - datetime.timedelta(days=90)
my_tm5 = int(my_date5.strftime("%s"))
def get_rate_detail():
def get_register_uid_count():
"""
rtype : dict
"""
db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
cursor = db.cursor()
sql = "select '0-7' as label,count(distinct(device_id)) \
......@@ -72,26 +75,14 @@ def get_rate_detail():
cursor.execute(sql)
result = cursor.fetchall()
db.close()
return result
def result2dict(result):
"""
result : tuple2
rtype : dict
"""
dct = {}
sum_count = 0
for i in result:
sum_count += i[1]
for i in result:
dct[i[0]] = "{}--{}%".format(i[1],round(i[1]/sum_count*100,2))
print("sum:{}".format(sum_count))
dct[i[0]] = i[1]
return dct
if __name__ == '__main__':
register_uid_detail_all = result2dict(get_rate_detail())
register_uid_detail_ios = result2dict(get_rate_detail())
register_uid_detail_android = result2dict(get_rate_detail())
register_uid_detail = get_register_uid_count()
from utils import con_sql
from getClickZeroUidDetail import get_click_zero_uid_count
from getRegisterUidDetail import get_register_uid_count
#获取各个平台下的活跃用户点击率
def get_activate_uid_ctr(platform, ndays=1):
    """Return click-through statistics for devices that clicked on one day.

    platform : "ios" or "android"; any other value selects every row whose
        device_type is not null.
    ndays : number of days before today to query (1 = yesterday). The value
        is interpolated into the SQL text as-is.
    rtype : list of [platform label, click count, exposure count, click rate]
        where the rate is rounded to 4 places and is 0.0 when no exposure
        rows match (the original raised ZeroDivisionError in that case).
    """
    if platform == "ios":
        exposure_filter, label = "='App Store'", "苹果"
    elif platform == "android":
        exposure_filter, label = "!='App Store'", "安卓"
    else:
        exposure_filter, label = " is not null", "所有"

    # The click-table filter has its spaces removed for the two App Store
    # comparisons (giving 'AppStore'); " is not null" is left untouched.
    # NOTE(review): presumably data_feed_click stores the store name without
    # a space - confirm against the table contents.
    if exposure_filter[-2] == 'e':
        click_filter = exposure_filter.replace(' ', '')
    else:
        click_filter = exposure_filter

    sql_clk = (
        "select count(device_id) from data_feed_click "
        "where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day) "
        "and device_type{1}"
    ).format(ndays, click_filter)
    clk_count = con_sql(sql_clk)[0][0]

    # Exposures are restricted to devices that also clicked on the same day.
    sql_imp = (
        "select count(device_id) from data_feed_exposure "
        "where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day) "
        "and device_id in "
        "(select device_id from data_feed_click "
        "where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{1} day) "
        "and device_type{2}) "
        "and device_type{3}"
    ).format(ndays, ndays, click_filter, exposure_filter)
    imp_count = con_sql(sql_imp)[0][0]

    # Guard the ratio: a day without matching exposures must not crash the report.
    clk_rate = round(clk_count / imp_count, 4) if imp_count else 0.0
    return [label, clk_count, imp_count, clk_rate]
#获取活跃用户平均每天曝光次数
def get_activate_uid_imp_times(city, ndays=1):
    """Return the average number of exposures per clicking device for one day.

    city : "beijing" restricts the query to city_id = 'beijing'; any other
        value selects every row whose city_id is not null.
    ndays : number of days before today to query (1 = yesterday). The value
        is interpolated into the SQL text as-is.
    rtype : list of [city label, distinct clicking devices, exposure count,
        exposures per device rounded to 2 places]. The average is 0.0 when
        no device clicked (the original raised ZeroDivisionError).
    """
    # Keep the SQL fragment and the display label in separate names. The
    # original overwrote `city` with the fragment and then compared it to
    # "beijing" again, so the label was always "所有".
    if city == "beijing":
        city_filter, label = "='beijing'", "北京"
    else:
        city_filter, label = " is not null", "所有"

    sql_uid = (
        "select count(distinct(device_id)) from data_feed_click "
        "where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day) "
        "and city_id{1}"
    ).format(ndays, city_filter)
    uid_count = con_sql(sql_uid)[0][0]

    # Exposures of the devices that clicked on the same day.
    sql_imp = (
        "select count(device_id) from data_feed_exposure "
        "where device_id in "
        "(select device_id from data_feed_click "
        "where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day) "
        "and city_id{1}) "
        "and from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day) "
        "and city_id{1}"
    ).format(ndays, city_filter)
    imp_times = con_sql(sql_imp)[0][0]

    average = round(imp_times / uid_count, 2) if uid_count else 0.0
    return [label, uid_count, imp_times, average]
# Distribution of zero-click users (= zero-click users ∩ activated users / activated users), broken down by platform and activation date
def get_click_zero_uid_rate_detail(platform):
    """Return, per activation-age bucket, zero-click devices / registered devices.

    platform : "ios", "android" or "all"; passed through to
        get_click_zero_uid_count.
    rtype : dict mapping each bucket label returned by
        get_click_zero_uid_count to a float ratio. A bucket whose registered
        count is zero or missing yields 0.0 (the original raised
        ZeroDivisionError / KeyError).
    """
    zero_click_counts = get_click_zero_uid_count(platform)
    # NOTE(review): get_register_uid_count takes no platform argument, so the
    # denominator covers every platform even when `platform` is "ios" or
    # "android" - confirm that this is the intended ratio.
    registered_counts = get_register_uid_count()

    result = {}
    for bucket, zero_click in zero_click_counts.items():
        registered = registered_counts.get(bucket)
        result[bucket] = zero_click / registered if registered else 0.0
    return result
#获取 (用户点击次数 : 独立用户数)
def get_click_times_to_count_uid():
    """Query yesterday's distribution of clicks per device.

    The inner query counts click rows per device_id for yesterday; the outer
    query groups devices by that count, ordered by the count.

    rtype : whatever con_sql returns for the query (the original docstring
        states a tuple of (times, device count) rows).
    """
    query = (
        "select times,count(device_id) "
        "from (select device_id,count(cid_type) as times "
        "from data_feed_click "
        "where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) "
        "group by device_id) as t "
        "group by times order by times"
    )
    return con_sql(query)
\ No newline at end of file
import datetime
import pymysql
def _midnight_timestamp(day):
    """Return the integer Unix timestamp of local midnight at the start of *day*.

    Replaces strftime("%s"), which is a platform-specific extension that is
    not among Python's documented format codes.
    """
    return int(datetime.datetime.combine(day, datetime.time.min).timestamp())


# Timestamp of one week ago (7 days)
my_date1 = datetime.date.today() - datetime.timedelta(days=7)
my_tm1 = _midnight_timestamp(my_date1)
# Timestamp of two weeks ago (14 days)
my_date2 = datetime.date.today() - datetime.timedelta(days=14)
my_tm2 = _midnight_timestamp(my_date2)
# Timestamp of one month ago (30 days)
my_date3 = datetime.date.today() - datetime.timedelta(days=30)
my_tm3 = _midnight_timestamp(my_date3)
# Timestamp of two months ago (60 days)
my_date4 = datetime.date.today() - datetime.timedelta(days=60)
my_tm4 = _midnight_timestamp(my_date4)
# Timestamp of three months ago (90 days)
my_date5 = datetime.date.today() - datetime.timedelta(days=90)
my_tm5 = _midnight_timestamp(my_date5)
def _seen_before(timestamp):
    """Return a subquery selecting devices with an exposure earlier than *timestamp*."""
    return ("(select distinct(device_id) from data_feed_exposure "
            "where time < {0})").format(timestamp)


def _activation_cohorts():
    """Return (label, SQL condition) pairs, one per activation-age bucket."""
    labels = ['0-7', '7-14', '14-30', '30-60', '60-90']
    bounds = [my_tm1, my_tm2, my_tm3, my_tm4, my_tm5]
    cohorts = []
    newer_bound = None
    for label, bound in zip(labels, bounds):
        # No exposure before the older bound ...
        condition = "device_id not in " + _seen_before(bound)
        if newer_bound is not None:
            # ... but at least one exposure before the newer bound.
            condition += " and device_id in " + _seen_before(newer_bound)
        cohorts.append((label, condition))
        newer_bound = bound
    cohorts.append(('90+', "device_id in " + _seen_before(my_tm5)))
    return cohorts


def get_click_zero_uid_count(platform):
    """Count yesterday's exposed-but-never-clicked devices per activation bucket.

    For each bucket ('0-7', '7-14', '14-30', '30-60', '60-90', '90+') the
    query counts distinct devices that had an exposure yesterday, had no
    click yesterday, and fall in that bucket's cohort. The generated SQL is
    the same text the original spelled out by hand for all six buckets.

    platform : "ios" or "android"; any other value selects every row whose
        device_type is not null.
    rtype : dict mapping bucket label to device count.
    """
    if platform == "ios":
        exposure_filter = "='App Store'"
    elif platform == "android":
        exposure_filter = "!='App Store'"
    else:
        exposure_filter = " is not null"
    # The click-table filter has its spaces removed for the App Store
    # comparisons (giving 'AppStore'); " is not null" is left untouched.
    if exposure_filter[-2] == 'e':
        click_filter = exposure_filter.replace(' ', '')
    else:
        click_filter = exposure_filter

    bucket_sql = (
        "select '{label}' as label,count(distinct(device_id)) from data_feed_exposure "
        "where device_type{exposure} "
        "and from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) "
        "and device_id not in "
        "(select distinct(device_id) from data_feed_click "
        "where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) "
        "and device_type{click}) "
        "and device_id in "
        "(select distinct(device_id) "
        "from data_feed_exposure "
        "where {cohort})"
    )
    sql = " union all ".join(
        bucket_sql.format(label=label, exposure=exposure_filter,
                          click=click_filter, cohort=cohort)
        for label, cohort in _activation_cohorts()
    )

    # NOTE(review): connection credentials are hard-coded here; they should
    # be moved to configuration and the exposed password rotated.
    db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
    try:
        cursor = db.cursor()
        cursor.execute(sql)
        result = cursor.fetchall()
    finally:
        # Close the connection even when the query fails.
        db.close()
    return {row[0]: row[1] for row in result}
if __name__ == '__main__':
no_click_uid_detail_all = get_click_zero_uid_count("all")
no_click_uid_detail_ios = get_click_zero_uid_count("ios")
no_click_uid_detail_android = get_click_zero_uid_count("android")
import datetime
import pymysql
def _midnight_timestamp(day):
    """Return the integer Unix timestamp of local midnight at the start of *day*.

    Replaces strftime("%s"), which is a platform-specific extension that is
    not among Python's documented format codes.
    """
    return int(datetime.datetime.combine(day, datetime.time.min).timestamp())


# Timestamp of one week ago (7 days)
my_date1 = datetime.date.today() - datetime.timedelta(days=7)
my_tm1 = _midnight_timestamp(my_date1)
# Timestamp of two weeks ago (14 days)
my_date2 = datetime.date.today() - datetime.timedelta(days=14)
my_tm2 = _midnight_timestamp(my_date2)
# Timestamp of one month ago (30 days)
my_date3 = datetime.date.today() - datetime.timedelta(days=30)
my_tm3 = _midnight_timestamp(my_date3)
# Timestamp of two months ago (60 days)
my_date4 = datetime.date.today() - datetime.timedelta(days=60)
my_tm4 = _midnight_timestamp(my_date4)
# Timestamp of three months ago (90 days)
my_date5 = datetime.date.today() - datetime.timedelta(days=90)
my_tm5 = _midnight_timestamp(my_date5)
def _seen_before(timestamp):
    """Return a subquery selecting devices with an exposure earlier than *timestamp*."""
    return ("(select distinct(device_id) from data_feed_exposure "
            "where time < {0})").format(timestamp)


def _activation_cohorts():
    """Return (label, SQL condition) pairs, one per activation-age bucket."""
    labels = ['0-7', '7-14', '14-30', '30-60', '60-90']
    bounds = [my_tm1, my_tm2, my_tm3, my_tm4, my_tm5]
    cohorts = []
    newer_bound = None
    for label, bound in zip(labels, bounds):
        # No exposure before the older bound ...
        condition = "device_id not in " + _seen_before(bound)
        if newer_bound is not None:
            # ... but at least one exposure before the newer bound.
            condition += " and device_id in " + _seen_before(newer_bound)
        cohorts.append((label, condition))
        newer_bound = bound
    cohorts.append(('90+', "device_id in " + _seen_before(my_tm5)))
    return cohorts


def get_register_uid_count():
    """Count distinct devices in data_feed_exposure per activation bucket.

    One count is produced for each bucket ('0-7', '7-14', '14-30', '30-60',
    '60-90', '90+'). The generated SQL is the same text the original spelled
    out by hand for all six buckets.

    rtype : dict mapping bucket label to device count.
    """
    bucket_sql = (
        "select '{label}' as label,count(distinct(device_id)) "
        "from data_feed_exposure "
        "where {cohort}"
    )
    sql = " union all ".join(
        bucket_sql.format(label=label, cohort=cohort)
        for label, cohort in _activation_cohorts()
    )

    # NOTE(review): connection credentials are hard-coded here; they should
    # be moved to configuration and the exposed password rotated.
    db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
    try:
        cursor = db.cursor()
        cursor.execute(sql)
        result = cursor.fetchall()
    finally:
        # Close the connection even when the query fails.
        db.close()
    return {row[0]: row[1] for row in result}
if __name__ == '__main__':
register_uid_detail = get_register_uid_count()
# -*- coding: UTF-8 -*-
from utils import *
from utils import get_yesterday_date
from config import DIRECTORY_PATH
from getCidRate import *
from getClkCidUidRate import *
from getTopFeatures import *
def main():
    """Fetch every feature set for all / iOS / Android, printing progress.

    The fetched values are only bound to local names; nothing is returned.
    """
    # (query key, display label) for each platform, in the order queried.
    platforms = (("all", "所有"), ("ios", "苹果"), ("android", "安卓"))

    def exposure_share(cid_type):
        return [CidRate(key, cid_type).get_cid_imp_rate(label)
                for key, label in platforms]

    def clicking_user_share(cid_type):
        return [ClkCidUidRate(key, cid_type).get_clk_cid_uid_rate(label)
                for key, label in platforms]

    def top_hundred(cid_type, column_count):
        return [TopFeatures(key, cid_type, 100).get_result(label, column_count, "ctr")
                for key, label in platforms]

    print("开始获取特征数据...")

    # 1. Ratio features
    # 1.1 Answer exposure share
    answer_imp_rate_result = exposure_share("answer")
    print("已获取answer曝光占比")

    # 1.2 Diary exposure share
    diary_imp_rate_result = exposure_share("diary")
    print("已获取diary曝光占比")

    # 1.3 Click-through rate of active users
    activate_uid_ctr_result = [get_activate_uid_ctr(key) for key, _ in platforms]
    print("已获取活跃用户点击率")

    # 1.4 Share of users who clicked an answer
    click_answer_result = clicking_user_share("answer")
    print("已获取点击answer用户占比")

    # 1.5 Share of users who clicked a diary
    click_diary_result = clicking_user_share("diary")
    print("已获取点击diary用户占比")

    # 1.6 Share of users with any click
    click_everything_result = clicking_user_share("everything")
    print("已获取有点击用户占比")

    # 2. Top features
    # 2.1 Distribution of click counts per user
    df = get_click_times_to_count_uid_df()
    print("已获取用户点击次数分布")

    # 2.2 Top 100 diary, requested with the "ctr" sort key
    top_diary_result = top_hundred("diary", 4)
    print("已获取 Top diary 特征")

    # 2.3 Top 100 answer, requested with the "ctr" sort key
    top_answer_result = top_hundred("answer", 2)
    print("已获取 Top answer 特征")

    # 2.4 Top 100 question, requested with the "ctr" sort key
    top_question_result = top_hundred("question", 2)
    print("已获取 Top question 特征")

    print("done")
from cidRate import CidRate
from clkCidUidRate import ClkCidUidRate
from topFeatures import TopFeatures
from func import *
# Collect every feature set for the three platforms (all / iOS / Android),
# printing a progress message after each one. The *_result lists built here
# are the names result2file reads when it writes the report.
print("开始获取特征数据...")
# 1.1 Answer exposure share (= answer exposures / total cid exposures)
answer_imp_rate_all = CidRate("all","answer").get_cid_imp_rate("所有")
answer_imp_rate_ios = CidRate("ios","answer").get_cid_imp_rate("苹果")
answer_imp_rate_android = CidRate("android","answer").get_cid_imp_rate("安卓")
answer_imp_rate_result = [answer_imp_rate_all,answer_imp_rate_ios,answer_imp_rate_android]
print("已获取answer曝光占比")
# 1.2 Diary exposure share (= diary exposures / total cid exposures)
diary_imp_rate_all = CidRate("all","diary").get_cid_imp_rate("所有")
diary_imp_rate_ios = CidRate("ios","diary").get_cid_imp_rate("苹果")
diary_imp_rate_android = CidRate("android","diary").get_cid_imp_rate("安卓")
diary_imp_rate_result = [diary_imp_rate_all,diary_imp_rate_ios,diary_imp_rate_android]
print("已获取diary曝光占比")
# 1.3 Active-user click-through rate (= clicks by active users / exposures of active users)
activate_uid_ctr_all = get_activate_uid_ctr("all")
activate_uid_ctr_ios = get_activate_uid_ctr("ios")
activate_uid_ctr_android = get_activate_uid_ctr("android")
activate_uid_ctr_result = [activate_uid_ctr_all,activate_uid_ctr_ios,activate_uid_ctr_android]
print("已获取活跃用户点击率")
# 1.4 Average daily exposures per active user (= active-user exposures / distinct active users)
activate_uid_imp_all = get_activate_uid_imp_times("all")
activate_uid_imp_beijing = get_activate_uid_imp_times("beijing")
activate_uid_imp_result = [activate_uid_imp_all,activate_uid_imp_beijing]
print("已获取活跃用户平均每天曝光次数")
# 1.5 Share of users who clicked an answer (= users who clicked an answer / users exposed to an answer)
click_answer_all = ClkCidUidRate("all","answer").get_clk_cid_uid_rate("所有")
click_answer_ios = ClkCidUidRate("ios","answer").get_clk_cid_uid_rate("苹果")
click_answer_android = ClkCidUidRate("android","answer").get_clk_cid_uid_rate("安卓")
click_answer_result = [click_answer_all,click_answer_ios,click_answer_android]
print("已获取点击answer用户占比")
# 1.6 Share of users who clicked a diary (= users who clicked a diary / users exposed to a diary)
click_diary_all = ClkCidUidRate("all","diary").get_clk_cid_uid_rate("所有")
click_diary_ios = ClkCidUidRate("ios","diary").get_clk_cid_uid_rate("苹果")
click_diary_android = ClkCidUidRate("android","diary").get_clk_cid_uid_rate("安卓")
click_diary_result = [click_diary_all,click_diary_ios,click_diary_android]
print("已获取点击diary用户占比")
# 1.7 Share of users with any click (= users with a click / users with an exposure)
click_everything_all = ClkCidUidRate("all","everything").get_clk_cid_uid_rate("所有")
click_everything_ios = ClkCidUidRate("ios","everything").get_clk_cid_uid_rate("苹果")
click_everything_android = ClkCidUidRate("android","everything").get_clk_cid_uid_rate("安卓")
click_everything_result = [click_everything_all,click_everything_ios,click_everything_android]
print("已获取有点击用户占比")
# 1.8 Zero-click user distribution (= zero-click users ∩ activated users / activated users)
# Note: the numbers in each bucket label are days before the current time.
# A "platform" key is added to each returned dict so the report can label the row.
click_zero_uid_detail_all = get_click_zero_uid_rate_detail("all")
click_zero_uid_detail_all["platform"] = "所有"
click_zero_uid_detail_ios = get_click_zero_uid_rate_detail("ios")
click_zero_uid_detail_ios["platform"] = "苹果"
click_zero_uid_detail_android = get_click_zero_uid_rate_detail("android")
click_zero_uid_detail_android["platform"] = "安卓"
click_zero_uid_detail_result = [click_zero_uid_detail_all,click_zero_uid_detail_ios,click_zero_uid_detail_android]
print("已获取无点击用户数激活日期分布")
#==========================================================================================
# 2.1 Click-count distribution (column 1: clicks per user; column 2: number of distinct users)
print("开始Top特征数据...")
click_times_to_count_uid = get_click_times_to_count_uid()
print("已获取用户点击次数分布")
# 2.2 Top 100 diary (sorted by ctr)
top_diary_all = TopFeatures("all", "diary", 100).get_result("所有", 4, "ctr")
top_diary_ios = TopFeatures("ios", "diary", 100).get_result("苹果", 4, "ctr")
top_diary_android = TopFeatures("android", "diary", 100).get_result("安卓", 4, "ctr")
top_diary_result = [top_diary_all,top_diary_ios,top_diary_android]
print("已获取 Top diary 特征")
# 2.3 Top 100 answer (sorted by ctr)
top_answer_all = TopFeatures("all", "answer", 100).get_result("所有", 2, "ctr")
top_answer_ios = TopFeatures("ios", "answer", 100).get_result("苹果", 2, "ctr")
top_answer_android = TopFeatures("android", "answer", 100).get_result("安卓", 2, "ctr")
top_answer_result = [top_answer_all,top_answer_ios,top_answer_android]
print("已获取 Top answer 特征")
# 2.4 Top 100 question (the sort key passed below is "ctr")
top_question_all = TopFeatures("all", "question", 100).get_result("所有", 2, "ctr")
top_question_ios = TopFeatures("ios", "question", 100).get_result("苹果", 2, "ctr")
top_question_android = TopFeatures("android", "question", 100).get_result("安卓", 2, "ctr")
top_question_result = [top_question_all,top_question_ios,top_question_android]
print("已获取 Top question 特征")
print("done")
def _write_ratio_section(f, title, columns, rows, as_percent=True):
    """Write one four-column ratio table followed by a blank line."""
    tplt = "{0:\u3000<6}\t{1:\u3000<15}\t{2:\u3000<15}\t{3:\u3000<15}\n"
    f.write(title)
    f.write(tplt.format(*columns))
    for row in rows:
        # The fourth column is a fraction shown as a percentage, except for
        # tables whose fourth column is already an absolute value.
        last = "{}%".format(round(row[3]*100, 2)) if as_percent else row[3]
        f.write(tplt.format(row[0], row[1], row[2], last))
    f.write('\n')


def _write_top_section(f, title, id_column, link_column, groups):
    """Write one Top-100 table; the header is repeated between groups."""
    tplt = "{0:\u3000<4}\t{1:\u3000<12}\t{2:\u3000^6}\t{3:\u3000^6}\t{4:\u3000<8}\t{5:\u3000^15}\n"
    sep = "=================================================================\n"
    header = tplt.format("平台", id_column, "点击数", "曝光数", "点击率", link_column)
    f.write(title)
    f.write(sep)
    f.write(header)
    last_index = len(groups) - 1
    for index, rows in enumerate(groups):
        for row in rows:
            f.write(tplt.format(row[0], row[1], row[2], row[3], row[4], row[5]))
        f.write(sep)
        # Compare by position, not by value: the original `i != groups[-1]`
        # dropped the header whenever an earlier group equalled the last one
        # (for example when both were empty).
        if index != last_index:
            f.write(header)
    f.write("\n\n")


def result2file():
    """Write yesterday's feature report to DIRECTORY_PATH/result_<date>.txt.

    Reads the module-level result collections (answer_imp_rate_result,
    diary_imp_rate_result, activate_uid_ctr_result, activate_uid_imp_result,
    click_answer_result, click_diary_result, click_everything_result,
    click_zero_uid_detail_result, click_times_to_count_uid, top_diary_result,
    top_answer_result, top_question_result); they must be populated before
    this function is called. Returns None.
    """
    report_date = get_yesterday_date()
    output_path = DIRECTORY_PATH + "result_{}.txt".format(report_date)
    sep = "=================================================================\n"
    # The report is Chinese text, so fix the encoding instead of relying on
    # the locale default, which may not be able to encode it.
    with open(output_path, 'w', encoding='utf-8') as f:
        line = """数据日期:{}
内容概览:以下所有数据都是昨天一天的首页的
1. 比例特征
1.1 answer曝光占比(=answer被曝光数/总cid被曝光数)
1.2 diary曝光占比(=diary被曝光数/总cid被曝光数)
1.3 活跃用户点击率(=有点击用户点击次数/有点击用户曝光次数)
1.4 活跃用户平均每天曝光次数(=活跃用户曝光次数/独立活跃用户数)
1.5 点击answer用户占比(=点击answer用户数/曝光answer用户数)
1.6 点击diary用户占比(=点击diary用户数/曝光diary用户数)
1.7 有点击用户占比(=有点击用户数/有曝光用户数)
1.8 无点击用户数分布(=无点击用户∩激活用户 / 激活用户数) #注意:(]里面的数字指的是距离当前时间的天数
2.Top特征
2.1 用户点击次数分布(第一列:用户点击次数;第二列:独立用户数量)
2.2 Top 100 diary (sorted by ctr)
2.3 Top 100 Answer (sorted by ctr)
2.4 Top 100 Question (sorted by ctr)
具体内容:以下所有数据都昨天一天的首页的
""".format(report_date)
        f.write(line)

        # 1. Ratio features
        f.write("#1. 比例特征\n")
        f.write(sep)
        _write_ratio_section(
            f, "#1.1answer曝光占比(=answer被曝光数/总cid被曝光数)\n",
            ("平台", "answer被曝光数", "总cid被曝光数", "answer被曝光数占比"),
            answer_imp_rate_result)
        _write_ratio_section(
            f, "#1.2diary曝光占比(=diary被曝光数/总cid被曝光数)\n",
            ("平台", "diary被曝光数", "总cid被曝光数", "diary被曝光数占比"),
            diary_imp_rate_result)
        _write_ratio_section(
            f, "#1.3活跃用户点击率(=有点击用户点击次数/有点击用户曝光次数)\n",
            ("平台", "active用户点击次数", "active用户曝光次数", "active用户点击率"),
            activate_uid_ctr_result)
        # 1.4 is an average, not a fraction, so it is written without "%".
        _write_ratio_section(
            f, "#1.4活跃用户平均每天曝光次数(=活跃用户曝光次数/独立活跃用户数)\n",
            ("地区", "active独立用户数", "active用户曝光次数", "activate用户平均曝光数"),
            activate_uid_imp_result, as_percent=False)
        _write_ratio_section(
            f, "#1.5点击answer用户占比(=点击answer用户数/曝光answer用户数)\n",
            ("平台", "点击answer用户数", "曝光answer用户数", "击answer用户占比"),
            click_answer_result)
        _write_ratio_section(
            f, "#1.6点击diary用户占比(=点击diary用户数/曝光diary用户数)\n",
            ("平台", "点击diary用户数", "曝光diary用户数", "击diary用户占比"),
            click_diary_result)
        _write_ratio_section(
            f, "#1.7有点击用户占比(=有点击用户数/有曝光用户数)\n",
            ("平台", "have点击用户数", "have曝光用户数", "have点击用户占比"),
            click_everything_result)

        # 1.8 One tab-separated row per platform, one column per bucket.
        buckets = ["0-7", "7-14", "14-30", "30-60", "60-90", "90+"]
        f.write("#1.8无点击用户数分布(=无点击用户∩激活用户 / 激活用户数) #注意:(]里面的数字指的是距离当前时间的天数\n")
        f.write('\t'.join(["平台"] + buckets) + '\n')
        for detail in click_zero_uid_detail_result:
            cells = [detail["platform"]] + [str(detail[bucket]) for bucket in buckets]
            f.write('\t'.join(cells) + '\n')
        f.write('\n\n\n')

        # 2. Top features
        tplt = "{0:^10}\t{1:^10}\n"
        f.write("#2. Top特征\n")
        f.write(sep)
        f.write("2.1 用户点击次数分布(第一列:用户点击次数;第二列:独立用户数量)\n")
        f.write(tplt.format("click_times", "count_uid"))
        for row in click_times_to_count_uid:
            f.write(tplt.format(row[0], row[1]))
        f.write("\n\n")

        _write_top_section(f, "2.2 Top 100 Diary\n", "diary_id", "diary链接", top_diary_result)
        _write_top_section(f, "2.3 Top 100 Answer\n", "answer_id", "answer链接", top_answer_result)
        _write_top_section(f, "2.4 Top 100 Question\n", "question_id", "question链接", top_question_result)
if __name__ == '__main__':
main()
result2file()
......@@ -35,52 +35,3 @@ def get_yesterday_date():
yesterday = today - datetime.timedelta(days=1)
yesterday = yesterday.strftime("%Y%m%d")
return yesterday
#获取各个平台下的活跃用户点击率
def get_activate_uid_ctr(platform, ndays=1):
    """Return click-through statistics for devices that clicked on one day.

    platform : "ios" or "android"; any other value selects every row whose
        device_type is not null.
    ndays : number of days before today to query (1 = yesterday). The value
        is interpolated into the SQL text as-is.
    rtype : list of [platform label, click count, exposure count, click rate]
        where the rate is rounded to 4 places and is 0.0 when no exposure
        rows match (the original raised ZeroDivisionError in that case).
    """
    if platform == "ios":
        exposure_filter, label = "='App Store'", "苹果"
    elif platform == "android":
        exposure_filter, label = "!='App Store'", "安卓"
    else:
        exposure_filter, label = " is not null", "所有"

    # The click-table filter has its spaces removed for the two App Store
    # comparisons (giving 'AppStore'); " is not null" is left untouched.
    # NOTE(review): presumably data_feed_click stores the store name without
    # a space - confirm against the table contents.
    if exposure_filter[-2] == 'e':
        click_filter = exposure_filter.replace(' ', '')
    else:
        click_filter = exposure_filter

    sql_clk = (
        "select count(device_id) from data_feed_click "
        "where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day) "
        "and device_type{1}"
    ).format(ndays, click_filter)
    clk_count = con_sql(sql_clk)[0][0]

    # Exposures are restricted to devices that also clicked on the same day.
    sql_imp = (
        "select count(device_id) from data_feed_exposure "
        "where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day) "
        "and device_id in "
        "(select device_id from data_feed_click "
        "where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{1} day) "
        "and device_type{2}) "
        "and device_type{3}"
    ).format(ndays, ndays, click_filter, exposure_filter)
    imp_count = con_sql(sql_imp)[0][0]

    # Guard the ratio: a day without matching exposures must not crash the report.
    clk_rate = round(clk_count / imp_count, 4) if imp_count else 0.0
    return [label, clk_count, imp_count, clk_rate]
#获取 (点击次数 : 独立用户数)
def get_click_times_to_count_uid_df():
    """Query yesterday's distribution of clicks per device.

    The inner query counts click rows per device_id for yesterday; the outer
    query groups devices by that count, ordered by the count.

    rtype : whatever con_sql returns for the query (the original docstring
        states a tuple of (times, device count) rows).
    """
    query = (
        "select times,count(device_id) "
        "from (select device_id,count(cid_type) as times "
        "from data_feed_click "
        "where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) "
        "group by device_id) as t "
        "group by times order by times"
    )
    return con_sql(query)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment