Commit a9b79edf authored by 张彦钊

Merge branch 'master' of git.wanmeizhensuo.com:ML/ffm-baseline

增加日记队列比较功能
parents dd935b51 c9ac8c8e
......@@ -31,10 +31,10 @@ def get_click_zero_uid_count(platform):
cursor = db.cursor()
sql = "select '0-7' as label,count(distinct(device_id)) from data_feed_exposure2 \
where device_type{0} \
and stat_date = get_yesterday_date() \
and stat_date = '{7}' \
and device_id not in \
(select distinct(device_id) from data_feed_click2 \
where stat_date = get_yesterday_date() \
where stat_date = '{7}' \
and device_type{1}) \
and device_id in \
(select distinct(device_id) \
......@@ -45,10 +45,10 @@ def get_click_zero_uid_count(platform):
union all \
select '7-14' as label,count(distinct(device_id)) from data_feed_exposure2 \
where device_type{0} \
and stat_date = get_yesterday_date() \
and stat_date = '{7}' \
and device_id not in \
(select distinct(device_id) from data_feed_click2 \
where stat_date = get_yesterday_date() \
where stat_date = '{7}' \
and device_type{1}) \
and device_id in \
(select distinct(device_id) \
......@@ -62,10 +62,10 @@ def get_click_zero_uid_count(platform):
union all \
select '14-30' as label,count(distinct(device_id)) from data_feed_exposure2 \
where device_type{0} \
and stat_date = get_yesterday_date() \
and stat_date = '{7}' \
and device_id not in \
(select distinct(device_id) from data_feed_click2 \
where stat_date = get_yesterday_date() \
where stat_date = '{7}' \
and device_type{1}) \
and device_id in \
(select distinct(device_id) \
......@@ -79,10 +79,10 @@ def get_click_zero_uid_count(platform):
union all \
select '30-60' as label,count(distinct(device_id)) from data_feed_exposure2 \
where device_type{0} \
and stat_date = get_yesterday_date() \
and stat_date = '{7}' \
and device_id not in \
(select distinct(device_id) from data_feed_click2 \
where stat_date = get_yesterday_date() \
where stat_date = '{7}' \
and device_type{1}) \
and device_id in \
(select distinct(device_id) \
......@@ -96,10 +96,10 @@ def get_click_zero_uid_count(platform):
union all \
select '60-90' as label,count(distinct(device_id)) from data_feed_exposure2 \
where device_type{0} \
and stat_date = get_yesterday_date() \
and stat_date = '{7}' \
and device_id not in \
(select distinct(device_id) from data_feed_click2 \
where stat_date = get_yesterday_date() \
where stat_date = '{7}' \
and device_type{1}) \
and device_id in \
(select distinct(device_id) \
......@@ -113,17 +113,17 @@ def get_click_zero_uid_count(platform):
union all \
select '90+' as label,count(distinct(device_id)) from data_feed_exposure2 \
where device_type{0} \
and stat_date = get_yesterday_date() \
and stat_date = '{7}' \
and device_id not in \
(select distinct(device_id) from data_feed_click2 \
where stat_date = get_yesterday_date() \
where stat_date = '{7}' \
and device_type{1}) \
and device_id in \
(select distinct(device_id) \
from data_feed_exposure2 \
where device_id in \
(select distinct(device_id) from data_feed_exposure2 \
where stat_date < {6}))".format(platform,platform.replace(' ','') if platform[-2]=='e' else platform,my_date1,my_date2,my_date3,my_date4,my_date5)
where stat_date < {6}))".format(platform,platform.replace(' ','') if platform[-2]=='e' else platform,my_date1,my_date2,my_date3,my_date4,my_date5,get_yesterday_date())
cursor.execute(sql)
result = cursor.fetchall()
db.close()
......
......@@ -73,18 +73,18 @@ click_everything_result = [click_everything_all,click_everything_ios,click_every
print("已获取有点击用户占比")
#1.9 无点击用户数分布(=无点击用户∩激活用户 / 激活用户数) #注意:(]里面的数字指的是距离当前时间的天数
try:
click_zero_uid_detail_all = get_click_zero_uid_rate_detail("all")
click_zero_uid_detail_all["platform"] = "所有"
click_zero_uid_detail_ios = get_click_zero_uid_rate_detail("ios")
click_zero_uid_detail_ios["platform"] = "苹果"
click_zero_uid_detail_android = get_click_zero_uid_rate_detail("android")
click_zero_uid_detail_android["platform"] = "安卓"
click_zero_uid_detail_result = [click_zero_uid_detail_all,click_zero_uid_detail_ios,click_zero_uid_detail_android]
print("已获取无点击用户数激活日期分布")
except:
click_zero_uid_detail_result = []
print("GC life time is shorter than transaction duration")
# try:
# click_zero_uid_detail_all = get_click_zero_uid_rate_detail("all")
# click_zero_uid_detail_all["platform"] = "所有"
# click_zero_uid_detail_ios = get_click_zero_uid_rate_detail("ios")
# click_zero_uid_detail_ios["platform"] = "苹果"
# click_zero_uid_detail_android = get_click_zero_uid_rate_detail("android")
# click_zero_uid_detail_android["platform"] = "安卓"
# click_zero_uid_detail_result = [click_zero_uid_detail_all,click_zero_uid_detail_ios,click_zero_uid_detail_android]
# print("已获取无点击用户数激活日期分布")
# except:
# click_zero_uid_detail_result = []
# print("GC life time is shorter than transaction duration")
#==========================================================================================
......@@ -138,7 +138,6 @@ def result2file():
1.6 点击question用户占比(=点击question用户数/曝光question用户数)
1.7 点击diary用户占比(=点击diary用户数/曝光diary用户数)
1.8 有点击用户占比(=有点击用户数/有曝光用户数)
1.9 无点击用户数分布(=无点击用户∩激活用户 / 激活用户数) #注意:平台一排的数字指的是距离当前时间的天数
2.Top特征
2.1 用户点击次数分布(第一列:用户点击次数;第二列:独立用户数量)
2.2 Top 100 diary (sorted by ctr)
......@@ -205,20 +204,20 @@ def result2file():
for i in click_everything_result:
line = tplt.format(i[0],i[1],i[2],"{}%".format(round(i[3]*100,2)))
f.write(line)
f.write('\n')
if click_zero_uid_detail_result != []:
f.write("#1.9无点击用户数分布(=无点击用户∩激活用户 / 激活用户数) #注意:(]里面的数字指的是距离当前时间的天数\n")
f.write("平台"+'\t\t'+"0-7"+'\t\t'+"7-14"+'\t\t'+ \
"14-30"+'\t\t'+"30-60"+'\t\t'+"60-90"+'\t\t'+"90+"+'\n')
for i in click_zero_uid_detail_result:
f.write(i["platform"]+'\t\t'+\
"{}%".format(round(i["0-7"]*100,2))+'\t\t'+\
"{}%".format(round(i["7-14"]*100,2))+'\t\t'+\
"{}%".format(round(i["14-30"]*100,2))+'\t\t'+\
"{}%".format(round(i["30-60"]*100,2))+'\t\t'+\
"{}%".format(round(i["60-90"]*100,2))+'\t\t'+\
"{}%".format(round(i["90+"]*100,2))+'\n')
f.write('\n\n\n')
# f.write('\n')
# if click_zero_uid_detail_result != []:
# f.write("#1.9无点击用户数分布(=无点击用户∩激活用户 / 激活用户数) #注意:(]里面的数字指的是距离当前时间的天数\n")
# f.write("平台"+'\t\t'+"0-7"+'\t\t'+"7-14"+'\t\t'+ \
# "14-30"+'\t\t'+"30-60"+'\t\t'+"60-90"+'\t\t'+"90+"+'\n')
# for i in click_zero_uid_detail_result:
# f.write(i["platform"]+'\t\t'+\
# "{}%".format(round(i["0-7"]*100,2))+'\t\t'+\
# "{}%".format(round(i["7-14"]*100,2))+'\t\t'+\
# "{}%".format(round(i["14-30"]*100,2))+'\t\t'+\
# "{}%".format(round(i["30-60"]*100,2))+'\t\t'+\
# "{}%".format(round(i["60-90"]*100,2))+'\t\t'+\
# "{}%".format(round(i["90+"]*100,2))+'\n')
f.write('\n\n\n')
#==========================================================================================
tplt = "{0:^10}\t{1:^10}\n"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment