Commit a9b79edf authored by 张彦钊

Merge branch 'master' of git.wanmeizhensuo.com:ML/ffm-baseline

增加日记队列比较功能
parents dd935b51 c9ac8c8e
...@@ -31,10 +31,10 @@ def get_click_zero_uid_count(platform): ...@@ -31,10 +31,10 @@ def get_click_zero_uid_count(platform):
cursor = db.cursor() cursor = db.cursor()
sql = "select '0-7' as label,count(distinct(device_id)) from data_feed_exposure2 \ sql = "select '0-7' as label,count(distinct(device_id)) from data_feed_exposure2 \
where device_type{0} \ where device_type{0} \
and stat_date = get_yesterday_date() \ and stat_date = '{7}' \
and device_id not in \ and device_id not in \
(select distinct(device_id) from data_feed_click2 \ (select distinct(device_id) from data_feed_click2 \
where stat_date = get_yesterday_date() \ where stat_date = '{7}' \
and device_type{1}) \ and device_type{1}) \
and device_id in \ and device_id in \
(select distinct(device_id) \ (select distinct(device_id) \
...@@ -45,10 +45,10 @@ def get_click_zero_uid_count(platform): ...@@ -45,10 +45,10 @@ def get_click_zero_uid_count(platform):
union all \ union all \
select '7-14' as label,count(distinct(device_id)) from data_feed_exposure2 \ select '7-14' as label,count(distinct(device_id)) from data_feed_exposure2 \
where device_type{0} \ where device_type{0} \
and stat_date = get_yesterday_date() \ and stat_date = '{7}' \
and device_id not in \ and device_id not in \
(select distinct(device_id) from data_feed_click2 \ (select distinct(device_id) from data_feed_click2 \
where stat_date = get_yesterday_date() \ where stat_date = '{7}' \
and device_type{1}) \ and device_type{1}) \
and device_id in \ and device_id in \
(select distinct(device_id) \ (select distinct(device_id) \
...@@ -62,10 +62,10 @@ def get_click_zero_uid_count(platform): ...@@ -62,10 +62,10 @@ def get_click_zero_uid_count(platform):
union all \ union all \
select '14-30' as label,count(distinct(device_id)) from data_feed_exposure2 \ select '14-30' as label,count(distinct(device_id)) from data_feed_exposure2 \
where device_type{0} \ where device_type{0} \
and stat_date = get_yesterday_date() \ and stat_date = '{7}' \
and device_id not in \ and device_id not in \
(select distinct(device_id) from data_feed_click2 \ (select distinct(device_id) from data_feed_click2 \
where stat_date = get_yesterday_date() \ where stat_date = '{7}' \
and device_type{1}) \ and device_type{1}) \
and device_id in \ and device_id in \
(select distinct(device_id) \ (select distinct(device_id) \
...@@ -79,10 +79,10 @@ def get_click_zero_uid_count(platform): ...@@ -79,10 +79,10 @@ def get_click_zero_uid_count(platform):
union all \ union all \
select '30-60' as label,count(distinct(device_id)) from data_feed_exposure2 \ select '30-60' as label,count(distinct(device_id)) from data_feed_exposure2 \
where device_type{0} \ where device_type{0} \
and stat_date = get_yesterday_date() \ and stat_date = '{7}' \
and device_id not in \ and device_id not in \
(select distinct(device_id) from data_feed_click2 \ (select distinct(device_id) from data_feed_click2 \
where stat_date = get_yesterday_date() \ where stat_date = '{7}' \
and device_type{1}) \ and device_type{1}) \
and device_id in \ and device_id in \
(select distinct(device_id) \ (select distinct(device_id) \
...@@ -96,10 +96,10 @@ def get_click_zero_uid_count(platform): ...@@ -96,10 +96,10 @@ def get_click_zero_uid_count(platform):
union all \ union all \
select '60-90' as label,count(distinct(device_id)) from data_feed_exposure2 \ select '60-90' as label,count(distinct(device_id)) from data_feed_exposure2 \
where device_type{0} \ where device_type{0} \
and stat_date = get_yesterday_date() \ and stat_date = '{7}' \
and device_id not in \ and device_id not in \
(select distinct(device_id) from data_feed_click2 \ (select distinct(device_id) from data_feed_click2 \
where stat_date = get_yesterday_date() \ where stat_date = '{7}' \
and device_type{1}) \ and device_type{1}) \
and device_id in \ and device_id in \
(select distinct(device_id) \ (select distinct(device_id) \
...@@ -113,17 +113,17 @@ def get_click_zero_uid_count(platform): ...@@ -113,17 +113,17 @@ def get_click_zero_uid_count(platform):
union all \ union all \
select '90+' as label,count(distinct(device_id)) from data_feed_exposure2 \ select '90+' as label,count(distinct(device_id)) from data_feed_exposure2 \
where device_type{0} \ where device_type{0} \
and stat_date = get_yesterday_date() \ and stat_date = '{7}' \
and device_id not in \ and device_id not in \
(select distinct(device_id) from data_feed_click2 \ (select distinct(device_id) from data_feed_click2 \
where stat_date = get_yesterday_date() \ where stat_date = '{7}' \
and device_type{1}) \ and device_type{1}) \
and device_id in \ and device_id in \
(select distinct(device_id) \ (select distinct(device_id) \
from data_feed_exposure2 \ from data_feed_exposure2 \
where device_id in \ where device_id in \
(select distinct(device_id) from data_feed_exposure2 \ (select distinct(device_id) from data_feed_exposure2 \
where stat_date < {6}))".format(platform,platform.replace(' ','') if platform[-2]=='e' else platform,my_date1,my_date2,my_date3,my_date4,my_date5) where stat_date < {6}))".format(platform,platform.replace(' ','') if platform[-2]=='e' else platform,my_date1,my_date2,my_date3,my_date4,my_date5,get_yesterday_date())
cursor.execute(sql) cursor.execute(sql)
result = cursor.fetchall() result = cursor.fetchall()
db.close() db.close()
......
...@@ -73,18 +73,18 @@ click_everything_result = [click_everything_all,click_everything_ios,click_every ...@@ -73,18 +73,18 @@ click_everything_result = [click_everything_all,click_everything_ios,click_every
print("已获取有点击用户占比") print("已获取有点击用户占比")
#1.9 无点击用户数分布(=无点击用户∩激活用户 / 激活用户数) #注意:(]里面的数字指的是距离当前时间的天数 #1.9 无点击用户数分布(=无点击用户∩激活用户 / 激活用户数) #注意:(]里面的数字指的是距离当前时间的天数
try: # try:
click_zero_uid_detail_all = get_click_zero_uid_rate_detail("all") # click_zero_uid_detail_all = get_click_zero_uid_rate_detail("all")
click_zero_uid_detail_all["platform"] = "所有" # click_zero_uid_detail_all["platform"] = "所有"
click_zero_uid_detail_ios = get_click_zero_uid_rate_detail("ios") # click_zero_uid_detail_ios = get_click_zero_uid_rate_detail("ios")
click_zero_uid_detail_ios["platform"] = "苹果" # click_zero_uid_detail_ios["platform"] = "苹果"
click_zero_uid_detail_android = get_click_zero_uid_rate_detail("android") # click_zero_uid_detail_android = get_click_zero_uid_rate_detail("android")
click_zero_uid_detail_android["platform"] = "安卓" # click_zero_uid_detail_android["platform"] = "安卓"
click_zero_uid_detail_result = [click_zero_uid_detail_all,click_zero_uid_detail_ios,click_zero_uid_detail_android] # click_zero_uid_detail_result = [click_zero_uid_detail_all,click_zero_uid_detail_ios,click_zero_uid_detail_android]
print("已获取无点击用户数激活日期分布") # print("已获取无点击用户数激活日期分布")
except: # except:
click_zero_uid_detail_result = [] # click_zero_uid_detail_result = []
print("GC life time is shorter than transaction duration") # print("GC life time is shorter than transaction duration")
#========================================================================================== #==========================================================================================
...@@ -138,7 +138,6 @@ def result2file(): ...@@ -138,7 +138,6 @@ def result2file():
1.6 点击question用户占比(=点击question用户数/曝光question用户数) 1.6 点击question用户占比(=点击question用户数/曝光question用户数)
1.7 点击diary用户占比(=点击diary用户数/曝光diary用户数) 1.7 点击diary用户占比(=点击diary用户数/曝光diary用户数)
1.8 有点击用户占比(=有点击用户数/有曝光用户数) 1.8 有点击用户占比(=有点击用户数/有曝光用户数)
1.9 无点击用户数分布(=无点击用户∩激活用户 / 激活用户数) #注意:平台一排的数字指的是距离当前时间的天数
2.Top特征 2.Top特征
2.1 用户点击次数分布(第一列:用户点击次数;第二列:独立用户数量) 2.1 用户点击次数分布(第一列:用户点击次数;第二列:独立用户数量)
2.2 Top 100 diary (sorted by ctr) 2.2 Top 100 diary (sorted by ctr)
...@@ -205,20 +204,20 @@ def result2file(): ...@@ -205,20 +204,20 @@ def result2file():
for i in click_everything_result: for i in click_everything_result:
line = tplt.format(i[0],i[1],i[2],"{}%".format(round(i[3]*100,2))) line = tplt.format(i[0],i[1],i[2],"{}%".format(round(i[3]*100,2)))
f.write(line) f.write(line)
f.write('\n') # f.write('\n')
if click_zero_uid_detail_result != []: # if click_zero_uid_detail_result != []:
f.write("#1.9无点击用户数分布(=无点击用户∩激活用户 / 激活用户数) #注意:(]里面的数字指的是距离当前时间的天数\n") # f.write("#1.9无点击用户数分布(=无点击用户∩激活用户 / 激活用户数) #注意:(]里面的数字指的是距离当前时间的天数\n")
f.write("平台"+'\t\t'+"0-7"+'\t\t'+"7-14"+'\t\t'+ \ # f.write("平台"+'\t\t'+"0-7"+'\t\t'+"7-14"+'\t\t'+ \
"14-30"+'\t\t'+"30-60"+'\t\t'+"60-90"+'\t\t'+"90+"+'\n') # "14-30"+'\t\t'+"30-60"+'\t\t'+"60-90"+'\t\t'+"90+"+'\n')
for i in click_zero_uid_detail_result: # for i in click_zero_uid_detail_result:
f.write(i["platform"]+'\t\t'+\ # f.write(i["platform"]+'\t\t'+\
"{}%".format(round(i["0-7"]*100,2))+'\t\t'+\ # "{}%".format(round(i["0-7"]*100,2))+'\t\t'+\
"{}%".format(round(i["7-14"]*100,2))+'\t\t'+\ # "{}%".format(round(i["7-14"]*100,2))+'\t\t'+\
"{}%".format(round(i["14-30"]*100,2))+'\t\t'+\ # "{}%".format(round(i["14-30"]*100,2))+'\t\t'+\
"{}%".format(round(i["30-60"]*100,2))+'\t\t'+\ # "{}%".format(round(i["30-60"]*100,2))+'\t\t'+\
"{}%".format(round(i["60-90"]*100,2))+'\t\t'+\ # "{}%".format(round(i["60-90"]*100,2))+'\t\t'+\
"{}%".format(round(i["90+"]*100,2))+'\n') # "{}%".format(round(i["90+"]*100,2))+'\n')
f.write('\n\n\n') f.write('\n\n\n')
#========================================================================================== #==========================================================================================
tplt = "{0:^10}\t{1:^10}\n" tplt = "{0:^10}\t{1:^10}\n"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment