Commit 2d0eff76 authored by 魏艺敏's avatar 魏艺敏

push codes

parent 34b7d118
......@@ -3,3 +3,4 @@ home_content_detail=首页内容数据-分日明细
home_content_by_month=首页内容数据-月均
ai_content_detail=ai内容数据-分日明细
ai_content_by_month=ai内容数据-月均
home_content_retention=分类用户次留
\ No newline at end of file
-- ---------------------------------------------------------------
-- Script  : DDL for pm.tl_pm_content_retention
-- Purpose : content daily report, next-day retention split by user segment
-- Business: pm
-- Author  : weiyimin@igengmei.com
-- ---------------------------------------------------------------

-- Session settings: run on the "data" queue.
SET mapreduce.job.queuename=data;

-- Work inside the pm database.
USE pm;

-- One row per (day_id, device_os_type, active_type, channel), stored in a
-- tab-delimited text table partitioned by PARTITION_DAY.
-- The *_quality and retention_rate columns are declared as STRING.
CREATE TABLE IF NOT EXISTS pm.tl_pm_content_retention
(
    -- report date
    day_id                            STRING COMMENT '{"chs_name":"当天日期","description":"","etl":"","value":"","remark":""}',
    -- device type
    device_os_type                    STRING COMMENT '{"chs_name":"设备类型","description":"","etl":"","value":"","remark":""}',
    -- activity type
    active_type                       STRING COMMENT '{"chs_name":"活跃类型","description":"","etl":"","value":"","remark":""}',
    -- channel
    channel                           STRING COMMENT '{"chs_name":"渠道","description":"","etl":"","value":"","remark":""}',
    -- daily active devices
    dau                               BIGINT COMMENT '{"chs_name":"日活","description":"","etl":"","value":"","remark":""}',
    -- next-day retention rate
    retention_rate                    STRING COMMENT '{"chs_name":"次留率","description":"","etl":"","value":"","remark":""}',
    -- devices with a home-page good click
    home_good_click_uv                BIGINT COMMENT '{"chs_name":"首页good click设备数","description":"","etl":"","value":"","remark":""}',
    -- good-click retention rate divided by site-wide retention rate
    home_good_click_quality           STRING COMMENT '{"chs_name":"首页gc用户次留率/全站次留率","description":"","etl":"","value":"","remark":""}',
    -- devices that clicked a home card without a good click
    home_ungood_click_uv              BIGINT COMMENT '{"chs_name":"点击首页卡片但非gc设备数","description":"","etl":"","value":"","remark":""}',
    -- retention rate of those devices divided by site-wide retention rate
    home_ungood_click_quality         STRING COMMENT '{"chs_name":"点击首页卡片但非gc设备次留率/全站次留率","description":"","etl":"","value":"","remark":""}',
    -- devices that did not click any home feed card
    no_click_uv                       BIGINT COMMENT '{"chs_name":"未点击首页feed卡片设备数","description":"","etl":"","value":"","remark":""}',
    -- retention rate of those devices divided by site-wide retention rate
    no_click_uv_quality               STRING COMMENT '{"chs_name":"未点击首页feed卡片设备次留率/全站次留率","description":"","etl":"","value":"","remark":""}',
    -- rate of clicking home feed cards again the next day, divided by site-wide retention rate
    home_good_click_retention_quality STRING COMMENT '{"chs_name":"当天点击首页feed卡片,且次日依旧点击的次留率/全站次留率","description":"","etl":"","value":"","remark":""}'
)
COMMENT '内容日报-分用户次留'
PARTITIONED BY (PARTITION_DAY STRING COMMENT '分区日期')
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
    COLLECTION ITEMS TERMINATED BY '\002'
    MAP KEYS TERMINATED BY '\003'
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
\ No newline at end of file
......@@ -53,9 +53,9 @@ FROM
LEFT JOIN
(SELECT code,is_spam,is_ai_channel,partition_day
FROM DIM.DIM_AI_CHANNEL_ZP_NEW
WHERE partition_day >='20190701' and partition_day<=regexp_replace(DATE_SUB(current_date,1) ,'-',''))tmp
WHERE partition_day =regexp_replace(DATE_SUB(current_date,1) ,'-',''))tmp
on first_channel_source_type=tmp.code and m.partition_date=tmp.partition_day
WHERE partition_date>='20190701' AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND active_type IN ('1','2','4')
AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
......@@ -75,7 +75,7 @@ FROM
SELECT device_id
,concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) AS partition_date
FROM online.ml_device_day_active_status
WHERE partition_date>='20190701' AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
)t2
ON t1.device_id=t2.device_id
left join
......@@ -115,9 +115,9 @@ left join
LEFT JOIN
(SELECT code,is_spam,is_ai_channel,partition_day
FROM DIM.DIM_AI_CHANNEL_ZP_NEW
WHERE partition_day >='20190701' and partition_day<=regexp_replace(DATE_SUB(current_date,1) ,'-',''))tmp
WHERE partition_day=regexp_replace(DATE_SUB(current_date,1) ,'-',''))tmp
on first_channel_source_type=tmp.code and m.partition_date=tmp.partition_day
WHERE partition_date>='20190701' AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND active_type IN ('1','2','4')
AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
......@@ -133,40 +133,41 @@ left join
lateral view explode(mas.channel ) v as channel
)t1
left join
(
SELECT partition_date,cl_id,count(distinct time_str) as pv
from online.bl_hdfs_maidian_updates
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND action='on_click_card'
AND params['page_name'] ='home'
and params['card_content_type'] in ('diary','user_post','answer','qa')--首页内容卡片点击,未限制tab
GROUP BY partition_date,cl_id
)t3
ON t1.partition_date=t3.partition_date AND t1.device_id=t3.cl_id
left join
(
SELECT partition_date
,cl_id
,count(distinct time_str) as pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date>='20190701' AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
and referrer='home'
AND action = 'page_view'
and params['is_push']=0
-- and params['is_first']=1
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail',
'video_steep','article_detail','wiki_detail','product_detail','wiki_brand','wiki_collect','welfare_detail')--9月11日新增了可以跳转商详页评论的日记卡片
and page_stay>=20
group by partition_date,cl_id
)t2
ON t1.partition_date=t2.partition_date
AND t1.device_id=t2.cl_id
left join
(
SELECT partition_date,cl_id,count(distinct time_str) as pv
from online.bl_hdfs_maidian_updates
WHERE partition_date>='20190701' AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND action='on_click_card'
AND params['page_name'] ='home'
and params['card_content_type'] in ('diary','user_post','answer','qa')--首页内容卡片点击,未限制tab
GROUP BY partition_date,cl_id
)t3
ON t1.partition_date=t3.partition_date AND t1.device_id=t3.cl_id
ON t3.partition_date=t2.partition_date
AND t3.cl_id=t2.cl_id
left join
(
SELECT partition_date
,cl_id
,sum(page_stay) as page_stay
FROM online.bl_hdfs_maidian_updates
WHERE partition_date>='20190701' AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND action = 'page_view'
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail',
'video_steep','article_detail','wiki_detail','product_detail','wiki_brand','wiki_collect')
......@@ -215,9 +216,9 @@ left join
LEFT JOIN
(SELECT code,is_spam,is_ai_channel,partition_day
FROM DIM.DIM_AI_CHANNEL_ZP_NEW
WHERE partition_day >='20190701' and partition_day<=regexp_replace(DATE_SUB(current_date,1) ,'-',''))tmp
WHERE partition_day=regexp_replace(DATE_SUB(current_date,1) ,'-',''))tmp
on first_channel_source_type=tmp.code and m.partition_date=tmp.partition_day
WHERE partition_date>='20190701' AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND active_type IN ('1','2','4')
AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
......@@ -238,7 +239,7 @@ left join
,cl_id
,count(distinct time_str) as pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date>='20190701' AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
and referrer in ('report_result','face_detect_result','float_tag_detail')
AND action = 'page_view'
-- and params['is_first']=1
......@@ -253,7 +254,7 @@ left join
(
SELECT cl_id,partition_date,count(distinct time_str) as pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= '20190701' and partition_date <= regexp_replace(DATE_SUB(current_date,1) ,'-','')
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND action = 'on_click_card'
and page_name in ('report_result','face_detect_result','float_tag_detail') --ai测颜值、ai测肤质、模拟整形结果页
AND params['card_content_type'] in ('diary','user_post','answer','qa')
......@@ -265,7 +266,7 @@ left join
(
SELECT cl_id,partition_date,count(distinct time_str) as pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= '20190701' and partition_date <= regexp_replace(DATE_SUB(current_date,1) ,'-','')
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND action = 'page_view'
and page_name in ('report_result','face_detect_result','float_tag_detail') --ai测颜值、ai测肤质、模拟整形结果页
-- AND params['card_content_type'] in ('diary','user_post','answer','qa')
......@@ -287,4 +288,136 @@ left join
on t1.partition_date=t3.partition_date
and t1.active_type=t3.active_type
and t1.device_os_type=t3.device_os_type
and t1.channel=t3.channel
\ No newline at end of file
and t1.channel=t3.channel;
-- Daily load of next-day retention by user segment into pm.tl_pm_content_retention.
--
-- Cohort day D   = current_date - 2 : devices whose behaviour is segmented (t1, t3, t4).
-- Next day   D+1 = current_date - 1 : latest partition, used to detect returning devices (t2, t5).
-- Both sides used to read current_date - 1, so the date_add(..., 1) joins below could
-- never match and every retention figure came out as 0.
--
-- Segments, based on home feed behaviour on day D:
--   good click   : content detail page_view with referrer home and page_stay >= 20 (t3)
--   ungood click : clicked a home content card (t4) but has no good click
--   no click     : no home content card click
-- Each *_quality column is the segment retention rate divided by the overall
-- retention rate, which is (segment_retained / segment_uv) / (retention_num / dau).
INSERT OVERWRITE TABLE pm.tl_pm_content_retention PARTITION (PARTITION_DAY = ${partition_day})
SELECT
    day_id
    ,device_os_type
    ,active_type
    ,channel
    ,dau
    -- A zero denominator gives NULL in Hive, COALESCE maps that to 0.
    ,COALESCE(ROUND(retention_num/dau*100,2),0) AS retention_rate
    ,home_good_click_uv
    ,COALESCE(ROUND(home_good_click_retention_num*dau/home_good_click_uv/retention_num,2),0) AS home_good_click_quality
    ,home_ungood_click_uv
    ,COALESCE(ROUND(home_ungood_click_retention_num*dau/home_ungood_click_uv/retention_num,2),0) AS home_ungood_click_quality
    ,no_click_uv
    ,COALESCE(ROUND(no_click_uv_retention_num*dau/no_click_uv/retention_num,2),0) AS no_click_uv_quality
    ,COALESCE(ROUND(home_good_click_uv_2*dau/home_good_click_uv/retention_num,2),0) AS home_good_click_retention_quality
FROM
(
    SELECT regexp_replace(substr(t1.partition_date,1,10),'-','') AS day_id
          ,device_os_type
          ,active_type
          ,channel
          ,count(distinct t1.device_id) AS dau
          ,count(distinct t2.device_id) AS retention_num
          ,count(distinct t3.cl_id) AS home_good_click_uv
          ,count(distinct t5.cl_id) AS home_good_click_uv_2
          ,count(distinct case when t3.cl_id is not null then t2.device_id end) AS home_good_click_retention_num
          ,count(distinct case when t4.cl_id is not null and t3.cl_id is null then t4.cl_id end) AS home_ungood_click_uv
          ,count(distinct case when t4.cl_id is not null and t3.cl_id is null then t2.device_id end) AS home_ungood_click_retention_num
          ,count(distinct case when t4.cl_id is null and t3.cl_id is null then t1.device_id end) AS no_click_uv
          -- Retained devices of the no-click segment: same predicate as no_click_uv.
          -- (Previously this repeated the ungood-click predicate.)
          ,count(distinct case when t4.cl_id is null and t3.cl_id is null then t2.device_id end) AS no_click_uv_retention_num
    FROM
    (
        -- Cohort of day D. Each dimension is exploded into its own value plus
        -- the total bucket, so every device lands in all combinations.
        SELECT partition_date,a.device_os_type,b.active_type,device_id,v.channel
        FROM
        (
            SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) AS partition_date
                  ,array(device_os_type,'合计') AS device_os_type
                  ,array(CASE WHEN active_type = '4' THEN '老活跃设备'
                              WHEN active_type IN ('1','2') THEN '新增设备' END,'合计') AS active_type
                  ,device_id
                  ,array(CASE WHEN tmp.is_ai_channel='true' THEN 'AI' ELSE '其他' END , '合计') AS channel
            FROM online.ml_device_day_active_status m
            LEFT JOIN
            (
                SELECT code,is_spam,is_ai_channel,partition_day
                FROM DIM.DIM_AI_CHANNEL_ZP_NEW
                WHERE partition_day=regexp_replace(DATE_SUB(current_date,2) ,'-','')
            )tmp
            ON first_channel_source_type=tmp.code AND m.partition_date=tmp.partition_day
            WHERE partition_date=regexp_replace(DATE_SUB(current_date,2) ,'-','')
            AND active_type IN ('1','2','4')
            -- Excluded acquisition channels.
            AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
                ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
                ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
                ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
                ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
                ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
                ,'promotion_shike','promotion_julang_jl03','promotion_zuimei','','unknown')
            AND first_channel_source_type not LIKE 'promotion\_jf\_%'
        )mas
        lateral view explode(mas.device_os_type ) a AS device_os_type
        lateral view explode(mas.active_type ) b AS active_type
        lateral view explode(mas.channel ) v AS channel
    )t1
    LEFT JOIN
    (
        -- Devices active on D+1, used to decide whether a cohort device returned.
        SELECT device_id
              ,concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) AS partition_date
        FROM online.ml_device_day_active_status
        WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
    )t2
    ON t1.device_id=t2.device_id AND date_add(t1.partition_date,1)=t2.partition_date
    LEFT JOIN
    (
        -- Home content card clicks on day D.
        SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) AS partition_date
              ,cl_id
              ,count(distinct time_str) AS pv
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date=regexp_replace(DATE_SUB(current_date,2) ,'-','')
        AND action='on_click_card'
        AND page_name ='home'
        AND params['card_content_type'] in ('diary','user_post','answer','qa')
        GROUP BY partition_date,cl_id
    )t4
    ON t1.partition_date=t4.partition_date AND t1.device_id=t4.cl_id
    LEFT JOIN
    (
        -- Good clicks on day D. Joined through t4, so only devices with a
        -- recorded home card click can count as good-click devices.
        SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) AS partition_date
              ,cl_id
              ,count(distinct time_str) AS pv
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date=regexp_replace(DATE_SUB(current_date,2) ,'-','')
        AND referrer='home'
        AND action = 'page_view'
        -- iOS page views opened from a push also report referrer home, so they are
        -- removed here (data for some periods may be unreliable).
        AND params['is_push']=0
        AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail',
                          'video_steep','article_detail','wiki_detail','product_detail','wiki_brand','wiki_collect','welfare_detail')
        AND page_stay>=20
        GROUP BY partition_date,cl_id
    )t3
    ON t4.partition_date=t3.partition_date AND t4.cl_id=t3.cl_id
    LEFT JOIN
    (
        -- Good clicks on D+1, for devices that good-clicked on D and did so again.
        -- NOTE(review): unlike t3 this has no is_push filter - confirm that is intended.
        SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) AS partition_date
              ,cl_id
              ,count(distinct time_str) AS pv
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
        AND referrer='home'
        AND action = 'page_view'
        AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail',
                          'video_steep','article_detail','wiki_detail','product_detail','wiki_brand','wiki_collect','welfare_detail')
        AND page_stay>=20
        GROUP BY partition_date,cl_id
    )t5
    ON t3.cl_id=t5.cl_id AND date_add(t3.partition_date,1)=t5.partition_date
    LEFT JOIN
    (
        -- Blacklisted devices from the latest snapshot. Anti-joined below.
        SELECT distinct device_id
        FROM ML.ML_D_CT_DV_DEVICECLEAN_DIMEN_D
        WHERE PARTITION_DAY = regexp_replace(DATE_SUB(current_date,1) ,'-','')
        AND is_abnormal_device = 'true'
    )spam_pv
    ON t1.device_id =spam_pv.device_id
    WHERE spam_pv.device_id IS NULL
    GROUP BY t1.partition_date,device_os_type,active_type,channel
)t1;
\ No newline at end of file
......@@ -8,9 +8,9 @@ SELECT
,ai_report_card_click_uv AS `ai结果页内容卡片点击uv`
,ai_report_card_click_pv AS `ai结果页内容卡片点击pv`
,ai_report_good_click AS `来源于ai结果页的good click`
,CONCAT(ROUND(ai_report_card_click_uv/ai_report_uv*100,2),'%') AS `ai结果页内容卡片点击uv/DAU`
,CONCAT(ROUND(ai_report_card_click_pv/ai_report_uv*100,2),'%') AS `ai结果页内容卡片点击pv/DAU`
,CONCAT(ROUND(ai_report_good_click/ai_report_uv*100,2),'%') AS `来源于ai结果页的good click/DAU`
,CONCAT(ROUND(ai_report_card_click_uv/ai_report_uv*100,2),'%') AS `ai结果页内容卡片点击uv/ai结果页uv`
,CONCAT(ROUND(ai_report_card_click_pv/ai_report_uv*100,2),'%') AS `ai结果页内容卡片点击pv/ai结果页uv`
,CONCAT(ROUND(ai_report_good_click/ai_report_uv*100,2),'%') AS `来源于ai结果页的good click/ai结果页uv`
FROM
(
select substr(day_id,1,6) as month
......
......@@ -8,9 +8,9 @@ SELECT
,ai_report_card_click_uv AS `ai结果页内容卡片点击uv`
,ai_report_card_click_pv AS `ai结果页内容卡片点击pv`
,ai_report_good_click AS `来源于ai结果页的good click`
,NVL(CONCAT(ROUND(ai_report_card_click_uv/ai_report_uv*100,2),'%'),0) AS `ai结果页内容卡片点击uv/DAU`
,NVL(CONCAT(ROUND(ai_report_card_click_pv/ai_report_uv*100,2),'%'),0) AS `ai结果页内容卡片点击pv/DAU`
,NVL(CONCAT(ROUND(ai_report_good_click/ai_report_uv*100,2),'%'),0) AS `来源于ai结果页的good click/DAU`
,NVL(CONCAT(ROUND(ai_report_card_click_uv/ai_report_uv*100,2),'%'),0) AS `ai结果页内容卡片点击uv/ai结果页uv`
,NVL(CONCAT(ROUND(ai_report_card_click_pv/ai_report_uv*100,2),'%'),0) AS `ai结果页内容卡片点击pv/ai结果页uv`
,NVL(CONCAT(ROUND(ai_report_good_click/ai_report_uv*100,2),'%'),0) AS `来源于ai结果页的good click/ai结果页uv`
FROM pm.tl_pm_content_v3
where partition_day>='20201018' and partition_day <= regexp_replace(DATE_SUB(current_date,1) ,'-','')
order by `日期`,`系统`,`活跃`,`渠道`
\ No newline at end of file
SELECT
    -- Monthly averages of the daily rows in pm.tl_pm_content_retention, keyed by
    -- month (first six characters of day_id), OS, activity type and channel.
    -- A stored quality of 0 is turned into NULL so that avg() skips it.
    substr(day_id,1,6) AS `日期`
    ,device_os_type AS `系统`
    ,active_type AS `活跃`
    ,channel AS `渠道`
    ,round(avg(home_good_click_uv),0) AS `首页good click设备数`
    ,round(avg(CASE WHEN home_good_click_quality=0 THEN NULL ELSE home_good_click_quality END),2) AS `首页gc用户次留率/全站次留率`
    ,round(avg(home_ungood_click_uv),0) AS `点击首页卡片但非gc设备数`
    ,round(avg(CASE WHEN home_ungood_click_quality=0 THEN NULL ELSE home_ungood_click_quality END),2) AS `点击首页卡片但非gc设备次留率/全站次留率`
    ,round(avg(no_click_uv),0) AS `未点击首页feed卡片设备数`
    ,round(avg(CASE WHEN no_click_uv_quality=0 THEN NULL ELSE no_click_uv_quality END),2) AS `未点击首页feed卡片设备次留率/全站次留率`
    ,round(avg(CASE WHEN home_good_click_retention_quality=0 THEN NULL ELSE home_good_click_retention_quality END),2) AS `当天点击首页feed卡片,且次日依旧点击的次留率/全站次留率`
FROM pm.tl_pm_content_retention
WHERE partition_day>='20201018'
  AND partition_day <= regexp_replace(DATE_SUB(current_date,1) ,'-','')
GROUP BY
    substr(day_id,1,6)
    ,device_os_type
    ,active_type
    ,channel
ORDER BY
    `日期`
    ,`系统`
    ,`活跃`
    ,`渠道`
\ No newline at end of file
daily_content_retention=内容日报-次留率
#step1_1.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online bl_hdfs_maidian_updates
\ No newline at end of file
#step1_2.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_device_day_active_status
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive ml ML_D_CT_DV_DEVICECLEAN_DIMEN_D
\ No newline at end of file
#step1_4.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive DIM DIM_AI_CHANNEL_ZP_NEW
\ No newline at end of file
#step2.job
type=command
dependencies=step1_1,step1_2,step1_3,step1_4
command=curl -X GET http://localhost:8553/api/report/email/daily_content_retention/weiyimin@igengmei.com/hanyingyue@igengmei.com
\ No newline at end of file
select
    -- Monthly averages of daily next-day retention by user segment, computed
    -- straight from the source tables for every day since 20190701.
    --
    -- Segments, based on home feed behaviour on the cohort day:
    --   good click   : content detail page_view with referrer home and page_stay >= 20 (t3)
    --   ungood click : clicked a home content card (t4) but has no good click
    --   no click     : neither a good click nor a home content card click
    -- Each *_quality value is the segment retention rate divided by the overall
    -- retention rate. Daily values of 0 (for example the latest day, which has no
    -- next-day partition yet) are turned into NULL so that avg() skips them.
    substr(day_id,1,6) as month
    ,device_os_type
    ,active_type
    ,channel
    ,round(avg(dau),0) as dau
    ,round(avg(if(retention_rate=0,NULL,retention_rate)),2) as retention_rate
    ,round(avg(home_good_click_uv),0) as home_good_click_uv
    ,round(avg(if(home_good_click_quality=0,NULL,home_good_click_quality)),2) as home_good_click_quality
    ,round(avg(home_ungood_click_uv),0) as home_ungood_click_uv
    ,round(avg(if(home_ungood_click_quality=0,NULL,home_ungood_click_quality)),2) as home_ungood_click_quality
    ,round(avg(no_click_uv),0) as no_click_uv
    ,round(avg(if(no_click_uv_quality=0,NULL,no_click_uv_quality)),2) as no_click_uv_quality
    ,round(avg(if(home_good_click_retention_quality=0,NULL,home_good_click_retention_quality)),2) as home_good_click_retention_quality
from
(
    -- Daily rates. A zero denominator gives NULL in Hive, COALESCE maps that to 0.
    SELECT
        day_id
        ,device_os_type
        ,active_type
        ,channel
        ,dau
        ,COALESCE(ROUND(retention_num/dau*100,2),0) AS retention_rate
        ,home_good_click_uv
        ,COALESCE(ROUND(home_good_click_retention_num*dau/home_good_click_uv/retention_num,2),0) as home_good_click_quality
        ,home_ungood_click_uv
        ,COALESCE(ROUND(home_ungood_click_retention_num*dau/home_ungood_click_uv/retention_num,2),0) as home_ungood_click_quality
        ,no_click_uv
        ,COALESCE(ROUND(no_click_uv_retention_num*dau/no_click_uv/retention_num,2),0) as no_click_uv_quality
        ,COALESCE(ROUND(home_good_click_uv_2*dau/home_good_click_uv/retention_num,2),0) as home_good_click_retention_quality
    FROM
    (
        -- Daily distinct-device counts per segment.
        SELECT regexp_replace(substr(t1.partition_date,1,10),'-','') as day_id
              ,device_os_type
              ,active_type
              ,channel
              ,count(distinct t1.device_id) as dau
              ,count(distinct t2.device_id) as retention_num
              ,count(distinct t3.cl_id) as home_good_click_uv
              ,count(distinct t5.cl_id) as home_good_click_uv_2
              ,count(distinct case when t3.cl_id is not null then t2.device_id end) as home_good_click_retention_num
              ,count(distinct case when t4.cl_id is not null and t3.cl_id is null then t4.cl_id end) as home_ungood_click_uv
              ,count(distinct case when t4.cl_id is not null and t3.cl_id is null then t2.device_id end) as home_ungood_click_retention_num
              ,count(distinct case when t4.cl_id is null and t3.cl_id is null then t1.device_id end) as no_click_uv
              -- Retained devices of the no-click segment: same predicate as no_click_uv.
              -- (Previously this repeated the ungood-click predicate.)
              ,count(distinct case when t4.cl_id is null and t3.cl_id is null then t2.device_id end) as no_click_uv_retention_num
        FROM
        (
            -- Daily cohorts. Each dimension is exploded into its own value plus
            -- the total bucket, so every device lands in all combinations.
            SELECT partition_date,a.device_os_type,b.active_type,device_id,v.channel
            FROM
            (
                SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) AS partition_date
                      ,array(device_os_type,'合计') as device_os_type
                      ,array(CASE WHEN active_type = '4' THEN '老活跃设备'
                                  WHEN active_type IN ('1','2') THEN '新增设备' END,'合计') AS active_type
                      ,device_id
                      ,array(CASE WHEN tmp.is_ai_channel='true' THEN 'AI' ELSE '其他' END , '合计') as channel
                FROM online.ml_device_day_active_status m
                LEFT JOIN
                (
                    SELECT code,is_spam,is_ai_channel,partition_day
                    FROM DIM.DIM_AI_CHANNEL_ZP_NEW
                    WHERE partition_day >='20190701' and partition_day<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
                )tmp
                on first_channel_source_type=tmp.code and m.partition_date=tmp.partition_day
                WHERE partition_date>='20190701' AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
                AND active_type IN ('1','2','4')
                -- Excluded acquisition channels.
                AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
                    ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
                    ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
                    ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
                    ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
                    ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
                    ,'promotion_shike','promotion_julang_jl03','promotion_zuimei','','unknown')
                AND first_channel_source_type not LIKE 'promotion\_jf\_%'
            )mas
            lateral view explode(mas.device_os_type ) a as device_os_type
            lateral view explode(mas.active_type ) b as active_type
            lateral view explode(mas.channel ) v as channel
        )t1
        LEFT JOIN
        (
            -- Active devices, matched on the day after the cohort day.
            SELECT device_id
                  ,concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) AS partition_date
            FROM online.ml_device_day_active_status
            WHERE partition_date>='20190701' AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
        )t2
        ON t1.device_id=t2.device_id and date_add(t1.partition_date,1)=t2.partition_date
        left join
        (
            -- Good clicks on the cohort day.
            SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) AS partition_date
                  ,cl_id
                  ,count(distinct time_str) as pv
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date>='20190701' AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
            and referrer='home'
            AND action = 'page_view'
            AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail',
                              'video_steep','article_detail','wiki_detail','product_detail','wiki_brand','wiki_collect','welfare_detail')
            and page_stay>=20
            group by partition_date,cl_id
        )t3
        on t1.partition_date=t3.partition_date and t1.device_id=t3.cl_id
        left join
        (
            -- Home content card clicks on the cohort day.
            SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) AS partition_date
                  ,cl_id
                  ,count(distinct time_str) as pv
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date>='20190701' AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
            AND action='on_click_card'
            AND page_name ='home'
            and params['card_content_type'] in ('diary','user_post','answer','qa')
            group by partition_date,cl_id
        )t4
        on t1.partition_date=t4.partition_date and t1.device_id=t4.cl_id
        LEFT JOIN
        (
            -- Good clicks on the day after, for devices that good-clicked on the cohort day.
            SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) AS partition_date
                  ,cl_id
                  ,count(distinct time_str) as pv
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date>='20190701' AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
            and referrer='home'
            AND action = 'page_view'
            AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail',
                              'video_steep','article_detail','wiki_detail','product_detail','wiki_brand','wiki_collect','welfare_detail')
            and page_stay>=20
            group by partition_date,cl_id
        )t5
        ON t3.cl_id=t5.cl_id and date_add(t3.partition_date,1)=t5.partition_date
        left join
        (
            -- Blacklisted devices from the latest snapshot. Anti-joined below.
            select distinct device_id
            from ML.ML_D_CT_DV_DEVICECLEAN_DIMEN_D
            where PARTITION_DAY = regexp_replace(DATE_SUB(current_date,1) ,'-','')
            AND is_abnormal_device = 'true'
        )spam_pv
        on t1.device_id =spam_pv.device_id
        WHERE spam_pv.device_id IS NULL
        group by t1.partition_date,device_os_type,active_type,channel
    )t1
)t2
group by substr(day_id,1,6)
        ,device_os_type
        ,active_type
        ,channel
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment