Commit ee150695 authored by 魏艺敏's avatar 魏艺敏

push codes

parent f3815cbc
-- Beginner featured-feed report for answers (home page, 精选 tab).
-- Original note from the author: posts associated with tag 3315.
-- NOTE(review): no filter on tag 3315 is visible in this statement - confirm whether one is expected.
-- Returns one row per answer with lifetime and trailing-30-day exposure, click,
-- detail-page and interaction metrics, sorted by lifetime exposure (descending).
SELECT
     card_id AS `日记本id`
    ,type AS `内容类型`
    ,content_level AS `星级`
    ,CASE WHEN is_cpc > 0 THEN '是' ELSE '否' END AS `是否商业化内容`
    ,create_date AS `上线日期`
    ,audit_date AS `最近审核日期`
    ,tag_list AS `所有关联标签`
    -- Trailing 30 days. Interaction rate = (replies + votes + favorites + shares) / detail-page views.
    -- A NULL or zero denominator makes the rate NULL, which the outer nvl turns into 0.
    ,nvl(concat(round((nvl(reply_num_30,0)+nvl(vote_num_30,0)+nvl(favor_num_30,0)+nvl(share_num_30,0))/page_pv_30*100,2),'%'),0) AS `前30日互动率`
    ,nvl(concat(round(click_pv_30/exp_pv_30*100,2),'%'),0) AS `前30日ctr`
    ,nvl(click_pv_30,0) AS `前30日点击`
    ,nvl(exp_pv_30,0) AS `前30日曝光`
    ,nvl(page_pv_30,0) AS `前30日浏览pv`
    ,nvl(reply_num_30,0) AS `前30日真实评论`
    ,nvl(vote_num_30,0) AS `前30日真实点赞`
    ,nvl(favor_num_30,0) AS `前30日收藏`
    ,nvl(share_num_30,0) AS `前30日转发`
    ,nvl(page_pv_20_30,0) AS `前30日超过20秒阅读pv`
    ,nvl(avg_page_stay_30,0) AS `前30日平均阅读时长(s)`
    -- Lifetime (every day available in the source partitions).
    ,nvl(concat(round((nvl(reply_num,0)+nvl(vote_num,0)+nvl(favor_num,0)+nvl(share_num,0))/page_pv*100,2),'%'),0) AS `历史互动率`
    ,nvl(concat(round(click_pv/exp_pv*100,2),'%'),0) AS `历史ctr`
    ,nvl(click_pv,0) AS `历史点击`
    ,nvl(exp_pv,0) AS `历史曝光`
    ,nvl(page_pv,0) AS `历史浏览pv`
    ,nvl(reply_num,0) AS `历史真实评论`
    ,nvl(vote_num,0) AS `历史真实点赞`
    ,nvl(favor_num,0) AS `历史收藏`
    ,nvl(share_num,0) AS `历史转发`
    ,nvl(page_pv_20,0) AS `历史超过20秒阅读pv`
    ,nvl(avg_page_stay,0) AS `历史平均阅读时长(s)`
FROM
(
    -- Per-answer rollup of the daily rows.
    -- Original note: historical data means data from the audit time until now.
    -- NOTE(review): no audit-date filter is applied below - every available day is summed.
    -- The *_30 columns only include days on or after current_date - 30
    -- (partition_date is a yyyy-MM-dd string at this level).
    SELECT
         t1.card_id
        ,type
        ,content_level
        ,create_date
        ,audit_date
        ,tag_list
        ,sum(is_cpc) AS is_cpc
        ,sum(exp_pv) AS exp_pv
        ,sum(click_pv) AS click_pv
        ,sum(page_pv) AS page_pv
        ,sum(reply_num) AS reply_num
        ,sum(vote_num) AS vote_num
        ,sum(favor_num) AS favor_num
        ,sum(share_num) AS share_num
        ,sum(page_pv_20) AS page_pv_20
        ,round(avg(avg_page_stay),2) AS avg_page_stay
        ,sum(CASE WHEN t1.partition_date >= DATE_SUB(current_date,30) THEN exp_pv END) AS exp_pv_30
        ,sum(CASE WHEN t1.partition_date >= DATE_SUB(current_date,30) THEN click_pv END) AS click_pv_30
        ,sum(CASE WHEN t1.partition_date >= DATE_SUB(current_date,30) THEN page_pv END) AS page_pv_30
        ,sum(CASE WHEN t1.partition_date >= DATE_SUB(current_date,30) THEN reply_num END) AS reply_num_30
        ,sum(CASE WHEN t1.partition_date >= DATE_SUB(current_date,30) THEN vote_num END) AS vote_num_30
        ,sum(CASE WHEN t1.partition_date >= DATE_SUB(current_date,30) THEN favor_num END) AS favor_num_30
        ,sum(CASE WHEN t1.partition_date >= DATE_SUB(current_date,30) THEN share_num END) AS share_num_30
        ,sum(CASE WHEN t1.partition_date >= DATE_SUB(current_date,30) THEN page_pv_20 END) AS page_pv_20_30
        ,round(avg(CASE WHEN t1.partition_date >= DATE_SUB(current_date,30) THEN avg_page_stay END),2) AS avg_page_stay_30
    FROM
    (
        -- Daily behaviour metrics (t1) merged with daily interaction counts (t2).
        -- Either side may be missing for a given answer and day.
        SELECT
             -- Coalesce the yyyyMMdd key BEFORE reformatting it. concat_ws skips NULL
             -- arguments and returns an empty string instead of NULL, so wrapping two
             -- already formatted dates in nvl never falls back to the second one.
             concat_ws('-'
                      ,substr(nvl(t1.partition_date,t2.create_date),1,4)
                      ,substr(nvl(t1.partition_date,t2.create_date),5,2)
                      ,substr(nvl(t1.partition_date,t2.create_date),7,2)) AS partition_date
            ,nvl(t1.card_id,t2.answer_id) AS card_id
            ,is_cpc
            ,exp_pv
            ,click_pv
            ,page_pv
            ,page_pv_20
            ,avg_page_stay
            ,reply_num
            ,vote_num
            ,favor_num
            ,share_num
        FROM
        (
            -- Behaviour metrics per answer per day, excluding blacklisted devices.
            SELECT
                 coalesce(a.card_id,e.business_id,f.business_id) AS card_id
                ,coalesce(a.partition_date,e.partition_date,f.partition_date) AS partition_date
                ,sum(is_cpc) AS is_cpc
                ,sum(exp_pv) AS exp_pv
                ,sum(click_pv) AS click_pv
                ,sum(page_pv) AS page_pv
                ,sum(page_pv_20) AS page_pv_20
                -- total reading seconds divided by the number of distinct device-days with a view
                ,round(sum(page_stay)/count(distinct e.cl_id,e.partition_date),2) AS avg_page_stay
            FROM
            (
                -- Exposure: precise exposures of answer cards on the home 精选 tab.
                SELECT
                     partition_date
                    ,card_id
                    ,cl_id
                    ,count(distinct app_session_id) AS exp_pv
                    ,CASE WHEN transaction_type IN ('cpc','advertise') THEN 1 ELSE 0 END AS is_cpc
                FROM online.ml_community_precise_exposure_detail
                WHERE partition_date >= '20160101'
                  AND partition_date <= regexp_replace(DATE_SUB(current_date,1),'-','')
                  -- the action was renamed to page_precise_exposure in version 7745
                  AND action IN ('page_precise_exposure','home_choiceness_card_exposure')
                  -- precise exposure only
                  AND is_exposure = '1'
                  AND page_name = 'home'
                  AND card_content_type IN ('answer')
                  AND tab_name = '精选'
                GROUP BY partition_date,card_id,cl_id,CASE WHEN transaction_type IN ('cpc','advertise') THEN 1 ELSE 0 END
            ) a
            LEFT JOIN
            (
                -- Clicks on cards of the home 精选 tab.
                SELECT
                     partition_date
                    ,params['card_id'] AS card_id
                    ,cl_id
                    ,count(distinct app_session_id) AS click_pv
                FROM online.bl_hdfs_maidian_updates
                WHERE partition_date >= '20160101'
                  AND partition_date <= regexp_replace(DATE_SUB(current_date,1),'-','')
                  AND action = 'on_click_card'
                  AND params['page_name'] = 'home'
                  AND params['tab_name'] = '精选'
                GROUP BY partition_date,params['card_id'],cl_id
            ) b
                ON a.partition_date = b.partition_date AND a.card_id = b.card_id AND a.cl_id = b.cl_id
            FULL JOIN
            (
                -- Reading time per day, answer and device.
                -- Aggregated to one row per join key: joining the raw per-event rows
                -- repeated every exposure, click and page-view row once per event
                -- and inflated the sums above.
                SELECT
                     partition_date
                    ,business_id
                    ,cl_id
                    ,sum(page_stay) AS page_stay
                FROM
                (
                    -- de-duplicated page_view events with a plausible stay (0 to 999)
                    SELECT partition_date,cl_id,params['business_id'] AS business_id,page_stay,time_str
                    FROM online.bl_hdfs_maidian_updates
                    WHERE partition_date >= '20160101'
                      AND partition_date <= regexp_replace(DATE_SUB(current_date,1),'-','')
                      AND action = 'page_view'
                      AND page_stay >= 0 AND page_stay < 1000
                      AND page_name IN ('question_answer_detail','answer_detail')
                    GROUP BY partition_date,cl_id,params['business_id'],page_stay,time_str
                ) pv
                GROUP BY partition_date,business_id,cl_id
            ) e
                ON a.partition_date = e.partition_date AND a.card_id = e.business_id AND a.cl_id = e.cl_id
            FULL JOIN
            (
                -- Detail-page views per day, answer and device.
                SELECT
                     partition_date
                    ,cl_id
                    ,params['business_id'] AS business_id
                    ,count(distinct time_str) AS page_pv
                    -- distinct, so that duplicated events are handled the same way as in page_pv
                    ,count(distinct CASE WHEN page_stay >= 20 THEN time_str END) AS page_pv_20
                FROM online.bl_hdfs_maidian_updates
                WHERE partition_date >= '20160101'
                  AND partition_date <= regexp_replace(DATE_SUB(current_date,1),'-','')
                  AND action = 'page_view'
                  AND page_name IN ('question_answer_detail','answer_detail')
                GROUP BY partition_date,cl_id,params['business_id']
            ) f
                -- all three keys are coalesced so rows that exist only in e still line up with f
                ON nvl(a.partition_date,e.partition_date) = f.partition_date
               AND nvl(a.card_id,e.business_id) = f.business_id
               AND nvl(a.cl_id,e.cl_id) = f.cl_id
            LEFT JOIN
            (
                -- Blacklisted devices (anti-join through the WHERE clause below).
                SELECT DISTINCT device_id
                FROM ML.ML_D_CT_DV_DEVICECLEAN_DIMEN_D
                WHERE PARTITION_DAY = regexp_replace(DATE_SUB(current_date,1),'-','')
                  AND (IS_MORE_USER = 'true'
                    OR IS_STAFF = 'true'
                    OR IS_SPAM_CHANNEL = 'true'
                    OR IS_SUSPICIOUS = 'true'
                    -- literal corrected from ture, which could never match
                    OR IS_ASSOCIATED_ISSUEUSER = 'true')
            ) c
                -- coalesced device id, so rows without an exposure are filtered as well
                ON coalesce(a.cl_id,e.cl_id,f.cl_id) = c.device_id
            LEFT JOIN
            (
                -- Devices of flagged users, per day.
                SELECT u.partition_date,u.device_id
                FROM
                (
                    -- first device id the user was active on that day
                    SELECT
                         user_id
                        ,partition_date
                        ,if(size(device_list) > 0, device_list[0], '') AS device_id
                    FROM online.ml_user_updates
                    WHERE partition_date >= '20160101'
                      AND partition_date <= regexp_replace(DATE_SUB(current_date,1),'-','')
                ) u
                JOIN
                (
                    SELECT DISTINCT user_id
                    FROM ml.ML_D_CT_UI_USERCLEAN_DIMEN_D
                    WHERE PARTITION_DAY = regexp_replace(DATE_SUB(current_date,1),'-','')
                      AND (is_doctor = 'true'
                        OR is_puppet = 'true'
                        OR is_classify_user = 'true'
                        OR is_reply_fake = 'true'
                        OR is_staff = 'true'
                        OR is_associated_issuedevice = 'true')
                ) bad_user
                    ON u.user_id = bad_user.user_id
            ) d
                ON coalesce(a.cl_id,e.cl_id,f.cl_id) = d.device_id
               AND coalesce(a.partition_date,e.partition_date,f.partition_date) = d.partition_date
            WHERE c.device_id IS NULL
              AND d.device_id IS NULL
            GROUP BY coalesce(a.card_id,e.business_id,f.business_id)
                    ,coalesce(a.partition_date,e.partition_date,f.partition_date)
        ) t1
        FULL JOIN
        (
            -- Interaction counts per answer per day (yyyyMMdd), excluding flagged users.
            SELECT
                 a.answer_id
                ,a.create_date
                ,sum(CASE WHEN type = 'reply' THEN num END) AS reply_num
                ,sum(CASE WHEN type = 'vote' THEN num END) AS vote_num
                ,sum(CASE WHEN type = 'favor' THEN num END) AS favor_num
                ,sum(CASE WHEN type = 'share' THEN num END) AS share_num
            FROM
            (
                -- replies
                SELECT answer_id,regexp_replace(substr(create_time,1,10),'-','') AS create_date,user_id,'reply' AS type,count(create_time) AS num
                FROM online.tl_hdfs_answer_reply_view
                WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','')
                  AND answer_id IS NOT NULL
                GROUP BY user_id,answer_id,regexp_replace(substr(create_time,1,10),'-','')
                UNION ALL
                -- votes
                SELECT answer_id,regexp_replace(substr(create_time,1,10),'-','') AS create_date,user_id,'vote' AS type,count(create_time) AS num
                FROM online.tl_hdfs_answer_vote_view
                WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','')
                GROUP BY answer_id,user_id,regexp_replace(substr(create_time,1,10),'-','')
                UNION ALL
                -- follows of the question-and-answer page, counted as favorites
                SELECT params['business_id'] AS answer_id,partition_date AS create_date,user_id,'favor' AS type,count(distinct time_str) AS num
                FROM online.bl_hdfs_maidian_updates
                WHERE partition_date >= '20160101'
                  AND partition_date <= regexp_replace(DATE_SUB(current_date,1),'-','')
                  AND action = 'on_click_button' AND params['button_name'] = 'attention'
                  AND page_name IN ('question_answer_detail')
                GROUP BY params['business_id'],partition_date,user_id
                UNION ALL
                -- favorites of the answer
                SELECT params['business_id'] AS answer_id,partition_date AS create_date,user_id,'favor' AS type,count(distinct time_str) AS num
                FROM online.bl_hdfs_maidian_updates
                WHERE partition_date >= '20160101'
                  AND partition_date <= regexp_replace(DATE_SUB(current_date,1),'-','')
                  AND action = 'on_click_button' AND params['button_name'] = '收藏'
                  AND page_name IN ('answer_detail')
                GROUP BY params['business_id'],partition_date,user_id
                UNION ALL
                -- share clicks
                SELECT params['business_id'] AS answer_id,partition_date AS create_date,user_id,'share' AS type,count(distinct time_str) AS num
                FROM online.bl_hdfs_maidian_updates
                WHERE partition_date >= '20160101'
                  AND partition_date <= regexp_replace(DATE_SUB(current_date,1),'-','')
                  AND action = 'page_click_share'
                  AND page_name IN ('question_answer_detail','answer_detail')
                GROUP BY params['business_id'],partition_date,user_id
            ) a
            LEFT JOIN
            (
                -- flagged users (anti-join through the WHERE clause below)
                SELECT DISTINCT user_id
                FROM ml.ML_D_CT_UI_USERCLEAN_DIMEN_D
                WHERE PARTITION_DAY = regexp_replace(DATE_SUB(current_date,1),'-','')
                  AND (is_doctor = 'true'
                    OR is_puppet = 'true'
                    OR is_classify_user = 'true'
                    OR is_reply_fake = 'true'
                    OR is_staff = 'true'
                    OR is_associated_issuedevice = 'true')
            ) b
                ON a.user_id = b.user_id
            WHERE b.user_id IS NULL
            GROUP BY a.answer_id,a.create_date
        ) t2
            ON t1.card_id = t2.answer_id AND t1.partition_date = t2.create_date
    ) t1
    JOIN
    (
        -- Answer attributes plus the list of associated tag names.
        SELECT id,content_level,user_id,create_date,audit_date,type,tag_list
        FROM
        (
            -- Answers only. Answers have no audit time, so update_time is used as an approximation.
            SELECT
                 id
                ,level AS content_level
                ,user_id
                ,substr(create_time,1,10) AS create_date
                ,substr(update_time,1,10) AS audit_date
                ,'回答' AS type
            FROM online.tl_hdfs_answer_view
            WHERE partition_date = regexp_replace(DATE_SUB(current_date,1),'-','')
            GROUP BY id,level,user_id,create_time,update_time
        ) a
        LEFT JOIN
        (
            SELECT c.card_id,collect_set(tag_name) AS tag_list
            FROM
            (
                -- online answers mapped to the tags of their question
                SELECT ans.card_id,qt.tag_id
                FROM
                (
                    SELECT id AS card_id,question_id
                    FROM online.tl_hdfs_answer_view
                    WHERE partition_date = regexp_replace(DATE_SUB(current_date,1),'-','')
                      AND is_online = 'true'
                ) ans
                JOIN
                (
                    SELECT question_id,tag_id
                    FROM online.tl_hdfs_questiontag_view
                    WHERE partition_date = regexp_replace(DATE_SUB(current_date,1),'-','')
                ) qt
                    ON ans.question_id = qt.question_id
                GROUP BY ans.card_id,qt.tag_id
            ) c
            JOIN
            (
                -- old tag library
                -- NOTE(review): the partition is pinned to 20200212 - confirm this snapshot is intentional.
                SELECT id AS tag_id
                      ,name AS tag_name
                FROM online.tl_hdfs_api_tag_view
                WHERE partition_date = '20200212'
                  AND tag_type IN (2,3)
            ) d
                ON c.tag_id = d.tag_id
            GROUP BY c.card_id
        ) b
            ON a.id = b.card_id
    ) t2
        ON t1.card_id = t2.id
    GROUP BY t1.card_id,type,content_level,create_date,audit_date,tag_list
) t4
ORDER BY `历史曝光` DESC
-- Beginner featured-feed report for diaries (home page, 精选 tab).
-- Original note from the author: posts associated with tag 3315.
-- NOTE(review): no filter on tag 3315 is visible in this statement - confirm whether one is expected.
-- Returns one row per diary with lifetime and trailing-30-day exposure, click,
-- detail-page and interaction metrics, sorted by lifetime exposure (descending).
SELECT
     card_id AS `日记本id`
    ,type AS `内容类型`
    ,content_level AS `星级`
    ,CASE WHEN is_cpc > 0 THEN '是' ELSE '否' END AS `是否商业化内容`
    ,create_date AS `上线日期`
    ,audit_date AS `最近审核日期`
    ,tag_list AS `所有关联标签`
    -- Trailing 30 days. Interaction rate = (replies + votes + favorites + shares) / detail-page views.
    -- A NULL or zero denominator makes the rate NULL, which the outer nvl turns into 0.
    ,nvl(concat(round((nvl(reply_num_30,0)+nvl(vote_num_30,0)+nvl(favor_num_30,0)+nvl(share_num_30,0))/page_pv_30*100,2),'%'),0) AS `前30日互动率`
    ,nvl(concat(round(click_pv_30/exp_pv_30*100,2),'%'),0) AS `前30日ctr`
    ,nvl(click_pv_30,0) AS `前30日点击`
    ,nvl(exp_pv_30,0) AS `前30日曝光`
    ,nvl(page_pv_30,0) AS `前30日浏览pv`
    ,nvl(reply_num_30,0) AS `前30日真实评论`
    ,nvl(vote_num_30,0) AS `前30日真实点赞`
    ,nvl(favor_num_30,0) AS `前30日收藏`
    ,nvl(share_num_30,0) AS `前30日转发`
    ,nvl(page_pv_20_30,0) AS `前30日超过20秒阅读pv`
    ,nvl(avg_page_stay_30,0) AS `前30日平均阅读时长(s)`
    -- Lifetime (every day available in the source partitions).
    ,nvl(concat(round((nvl(reply_num,0)+nvl(vote_num,0)+nvl(favor_num,0)+nvl(share_num,0))/page_pv*100,2),'%'),0) AS `历史互动率`
    ,nvl(concat(round(click_pv/exp_pv*100,2),'%'),0) AS `历史ctr`
    ,nvl(click_pv,0) AS `历史点击`
    ,nvl(exp_pv,0) AS `历史曝光`
    ,nvl(page_pv,0) AS `历史浏览pv`
    ,nvl(reply_num,0) AS `历史真实评论`
    ,nvl(vote_num,0) AS `历史真实点赞`
    ,nvl(favor_num,0) AS `历史收藏`
    ,nvl(share_num,0) AS `历史转发`
    ,nvl(page_pv_20,0) AS `历史超过20秒阅读pv`
    ,nvl(avg_page_stay,0) AS `历史平均阅读时长(s)`
FROM
(
    -- Per-diary rollup of the daily rows.
    -- Original note: historical data means data from the audit time until now.
    -- NOTE(review): no audit-date filter is applied below - every available day is summed.
    -- The *_30 columns only include days on or after current_date - 30
    -- (partition_date is a yyyy-MM-dd string at this level).
    SELECT
         t1.card_id
        ,type
        ,content_level
        ,create_date
        ,audit_date
        ,tag_list
        ,sum(is_cpc) AS is_cpc
        ,sum(exp_pv) AS exp_pv
        ,sum(click_pv) AS click_pv
        ,sum(page_pv) AS page_pv
        ,sum(reply_num) AS reply_num
        ,sum(vote_num) AS vote_num
        ,sum(favor_num) AS favor_num
        ,sum(share_num) AS share_num
        ,sum(page_pv_20) AS page_pv_20
        ,round(avg(avg_page_stay),2) AS avg_page_stay
        ,sum(CASE WHEN t1.partition_date >= DATE_SUB(current_date,30) THEN exp_pv END) AS exp_pv_30
        ,sum(CASE WHEN t1.partition_date >= DATE_SUB(current_date,30) THEN click_pv END) AS click_pv_30
        ,sum(CASE WHEN t1.partition_date >= DATE_SUB(current_date,30) THEN page_pv END) AS page_pv_30
        ,sum(CASE WHEN t1.partition_date >= DATE_SUB(current_date,30) THEN reply_num END) AS reply_num_30
        ,sum(CASE WHEN t1.partition_date >= DATE_SUB(current_date,30) THEN vote_num END) AS vote_num_30
        ,sum(CASE WHEN t1.partition_date >= DATE_SUB(current_date,30) THEN favor_num END) AS favor_num_30
        ,sum(CASE WHEN t1.partition_date >= DATE_SUB(current_date,30) THEN share_num END) AS share_num_30
        ,sum(CASE WHEN t1.partition_date >= DATE_SUB(current_date,30) THEN page_pv_20 END) AS page_pv_20_30
        ,round(avg(CASE WHEN t1.partition_date >= DATE_SUB(current_date,30) THEN avg_page_stay END),2) AS avg_page_stay_30
    FROM
    (
        -- Daily behaviour metrics (t1) merged with daily interaction counts (t2).
        -- Either side may be missing for a given diary and day.
        SELECT
             -- Coalesce the yyyyMMdd key BEFORE reformatting it. concat_ws skips NULL
             -- arguments and returns an empty string instead of NULL, so wrapping two
             -- already formatted dates in nvl never falls back to the second one.
             concat_ws('-'
                      ,substr(nvl(t1.partition_date,t2.create_date),1,4)
                      ,substr(nvl(t1.partition_date,t2.create_date),5,2)
                      ,substr(nvl(t1.partition_date,t2.create_date),7,2)) AS partition_date
            ,nvl(t1.card_id,t2.diary_id) AS card_id
            ,is_cpc
            ,exp_pv
            ,click_pv
            ,page_pv
            ,page_pv_20
            ,avg_page_stay
            ,reply_num
            ,vote_num
            ,favor_num
            ,share_num
        FROM
        (
            -- Behaviour metrics per diary per day, excluding blacklisted devices.
            SELECT
                 coalesce(a.card_id,e.business_id,f.business_id) AS card_id
                ,coalesce(a.partition_date,e.partition_date,f.partition_date) AS partition_date
                ,sum(is_cpc) AS is_cpc
                ,sum(exp_pv) AS exp_pv
                ,sum(click_pv) AS click_pv
                ,sum(page_pv) AS page_pv
                ,sum(page_pv_20) AS page_pv_20
                -- total reading seconds divided by the number of distinct device-days with a view
                ,round(sum(page_stay)/count(distinct e.cl_id,e.partition_date),2) AS avg_page_stay
            FROM
            (
                -- Exposure: precise exposures of diary cards on the home 精选 tab.
                SELECT
                     partition_date
                    ,card_id
                    ,cl_id
                    ,count(distinct app_session_id) AS exp_pv
                    ,CASE WHEN transaction_type IN ('cpc','advertise') THEN 1 ELSE 0 END AS is_cpc
                FROM online.ml_community_precise_exposure_detail
                WHERE partition_date >= '20160101'
                  AND partition_date <= regexp_replace(DATE_SUB(current_date,1),'-','')
                  -- the action was renamed to page_precise_exposure in version 7745
                  AND action IN ('page_precise_exposure','home_choiceness_card_exposure')
                  -- precise exposure only
                  AND is_exposure = '1'
                  AND page_name = 'home'
                  AND card_content_type IN ('diary')
                  AND tab_name = '精选'
                GROUP BY partition_date,card_id,cl_id,CASE WHEN transaction_type IN ('cpc','advertise') THEN 1 ELSE 0 END
            ) a
            LEFT JOIN
            (
                -- Clicks on cards of the home 精选 tab.
                SELECT
                     partition_date
                    ,params['card_id'] AS card_id
                    ,cl_id
                    ,count(distinct app_session_id) AS click_pv
                FROM online.bl_hdfs_maidian_updates
                WHERE partition_date >= '20160101'
                  AND partition_date <= regexp_replace(DATE_SUB(current_date,1),'-','')
                  AND action = 'on_click_card'
                  AND params['page_name'] = 'home'
                  AND params['tab_name'] = '精选'
                GROUP BY partition_date,params['card_id'],cl_id
            ) b
                ON a.partition_date = b.partition_date AND a.card_id = b.card_id AND a.cl_id = b.cl_id
            FULL JOIN
            (
                -- Reading time per day, diary and device.
                -- Aggregated to one row per join key: joining the raw per-event rows
                -- repeated every exposure, click and page-view row once per event
                -- and inflated the sums above.
                SELECT
                     partition_date
                    ,business_id
                    ,cl_id
                    ,sum(page_stay) AS page_stay
                FROM
                (
                    -- de-duplicated page_view events with a plausible stay (0 to 999)
                    SELECT partition_date,cl_id,params['business_id'] AS business_id,page_stay,time_str
                    FROM online.bl_hdfs_maidian_updates
                    WHERE partition_date >= '20160101'
                      AND partition_date <= regexp_replace(DATE_SUB(current_date,1),'-','')
                      AND action = 'page_view'
                      AND page_stay >= 0 AND page_stay < 1000
                      AND page_name IN ('diary_detail','topic_detail')
                    GROUP BY partition_date,cl_id,params['business_id'],page_stay,time_str
                ) pv
                GROUP BY partition_date,business_id,cl_id
            ) e
                ON a.partition_date = e.partition_date AND a.card_id = e.business_id AND a.cl_id = e.cl_id
            FULL JOIN
            (
                -- Detail-page views per day, diary and device.
                SELECT
                     partition_date
                    ,cl_id
                    ,params['business_id'] AS business_id
                    ,count(distinct time_str) AS page_pv
                    -- distinct, so that duplicated events are handled the same way as in page_pv
                    ,count(distinct CASE WHEN page_stay >= 20 THEN time_str END) AS page_pv_20
                FROM online.bl_hdfs_maidian_updates
                WHERE partition_date >= '20160101'
                  AND partition_date <= regexp_replace(DATE_SUB(current_date,1),'-','')
                  AND action = 'page_view'
                  AND page_name IN ('diary_detail','topic_detail')
                GROUP BY partition_date,cl_id,params['business_id']
            ) f
                -- all three keys are coalesced so rows that exist only in e still line up with f
                ON nvl(a.partition_date,e.partition_date) = f.partition_date
               AND nvl(a.card_id,e.business_id) = f.business_id
               AND nvl(a.cl_id,e.cl_id) = f.cl_id
            LEFT JOIN
            (
                -- Blacklisted devices (anti-join through the WHERE clause below).
                SELECT DISTINCT device_id
                FROM ML.ML_D_CT_DV_DEVICECLEAN_DIMEN_D
                WHERE PARTITION_DAY = regexp_replace(DATE_SUB(current_date,1),'-','')
                  AND (IS_MORE_USER = 'true'
                    OR IS_STAFF = 'true'
                    OR IS_SPAM_CHANNEL = 'true'
                    OR IS_SUSPICIOUS = 'true'
                    -- literal corrected from ture, which could never match
                    OR IS_ASSOCIATED_ISSUEUSER = 'true')
            ) c
                -- coalesced device id, so rows without an exposure are filtered as well
                ON coalesce(a.cl_id,e.cl_id,f.cl_id) = c.device_id
            LEFT JOIN
            (
                -- Devices of flagged users, per day.
                SELECT u.partition_date,u.device_id
                FROM
                (
                    -- first device id the user was active on that day
                    SELECT
                         user_id
                        ,partition_date
                        ,if(size(device_list) > 0, device_list[0], '') AS device_id
                    FROM online.ml_user_updates
                    WHERE partition_date >= '20160101'
                      AND partition_date <= regexp_replace(DATE_SUB(current_date,1),'-','')
                ) u
                JOIN
                (
                    SELECT DISTINCT user_id
                    FROM ml.ML_D_CT_UI_USERCLEAN_DIMEN_D
                    WHERE PARTITION_DAY = regexp_replace(DATE_SUB(current_date,1),'-','')
                      AND (is_doctor = 'true'
                        OR is_puppet = 'true'
                        OR is_classify_user = 'true'
                        OR is_reply_fake = 'true'
                        OR is_staff = 'true'
                        OR is_associated_issuedevice = 'true')
                ) bad_user
                    ON u.user_id = bad_user.user_id
            ) d
                ON coalesce(a.cl_id,e.cl_id,f.cl_id) = d.device_id
               AND coalesce(a.partition_date,e.partition_date,f.partition_date) = d.partition_date
            WHERE c.device_id IS NULL
              AND d.device_id IS NULL
            GROUP BY coalesce(a.card_id,e.business_id,f.business_id)
                    ,coalesce(a.partition_date,e.partition_date,f.partition_date)
        ) t1
        FULL JOIN
        (
            -- Interaction counts per diary per day (yyyyMMdd), excluding flagged users.
            SELECT
                 a.diary_id
                ,a.create_date
                ,sum(CASE WHEN type = 'reply' THEN num END) AS reply_num
                ,sum(CASE WHEN type = 'vote' THEN num END) AS vote_num
                ,sum(CASE WHEN type = 'favor' THEN num END) AS favor_num
                ,sum(CASE WHEN type = 'share' THEN num END) AS share_num
            FROM
            (
                -- replies on the posts of a diary
                SELECT p.diary_id,r.create_date,r.user_id,'reply' AS type,sum(r.reply_num) AS num
                FROM
                (
                    SELECT problem_id,user_id,regexp_replace(substr(reply_date,1,10),'-','') AS create_date,count(distinct reply_date) AS reply_num
                    FROM online.tl_hdfs_topicreply_view
                    WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','')
                    GROUP BY user_id,regexp_replace(substr(reply_date,1,10),'-',''),problem_id
                ) r
                JOIN
                (
                    SELECT id,diary_id
                    FROM online.tl_hdfs_problem_view
                    WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','')
                    GROUP BY id,diary_id
                ) p
                    ON p.id = r.problem_id
                GROUP BY p.diary_id,r.create_date,r.user_id
                UNION ALL
                -- votes on the diary itself
                SELECT v.diary_id,v.create_date,v.user_id,'vote' AS type,sum(v.vote_num) AS num
                FROM
                (
                    SELECT diary_id,user_id,regexp_replace(substr(vote_time,1,10),'-','') AS create_date,count(distinct vote_time) AS vote_num
                    FROM online.tl_hdfs_diaryvote_view
                    WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','')
                    GROUP BY diary_id,user_id,regexp_replace(substr(vote_time,1,10),'-','')
                ) v
                JOIN
                (
                    -- Existence check only: one row per diary. Grouping by (user_id, diary_id)
                    -- could return several rows per diary and multiply the vote counts.
                    SELECT diary_id
                    FROM online.tl_hdfs_problem_view
                    WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','')
                    GROUP BY diary_id
                ) p
                    ON v.diary_id = p.diary_id
                GROUP BY v.diary_id,v.user_id,v.create_date
                UNION ALL
                -- votes on the posts of a diary
                SELECT p.diary_id,v.create_date,v.user_id,'vote' AS type,sum(v.vote_num) AS num
                FROM
                (
                    SELECT topic_id,user_id,regexp_replace(substr(vote_time,1,10),'-','') AS create_date,count(distinct vote_time) AS vote_num
                    FROM online.tl_hdfs_topicvote_view
                    WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','')
                    GROUP BY topic_id,user_id,regexp_replace(substr(vote_time,1,10),'-','')
                ) v
                JOIN
                (
                    SELECT id,diary_id
                    FROM online.tl_hdfs_problem_view
                    WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','')
                    GROUP BY id,diary_id
                ) p
                    ON p.id = v.topic_id
                GROUP BY p.diary_id,v.create_date,v.user_id
                UNION ALL
                -- favorites of the diary itself
                -- NOTE(review): the column is spelled creatd_time here and created_time in the next branch - confirm against the table schema.
                SELECT diary_id,regexp_replace(substr(creatd_time,1,10),'-','') AS create_date,user_id,'favor' AS type,count(distinct creatd_time) AS num
                FROM online.tl_hdfs_diaryfavor_view
                WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','')
                GROUP BY user_id,diary_id,regexp_replace(substr(creatd_time,1,10),'-','')
                UNION ALL
                -- favorites of the posts of a diary
                SELECT p.diary_id,fv.create_date,fv.user_id,'favor' AS type,sum(fv.favor_num) AS num
                FROM
                (
                    SELECT user_id,problem_id,regexp_replace(substr(created_time,1,10),'-','') AS create_date,count(distinct created_time) AS favor_num
                    FROM online.tl_hdfs_problemfavor_view
                    WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','')
                    GROUP BY user_id,problem_id,regexp_replace(substr(created_time,1,10),'-','')
                ) fv
                JOIN
                (
                    SELECT id,diary_id
                    FROM online.tl_hdfs_problem_view
                    WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','')
                    GROUP BY id,diary_id
                ) p
                    ON p.id = fv.problem_id
                GROUP BY p.diary_id,fv.create_date,fv.user_id
                UNION ALL
                -- share clicks
                SELECT params['business_id'] AS diary_id,partition_date AS create_date,user_id,'share' AS type,count(distinct time_str) AS num
                FROM online.bl_hdfs_maidian_updates
                WHERE partition_date >= '20160101'
                  AND partition_date <= regexp_replace(DATE_SUB(current_date,1),'-','')
                  AND action = 'page_click_share'
                  AND page_name IN ('diary_detail','topic_detail')
                GROUP BY params['business_id'],partition_date,user_id
            ) a
            LEFT JOIN
            (
                -- flagged users (anti-join through the WHERE clause below)
                SELECT DISTINCT user_id
                FROM ml.ML_D_CT_UI_USERCLEAN_DIMEN_D
                WHERE PARTITION_DAY = regexp_replace(DATE_SUB(current_date,1),'-','')
                  AND (is_doctor = 'true'
                    OR is_puppet = 'true'
                    OR is_classify_user = 'true'
                    OR is_reply_fake = 'true'
                    OR is_staff = 'true'
                    OR is_associated_issuedevice = 'true')
            ) b
                ON a.user_id = b.user_id
            WHERE b.user_id IS NULL
            GROUP BY a.diary_id,a.create_date
        ) t2
            ON t1.card_id = t2.diary_id AND t1.partition_date = t2.create_date
    ) t1
    JOIN
    (
        -- Diary attributes plus the list of associated tag names.
        SELECT id,content_level,user_id,create_date,audit_date,type,tag_list
        FROM
        (
            -- Diaries only.
            SELECT
                 id
                ,content_level
                ,user_id
                ,substr(created_time,1,10) AS create_date
                ,substr(audit_time,1,10) AS audit_date
                ,'日记本' AS type
            FROM online.tl_hdfs_diary_view
            WHERE partition_date = regexp_replace(DATE_SUB(current_date,1),'-','')
            GROUP BY id,content_level,user_id,created_time,audit_time
        ) a
        LEFT JOIN
        (
            SELECT c.card_id,collect_set(tag_name) AS tag_list
            FROM
            (
                -- diary to tag mapping
                SELECT diary_id AS card_id,tag_id
                FROM online.tl_hdfs_diary_tags_view
                WHERE partition_date = regexp_replace(DATE_SUB(current_date,1),'-','')
                GROUP BY diary_id,tag_id
            ) c
            JOIN
            (
                -- old tag library
                -- NOTE(review): the partition is pinned to 20200212 - confirm this snapshot is intentional.
                SELECT id AS tag_id
                      ,name AS tag_name
                FROM online.tl_hdfs_api_tag_view
                WHERE partition_date = '20200212'
                  AND tag_type IN (2,3)
            ) d
                ON c.tag_id = d.tag_id
            GROUP BY c.card_id
        ) b
            ON a.id = b.card_id
    ) t2
        ON t1.card_id = t2.id
    GROUP BY t1.card_id,type,content_level,create_date,audit_date,tag_list
) t4
ORDER BY `历史曝光` DESC
daily_content=内容日报-简化版
home_content-detail=首页内容数据-分日明细
home_content_by_month=首页内容数据-月均
ai_content_detail=ai内容数据-分日明细
ai_content_by_month=ai内容数据-月均
\ No newline at end of file
--***************************************************************
--* Script name:
--* Purpose:       Content daily report - simplified version - for 思璟
--* Business name: pm
--* Input data:
--* Author:        weiyimin@igengmei.com
--* Updated:
--***************************************************************
-- Session settings: submit to the data queue
SET mapreduce.job.queuename=data;
-- Switch to the pm database
USE pm;
-- Create the report table (one partition per day) if it does not exist yet.
-- Column comments are JSON metadata strings and are kept exactly as authored.
-- The trailing comma after the last column was removed: a comma directly before
-- the closing parenthesis of the column list is a syntax error.
-- NOTE(review): the retention_rate_* columns are declared string and
-- page_stay_per_device is BIGINT although its comment says minutes per device -
-- confirm these types against what the loading job actually writes.
CREATE TABLE IF NOT EXISTS pm.tl_pm_content_v2
(
    day_id string comment '{"chs_name":"当天日期","description":"","etl":"","value":"","remark":""}',
    device_os_type string comment '{"chs_name":"设备类型","description":"","etl":"","value":"","remark":""}',
    active_type string comment '{"chs_name":"活跃类型","description":"","etl":"","value":"","remark":""}',
    channel string comment '{"chs_name":"渠道","description":"","etl":"","value":"","remark":""}',
    dau BIGINT comment '{"chs_name":"日活","description":"","etl":"","value":"","remark":""}',
    retention_rate_2 string comment '{"chs_name":"次留率","description":"","etl":"","value":"","remark":""}',
    retention_rate_3 string comment '{"chs_name":"第3日留存率","description":"","etl":"","value":"","remark":""}',
    retention_rate_7 string comment '{"chs_name":"第7日留存率","description":"","etl":"","value":"","remark":""}',
    home_card_click_uv BIGINT comment '{"chs_name":"首页卡片点击uv","description":"","etl":"","value":"","remark":""}',
    home_card_click_pv BIGINT comment '{"chs_name":"首页卡片点击pv","description":"","etl":"","value":"","remark":""}',
    home_good_click BIGINT comment '{"chs_name":"来源于首页的good click(浏览时长>=20s的内容)","description":"","etl":"","value":"","remark":""}',
    page_stay_per_device BIGINT comment '{"chs_name":"单设备内容浏览时长(m)","description":"","etl":"","value":"","remark":""}',
    ai_report_card_click_uv BIGINT comment '{"chs_name":"ai结果页内容卡片点击uv","description":"","etl":"","value":"","remark":""}',
    ai_report_card_click_pv BIGINT comment '{"chs_name":"ai结果页内容卡片点击pv","description":"","etl":"","value":"","remark":""}',
    ai_report_good_click BIGINT comment '{"chs_name":"来源于结果页的good click(浏览时长>=20s的内容)","description":"","etl":"","value":"","remark":""}'
)comment '内容日报-简化版'
PARTITIONED BY (PARTITION_DAY STRING comment '分区日期')
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
COLLECTION ITEMS TERMINATED BY '\002'
MAP KEYS TERMINATED BY '\003'
LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
\ No newline at end of file
-- Session settings applied before the report load.
-- Queue the job is submitted to.
SET mapreduce.job.queuename=data;
-- Map task container size (MB) and JVM options.
SET mapreduce.map.memory.mb=8192;
SET mapreduce.map.java.opts=-Xmx8000m;
-- Reduce task container size (MB) and JVM options.
SET mapreduce.reduce.memory.mb=8192;
SET mapreduce.reduce.java.opts=-Xmx8000m;
-- NOTE(review): the 8000m heap leaves little headroom inside the 8192 MB containers - confirm this is intended.
-- Enable automatic join conversion.
SET hive.auto.convert.join=true;
-- Number of reduce tasks.
SET mapred.reduce.tasks=20;
-- Run the statements that follow under the admin role.
SET ROLE admin;
--内容日报简化
INSERT OVERWRITE TABLE pm.tl_pm_content_v2 PARTITION (PARTITION_DAY = ${partition_day})
SELECT
t1.partition_date as day_id
,t1.device_os_type
,t1.active_type
,t1.channel
,dau
,NVL(ROUND(retention_num2/dau*100,2),0) AS retention_rate2
,NVL(ROUND(retention_num3/dau*100,2),0) AS retention_rate3
,NVL(ROUND(retention_num7/dau*100,2),0) AS retention_rate7
,NVL(t2.home_click_uv,0) AS home_card_click_uv
,NVL(t2.home_click_pv,0) AS home_card_click_pv
,NVL(t2.good_pv,0) AS home_good_click
,NVL(t2.avg_page_stay,0) AS page_stay_per_device
,NVL(t3.ai_click_uv,0) AS ai_report_card_click_uv
,NVL(t3.ai_click_pv,0) AS ai_report_card_click_pv
,NVL(t3.good_pv,0) AS ai_report_good_click
FROM
(
SELECT regexp_replace(substr(t1.partition_date,1,10),'-','') as partition_date
,device_os_type
,active_type
,channel
,count(distinct t1.device_id) as dau
,count(case when date_add(t1.partition_date,1)=t2.partition_date then t2.device_id end) as retention_num2
,count(case when date_add(t1.partition_date,2)=t2.partition_date then t2.device_id end) as retention_num3
,count(case when date_add(t1.partition_date,6)=t2.partition_date then t2.device_id end) as retention_num7
FROM
(
SELECT partition_date,a.device_os_type,b.active_type,device_id,v.channel
FROM
(
SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) AS partition_date
,array(device_os_type,'合计') as device_os_type
,array(CASE WHEN active_type = '4' THEN '老活跃设备'
WHEN active_type IN ('1','2') THEN '新增设备' END,'合计') AS active_type
,device_id
,array(CASE WHEN tmp.is_ai_channel='true' THEN 'AI' ELSE '其他' END , '合计') as channel
FROM online.ml_device_day_active_status m
LEFT JOIN
(SELECT code,is_spam,is_ai_channel,partition_day
FROM DIM.DIM_AI_CHANNEL_ZP_NEW
WHERE partition_day=regexp_replace(DATE_SUB(current_date,1) ,'-',''))tmp
on first_channel_source_type=tmp.code and m.partition_date=tmp.partition_day
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND active_type IN ('1','2','4')
AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
,'promotion_shike','promotion_julang_jl03','promotion_zuimei','','unknown')
AND first_channel_source_type not LIKE 'promotion\_jf\_%'
)mas
lateral view explode(mas.device_os_type ) a as device_os_type
lateral view explode(mas.active_type ) b as active_type
lateral view explode(mas.channel ) v as channel
)t1
LEFT JOIN
(--活跃设备
SELECT device_id
,concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) AS partition_date
FROM online.ml_device_day_active_status
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
)t2
ON t1.device_id=t2.device_id
left join
( -- 去掉黑名单设备
select distinct device_id
from ML.ML_D_CT_DV_DEVICECLEAN_DIMEN_D
where PARTITION_DAY = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND is_abnormal_device = 'true'
)spam_pv
on t1.device_id =spam_pv.device_id
WHERE spam_pv.device_id IS NULL
group by t1.partition_date,device_os_type,active_type,channel
)t1
left join
(
SELECT t1.partition_date
,device_os_type
,active_type
,channel
,count(distinct t3.cl_id) as home_click_uv
,sum(t3.pv) as home_click_pv
,count(distinct t2.cl_id) as good_uv
,sum(t2.pv) as good_pv
,round(sum(page_stay)/count(distinct t4.cl_id)/60,2) as avg_page_stay
FROM
(
SELECT partition_date,a.device_os_type,b.active_type,device_id,v.channel
FROM
(
SELECT partition_date
,array(device_os_type,'合计') as device_os_type
,array(CASE WHEN active_type = '4' THEN '老活跃设备'
WHEN active_type IN ('1','2') THEN '新增设备' END,'合计') AS active_type
,device_id
,array(CASE WHEN tmp.is_ai_channel='true' THEN 'AI' ELSE '其他' END , '合计') as channel
FROM online.ml_device_day_active_status m
LEFT JOIN
(SELECT code,is_spam,is_ai_channel,partition_day
FROM DIM.DIM_AI_CHANNEL_ZP_NEW
WHERE partition_day=regexp_replace(DATE_SUB(current_date,1) ,'-',''))tmp
on first_channel_source_type=tmp.code and m.partition_date=tmp.partition_day
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND active_type IN ('1','2','4')
AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
,'promotion_shike','promotion_julang_jl03','promotion_zuimei','','unknown')
AND first_channel_source_type not LIKE 'promotion\_jf\_%'
)mas
lateral view explode(mas.device_os_type ) a as device_os_type
lateral view explode(mas.active_type ) b as active_type
lateral view explode(mas.channel ) v as channel
)t1
left join
(
SELECT partition_date
,cl_id
,count(distinct time_str) as pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
and referrer='home'
AND action = 'page_view'
-- and params['is_first']=1
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail',
'video_steep','article_detail','wiki_detail','product_detail','wiki_brand','wiki_collect')
and page_stay>=20
group by partition_date,cl_id
)t2
ON t1.partition_date=t2.partition_date
AND t1.device_id=t2.cl_id
left join
(
SELECT partition_date,cl_id,count(distinct time_str) as pv
from online.bl_hdfs_maidian_updates
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND action='on_click_card'
AND params['page_name'] ='home'
and params['card_content_type'] in ('diary','user_post','answer','qa')--首页内容卡片点击,未限制tab
GROUP BY partition_date,cl_id
)t3
ON t1.partition_date=t3.partition_date AND t1.device_id=t3.cl_id
left join
(
SELECT partition_date
,cl_id
,sum(page_stay) as page_stay
FROM online.bl_hdfs_maidian_updates
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND action = 'page_view'
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail',
'video_steep','article_detail','wiki_detail','product_detail','wiki_brand','wiki_collect')
and page_stay>=0 AND page_stay<1000
group by partition_date,cl_id
)t4
ON t1.partition_date=t4.partition_date AND t1.device_id=t4.cl_id
left join
( -- 去掉黑名单设备
select distinct device_id
from ML.ML_D_CT_DV_DEVICECLEAN_DIMEN_D
where PARTITION_DAY = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND is_abnormal_device = 'true'
)spam_pv
on t1.device_id =spam_pv.device_id
WHERE spam_pv.device_id IS NULL
group by t1.partition_date ,device_os_type,active_type,channel
)t2
on t1.partition_date=t2.partition_date
and t1.active_type=t2.active_type
and t1.device_os_type=t2.device_os_type
and t1.channel=t2.channel
left join
(
SELECT t1.partition_date
,device_os_type
,active_type
,channel
,count(distinct t3.cl_id) as ai_click_uv
,sum(t3.pv) as ai_click_pv
,count(distinct t2.cl_id) as good_uv
,sum(t2.pv) as good_pv
FROM
(
SELECT partition_date,a.device_os_type,b.active_type,device_id,v.channel
FROM
(
SELECT partition_date
,array(device_os_type,'合计') as device_os_type
,array(CASE WHEN active_type = '4' THEN '老活跃设备'
WHEN active_type IN ('1','2') THEN '新增设备' END,'合计') AS active_type
,device_id
,array(CASE WHEN tmp.is_ai_channel='true' THEN 'AI' ELSE '其他' END , '合计') as channel
FROM online.ml_device_day_active_status m
LEFT JOIN
(SELECT code,is_spam,is_ai_channel,partition_day
FROM DIM.DIM_AI_CHANNEL_ZP_NEW
WHERE partition_day =regexp_replace(DATE_SUB(current_date,1) ,'-',''))tmp
on first_channel_source_type=tmp.code and m.partition_date=tmp.partition_day
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND active_type IN ('1','2','4')
AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
,'promotion_shike','promotion_julang_jl03','promotion_zuimei','','unknown')
AND first_channel_source_type not LIKE 'promotion\_jf\_%'
)mas
lateral view explode(mas.device_os_type ) a as device_os_type
lateral view explode(mas.active_type ) b as active_type
lateral view explode(mas.channel ) v as channel
)t1
left join
(--good click
SELECT partition_date
,cl_id
,count(distinct time_str) as pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
and referrer in ('report_result','face_detect_result','float_tag_detail')
AND action = 'page_view'
-- and params['is_first']=1
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail',
'video_steep','article_detail','wiki_detail','product_detail','wiki_brand','wiki_collect')
and page_stay>=20
group by partition_date,cl_id
)t2
ON t1.partition_date=t2.partition_date
AND t1.device_id=t2.cl_id
left join
(
SELECT cl_id,partition_date,count(distinct time_str) as pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date= regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND action = 'on_click_card'
and page_name in ('report_result','face_detect_result','float_tag_detail') --ai测颜值、ai测肤质、模拟整形结果页
AND params['card_content_type'] in ('diary','user_post','answer','qa')
-- AND params['tab_name'] in ('猜你喜欢','为你定制','reference_case')--不限制tab name,只要是从ai结果页到内容页的就算
group by cl_id,partition_date
)t3
ON t1.partition_date=t3.partition_date AND t1.device_id=t3.cl_id
left join
( -- 去掉黑名单设备
select distinct device_id
from ML.ML_D_CT_DV_DEVICECLEAN_DIMEN_D
where PARTITION_DAY = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND is_abnormal_device = 'true'
)spam_pv
on t1.device_id =spam_pv.device_id
WHERE spam_pv.device_id IS NULL
group by t1.partition_date ,device_os_type,active_type,channel
)t3
on t1.partition_date=t3.partition_date
and t1.active_type=t3.active_type
and t1.device_os_type=t3.device_os_type
and t1.channel=t3.channel
\ No newline at end of file
#step1_1.job
# Command-type job: runs the project's waitsuccess.sh script with the arguments
# "hive online tl_hdfs_api_tractate_view".
# Presumably this blocks until that Hive table is ready for the day -- TODO confirm against waitsuccess.sh.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_api_tractate_view
\ No newline at end of file
# NOTE(review): a second command= line follows, targeting online.bl_hdfs_maidian_updates.
# It looks like the old/new pair of a diff rather than two intended commands -- confirm which one should remain.
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online bl_hdfs_maidian_updates
\ No newline at end of file
#step1_2.job
# Command-type job: runs waitsuccess.sh with the arguments "hive online ml_device_day_active_status".
# Presumably waits until that Hive table is ready before downstream steps start -- TODO confirm against waitsuccess.sh.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_device_day_active_status
\ No newline at end of file
#step1_3.job
# Command-type job: runs waitsuccess.sh with the arguments "hive DIM DIM_AI_CHANNEL_ZP_NEW".
# Presumably waits until that Hive table is ready before downstream steps start -- TODO confirm against waitsuccess.sh.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive DIM DIM_AI_CHANNEL_ZP_NEW
\ No newline at end of file
#step2.job
# Command-type job that declares step1_1, step1_2, step1_3 and step1_4 as dependencies and then
# issues an HTTP GET to the report service on localhost:8553 (path /api/report/email/daily_content/...),
# with two e-mail addresses as trailing path segments.
# Presumably this triggers generation and mailing of the daily_content report to those addresses -- TODO confirm with the service.
# NOTE(review): step1_4 is listed as a dependency but its job definition is not visible in this part of the commit -- confirm it exists.
type=command
dependencies=step1_1,step1_2,step1_3,step1_4
command=curl -X GET http://localhost:8553/api/report/email/daily_content/weiyimin@igengmei.com/hanyingyue@igengmei.com
\ No newline at end of file
-- Monthly AI result-page content report.
-- Source: pm.tl_pm_content_v2, partitions from 20201018 through yesterday.
-- Grain: one row per (month, device_os_type, active_type, channel).
-- Each metric is the average of the daily values within the month, rounded to a whole number;
-- the "/DAU" percentage columns are computed from those rounded monthly averages.
SELECT
month AS `日期`
,device_os_type AS `系统`
,active_type AS `活跃`
,channel as `渠道`
,dau AS `DAU`
,ai_report_card_click_uv AS `ai结果页内容卡片点击uv`
,ai_report_card_click_pv AS `ai结果页内容卡片点击pv`
,ai_report_good_click AS `来源于ai结果页的good click`
-- CONCAT yields NULL whenever the ratio is NULL (e.g. dau is missing), so fall back to 0
-- exactly as the daily version of this report does, instead of emitting NULL cells.
,NVL(CONCAT(ROUND(ai_report_card_click_uv/dau*100,2),'%'),0) AS `ai结果页内容卡片点击uv/DAU`
,NVL(CONCAT(ROUND(ai_report_card_click_pv/dau*100,2),'%'),0) AS `ai结果页内容卡片点击pv/DAU`
,NVL(CONCAT(ROUND(ai_report_good_click/dau*100,2),'%'),0) AS `来源于ai结果页的good click/DAU`
FROM
(
-- Collapse daily rows into months: the first six characters of day_id are the month key
-- (presumably day_id is yyyyMMdd like partition_day -- confirm).
select substr(day_id,1,6) as month
,device_os_type
,active_type
,channel
,round(avg(dau),0) as dau
,round(avg(ai_report_card_click_uv),0) as ai_report_card_click_uv
,round(avg(ai_report_card_click_pv),0) as ai_report_card_click_pv
,round(avg(ai_report_good_click),0) as ai_report_good_click
FROM pm.tl_pm_content_v2
-- partition_day is compared as a yyyyMMdd string; the upper bound is yesterday
where partition_day>='20201018' and partition_day <= regexp_replace(DATE_SUB(current_date,1) ,'-','')
group by substr(day_id,1,6),device_os_type,active_type,channel
)t1
order by `日期`,`系统`,`活跃`,`渠道`
\ No newline at end of file
-- Daily AI result-page content report.
-- Reads pm.tl_pm_content_v2 directly (no aggregation), partitions from 20201018 through yesterday,
-- and adds three "/DAU" percentage columns. A NULL percentage is reported as 0.
SELECT
    c.day_id                  AS `日期`,
    c.device_os_type          AS `系统`,
    c.active_type             AS `活跃`,
    c.channel                 AS `渠道`,
    c.dau                     AS `DAU`,
    c.ai_report_card_click_uv AS `ai结果页内容卡片点击uv`,
    c.ai_report_card_click_pv AS `ai结果页内容卡片点击pv`,
    c.ai_report_good_click    AS `来源于ai结果页的good click`,
    -- each ratio is rendered as a percentage string with two decimals
    NVL(CONCAT(ROUND(c.ai_report_card_click_uv/c.dau*100,2),'%'),0) AS `ai结果页内容卡片点击uv/DAU`,
    NVL(CONCAT(ROUND(c.ai_report_card_click_pv/c.dau*100,2),'%'),0) AS `ai结果页内容卡片点击pv/DAU`,
    NVL(CONCAT(ROUND(c.ai_report_good_click/c.dau*100,2),'%'),0)    AS `来源于ai结果页的good click/DAU`
FROM pm.tl_pm_content_v2 c
WHERE c.partition_day >= '20201018'
  -- upper bound is yesterday, formatted as yyyyMMdd
  AND c.partition_day <= regexp_replace(DATE_SUB(current_date,1) ,'-','')
ORDER BY
    `日期`,
    `系统`,
    `活跃`,
    `渠道`
\ No newline at end of file
-- Monthly home-feed content report.
-- Source: pm.tl_pm_content_v2, partitions from 20201018 through yesterday.
-- Grain: one row per (month, device_os_type, active_type, channel).
-- Count metrics are monthly averages of the daily values rounded to whole numbers; retention rates
-- and per-device page stay are averaged and kept to two decimals. The "/DAU" percentage columns
-- are computed from the rounded monthly averages.
SELECT
month AS `日期`
,device_os_type AS `系统`
,active_type AS `活跃`
,channel as `渠道`
,dau AS `DAU`
,retention_rate_2 AS `次留率(%)`
,retention_rate_3 AS `第3日留存率(%)`
,retention_rate_7 AS `第7日留存率(%)`
,home_card_click_uv AS `首页卡片点击uv`
,home_card_click_pv AS `首页卡片点击pv`
,home_good_click AS `来源于首页的good click`
-- CONCAT yields NULL whenever the ratio is NULL (e.g. dau is missing), so fall back to 0
-- exactly as the daily version of this report does, instead of emitting NULL cells.
,NVL(CONCAT(ROUND(home_card_click_uv/dau*100,2),'%'),0) AS `首页卡片点击uv/DAU`
,NVL(CONCAT(ROUND(home_card_click_pv/dau*100,2),'%'),0) AS `首页卡片点击pv/DAU`
,NVL(CONCAT(ROUND(home_good_click/dau*100,2),'%'),0) AS `来源于首页的good click/DAU`
-- same 0 fallback as the daily report when no page-stay value is available
,NVL(page_stay_per_device,0) AS `单设备内容浏览时长(m)`
FROM
(
-- Collapse daily rows into months: the first six characters of day_id are the month key
-- (presumably day_id is yyyyMMdd like partition_day -- confirm).
select substr(day_id,1,6) as month
,device_os_type
,active_type
,channel
,round(avg(dau),0) as dau
,round(avg(retention_rate_2),2) as retention_rate_2
,round(avg(retention_rate_3),2) as retention_rate_3
,round(avg(retention_rate_7),2) as retention_rate_7
,round(avg(home_card_click_uv),0) as home_card_click_uv
,round(avg(home_card_click_pv),0) as home_card_click_pv
,round(avg(home_good_click),0) as home_good_click
,round(avg(page_stay_per_device),2) as page_stay_per_device
FROM pm.tl_pm_content_v2
-- partition_day is compared as a yyyyMMdd string; the upper bound is yesterday
where partition_day>='20201018' and partition_day <= regexp_replace(DATE_SUB(current_date,1) ,'-','')
group by substr(day_id,1,6),device_os_type,active_type,channel
)t1
order by `日期`,`系统`,`活跃`,`渠道`
\ No newline at end of file
-- Daily home-feed content report.
-- Reads pm.tl_pm_content_v2 directly (no aggregation), partitions from 20201018 through yesterday.
-- Adds three "/DAU" percentage columns; NULL percentages and a NULL page-stay value are reported as 0.
SELECT
    c.day_id             AS `日期`,
    c.device_os_type     AS `系统`,
    c.active_type        AS `活跃`,
    c.channel            AS `渠道`,
    c.dau                AS `DAU`,
    c.retention_rate_2   AS `次留率(%)`,
    c.retention_rate_3   AS `第3日留存率(%)`,
    c.retention_rate_7   AS `第7日留存率(%)`,
    c.home_card_click_uv AS `首页卡片点击uv`,
    c.home_card_click_pv AS `首页卡片点击pv`,
    c.home_good_click    AS `来源于首页的good click`,
    -- each ratio is rendered as a percentage string with two decimals
    NVL(CONCAT(ROUND(c.home_card_click_uv/c.dau*100,2),'%'),0) AS `首页卡片点击uv/DAU`,
    NVL(CONCAT(ROUND(c.home_card_click_pv/c.dau*100,2),'%'),0) AS `首页卡片点击pv/DAU`,
    NVL(CONCAT(ROUND(c.home_good_click/c.dau*100,2),'%'),0)    AS `来源于首页的good click/DAU`,
    NVL(c.page_stay_per_device,0)                              AS `单设备内容浏览时长(m)`
FROM pm.tl_pm_content_v2 c
WHERE c.partition_day >= '20201018'
  -- upper bound is yesterday, formatted as yyyyMMdd
  AND c.partition_day <= regexp_replace(DATE_SUB(current_date,1) ,'-','')
ORDER BY
    `日期`,
    `系统`,
    `活跃`,
    `渠道`
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment