Commit ca4ee003 authored by 魏艺敏's avatar 魏艺敏

push codes

parent a4ee2822
--新手精选帖子
--帖子关联标签 3315 --帖子关联标签 3315
SELECT t1.id as `帖子id` SELECT id as `帖子id`
,content as `帖子标题` ,content as `帖子标题`
,t1.create_date as `上线日期` ,audit_date as `审核日期`
,t1.tag_list as `所有关联标签` ,tag_list as `所有关联标签`
,nvl(concat(round((nvl(t2.reply_num,0)+nvl(t2.vote_num,0)+nvl(t2.favor_num,0)+nvl(t2.share_num,0))/t2.page_pv*100,2),'%'),0) as `今日互动率` ,nvl(concat(round((nvl(reply_num_1,0)+nvl(vote_num_1,0)+nvl(favor_num_1,0)+nvl(share_num_1,0))/page_pv_1*100,2),'%'),0) as `前1日互动率`
,nvl(concat(round(t2.click_pv/t2.exp_pv*100,2),'%'),0) as `今日ctr` ,nvl(concat(round(click_pv_1/exp_pv_1*100,2),'%'),0) as `前1日ctr`
,nvl(t2.click_pv,0) as `今日点击pv` ,nvl(click_pv_1,0) as `前1日点击`
,nvl(t2.exp_pv,0) as `今日曝光pv` ,nvl(exp_pv_1,0) as `前1日曝光`
,nvl(t2.page_pv,0) as `今日浏览pv` ,nvl(page_pv_1,0) as `前1日浏览pv`
,nvl(t2.reply_num,0) as `今日真实评论数` ,nvl(reply_num_1,0) as `前1日真实评论`
,nvl(t2.vote_num,0) as `今日真实点赞数` ,nvl(vote_num_1,0) as `前1日真实点赞`
,nvl(t2.favor_num,0) as `今日收藏数` ,nvl(favor_num_1,0) as `前1日收藏`
,nvl(t2.share_num,0) as `今日转发数` ,nvl(share_num_1,0) as `前1日转发`
,nvl(t2.avg_page_stay,0) as `今日平均阅读时长(s)` ,nvl(avg_page_stay_1,0) as `前1日平均阅读时长(s)`
,nvl(concat(round((nvl(t3.reply_num,0)+nvl(t3.vote_num,0)+nvl(t3.favor_num,0)+nvl(t3.share_num,0))/t3.page_pv*100,2),'%'),0) as `3日互动率` ,nvl(concat(round((nvl(reply_num_3,0)+nvl(vote_num_3,0)+nvl(favor_num_3,0)+nvl(share_num_3,0))/page_pv_3*100,2),'%'),0) as `前3日互动率`
,nvl(concat(round(t3.click_pv/t3.exp_pv*100,2),'%'),0) as `3日ctr` ,nvl(concat(round(click_pv_3/exp_pv_3*100,2),'%'),0) as `前3日ctr`
,nvl(t3.click_pv,0) as `3日点击pv` ,nvl(click_pv_3,0) as `前3日点击`
,nvl(t3.exp_pv,0) as `3日曝光pv` ,nvl(exp_pv_3,0) as `前3日曝光`
,nvl(t3.page_pv,0) as `3日浏览pv` ,nvl(page_pv_3,0) as `前3日浏览pv`
,nvl(t3.reply_num,0) as `3日真实评论数` ,nvl(reply_num_3,0) as `前3日真实评论`
,nvl(t3.vote_num,0) as `3日真实点赞数` ,nvl(vote_num_3,0) as `前3日真实点赞`
,nvl(t3.favor_num,0) as `3日收藏数` ,nvl(favor_num_3,0) as `前3日收藏`
,nvl(t3.share_num,0) as `3日转发数` ,nvl(share_num_3,0) as `前3日转发`
,nvl(t3.avg_page_stay,0) as `3日平均阅读时长(s)` ,nvl(avg_page_stay_3,0) as `前3日平均阅读时长(s)`
,nvl(concat(round((nvl(t5.reply_num,0)+nvl(t5.vote_num,0)+nvl(t5.favor_num,0)+nvl(t5.share_num,0))/t5.page_pv*100,2),'%'),0) as `10日互动率` ,nvl(concat(round((nvl(reply_num_10,0)+nvl(vote_num_10,0)+nvl(favor_num_10,0)+nvl(share_num_10,0))/page_pv_10*100,2),'%'),0) as `前10日互动率`
,nvl(concat(round(t5.click_pv/t5.exp_pv*100,2),'%'),0) as `10日ctr` ,nvl(concat(round(click_pv_10/exp_pv_10*100,2),'%'),0) as `前10日ctr`
,nvl(t5.click_pv,0) as `10日点击pv` ,nvl(click_pv_10,0) as `前10日点击`
,nvl(t5.exp_pv,0) as `10日曝光pv` ,nvl(exp_pv_10,0) as `前10日曝光`
,nvl(t5.page_pv,0) as `10日浏览pv` ,nvl(page_pv_10,0) as `前10日浏览pv`
,nvl(t5.reply_num,0) as `10日真实评论数` ,nvl(reply_num_10,0) as `前10日真实评论`
,nvl(t5.vote_num,0) as `10日真实点赞数` ,nvl(vote_num_10,0) as `前10日真实点赞`
,nvl(t5.favor_num,0) as `10日收藏数` ,nvl(favor_num_10,0) as `前10日收藏`
,nvl(t5.share_num,0) as `10日转发数` ,nvl(share_num_10,0) as `前10日转发`
,nvl(t5.avg_page_stay,0) as `10日平均阅读时长(s)` ,nvl(avg_page_stay_10,0) as `前10日平均阅读时长(s)`
,nvl(concat(round((nvl(t4.reply_num,0)+nvl(t4.vote_num,0)+nvl(t4.favor_num,0)+nvl(t4.share_num,0))/t4.page_pv*100,2),'%'),0) as `历史互动率` ,nvl(concat(round((nvl(reply_num,0)+nvl(vote_num,0)+nvl(favor_num,0)+nvl(share_num,0))/page_pv*100,2),'%'),0) as `历史互动率`
,nvl(concat(round(t4.click_pv/t4.exp_pv*100,2),'%'),0) as `历史ctr` ,nvl(concat(round(click_pv/exp_pv*100,2),'%'),0) as `历史ctr`
,nvl(t4.click_pv,0) as `历史点击pv` ,nvl(click_pv,0) as `历史点击`
,nvl(t4.exp_pv,0) as `历史曝光pv` ,nvl(exp_pv,0) as `历史曝光`
,nvl(t4.page_pv,0) as `历史浏览pv` ,nvl(page_pv,0) as `历史浏览pv`
,nvl(t4.reply_num,0) as `历史真实评论数` ,nvl(reply_num,0) as `历史真实评论`
,nvl(t4.vote_num,0) as `历史真实点赞数` ,nvl(vote_num,0) as `历史真实点赞`
,nvl(t4.favor_num,0) as `历史收藏数` ,nvl(favor_num,0) as `历史收藏`
,nvl(t4.share_num,0) as `历史转发数` ,nvl(share_num,0) as `历史转发`
,nvl(t4.avg_page_stay,0) as `历史平均阅读时长(s)` ,nvl(avg_page_stay,0) as `历史平均阅读时长(s)`
FROM FROM
( (
select a.id,content,a.create_date,collect_set(d.name) as tag_list SELECT t1.id,content,audit_date,tag_list
from ,sum(exp_pv) as exp_pv
( ,sum(click_pv) as click_pv
select id,content,user_id,substr(create_time,1,10) as create_date ,sum(page_pv) as page_pv
from online.tl_hdfs_api_tractate_view ,sum(reply_num) as reply_num
where partition_date= regexp_replace(DATE_SUB(current_date,1) ,'-','') ,sum(vote_num) as vote_num
and content_level>=3 ,sum(favor_num) as favor_num
and is_online='true' ,sum(share_num) as share_num
)a ,round(avg(avg_page_stay),2) as avg_page_stay
join
( ,sum(case when t2.partition_date>=DATE_SUB(current_date,1) then exp_pv end) as exp_pv_1
select distinct tractate_id ,sum(case when t2.partition_date>=DATE_SUB(current_date,1) then click_pv end) as click_pv_1
from tl.tl_mp_api_tractate_tag_v3 ,sum(case when t2.partition_date>=DATE_SUB(current_date,1) then page_pv end) as page_pv_1
where partition_day= regexp_replace(DATE_SUB(current_date,1) ,'-','') ,sum(case when t2.partition_date>=DATE_SUB(current_date,1) then reply_num end) as reply_num_1
and tag_v3_id='3315' ,sum(case when t2.partition_date>=DATE_SUB(current_date,1) then vote_num end) as vote_num_1
)b ,sum(case when t2.partition_date>=DATE_SUB(current_date,1) then favor_num end) as favor_num_1
on a.id=b.tractate_id ,sum(case when t2.partition_date>=DATE_SUB(current_date,1) then share_num end) as share_num_1
join ,round(avg(case when t2.partition_date>=DATE_SUB(current_date,1) then avg_page_stay end),2) as avg_page_stay_1
,sum(case when t2.partition_date>=DATE_SUB(current_date,3) then exp_pv end) as exp_pv_3
,sum(case when t2.partition_date>=DATE_SUB(current_date,3) then click_pv end) as click_pv_3
,sum(case when t2.partition_date>=DATE_SUB(current_date,3) then page_pv end) as page_pv_3
,sum(case when t2.partition_date>=DATE_SUB(current_date,3) then reply_num end) as reply_num_3
,sum(case when t2.partition_date>=DATE_SUB(current_date,3) then vote_num end) as vote_num_3
,sum(case when t2.partition_date>=DATE_SUB(current_date,3) then favor_num end) as favor_num_3
,sum(case when t2.partition_date>=DATE_SUB(current_date,3) then share_num end) as share_num_3
,round(avg(case when t2.partition_date>=DATE_SUB(current_date,3) then avg_page_stay end),2) as avg_page_stay_3
,sum(case when t2.partition_date>=DATE_SUB(current_date,10) then exp_pv end) as exp_pv_10
,sum(case when t2.partition_date>=DATE_SUB(current_date,10) then click_pv end) as click_pv_10
,sum(case when t2.partition_date>=DATE_SUB(current_date,10) then page_pv end) as page_pv_10
,sum(case when t2.partition_date>=DATE_SUB(current_date,10) then reply_num end) as reply_num_10
,sum(case when t2.partition_date>=DATE_SUB(current_date,10) then vote_num end) as vote_num_10
,sum(case when t2.partition_date>=DATE_SUB(current_date,10) then favor_num end) as favor_num_10
,sum(case when t2.partition_date>=DATE_SUB(current_date,10) then share_num end) as share_num_10
,round(avg(case when t2.partition_date>=DATE_SUB(current_date,10) then avg_page_stay end),2) as avg_page_stay_10
FROM
( (
select tractate_id,tag_v3_id select a.id,content,a.audit_date,collect_set(d.name) as tag_list
from tl.tl_mp_api_tractate_tag_v3
where partition_day= regexp_replace(DATE_SUB(current_date,1) ,'-','')
group by tractate_id,tag_v3_id
)c
on a.id=c.tractate_id
left join
(
select id,name
from online.tl_hdfs_api_tag_3_0_view
where partition_date= regexp_replace(DATE_SUB(current_date,1) ,'-','')
group by id,name
)d
on d.id=c.tag_v3_id
group by a.id,content,a.create_date
)t1
left join
(
SELECT t1.card_id
,exp_pv
,click_pv
,page_pv
,reply_num
,vote_num
,favor_num
,share_num
,avg_page_stay
from
(
select a.card_id
,sum(exp_pv) as exp_pv
,sum(click_pv) as click_pv
,sum(page_pv) as page_pv
,round(sum(page_stay)/count(distinct e.cl_id,e.partition_date),2) as avg_page_stay
from from
(--曝光
select partition_date,card_id,cl_id,count(distinct app_session_id) as exp_pv
from online.ml_community_precise_exposure_detail
where partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND action in ('page_precise_exposure','home_choiceness_card_exposure') --7745版本action改为page_precise_exposure
AND is_exposure = '1' ----精准曝光
AND page_name ='home'
AND tab_name = '精选'
group by partition_date,cl_id,card_id
)a
left join
(--点击
SELECT partition_date,params['card_id'] as card_id,cl_id,count(distinct app_session_id,cl_id) as click_pv
from online.bl_hdfs_maidian_updates
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND action='on_click_card'
AND params['page_name'] ='home'
AND params['tab_name'] = '精选'
GROUP BY partition_date,params['card_id'],cl_id
)b
on a.partition_date=b.partition_date and a.card_id=b.card_id and a.cl_id=b.cl_id
left join
(--平均阅读时长
SELECT partition_date,business_id,cl_id,page_stay
from
(
SELECT partition_date,cl_id,params['business_id'] as business_id,page_stay,time_str
FROM online.bl_hdfs_maidian_updates
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
and action='page_view'
and page_stay>=0 and page_stay<1000
and page_name in ('post_detail','user_post_detail','doctor_post_detail')
group by partition_date,cl_id,params['business_id'],page_stay,time_str
)a
)e
on a.partition_date=e.partition_date and a.card_id=e.business_id and a.cl_id=e.cl_id
left join
(--浏览pv
SELECT partition_date,cl_id,params['business_id'] as business_id,count(distinct time_str) as page_pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
and action='page_view'
and page_name in ('post_detail','user_post_detail','doctor_post_detail')
group by partition_date,cl_id,params['business_id']
)f
on a.partition_date=f.partition_date and a.card_id=f.business_id and a.cl_id=f.cl_id
left join
( (
select distinct device_id select id,content,user_id,substr(audit_time,1,10) as audit_date
from ML.ML_D_CT_DV_DEVICECLEAN_DIMEN_D from online.tl_hdfs_api_tractate_view
where PARTITION_DAY = regexp_replace(DATE_SUB(current_date,1) ,'-','') where partition_date= regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND (IS_MORE_USER = 'true' and content_level>=3
OR IS_STAFF = 'true' and is_online='true'
OR IS_SPAM_CHANNEL = 'true'
OR IS_SUSPICIOUS = 'true'
OR IS_ASSOCIATED_ISSUEUSER = 'ture')
)c
on a.cl_id=c.device_id
left join
(
SELECT partition_date,device_id
FROM
(--找出user_id当天活跃的第一个设备id
SELECT user_id,partition_date,
if(size(device_list) > 0, device_list [ 0 ], '') AS device_id
FROM online.ml_user_updates
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
)a
join
(
select distinct user_id
from ml.ML_D_CT_UI_USERCLEAN_DIMEN_D
where PARTITION_DAY =regexp_replace(DATE_SUB(current_date,1) ,'-','')
and (is_doctor = 'true'
or is_puppet = 'true'
or is_classify_user = 'true'
or is_reply_fake = 'true'
or is_staff = 'true'
or is_associated_issuedevice = 'true')
)b
on a.user_id=b.user_id
)d
on a.cl_id=d.device_id and a.partition_date=d.partition_date
where c.device_id is null and d.device_id is null
group by a.card_id
)t1
left join
(
SELECT a.tractate_id
,sum(case when type='reply' then num end) as reply_num
,sum(case when type='vote' then num end) as vote_num
,sum(case when type='favor' then num end) as favor_num
,sum(case when type='share' then num end) as share_num
from
( --真实评论数
SELECT tractate_id,create_date,a.user_id,'reply' as type,sum(reply_num) as num
from
(--评论数
SELECT tractate_id,user_id,regexp_replace(substr(create_time,1,10),'-','') as create_date,count(distinct create_time) as reply_num
FROM online.tl_hdfs_api_tractate_reply_view
WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
and regexp_replace(substr(create_time,1,10),'-','') = regexp_replace(DATE_SUB(current_date,1) ,'-','')
GROUP by tractate_id,user_id,regexp_replace(substr(create_time,1,10),'-','')
)a
JOIN --限制用户是在app进行的回复
(
SELECT a.partition_date,user_id
FROM
(
SELECT partition_date,user_id,device_id,action
FROM online.bl_hdfs_operation_updates
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
)a
JOIN
(
SELECT code
FROM dim.dim_community_action_type
WHERE communityuserbehavior_type_name = '回帖'
)type
ON a.action = code
GROUP BY a.partition_date,user_id
)b
ON a.user_id = b.user_id
AND a.create_date = b.partition_date
group by tractate_id,create_date,a.user_id
union all
--真实点赞数
SELECT tractate_id,create_date,a.user_id,'vote' as type,sum(vote_num) as num
FROM
(
SELECT tractate_id,user_id,regexp_replace(substr(create_time,1,10),'-','') as create_date,count(distinct create_time) as vote_num
FROM online.tl_hdfs_api_tractate_vote_view
WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','')
and regexp_replace(substr(create_time,1,10),'-','') = regexp_replace(DATE_SUB(current_date,1) ,'-','')
group by tractate_id,user_id,regexp_replace(substr(create_time,1,10),'-','')
UNION ALL
SELECT tractate_id,user_id,regexp_replace(substr(create_time,1,10),'-','') as create_date,count(distinct create_time) as vote_num
FROM online.tl_hdfs_api_tractate_reply_vote_view
WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','')
and regexp_replace(substr(create_time,1,10),'-','') = regexp_replace(DATE_SUB(current_date,1) ,'-','')
group by tractate_id,user_id,regexp_replace(substr(create_time,1,10),'-','')
)a
JOIN --限制用户是在app进行的回复
(
SELECT a.partition_date,user_id
FROM
(
SELECT partition_date,user_id,device_id,action
FROM online.bl_hdfs_operation_updates
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
)a
JOIN
(
SELECT code
FROM dim.dim_community_action_type
WHERE communityuserbehavior_type_name = '点赞'
)type
ON a.action = code
GROUP BY a.partition_date,user_id
)b
ON a.user_id = b.user_id
AND a.create_date = b.partition_date
GROUP BY tractate_id,create_date,a.user_id
union all
--真实收藏数
SELECT tractate_id,regexp_replace(substr(create_time,1,10),'-','') as create_date,user_id,'favor' as type,count(distinct create_time) as num
FROM online.tl_hdfs_api_tractate_favor_view
WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','')
and regexp_replace(substr(create_time,1,10),'-','') = regexp_replace(DATE_SUB(current_date,1) ,'-','')
group by user_id,tractate_id,regexp_replace(substr(create_time,1,10),'-','')
union all
--点击分享数
SELECT params['business_id'] as tractate_id,partition_date as create_date,user_id,'share' as type,count(distinct time_str) as num
FROM online.bl_hdfs_maidian_updates
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
and action='page_click_share'
and page_name in ('post_detail','user_post_detail','doctor_post_detail')
group by params['business_id'],partition_date,user_id
)a )a
left join join
( (
select distinct user_id select distinct tractate_id
from ml.ML_D_CT_UI_USERCLEAN_DIMEN_D from tl.tl_mp_api_tractate_tag_v3
where PARTITION_DAY = regexp_replace(DATE_SUB(current_date,1) ,'-','') where partition_day= regexp_replace(DATE_SUB(current_date,1) ,'-','')
and (is_doctor = 'true' and tag_v3_id='3315'
or is_puppet = 'true'
or is_classify_user = 'true'
or is_reply_fake = 'true'
or is_staff = 'true'
or is_associated_issuedevice = 'true')
)b
on a.user_id=b.user_id
where b.user_id is null
group by a.tractate_id
)t2
on t1.card_id=t2.tractate_id
)t2
on t1.id= t2.card_id
left join
(
SELECT t1.card_id
,exp_pv
,click_pv
,page_pv
,reply_num
,vote_num
,favor_num
,share_num
,avg_page_stay
from
(
select a.card_id
,sum(exp_pv) as exp_pv
,sum(click_pv) as click_pv
,sum(page_pv) as page_pv
,round(sum(page_stay)/count(distinct e.cl_id,e.partition_date),2) as avg_page_stay
from
(--曝光
select partition_date,card_id,cl_id,count(distinct app_session_id) as exp_pv
from online.ml_community_precise_exposure_detail
where partition_date>=regexp_replace(DATE_SUB(current_date,3) ,'-','')
AND action in ('page_precise_exposure','home_choiceness_card_exposure') --7745版本action改为page_precise_exposure
AND is_exposure = '1' ----精准曝光
AND page_name ='home'
AND tab_name = '精选'
group by partition_date,cl_id,card_id
)a
left join
(--点击
SELECT partition_date,params['card_id'] as card_id,cl_id,count(distinct app_session_id,cl_id) as click_pv
from online.bl_hdfs_maidian_updates
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,3) ,'-','')
AND action='on_click_card'
AND params['page_name'] ='home'
AND params['tab_name'] = '精选'
GROUP BY partition_date,params['card_id'],cl_id
)b )b
on a.partition_date=b.partition_date and a.card_id=b.card_id and a.cl_id=b.cl_id on a.id=b.tractate_id
left join join
(--平均阅读时长
SELECT partition_date,business_id,cl_id,page_stay
from
(
SELECT partition_date,cl_id,params['business_id'] as business_id,page_stay,time_str
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= regexp_replace(DATE_SUB(current_date,3) ,'-','')
and action='page_view'
and page_stay>=0 and page_stay<1000
and page_name in ('post_detail','user_post_detail','doctor_post_detail')
group by partition_date,cl_id,params['business_id'],page_stay,time_str
)a
)e
on a.partition_date=e.partition_date and a.card_id=e.business_id and a.cl_id=e.cl_id
left join
(--浏览pv
SELECT partition_date,cl_id,params['business_id'] as business_id,count(distinct time_str) as page_pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= regexp_replace(DATE_SUB(current_date,3) ,'-','')
and action='page_view'
and page_name in ('post_detail','user_post_detail','doctor_post_detail')
group by partition_date,cl_id,params['business_id']
)f
on a.partition_date=f.partition_date and a.card_id=f.business_id and a.cl_id=f.cl_id
left join
( (
select distinct device_id select tractate_id,tag_v3_id
from ML.ML_D_CT_DV_DEVICECLEAN_DIMEN_D from tl.tl_mp_api_tractate_tag_v3
where PARTITION_DAY = regexp_replace(DATE_SUB(current_date,1) ,'-','') where partition_day= regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND (IS_MORE_USER = 'true' group by tractate_id,tag_v3_id
OR IS_STAFF = 'true'
OR IS_SPAM_CHANNEL = 'true'
OR IS_SUSPICIOUS = 'true'
OR IS_ASSOCIATED_ISSUEUSER = 'ture')
)c )c
on a.cl_id=c.device_id on a.id=c.tractate_id
left join left join
( (
SELECT partition_date,device_id select id,name
FROM from online.tl_hdfs_api_tag_3_0_view
(--找出user_id当天活跃的第一个设备id where partition_date= regexp_replace(DATE_SUB(current_date,1) ,'-','')
SELECT user_id,partition_date, group by id,name
if(size(device_list) > 0, device_list [ 0 ], '') AS device_id
FROM online.ml_user_updates
WHERE partition_date >=regexp_replace(DATE_SUB(current_date,3) ,'-','')
)a
join
(
select distinct user_id
from ml.ML_D_CT_UI_USERCLEAN_DIMEN_D
where PARTITION_DAY = regexp_replace(DATE_SUB(current_date,1) ,'-','')
and (is_doctor = 'true'
or is_puppet = 'true'
or is_classify_user = 'true'
or is_reply_fake = 'true'
or is_staff = 'true'
or is_associated_issuedevice = 'true')
)b
on a.user_id=b.user_id
)d )d
on a.cl_id=d.device_id and a.partition_date=d.partition_date on d.id=c.tag_v3_id
where c.device_id is null and d.device_id is null group by a.id,content,a.audit_date
group by a.card_id
)t1 )t1
left join join
( (--历史数据,指从审核时间至今的数据
SELECT a.tractate_id SELECT nvl(concat_ws('-',substr(t1.partition_date,1,4),substr(t1.partition_date,5,2),substr(t1.partition_date,7,2))
,sum(case when type='reply' then num end) as reply_num ,concat_ws('-',substr(t2.create_date,1,4),substr(t2.create_date,5,2),substr(t2.create_date,7,2))) as partition_date
,sum(case when type='vote' then num end) as vote_num ,nvl(t1.card_id,t2.tractate_id) as card_id
,sum(case when type='favor' then num end) as favor_num ,exp_pv
,sum(case when type='share' then num end) as share_num ,click_pv
,page_pv
,reply_num
,vote_num
,favor_num
,share_num
,avg_page_stay
from from
( --真实评论数 (
SELECT tractate_id,create_date,a.user_id,'reply' as type,sum(reply_num) as num select nvl(nvl(a.card_id,e.business_id),f.business_id) as card_id
,nvl(nvl(a.partition_date,e.partition_date),f.partition_date) as partition_date
,sum(exp_pv) as exp_pv
,sum(click_pv) as click_pv
,sum(page_pv) as page_pv
,round(sum(page_stay)/count(distinct e.cl_id,e.partition_date),2) as avg_page_stay
from from
(--评论数 (--曝光
SELECT tractate_id,user_id,regexp_replace(substr(create_time,1,10),'-','') as create_date,count(distinct create_time) as reply_num select partition_date,card_id,cl_id,count(distinct app_session_id) as exp_pv
FROM online.tl_hdfs_api_tractate_reply_view from online.ml_community_precise_exposure_detail
WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','') where partition_date>='20160101' AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
and regexp_replace(substr(create_time,1,10),'-','') >= regexp_replace(DATE_SUB(current_date,3) ,'-','') AND action in ('page_precise_exposure','home_choiceness_card_exposure') --7745版本action改为page_precise_exposure
GROUP by tractate_id,user_id,regexp_replace(substr(create_time,1,10),'-','') AND is_exposure = '1' ----精准曝光
AND page_name ='home'
AND tab_name = '精选'
group by partition_date,cl_id,card_id
)a )a
JOIN --限制用户是在app进行的回复 left join
( (--点击
SELECT a.partition_date,user_id SELECT partition_date,params['card_id'] as card_id,cl_id,count(distinct app_session_id,cl_id) as click_pv
FROM from online.bl_hdfs_maidian_updates
WHERE partition_date>='20160101' AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND action='on_click_card'
AND params['page_name'] ='home'
AND params['tab_name'] = '精选'
GROUP BY partition_date,params['card_id'],cl_id
)b
on a.partition_date=b.partition_date and a.card_id=b.card_id and a.cl_id=b.cl_id
full join
(--平均阅读时长
SELECT partition_date,business_id,cl_id,page_stay
from
( (
SELECT partition_date,user_id,device_id,action SELECT partition_date,cl_id,params['business_id'] as business_id,page_stay,time_str
FROM online.bl_hdfs_operation_updates FROM online.bl_hdfs_maidian_updates
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,3) ,'-','') WHERE partition_date >= '20160101'
and partition_date <=regexp_replace(DATE_SUB(current_date,1) ,'-','')
and action='page_view'
and page_stay>=0 and page_stay<1000
and page_name in ('post_detail','user_post_detail','doctor_post_detail')
group by partition_date,cl_id,params['business_id'],page_stay,time_str
)a )a
JOIN )e
( on a.partition_date=e.partition_date and a.card_id=e.business_id and a.cl_id=e.cl_id
SELECT code full join
FROM dim.dim_community_action_type (--浏览pv
WHERE communityuserbehavior_type_name = '回帖'
)type SELECT partition_date,cl_id,params['business_id'] as business_id,count(distinct time_str) as page_pv
ON a.action = code FROM online.bl_hdfs_maidian_updates
GROUP BY a.partition_date,user_id WHERE partition_date >= '20160101'
)b and partition_date <=regexp_replace(DATE_SUB(current_date,1) ,'-','')
ON a.user_id = b.user_id and action='page_view'
AND a.create_date = b.partition_date and page_name in ('post_detail','user_post_detail','doctor_post_detail')
group by tractate_id,create_date,a.user_id group by partition_date,cl_id,params['business_id']
)f
union all on a.partition_date=f.partition_date and nvl(a.card_id,e.business_id)=f.business_id and nvl(a.cl_id,e.cl_id)=f.cl_id
--真实点赞数 left join
SELECT tractate_id,create_date,a.user_id,'vote' as type,sum(vote_num) as num (
FROM select distinct device_id
( from ML.ML_D_CT_DV_DEVICECLEAN_DIMEN_D
SELECT tractate_id,user_id,regexp_replace(substr(create_time,1,10),'-','') as create_date,count(distinct create_time) as vote_num where PARTITION_DAY =regexp_replace(DATE_SUB(current_date,1) ,'-','')
FROM online.tl_hdfs_api_tractate_vote_view AND (IS_MORE_USER = 'true'
WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','') OR IS_STAFF = 'true'
and regexp_replace(substr(create_time,1,10),'-','') >= regexp_replace(DATE_SUB(current_date,3) ,'-','') OR IS_SPAM_CHANNEL = 'true'
group by tractate_id,user_id,regexp_replace(substr(create_time,1,10),'-','') OR IS_SUSPICIOUS = 'true'
OR IS_ASSOCIATED_ISSUEUSER = 'ture')
UNION ALL )c
on a.cl_id=c.device_id
SELECT tractate_id,user_id,regexp_replace(substr(create_time,1,10),'-','') as create_date,count(distinct create_time) as vote_num left join
FROM online.tl_hdfs_api_tractate_reply_vote_view
WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','')
and regexp_replace(substr(create_time,1,10),'-','') >= regexp_replace(DATE_SUB(current_date,3) ,'-','')
group by tractate_id,user_id,regexp_replace(substr(create_time,1,10),'-','')
)a
JOIN --限制用户是在app进行的回复
( (
SELECT a.partition_date,user_id SELECT partition_date,device_id
FROM FROM
( (--找出user_id当天活跃的第一个设备id
SELECT partition_date,user_id,device_id,action SELECT user_id,partition_date,
FROM online.bl_hdfs_operation_updates if(size(device_list) > 0, device_list [ 0 ], '') AS device_id
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,3) ,'-','') FROM online.ml_user_updates
WHERE partition_date >= '20160101'
and partition_date <=regexp_replace(DATE_SUB(current_date,1) ,'-','')
)a )a
JOIN join
( (
SELECT code select distinct user_id
FROM dim.dim_community_action_type from ml.ML_D_CT_UI_USERCLEAN_DIMEN_D
WHERE communityuserbehavior_type_name = '点赞' where PARTITION_DAY =regexp_replace(DATE_SUB(current_date,1) ,'-','')
)type and (is_doctor = 'true'
ON a.action = code or is_puppet = 'true'
GROUP BY a.partition_date,user_id or is_classify_user = 'true'
)b or is_reply_fake = 'true'
ON a.user_id = b.user_id or is_staff = 'true'
AND a.create_date = b.partition_date or is_associated_issuedevice = 'true')
GROUP BY tractate_id,create_date,a.user_id )b
on a.user_id=b.user_id
union all )d
--真实收藏数 on a.cl_id=d.device_id and a.partition_date=d.partition_date
SELECT tractate_id,regexp_replace(substr(create_time,1,10),'-','') as create_date,user_id,'favor' as type,count(distinct create_time) as num where c.device_id is null and d.device_id is null
FROM online.tl_hdfs_api_tractate_favor_view group by nvl(nvl(a.card_id,e.business_id),f.business_id),nvl(nvl(a.partition_date,e.partition_date),f.partition_date)
WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','') )t1
and regexp_replace(substr(create_time,1,10),'-','') >=regexp_replace(DATE_SUB(current_date,3) ,'-','') full join
group by user_id,tractate_id,regexp_replace(substr(create_time,1,10),'-','')
union all
--点击分享数
SELECT params['business_id'] as tractate_id,partition_date as create_date,user_id,'share' as type,count(distinct time_str) as num
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= regexp_replace(DATE_SUB(current_date,3) ,'-','')
and action='page_click_share'
and page_name in ('post_detail','user_post_detail','doctor_post_detail')
group by params['business_id'],partition_date,user_id
)a
left join
( (
select distinct user_id SELECT a.tractate_id,a.create_date
from ml.ML_D_CT_UI_USERCLEAN_DIMEN_D ,sum(case when type='reply' then num end) as reply_num
where PARTITION_DAY = regexp_replace(DATE_SUB(current_date,1) ,'-','') ,sum(case when type='vote' then num end) as vote_num
and (is_doctor = 'true' ,sum(case when type='favor' then num end) as favor_num
or is_puppet = 'true' ,sum(case when type='share' then num end) as share_num
or is_classify_user = 'true'
or is_reply_fake = 'true'
or is_staff = 'true'
or is_associated_issuedevice = 'true')
)b
on a.user_id=b.user_id
where b.user_id is null
group by a.tractate_id
)t2
on t1.card_id=t2.tractate_id
)t3
on t1.id= t3.card_id
left join
(
SELECT t1.card_id
,exp_pv
,click_pv
,page_pv
,reply_num
,vote_num
,favor_num
,share_num
,avg_page_stay
from
(
select a.card_id
,sum(exp_pv) as exp_pv
,sum(click_pv) as click_pv
,sum(page_pv) as page_pv
,round(sum(page_stay)/count(distinct e.cl_id,e.partition_date),2) as avg_page_stay
from
(--曝光
select partition_date,card_id,cl_id,count(distinct app_session_id) as exp_pv
from online.ml_community_precise_exposure_detail
where partition_date>='20160101' AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND action in ('page_precise_exposure','home_choiceness_card_exposure') --7745版本action改为page_precise_exposure
AND is_exposure = '1' ----精准曝光
AND page_name ='home'
AND tab_name = '精选'
group by partition_date,cl_id,card_id
)a
left join
(--点击
SELECT partition_date,params['card_id'] as card_id,cl_id,count(distinct app_session_id,cl_id) as click_pv
from online.bl_hdfs_maidian_updates
WHERE partition_date>='20160101' AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND action='on_click_card'
AND params['page_name'] ='home'
AND params['tab_name'] = '精选'
GROUP BY partition_date,params['card_id'],cl_id
)b
on a.partition_date=b.partition_date and a.card_id=b.card_id and a.cl_id=b.cl_id
left join
(--平均阅读时长
SELECT partition_date,business_id,cl_id,page_stay
from from
( ( --真实评论数
SELECT partition_date,cl_id,params['business_id'] as business_id,page_stay,time_str SELECT tractate_id,create_date,a.user_id,'reply' as type,sum(reply_num) as num
FROM online.bl_hdfs_maidian_updates from
WHERE partition_date >= '20160101' (--评论数
and partition_date <=regexp_replace(DATE_SUB(current_date,1) ,'-','') SELECT tractate_id,user_id,regexp_replace(substr(create_time,1,10),'-','') as create_date,count(distinct create_time) as reply_num
and action='page_view' FROM online.tl_hdfs_api_tractate_reply_view
and page_stay>=0 and page_stay<1000 WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
and page_name in ('post_detail','user_post_detail','doctor_post_detail') -- and regexp_replace(substr(create_time,1,10),'-','') >= '20200101'
group by partition_date,cl_id,params['business_id'],page_stay,time_str -- and regexp_replace(substr(create_time,1,10),'-','') < '${end_date}'
)a GROUP by tractate_id,user_id,regexp_replace(substr(create_time,1,10),'-','')
)e )a
on a.partition_date=e.partition_date and a.card_id=e.business_id and a.cl_id=e.cl_id -- JOIN --限制用户是在app进行的回复
left join -- (
(--浏览pv -- SELECT a.partition_date,user_id
-- FROM
-- (
-- SELECT partition_date,user_id,device_id,action
-- FROM online.bl_hdfs_operation_updates
-- WHERE partition_date>='20160101'
-- AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
-- )a
-- JOIN
-- (
-- SELECT code
-- FROM dim.dim_community_action_type
-- WHERE communityuserbehavior_type_name = '回帖'
-- )type
-- ON a.action = code
-- GROUP BY a.partition_date,user_id
-- )b
-- ON a.user_id = b.user_id
-- AND a.create_date = b.partition_date
group by tractate_id,create_date,a.user_id
union all
--真实点赞数
SELECT tractate_id,create_date,a.user_id,'vote' as type,sum(vote_num) as num
FROM
(
SELECT tractate_id,user_id,regexp_replace(substr(create_time,1,10),'-','') as create_date,count(distinct create_time) as vote_num
FROM online.tl_hdfs_api_tractate_vote_view
WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','')
-- and regexp_replace(substr(create_time,1,10),'-','') >= '20200101'
-- and regexp_replace(substr(create_time,1,10),'-','') < '${end_date}'
group by tractate_id,user_id,regexp_replace(substr(create_time,1,10),'-','')
-- UNION ALL
-- SELECT tractate_id,user_id,regexp_replace(substr(create_time,1,10),'-','') as create_date,count(distinct create_time) as vote_num
-- FROM online.tl_hdfs_api_tractate_reply_vote_view
-- WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','')
-- -- and regexp_replace(substr(create_time,1,10),'-','') >= '20200101'
-- -- and regexp_replace(substr(create_time,1,10),'-','') < '${end_date}'
-- group by tractate_id,user_id,regexp_replace(substr(create_time,1,10),'-','')
)a
-- JOIN --限制用户是在app进行的回复
-- (
-- SELECT a.partition_date,user_id
-- FROM
-- (
-- SELECT partition_date,user_id,device_id,action
-- FROM online.bl_hdfs_operation_updates
-- WHERE partition_date>='20160101'
-- AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
-- )a
-- JOIN
-- (
-- SELECT code
-- FROM dim.dim_community_action_type
-- WHERE communityuserbehavior_type_name = '点赞'
-- )type
-- ON a.action = code
-- GROUP BY a.partition_date,user_id
-- )b
-- ON a.user_id = b.user_id
-- AND a.create_date = b.partition_date
GROUP BY tractate_id,create_date,a.user_id
union all
--真实收藏数
SELECT tractate_id,regexp_replace(substr(create_time,1,10),'-','') as create_date,user_id,'favor' as type,count(distinct create_time) as num
FROM online.tl_hdfs_api_tractate_favor_view
WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','')
-- and regexp_replace(substr(create_time,1,10),'-','') >= '${start_date}'
-- and regexp_replace(substr(create_time,1,10),'-','') < '${end_date}'
group by user_id,tractate_id,regexp_replace(substr(create_time,1,10),'-','')
SELECT partition_date,cl_id,params['business_id'] as business_id,count(distinct time_str) as page_pv union all
--点击分享数
SELECT params['business_id'] as tractate_id,partition_date as create_date,user_id,'share' as type,count(distinct time_str) as num
FROM online.bl_hdfs_maidian_updates FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= '20160101' WHERE partition_date >= '20160101'
and partition_date <=regexp_replace(DATE_SUB(current_date,1) ,'-','') and partition_date <=regexp_replace(DATE_SUB(current_date,1) ,'-','')
and action='page_view' and action='page_click_share'
and page_name in ('post_detail','user_post_detail','doctor_post_detail') and page_name in ('post_detail','user_post_detail','doctor_post_detail')
group by partition_date,cl_id,params['business_id'] group by params['business_id'],partition_date,user_id
)f
on a.partition_date=f.partition_date and a.card_id=f.business_id and a.cl_id=f.cl_id
-- Devices flagged in yesterday's device-clean dimension partition.
-- NOTE(review): this looks like an anti-join (a later filter on
-- "c.device_id is null" appears in the surrounding query) -- confirm.
left join
(
    select distinct device_id
    from ML.ML_D_CT_DV_DEVICECLEAN_DIMEN_D
    where PARTITION_DAY = regexp_replace(DATE_SUB(current_date,1) ,'-','')
      AND (IS_MORE_USER = 'true'
        OR IS_STAFF = 'true'
        OR IS_SPAM_CHANNEL = 'true'
        OR IS_SUSPICIOUS = 'true'
        -- Was compared against 'ture', which can never match the 'true'
        -- flag value used by every other column here.
        OR IS_ASSOCIATED_ISSUEUSER = 'true')
)c
on a.cl_id=c.device_id
left join
(
SELECT partition_date,device_id
FROM
(--找出user_id当天活跃的第一个设备id
SELECT user_id,partition_date,
if(size(device_list) > 0, device_list [ 0 ], '') AS device_id
FROM online.ml_user_updates
WHERE partition_date >= '20160101'
and partition_date <=regexp_replace(DATE_SUB(current_date,1) ,'-','')
)a )a
join left join
( (
select distinct user_id select distinct user_id
from ml.ML_D_CT_UI_USERCLEAN_DIMEN_D from ml.ML_D_CT_UI_USERCLEAN_DIMEN_D
...@@ -639,362 +358,16 @@ left join ...@@ -639,362 +358,16 @@ left join
or is_associated_issuedevice = 'true') or is_associated_issuedevice = 'true')
)b )b
on a.user_id=b.user_id on a.user_id=b.user_id
)d where b.user_id is null
on a.cl_id=d.device_id and a.partition_date=d.partition_date group by a.tractate_id,a.create_date
where c.device_id is null and d.device_id is null )t2
group by a.card_id on t1.card_id=t2.tractate_id and t1.partition_date =t2.create_date
)t1
left join
(
-- Interaction totals per post (tractate_id): replies, votes, favorites and
-- share clicks. Reply/vote/favorite rows come from yesterday's partition of
-- the *_view tables with no create_time filter; share clicks come from the
-- tracking log from 20160101 through yesterday. Rows whose user_id appears in
-- the flagged-user set are removed before summing.
select
    ev.tractate_id,
    sum(case when ev.type = 'reply' then ev.num end) as reply_num,
    sum(case when ev.type = 'vote' then ev.num end) as vote_num,
    sum(case when ev.type = 'favor' then ev.num end) as favor_num,
    sum(case when ev.type = 'share' then ev.num end) as share_num
from
(
    -- Real replies: replies per (post, user, day) ...
    select
        rp.tractate_id,
        rp.create_date,
        rp.user_id,
        'reply' as type,
        sum(rp.reply_num) as num
    from
    (
        -- reply count
        select
            tractate_id,
            user_id,
            regexp_replace(substr(create_time,1,10),'-','') as create_date,
            count(distinct create_time) as reply_num
        from online.tl_hdfs_api_tractate_reply_view
        where partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
        group by
            tractate_id,
            user_id,
            regexp_replace(substr(create_time,1,10),'-','')
    ) rp
    -- ... kept only when the same user has a matching operation-log action on
    -- the same day (original author's note: restrict to replies made in the app).
    join
    (
        select
            ops.partition_date,
            ops.user_id
        from
        (
            select partition_date, user_id, device_id, action
            from online.bl_hdfs_operation_updates
            where partition_date >= '20160101'
              and partition_date <= regexp_replace(DATE_SUB(current_date,1) ,'-','')
        ) ops
        join
        (
            select code
            from dim.dim_community_action_type
            where communityuserbehavior_type_name = '回帖'
        ) act_code
            on ops.action = act_code.code
        group by
            ops.partition_date,
            ops.user_id
    ) app
        on rp.user_id = app.user_id
       and rp.create_date = app.partition_date
    group by
        rp.tractate_id,
        rp.create_date,
        rp.user_id

    union all

    -- Real votes: post votes plus reply votes per (post, user, day) ...
    select
        vt.tractate_id,
        vt.create_date,
        vt.user_id,
        'vote' as type,
        sum(vt.vote_num) as num
    from
    (
        select
            tractate_id,
            user_id,
            regexp_replace(substr(create_time,1,10),'-','') as create_date,
            count(distinct create_time) as vote_num
        from online.tl_hdfs_api_tractate_vote_view
        where partition_date = regexp_replace(date_sub(current_date(),1),'-','')
        group by
            tractate_id,
            user_id,
            regexp_replace(substr(create_time,1,10),'-','')
        union all
        select
            tractate_id,
            user_id,
            regexp_replace(substr(create_time,1,10),'-','') as create_date,
            count(distinct create_time) as vote_num
        from online.tl_hdfs_api_tractate_reply_vote_view
        where partition_date = regexp_replace(date_sub(current_date(),1),'-','')
        group by
            tractate_id,
            user_id,
            regexp_replace(substr(create_time,1,10),'-','')
    ) vt
    -- ... kept only when the same user has a matching operation-log action on
    -- the same day.
    join
    (
        select
            ops.partition_date,
            ops.user_id
        from
        (
            select partition_date, user_id, device_id, action
            from online.bl_hdfs_operation_updates
            where partition_date >= '20160101'
              and partition_date <= regexp_replace(DATE_SUB(current_date,1) ,'-','')
        ) ops
        join
        (
            select code
            from dim.dim_community_action_type
            where communityuserbehavior_type_name = '点赞'
        ) act_code
            on ops.action = act_code.code
        group by
            ops.partition_date,
            ops.user_id
    ) app
        on vt.user_id = app.user_id
       and vt.create_date = app.partition_date
    group by
        vt.tractate_id,
        vt.create_date,
        vt.user_id

    union all

    -- Real favorites per (post, user, day)
    select
        tractate_id,
        regexp_replace(substr(create_time,1,10),'-','') as create_date,
        user_id,
        'favor' as type,
        count(distinct create_time) as num
    from online.tl_hdfs_api_tractate_favor_view
    where partition_date = regexp_replace(date_sub(current_date(),1),'-','')
    group by
        user_id,
        tractate_id,
        regexp_replace(substr(create_time,1,10),'-','')

    union all

    -- Share clicks per (post, user, day) from the tracking log
    select
        params['business_id'] as tractate_id,
        partition_date as create_date,
        user_id,
        'share' as type,
        count(distinct time_str) as num
    from online.bl_hdfs_maidian_updates
    where partition_date >= '20160101'
      and partition_date <= regexp_replace(DATE_SUB(current_date,1) ,'-','')
      and action = 'page_click_share'
      and page_name in ('post_detail','user_post_detail','doctor_post_detail')
    group by
        params['business_id'],
        partition_date,
        user_id
) ev
-- Anti-join: users flagged in yesterday's user-clean dimension partition.
left join
(
    select distinct user_id
    from ml.ML_D_CT_UI_USERCLEAN_DIMEN_D
    where PARTITION_DAY = regexp_replace(DATE_SUB(current_date,1) ,'-','')
      and (is_doctor = 'true'
        or is_puppet = 'true'
        or is_classify_user = 'true'
        or is_reply_fake = 'true'
        or is_staff = 'true'
        or is_associated_issuedevice = 'true')
) flagged
    on ev.user_id = flagged.user_id
where flagged.user_id is null
group by ev.tractate_id
)t2 )t2
on t1.card_id=t2.tractate_id on t1.id= t2.card_id
where t2.partition_date>=t1.audit_date
group by t1.id,content,audit_date,tag_list
)t4 )t4
on t1.id= t4.card_id order by `前10日曝光` desc
left join
(
SELECT t1.card_id
,exp_pv
,click_pv
,page_pv
,reply_num
,vote_num
,favor_num
,share_num
,avg_page_stay
from
(
-- Per-card exposure, click and page-view totals plus average reading time for
-- cards exposed on the home '精选' tab from 10 days ago onward. Rows from
-- flagged devices (c) and from the devices of flagged users (d) are dropped.
select a.card_id
,sum(exp_pv) as exp_pv
,sum(click_pv) as click_pv
,sum(page_pv) as page_pv
-- e has one row per (day, card, device), so the distinct count is the number
-- of device-days that actually have reading-time data.
,round(sum(page_stay)/count(distinct e.cl_id,e.partition_date),2) as avg_page_stay
from
(-- exposure: one row per (day, device, card)
    select partition_date,card_id,cl_id,count(distinct app_session_id) as exp_pv
    from online.ml_community_precise_exposure_detail
    where partition_date>=regexp_replace(DATE_SUB(current_date,10) ,'-','')
    AND action in ('page_precise_exposure','home_choiceness_card_exposure') -- action renamed to page_precise_exposure as of version 7745
    AND is_exposure = '1' -- precise exposure
    AND page_name ='home'
    AND tab_name = '精选'
    group by partition_date,cl_id,card_id
)a
left join
(-- clicks: one row per (day, card, device)
    SELECT partition_date,params['card_id'] as card_id,cl_id,count(distinct app_session_id,cl_id) as click_pv
    from online.bl_hdfs_maidian_updates
    WHERE partition_date>=regexp_replace(DATE_SUB(current_date,10) ,'-','')
    AND action='on_click_card'
    AND params['page_name'] ='home'
    AND params['tab_name'] = '精选'
    GROUP BY partition_date,params['card_id'],cl_id
)b
on a.partition_date=b.partition_date and a.card_id=b.card_id and a.cl_id=b.cl_id
left join
(-- reading time, pre-aggregated to one row per (day, card, device).
 -- Previously this sub-query returned one row per page view, so joining it to
 -- a/b/f repeated their rows and inflated sum(exp_pv), sum(click_pv) and
 -- sum(page_pv). Summing page_stay here keeps avg_page_stay unchanged.
    SELECT partition_date,business_id,cl_id,sum(page_stay) as page_stay
    from
    (-- de-duplicate page-view events
        SELECT partition_date,cl_id,params['business_id'] as business_id,page_stay,time_str
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date >= regexp_replace(DATE_SUB(current_date,10) ,'-','')
        and action='page_view'
        and page_stay>=0 and page_stay<1000
        and page_name in ('post_detail','user_post_detail','doctor_post_detail')
        group by partition_date,cl_id,params['business_id'],page_stay,time_str
    )pv
    group by partition_date,business_id,cl_id
)e
on a.partition_date=e.partition_date and a.card_id=e.business_id and a.cl_id=e.cl_id
left join
(-- page views: one row per (day, device, post)
    SELECT partition_date,cl_id,params['business_id'] as business_id,count(distinct time_str) as page_pv
    FROM online.bl_hdfs_maidian_updates
    WHERE partition_date >= regexp_replace(DATE_SUB(current_date,10) ,'-','')
    and action='page_view'
    and page_name in ('post_detail','user_post_detail','doctor_post_detail')
    group by partition_date,cl_id,params['business_id']
)f
on a.partition_date=f.partition_date and a.card_id=f.business_id and a.cl_id=f.cl_id
left join
(-- devices flagged in yesterday's device-clean dimension partition (anti-join)
    select distinct device_id
    from ML.ML_D_CT_DV_DEVICECLEAN_DIMEN_D
    where PARTITION_DAY = regexp_replace(DATE_SUB(current_date,1) ,'-','')
    AND (IS_MORE_USER = 'true'
    OR IS_STAFF = 'true'
    OR IS_SPAM_CHANNEL = 'true'
    OR IS_SUSPICIOUS = 'true'
    -- Was compared against 'ture', which can never match.
    OR IS_ASSOCIATED_ISSUEUSER = 'true')
)c
on a.cl_id=c.device_id
left join
(-- per day, the first active device of each flagged user (anti-join)
    SELECT partition_date,device_id
    FROM
    (-- first device in the user's device_list for that day
        SELECT user_id,partition_date,
        if(size(device_list) > 0, device_list[0], '') AS device_id
        FROM online.ml_user_updates
        -- Window widened from 3 to 10 days so it covers the same range as the
        -- exposure data it is matched against by partition_date.
        WHERE partition_date >=regexp_replace(DATE_SUB(current_date,10) ,'-','')
    )a
    join
    (
        select distinct user_id
        from ml.ML_D_CT_UI_USERCLEAN_DIMEN_D
        where PARTITION_DAY = regexp_replace(DATE_SUB(current_date,1) ,'-','')
        and (is_doctor = 'true'
        or is_puppet = 'true'
        or is_classify_user = 'true'
        or is_reply_fake = 'true'
        or is_staff = 'true'
        or is_associated_issuedevice = 'true')
    )b
    on a.user_id=b.user_id
)d
on a.cl_id=d.device_id and a.partition_date=d.partition_date
where c.device_id is null and d.device_id is null
group by a.card_id
)t1
left join
(
-- Interaction totals per post (tractate_id) for activity dated from 10 days
-- ago onward: replies, votes, favorites and share clicks. Rows whose user_id
-- appears in the flagged-user set are removed before summing.
select
    ev.tractate_id,
    sum(case when ev.type = 'reply' then ev.num end) as reply_num,
    sum(case when ev.type = 'vote' then ev.num end) as vote_num,
    sum(case when ev.type = 'favor' then ev.num end) as favor_num,
    sum(case when ev.type = 'share' then ev.num end) as share_num
from
(
    -- Real replies: replies per (post, user, day) ...
    select
        rp.tractate_id,
        rp.create_date,
        rp.user_id,
        'reply' as type,
        sum(rp.reply_num) as num
    from
    (
        -- reply count
        select
            tractate_id,
            user_id,
            regexp_replace(substr(create_time,1,10),'-','') as create_date,
            count(distinct create_time) as reply_num
        from online.tl_hdfs_api_tractate_reply_view
        where partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
          and regexp_replace(substr(create_time,1,10),'-','') >= regexp_replace(DATE_SUB(current_date,10) ,'-','')
        group by
            tractate_id,
            user_id,
            regexp_replace(substr(create_time,1,10),'-','')
    ) rp
    -- ... kept only when the same user has a matching operation-log action on
    -- the same day (original author's note: restrict to replies made in the app).
    join
    (
        select
            ops.partition_date,
            ops.user_id
        from
        (
            select partition_date, user_id, device_id, action
            from online.bl_hdfs_operation_updates
            where partition_date >= regexp_replace(DATE_SUB(current_date,10) ,'-','')
        ) ops
        join
        (
            select code
            from dim.dim_community_action_type
            where communityuserbehavior_type_name = '回帖'
        ) act_code
            on ops.action = act_code.code
        group by
            ops.partition_date,
            ops.user_id
    ) app
        on rp.user_id = app.user_id
       and rp.create_date = app.partition_date
    group by
        rp.tractate_id,
        rp.create_date,
        rp.user_id

    union all

    -- Real votes: post votes plus reply votes per (post, user, day) ...
    select
        vt.tractate_id,
        vt.create_date,
        vt.user_id,
        'vote' as type,
        sum(vt.vote_num) as num
    from
    (
        select
            tractate_id,
            user_id,
            regexp_replace(substr(create_time,1,10),'-','') as create_date,
            count(distinct create_time) as vote_num
        from online.tl_hdfs_api_tractate_vote_view
        where partition_date = regexp_replace(date_sub(current_date(),1),'-','')
          and regexp_replace(substr(create_time,1,10),'-','') >= regexp_replace(DATE_SUB(current_date,10) ,'-','')
        group by
            tractate_id,
            user_id,
            regexp_replace(substr(create_time,1,10),'-','')
        union all
        select
            tractate_id,
            user_id,
            regexp_replace(substr(create_time,1,10),'-','') as create_date,
            count(distinct create_time) as vote_num
        from online.tl_hdfs_api_tractate_reply_vote_view
        where partition_date = regexp_replace(date_sub(current_date(),1),'-','')
          and regexp_replace(substr(create_time,1,10),'-','') >= regexp_replace(DATE_SUB(current_date,10) ,'-','')
        group by
            tractate_id,
            user_id,
            regexp_replace(substr(create_time,1,10),'-','')
    ) vt
    -- ... kept only when the same user has a matching operation-log action on
    -- the same day.
    join
    (
        select
            ops.partition_date,
            ops.user_id
        from
        (
            select partition_date, user_id, device_id, action
            from online.bl_hdfs_operation_updates
            where partition_date >= regexp_replace(DATE_SUB(current_date,10) ,'-','')
        ) ops
        join
        (
            select code
            from dim.dim_community_action_type
            where communityuserbehavior_type_name = '点赞'
        ) act_code
            on ops.action = act_code.code
        group by
            ops.partition_date,
            ops.user_id
    ) app
        on vt.user_id = app.user_id
       and vt.create_date = app.partition_date
    group by
        vt.tractate_id,
        vt.create_date,
        vt.user_id

    union all

    -- Real favorites per (post, user, day)
    select
        tractate_id,
        regexp_replace(substr(create_time,1,10),'-','') as create_date,
        user_id,
        'favor' as type,
        count(distinct create_time) as num
    from online.tl_hdfs_api_tractate_favor_view
    where partition_date = regexp_replace(date_sub(current_date(),1),'-','')
      and regexp_replace(substr(create_time,1,10),'-','') >= regexp_replace(DATE_SUB(current_date,10) ,'-','')
    group by
        user_id,
        tractate_id,
        regexp_replace(substr(create_time,1,10),'-','')

    union all

    -- Share clicks per (post, user, day) from the tracking log
    select
        params['business_id'] as tractate_id,
        partition_date as create_date,
        user_id,
        'share' as type,
        count(distinct time_str) as num
    from online.bl_hdfs_maidian_updates
    where partition_date >= regexp_replace(DATE_SUB(current_date,10) ,'-','')
      and action = 'page_click_share'
      and page_name in ('post_detail','user_post_detail','doctor_post_detail')
    group by
        params['business_id'],
        partition_date,
        user_id
) ev
-- Anti-join: users flagged in yesterday's user-clean dimension partition.
left join
(
    select distinct user_id
    from ml.ML_D_CT_UI_USERCLEAN_DIMEN_D
    where PARTITION_DAY = regexp_replace(DATE_SUB(current_date,1) ,'-','')
      and (is_doctor = 'true'
        or is_puppet = 'true'
        or is_classify_user = 'true'
        or is_reply_fake = 'true'
        or is_staff = 'true'
        or is_associated_issuedevice = 'true')
) flagged
    on ev.user_id = flagged.user_id
where flagged.user_id is null
group by ev.tractate_id
)t2
on t1.card_id=t2.tractate_id
)t5
on t1.id= t5.card_id
order by `3日曝光pv` desc
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment