

--新手精选帖子
--帖子关联标签 3315

-- Per-answer report for answer cards exposed in the home '精选' tab.
-- Output: one row per answer with its level, dates, tag list, and traffic /
-- interaction metrics for (a) every partition since 20160101 and (b) the
-- trailing 30 days. Rates are rendered as 'x.xx%' strings; a rate whose
-- denominator is NULL or 0 evaluates to NULL and is shown as 0.
SELECT card_id as `日记本id`
        ,type as `内容类型`
        ,content_level as `星级`
        ,case when is_cpc>0 then '是' else '否' end as `是否商业化内容`
        ,create_date as `上线日期`
        ,audit_date as `最近审核日期`
        ,tag_list as `所有关联标签`
        -- trailing-30-day metrics
        ,nvl(concat(round((nvl(reply_num_30,0)+nvl(vote_num_30,0)+nvl(favor_num_30,0)+nvl(share_num_30,0))/page_pv_30*100,2),'%'),0) as `前30日互动率`
        ,nvl(concat(round(click_pv_30/exp_pv_30*100,2),'%'),0) as `前30日ctr`
        ,nvl(click_pv_30,0) as `前30日点击`
        ,nvl(exp_pv_30,0) as `前30日曝光`
        ,nvl(page_pv_30,0) as `前30日浏览pv`
        ,nvl(reply_num_30,0) as `前30日真实评论`
        ,nvl(vote_num_30,0) as `前30日真实点赞`
        ,nvl(favor_num_30,0) as `前30日收藏`
        ,nvl(share_num_30,0) as `前30日转发`
        ,nvl(page_pv_20_30,0) as `前30日超过20秒阅读pv`
        ,nvl(avg_page_stay_30,0) as `前30日平均阅读时长(s)`

        -- all-history metrics
        ,nvl(concat(round((nvl(reply_num,0)+nvl(vote_num,0)+nvl(favor_num,0)+nvl(share_num,0))/page_pv*100,2),'%'),0) as `历史互动率`
        ,nvl(concat(round(click_pv/exp_pv*100,2),'%'),0) as `历史ctr`
        ,nvl(click_pv,0) as `历史点击`
        ,nvl(exp_pv,0) as `历史曝光`
        ,nvl(page_pv,0) as `历史浏览pv`
        ,nvl(reply_num,0) as `历史真实评论`
        ,nvl(vote_num,0) as `历史真实点赞`
        ,nvl(favor_num,0) as `历史收藏`
        ,nvl(share_num,0) as `历史转发`
        ,nvl(page_pv_20,0) as `历史超过20秒阅读pv`
        ,nvl(avg_page_stay,0) as `历史平均阅读时长(s)`
FROM
(
    -- Roll the per-day rows up to one row per answer.
    -- The original note says "history" means data from the audit time until now.
    -- NOTE(review): no audit_date filter is applied below; the range is every
    -- partition from 20160101 through yesterday -- confirm which is intended.
    SELECT daily.card_id
            ,answer_dim.type
            ,answer_dim.content_level
            ,answer_dim.create_date
            ,answer_dim.audit_date
            ,answer_dim.tag_list
            ,sum(daily.is_cpc) as is_cpc
            ,sum(daily.exp_pv) as exp_pv
            ,sum(daily.click_pv) as click_pv
            ,sum(daily.page_pv) as page_pv
            ,sum(daily.reply_num) as reply_num
            ,sum(daily.vote_num) as vote_num
            ,sum(daily.favor_num) as favor_num
            ,sum(daily.share_num) as share_num
            ,sum(daily.page_pv_20) as page_pv_20
            -- average of the per-day averages (not weighted by daily visitors)
            ,round(avg(daily.avg_page_stay),2) as avg_page_stay

            -- same measures restricted to the trailing 30 days;
            -- daily.partition_date is a 'yyyy-MM-dd' string
            ,sum(case when daily.partition_date>=DATE_SUB(current_date,30) then daily.exp_pv end) as exp_pv_30
            ,sum(case when daily.partition_date>=DATE_SUB(current_date,30) then daily.click_pv end) as click_pv_30
            ,sum(case when daily.partition_date>=DATE_SUB(current_date,30) then daily.page_pv end) as page_pv_30
            ,sum(case when daily.partition_date>=DATE_SUB(current_date,30) then daily.reply_num end) as reply_num_30
            ,sum(case when daily.partition_date>=DATE_SUB(current_date,30) then daily.vote_num end) as vote_num_30
            ,sum(case when daily.partition_date>=DATE_SUB(current_date,30) then daily.favor_num end) as favor_num_30
            ,sum(case when daily.partition_date>=DATE_SUB(current_date,30) then daily.share_num end) as share_num_30
            ,sum(case when daily.partition_date>=DATE_SUB(current_date,30) then daily.page_pv_20 end) as page_pv_20_30
            ,round(avg(case when daily.partition_date>=DATE_SUB(current_date,30) then daily.avg_page_stay end),2) as avg_page_stay_30
    from
    (
        -- One row per (answer, day): traffic metrics full-joined with interaction metrics.
        -- The yyyymmdd key is coalesced BEFORE formatting: concat_ws skips NULL
        -- arguments and yields '' (not NULL), so wrapping nvl() around two
        -- concat_ws() calls would never fall through to the interaction date and
        -- interaction-only days would be lost from the 30-day window.
        SELECT concat_ws('-'
                    ,substr(nvl(traffic.partition_date,interact.create_date),1,4)
                    ,substr(nvl(traffic.partition_date,interact.create_date),5,2)
                    ,substr(nvl(traffic.partition_date,interact.create_date),7,2)) as partition_date
                ,nvl(traffic.card_id,interact.answer_id) as card_id
                ,traffic.is_cpc
                ,traffic.exp_pv
                ,traffic.click_pv
                ,traffic.page_pv
                ,traffic.page_pv_20
                ,traffic.avg_page_stay
                ,interact.reply_num
                ,interact.vote_num
                ,interact.favor_num
                ,interact.share_num
        from
        (
            -- Traffic per (answer, day). Every source below is first reduced to
            -- exactly one row per (partition_date, card, device) so that joining
            -- them cannot multiply rows and inflate the summed counters.
            select  nvl(nvl(expo.card_id,stay.business_id),pv.business_id) as card_id
                    ,nvl(nvl(expo.partition_date,stay.partition_date),pv.partition_date) as partition_date
                    ,sum(expo.is_cpc) as is_cpc
                    ,sum(expo.exp_pv) as exp_pv
                    ,sum(clk.click_pv) as click_pv
                    ,sum(pv.page_pv) as page_pv
                    ,sum(pv.page_pv_20) as page_pv_20
                    -- total stay time divided by the number of distinct device-days
                    ,round(sum(stay.page_stay)/count(distinct stay.cl_id,stay.partition_date),2) as avg_page_stay
            from
            (-- exposures: distinct sessions per device/card/day; is_cpc = 1 when any
             -- exposure of the card to that device that day was commercial
                select partition_date,card_id,cl_id
                        ,count(distinct app_session_id) as exp_pv
                        ,max(case when transaction_type in ('cpc','advertise') then 1 else 0 end) as is_cpc
                from online.ml_community_precise_exposure_detail
                where partition_date>='20160101' AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
                AND action in ('page_precise_exposure','home_choiceness_card_exposure') -- from app version 7745 the action is page_precise_exposure
                AND is_exposure = '1'  -- precise exposures only
                AND page_name ='home'
                AND card_content_type in ('answer')
                AND tab_name = '精选'
                group by partition_date,card_id,cl_id
            )expo
            left join
            (-- clicks: distinct sessions per device/card/day
                SELECT partition_date,params['card_id'] as card_id,cl_id,count(distinct app_session_id) as click_pv
                from online.bl_hdfs_maidian_updates
                WHERE partition_date>='20160101' AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
                AND action='on_click_card'
                AND params['page_name'] ='home'
                AND params['tab_name'] = '精选'
                GROUP BY partition_date,params['card_id'],cl_id
            )clk
            on expo.partition_date=clk.partition_date and expo.card_id=clk.card_id and expo.cl_id=clk.cl_id
            full join
            (-- reading time: total stay per device/card/day
                SELECT partition_date,business_id,cl_id,sum(page_stay) as page_stay
                from
                (
                    -- de-duplicate page_view events on (page_stay, time_str);
                    -- only stays in [0, 1000) are kept
                    SELECT partition_date,cl_id,params['business_id'] as business_id,page_stay,time_str
                    FROM  online.bl_hdfs_maidian_updates
                    WHERE partition_date >= '20160101'
                    and partition_date <=regexp_replace(DATE_SUB(current_date,1) ,'-','')
                    and action='page_view'
                    and page_stay>=0 and page_stay<1000
                    and page_name in ('question_answer_detail','answer_detail')
                    group by partition_date,cl_id,params['business_id'],page_stay,time_str
                )view_event
                group by partition_date,business_id,cl_id
            )stay
            on expo.partition_date=stay.partition_date and expo.card_id=stay.business_id and expo.cl_id=stay.cl_id
            full join
            (-- detail-page views per device/card/day
                SELECT partition_date,cl_id,params['business_id'] as business_id
                        ,count(distinct time_str) as page_pv
                        -- distinct, like page_pv, so page_pv_20 can never exceed page_pv
                        ,count(distinct case when page_stay>=20 then time_str end) as page_pv_20
                FROM  online.bl_hdfs_maidian_updates
                WHERE partition_date >= '20160101'
                and partition_date <=regexp_replace(DATE_SUB(current_date,1) ,'-','')
                and action='page_view'
                and page_name in ('question_answer_detail','answer_detail')
                group by partition_date,cl_id,params['business_id']
            )pv
            -- all three keys are coalesced so that view-only rows (no exposure row) still match
            on nvl(expo.partition_date,stay.partition_date)=pv.partition_date
               and nvl(expo.card_id,stay.business_id)=pv.business_id
               and nvl(expo.cl_id,stay.cl_id)=pv.cl_id
            left join
            (-- devices flagged by the device-clean dimension (anti-joined below)
                select distinct device_id
                from ML.ML_D_CT_DV_DEVICECLEAN_DIMEN_D
                where PARTITION_DAY =regexp_replace(DATE_SUB(current_date,1) ,'-','')
                AND (IS_MORE_USER = 'true'
                    OR IS_STAFF = 'true'
                    OR IS_SPAM_CHANNEL = 'true'
                    OR IS_SUSPICIOUS = 'true'
                    OR IS_ASSOCIATED_ISSUEUSER = 'true')
            )bad_device
            -- match on the coalesced device id so rows without an exposure record are filtered too
            on nvl(nvl(expo.cl_id,stay.cl_id),pv.cl_id)=bad_device.device_id
            left join
            (-- (day, device) pairs that belong to flagged users (anti-joined below)
                SELECT active_user.partition_date,active_user.device_id
                FROM
                (-- first device id in the user's device_list for that day
                    SELECT user_id,partition_date,
                        if(size(device_list) > 0, device_list [ 0 ], '') AS device_id
                    FROM online.ml_user_updates
                    WHERE partition_date >= '20160101'
                    and partition_date <=regexp_replace(DATE_SUB(current_date,1) ,'-','')
                )active_user
                join
                (
                    select distinct user_id
                    from ml.ML_D_CT_UI_USERCLEAN_DIMEN_D
                    where PARTITION_DAY =regexp_replace(DATE_SUB(current_date,1) ,'-','')
                    and (is_doctor = 'true'
                        or is_puppet = 'true'
                        or is_classify_user = 'true'
                        or is_reply_fake = 'true'
                        or is_staff = 'true'
                        or is_associated_issuedevice = 'true')
                )flagged_user
                on active_user.user_id=flagged_user.user_id
            )bad_user_device
            on nvl(nvl(expo.cl_id,stay.cl_id),pv.cl_id)=bad_user_device.device_id
               and nvl(nvl(expo.partition_date,stay.partition_date),pv.partition_date)=bad_user_device.partition_date
            where bad_device.device_id is null and bad_user_device.device_id is null
            group by nvl(nvl(expo.card_id,stay.business_id),pv.business_id)
                    ,nvl(nvl(expo.partition_date,stay.partition_date),pv.partition_date)
        )traffic
        full join
        (
            -- Interactions per (answer, day), excluding flagged users.
            SELECT act.answer_id,act.create_date
                    ,sum(case when act.type='reply' then act.num end) as reply_num
                    ,sum(case when act.type='vote' then act.num end) as vote_num
                    ,sum(case when act.type='favor' then act.num end) as favor_num
                    ,sum(case when act.type='share' then act.num end) as share_num
            from
            (   -- replies
                SELECT answer_id,regexp_replace(substr(create_time,1,10),'-','') as create_date,user_id,'reply' as type,count(create_time) as num
                FROM online.tl_hdfs_answer_reply_view
                WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','')
                AND answer_id is not NULL
                group by user_id,answer_id,regexp_replace(substr(create_time,1,10),'-','')

                union all
                -- votes
                SELECT answer_id,regexp_replace(substr(create_time,1,10),'-','') as create_date,user_id,'vote' as type,count(create_time) as num
                FROM online.tl_hdfs_answer_vote_view
                WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','')
                group by answer_id,user_id,regexp_replace(substr(create_time,1,10),'-','')

                union all
                -- "attention" button clicks on the Q&A detail page (counted as favor)
                SELECT params['business_id'] as answer_id,partition_date as create_date,user_id,'favor' as type,count(distinct time_str) as num
                FROM  online.bl_hdfs_maidian_updates
                WHERE partition_date >= '20160101'
                and partition_date <=regexp_replace(DATE_SUB(current_date,1) ,'-','')
                and action='on_click_button' and params['button_name']='attention'
                and page_name in ('question_answer_detail')
                group by params['business_id'],partition_date,user_id

                union all
                -- favorite button clicks on the answer detail page (counted as favor)
                SELECT params['business_id'] as answer_id,partition_date as create_date,user_id,'favor' as type,count(distinct time_str) as num
                FROM  online.bl_hdfs_maidian_updates
                WHERE partition_date >= '20160101'
                and partition_date <=regexp_replace(DATE_SUB(current_date,1) ,'-','')
                and action='on_click_button' and params['button_name']='收藏'
                and page_name in ('answer_detail')
                group by params['business_id'],partition_date,user_id

                union all
                -- share clicks
                SELECT params['business_id'] as answer_id,partition_date as create_date,user_id,'share' as type,count(distinct time_str) as num
                FROM  online.bl_hdfs_maidian_updates
                WHERE partition_date >= '20160101'
                and partition_date <=regexp_replace(DATE_SUB(current_date,1) ,'-','')
                and action='page_click_share'
                and page_name in ('question_answer_detail','answer_detail')
                group by params['business_id'],partition_date,user_id
            )act
            left join
            (-- flagged users (anti-joined via the WHERE below)
                select distinct user_id
                from ml.ML_D_CT_UI_USERCLEAN_DIMEN_D
                where PARTITION_DAY =regexp_replace(DATE_SUB(current_date,1) ,'-','')
                and (is_doctor = 'true'
                    or is_puppet = 'true'
                    or is_classify_user = 'true'
                    or is_reply_fake = 'true'
                    or is_staff = 'true'
                    or is_associated_issuedevice = 'true')
            )flagged_user
            on act.user_id=flagged_user.user_id
            where flagged_user.user_id is null
            group by act.answer_id,act.create_date
        )interact
        -- both sides carry the day as a yyyymmdd string here
        on traffic.card_id=interact.answer_id and traffic.partition_date =interact.create_date
    )daily
    join
    (
        -- Answer attributes plus the set of tag names attached to the answer's question.
        select content.id,content.content_level,content.user_id,content.create_date,content.audit_date,content.type,tags.tag_list
        FROM
        (
            -- Disabled content sources, kept for reference:
--          select id,content_level,user_id,substr(create_time,1,10) as create_date,substr(audit_time,1,10) as audit_date,'帖子' as type
--          from online.tl_hdfs_api_tractate_view
--          where partition_date= regexp_replace(DATE_SUB(current_date,1) ,'-','')
--          group by id,content_level,user_id,create_time,audit_time
--
--          union all

--          select id,content_level,user_id,substr(created_time,1,10) as create_date,substr(audit_time,1,10) as audit_date,'日记本' as type
--          from online.tl_hdfs_diary_view
--          where partition_date= regexp_replace(DATE_SUB(current_date,1) ,'-','')
--          group by id,content_level,user_id,created_time,audit_time

--          union all
--
            -- answers have no audit time; update_time is used as an approximation
            select id,level as content_level,user_id,substr(create_time,1,10) as create_date,substr(update_time,1,10) as audit_date,'回答' as type
            from online.tl_hdfs_answer_view
            where partition_date= regexp_replace(DATE_SUB(current_date,1) ,'-','')
            group by id,level,user_id,create_time,update_time
        )content
        left join
        (
            select card_tag.card_id,collect_set(tag_dim.tag_name) as tag_list
            from
            (
                -- Disabled tag sources, kept for reference:
--              select diary_id as card_id, tag_id
--              from online.tl_hdfs_diary_tags_view   -- diaries
--              where partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
--              group by diary_id, tag_id

--              union all
--
--              select tractate_id as card_id,tag_id
--              from online.tl_hdfs_api_tractate_tag_view   -- user posts
--              where partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
--              group by tractate_id,tag_id
--
--              union all
--
                -- an answer takes the tags of its question
                select ans.card_id,question_tag.tag_id
                from
                (
                    select id as card_id,question_id
                    from online.tl_hdfs_answer_view     -- answers
                    where partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
                    and is_online ='true'
                )ans
                join
                (
                    select question_id,tag_id
                    from online.tl_hdfs_questiontag_view
                    where partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
                )question_tag
                on ans.question_id =question_tag.question_id
                group by ans.card_id,question_tag.tag_id
            )card_tag
            join
            (
                select id as tag_id
                    ,name as tag_name
                from online.tl_hdfs_api_tag_view  -- old tag library
                -- pinned to a fixed snapshot partition
                -- NOTE(review): presumably intentional for the old tag library -- confirm
                where partition_date = '20200212'
                    and tag_type in (2,3)
            )tag_dim
            on card_tag.tag_id=tag_dim.tag_id
            group by card_tag.card_id
        )tags
        on content.id=tags.card_id
    )answer_dim
    on daily.card_id= answer_dim.id
    group by daily.card_id,answer_dim.type,answer_dim.content_level,answer_dim.create_date,answer_dim.audit_date,answer_dim.tag_list
)t4
order by `历史曝光` desc
