
-- Session configuration for the statement that follows.
-- Comments are kept on their own lines because anything after the equals sign
-- on a SET line is taken as part of the value.

-- Queue the MapReduce jobs are submitted to.
SET mapreduce.job.queuename=data;

-- Map side: 8192 MB container with the JVM heap capped at 8000 MB.
-- NOTE(review): this leaves very little non-heap headroom inside the container - confirm it is intentional.
SET mapreduce.map.memory.mb=8192;
SET mapreduce.map.java.opts=-Xmx8000m;

-- Reduce side: same sizing as the map side.
SET mapreduce.reduce.memory.mb=8192;
SET mapreduce.reduce.java.opts=-Xmx8000m;

-- Fixed number of reduce tasks.
SET mapred.reduce.tasks=20;

-- Allow automatic conversion of eligible joins to map joins.
SET hive.auto.convert.join=true;

-- Run with the admin role.
SET ROLE admin;


-- Rebuilds one partition of pm.tl_pm_userpost_d_v2.
--
-- Output: one row per post that is online, has content_level >= 3 and carries
-- tag_v3_id 3315, with its title, audit date, collected tag names and
-- engagement metrics over four windows: since yesterday (_1), last 3 days (_3),
-- last 10 days (_10) and everything since the audit date (no suffix).
-- Column order matters because the insert is positional.
--
-- Fixes relative to the previous version of this statement:
--   1. The daily date key is coalesced BEFORE it is formatted. concat_ws skips
--      NULL arguments and returns an empty string, so the old
--      nvl(concat_ws(..), concat_ws(..)) never fell back to the interaction date
--      and interaction-only days were dropped by the audit_date filter.
--   2. Page-stay rows are pre-aggregated to one row per day, post and device.
--      They used to be one row per view event, which multiplied the exposure,
--      click and page-view sums through the joins.
--   3. The page-view join now coalesces the date key the same way it already
--      coalesced the post and device keys.
--   4. The abnormal-device filter now looks at the device id from whichever
--      source supplied the row, not only the exposure source.
--
-- NOTE(review): the target partition comes from the partition_day variable while
-- every date filter is derived from current_date - confirm that backfills for
-- other days are not expected to work with this script.
INSERT OVERWRITE TABLE pm.tl_pm_userpost_d_v2 PARTITION (PARTITION_DAY = ${partition_day})
SELECT post.id AS post_id
        ,post.title
        ,post.audit_date
        ,post.tag_list

        -- Window: since yesterday
        ,nvl(sum(CASE WHEN stat.partition_date >= DATE_SUB(current_date,1) THEN stat.exp_pv END),0) AS exp_pv_1
        ,nvl(sum(CASE WHEN stat.partition_date >= DATE_SUB(current_date,1) THEN stat.click_pv END),0) AS click_pv_1
        ,nvl(sum(CASE WHEN stat.partition_date >= DATE_SUB(current_date,1) THEN stat.page_pv END),0) AS page_pv_1
        ,nvl(sum(CASE WHEN stat.partition_date >= DATE_SUB(current_date,1) THEN stat.page_pv_20s END),0) AS page_pv_20s_1
        ,nvl(sum(CASE WHEN stat.partition_date >= DATE_SUB(current_date,1) THEN stat.reply_num END),0) AS reply_num_1
        ,nvl(sum(CASE WHEN stat.partition_date >= DATE_SUB(current_date,1) THEN stat.vote_num END),0) AS vote_num_1
        ,nvl(sum(CASE WHEN stat.partition_date >= DATE_SUB(current_date,1) THEN stat.favor_num END),0) AS favor_num_1
        ,nvl(sum(CASE WHEN stat.partition_date >= DATE_SUB(current_date,1) THEN stat.share_num END),0) AS share_num_1
        ,nvl(round(avg(CASE WHEN stat.partition_date >= DATE_SUB(current_date,1) THEN stat.avg_page_stay END),2),0) AS avg_page_stay_1

        -- Window: last 3 days
        ,nvl(sum(CASE WHEN stat.partition_date >= DATE_SUB(current_date,3) THEN stat.exp_pv END),0) AS exp_pv_3
        ,nvl(sum(CASE WHEN stat.partition_date >= DATE_SUB(current_date,3) THEN stat.click_pv END),0) AS click_pv_3
        ,nvl(sum(CASE WHEN stat.partition_date >= DATE_SUB(current_date,3) THEN stat.page_pv END),0) AS page_pv_3
        ,nvl(sum(CASE WHEN stat.partition_date >= DATE_SUB(current_date,3) THEN stat.page_pv_20s END),0) AS page_pv_20s_3
        ,nvl(sum(CASE WHEN stat.partition_date >= DATE_SUB(current_date,3) THEN stat.reply_num END),0) AS reply_num_3
        ,nvl(sum(CASE WHEN stat.partition_date >= DATE_SUB(current_date,3) THEN stat.vote_num END),0) AS vote_num_3
        ,nvl(sum(CASE WHEN stat.partition_date >= DATE_SUB(current_date,3) THEN stat.favor_num END),0) AS favor_num_3
        ,nvl(sum(CASE WHEN stat.partition_date >= DATE_SUB(current_date,3) THEN stat.share_num END),0) AS share_num_3
        ,nvl(round(avg(CASE WHEN stat.partition_date >= DATE_SUB(current_date,3) THEN stat.avg_page_stay END),2),0) AS avg_page_stay_3

        -- Window: last 10 days
        ,nvl(sum(CASE WHEN stat.partition_date >= DATE_SUB(current_date,10) THEN stat.exp_pv END),0) AS exp_pv_10
        ,nvl(sum(CASE WHEN stat.partition_date >= DATE_SUB(current_date,10) THEN stat.click_pv END),0) AS click_pv_10
        ,nvl(sum(CASE WHEN stat.partition_date >= DATE_SUB(current_date,10) THEN stat.page_pv END),0) AS page_pv_10
        ,nvl(sum(CASE WHEN stat.partition_date >= DATE_SUB(current_date,10) THEN stat.page_pv_20s END),0) AS page_pv_20s_10
        ,nvl(sum(CASE WHEN stat.partition_date >= DATE_SUB(current_date,10) THEN stat.reply_num END),0) AS reply_num_10
        ,nvl(sum(CASE WHEN stat.partition_date >= DATE_SUB(current_date,10) THEN stat.vote_num END),0) AS vote_num_10
        ,nvl(sum(CASE WHEN stat.partition_date >= DATE_SUB(current_date,10) THEN stat.favor_num END),0) AS favor_num_10
        ,nvl(sum(CASE WHEN stat.partition_date >= DATE_SUB(current_date,10) THEN stat.share_num END),0) AS share_num_10
        ,nvl(round(avg(CASE WHEN stat.partition_date >= DATE_SUB(current_date,10) THEN stat.avg_page_stay END),2),0) AS avg_page_stay_10

        -- Window: everything since the audit date (bounded by the WHERE clause below)
        ,nvl(sum(stat.exp_pv),0) AS exp_pv
        ,nvl(sum(stat.click_pv),0) AS click_pv
        ,nvl(sum(stat.page_pv),0) AS page_pv
        ,nvl(sum(stat.page_pv_20s),0) AS page_pv_20s
        ,nvl(sum(stat.reply_num),0) AS reply_num
        ,nvl(sum(stat.vote_num),0) AS vote_num
        ,nvl(sum(stat.favor_num),0) AS favor_num
        ,nvl(sum(stat.share_num),0) AS share_num
        ,nvl(round(avg(stat.avg_page_stay),2),0) AS avg_page_stay
FROM
(
    -- Eligible posts together with the set of tag names attached to each of them.
    SELECT tractate.id
            ,tractate.title
            ,tractate.audit_date
            ,collect_set(tag_dim.name) AS tag_list
    FROM
    (
        -- Online posts with content_level >= 3 from the latest snapshot.
        SELECT id
                ,title
                ,substr(audit_time,1,10) AS audit_date
        FROM tl.tl_mp_api_tractate
        WHERE partition_day = regexp_replace(DATE_SUB(current_date,1) ,'-','')
        AND content_level >= 3
        AND is_online = 'true'
    ) tractate
    JOIN
    (
        -- Restrict to posts that carry tag 3315.
        SELECT DISTINCT tractate_id
        FROM tl.tl_mp_api_tractate_tag_v3
        WHERE partition_day = regexp_replace(DATE_SUB(current_date,1) ,'-','')
        AND tag_v3_id = '3315'
    ) tagged
    ON tractate.id = tagged.tractate_id
    JOIN
    (
        -- Every distinct (post, tag) pair, used to build the tag list.
        SELECT tractate_id
                ,tag_v3_id
        FROM tl.tl_mp_api_tractate_tag_v3
        WHERE partition_day = regexp_replace(DATE_SUB(current_date,1) ,'-','')
        GROUP BY tractate_id, tag_v3_id
    ) post_tag
    ON tractate.id = post_tag.tractate_id
    LEFT JOIN
    (
        -- Tag id to tag name lookup.
        SELECT id
                ,name
        FROM online.tl_hdfs_api_tag_3_0_view
        WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
        GROUP BY id, name
    ) tag_dim
    ON tag_dim.id = post_tag.tag_v3_id
    GROUP BY tractate.id, tractate.title, tractate.audit_date
) post
JOIN
(
    -- Historical data: one row per post and day with traffic and interaction metrics.
    -- The outer WHERE keeps only days on or after the audit date.
    -- The yyyyMMdd key is coalesced first and only then reformatted to yyyy-MM-dd,
    -- so that days present only on the interaction side keep a usable date.
    SELECT concat_ws('-'
                    ,substr(nvl(traffic.partition_date, interact.create_date),1,4)
                    ,substr(nvl(traffic.partition_date, interact.create_date),5,2)
                    ,substr(nvl(traffic.partition_date, interact.create_date),7,2)) AS partition_date
            ,nvl(traffic.card_id, interact.tractate_id) AS card_id
            ,traffic.exp_pv
            ,traffic.click_pv
            ,traffic.page_pv
            ,traffic.page_pv_20s
            ,interact.reply_num
            ,interact.vote_num
            ,interact.favor_num
            ,interact.share_num
            ,traffic.avg_page_stay
    FROM
    (
        -- Traffic per post and day, excluding devices flagged as abnormal.
        -- Every joined source below is one row per (day, post, device), so the
        -- sums are not multiplied by the joins.
        SELECT nvl(nvl(expo.card_id, stay.business_id), pv.business_id) AS card_id
                ,nvl(nvl(expo.partition_date, stay.partition_date), pv.partition_date) AS partition_date
                ,sum(expo.exp_pv) AS exp_pv
                ,sum(clk.click_pv) AS click_pv
                ,sum(pv.page_pv) AS page_pv
                ,sum(pv.page_pv_20s) AS page_pv_20s
                -- Total stay time divided by the number of devices that have stay data.
                ,round(sum(stay.page_stay) / count(DISTINCT stay.cl_id, stay.partition_date),2) AS avg_page_stay
        FROM
        (
            -- Exposure
            SELECT partition_date
                    ,card_id
                    ,cl_id
                    ,count(DISTINCT app_session_id) AS exp_pv
            FROM online.ml_community_precise_exposure_detail
            WHERE partition_date >= '20160101'
            AND partition_date <= regexp_replace(DATE_SUB(current_date,1) ,'-','')
            -- From version 7745 on the action is page_precise_exposure
            AND action IN ('page_precise_exposure','home_choiceness_card_exposure')
            -- Precise exposure only
            AND is_exposure = '1'
            AND page_name = 'home'
            AND tab_name = '精选'
            GROUP BY partition_date, cl_id, card_id
        ) expo
        LEFT JOIN
        (
            -- Click
            SELECT partition_date
                    ,params['card_id'] AS card_id
                    ,cl_id
                    ,count(DISTINCT app_session_id, cl_id) AS click_pv
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= '20160101'
            AND partition_date <= regexp_replace(DATE_SUB(current_date,1) ,'-','')
            AND action = 'on_click_card'
            AND params['page_name'] = 'home'
            AND params['tab_name'] = '精选'
            GROUP BY partition_date, params['card_id'], cl_id
        ) clk
        ON expo.partition_date = clk.partition_date
        AND expo.card_id = clk.card_id
        AND expo.cl_id = clk.cl_id
        FULL JOIN
        (
            -- Reading duration: total stay per day, post and device.
            -- The inner query removes duplicate events, the outer one collapses
            -- them to a single row per join key.
            SELECT partition_date
                    ,business_id
                    ,cl_id
                    ,sum(page_stay) AS page_stay
            FROM
            (
                SELECT partition_date
                        ,cl_id
                        ,params['business_id'] AS business_id
                        ,page_stay
                        ,time_str
                FROM online.bl_hdfs_maidian_updates
                WHERE partition_date >= '20160101'
                AND partition_date <= regexp_replace(DATE_SUB(current_date,1) ,'-','')
                AND action = 'page_view'
                AND page_stay >= 0 AND page_stay < 1000
                AND page_name IN ('post_detail','user_post_detail','doctor_post_detail')
                GROUP BY partition_date, cl_id, params['business_id'], page_stay, time_str
            ) stay_event
            GROUP BY partition_date, business_id, cl_id
        ) stay
        ON expo.partition_date = stay.partition_date
        AND expo.card_id = stay.business_id
        AND expo.cl_id = stay.cl_id
        FULL JOIN
        (
            -- Page views, and page views with a stay of at least 20 seconds
            SELECT partition_date
                    ,cl_id
                    ,params['business_id'] AS business_id
                    ,count(DISTINCT time_str) AS page_pv
                    ,count(DISTINCT CASE WHEN page_stay >= 20 THEN time_str END) AS page_pv_20s
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= '20160101'
            AND partition_date <= regexp_replace(DATE_SUB(current_date,1) ,'-','')
            AND action = 'page_view'
            AND page_name IN ('post_detail','user_post_detail','doctor_post_detail')
            GROUP BY partition_date, cl_id, params['business_id']
        ) pv
        -- All three keys fall back to the stay source when there is no exposure row.
        ON nvl(expo.partition_date, stay.partition_date) = pv.partition_date
        AND nvl(expo.card_id, stay.business_id) = pv.business_id
        AND nvl(expo.cl_id, stay.cl_id) = pv.cl_id
        LEFT JOIN
        (
            -- Devices flagged as abnormal in the latest snapshot
            SELECT DISTINCT device_id
            FROM ML.ML_D_CT_DV_DEVICECLEAN_DIMEN_D
            WHERE PARTITION_DAY = regexp_replace(DATE_SUB(current_date,1) ,'-','')
            AND is_abnormal_device = 'true'
        ) bad_device
        -- Anti-join on the device id of whichever source supplied the row.
        ON nvl(nvl(expo.cl_id, stay.cl_id), pv.cl_id) = bad_device.device_id
        WHERE bad_device.device_id IS NULL
        GROUP BY nvl(nvl(expo.card_id, stay.business_id), pv.business_id)
                ,nvl(nvl(expo.partition_date, stay.partition_date), pv.partition_date)
    ) traffic
    FULL JOIN
    (
        -- Interactions per post and day, excluding users flagged as abnormal.
        SELECT act.tractate_id
                ,act.create_date
                ,sum(CASE WHEN act.type = 'reply' THEN act.num END) AS reply_num
                ,sum(CASE WHEN act.type = 'vote' THEN act.num END) AS vote_num
                ,sum(CASE WHEN act.type = 'favor' THEN act.num END) AS favor_num
                ,sum(CASE WHEN act.type = 'share' THEN act.num END) AS share_num
        FROM
        (
            -- Real reply count
            SELECT tractate_id
                    ,regexp_replace(substr(create_time,1,10),'-','') AS create_date
                    ,user_id
                    ,'reply' AS type
                    ,count(DISTINCT create_time) AS num
            FROM online.tl_hdfs_api_tractate_reply_view
            WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
            GROUP BY tractate_id, user_id, regexp_replace(substr(create_time,1,10),'-','')

            UNION ALL
            -- Real vote count
            SELECT tractate_id
                    ,regexp_replace(substr(create_time,1,10),'-','') AS create_date
                    ,user_id
                    ,'vote' AS type
                    ,count(DISTINCT create_time) AS num
            FROM online.tl_hdfs_api_tractate_vote_view
            WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
            GROUP BY tractate_id, user_id, regexp_replace(substr(create_time,1,10),'-','')

            UNION ALL
            -- Real favorite count
            SELECT tractate_id
                    ,regexp_replace(substr(create_time,1,10),'-','') AS create_date
                    ,user_id
                    ,'favor' AS type
                    ,count(DISTINCT create_time) AS num
            FROM online.tl_hdfs_api_tractate_favor_view
            WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
            GROUP BY user_id, tractate_id, regexp_replace(substr(create_time,1,10),'-','')

            UNION ALL
            -- Share click count
            SELECT params['business_id'] AS tractate_id
                    ,partition_date AS create_date
                    ,user_id
                    ,'share' AS type
                    ,count(DISTINCT time_str) AS num
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= '20160101'
            AND partition_date <= regexp_replace(DATE_SUB(current_date,1) ,'-','')
            AND action = 'page_click_share'
            AND page_name IN ('post_detail','user_post_detail','doctor_post_detail')
            GROUP BY params['business_id'], partition_date, user_id
        ) act
        LEFT JOIN
        (
            -- Users flagged as abnormal in the latest snapshot
            SELECT DISTINCT user_id
            FROM ml.ML_D_CT_UI_USERCLEAN_DIMEN_D
            WHERE PARTITION_DAY = regexp_replace(DATE_SUB(current_date,1) ,'-','')
            AND is_abnormal_user = 'true'
        ) bad_user
        ON act.user_id = bad_user.user_id
        WHERE bad_user.user_id IS NULL
        GROUP BY act.tractate_id, act.create_date
    ) interact
    ON traffic.card_id = interact.tractate_id
    AND traffic.partition_date = interact.create_date
) stat
ON post.id = stat.card_id
-- Only count activity from the audit date onwards.
WHERE stat.partition_date >= post.audit_date
GROUP BY post.id, post.title, post.audit_date, post.tag_list;