-- Session configuration for the daily recommend-strategy load.
-- Comments are kept on their own lines: text after a SET value would become part of the value.

-- YARN queue the job is submitted to.
SET mapreduce.job.queuename=data;

-- Map and reduce containers are 8192 MB each, with the JVM heap capped at 8000 MB.
-- NOTE(review): this leaves under 200 MB of each container for non-heap memory - confirm the headroom is sufficient.
SET mapreduce.map.memory.mb=8192;
SET mapreduce.map.java.opts=-Xmx8000m;
SET mapreduce.reduce.memory.mb=8192;
SET mapreduce.reduce.java.opts=-Xmx8000m;

-- Let Hive convert joins against small tables into map-side joins.
SET hive.auto.convert.join=true;

-- Fixed reducer count. NOTE(review): mapred.reduce.tasks is the legacy name of mapreduce.job.reduces.
SET mapred.reduce.tasks=20;

-- Run the remainder of the session under the admin role.
SET role admin;



-- Rebuilds one partition of pm.tl_pm_recommend_strategy_d: exposure, click and
-- post-click detail-page figures for content cards on the home page, 精选 tab,
-- broken down by recommendation strategy.
--
-- Output grain (see the final GROUP BY):
--   partition_date x device_os_type x active_type x card_content_type x recommend_type
--
-- Join skeleton:
--   t1            active devices for the day (inner join, defines the device population)
--   t2            card exposures per device / card / strategy (inner join)
--   t3            card clicks matched to the same device / card / strategy (left join)
--   t4..t9        detail-page behaviour, keyed through t3, so it is only populated
--                 for cards that were clicked
--   spam_pv, dev  exclusion lists applied as anti-joins (LEFT JOIN + IS NULL)
--
-- NOTE(review): the target partition comes from the partition_day job parameter, but
-- every source filter is hard-wired to current_date - 1. A rerun for an older
-- partition_day would load yesterday rows into that partition - confirm this is intended.
-- NOTE(review): t4..t9 are joined on device + card + content type but not on
-- recommend_type, so a card that a device clicked under two strategies carries the same
-- detail-page figures in both output rows - confirm this attribution is acceptable.
INSERT OVERWRITE TABLE pm.tl_pm_recommend_strategy_d PARTITION (PARTITION_DAY = ${partition_day})
SELECT
    t1.partition_date as day_id,
    t1.device_os_type as device_os_type,
    t1.active_type as active_type,
    t2.card_content_type as card_content_type,
    t2.recommend_type as recommend_type,
    -- clicks and exposures are sums of the per-card distinct-session counts from t3 and t2
    NVL(sum(t3.session_pv),0) as card_click,
    NVL(sum(t2.session_pv),0) as card_exposure,
    -- total page_stay / distinct devices with a t4 row / 60, rounded to 2 decimals
    -- (page_stay is presumably in seconds, giving minutes per device - TODO confirm).
    -- NVL maps the NULL result (no matching t4 row in the group) to 0.
    NVL(round(sum(page_stay)/count(distinct t4.cl_id)/60,2),0) as avg_page_stay,
    NVL(sum(navbar_pv),0) as navbar_search,
    NVL(sum(highlight_pv),0) as highlight_word,
    NVL(sum(self_wel_pv),0) as self_welfare_card,
    NVL(sum(recom_wel_pv),0)-NVL(sum(self_wel_pv),0) as recommend_welfare_card,--clicks on the associated goods cards must be excluded
        NVL(sum(recom_content_pv),0) as recommend_content_card,
    -- placeholder columns: always written as NULL by this job
    NULL as recommend_special_card,
    NULL as transfer_card,
    NULL as video_consultation
FROM
    (--active devices for the day, relabelled by active_type
        SELECT partition_date
             ,device_os_type
             -- maps active_type 4 to 老活 and 1 / 2 to 新增
             -- (roughly returning-active vs newly-added devices - TODO confirm code meanings)
             ,CASE WHEN active_type = '4'  THEN '老活'
                   WHEN active_type  IN ('1','2')  THEN '新增' END AS active_type
             ,device_id
        FROM online.ml_device_day_active_status
        WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
          -- this tests the raw active_type column, not the relabelled alias above
          AND active_type IN ('1','2','4')
          -- drop devices whose first channel is in this exclusion list
          -- (presumably promotion / low-quality channels - TODO confirm)
          -- NOTE(review): rows with a NULL first_channel_source_type fail both the NOT IN
          -- and the NOT LIKE test and are dropped as well - confirm this is intended
          AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
            ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
            ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
            ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
            ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
            ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
            ,'promotion_shike','promotion_julang_jl03','promotion_zuimei')
          -- also drop every channel starting with promotion_jf_ (underscores escaped to match literally)
          AND first_channel_source_type not LIKE 'promotion\_jf\_%'
    )t1
        JOIN
    (--precise exposure, deduplicated by card id and session_id
        SELECT  partition_date,
                card_content_type,
                cl_id,
                recommend_type,
                card_id,
                count(distinct app_session_id) as session_pv
        FROM
        (
           -- one row per device / content type / strategy / card / session:
           -- the GROUP BY below has no aggregate and only deduplicates
           SELECT partition_date,
                  cl_id,
                  -- answer is folded into qa
                  case when card_content_type in ('qa','answer') then 'qa' else card_content_type end as card_content_type,
                  -- maps transaction_type to the strategy label written to the report (-1 is treated as smr)
                  CASE WHEN transaction_type in ('ctr')  THEN 'ctr预估'
                       WHEN transaction_type in ('cvr') THEN 'cvr预估'
                       WHEN transaction_type in ('-1','smr') THEN 'smr'
                       when transaction_type in ('pgc','hotspot') then '热点卡片'
                       when transaction_type in ('newdata') then '保量卡片'
                       when transaction_type in ('samecity_ctr') then 'samecity_ctr'
                       when transaction_type in ('samecity_cvr') then 'samecity_cvr'
                  END AS recommend_type,
                  card_id,
                  app_session_id
           from online.ml_community_precise_exposure_detail
           WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
           AND action in ('page_precise_exposure','home_choiceness_card_exposure') --from app version 7745 the action was renamed to page_precise_exposure
           AND is_exposure = '1'  ----precise exposure
           AND page_name ='home'
           AND tab_name = '精选'
           AND transaction_type in ('-1','ctr','smr','cvr','hotspot','pgc','newdata','samecity_ctr','samecity_cvr')
           AND card_content_type in ('qa','diary','user_post','answer')
           group by partition_date,
                  case when card_content_type in ('qa','answer') then 'qa' else card_content_type end,
                  cl_id,
                  CASE WHEN transaction_type in ('ctr')  THEN 'ctr预估'
                       WHEN transaction_type in ('cvr') THEN 'cvr预估'
                       WHEN transaction_type in ('-1','smr') THEN 'smr'
                       when transaction_type in ('pgc','hotspot') then '热点卡片'
                       when transaction_type in ('newdata') then '保量卡片'
                       when transaction_type in ('samecity_ctr') then 'samecity_ctr'
                       when transaction_type in ('samecity_cvr') then 'samecity_cvr' END,
                  card_id,
                  app_session_id
        )a
        group by partition_date,card_content_type,cl_id,recommend_type,card_id
      )t2
    on t1.device_id=t2.cl_id and t1.partition_date=t2.partition_date
        LEFT JOIN
    (--card clicks, deduplicated by card id and session_id
        SELECT  partition_date,
                card_content_type,
                cl_id,
                recommend_type,
                card_id,
                count(distinct app_session_id) as session_pv
        FROM
        (
           -- same shape and label mapping as the exposure subquery t2, but the
           -- attributes are read from the params map of the click event
           SELECT partition_date,
                  cl_id,
                  case when params['card_content_type'] in ('qa','answer') then 'qa' else params['card_content_type'] end as card_content_type,
                  CASE WHEN params['transaction_type'] in ('ctr')  THEN 'ctr预估'
                       WHEN params['transaction_type'] in ('cvr') THEN 'cvr预估'
                       WHEN params['transaction_type'] in ('-1','smr') THEN 'smr'
                       when params['transaction_type'] in ('pgc','hotspot') then '热点卡片'
                       when params['transaction_type'] in ('newdata') then '保量卡片'
                       when params['transaction_type'] in ('samecity_ctr') then 'samecity_ctr'
                       when params['transaction_type'] in ('samecity_cvr') then 'samecity_cvr'
                  END AS recommend_type,
                  params['card_id'] as card_id,
                  app_session_id
           from online.bl_hdfs_maidian_updates
           WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
           AND action='on_click_card'
           AND params['page_name'] ='home'
           AND params['tab_name'] = '精选'
           AND params['transaction_type'] in ('-1','ctr','smr','cvr','hotspot','pgc','newdata','samecity_ctr','samecity_cvr')
           AND params['card_content_type'] in ('qa','diary','user_post','answer')
           GROUP BY partition_date,
                  cl_id,
                  case when params['card_content_type'] in ('qa','answer') then 'qa' else params['card_content_type'] end,
                  CASE WHEN params['transaction_type'] in ('ctr')  THEN 'ctr预估'
                       WHEN params['transaction_type'] in ('cvr') THEN 'cvr预估'
                       WHEN params['transaction_type'] in ('-1','smr') THEN 'smr'
                       when params['transaction_type'] in ('pgc','hotspot') then '热点卡片'
                       when params['transaction_type'] in ('newdata') then '保量卡片'
                       when params['transaction_type'] in ('samecity_ctr') then 'samecity_ctr'
                       when params['transaction_type'] in ('samecity_cvr') then 'samecity_cvr' END,
                  params['card_id'],
                  app_session_id
        )a
        group by partition_date,card_content_type,cl_id,recommend_type,card_id
      )t3
    -- a click only counts when it matches an exposure row on every key
    on t2.partition_date=t3.partition_date
        and t2.cl_id=t3.cl_id
        and t2.card_id=t3.card_id
        and t2.card_content_type=t3.card_content_type
        and t2.recommend_type=t3.recommend_type
        LEFT JOIN
    (--page view duration
          -- detail-page names are collapsed to the three content types so that
          -- page_name can be joined against t3.card_content_type
          SELECT  partition_date,cl_id,business_id,
                  case when page_name in ('diary_detail','topic_detail') then 'diary'
                       when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
                       when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end as page_name,
                  sum(page_stay) as page_stay
          FROM online.bl_hdfs_maidian_updates
          WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
          AND action='page_view'
          AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail')
          -- NOTE(review): only referrer = home is accepted here, while t5..t9 also accept
          -- a referrer_link whose last element is home - confirm the difference is intended
          AND referrer='home'
          -- keep stays in the range [0, 1000), presumably to drop invalid or outlier values - TODO confirm
          AND page_stay>=0 AND page_stay<1000
          GROUP BY partition_date,cl_id,business_id,
                   case when page_name in ('diary_detail','topic_detail') then 'diary'
                       when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
                       when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end
      )t4
    -- business_id of the detail page is matched against the clicked card_id
    on t4.partition_date=t3.partition_date
        and t4.cl_id=t3.cl_id
        and t4.business_id=t3.card_id
        and t4.page_name=t3.card_content_type
        LEFT JOIN
    (--search box and click behaviour
          SELECT partition_date,cl_id,business_id,
                 case when page_name in ('diary_detail','topic_detail') then 'diary'
                       when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
                       when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end as page_name,
                 count(1) as navbar_pv
          FROM online.bl_hdfs_maidian_updates
          WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
          AND action in ('on_click_navbar_search','do_search')
          AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail')
          -- came from home: either referrer is home, or referrer_link contains a [ and the
          -- last element returned by json_split is home
          -- (json_split is not a Hive built-in - presumably a project UDF returning an array)
          AND (referrer='home' or
                  (params['referrer_link'] like '%[%' and
                   json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1]='home'))
          group by partition_date,cl_id,business_id,
                 case when page_name in ('diary_detail','topic_detail') then 'diary'
                       when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
                       when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end
      )t5
    on t5.partition_date=t3.partition_date
        and t5.cl_id=t3.cl_id
        and t5.business_id=t3.card_id
        and t5.page_name=t3.card_content_type
        LEFT JOIN
    (--clicks on highlighted words
          SELECT partition_date,cl_id,business_id,
                 case when page_name in ('diary_detail','topic_detail') then 'diary'
                       when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
                       when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end as page_name,
                 count(1) as highlight_pv
          FROM online.bl_hdfs_maidian_updates
          WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
          AND action='on_click_card'
          and params['card_type']='highlight_word'
          AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail')
          AND (referrer='home' or
                  (params['referrer_link'] like '%[%' and
                   json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1]='home'))
          group by partition_date,cl_id,business_id,
                 case when page_name in ('diary_detail','topic_detail') then 'diary'
                       when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
                       when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end
      )t6
    on t6.partition_date=t3.partition_date
        and t6.cl_id=t3.cl_id
        and t6.business_id=t3.card_id
        and t6.page_name=t3.card_content_type
        LEFT JOIN
    (--associated welfare (service) cards
        -- distinct sessions per device / detail page with at least one qualifying click
        SELECT partition_date,cl_id,business_id,page_name,count(distinct app_session_id) as self_wel_pv
        FROM
        (
            -- the card_id and pv columns produced here are not read by the outer query
            SELECT  partition_date,cl_id,business_id,app_session_id,params['card_id'] as card_id,
                    -- only the diary branch can fire: page_name is limited to
                    -- diary_detail / topic_detail in the WHERE clause below
                    case when page_name in ('diary_detail','topic_detail') then 'diary'
                         when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
                         when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end as page_name,
                    count(1) as pv
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
            -- any one of these markers qualifies the click as an associated card
            -- (business meaning of the individual markers is not visible here - TODO confirm)
            AND (get_json_object(params['extra_param'], '$.type')='交互栏'
                   or get_json_object(params['extra_param'], '$.jump_from')='msg_link'
                   or params['in_page_pos']='top' or params['in_page_pos']='bottom')
            AND action='on_click_card'
            and params['card_content_type']='service'
            AND page_name IN ('diary_detail','topic_detail')
            AND (referrer='home' or
                  (params['referrer_link'] like '%[%' and
                   json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1]='home'))
            group by partition_date,cl_id,business_id,app_session_id,params['card_id'],
                    case when page_name in ('diary_detail','topic_detail') then 'diary'
                         when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
                         when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end
        )a
        group by partition_date,cl_id,business_id,page_name
      )t7
    on t7.partition_date=t3.partition_date
        and t7.cl_id=t3.cl_id
        and t7.business_id=t3.card_id
        and t7.page_name=t3.card_content_type
        LEFT JOIN
    (--recommended welfare cards (welfare items consumed by the author must be excluded)
        -- counts every service-card click or unfold-button click on the detail page.
        -- the outer SELECT subtracts self_wel_pv from this to get recommend_welfare_card
        SELECT partition_date,cl_id,business_id,page_name,count(distinct app_session_id) as recom_wel_pv
        FROM
        (
            -- the card_id and service_pv columns produced here are not read by the outer query
            SELECT  partition_date,cl_id,business_id,app_session_id,params['card_id'] as card_id,
                    case when page_name in ('diary_detail','topic_detail') then 'diary'
                         when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
                         when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end as page_name,
                    count(1) as service_pv
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
            -- AND binds tighter than OR: (card click on a service card) OR (click on the unfold button)
            AND (action='on_click_card'and  params['card_content_type']='service'
                   or action='on_click_button' and params['button_name']='unfold')
            AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail')
            AND (referrer='home' or
                  (params['referrer_link'] like '%[%' and
                   json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1]='home'))
            group by partition_date,cl_id,business_id,app_session_id,params['card_id'],
                    case when page_name in ('diary_detail','topic_detail') then 'diary'
                         when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
                         when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end
        )a
        group by partition_date,cl_id,business_id,page_name
      )t8
    on t8.partition_date=t3.partition_date
        and t8.cl_id=t3.cl_id
        and t8.business_id=t3.card_id
        and t8.page_name=t3.card_content_type
        LEFT JOIN
    (--recommended content cards
        SELECT partition_date,cl_id,business_id,page_name,count(distinct app_session_id) as recom_content_pv
        FROM
        (
            -- the card_id and service_pv columns produced here are not read by the outer query
            SELECT  partition_date,cl_id,business_id,app_session_id,params['card_id'] as card_id,
                    case when page_name in ('diary_detail','topic_detail') then 'diary'
                         when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
                         when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end as page_name,
                    count(1) as service_pv
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
            AND action='on_click_card'
            and params['card_content_type'] in ('qa','diary','user_post','answer')
            AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail')
            AND (referrer='home' or
                  (params['referrer_link'] like '%[%' and
                   json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1]='home'))
            group by partition_date,cl_id,business_id,app_session_id,params['card_id'],
                    case when page_name in ('diary_detail','topic_detail') then 'diary'
                         when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
                         when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end
        )a
        group by partition_date,cl_id,business_id,page_name
      )t9
    on t9.partition_date=t3.partition_date
        and t9.cl_id=t3.cl_id
        and t9.business_id=t3.card_id
        and t9.page_name=t3.card_content_type
        LEFT JOIN
    (
        -- exclusion list 1: device ids to drop. A device present in both sources appears
        -- twice here, which is harmless because matched rows are removed by the WHERE clause
        select distinct device_id
        from ml.ml_d_ct_dv_devicespam_d  --exclude devices used by organisations for fake orders, i.e. cheating devices (their view and exposure events are removed)
        WHERE partition_day=regexp_replace(DATE_SUB(current_date,1) ,'-','')

        union all

        select distinct device_id
        from dim.dim_device_user_staff   --exclude intranet (internal staff) users
    )spam_pv
    on spam_pv.device_id=t2.cl_id
        LEFT JOIN
    (
        -- exclusion list 2: devices belonging to doctor, puppet / model, staff or
        -- doctor-device-sharing accounts, one row per partition_date / device_id
        SELECT partition_date,device_id
        FROM
            (--find the first device id the user_id was active on that day
          -- first element of device_list, or an empty string when the list has no elements
          SELECT user_id,partition_date,
                  if(size(device_list) > 0, device_list [ 0 ], '') AS device_id
            FROM online.ml_user_updates
            WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
        )t1
                JOIN
            (  --doctor accounts
          SELECT distinct user_id
          FROM online.tl_hdfs_doctor_view
          WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')

          --sock-puppet accounts / model users
          UNION ALL
          SELECT user_id
          FROM ml.ml_c_ct_ui_user_dimen_d
          WHERE partition_day = regexp_replace(DATE_SUB(current_date,1) ,'-','')
          AND (is_puppet = 'true' or is_classifyuser = 'true')

          UNION ALL
          --users covered by the company intranet
          select distinct user_id
          from dim.dim_device_user_staff

          UNION ALL
          --users who have logged in on a device that a doctor account has used
          SELECT distinct t1.user_id
            FROM
            (
                -- one row per user / historical device
                SELECT user_id, v.device_id as device_id
                FROM online.ml_user_history_detail
                    LATERAL VIEW EXPLODE(device_history_list) v AS device_id
                WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
            ) t1
            JOIN
            (
                SELECT device_id
                FROM online.ml_device_history_detail
                WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
                AND is_login_doctor = '1'
            ) t2
                ON t1.device_id = t2.device_id
        )t2
            on t1.user_id=t2.user_id
        -- the GROUP BY only deduplicates: the UNION ALL branches can return a user more than once
        group by partition_date,device_id
    )dev
    on t2.partition_date=dev.partition_date and t2.cl_id=dev.device_id
-- anti-join: keep only devices found in neither exclusion list
WHERE spam_pv.device_id IS NULL
  and dev.device_id is null
GROUP BY t1.partition_date,t1.device_os_type,t1.active_type,t2.card_content_type,t2.recommend_type
-- NOTE(review): ORDER BY in Hive sends all rows through a single reducer. The result is a
-- small aggregate so the cost should be minor, and it also yields a single output file, but
-- readers of the table get no ordering guarantee from it - confirm it is still wanted.
order by day_id,device_os_type,active_type,card_content_type,recommend_type;