-- Session setup for the pm.tl_pm_fangan_d load.
-- Comments stay on their own lines so they never become part of a SET value.

-- YARN queue the MapReduce jobs are submitted to.
SET mapreduce.job.queuename=data;

-- Task sizing: 8192 MB containers with an 8000 MB JVM heap, for mappers and reducers alike.
-- NOTE(review): the heap is within 192 MB of the container limit - confirm that leaves
-- enough headroom for non-heap memory.
SET mapreduce.map.memory.mb=8192;
SET mapreduce.map.java.opts=-Xmx8000m;
SET mapreduce.reduce.memory.mb=8192;
SET mapreduce.reduce.java.opts=-Xmx8000m;

-- Allow Hive to convert eligible joins into map-side joins.
SET hive.auto.convert.join=true;

-- Fixed number of reduce tasks.
SET mapred.reduce.tasks=20;

-- Switch the session to the admin role.
SET ROLE admin;


-- Register the UDF jar and expose UDFConvUpgrade under the name convup.
ADD JAR hdfs:///user/hive/share/lib/udf/hive-udf-1.0-SNAPSHOT.jar;
CREATE TEMPORARY FUNCTION convup AS 'com.gmei.hive.common.udf.UDFConvUpgrade';

-- Overwrite one partition of pm.tl_pm_fangan_d with metrics aggregated per day and test group.
-- Sources: t1 = base device-day rows, t3 = welfare detail page views, t4 = paid orders,
-- t5 = valid clues, t6 = valid clues after the extra user and device filters.
INSERT OVERWRITE TABLE pm.tl_pm_fangan_d PARTITION (PARTITION_DAY = ${partition_day})

SELECT
    t1.partition_date AS day_id,
    grey_type,

    -- Distinct active devices: all, new devices only, old-active devices only.
    count(DISTINCT t1.device_id) AS DAU_730,
    count(DISTINCT CASE WHEN t1.device_type = '新增' THEN t1.device_id END) AS new_DAU,
    count(DISTINCT CASE WHEN t1.device_type = '老活' THEN t1.device_id END) AS old_DAU,

    -- Weighted DAU: distinct devices of each (device_type, OS, channel) segment multiplied
    -- by a fixed per-segment weight, summed and rounded to a whole number.
    round(
          count(DISTINCT CASE WHEN t1.device_type = '老活' AND t1.device_os_type = 'android' AND t1.channel_type = 'AI'
                              THEN t1.device_id END)*0.16
        + count(DISTINCT CASE WHEN t1.device_type = '老活' AND t1.device_os_type = 'android' AND t1.channel_type = '医美'
                              THEN t1.device_id END)*0.70
        + count(DISTINCT CASE WHEN t1.device_type = '新增' AND t1.device_os_type = 'android' AND t1.channel_type = 'AI'
                              THEN t1.device_id END)*0.10
        + count(DISTINCT CASE WHEN t1.device_type = '新增' AND t1.device_os_type = 'android' AND t1.channel_type = '医美'
                              THEN t1.device_id END)*0.20
        + count(DISTINCT CASE WHEN t1.device_type = '老活' AND t1.device_os_type = 'ios' AND t1.channel_type = 'AI'
                              THEN t1.device_id END)*0.35
        + count(DISTINCT CASE WHEN t1.device_type = '老活' AND t1.device_os_type = 'ios' AND t1.channel_type = '医美'
                              THEN t1.device_id END)*1.00
        + count(DISTINCT CASE WHEN t1.device_type = '新增' AND t1.device_os_type = 'ios' AND t1.channel_type = 'AI'
                              THEN t1.device_id END)*0.21
        + count(DISTINCT CASE WHEN t1.device_type = '新增' AND t1.device_os_type = 'ios' AND t1.channel_type = '医美'
                              THEN t1.device_id END)*0.21
    ,0) AS quanzhong_DAU,

    -- Next-day retention in percent: summed rent_num flags (computed in the t1 subquery)
    -- over distinct devices, to 2 decimals.
    round(sum(t1.rent_num)/count(DISTINCT t1.device_id)*100,2) AS liucun,

    -- Welfare detail page: total page views and distinct clients with at least one view.
    sum(t3.wel_pv) AS wel_pv,
    count(DISTINCT CASE WHEN t3.wel_pv IS NOT NULL AND t3.wel_pv <> 0 THEN t3.cl_id END) AS wel_uv,

    -- Paid orders.
    sum(t4.pay_num) AS pay_num,

    -- Valid clues: distinct devices and clue count, before (t5) and after (t6) the
    -- extra user and device filters.
    count(DISTINCT t5.device_id) AS valid_dev_num,
    sum(t5.clue_num) AS valid_pv,
    count(DISTINCT t6.device_id) AS true_valid_dev_num,
    sum(t6.clue_num) AS true_valid_pv
FROM
(   -- t1: one row per active device and day, restricted to devices that reported an app
    -- version 7.30 or later on that day, tagged with device_type, OS, channel_type and
    -- grey_type, plus rent_num as a next-day retention flag.
    SELECT t1.partition_date,t1.device_id,device_type,device_os_type,channel_type,grey_type
        -- The LEFT JOIN to t only matches the following day, so this is 1 when the device
        -- was active again on the next day and 0 otherwise.
        ,count(DISTINCT t.device_id) as rent_num
    FROM
    (-- daily active devices (DAU)
        SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) as partition_date,device_id,device_os_type
            -- active_type 4 is labelled old-active, 1 and 2 are labelled new
            ,case WHEN active_type = '4'  THEN '老活'
              WHEN active_type  in ('1','2')  then '新增' END as device_type
            -- Test group assignment from the last decimal digit of the converted sha1 of
            -- device_id: digits 0-1 on 20200725..20200727, digits 0-4 on 20200724 and from
            -- 20200728 onwards go to group A, everything else to group B.
            ,CASE WHEN (partition_date >= '20200725' AND partition_date <= '20200727' AND substr(convup(sha1(device_id),16,10),-1) in ('0','1'))
                         or (partition_date = '20200724' AND substr(convup(sha1(device_id),16,10),-1) in ('0','1','2','3','4'))
                         or (partition_date >= '20200728' AND substr(convup(sha1(device_id),16,10),-1) in ('0','1','2','3','4')) THEN '测试组A（方案库）'
                else '测试组B（无方案库）' END as grey_type
            -- Channel classification. With the WHERE range below starting at 20200724 the
            -- 20190601 and 20200601 lower bounds are always true and the branch bounded by
            -- 20200301 can never match. They are kept unchanged.
            ,CASE WHEN (partition_date>='20190601' and tmp.col2 = 'AI')
                            or (partition_date < '20200301' AND partition_date>='20190601' and first_channel_source_type like 'promotion_toutiao_jy%')
                            or (partition_date>='20200601' and ((first_channel_source_type like 'promotion_toutiao_jy%') or (first_channel_source_type like 'dyand%') or (first_channel_source_type like 'douyin%')))
                            THEN 'AI'  ELSE '医美' END as channel_type
        FROM online.ml_device_day_active_status
            LEFT JOIN
            (SELECT col1,col2  -- col1: sub-channel, col2: whether it belongs to AI, col3: tag
               FROM pm.tl_pm_ydl
               WHERE col3='0204_danlei_channel')tmp
            on first_channel_source_type=tmp.col1
        where partition_date>='20200724' AND partition_date<=regexp_replace(date_sub(current_date(),1),'-','')
        AND active_type in ('1','2','4')
    )t1

    JOIN
    (-- devices that reported an app version 7.30 or later on that day
        SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) as partition_date,device_id
        FROM
        (
            SELECT partition_date,app_version_list,device_id
            FROM online.ml_device_updates
            WHERE partition_date>='20200724' AND partition_date<=regexp_replace(date_sub(current_date(),1),'-','')
        )a
        LATERAL VIEW EXPLODE (app_version_list) t as app_version
        -- NOTE(review): the major version is pinned to 7, so 8.x and later are excluded - confirm this is intended
        WHERE int(split(t.app_version,'\\.')[1]) >= 30  -- minor version 30 or later
        and int(split(t.app_version,'\\.')[0]) = 7
        GROUP BY partition_date,device_id
    )t2
        ON t1.partition_date = t2.partition_date
        AND t1.device_id = t2.device_id
    LEFT JOIN
    (-- every active device-day in the same range, used to look up next-day activity
        SELECT device_id
        ,concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) as partition_date
        FROM online.ml_device_day_active_status
        WHERE partition_date>='20200724' AND partition_date<=regexp_replace(date_sub(current_date(),1),'-','')
    )t
    -- Match only the following day. Joining on device_id alone would pair every device-day
    -- with every active day of that device and then throw all but one of them away.
    on t1.device_id = t.device_id
    AND date_add(t1.partition_date,1) = t.partition_date
    GROUP BY t1.partition_date,t1.device_id,device_type,grey_type,device_os_type,channel_type
)t1

LEFT JOIN
(-- t3: welfare detail page views per client id and day, app version 7.x with minor >= 30
    SELECT
        concat_ws('-', substr(partition_date, 1, 4), substr(partition_date, 5, 2), substr(partition_date, 7, 2)) AS partition_date,
        cl_id,
        count(*) AS wel_pv
    FROM online.bl_hdfs_maidian_updates
    WHERE partition_date >= '20200724'
      AND partition_date <= regexp_replace(date_sub(current_date(), 1), '-', '')
      AND page_name = 'welfare_detail'
      AND action = 'page_view'
      AND CAST(split(app_version, '\\.')[1] AS INT) >= 30
      AND CAST(split(app_version, '\\.')[0] AS INT) = 7
    GROUP BY partition_date, cl_id
) t3
    -- the client id of the tracking event is matched against the device id of t1
    ON t1.partition_date = t3.partition_date
    AND t1.device_id = t3.cl_id

LEFT JOIN
(-- t4: distinct paid orders per device and pay day, pure users only
    SELECT
        device_id,
        substr(pay_time, 1, 10) AS pay_date,
        count(DISTINCT order_id) AS pay_num
    FROM online.ml_trade_order_detail_day   -- trade order detail table
    -- read the snapshot partition of yesterday and keep orders paid from 20200724 up to yesterday
    WHERE partition_date = regexp_replace(date_sub(current_date(), 1), '-', '')
      AND regexp_replace(substr(pay_time, 1, 10), '-', '')
            BETWEEN '20200724' AND regexp_replace(date_sub(current_date(), 1), '-', '')
      AND is_pure_user = 'true'        -- pure users only
    GROUP BY device_id, substr(pay_time, 1, 10)
) t4
    ON t1.partition_date = t4.pay_date
    AND t1.device_id = t4.device_id

LEFT JOIN
(-- t5: valid clues per device and day (number of clues as clue_num)
    -- DATA_DAY (yyyyMMdd) is reformatted to yyyy-MM-dd so it can be joined to t1.partition_date.
    SELECT
        from_unixtime(unix_timestamp(T1.DATA_DAY,'yyyyMMdd'),'yyyy-MM-dd') dt,device_id,count(1) AS clue_num
    FROM
    (
        -- One row per (user, merchant, month) that had a valid consult, a pure-user payment
        -- or a qualifying call in that month. DATA_DAY is the earliest of the first consult,
        -- first payment and first call dates. The 99999999 sentinel keeps a missing side
        -- from winning the least() comparison.
        SELECT NVL(T1.USER_ID,T2.USER_ID) AS USER_ID,
                NVL(T1.merchant_id,T2.merchant_id) AS merchant_id,
                NVL(T1.DATA_MONTH,T2.CALL_MONTH) AS DATA_MONTH,
                least(NVL(DATA_DAY,'99999999'),NVL(FIRST_CALL_DATE,'99999999')) AS DATA_DAY,
                NVL(T1.CONSULT_NUM,0) AS CONSULT_NUM,
                NVL(T1.PAY_NUM,0) AS PAY_NUM,
                NVL(T2.call_num,0) AS CALL_NUM
        FROM
        (
            -- consults FULL JOIN payments, keyed by (user, merchant, month)
            SELECT
                NVL(T3.USER_ID,T4.USER_ID) AS USER_ID,
                NVL(T3.merchant_id,T4.merchant_id) AS merchant_id,
                NVL(T3.CONSULT_MONTH,T4.PAY_MONTH) AS DATA_MONTH,
                least(NVL(first_consult_date,'99999999'),NVL(FIRST_PAY_DATE,'99999999')) AS DATA_DAY,
                NVL(T3.CONSULT_NUM,0) AS CONSULT_NUM,
                NVL(T4.PAY_NUM,0) AS PAY_NUM
            FROM
            (
                -- first valid consult date per (user, merchant, month) since 20200701
                SELECT
                    user_id
                    ,merchant_id
                    ,substr(partition_date,1,6) AS consult_month
                    ,min(partition_date) AS first_consult_date
                    ,1 AS consult_num
                FROM  online.AL_COMMUNITY_USER_VALID_CONSULT_DAY
                WHERE
                    partition_date  between '20200701' AND regexp_replace(date_sub(current_date(),1),'-','')
                    AND is_valid_consult='true'
                    AND merchant_id IS NOT NULL
                GROUP BY user_id,merchant_id,substr(partition_date,1,6)
            )T3
            FULL JOIN
            (
                -- first pure-user payment date per (user, merchant, month) since 2020-07-01,
                -- read from the snapshot partition of yesterday
                SELECT
                    T.USER_ID,
                    T.merchant_id,
                    regexp_replace(SUBSTR(T.PAY_DATE,1,7),'-','') AS PAY_MONTH,
                    MIN(regexp_replace(SUBSTR(T.PAY_DATE,1,10),'-','')) AS FIRST_PAY_DATE,
                    1 AS PAY_NUM
                FROM  ONLINE.ML_TRADE_ORDER_DETAIL_DAY T
                WHERE
                    T.PARTITION_DATE = regexp_replace(date_sub(current_date(),1),'-','')
                    -- NOTE(review): PAY_DATE is cut to 10 characters elsewhere, which suggests it
                    -- carries a time part - if so, confirm the upper bound still includes
                    -- payments made on the last day
                    AND T.PAY_DATE between '2020-07-01' AND date_sub(current_date(),1)
                    AND T.is_pure_user='true'
                GROUP BY T.USER_ID,T.merchant_id,regexp_replace(SUBSTR(T.PAY_DATE,1,7),'-','')
            ) T4
            ON T3.USER_ID = T4.USER_ID AND T3.merchant_id = T4.merchant_id AND T3.CONSULT_MONTH = T4.PAY_MONTH
            GROUP BY NVL(T3.USER_ID,T4.USER_ID),NVL(T3.merchant_id,T4.merchant_id),NVL(T3.CONSULT_MONTH,T4.PAY_MONTH),
                least(NVL(first_consult_date,'99999999'),NVL(FIRST_PAY_DATE,'99999999')),NVL(T3.CONSULT_NUM,0),NVL(T4.PAY_NUM,0)
        )T1

        FULL JOIN
        (
            -- first qualifying call date per (user, merchant, month).
            -- The call date here already has its dashes removed, so the regexp_replace
            -- around the month substring has nothing left to strip.
            SELECT user_id,merchant_id,regexp_replace(SUBSTR(a.partition_date,1,6),'-','') AS CALL_MONTH,
            MIN(partition_date) as FIRST_CALL_DATE,1 AS call_num
            FROM
            (
                SELECT sub_id,REGEXP_REPLACE(SUBSTR(call_time,0,10), '-', '') as partition_date
                FROM  tl.tl_gm_sl_ali_virtual_phone_call_detail -- call record table (call_type is the call type)
                WHERE partition_day=regexp_replace(date_sub(current_date(),1),'-','')
                AND REGEXP_REPLACE(SUBSTR(call_time,0,10), '-', '')>='20200701'
                AND REGEXP_REPLACE(SUBSTR(call_time,0,10), '-', '')<=regexp_replace(date_sub(current_date(),1),'-','')
                AND start_time+ interval 30 second <= release_time -- call lasted 30 seconds or more
            )a
            -- Resolve the call to its lead task through the binding tables. The final inner
            -- join to e drops calls that do not reach a lead task, so the LEFT JOINs before
            -- it cannot contribute unmatched rows.
            LEFT JOIN
            (
                SELECT id,sub_id
                FROM tl.tl_gm_sl_ali_virtual_phone_binding -- Ali binding record table
                WHERE partition_day=regexp_replace(date_sub(current_date(),1),'-','')
            )b
                ON a.sub_id = b.sub_id
            LEFT JOIN
            (
                SELECT id,platform_binding_id
                FROM tl.tl_gm_sl_virtual_phone_binding
                WHERE partition_day=regexp_replace(date_sub(current_date(),1),'-','')
            )c
                ON b.id = c.platform_binding_id
            LEFT JOIN
            (
                SELECT phone_binding_id,lead_task_id
                FROM tl.tl_gm_sl_lead_task_phone_binding
                WHERE partition_day=regexp_replace(date_sub(current_date(),1),'-','')
            )d
                ON c.id = d.phone_binding_id
            JOIN
            (
                SELECT id,user_id,merchant_id
                FROM tl.tl_gm_sl_lead_task  -- lead task table (a row is written after the user taps authorize)
                WHERE partition_day=regexp_replace(date_sub(current_date(),1),'-','')
                AND source='2' -- phone authorization triggered by user action
            )e
                ON d.lead_task_id = e.id
            GROUP BY user_id,merchant_id,regexp_replace(SUBSTR(a.partition_date,1,6),'-','')
        )T2
        ON T1.USER_ID = T2.USER_ID AND T1.merchant_id = T2.merchant_id AND T1.DATA_MONTH = T2.CALL_MONTH
    )T1
    -- Inner join: clues whose user has no ml_user_updates row on DATA_DAY are dropped.
    JOIN
    (-- first entry of device_list for the user on that day (empty string when the list is empty)
        SELECT user_id,partition_date,
              if(size(device_list) > 0, device_list [ 0 ], '') AS device_id
        FROM online.ml_user_updates
        WHERE partition_date>='20200724' AND partition_date<=regexp_replace(date_sub(current_date(),1),'-','')
    )T2
        ON T1.USER_ID = T2.USER_ID AND T1.DATA_DAY = T2.PARTITION_DATE
    GROUP BY from_unixtime(unix_timestamp(T1.DATA_DAY,'yyyyMMdd'),'yyyy-MM-dd'),device_id
)t5
    ON t1.partition_date = t5.dt
    AND t1.device_id = t5.device_id

LEFT JOIN
(-- t6: valid clues per device and day, after excluding listed internal users and keeping
    -- only devices that appear in ml_device_day_active_status with active_type 1, 2 or 4.
    -- The clue derivation (inner T1) repeats the one used in the t5 subquery.
    SELECT
        from_unixtime(unix_timestamp(T1.DATA_DAY,'yyyyMMdd'),'yyyy-MM-dd') dt,dev.device_id,count(1) AS clue_num
    FROM
    (
        -- One row per (user, merchant, month) that had a valid consult, a pure-user payment
        -- or a qualifying call in that month. DATA_DAY is the earliest of the first consult,
        -- first payment and first call dates. The 99999999 sentinel keeps a missing side
        -- from winning the least() comparison.
        SELECT NVL(T1.USER_ID,T2.USER_ID) AS USER_ID,
                NVL(T1.merchant_id,T2.merchant_id) AS merchant_id,
                NVL(T1.DATA_MONTH,T2.CALL_MONTH) AS DATA_MONTH,
                least(NVL(DATA_DAY,'99999999'),NVL(FIRST_CALL_DATE,'99999999')) AS DATA_DAY,
                NVL(T1.CONSULT_NUM,0) AS CONSULT_NUM,
                NVL(T1.PAY_NUM,0) AS PAY_NUM,
                NVL(T2.call_num,0) AS CALL_NUM
        FROM
        (
            -- consults FULL JOIN payments, keyed by (user, merchant, month)
            SELECT
                NVL(T3.USER_ID,T4.USER_ID) AS USER_ID,
                NVL(T3.merchant_id,T4.merchant_id) AS merchant_id,
                NVL(T3.CONSULT_MONTH,T4.PAY_MONTH) AS DATA_MONTH,
                least(NVL(first_consult_date,'99999999'),NVL(FIRST_PAY_DATE,'99999999')) AS DATA_DAY,
                NVL(T3.CONSULT_NUM,0) AS CONSULT_NUM,
                NVL(T4.PAY_NUM,0) AS PAY_NUM
            FROM
            (
                -- first valid consult date per (user, merchant, month) since 20200701
                SELECT
                    user_id
                    ,merchant_id
                    ,substr(partition_date,1,6) AS consult_month
                    ,min(partition_date) AS first_consult_date
                    ,1 AS consult_num
                FROM  online.AL_COMMUNITY_USER_VALID_CONSULT_DAY
                WHERE
                    partition_date  between '20200701' AND regexp_replace(date_sub(current_date(),1),'-','')
                    AND is_valid_consult='true'
                    AND merchant_id IS NOT NULL
                GROUP BY user_id,merchant_id,substr(partition_date,1,6)
            )T3
            FULL JOIN
            (
                -- first pure-user payment date per (user, merchant, month) since 2020-07-01,
                -- read from the snapshot partition of yesterday
                SELECT
                    T.USER_ID,
                    T.merchant_id,
                    regexp_replace(SUBSTR(T.PAY_DATE,1,7),'-','') AS PAY_MONTH,
                    MIN(regexp_replace(SUBSTR(T.PAY_DATE,1,10),'-','')) AS FIRST_PAY_DATE,
                    1 AS PAY_NUM
                FROM  ONLINE.ML_TRADE_ORDER_DETAIL_DAY T
                WHERE
                    T.PARTITION_DATE = regexp_replace(date_sub(current_date(),1),'-','')
                    -- NOTE(review): PAY_DATE is cut to 10 characters elsewhere, which suggests it
                    -- carries a time part - if so, confirm the upper bound still includes
                    -- payments made on the last day
                    AND T.PAY_DATE between '2020-07-01' AND date_sub(current_date(),1)
                    AND T.is_pure_user='true'
                GROUP BY T.USER_ID,T.merchant_id,regexp_replace(SUBSTR(T.PAY_DATE,1,7),'-','')
            ) T4
            ON T3.USER_ID = T4.USER_ID AND T3.merchant_id = T4.merchant_id AND T3.CONSULT_MONTH = T4.PAY_MONTH
            GROUP BY NVL(T3.USER_ID,T4.USER_ID),NVL(T3.merchant_id,T4.merchant_id),NVL(T3.CONSULT_MONTH,T4.PAY_MONTH),
                least(NVL(first_consult_date,'99999999'),NVL(FIRST_PAY_DATE,'99999999')),NVL(T3.CONSULT_NUM,0),NVL(T4.PAY_NUM,0)
        )T1

        FULL JOIN
        (
            -- first qualifying call date per (user, merchant, month).
            -- The call date here already has its dashes removed, so the regexp_replace
            -- around the month substring has nothing left to strip.
            SELECT user_id,merchant_id,regexp_replace(SUBSTR(a.partition_date,1,6),'-','') AS CALL_MONTH,
            MIN(partition_date) as FIRST_CALL_DATE,1 AS call_num
            FROM
            (
                SELECT sub_id,REGEXP_REPLACE(SUBSTR(call_time,0,10), '-', '') as partition_date
                FROM  tl.tl_gm_sl_ali_virtual_phone_call_detail -- call record table (call_type is the call type)
                WHERE partition_day=regexp_replace(date_sub(current_date(),1),'-','')
                AND REGEXP_REPLACE(SUBSTR(call_time,0,10), '-', '')>='20200701'
                AND REGEXP_REPLACE(SUBSTR(call_time,0,10), '-', '')<=regexp_replace(date_sub(current_date(),1),'-','')
                AND start_time+ interval 30 second <= release_time -- call lasted 30 seconds or more
            )a
            -- Resolve the call to its lead task through the binding tables. The final inner
            -- join to e drops calls that do not reach a lead task, so the LEFT JOINs before
            -- it cannot contribute unmatched rows.
            LEFT JOIN
            (
                SELECT id,sub_id
                FROM tl.tl_gm_sl_ali_virtual_phone_binding -- Ali binding record table
                WHERE partition_day=regexp_replace(date_sub(current_date(),1),'-','')
            )b
                ON a.sub_id = b.sub_id
            LEFT JOIN
            (
                SELECT id,platform_binding_id
                FROM tl.tl_gm_sl_virtual_phone_binding
                WHERE partition_day=regexp_replace(date_sub(current_date(),1),'-','')
            )c
                ON b.id = c.platform_binding_id
            LEFT JOIN
            (
                SELECT phone_binding_id,lead_task_id
                FROM tl.tl_gm_sl_lead_task_phone_binding
                WHERE partition_day=regexp_replace(date_sub(current_date(),1),'-','')
            )d
                ON c.id = d.phone_binding_id
            JOIN
            (
                SELECT id,user_id,merchant_id
                FROM tl.tl_gm_sl_lead_task  -- lead task table (a row is written after the user taps authorize)
                WHERE partition_day=regexp_replace(date_sub(current_date(),1),'-','')
                AND source='2' -- phone authorization triggered by user action
            )e
                ON d.lead_task_id = e.id
            GROUP BY user_id,merchant_id,regexp_replace(SUBSTR(a.partition_date,1,6),'-','')
        )T2
        ON T1.USER_ID = T2.USER_ID AND T1.merchant_id = T2.merchant_id AND T1.DATA_MONTH = T2.CALL_MONTH
    )T1
    -- Exclusion list of user ids. Used as an anti-join together with the WHERE clause
    -- at the end of this subquery.
    LEFT JOIN
    (
        SELECT user_id
        FROM
        (
          -- doctor accounts
          SELECT user_id
          FROM online.tl_hdfs_doctor_view
          WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','')

          -- puppet accounts / model users
          UNION ALL
          SELECT user_id
          FROM ml.ml_c_ct_ui_user_dimen_d
          WHERE partition_day = regexp_replace(date_sub(current_date(),1),'-','')
          AND (is_puppet = 'true' or is_classifyuser = 'true')

          UNION ALL
          -- internal staff users
          SELECT user_id
          FROM dim.dim_device_user_staff

          UNION ALL
          -- users whose device history contains a device flagged is_login_doctor
          SELECT t1.user_id
          FROM
          (
              SELECT user_id, v.device_id as device_id
              FROM online.ml_user_history_detail
                  LATERAL VIEW EXPLODE(device_history_list) v AS device_id
              WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','')
          ) t1
          JOIN
          (
              SELECT device_id
              FROM online.ml_device_history_detail
              WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','')
              AND is_login_doctor = '1'
          ) t2
              ON t1.device_id = t2.device_id
        )a
        -- collapse the UNION ALL branches to one row per user
        GROUP BY user_id
    )T2
        ON T1.user_id=T2.user_id

    -- First entry of device_list for the user on DATA_DAY. Although written as a LEFT JOIN,
    -- the inner join to dev below is keyed on T3 columns, so clues without a T3 match
    -- are dropped.
    LEFT JOIN
    (
        SELECT t.user_id,partition_date,
                if(size(t.device_list) > 0, device_list [ 0 ], '') device_id
        FROM online.ml_user_updates t
        WHERE t.partition_date >= '20200701'
        and t.partition_date <=regexp_replace(date_sub(current_date(),1),'-','')
    )T3
        ON T1.user_id = T3.user_id
        AND T1.DATA_DAY = T3.partition_date
    JOIN
    ( -- 1. remove devices coming from bot-traffic and offer-wall channels
      -- (presumably this is what restricting active_type to 1, 2 and 4 achieves - TODO confirm)
        SELECT partition_date,device_id
        FROM online.ml_device_day_active_status
        where partition_date >= '20200701'
        AND partition_date <=regexp_replace(date_sub(current_date(),1),'-','')
        AND active_type in ('1','2','4')
    )dev
    on dev.device_id = T3.device_id
    AND dev.partition_date = T3.partition_date
    -- keep only clues whose user did not match the exclusion list (a matched empty-string id also passes)
    WHERE (T2.user_id is null or T2.user_id = '')
    GROUP BY from_unixtime(unix_timestamp(T1.DATA_DAY,'yyyyMMdd'),'yyyy-MM-dd'),dev.device_id
)t6
    ON t1.partition_date = t6.dt
    AND t1.device_id = t6.device_id
-- final grain: one output row per day and test group
GROUP BY t1.partition_date,grey_type


