-- Session setup for the sign-in deep-action job.
-- YARN queue the MapReduce jobs are submitted to.
SET mapreduce.job.queuename=data;

-- Map and reduce tasks both run in 8192 MB containers with an 8000 MB JVM heap.
-- NOTE(review): this leaves under 200 MB of non-heap headroom per container - confirm it is intended.
SET mapreduce.map.memory.mb=8192;
SET mapreduce.map.java.opts=-Xmx8000m;
SET mapreduce.reduce.memory.mb=8192;
SET mapreduce.reduce.java.opts=-Xmx8000m;

-- Let Hive convert eligible joins to map joins, and pin the reducer count to 20.
SET hive.auto.convert.join=true;
SET mapred.reduce.tasks=20;

-- Switch the session to the admin role before registering the UDF jar.
SET role admin;


-- Register the shared UDF jar and two temporary functions from it.
-- NOTE(review): convup and setencryption are not referenced by the INSERT statement
-- visible in this file - confirm whether they are still needed.
ADD JAR hdfs:///user/hive/share/lib/udf/hive-udf-1.0-SNAPSHOT.jar;
CREATE TEMPORARY FUNCTION convup AS 'com.gmei.hive.common.udf.UDFConvUpgrade';
CREATE TEMPORARY FUNCTION setencryption AS 'com.gmei.hive.common.udf.UDFStringSetEncryption';


-- Rebuilds one partition of pm.tl_pm_sign_deepaction_d with month-to-date figures
-- (first day of the month of yesterday through yesterday), one row per
-- day, device OS type and sign-in user type.
-- user_type values: non-sign-in user / first-day sign-in user / non-first-day sign-in user.
INSERT OVERWRITE TABLE pm.tl_pm_sign_deepaction_d PARTITION (PARTITION_DAY = ${partition_day})
SELECT
    mas.partition_date AS day_id
    ,device_os_type
    ,case when c.device_id is null then '非签到用户'
          when c.sign_date=c.first_sign_date then '首日签到用户'
          when c.sign_date>c.first_sign_date then '非首日签到用户' end AS user_type
    -- distinct active devices
    ,count(distinct mas.device_id) AS active_num
    -- detail-page views and the devices that produced them
    ,sum(wel_pv) AS wel_pv
    ,count(distinct case when wel_pv <> 0 then a.cl_id end) AS wel_uv
    -- valid leads and the devices that produced them
    ,sum(CASE WHEN b.device_id is NOT NULL THEN b.valid_pv END) AS clue_pv
    ,count(distinct case when valid_pv <> 0 then b.device_id end) AS clue_uv

FROM
(
    -- mas: the driving set, one row per active device and day, with excluded
    -- acquisition channels filtered out.
    -- NOTE(review): rows whose first_channel_source_type is NULL do not satisfy
    -- the NOT IN / NOT LIKE predicates and are therefore dropped - confirm this is intended.
    SELECT
        partition_date
        ,device_os_type
        ,device_id
    FROM online.ml_device_day_active_status
    WHERE partition_date >= REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
      AND partition_date <= regexp_replace(date_sub(current_date(),1),'-','')
      AND active_type IN ('4')
      AND first_channel_source_type NOT IN (
            'yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5',
            'mxyc1','mxyc2','mxyc3',
            'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling',
            'mocha','mizhe','meika','lamabang',
            'js-az1','js-az2','js-az3','js-az4','js-az5',
            'jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5',
            'toufang1','toufang2','toufang3','toufang4','toufang5','toufang6',
            'TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4','TF-toufang5',
            'tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5',
            'benzhan',
            'promotion_aso100','promotion_qianka','promotion_xiaoyu','promotion_dianru',
            'promotion_malioaso','promotion_malioaso-shequ','promotion_shike',
            'promotion_julang_jl03','promotion_zuimei'
          )
      AND first_channel_source_type NOT LIKE 'promotion\_jf\_%'
) mas
LEFT JOIN
(
    -- a: page_view counts on the welfare / organization / expert detail pages
    -- per day and cl_id, keeping only cl_ids that are absent from the
    -- device spam list of yesterday.
    SELECT
        pv.partition_date
        ,pv.cl_id
        ,pv.wel_pv
    FROM
    (
        SELECT
            partition_date
            ,cl_id
            ,count(1) AS wel_pv
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date >= REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
          AND partition_date <= regexp_replace(date_sub(current_date(),1),'-','')
          AND action = 'page_view'
          AND page_name IN ('welfare_detail','organization_detail','expert_detail')
        GROUP BY
            partition_date
            ,cl_id
    ) pv
    LEFT JOIN
    (   -- remove PV and UV suspected of being inflated by institutions
        SELECT DISTINCT device_id
        FROM ml.ml_d_ct_dv_devicespam_d
        WHERE partition_day = regexp_replace(date_sub(current_date(),1),'-','')
    ) spam_pv
        ON pv.cl_id = spam_pv.device_id
    -- anti-join: keep only rows with no match in the spam list
    WHERE spam_pv.device_id IS NULL
) a
    ON a.partition_date = mas.partition_date
   AND a.cl_id = mas.device_id
LEFT JOIN
(   -- b: number of valid leads per device and day (built from per-user data).
    -- A lead is one (user_id, merchant_id, month) combination that has at least one of:
    --   T3 - a private-message conversation flagged is_valid
    --   T4 - a paid order flagged is_pure_user
    --   T2 - a connected virtual-phone call tied to a phone-authorization lead task
    -- Each lead is dated by the earliest of those events (date_day) and attributed to
    -- the first entry of the user device_list on that day.
    SELECT
        user_dev.device_id
        ,lead_day.date_day
        ,count(1) AS valid_pv
    FROM
    (
        -- Merge consult/pay leads (T1) with call leads (T2).
        -- The 99999999 sentinel sorts after any real yyyymmdd value, so least()
        -- returns the date of whichever side actually has an event.
        SELECT
            NVL(T1.USER_ID,T2.USER_ID) AS USER_ID
            ,NVL(T1.merchant_id,T2.merchant_id) AS merchant_id
            ,NVL(T1.DATE_MONTH,T2.CALL_MONTH) AS DATE_MONTH
            ,least(NVL(T1.DATE_DAY,'99999999'),NVL(T2.FIRST_CALL_DATE,'99999999')) AS DATE_DAY
            ,NVL(T1.CONSULT_NUM,0) AS CONSULT_NUM
            ,NVL(T1.PAY_NUM,0) AS PAY_NUM
            ,NVL(T2.call_num,0) AS CALL_NUM
        FROM
        (
            -- T1: consult leads (T3) merged with pay leads (T4), same sentinel technique.
            SELECT
                NVL(T3.USER_ID,T4.USER_ID) AS USER_ID,
                NVL(T3.merchant_id,T4.merchant_id) AS merchant_id,
                NVL(T3.CONSULT_MONTH,T4.PAY_MONTH) AS DATE_MONTH,
                least(NVL(first_consult_date,'99999999'),NVL(FIRST_PAY_DATE,'99999999')) AS DATE_DAY,
                NVL(T3.CONSULT_NUM,0) AS CONSULT_NUM,
                NVL(T4.PAY_NUM,0) AS PAY_NUM
            FROM
            (
                -- T3: first valid conversation day per user, merchant and month
                SELECT
                    user_id
                    ,merchant_id
                    ,SUBSTR(partition_day,1,6) AS consult_month
                    ,min(partition_day) AS first_consult_date
                    ,1 AS CONSULT_NUM
                FROM ml.ml_c_et_msg_conversation_dimen_inc_d -- private-message table in the new warehouse
                WHERE partition_day >= REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
                  AND partition_day <= regexp_replace(date_sub(current_date(),1),'-','')
                  AND is_valid='true'
                GROUP BY user_id,merchant_id,SUBSTR(partition_day,1,6)
            )T3
            FULL JOIN
            (
                -- T4: first pay day per user, merchant and month, read from the
                -- order snapshot partition of yesterday
                SELECT
                    T.USER_ID,
                    T.merchant_id,
                    regexp_replace(SUBSTR(T.PAY_DATE,1,7),'-','') AS PAY_MONTH,
                    MIN(regexp_replace(SUBSTR(T.PAY_DATE,1,10),'-','')) AS FIRST_PAY_DATE,
                    1 AS PAY_NUM
                FROM  ONLINE.ML_TRADE_ORDER_DETAIL_DAY T
                WHERE
                    T.PARTITION_DATE = regexp_replace(date_sub(current_date(),1),'-','')
                    AND regexp_replace(SUBSTR(T.pay_date,1,10),'-','') >= REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
                    AND regexp_replace(SUBSTR(T.pay_date,1,10),'-','') <= regexp_replace(date_sub(current_date(),1),'-','')
                    AND T.is_pure_user='true'
                GROUP BY T.USER_ID,T.merchant_id,regexp_replace(SUBSTR(T.PAY_DATE,1,7),'-','')
            ) T4
            ON T3.USER_ID = T4.USER_ID AND T3.merchant_id = T4.merchant_id AND T3.CONSULT_MONTH = T4.PAY_MONTH
            -- grouping by every selected expression de-duplicates the merged rows
            GROUP BY NVL(T3.USER_ID,T4.USER_ID),NVL(T3.merchant_id,T4.merchant_id),NVL(T3.CONSULT_MONTH,T4.PAY_MONTH),
                least(NVL(first_consult_date,'99999999'),NVL(FIRST_PAY_DATE,'99999999')),NVL(T3.CONSULT_NUM,0),NVL(T4.PAY_NUM,0)
        )T1

        FULL JOIN
        (
            -- T2: first connected call day per user, merchant and month.
            -- a.partition_date is already a dash-free yyyymmdd string, so its first
            -- six characters are the month.
            SELECT
                e.user_id
                ,e.merchant_id
                ,SUBSTR(a.partition_date,1,6) AS CALL_MONTH
                ,MIN(a.partition_date) AS FIRST_CALL_DATE
                ,1 AS call_num
            FROM
            (
                SELECT sub_id,REGEXP_REPLACE(SUBSTR(call_time,1,10), '-', '') AS partition_date
                FROM  tl.tl_gm_sl_ali_virtual_phone_call_detail -- call record table (call_type is the type of call)
                WHERE partition_day=regexp_replace(date_sub(current_date(),1),'-','')
                  AND REGEXP_REPLACE(SUBSTR(call_time,1,10), '-', '') >= REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
                  AND REGEXP_REPLACE(SUBSTR(call_time,1,10), '-', '') <= regexp_replace(date_sub(current_date(),1),'-','')
                  AND start_time<release_time -- the call was connected
                  AND start_time>call_time
            )a
            LEFT JOIN
            (
                SELECT id,sub_id
                FROM tl.tl_gm_sl_ali_virtual_phone_binding -- Ali record table
                WHERE partition_day=regexp_replace(date_sub(current_date(),1),'-','')
            )b
                ON a.sub_id = b.sub_id
            LEFT JOIN
            (
                SELECT id,platform_binding_id
                FROM tl.tl_gm_sl_virtual_phone_binding
                WHERE partition_day=regexp_replace(date_sub(current_date(),1),'-','')
            )c
                ON b.id = c.platform_binding_id
            LEFT JOIN
            (
                SELECT phone_binding_id,lead_task_id
                FROM tl.tl_gm_sl_lead_task_phone_binding
                WHERE partition_day=regexp_replace(date_sub(current_date(),1),'-','')
            )d
                ON c.id = d.phone_binding_id
            -- the inner join to e keeps only calls that resolve to a lead task
            JOIN
            (
                SELECT id,user_id,merchant_id
                FROM tl.tl_gm_sl_lead_task  -- lead task table (rows are recorded after the user clicks to authorize)
                WHERE partition_day=regexp_replace(date_sub(current_date(),1),'-','')
                  AND source='2' -- phone authorization triggered by user action
                  -- both conditions must hold: with OR the empty-string check never excluded anything
                  AND user_id IS NOT NULL
                  AND user_id <> ''
            )e
                ON d.lead_task_id = e.id
            GROUP BY e.user_id,e.merchant_id,SUBSTR(a.partition_date,1,6)
        )T2
        ON T1.USER_ID = T2.USER_ID AND T1.merchant_id = T2.merchant_id AND T1.DATE_MONTH = T2.CALL_MONTH
    ) lead_day
    JOIN
    (
        -- map each user to the first entry of device_list on each day (empty string when the list is empty)
        SELECT t.user_id,partition_date,if(size(t.device_list) > 0, device_list [ 0 ], '') device_id
        FROM online.ml_user_updates t
        WHERE t.partition_date >= REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
          AND t.partition_date <= regexp_replace(date_sub(current_date(),1),'-','')
    ) user_dev
        ON lead_day.user_id = user_dev.user_id
       AND lead_day.date_day = user_dev.partition_date
    GROUP BY
        user_dev.device_id
        ,lead_day.date_day
)b
ON b.date_day=mas.partition_date and b.device_id=mas.device_id
LEFT JOIN
(
    -- c: sign-in status per device and day, excluding doctor accounts.
    -- Exactly one row per (device_id, sign_date): when several users share a device,
    -- the earliest first_sign_date among them is used. Grouping by first_sign_date as
    -- well would emit several rows for such a device and duplicate the matching mas
    -- row, inflating the summed PV columns of the outer query.
    -- first_sign_date is the earliest sign-in found by the filters below, so it is
    -- bounded by the date ranges those filters allow.
    SELECT
        user_dev.device_id
        ,sign_evt.sign_date
        ,min(first_sign.first_sign_date) AS first_sign_date
    FROM
    (
        -- every regular (not make-up) sign-in as user and yyyymmdd day
        SELECT user_id, REGEXP_REPLACE(substr(sign_time,1,10), '-', '') AS sign_date
        FROM online.tl_hdfs_api_user_sign_v2_view   -- user sign-in table used from 20190601 onward
        WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','')
          AND regexp_replace(substr(sign_time,1,10),'-','') >= '20190601'
          AND regexp_replace(substr(sign_time,1,10),'-','') <= regexp_replace(date_sub(current_date(),1),'-','')
          AND sign_type <> '2'  -- 2 means make-up sign-in

        UNION ALL

        -- NOTE(review): this branch needs a sign date on or after the first day of the
        -- month of yesterday and before 20190601, so it can only return rows when the
        -- job runs for a month earlier than June 2019.
        SELECT user_id, REGEXP_REPLACE(substr(sign_time,1,10), '-', '') AS sign_date
        FROM online.tl_hdfs_api_sign_record_view   -- user sign-in table used before 20190601
        WHERE partition_date = '20190531'
          AND regexp_replace(substr(sign_time,1,10),'-','') >= REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
          AND regexp_replace(substr(sign_time,1,10),'-','') < '20190601'
          AND sign_type <> '3'  -- 3 means make-up sign-in
    ) sign_evt
    LEFT JOIN
    (
        -- earliest sign-in day per user over the same two sources and filters
        SELECT user_id, REGEXP_REPLACE(min(sign_date), '-', '') AS first_sign_date
        FROM
        (
            SELECT user_id, substr(sign_time,1,10) AS sign_date
            FROM online.tl_hdfs_api_user_sign_v2_view   -- user sign-in table used from 20190601 onward
            WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','')
              AND regexp_replace(substr(sign_time,1,10),'-','') >= '20190601'
              AND regexp_replace(substr(sign_time,1,10),'-','') <= regexp_replace(date_sub(current_date(),1),'-','')
              AND sign_type <> '2'  -- 2 means make-up sign-in

            UNION ALL

            SELECT user_id, substr(sign_time,1,10) AS sign_date
            FROM online.tl_hdfs_api_sign_record_view   -- user sign-in table used before 20190601 (missing data for 20190101-20190226 and all of 201905)
            WHERE partition_date = '20190531'
              AND regexp_replace(substr(sign_time,1,10),'-','') >= REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
              AND regexp_replace(substr(sign_time,1,10),'-','') < '20190601'
              AND sign_type <> '3'  -- 3 means make-up sign-in
        ) all_sign
        GROUP BY user_id
    ) first_sign
        ON sign_evt.user_id = first_sign.user_id
    LEFT JOIN
    (
        SELECT user_id  -- doctor accounts
        FROM online.tl_hdfs_doctor_view
        WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','')
    ) doctor
        ON sign_evt.user_id = doctor.user_id
    JOIN
    (
        -- map each user to the first entry of device_list on each day (empty string when the list is empty)
        SELECT t.user_id,partition_date,
               if(size(t.device_list) > 0, device_list [ 0 ], '') device_id
        FROM online.ml_user_updates t
        WHERE t.partition_date >= REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
          AND t.partition_date <= regexp_replace(date_sub(current_date(),1),'-','')
    ) user_dev
        ON sign_evt.user_id = user_dev.user_id
       AND sign_evt.sign_date = user_dev.partition_date
    -- anti-join: drop sign-ins made by doctor accounts
    WHERE doctor.user_id IS NULL
    GROUP BY
        user_dev.device_id
        ,sign_evt.sign_date
)c
on c.device_id=mas.device_id and c.sign_date=mas.partition_date

-- One output row per day, device OS type and sign-in user type.
-- The CASE expression repeats the user_type expression of the SELECT list.
group by mas.partition_date
		,device_os_type
		,case when c.device_id is null then '非签到用户'
			  when c.sign_date=c.first_sign_date then '首日签到用户'
			  when c.sign_date>c.first_sign_date then '非首日签到用户' end
-- Sort by the select aliases rather than by positions: positional ORDER BY is only
-- honoured when the Hive position-alias setting is enabled, otherwise 1,2,3 are constants.
order by day_id, device_os_type, user_type


