-- Session setup for the report query below: job queue, role, and custom UDFs.
SET mapreduce.job.queuename=data;
SET ROLE admin;

-- Register the two custom UDFs used by the gray-sample predicate in the query.
-- NOTE(review): presumably both classes are shipped in this jar - confirm.
ADD JAR hdfs:///user/hive/share/lib/udf/hive-udf-1.0-SNAPSHOT.jar;
CREATE TEMPORARY FUNCTION convup        AS 'com.gmei.hive.common.udf.UDFConvUpgrade';
CREATE TEMPORARY FUNCTION setencryption AS 'com.gmei.hive.common.udf.UDFStringSetEncryption';

-- Daily funnel report for the AI self-test conversation page
-- (page_name = ai_conversation_detail), from 20200720 through yesterday.
--
-- Output: one row per partition_date. For every funnel step it reports
--   uv = number of distinct devices, pv = number of events:
--     1. self-test home page view
--     2. first question shown
--     3. first-level answer click
--     4. self-test completed
--     5. landing-page card click
--
-- Population: devices from the daily active status table that pass the
-- channel filter and the gray sample, minus devices flagged as spam and
-- minus devices tied to doctor, puppet, model or staff accounts.
--
-- Every step subquery is aggregated to one row per (partition_date, cl_id),
-- so the LEFT JOINs do not multiply rows of the base population.
SELECT mas.partition_date
        ,count(distinct t1.cl_id) as `AI自测首页uv`
        ,sum(t1.pv) as `AI自测首页pv`
        ,count(distinct t5.cl_id) as `第一个问题出现uv`
        ,sum(t5.pv) as `第一个问题出现pv`
        ,count(distinct t4.cl_id) as `第一层答案点击uv`
        ,sum(t4.pv) as `第一层答案点击pv`
        ,count(distinct t2.cl_id) as `完成AI自测uv`
        ,sum(t2.pv) as `完成AI自测pv`
        ,count(distinct t3.cl_id) as `落地页点击uv`
        ,sum(t3.pv) as `落地页点击pv`
FROM
(
    -- Base population: daily active devices inside the report window.
    -- device_os_type and the relabelled active_type are selected here but are
    -- not referenced by the outer query.
    SELECT
    partition_date,device_id,device_os_type
    -- Label values: active_type 1 or 2 = new device, 4 = returning active device.
    ,case when active_type in ('1','2') then '新增设备'
          when active_type ='4' then '老活跃设备' end  as active_type
    FROM online.ml_device_day_active_status
    where partition_date>='20200720' AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
    AND active_type in ('1','2','4')
    -- Drop devices whose first channel is in this exclusion list, is blank, or is unknown.
    and first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
    ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
    ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
    ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
    ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
    ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
    ,'promotion_shike','promotion_julang_jl03','promotion_zuimei','','unknown')
    AND first_channel_source_type not like 'promotion\_jf\_%'
    -- Gray sample: keep a device when the last two characters of the converted
    -- SHA-1 of its device_id, read as a number, are divisible by 20.
    -- NOTE(review): presumably convup(x,16,10) converts hex to decimal, which
    -- would make this roughly a 5 percent bucket - confirm against the UDF.
    AND substr(convup(setencryption(device_id,'sha-1'),16,10),-2,2)%20 = 0
)mas
left join
(
    -- Step 1: self-test page views per device per day.
    SELECT partition_date,cl_id,count(1) as pv
    FROM online.bl_hdfs_maidian_updates
    where partition_date>='20200720' AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
    and page_name = 'ai_conversation_detail'
    and action='page_view'
    group by partition_date,cl_id
)t1
on mas.partition_date=t1.partition_date and mas.device_id=t1.cl_id
left join
(
    -- Step 4: self-test completed, i.e. the clicked answer leads to a card next.
    -- pv counts exploded answer entries that match a (question, answer) pair
    -- with next_type = 1 in the question/answer snapshot for yesterday.
    SELECT a.partition_date,a.cl_id,sum(1) as pv
    FROM
    (
        -- One row per entry of the result_list click parameter.
        SELECT partition_date,json_map(t2.result,'string,string')['answer_id'] answer_id
            ,json_map(t2.result,'string,string')['question_id'] question_id
            ,cl_id
        from
        (
            SELECT partition_date,cl_id
                    ,json_split(params['result_list']) as result_list
            FROM online.bl_hdfs_maidian_updates
            where partition_date>='20200720' AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
            and page_name = 'ai_conversation_detail'
            and action='on_click_button'
            and params['button_name']='ai_answer'
        )t1
        LATERAL VIEW EXPLODE(t1.result_list) t2 as result
    )a
    join
    (
        -- next_type 1 = recommendation card (per the original note).
        select ai_question_id,ai_answer_id
        from tl.tl_zx_api_ai_qa_question_answer
        where partition_day=regexp_replace(DATE_SUB(current_date,1) ,'-','')
        and next_type=1
    )b
    on a.answer_id=b.ai_answer_id and a.question_id=b.ai_question_id
    group by a.partition_date,a.cl_id
)t2
on mas.partition_date=t2.partition_date and mas.device_id=t2.cl_id
left join
(
    -- Step 5: landing-page card clicks per device per day.
    SELECT partition_date,cl_id,count(1) as pv
    FROM online.bl_hdfs_maidian_updates
    where partition_date>='20200720' AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
    and page_name = 'ai_conversation_detail'
    and action='on_click_card'
    group by partition_date,cl_id
)t3
on mas.partition_date=t3.partition_date and mas.device_id=t3.cl_id
left join
(
    -- Step 3: first-level answer clicks, i.e. clicked answers that belong to
    -- question 2 in the question/answer snapshot for yesterday.
    SELECT a.partition_date,a.cl_id,count(1) as pv
    FROM
    (
        -- One row per entry of the result_list click parameter.
        SELECT partition_date,json_map(t2.result,'string,string')['answer_id'] answer_id
            ,cl_id
        from
        (
            SELECT partition_date,cl_id
                    ,json_split(params['result_list']) as result_list
            FROM online.bl_hdfs_maidian_updates
            where partition_date>='20200720' AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
            and page_name = 'ai_conversation_detail'
            and action='on_click_button'
            and params['button_name']='ai_answer'
        )t1
        LATERAL VIEW EXPLODE(t1.result_list) t2 as result
    )a
    JOIN
    (
        select ai_question_id,ai_answer_id
        from tl.tl_zx_api_ai_qa_question_answer
        where partition_day=regexp_replace(DATE_SUB(current_date,1) ,'-','')
        and ai_question_id=2
    )b
    on a.answer_id=b.ai_answer_id
    group by a.partition_date,a.cl_id
)t4
on mas.partition_date=t4.partition_date and mas.device_id=t4.cl_id
left join
(
    -- Step 2: first-level answer exposure (reported as first question shown).
    -- NOTE(review): concent_id looks like a misspelled variant of content_id
    -- that some events carry - confirm before removing either key.
    SELECT partition_date,cl_id,count(1) as pv
    FROM online.bl_hdfs_maidian_updates
    where partition_date>='20200720' AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
    and page_name = 'ai_conversation_detail'
    and action='report_status'
    and (params['content_id']=2 or params['concent_id']=2)
    group by partition_date,cl_id
)t5
on mas.partition_date=t5.partition_date and mas.device_id=t5.cl_id
left join
(
    -- Anti-join source: devices suspected of institution-driven traffic
    -- inflation, taken from the spam snapshot for yesterday.
    select distinct device_id
    from ml.ml_d_ct_dv_devicespam_d
    where partition_day=regexp_replace(DATE_SUB(current_date,1) ,'-','')
)spam_pv
on mas.device_id=spam_pv.device_id
left join
(
    -- Anti-join source: (date, device) pairs that belong to internal or
    -- non-organic accounts. The GROUP BY removes the duplicates produced by
    -- users that appear in more than one UNION ALL branch.
    SELECT partition_date,device_id
    FROM
    (
        -- First device in device_list for each user and day, empty string when
        -- the list is empty.
        -- The lower bound matches the start of the report window (20200720) so
        -- that every reported day gets this exclusion. A rolling 60-day bound
        -- would leave older report dates unfiltered and make their figures
        -- depend on the run date.
        SELECT user_id,partition_date,
            if(size(device_list) > 0, device_list[0], '') AS device_id
        FROM online.ml_user_updates
        WHERE partition_date>='20200720'
        AND partition_date<regexp_replace((current_date),'-','')
    )a
    join
    (
        -- Doctor accounts.
        SELECT distinct user_id
        FROM online.tl_hdfs_doctor_view
        WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')

        -- Puppet accounts and model users.
        UNION ALL
        SELECT user_id
        FROM ml.ml_c_ct_ui_user_dimen_d
        WHERE partition_day = regexp_replace(DATE_SUB(current_date,1) ,'-','')
        AND (is_puppet = 'true' or is_classifyuser = 'true')

        UNION ALL
        -- Users covered by the company intranet (staff).
        select distinct user_id
        from dim.dim_device_user_staff

        UNION ALL
        -- Users whose device history contains a device flagged is_login_doctor.
        SELECT distinct t1.user_id
        FROM
        (
            SELECT user_id, v.device_id as device_id
            FROM online.ml_user_history_detail
                LATERAL VIEW EXPLODE(device_history_list) v AS device_id
            WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
        ) t1
        JOIN
        (
            SELECT device_id
            FROM online.ml_device_history_detail
            WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
            AND is_login_doctor = '1'
        ) t2
            ON t1.device_id = t2.device_id
    )b
    on a.user_id=b.user_id
    group by partition_date,device_id
)dev
on mas.partition_date=dev.partition_date and mas.device_id=dev.device_id
-- Keep only base devices with no match in either exclusion set.
WHERE spam_pv.device_id IS NULL
and dev.device_id is NULL
group by mas.partition_date;