内容灰度数据.sql 12.6 KB

---插入每天数据
SELECT mas.partition_date day_id
		,device_os_type
		,active_type
		,grey_type
		,case when t1.page_name in ('post_detail','user_post_detail','doctor_post_detail') then '帖子详情页'
			  when t1.page_name in ('diary_detail') then '日记本详情页'
			  when t1.page_name in ('topic_detail') then '日记帖详情页'
			  when t1.page_name in ('question_answer_detail') then '问答详情页' end as page_name
		,nvl(sum(t1.pv),0) as content_pv
		,nvl(count(distinct t1.cl_id),0) as content_uv
		,nvl(sum(t4.pv),0) as wel_exp_pv
		,nvl(sum(t5.pv),0) as content_exp_pv
		,nvl(sum(t2.pv),0) as wel_click_pv
		,nvl(sum(t3.pv),0) as content_click_pv
		,nvl(sum(t6.pv),0) as slide_wel_click_pv
		,nvl(sum(t7.pv),0) as self_wel_click_pv
FROM
(
	SELECT
	partition_date,device_id,device_os_type
	,case WHEN active_type = '4'  THEN '老活跃设备'
    		      WHEN active_type  in ('1','2')  then '新增设备' END as active_type
    ,CASE WHEN substr(convup(setencryption(device_id,'sha-1'),16,10),-1) in ('0','1','2','3','4') THEN '灰度' ELSE '非灰' END AS grey_type
	FROM online.ml_device_day_active_status
	where partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
	AND active_type in ('1','2','4')
	AND first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
	        ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
	        ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
	        ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
	        ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
	        ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
	        ,'promotion_shike','promotion_julang_jl03','promotion_zuimei')
    AND first_channel_source_type not like 'promotion\_jf\_%'
)mas
join
(
	SELECT partition_date,cl_id,page_name,count(cl_id) as pv
	FROM online.bl_hdfs_maidian_updates
	WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
	AND page_name in ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_answer_detail')
	AND action='page_view'
	AND int(split(app_version,'\\.')[1]) >= 30
	group by partition_date,cl_id,page_name
)t1
on mas.partition_date=t1.partition_date
and mas.device_id=t1.cl_id
left join
(--你可能喜欢美购卡片点击
	SELECT partition_date,cl_id,page_name,count(1) as pv
	FROM
	(
		SELECT partition_date,cl_id,page_name,business_id,params['card_id'],app_session_id
		FROM online.bl_hdfs_maidian_updates
		WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
		AND action='on_click_card'
		and params['card_content_type']='service'
		and params['in_page_pos']='bottom'
		and params['tab_name'] is null--与横划区域区分
		and ((params['position'] is not null and cl_type='android')
				or (params['card_but_pos'] is null and cl_type='ios')) --与关联卡片区分
		AND page_name in ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_answer_detail')
		AND int(split(app_version,'\\.')[1]) >= 30
		group by partition_date,cl_id,page_name,business_id,params['card_id'],app_session_id
	)a
	group by partition_date,cl_id,page_name
)t2
on t1.partition_date=t2.partition_date
and t1.cl_id=t2.cl_id
and t1.page_name=t2.page_name
left join
(--你可能喜欢内容卡片点击
	SELECT partition_date,cl_id,page_name,count(1) as pv
	FROM
	(
		SELECT partition_date,cl_id,page_name,business_id,params['card_id'],app_session_id
		FROM online.bl_hdfs_maidian_updates
		WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
		AND action='on_click_card'
		and params['card_content_type']<>'service'
		and params['in_page_pos']='bottom'
		and params['tab_name'] is null
		AND page_name in ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_answer_detail')
		AND int(split(app_version,'\\.')[1]) >= 30
		group by partition_date,cl_id,page_name,business_id,params['card_id'],app_session_id
	)a
	group by partition_date,cl_id,page_name
)t3
on t1.partition_date=t3.partition_date
and t1.cl_id=t3.cl_id
and t1.page_name=t3.page_name
left join
(--你可能喜欢美购卡片曝光
	SELECT partition_date,cl_id,page_name,count(1) as pv
	FROM
	(
		select partition_date,cl_id,page_name,business_id,card_id,app_session_id
		from online.ml_community_precise_exposure_detail
		where partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
		and page_name in ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_answer_detail')
		and card_content_type='service'
		and action ='page_precise_exposure'
		and params['tab_name']='feed_recommend'
		and params['in_page_pos']='bottom'
		and int(split(app_version,'\\.')[1]) >= 30
		and is_exposure='1'
		group by partition_date,cl_id,page_name,business_id,card_id,app_session_id
	)a
	group by partition_date,cl_id,page_name
)t4
on t1.partition_date=t4.partition_date
and t1.cl_id=t4.cl_id
and t1.page_name=t4.page_name
left join
(--你可能喜欢内容卡片曝光
	SELECT partition_date,cl_id,page_name,count(1) as pv
	FROM
	(
		select partition_date,cl_id,page_name,business_id,card_id,app_session_id
		from online.ml_community_precise_exposure_detail
		where partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
		and page_name in ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_answer_detail')
		and card_content_type in ('diary','user_post','answer')
		and action ='page_precise_exposure'
		and params['tab_name']='feed_recommend'
		and params['in_page_pos']='bottom'
		and int(split(app_version,'\\.')[1]) >= 30
		and is_exposure='1'
		group by partition_date,cl_id,page_name,business_id,card_id,app_session_id
	)a
	group by partition_date,cl_id,page_name
)t5
on t1.partition_date=t5.partition_date
and t1.cl_id=t5.cl_id
and t1.page_name=t5.page_name
left join
(--横划美购卡片点击
	SELECT partition_date,cl_id,page_name,count(1) as pv
	FROM
	(
		SELECT partition_date,cl_id,page_name,business_id,params['card_id'],app_session_id
		FROM online.bl_hdfs_maidian_updates
		WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
		and page_name in ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_answer_detail')
		and (( action='on_click_button' and params['button_name'] = 'more_recommendations')
			    or (action ='on_click_card' and params['in_page_pos']='recommand' and params['card_content_type']='service')
			    or (action ='on_click_card' and params['tab_name']='hot_recommendation'  and params['card_content_type']='service'))
		and int(split(app_version,'\\.')[1]) >= 30
        group by partition_date,cl_id,page_name,business_id,params['card_id'],app_session_id
	)a
	group by partition_date,cl_id,page_name
)t6
on t1.partition_date=t6.partition_date
and t1.cl_id=t6.cl_id
and t1.page_name=t6.page_name
left join
(--关联卡片
	SELECT partition_date,cl_id,page_name,count(1) as pv
	FROM
	(
		SELECT partition_date,cl_id,page_name,business_id,params['card_id'],app_session_id
		FROM online.bl_hdfs_maidian_updates
		WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
		AND (get_json_object(params['extra_param'], '$.type')='交互栏'
		       or get_json_object(params['extra_param'], '$.jump_from')='msg_link'
		       or params['in_page_pos']='top'
		       or (params['in_page_pos']='bottom' and params['position'] is null and cl_type='android')
				or (params['in_page_pos']='bottom' and params['card_but_pos'] is not null and cl_type='ios'))
		AND action='on_click_card'
		and params['card_content_type']='service'
		AND page_name in ('diary_detail','topic_detail')
		AND params['position'] is null
		and int(split(app_version,'\\.')[1]) >= 30
		group by partition_date,cl_id,page_name,business_id,params['card_id'],app_session_id
	)a
	group by partition_date,cl_id,page_name
)t7
on t1.partition_date=t7.partition_date
and t1.cl_id=t7.cl_id
and t1.page_name=t7.page_name
LEFT JOIN
(
    select distinct device_id
    from ml.ml_d_ct_dv_devicespam_d  --去除机构刷单设备,即作弊设备(浏览和曝光事件去除)
    WHERE partition_day=regexp_replace(DATE_SUB(current_date,1) ,'-','')

    union all

    select distinct device_id
    from dim.dim_device_user_staff   --去除内网用户
)spam_pv
on spam_pv.device_id=mas.device_id
LEFT JOIN
(
    SELECT partition_date,device_id
    FROM
        (--找出user_id当天活跃的第一个设备id
      SELECT user_id,partition_date,
              if(size(device_list) > 0, device_list [ 0 ], '') AS device_id
        FROM online.ml_user_updates
        WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
    )t1
            JOIN
        (  --医生账号
      SELECT distinct user_id
      FROM online.tl_hdfs_doctor_view
      WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')

      --马甲账号/模特用户
      UNION ALL
      SELECT user_id
      FROM ml.ml_c_ct_ui_user_dimen_d
      WHERE partition_day = regexp_replace(DATE_SUB(current_date,1) ,'-','')
      AND (is_puppet = 'true' or is_classifyuser = 'true')

      UNION ALL
      --公司内网覆盖用户
      select distinct user_id
      from dim.dim_device_user_staff

      UNION ALL
      --登陆过医生设备
      SELECT distinct t1.user_id
        FROM
        (
            SELECT user_id, v.device_id as device_id
            FROM online.ml_user_history_detail
                LATERAL VIEW EXPLODE(device_history_list) v AS device_id
            WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
        ) t1
        JOIN
        (
            SELECT device_id
            FROM online.ml_device_history_detail
            WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
            AND is_login_doctor = '1'
        ) t2
            ON t1.device_id = t2.device_id
    )t2
        on t1.user_id=t2.user_id
    group by partition_date,device_id
)dev
on mas.partition_date=dev.partition_date and mas.device_id=dev.device_id
WHERE spam_pv.device_id IS NULL
   and dev.device_id is null
group by mas.partition_date
		,device_os_type,active_type,grey_type
		,case when t1.page_name in ('post_detail','user_post_detail','doctor_post_detail') then '帖子详情页'
			  when t1.page_name in ('diary_detail') then '日记本详情页'
			  when t1.page_name in ('topic_detail') then '日记帖详情页'
			  when t1.page_name in ('question_answer_detail') then '问答详情页' end
order by 1,2,3,4,5;




--引用数据


SELECT
    day_id as `日期`,
    device_os_type as `设备类型`,
    active_type as `活跃类型`,
    grey_type as `灰度`,
    page_name as `页面`,
    NVL(CONCAT(ROUND(wel_click_pv/wel_exp_pv*100,2),'%'),0) as `内容页你可能喜欢美购卡片点击PV/该类卡片曝光PV`,
    NVL(CONCAT(ROUND(content_click_pv/content_exp_pv*100,2),'%'),0) as `内容页你可能喜欢内容卡片点击PV/该类卡片曝光PV`,
    content_pv as `内容详情页pv`,
    content_uv as `内容详情页uv`,
    wel_exp_pv as `你可能喜欢美购卡片曝光pv`,
    content_exp_pv as `你可能喜欢内容卡片曝光pv`,
    wel_click_pv as `你可能喜欢美购卡片点击pv`,
    content_click_pv as `你可能喜欢内容卡片点击pv`,
    slide_wel_click_pv as `横划卡片点击pv`,
    self_wel_click_pv as `关联卡片点击pv`
FROM pm.tl_pm_contentpage_ctr
WHERE partition_day>='20200730' and partition_day<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
order by `日期` desc,`设备类型`,`活跃类型`,`灰度`,`页面`

sparkConf.set("prod.jerry.jdbcuri", "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")

CREATE TABLE conent_detail_page_grayscale_ctr(
   day_id varchar(100),
   device_os_type varchar(100),
   active_type varchar(100),
   grey_type varchar(100),
   page_name varchar(100),
   content_pv BIGINT(20),
   content_uv BIGINT(20),
   wel_exp_pv BIGINT(20),
   content_exp_pv BIGINT(20),
   wel_click_pv BIGINT(20),
   content_click_pv BIGINT(20),
   slide_wel_click_pv BIGINT(20),
   self_wel_click_pv BIGINT(20),
   partition_day varchar(100),
   neirong_ctr FLOAT,
   meigou_ctr FLOAT,
   grey_neirong_ctr FLOAT,
   grey_meigou_ctr FLOAT,
   pid varchar(100),
   PRIMARY KEY ( pid )
)ENGINE=InnoDB DEFAULT CHARSET=utf8;

alert 


select * from conent_detail_page_grayscale_ctr as s1 left join conent_detail_page_grayscale_ctr as s2 on s1.device_os_type = s2.device_os_type and s1.active_type = s2.active_type and page_name