Commit 2b68baec authored by 魏艺敏's avatar 魏艺敏

push codes

parent 39bb9e3e
kyc_q2=用户兴趣引导日报及ai自测日报
ai_daily=AI自测漏斗数据
ai_qa=AI自测问答选项
kyc_qa_daily=用户兴趣引导日报
#step1_1.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online bl_hdfs_maidian_updates
\ No newline at end of file
#step1_10.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive tl tl_zx_api_ai_qa_answer
\ No newline at end of file
#step1_11.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive tl tl_zx_api_ai_qa_question
\ No newline at end of file
#step1_12.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_userextra_view
\ No newline at end of file
#step1_13.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_message_view
\ No newline at end of file
#step1_14.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_conversationuserstatus_view
\ No newline at end of file
#step1_15.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_community_message_detail
\ No newline at end of file
#step1_16.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_doctor_relation_dimen_day
\ No newline at end of file
#step1_17.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive tl tl_gm_sl_ali_virtual_phone_call_detail
\ No newline at end of file
#step1_18.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive tl tl_gm_sl_ali_virtual_phone_binding
\ No newline at end of file
#step1_19.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive tl tl_gm_sl_virtual_phone_binding
\ No newline at end of file
#step1_2.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_device_day_active_status
\ No newline at end of file
#step1_20.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive tl tl_gm_sl_lead_task_phone_binding
\ No newline at end of file
#step1_21.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive tl tl_gm_sl_lead_task
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive ml ml_d_ct_dv_devicespam_d
\ No newline at end of file
#step1_4.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive tl tl_zx_api_ai_qa_question_answer
\ No newline at end of file
#step1_5.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_user_updates
\ No newline at end of file
#step1_6.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_doctor_view
\ No newline at end of file
#step1_7.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive ml ml_c_ct_ui_user_dimen_d
\ No newline at end of file
#step1_8.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_user_history_detail
\ No newline at end of file
#step1_9.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_device_history_detail
\ No newline at end of file
#step2.job
# Last step of the flow. It declares dependencies on step1_1 through step1_21
# and then issues an HTTP GET against the report service on localhost:8553.
# The URL path carries the report key kyc_q2 followed by two e-mail addresses
# (presumably the recipients of the report mail -- confirm against the service).
type=command
dependencies=step1_1,step1_2,step1_3,step1_4,step1_5,step1_6,step1_7,step1_8,step1_9,step1_10,step1_11,step1_12,step1_13,step1_14,step1_15,step1_16,step1_17,step1_18,step1_19,step1_20,step1_21
command=curl -X GET http://localhost:8553/api/report/email/kyc_q2/weiyimin@igengmei.com/weiyimin@qq.com
\ No newline at end of file
-- Session setup for the AI self-test funnel report: load the shared UDF jar
-- and register the two temporary functions that the gray-release bucket
-- predicate of the mas subquery calls (convup and setencryption).
ADD JAR hdfs:///user/hive/share/lib/udf/hive-udf-1.0-SNAPSHOT.jar;
CREATE TEMPORARY FUNCTION convup AS 'com.gmei.hive.common.udf.UDFConvUpgrade';
CREATE TEMPORARY FUNCTION setencryption AS 'com.gmei.hive.common.udf.UDFStringSetEncryption';
-- AI self-test funnel: one output row per partition_date. Every uv column is a
-- distinct device count and every pv column sums the per-device event counts
-- produced by the joined subqueries t1 to t5.
SELECT
    mas.partition_date,
    COUNT(DISTINCT t1.cl_id) AS `AI自测首页uv`,
    SUM(t1.pv) AS `AI自测首页pv`,
    COUNT(DISTINCT t5.cl_id) AS `第一个问题出现uv`,
    SUM(t5.pv) AS `第一个问题出现pv`,
    COUNT(DISTINCT t4.cl_id) AS `第一层答案点击uv`,
    SUM(t4.pv) AS `第一层答案点击pv`,
    COUNT(DISTINCT t2.cl_id) AS `完成AI自测uv`,
    SUM(t2.pv) AS `完成AI自测pv`,
    COUNT(DISTINCT t3.cl_id) AS `落地页点击uv`,
    SUM(t3.pv) AS `落地页点击pv`
FROM (
    -- Driving set: daily active devices (active_type 1, 2 or 4) since 20200720,
    -- minus the listed acquisition channels, restricted to the gray bucket.
    SELECT
        partition_date,
        device_id,
        device_os_type,
        CASE
            WHEN active_type IN ('1', '2') THEN '新增设备'
            WHEN active_type = '4' THEN '老活跃设备'
        END AS active_type
    FROM online.ml_device_day_active_status
    WHERE partition_date >= '20200720'
      AND partition_date <= regexp_replace(DATE_SUB(current_date, 1), '-', '')
      AND active_type IN ('1', '2', '4')
      AND first_channel_source_type NOT IN (
          'yqxiu1', 'yqxiu2', 'yqxiu3', 'yqxiu4', 'yqxiu5', 'mxyc1', 'mxyc2', 'mxyc3',
          'wanpu', 'jinshan', 'jx', 'maimai', 'zhuoyi', 'huatian', 'suopingjingling', 'mocha', 'mizhe', 'meika', 'lamabang',
          'js-az1', 'js-az2', 'js-az3', 'js-az4', 'js-az5', 'jfq-az1', 'jfq-az2', 'jfq-az3', 'jfq-az4', 'jfq-az5', 'toufang1',
          'toufang2', 'toufang3', 'toufang4', 'toufang5', 'toufang6', 'TF-toufang1', 'TF-toufang2', 'TF-toufang3', 'TF-toufang4',
          'TF-toufang5', 'tf-toufang1', 'tf-toufang2', 'tf-toufang3', 'tf-toufang4', 'tf-toufang5', 'benzhan', 'promotion_aso100',
          'promotion_qianka', 'promotion_xiaoyu', 'promotion_dianru', 'promotion_malioaso', 'promotion_malioaso-shequ',
          'promotion_shike', 'promotion_julang_jl03', 'promotion_zuimei', '', 'unknown'
      )
      AND first_channel_source_type NOT LIKE 'promotion\_jf\_%'
      -- gray-release sample: last two digits of the converted device hash, modulo 20, must be 0
      AND substr(convup(setencryption(device_id, 'sha-1'), 16, 10), -2, 2) % 20 = 0
) mas
LEFT JOIN (
    -- Self-test page views: page_view events on ai_conversation_detail,
    -- counted per day and device.
    SELECT
        partition_date,
        cl_id,
        COUNT(1) AS pv
    FROM online.bl_hdfs_maidian_updates
    WHERE partition_date >= '20200720'
      AND partition_date <= regexp_replace(DATE_SUB(current_date, 1), '-', '')
      AND page_name = 'ai_conversation_detail'
      AND action = 'page_view'
    GROUP BY partition_date, cl_id
) t1
    ON mas.partition_date = t1.partition_date
   AND mas.device_id = t1.cl_id
LEFT JOIN (
    -- Completed self-tests: answer clicks whose (question_id, answer_id) pair is
    -- flagged next_type = 1 (a recommendation card follows, per the original
    -- note) in yesterday's question-answer snapshot, counted per day and device.
    SELECT
        picked.partition_date,
        picked.cl_id,
        SUM(1) AS pv
    FROM (
        -- one row per entry of the result_list array attached to the click
        SELECT
            partition_date,
            json_map(item.result, 'string,string')['answer_id'] AS answer_id,
            json_map(item.result, 'string,string')['question_id'] AS question_id,
            cl_id
        FROM (
            SELECT
                partition_date,
                cl_id,
                json_split(params['result_list']) AS result_list
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= '20200720'
              AND partition_date <= regexp_replace(DATE_SUB(current_date, 1), '-', '')
              AND page_name = 'ai_conversation_detail'
              AND action = 'on_click_button'
              AND params['button_name'] = 'ai_answer'
        ) clicks
        LATERAL VIEW EXPLODE(clicks.result_list) item AS result
    ) picked
    JOIN (
        SELECT
            ai_question_id,
            ai_answer_id
        FROM tl.tl_zx_api_ai_qa_question_answer
        WHERE partition_day = regexp_replace(DATE_SUB(current_date, 1), '-', '')
          AND next_type = 1
    ) card_step
        ON picked.answer_id = card_step.ai_answer_id
       AND picked.question_id = card_step.ai_question_id
    GROUP BY picked.partition_date, picked.cl_id
) t2
    ON mas.partition_date = t2.partition_date
   AND mas.device_id = t2.cl_id
LEFT JOIN (
    -- Landing-page clicks: on_click_card events on ai_conversation_detail,
    -- counted per day and device.
    SELECT
        partition_date,
        cl_id,
        COUNT(1) AS pv
    FROM online.bl_hdfs_maidian_updates
    WHERE partition_date >= '20200720'
      AND partition_date <= regexp_replace(DATE_SUB(current_date, 1), '-', '')
      AND page_name = 'ai_conversation_detail'
      AND action = 'on_click_card'
    GROUP BY partition_date, cl_id
) t3
    ON mas.partition_date = t3.partition_date
   AND mas.device_id = t3.cl_id
LEFT JOIN (
    -- First-level answer clicks: answer clicks whose answer_id belongs to
    -- question 2 in yesterday's question-answer snapshot, per day and device.
    SELECT
        picked.partition_date,
        picked.cl_id,
        COUNT(1) AS pv
    FROM (
        -- one row per entry of the result_list array attached to the click
        SELECT
            partition_date,
            json_map(item.result, 'string,string')['answer_id'] AS answer_id,
            cl_id
        FROM (
            SELECT
                partition_date,
                cl_id,
                json_split(params['result_list']) AS result_list
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= '20200720'
              AND partition_date <= regexp_replace(DATE_SUB(current_date, 1), '-', '')
              AND page_name = 'ai_conversation_detail'
              AND action = 'on_click_button'
              AND params['button_name'] = 'ai_answer'
        ) clicks
        LATERAL VIEW EXPLODE(clicks.result_list) item AS result
    ) picked
    JOIN (
        SELECT
            ai_question_id,
            ai_answer_id
        FROM tl.tl_zx_api_ai_qa_question_answer
        WHERE partition_day = regexp_replace(DATE_SUB(current_date, 1), '-', '')
          AND ai_question_id = 2
    ) first_question
        ON picked.answer_id = first_question.ai_answer_id
    GROUP BY picked.partition_date, picked.cl_id
) t4
    ON mas.partition_date = t4.partition_date
   AND mas.device_id = t4.cl_id
LEFT JOIN (
    -- First question shown: report_status events carrying id 2. Both the
    -- content_id key and the misspelled concent_id key are checked.
    SELECT
        partition_date,
        cl_id,
        COUNT(1) AS pv
    FROM online.bl_hdfs_maidian_updates
    WHERE partition_date >= '20200720'
      AND partition_date <= regexp_replace(DATE_SUB(current_date, 1), '-', '')
      AND page_name = 'ai_conversation_detail'
      AND action = 'report_status'
      AND (
            params['content_id'] = 2
         OR params['concent_id'] = 2
      )
    GROUP BY partition_date, cl_id
) t5
    ON mas.partition_date = t5.partition_date
   AND mas.device_id = t5.cl_id
LEFT JOIN (
    -- Devices in yesterday's spam snapshot (suspected institutional traffic
    -- inflation, per the original note). Matched rows are dropped by the
    -- IS NULL test in the outer WHERE clause.
    SELECT DISTINCT
        device_id
    FROM ml.ml_d_ct_dv_devicespam_d
    WHERE partition_day = regexp_replace(DATE_SUB(current_date, 1), '-', '')
) spam_pv
    ON mas.device_id = spam_pv.device_id
LEFT JOIN (
    -- Internal or non-consumer devices per day: the first entry of device_list
    -- for every user that is a doctor, a puppet or classify user, a staff user,
    -- or has used a device flagged is_login_doctor. Matched rows are dropped by
    -- the IS NULL test in the outer WHERE clause.
    SELECT
        partition_date,
        device_id
    FROM (
        -- first device id recorded for the user on that day.
        -- The range covers the whole report window (20200720 up to yesterday)
        -- so that every reported day gets the same exclusion. A rolling
        -- look-back shorter than the report window would leave older days
        -- unfiltered and make their figures change from run to run.
        SELECT
            user_id,
            partition_date,
            if(size(device_list) > 0, device_list[0], '') AS device_id
        FROM online.ml_user_updates
        WHERE partition_date >= '20200720'
          AND partition_date <= regexp_replace(DATE_SUB(current_date, 1), '-', '')
    ) a
    JOIN (
        -- doctor accounts
        SELECT DISTINCT user_id
        FROM online.tl_hdfs_doctor_view
        WHERE partition_date = regexp_replace(DATE_SUB(current_date, 1), '-', '')
        UNION ALL
        -- puppet accounts and model (classify) users
        SELECT user_id
        FROM ml.ml_c_ct_ui_user_dimen_d
        WHERE partition_day = regexp_replace(DATE_SUB(current_date, 1), '-', '')
          AND (is_puppet = 'true' OR is_classifyuser = 'true')
        UNION ALL
        -- users covered by the company intranet list
        SELECT DISTINCT user_id
        FROM dim.dim_device_user_staff
        UNION ALL
        -- users that have used a device on which a doctor logged in
        SELECT DISTINCT t1.user_id
        FROM (
            SELECT
                user_id,
                v.device_id AS device_id
            FROM online.ml_user_history_detail
            LATERAL VIEW EXPLODE(device_history_list) v AS device_id
            WHERE partition_date = regexp_replace(DATE_SUB(current_date, 1), '-', '')
        ) t1
        JOIN (
            SELECT device_id
            FROM online.ml_device_history_detail
            WHERE partition_date = regexp_replace(DATE_SUB(current_date, 1), '-', '')
              AND is_login_doctor = '1'
        ) t2
            ON t1.device_id = t2.device_id
    ) b
        ON a.user_id = b.user_id
    -- the UNION ALL branches can repeat a user, grouping removes the duplicates
    GROUP BY partition_date, device_id
) dev
    ON mas.partition_date = dev.partition_date
   AND mas.device_id = dev.device_id
-- anti-join: keep only devices found in neither exclusion list
WHERE spam_pv.device_id IS NULL
  AND dev.device_id IS NULL
GROUP BY mas.partition_date;
\ No newline at end of file
-- Session setup for the AI self-test answer-option report: load the shared UDF
-- jar and register the two temporary functions used by the gray-release bucket
-- predicate of the mas subquery.
ADD JAR hdfs:///user/hive/share/lib/udf/hive-udf-1.0-SNAPSHOT.jar;
CREATE TEMPORARY FUNCTION convup AS 'com.gmei.hive.common.udf.UDFConvUpgrade';--base conversion function, supports conversion between various bases
CREATE TEMPORARY FUNCTION setencryption AS 'com.gmei.hive.common.udf.UDFStringSetEncryption';
-- Answer-option popularity: one output row per answer title, with the number
-- of times it was chosen (pv) and the number of distinct devices (uv).
SELECT
    t2.title,
    SUM(pv) AS `选择pv`,
    COUNT(DISTINCT cl_id) AS `选择uv`
FROM (
    -- Answer clicks since 20200729, exploded to one row per result_list entry
    -- and counted per (answer_id, question_id, device).
    SELECT
        json_map(item.result, 'string,string')['answer_id'] AS answer_id,
        json_map(item.result, 'string,string')['question_id'] AS question_id,
        cl_id,
        COUNT(1) AS pv
    FROM (
        SELECT
            cl_id,
            json_split(params['result_list']) AS result_list
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date >= '20200729'
          AND partition_date <= regexp_replace(DATE_SUB(current_date, 1), '-', '')
          AND page_name = 'ai_conversation_detail'
          AND action = 'on_click_button'
          AND params['button_name'] = 'ai_answer'
    ) clicks
    LATERAL VIEW EXPLODE(clicks.result_list) item AS result
    GROUP BY
        json_map(item.result, 'string,string')['answer_id'],
        json_map(item.result, 'string,string')['question_id'],
        cl_id
) t1
JOIN (
    -- Online answer options from yesterday's snapshot, supplies the title that
    -- the report groups by.
    SELECT
        id,
        title
    FROM tl.tl_zx_api_ai_qa_answer
    WHERE partition_day = regexp_replace(DATE_SUB(current_date, 1), '-', '')
      AND is_online = 'true'
) t2
    ON t1.answer_id = t2.id
JOIN (
    -- Online questions from yesterday's snapshot. This join only restricts the
    -- rows, no column of t3 is referenced by the outer query.
    SELECT
        id,
        title
    FROM tl.tl_zx_api_ai_qa_question
    WHERE partition_day = regexp_replace(DATE_SUB(current_date, 1), '-', '')
      AND is_online = 'true'
) t3
    ON t1.question_id = t3.id
JOIN (
    -- Eligible devices: active (active_type 1, 2 or 4) on any day since
    -- 20200729, minus the listed acquisition channels, gray bucket only.
    SELECT DISTINCT
        device_id
    FROM online.ml_device_day_active_status
    WHERE partition_date >= '20200729'
      AND partition_date <= regexp_replace(DATE_SUB(current_date, 1), '-', '')
      AND active_type IN ('1', '2', '4')
      AND first_channel_source_type NOT IN (
          'yqxiu1', 'yqxiu2', 'yqxiu3', 'yqxiu4', 'yqxiu5', 'mxyc1', 'mxyc2', 'mxyc3',
          'wanpu', 'jinshan', 'jx', 'maimai', 'zhuoyi', 'huatian', 'suopingjingling', 'mocha', 'mizhe', 'meika', 'lamabang',
          'js-az1', 'js-az2', 'js-az3', 'js-az4', 'js-az5', 'jfq-az1', 'jfq-az2', 'jfq-az3', 'jfq-az4', 'jfq-az5', 'toufang1',
          'toufang2', 'toufang3', 'toufang4', 'toufang5', 'toufang6', 'TF-toufang1', 'TF-toufang2', 'TF-toufang3', 'TF-toufang4',
          'TF-toufang5', 'tf-toufang1', 'tf-toufang2', 'tf-toufang3', 'tf-toufang4', 'tf-toufang5', 'benzhan', 'promotion_aso100',
          'promotion_qianka', 'promotion_xiaoyu', 'promotion_dianru', 'promotion_malioaso', 'promotion_malioaso-shequ',
          'promotion_shike', 'promotion_julang_jl03', 'promotion_zuimei', '', 'unknown'
      )
      AND first_channel_source_type NOT LIKE 'promotion\_jf\_%'
      -- gray-release sample: last two digits of the converted device hash, modulo 20, must be 0
      AND substr(convup(setencryption(device_id, 'sha-1'), 16, 10), -2, 2) % 20 = 0
) mas
    ON mas.device_id = t1.cl_id
LEFT JOIN (
    -- Devices in yesterday's spam snapshot (suspected institutional traffic
    -- inflation, per the original note). Matched rows are dropped by the
    -- IS NULL test in the outer WHERE clause.
    SELECT DISTINCT
        device_id
    FROM ml.ml_d_ct_dv_devicespam_d
    WHERE partition_day = regexp_replace(DATE_SUB(current_date, 1), '-', '')
) spam_pv
    ON mas.device_id = spam_pv.device_id
LEFT JOIN (
    -- Internal or non-consumer devices: the first entry of device_list for every
    -- user that is a doctor, a puppet or classify user, a staff user, or has
    -- used a device flagged is_login_doctor. Matched rows are dropped by the
    -- IS NULL test in the outer WHERE clause.
    SELECT DISTINCT
        device_id
    FROM (
        -- first device id recorded for the user on each day of the report window
        SELECT
            user_id,
            partition_date,
            if(size(device_list) > 0, device_list[0], '') AS device_id
        FROM online.ml_user_updates
        WHERE partition_date >= '20200729'
          AND partition_date <= regexp_replace(DATE_SUB(current_date, 1), '-', '')
    ) a
    JOIN (
        -- doctor accounts
        SELECT DISTINCT user_id
        FROM online.tl_hdfs_doctor_view
        WHERE partition_date = regexp_replace(DATE_SUB(current_date, 1), '-', '')
        UNION ALL
        -- puppet accounts and model (classify) users
        SELECT user_id
        FROM ml.ml_c_ct_ui_user_dimen_d
        WHERE partition_day = regexp_replace(DATE_SUB(current_date, 1), '-', '')
          AND (is_puppet = 'true' OR is_classifyuser = 'true')
        UNION ALL
        -- users covered by the company intranet list
        SELECT DISTINCT user_id
        FROM dim.dim_device_user_staff
        UNION ALL
        -- users that have used a device on which a doctor logged in
        SELECT DISTINCT t1.user_id
        FROM (
            SELECT
                user_id,
                v.device_id AS device_id
            FROM online.ml_user_history_detail
            LATERAL VIEW EXPLODE(device_history_list) v AS device_id
            WHERE partition_date = regexp_replace(DATE_SUB(current_date, 1), '-', '')
        ) t1
        JOIN (
            SELECT device_id
            FROM online.ml_device_history_detail
            WHERE partition_date = regexp_replace(DATE_SUB(current_date, 1), '-', '')
              AND is_login_doctor = '1'
        ) t2
            ON t1.device_id = t2.device_id
    ) b
        ON a.user_id = b.user_id
) dev
    ON t1.cl_id = dev.device_id
-- anti-join: keep only devices found in neither exclusion list
WHERE spam_pv.device_id IS NULL
  AND dev.device_id IS NULL
GROUP BY t2.title
\ No newline at end of file
-- Session setup for the user-interest (KYC) daily report: load the shared UDF
-- jar and register the two temporary functions used by the gray-release bucket
-- predicate of the mas subquery (convup and setencryption).
ADD JAR hdfs:///user/hive/share/lib/udf/hive-udf-1.0-SNAPSHOT.jar;
CREATE TEMPORARY FUNCTION convup AS 'com.gmei.hive.common.udf.UDFConvUpgrade';
CREATE TEMPORARY FUNCTION setencryption AS 'com.gmei.hive.common.udf.UDFStringSetEncryption';
-- KYC onboarding daily report: one output row per day, device OS, activity
-- type and phone-bound flag. Every uv column is a distinct device count, the
-- sum columns add up the per-device counts of the joined subqueries.
SELECT
    mas.partition_date AS `日期`,
    mas.device_os_type AS `设备类型`,
    mas.active_type AS `活跃类型`,
    CASE WHEN binding.type >= 1 THEN '是' ELSE '否' END AS `是否授权电话`,
    COUNT(DISTINCT mas.device_id) AS `dau`,
    COUNT(DISTINCT CASE WHEN register_num > 0 THEN phone.cl_id END) AS `新注册设备`,
    COUNT(DISTINCT is_open.cl_id) AS `授权推送设备数`,
    COUNT(DISTINCT loc.cl_id) AS `授权地域设备数`,
    COUNT(DISTINCT t1.cl_id) AS `kyc兴趣页面uv`,
    -- phone authorization funnel
    COUNT(DISTINCT CASE WHEN kyc_phone_pv > 0 THEN t2.cl_id END) AS `授权电话页面浏览uv`,
    COUNT(DISTINCT CASE WHEN phone_agree_pv > 0 THEN t3.cl_id END) AS `同意授权电话uv`,
    COUNT(DISTINCT CASE WHEN phone_disagree_pv > 0 THEN t3.cl_id END) AS `不同意授权电话uv`,
    SUM(clue_pv) AS `授权电话线索数`,
    SUM(call_clue_pv) AS `机构拨打电话数量`,
    SUM(valid_clue_pv) AS `有效电话线索数>30s`,
    -- private-message authorization funnel
    COUNT(DISTINCT CASE WHEN kyc_message_pv > 0 THEN t2.cl_id END) AS `授权私信页面浏览uv`,
    COUNT(DISTINCT CASE WHEN message_agree_pv > 0 THEN t3.cl_id END) AS `同意发送私信uv`,
    COUNT(DISTINCT CASE WHEN message_disagree_pv > 0 THEN t3.cl_id END) AS `不同意发送私信uv`,
    SUM(t4.conv_pv) AS `发送私信对话数`,
    COUNT(DISTINCT CASE WHEN t4.view_pv > 0 THEN t4.cl_id END) AS `查看私信用户数`,
    SUM(t4.view_pv) AS `查看私信对话数`,
    SUM(t4.valid_conv_pv) AS `有效私信对话数`,
    -- one_click_login page buttons
    COUNT(DISTINCT CASE WHEN t7.login_pv > 0 THEN t7.cl_id END) AS `一键登录点击uv`,
    COUNT(DISTINCT CASE WHEN t7.quit_pv > 0 THEN t7.cl_id END) AS `跳过点击uv`,
    COUNT(DISTINCT CASE WHEN t7.wechat_pv > 0 THEN t7.cl_id END) AS `微信点击uv`,
    COUNT(DISTINCT CASE WHEN t7.microblog_pv > 0 THEN t7.cl_id END) AS `微博点击uv`,
    COUNT(DISTINCT CASE WHEN t7.qq_pv > 0 THEN t7.cl_id END) AS `qq点击uv`,
    COUNT(DISTINCT CASE WHEN t7.apple_pv > 0 THEN t7.cl_id END) AS `苹果点击uv`,
    COUNT(DISTINCT CASE WHEN t7.change_account_pv > 0 THEN t7.cl_id END) AS `其他账号点击uv`,
    -- login_phone page, devices without any one_click_login event that day
    COUNT(DISTINCT CASE WHEN t8.page_pv > 0 THEN t8.cl_id END) AS `无法识别一键登录页,显示手机号登录页浏览uv`,
    COUNT(DISTINCT CASE WHEN t8.login_pv > 0 THEN t8.cl_id END) AS `手机号登录页手机登录点击uv`,
    COUNT(DISTINCT CASE WHEN t8.wechat_pv > 0 THEN t8.cl_id END) AS `手机号登录页微信点击uv`,
    COUNT(DISTINCT CASE WHEN t8.microblog_pv > 0 THEN t8.cl_id END) AS `手机号登录页微博点击uv`,
    COUNT(DISTINCT CASE WHEN t8.qq_pv > 0 THEN t8.cl_id END) AS `手机号登录页qq点击uv`
FROM (
    -- Driving set: daily active devices (active_type 1, 2 or 4) since 20200801
    -- with a non-null device_id, minus the listed acquisition channels,
    -- restricted to the gray bucket.
    SELECT
        partition_date,
        device_id,
        device_os_type,
        CASE
            WHEN active_type IN ('1', '2') THEN '新增设备'
            WHEN active_type = '4' THEN '老活跃设备'
        END AS active_type
    FROM online.ml_device_day_active_status
    WHERE partition_date >= '20200801'
      AND partition_date <= regexp_replace(DATE_SUB(current_date, 1), '-', '')
      AND active_type IN ('1', '2', '4')
      AND device_id IS NOT NULL
      AND first_channel_source_type NOT IN (
          'yqxiu1', 'yqxiu2', 'yqxiu3', 'yqxiu4', 'yqxiu5', 'mxyc1', 'mxyc2', 'mxyc3',
          'wanpu', 'jinshan', 'jx', 'maimai', 'zhuoyi', 'huatian', 'suopingjingling', 'mocha', 'mizhe', 'meika', 'lamabang',
          'js-az1', 'js-az2', 'js-az3', 'js-az4', 'js-az5', 'jfq-az1', 'jfq-az2', 'jfq-az3', 'jfq-az4', 'jfq-az5', 'toufang1',
          'toufang2', 'toufang3', 'toufang4', 'toufang5', 'toufang6', 'TF-toufang1', 'TF-toufang2', 'TF-toufang3', 'TF-toufang4',
          'TF-toufang5', 'tf-toufang1', 'tf-toufang2', 'tf-toufang3', 'tf-toufang4', 'tf-toufang5', 'benzhan', 'promotion_aso100',
          'promotion_qianka', 'promotion_xiaoyu', 'promotion_dianru', 'promotion_malioaso', 'promotion_malioaso-shequ',
          'promotion_shike', 'promotion_julang_jl03', 'promotion_zuimei', '', 'unknown'
      )
      AND first_channel_source_type NOT LIKE 'promotion\_jf\_%'
      -- gray-release sample: last two digits of the converted device hash, modulo 20, must be 0
      AND substr(convup(setencryption(device_id, 'sha-1'), 16, 10), -2, 2) % 20 = 0
) mas
LEFT JOIN (
    -- Devices with push enabled: the latest is_open_push event of the day for
    -- the device must carry type 1. The type is read from the type key, falling
    -- back to the misspelled tyep key.
    SELECT
        latest.partition_date,
        latest.cl_id
    FROM (
        -- timestamp of the last is_open_push event per day and device
        SELECT
            partition_date,
            cl_id,
            MAX(time_str) AS max_time_str
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date >= '20200801'
          AND partition_date <= regexp_replace(DATE_SUB(current_date, 1), '-', '')
          AND action = 'is_open_push'
        GROUP BY partition_date, cl_id
    ) latest
    LEFT JOIN (
        SELECT
            partition_date,
            cl_id,
            time_str,
            COALESCE(params['type'], params['tyep']) AS type
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date >= '20200801'
          AND partition_date <= regexp_replace(DATE_SUB(current_date, 1), '-', '')
          AND action = 'is_open_push'
    ) evt
        ON evt.partition_date = latest.partition_date
       AND evt.cl_id = latest.cl_id
       AND evt.time_str = latest.max_time_str
    -- filtering on a column of evt discards unmatched rows of the left join
    WHERE evt.type = 1
    GROUP BY latest.partition_date, latest.cl_id
) is_open
    ON is_open.partition_date = mas.partition_date
   AND is_open.cl_id = mas.device_id
LEFT JOIN (
    -- Successful logins per day and device, split by the is_new_register flag:
    -- register_num counts events with flag 1, login_num events with flag 0.
    SELECT
        partition_date,
        cl_id,
        COUNT(CASE WHEN params['is_new_register'] = 1 THEN 1 END) AS register_num,
        COUNT(CASE WHEN params['is_new_register'] = 0 THEN 1 END) AS login_num
    FROM online.bl_hdfs_maidian_updates
    WHERE partition_date >= '20200801'
      AND partition_date <= regexp_replace(DATE_SUB(current_date, 1), '-', '')
      AND action = 'login_success'
      AND page_name IN ('new_register', 'one_click_login', 'login_phone', 'login_password')
      AND params['button_name'] IN ('qq', 'microblog', 'wechat', 'login', 'quick_login', 'phone', '密码登录', '手机登录')
      AND params['status'] = 'succeed'
    GROUP BY partition_date, cl_id
) phone
    ON phone.partition_date = mas.partition_date
   AND phone.cl_id = mas.device_id
LEFT JOIN (
    -- Phone binding per day and device: type is the number of users seen on the
    -- device that day whose phone in the user-extra snapshot is 11 characters
    -- long. The outer query reports the device as phone-bound when type >= 1.
    SELECT
        partition_date,
        cl_id,
        SUM(type) AS type
    FROM (
        -- every non-empty user_id seen on the device that day
        SELECT
            partition_date,
            cl_id,
            user_id
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date >= '20200801'
          AND partition_date <= regexp_replace(DATE_SUB(current_date, 1), '-', '')
          AND user_id <> ''
        GROUP BY partition_date, cl_id, user_id
    ) a
    JOIN (
        -- Both CASE branches are integers so that SUM adds numbers instead of
        -- relying on an implicit string to number conversion.
        -- The snapshot is read from the partition of yesterday, computed from
        -- current_date, so the query needs no externally supplied variable.
        SELECT
            user_id,
            CASE WHEN length(phone) = 11 THEN 1 ELSE 0 END AS type
        FROM online.tl_hdfs_userextra_view
        WHERE partition_date = regexp_replace(DATE_SUB(current_date, 1), '-', '')
    ) b
        ON a.user_id = b.user_id
    GROUP BY partition_date, cl_id
) binding
    ON binding.partition_date = mas.partition_date
   AND binding.cl_id = mas.device_id
LEFT JOIN (
    -- Location granted: start_question events for kyc_id 1 (page_view with the
    -- id inside extra_param, or report_status with the id as a direct param)
    -- whose device longitude and latitude are both different from 0 and 0.0.
    SELECT
        partition_date,
        cl_id,
        COUNT(1) AS pv
    FROM online.bl_hdfs_maidian_updates
    WHERE partition_date >= '20200801'
      AND partition_date <= regexp_replace(DATE_SUB(current_date, 1), '-', '')
      AND page_name = 'start_question'
      AND (
            (get_json_object(params['extra_param'], '$.kyc_id') = 1 AND action = 'page_view')
         OR (params['kyc_id'] = 1 AND action = 'report_status')
      )
      AND device['lng'] NOT IN ('0', '0.0')
      AND device['lat'] NOT IN ('0', '0.0')
    GROUP BY partition_date, cl_id
) loc
    ON loc.partition_date = mas.partition_date
   AND loc.cl_id = mas.device_id
LEFT JOIN (
    -- Interest page views (question_id 198) for kyc_id 1 or 2: either a
    -- page_view whose extra_param carries both ids, or a report_status whose
    -- result_list text contains question_id 198.
    SELECT
        partition_date,
        cl_id,
        COUNT(1) AS pv
    FROM online.bl_hdfs_maidian_updates
    WHERE partition_date >= '20200801'
      AND partition_date <= regexp_replace(DATE_SUB(current_date, 1), '-', '')
      AND page_name = 'start_question'
      AND (
            (
                get_json_object(params['extra_param'], '$.kyc_id') IN (1, 2)
            AND action = 'page_view'
            AND get_json_object(params['extra_param'], '$.question_id') IN (198)
            )
         OR (
                params['kyc_id'] IN (1, 2)
            AND action = 'report_status'
            AND params['result_list'] LIKE '%"question_id":198%'
            )
      )
    GROUP BY partition_date, cl_id
) t1
    ON mas.partition_date = t1.partition_date
   AND mas.device_id = t1.cl_id
LEFT JOIN (
    -- Page views of the two authorization pages. Each flag is a distinct count
    -- over a constant, so it is 1 when the device viewed the page that day and
    -- 0 otherwise.
    SELECT
        partition_date,
        cl_id,
        COUNT(DISTINCT CASE WHEN page_name = 'kyc_message' THEN 1 END) AS kyc_message_pv,
        COUNT(DISTINCT CASE WHEN page_name = 'kyc_phone' THEN 1 END) AS kyc_phone_pv
    FROM online.bl_hdfs_maidian_updates
    WHERE partition_date >= '20200801'
      AND partition_date <= regexp_replace(DATE_SUB(current_date, 1), '-', '')
      AND page_name IN ('kyc_message', 'kyc_phone')
      AND action = 'page_view'
    GROUP BY partition_date, cl_id
) t2
    ON mas.partition_date = t2.partition_date
   AND mas.device_id = t2.cl_id
LEFT JOIN (
    -- Agree and decline clicks on the two authorization pages. A decline is a
    -- no_need click inside the kyc_disagree popup, an agree is any agree click.
    SELECT
        partition_date,
        cl_id,
        COUNT(CASE WHEN page_name = 'kyc_phone' AND params['button_name'] = 'agree' THEN 1 END) AS phone_agree_pv,
        COUNT(CASE WHEN page_name = 'kyc_phone' AND params['button_name'] = 'no_need' THEN 1 END) AS phone_disagree_pv,
        COUNT(CASE WHEN page_name = 'kyc_message' AND params['button_name'] = 'agree' THEN 1 END) AS message_agree_pv,
        COUNT(CASE WHEN page_name = 'kyc_message' AND params['button_name'] = 'no_need' THEN 1 END) AS message_disagree_pv
    FROM online.bl_hdfs_maidian_updates
    WHERE partition_date >= '20200801'
      AND partition_date <= regexp_replace(DATE_SUB(current_date, 1), '-', '')
      AND page_name IN ('kyc_message', 'kyc_phone')
      AND action = 'on_click_button'
      AND (
            (params['popup_name'] = 'kyc_disagree' AND params['button_name'] = 'no_need')
         OR params['button_name'] = 'agree'
      )
    GROUP BY partition_date, cl_id
) t3
    ON mas.partition_date = t3.partition_date
   AND mas.device_id = t3.cl_id
left join
(
-- Private-message funnel per day and device for users who clicked agree on the
-- kyc_message page.
--   conv_pv       distinct conversations matched to the agree click
--   view_pv       distinct (doctor_id, user_id) pairs with a conversation_detail page view
--   valid_conv_pv distinct conversations that pass the validity rules of subquery d
SELECT b.partition_date,b.cl_id
,count(distinct b.conversation_id) as conv_pv
,count(distinct c.doctor_id,c.user_id) as view_pv
,count(distinct d.conversation_id) as valid_conv_pv
FROM
(
-- b: one row per (conversation, device, user, doctor, day) reached from an agree click
SELECT b1.conversation_id,a.cl_id,b2.user_id,b2.doctor_id,a.partition_date
FROM
(--clicked agree on the kyc_message page
SELECT partition_date,cl_id,user_id
FROM online.bl_hdfs_maidian_updates
where partition_date>='20200801' AND partition_date<= regexp_replace(DATE_SUB(current_date,1) ,'-','')
and params['button_name'] = 'agree'
and action='on_click_button'
and page_name='kyc_message'
group by partition_date,cl_id,user_id
)a
join
(
-- b1: messages from the snapshot of yesterday, sent inside the report window,
-- whose content matches the LIKE pattern below (presumably the interest message
-- sent after the user agrees -- confirm). Joined on sender and send date.
SELECT conversation_id,user_id,regexp_replace(to_date(send_time) ,'-','') as send_date
FROM online.tl_hdfs_message_view
WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
and regexp_replace(to_date(send_time) ,'-','')>='20200801'
and regexp_replace(to_date(send_time) ,'-','')<= regexp_replace(DATE_SUB(current_date,1) ,'-','')
and content like '%对%感兴趣'
group by conversation_id,user_id,regexp_replace(to_date(send_time) ,'-','')
)b1
on a.partition_date=b1.send_date and a.user_id=b1.user_id
join
(--user_id and doctor_id that belong to each conversation
SELECT conversation_id,t.doctor_id,t.user_id
FROM
(
-- participants found in the doctor view are collected as doctor_id, all other
-- participants as user_id
SELECT conversation_id,collect_set(doctor_id) as doctor_id,collect_set(user_id) as user_id
FROM
(
SELECT t2.conversation_id
,t3.user_id as doctor_id
,case when t3.user_id is null then t2.user_id end user_id
FROM
(
SELECT conversation_id,user_id
FROM online.tl_hdfs_conversationuserstatus_view
WHERE partition_date>='20200801' AND partition_date<= regexp_replace(DATE_SUB(current_date,1) ,'-','')
group by conversation_id,user_id
)t2
left join
(
SELECT user_id
FROM online.tl_hdfs_doctor_view
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
GROUP BY user_id
)t3
ON t2.user_id = t3.user_id
)a
group by conversation_id
)b
-- exploding both sets yields every doctor and user combination of the conversation
-- NOTE(review): both lateral views use the same alias t -- confirm the engine
-- resolves t.doctor_id and t.user_id as intended
LATERAL VIEW EXPLODE(b.doctor_id) t as doctor_id
LATERAL VIEW EXPLODE(b.user_id) t as user_id
)b2
on b1.conversation_id=b2.conversation_id and b1.user_id=b2.user_id
)b
left join
(--private message viewed: conversation_detail page views, business_id is split on the underscore into doctor_id and user_id
SELECT partition_date
,split(params['business_id'],'_')[0] as doctor_id
,split(params['business_id'],'_')[1] as user_id
,cl_id
,count(1) as view_pv
FROM online.bl_hdfs_maidian_updates
where partition_date>='20200801' AND partition_date<= regexp_replace(DATE_SUB(current_date,1) ,'-','')
and page_name='conversation_detail'
and action='page_view'
group by partition_date
,split(params['business_id'],'_')[0]
,split(params['business_id'],'_')[1]
,cl_id
)c
on b.partition_date=c.partition_date
and b.doctor_id=c.doctor_id
and b.user_id=c.user_id
and b.cl_id=c.cl_id
left join
(--valid private messages: at least 3 distinct messages on the day, and the first message of the day was not sent by a doctor
SELECT t1.partition_date,t1.conversation_id,t1.user_id,t1.cl_id
FROM
(
SELECT partition_date, conversation_id ,user_id,cl_id,count(distinct msg_id) as msg_num
FROM online.ml_community_message_detail
WHERE partition_date>='20200801' AND partition_date<= regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND app['is_system']=0
AND msg_type <> '3'
AND target_doctor_id is not null
AND target_doctor_id <> ''
--exclude chats with the Gengmei customer-service accounts listed below (per the original note: director, cashback assistant, Gengmei secretary, Xiaomei doctor, Gengmei user experience center, Gengmei test)
AND target_user_id NOT in ('23899734','515333','31605392','22','24481051','30174022','22691355','22144926','602329')
GROUP BY partition_date,conversation_id,user_id,cl_id
having msg_num>=3
)t1
JOIN --drop conversations started by a doctor (the first message of the day in the conversation was sent by a doctor)
(
SELECT a.send_date,a.conversation_id
FROM
(
SELECT conversation_id, send_date,user_id
FROM
(
-- rn = 1 marks the earliest message per send date and conversation
SELECT conversation_id,REGEXP_REPLACE(SUBSTR(send_time, 1, 10), '-', '') as send_date,user_id
,row_number () over (PARTITION BY REGEXP_REPLACE(SUBSTR(send_time, 1, 10), '-', ''),conversation_id ORDER BY send_time asc) rn
FROM online.tl_hdfs_message_view
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND REGEXP_REPLACE(SUBSTR(send_time, 1, 10), '-', '') >= '20200801'
AND REGEXP_REPLACE(SUBSTR(send_time, 1, 10), '-', '') <= regexp_replace(DATE_SUB(current_date,1) ,'-','')
)a
WHERE rn = 1
GROUP BY conversation_id,send_date,user_id
)a
LEFT JOIN
(
SELECT distinct user_id
FROM online.tl_hdfs_doctor_view
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
)b
ON a.user_id = b.user_id
-- anti-join: keep first senders that are not in the doctor view
WHERE b.user_id is NULL
GROUP BY a.send_date,a.conversation_id
)t2
on t1.partition_date=t2.send_date and t1.conversation_id=t2.conversation_id
)d
on b.partition_date=d.partition_date
and b.conversation_id=d.conversation_id
and b.user_id=d.user_id
and b.cl_id=d.cl_id
group by b.partition_date,b.cl_id
)t4
on mas.partition_date=t4.partition_date and mas.device_id=t4.cl_id
LEFT JOIN (
    -- Authorized phone leads: agree clicks on kyc_phone whose hospital_id param
    -- holds a list, exploded to one row per id. clue_pv is the number of
    -- distinct (device, id) pairs per day and device.
    SELECT
        partition_date,
        cl_id,
        COUNT(DISTINCT cl_id, hospital_id) AS clue_pv
    FROM (
        SELECT
            partition_date,
            cl_id,
            json_split(params['hospital_id'], 'string') AS hospital_ids
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date >= '20200801'
          AND partition_date <= regexp_replace(DATE_SUB(current_date, 1), '-', '')
          AND page_name IN ('kyc_phone')
          AND action = 'on_click_button'
          AND params['button_name'] = 'agree'
          AND params['hospital_id'] LIKE '%[%'
    ) agreed
    LATERAL VIEW explode(hospital_ids) ids AS hospital_id
    GROUP BY partition_date, cl_id
) t5
    ON mas.partition_date = t5.partition_date
   AND mas.device_id = t5.cl_id
left join
(--authorized phone leads that were called: call_clue_pv counts the matched call rows, valid_clue_pv those flagged valid
SELECT a.partition_date,cl_id
,count(1) as call_clue_pv
,count(case when type='有效' then 1 end) as valid_clue_pv
FROM
(
-- a: one row per (day, device, user, id) taken from agree clicks on kyc_phone
SELECT partition_date,cl_id,user_id,hospital_id
from
(
SELECT partition_date,cl_id,user_id,json_split(params['hospital_id'],'string') as hospital_ids
FROM online.bl_hdfs_maidian_updates
where partition_date>='20200801' AND partition_date<= regexp_replace(DATE_SUB(current_date,1) ,'-','')
and page_name in ('kyc_phone')
and action='on_click_button'
and params['button_name']='agree'
and params['hospital_id'] like '%[%'--per the original note the value sent is actually a doctor_id
)a
LATERAL VIEW explode(hospital_ids) b AS hospital_id
group by partition_date,cl_id,user_id,hospital_id
)a
join
(
-- b: maps the doctor id to its merchant, snapshot of yesterday
SELECT doctor_id,merchant_id
FROM online.ml_doctor_relation_dimen_day
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND doctor_is_merchant='true' --doctor is a merchant
GROUP BY doctor_id,merchant_id
)b
on a.hospital_id=b.doctor_id
join
(
-- c: one row per (user, merchant, call date, validity) reached by following
-- call detail -> ali binding -> virtual phone binding -> lead task binding -> lead task
SELECT user_id,merchant_id,partition_date,type
FROM
(
-- a call is flagged valid when release_time is at least 30 seconds after start_time
SELECT sub_id,REGEXP_REPLACE(SUBSTR(call_time,0,10), '-', '') as partition_date
,case when unix_timestamp(substr(start_time,1,19))+30<=unix_timestamp(substr(release_time,1,19)) then '有效' else '无效' end as type
FROM tl.tl_gm_sl_ali_virtual_phone_call_detail --call record table, call_type is the call type
WHERE partition_day=regexp_replace(date_sub(current_date(),1),'-','')
AND REGEXP_REPLACE(SUBSTR(call_time,0,10), '-', '')>='20200801'
AND REGEXP_REPLACE(SUBSTR(call_time,0,10), '-', '')<= regexp_replace(DATE_SUB(current_date,1) ,'-','')
)a
LEFT JOIN
(
SELECT id,sub_id
FROM tl.tl_gm_sl_ali_virtual_phone_binding --Ali record table
WHERE partition_day=regexp_replace(date_sub(current_date(),1),'-','')
)b
ON a.sub_id = b.sub_id
LEFT JOIN
(
SELECT id,platform_binding_id
FROM tl.tl_gm_sl_virtual_phone_binding
WHERE partition_day=regexp_replace(date_sub(current_date(),1),'-','')
)c
ON b.id = c.platform_binding_id
LEFT JOIN
(
SELECT phone_binding_id,lead_task_id
FROM tl.tl_gm_sl_lead_task_phone_binding
WHERE partition_day=regexp_replace(date_sub(current_date(),1),'-','')
)d
ON c.id = d.phone_binding_id
JOIN
(
SELECT id,user_id,merchant_id
FROM tl.tl_gm_sl_lead_task --lead task table (a row is written after the user taps authorize)
WHERE partition_day=regexp_replace(date_sub(current_date(),1),'-','')
AND source='2' --phone authorization triggered by user behaviour
)e
ON d.lead_task_id = e.id
GROUP BY user_id,merchant_id,partition_date,type
)c
-- NOTE(review): the match is on user_id and merchant_id only, the call date
-- c.partition_date is not compared with the agree date a.partition_date, so a
-- call counts for every day on which the user agreed -- confirm this is intended
on a.user_id=c.user_id and b.merchant_id=c.merchant_id
group by a.partition_date,cl_id
)t6
on mas.partition_date=t6.partition_date and mas.device_id=t6.cl_id
LEFT JOIN (
    -- Button clicks on the one_click_login page per day and device, one counter
    -- per button_name. Only events whose second app_version component is 31 or
    -- higher are counted.
    SELECT
        partition_date,
        cl_id,
        COUNT(CASE WHEN params['button_name'] = 'quick_login' THEN 1 END) AS login_pv,
        COUNT(CASE WHEN params['button_name'] = 'quit' THEN 1 END) AS quit_pv,
        COUNT(CASE WHEN params['button_name'] = 'wechat' THEN 1 END) AS wechat_pv,
        COUNT(CASE WHEN params['button_name'] = 'microblog' THEN 1 END) AS microblog_pv,
        COUNT(CASE WHEN params['button_name'] = 'qq' THEN 1 END) AS qq_pv,
        COUNT(CASE WHEN params['button_name'] = 'apple' THEN 1 END) AS apple_pv,
        COUNT(CASE WHEN params['button_name'] = 'change_account' THEN 1 END) AS change_account_pv
    FROM online.bl_hdfs_maidian_updates
    -- The range ends at yesterday, computed from current_date like the mas
    -- driving set this subquery is joined to on partition_date, so the query
    -- needs no externally supplied end date.
    WHERE partition_date >= '20200801'
      AND partition_date <= regexp_replace(DATE_SUB(current_date, 1), '-', '')
      AND page_name = 'one_click_login'
      AND action = 'on_click_button'
      AND int(split(app_version, '\\.')[1]) >= 31
    GROUP BY partition_date, cl_id
) t7
    ON mas.partition_date = t7.partition_date
   AND mas.device_id = t7.cl_id
LEFT JOIN (
    -- login_phone page activity for devices that produced no one_click_login
    -- event on the same day: page views plus one click counter per button.
    -- Only events whose second app_version component is 31 or higher count.
    SELECT
        login_page.partition_date,
        login_page.cl_id,
        COUNT(CASE WHEN action = 'on_click_button' AND button_name = 'login' THEN 1 END) AS login_pv,
        COUNT(CASE WHEN action = 'on_click_button' AND button_name = 'wechat' THEN 1 END) AS wechat_pv,
        COUNT(CASE WHEN action = 'on_click_button' AND button_name = 'microblog' THEN 1 END) AS microblog_pv,
        COUNT(CASE WHEN action = 'on_click_button' AND button_name = 'qq' THEN 1 END) AS qq_pv,
        COUNT(CASE WHEN action = 'page_view' THEN 1 END) AS page_pv
    FROM (
        SELECT
            partition_date,
            cl_id,
            action,
            params['button_name'] AS button_name
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date >= '20200801'
          AND partition_date <= regexp_replace(DATE_SUB(current_date, 1), '-', '')
          AND int(split(app_version, '\\.')[1]) >= 31
          AND page_name = 'login_phone'
    ) login_page
    LEFT JOIN (
        -- devices that did reach the one_click_login page that day
        SELECT
            partition_date,
            cl_id
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date >= '20200801'
          AND partition_date <= regexp_replace(DATE_SUB(current_date, 1), '-', '')
          AND page_name = 'one_click_login'
          AND int(split(app_version, '\\.')[1]) >= 31
        GROUP BY partition_date, cl_id
    ) one_click
        ON login_page.partition_date = one_click.partition_date
       AND login_page.cl_id = one_click.cl_id
    -- anti-join: keep devices without a one_click_login event
    WHERE one_click.cl_id IS NULL
    GROUP BY login_page.partition_date, login_page.cl_id
) t8
    ON mas.partition_date = t8.partition_date
   AND mas.device_id = t8.cl_id
LEFT JOIN (
    -- Devices to exclude: yesterday's spam snapshot (suspected institutional
    -- traffic inflation, per the original note) plus the staff device list.
    -- Matched rows are dropped by the IS NULL test in the outer WHERE clause.
    SELECT DISTINCT
        device_id
    FROM ml.ml_d_ct_dv_devicespam_d
    WHERE partition_day = regexp_replace(DATE_SUB(current_date, 1), '-', '')
    UNION ALL
    SELECT DISTINCT
        device_id
    FROM dim.dim_device_user_staff
) spam_pv
    ON mas.device_id = spam_pv.device_id
LEFT JOIN (
    -- Internal or non-consumer devices per day: the first entry of device_list
    -- for every user that is a doctor, a puppet or classify user, a staff user,
    -- or has used a device flagged is_login_doctor. Matched rows are dropped by
    -- the IS NULL test in the outer WHERE clause.
    SELECT
        partition_date,
        device_id
    FROM (
        -- first device id recorded for the user on each day of the report window
        SELECT
            user_id,
            partition_date,
            if(size(device_list) > 0, device_list[0], '') AS device_id
        FROM online.ml_user_updates
        WHERE partition_date >= '20200801'
          AND partition_date <= regexp_replace(DATE_SUB(current_date, 1), '-', '')
    ) a
    JOIN (
        -- doctor accounts
        SELECT DISTINCT user_id
        FROM online.tl_hdfs_doctor_view
        WHERE partition_date = regexp_replace(DATE_SUB(current_date, 1), '-', '')
        UNION ALL
        -- puppet accounts and model (classify) users
        SELECT user_id
        FROM ml.ml_c_ct_ui_user_dimen_d
        WHERE partition_day = regexp_replace(DATE_SUB(current_date, 1), '-', '')
          AND (is_puppet = 'true' OR is_classifyuser = 'true')
        UNION ALL
        -- users covered by the company intranet list
        SELECT DISTINCT user_id
        FROM dim.dim_device_user_staff
        UNION ALL
        -- users that have used a device on which a doctor logged in
        SELECT DISTINCT t1.user_id
        FROM (
            SELECT
                user_id,
                v.device_id AS device_id
            FROM online.ml_user_history_detail
            LATERAL VIEW EXPLODE(device_history_list) v AS device_id
            WHERE partition_date = regexp_replace(DATE_SUB(current_date, 1), '-', '')
        ) t1
        JOIN (
            SELECT device_id
            FROM online.ml_device_history_detail
            WHERE partition_date = regexp_replace(DATE_SUB(current_date, 1), '-', '')
              AND is_login_doctor = '1'
        ) t2
            ON t1.device_id = t2.device_id
    ) b
        ON a.user_id = b.user_id
    -- the UNION ALL branches can repeat a user, grouping removes the duplicates
    GROUP BY partition_date, device_id
) dev
    ON mas.partition_date = dev.partition_date
   AND mas.device_id = dev.device_id
-- anti-join: keep only devices found in neither exclusion list
WHERE spam_pv.device_id IS NULL
  AND dev.device_id IS NULL
GROUP BY
    mas.partition_date,
    mas.device_os_type,
    mas.active_type,
    CASE WHEN binding.type >= 1 THEN '是' ELSE '否' END
ORDER BY `日期`, `设备类型`, `活跃类型`, `是否授权电话`
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment