Commit 2b68baec authored by 魏艺敏

push codes

parent 39bb9e3e
# Display names keyed by report/query name. kyc_q2 is the report key requested by the
# step2 job (/api/report/email/kyc_q2/...). The other keys presumably name the
# individual queries (sheets) of that report - verify against the report service.
kyc_q2=用户兴趣引导日报及ai自测日报
ai_daily=AI自测漏斗数据
ai_qa=AI自测问答选项
kyc_qa_daily=用户兴趣引导日报
#step1_1.job
# Wait job: runs waitsuccess.sh with arguments "hive online bl_hdfs_maidian_updates".
# Presumably blocks until Hive table online.bl_hdfs_maidian_updates is ready (the
# script is not visible here - verify). Both report queries in this commit read it.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online bl_hdfs_maidian_updates
\ No newline at end of file
#step1_3.job
# NOTE(review): the step1_3 header is repeated on many job files while step2 depends
# on step1_1..step1_21 - confirm this header matches the real file name.
# Wait job: runs waitsuccess.sh with arguments "hive tl tl_zx_api_ai_qa_answer".
# Presumably blocks until Hive table tl.tl_zx_api_ai_qa_answer is ready (verify).
# The answer-option query in this commit reads that table.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive tl tl_zx_api_ai_qa_answer
\ No newline at end of file
#step1_3.job
# NOTE(review): the step1_3 header is repeated on many job files while step2 depends
# on step1_1..step1_21 - confirm this header matches the real file name.
# Wait job: runs waitsuccess.sh with arguments "hive tl tl_zx_api_ai_qa_question".
# Presumably blocks until Hive table tl.tl_zx_api_ai_qa_question is ready (verify).
# The answer-option query in this commit reads that table.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive tl tl_zx_api_ai_qa_question
\ No newline at end of file
#step1_3.job
# NOTE(review): the step1_3 header is repeated on many job files while step2 depends
# on step1_1..step1_21 - confirm this header matches the real file name.
# Wait job: runs waitsuccess.sh with arguments "hive online tl_hdfs_userextra_view".
# Presumably blocks until Hive table online.tl_hdfs_userextra_view is ready (verify).
# No query visible in this commit view reads it - presumably it feeds the collapsed
# kyc_qa_daily query, verify.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_userextra_view
\ No newline at end of file
#step1_3.job
# NOTE(review): the step1_3 header is repeated on many job files while step2 depends
# on step1_1..step1_21 - confirm this header matches the real file name.
# Wait job: runs waitsuccess.sh with arguments "hive online tl_hdfs_message_view".
# Presumably blocks until Hive table online.tl_hdfs_message_view is ready (verify).
# No query visible in this commit view reads it - presumably it feeds the collapsed
# kyc_qa_daily query, verify.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_message_view
\ No newline at end of file
#step1_3.job
# NOTE(review): the step1_3 header is repeated on many job files while step2 depends
# on step1_1..step1_21 - confirm this header matches the real file name.
# Wait job: runs waitsuccess.sh with arguments
# "hive online tl_hdfs_conversationuserstatus_view". Presumably blocks until Hive table
# online.tl_hdfs_conversationuserstatus_view is ready (verify).
# No query visible in this commit view reads it - presumably it feeds the collapsed
# kyc_qa_daily query, verify.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_conversationuserstatus_view
\ No newline at end of file
#step1_3.job
# NOTE(review): the step1_3 header is repeated on many job files while step2 depends
# on step1_1..step1_21 - confirm this header matches the real file name.
# Wait job: runs waitsuccess.sh with arguments "hive online ml_community_message_detail".
# Presumably blocks until Hive table online.ml_community_message_detail is ready (verify).
# No query visible in this commit view reads it - presumably it feeds the collapsed
# kyc_qa_daily query, verify.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_community_message_detail
\ No newline at end of file
#step1_3.job
# NOTE(review): the step1_3 header is repeated on many job files while step2 depends
# on step1_1..step1_21 - confirm this header matches the real file name.
# Wait job: runs waitsuccess.sh with arguments "hive online ml_doctor_relation_dimen_day".
# Presumably blocks until Hive table online.ml_doctor_relation_dimen_day is ready (verify).
# No query visible in this commit view reads it - presumably it feeds the collapsed
# kyc_qa_daily query, verify.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_doctor_relation_dimen_day
\ No newline at end of file
#step1_3.job
# NOTE(review): the step1_3 header is repeated on many job files while step2 depends
# on step1_1..step1_21 - confirm this header matches the real file name.
# Wait job: runs waitsuccess.sh with arguments
# "hive tl tl_gm_sl_ali_virtual_phone_call_detail". Presumably blocks until Hive table
# tl.tl_gm_sl_ali_virtual_phone_call_detail is ready (verify).
# No query visible in this commit view reads it - presumably it feeds the collapsed
# kyc_qa_daily query, verify.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive tl tl_gm_sl_ali_virtual_phone_call_detail
\ No newline at end of file
#step1_3.job
# NOTE(review): the step1_3 header is repeated on many job files while step2 depends
# on step1_1..step1_21 - confirm this header matches the real file name.
# Wait job: runs waitsuccess.sh with arguments
# "hive tl tl_gm_sl_ali_virtual_phone_binding". Presumably blocks until Hive table
# tl.tl_gm_sl_ali_virtual_phone_binding is ready (verify).
# No query visible in this commit view reads it - presumably it feeds the collapsed
# kyc_qa_daily query, verify.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive tl tl_gm_sl_ali_virtual_phone_binding
\ No newline at end of file
#step1_3.job
# NOTE(review): the step1_3 header is repeated on many job files while step2 depends
# on step1_1..step1_21 - confirm this header matches the real file name.
# Wait job: runs waitsuccess.sh with arguments "hive tl tl_gm_sl_virtual_phone_binding".
# Presumably blocks until Hive table tl.tl_gm_sl_virtual_phone_binding is ready (verify).
# No query visible in this commit view reads it - presumably it feeds the collapsed
# kyc_qa_daily query, verify.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive tl tl_gm_sl_virtual_phone_binding
\ No newline at end of file
#step1_2.job
# Wait job: runs waitsuccess.sh with arguments "hive online ml_device_day_active_status".
# Presumably blocks until Hive table online.ml_device_day_active_status is ready (the
# script is not visible here - verify). Both report queries in this commit read it
# as their device population.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_device_day_active_status
\ No newline at end of file
#step1_3.job
# NOTE(review): the step1_3 header is repeated on many job files while step2 depends
# on step1_1..step1_21 - confirm this header matches the real file name.
# Wait job: runs waitsuccess.sh with arguments
# "hive tl tl_gm_sl_lead_task_phone_binding". Presumably blocks until Hive table
# tl.tl_gm_sl_lead_task_phone_binding is ready (verify).
# No query visible in this commit view reads it - presumably it feeds the collapsed
# kyc_qa_daily query, verify.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive tl tl_gm_sl_lead_task_phone_binding
\ No newline at end of file
#step1_3.job
# NOTE(review): the step1_3 header is repeated on many job files while step2 depends
# on step1_1..step1_21 - confirm this header matches the real file name.
# Wait job: runs waitsuccess.sh with arguments "hive tl tl_gm_sl_lead_task".
# Presumably blocks until Hive table tl.tl_gm_sl_lead_task is ready (verify).
# No query visible in this commit view reads it - presumably it feeds the collapsed
# kyc_qa_daily query, verify.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive tl tl_gm_sl_lead_task
\ No newline at end of file
#step1_3.job
# NOTE(review): the step1_3 header is repeated on many job files while step2 depends
# on step1_1..step1_21 - confirm this header matches the real file name.
# Wait job: runs waitsuccess.sh with arguments "hive ml ml_d_ct_dv_devicespam_d".
# Presumably blocks until Hive table ml.ml_d_ct_dv_devicespam_d is ready (verify).
# Both report queries in this commit read it to exclude spam devices.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive ml ml_d_ct_dv_devicespam_d
\ No newline at end of file
#step1_3.job
# NOTE(review): the step1_3 header is repeated on many job files while step2 depends
# on step1_1..step1_21 - confirm this header matches the real file name.
# Wait job: runs waitsuccess.sh with arguments "hive tl tl_zx_api_ai_qa_question_answer".
# Presumably blocks until Hive table tl.tl_zx_api_ai_qa_question_answer is ready (verify).
# The funnel query in this commit reads that table.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive tl tl_zx_api_ai_qa_question_answer
\ No newline at end of file
#step1_3.job
# NOTE(review): the step1_3 header is repeated on many job files while step2 depends
# on step1_1..step1_21 - confirm this header matches the real file name.
# Wait job: runs waitsuccess.sh with arguments "hive online ml_user_updates".
# Presumably blocks until Hive table online.ml_user_updates is ready (verify).
# Both report queries in this commit read it to map internal users to devices.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_user_updates
\ No newline at end of file
#step1_3.job
# NOTE(review): the step1_3 header is repeated on many job files while step2 depends
# on step1_1..step1_21 - confirm this header matches the real file name.
# Wait job: runs waitsuccess.sh with arguments "hive online tl_hdfs_doctor_view".
# Presumably blocks until Hive table online.tl_hdfs_doctor_view is ready (verify).
# Both report queries in this commit read it to find doctor accounts.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_doctor_view
\ No newline at end of file
#step1_3.job
# NOTE(review): the step1_3 header is repeated on many job files while step2 depends
# on step1_1..step1_21 - confirm this header matches the real file name.
# Wait job: runs waitsuccess.sh with arguments "hive ml ml_c_ct_ui_user_dimen_d".
# Presumably blocks until Hive table ml.ml_c_ct_ui_user_dimen_d is ready (verify).
# Both report queries in this commit read it (is_puppet / is_classifyuser filter).
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive ml ml_c_ct_ui_user_dimen_d
\ No newline at end of file
#step1_3.job
# NOTE(review): the step1_3 header is repeated on many job files while step2 depends
# on step1_1..step1_21 - confirm this header matches the real file name.
# Wait job: runs waitsuccess.sh with arguments "hive online ml_user_history_detail".
# Presumably blocks until Hive table online.ml_user_history_detail is ready (verify).
# Both report queries in this commit read it (device_history_list per user).
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_user_history_detail
\ No newline at end of file
#step1_3.job
# NOTE(review): the step1_3 header is repeated on many job files while step2 depends
# on step1_1..step1_21 - confirm this header matches the real file name.
# Wait job: runs waitsuccess.sh with arguments "hive online ml_device_history_detail".
# Presumably blocks until Hive table online.ml_device_history_detail is ready (verify).
# Both report queries in this commit read it (is_login_doctor filter).
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_device_history_detail
\ No newline at end of file
#step2.job
# Final step of the flow: runs only after all 21 step1_* wait jobs listed in
# dependencies have finished, then requests the kyc_q2 report from the local report
# service. The two trailing path segments are presumably the recipient addresses - verify.
# --fail makes curl exit non-zero when the service answers with HTTP status >= 400, so a
# failed report request fails this job instead of being recorded as a success.
type=command
dependencies=step1_1,step1_2,step1_3,step1_4,step1_5,step1_6,step1_7,step1_8,step1_9,step1_10,step1_11,step1_12,step1_13,step1_14,step1_15,step1_16,step1_17,step1_18,step1_19,step1_20,step1_21
command=curl --fail -X GET http://localhost:8553/api/report/email/kyc_q2/weiyimin@igengmei.com/weiyimin@qq.com
\ No newline at end of file
ADD JAR hdfs:///user/hive/share/lib/udf/hive-udf-1.0-SNAPSHOT.jar;
CREATE TEMPORARY FUNCTION convup AS 'com.gmei.hive.common.udf.UDFConvUpgrade';
CREATE TEMPORARY FUNCTION setencryption AS 'com.gmei.hive.common.udf.UDFStringSetEncryption';
-- AI self-test funnel report: one output row per partition_date, from 20200720 through
-- yesterday, with UV (distinct devices) and PV (event count) for each funnel step.
-- The population is the grey-bucket device set built in mas. Every funnel subquery is
-- aggregated to one row per (partition_date, cl_id) before it is joined.
-- Spam devices and devices of internal accounts are removed by the two anti-joins at
-- the end (LEFT JOIN followed by an IS NULL filter).
SELECT mas.partition_date
    ,count(distinct t1.cl_id) as `AI自测首页uv`
    ,sum(t1.pv) as `AI自测首页pv`
    ,count(distinct t5.cl_id) as `第一个问题出现uv`
    ,sum(t5.pv) as `第一个问题出现pv`
    ,count(distinct t4.cl_id) as `第一层答案点击uv`
    ,sum(t4.pv) as `第一层答案点击pv`
    ,count(distinct t2.cl_id) as `完成AI自测uv`
    ,sum(t2.pv) as `完成AI自测pv`
    ,count(distinct t3.cl_id) as `落地页点击uv`
    ,sum(t3.pv) as `落地页点击pv`
FROM
(
    -- Base population: devices active on the day with active_type 1, 2 or 4, excluding
    -- the listed acquisition channels. device_os_type and the active_type label are
    -- selected here but not used by the outer query.
    SELECT
        partition_date,device_id,device_os_type
        ,case when active_type in ('1','2') then '新增设备'
              when active_type ='4' then '老活跃设备' end as active_type
    FROM online.ml_device_day_active_status
    where partition_date>='20200720' AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
    AND active_type in ('1','2','4')
    and first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
    ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
    ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
    ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
    ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
    ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
    ,'promotion_shike','promotion_julang_jl03','promotion_zuimei','','unknown')
    AND first_channel_source_type not like 'promotion\_jf\_%'
    -- Grey-release bucket: the SHA-1 of device_id is passed through convup(.., 16, 10)
    -- (presumably hex to decimal - verify), and the last two digits modulo 20 must be 0.
    AND substr(convup(setencryption(device_id,'sha-1'),16,10),-2,2)%20 = 0
)mas
left join
(
    -- Step: self-test page view
    SELECT partition_date,cl_id,count(1) as pv
    FROM online.bl_hdfs_maidian_updates
    where partition_date>='20200720' AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
    and page_name = 'ai_conversation_detail'
    and action='page_view'
    group by partition_date,cl_id
)t1
on mas.partition_date=t1.partition_date and mas.device_id=t1.cl_id
left join
(
    -- Step: self-test completed, i.e. the clicked (question, answer) pair is one that
    -- is followed by a recommended card (next_type=1 in the relation table).
    SELECT a.partition_date,a.cl_id,sum(1) as pv
    FROM
    (
        -- One row per entry of the result_list array carried by each answer click
        SELECT partition_date,json_map(ans.result,'string,string')['answer_id'] answer_id
            ,json_map(ans.result,'string,string')['question_id'] question_id
            ,cl_id
        from
        (
            SELECT partition_date,cl_id
                ,json_split(params['result_list']) as result_list
            FROM online.bl_hdfs_maidian_updates
            where partition_date>='20200720' AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
            and page_name = 'ai_conversation_detail'
            and action='on_click_button'
            and params['button_name']='ai_answer'
        )clk
        LATERAL VIEW EXPLODE(clk.result_list) ans as result
    )a
    join
    (
        select ai_question_id,ai_answer_id
        from tl.tl_zx_api_ai_qa_question_answer
        where partition_day=regexp_replace(DATE_SUB(current_date,1) ,'-','')
        -- next_type 1 means recommended card
        and next_type=1
    )b
    on a.answer_id=b.ai_answer_id and a.question_id=b.ai_question_id
    group by a.partition_date,a.cl_id
)t2
on mas.partition_date=t2.partition_date and mas.device_id=t2.cl_id
left join
(
    -- Step: landing page (card) click
    SELECT partition_date,cl_id,count(1) as pv
    FROM online.bl_hdfs_maidian_updates
    where partition_date>='20200720' AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
    and page_name = 'ai_conversation_detail'
    and action='on_click_card'
    group by partition_date,cl_id
)t3
on mas.partition_date=t3.partition_date and mas.device_id=t3.cl_id
left join
(
    -- Step: first-level answer click, i.e. a click on any answer of question 2.
    -- NOTE(review): b is joined on the answer id only. This assumes each answer id
    -- appears once for question 2 in the relation table, otherwise pv is inflated.
    SELECT a.partition_date,a.cl_id,count(1) as pv
    FROM
    (
        SELECT partition_date,json_map(ans.result,'string,string')['answer_id'] answer_id
            ,cl_id
        from
        (
            SELECT partition_date,cl_id
                ,json_split(params['result_list']) as result_list
            FROM online.bl_hdfs_maidian_updates
            where partition_date>='20200720' AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
            and page_name = 'ai_conversation_detail'
            and action='on_click_button'
            and params['button_name']='ai_answer'
        )clk
        LATERAL VIEW EXPLODE(clk.result_list) ans as result
    )a
    JOIN
    (
        select ai_question_id,ai_answer_id
        from tl.tl_zx_api_ai_qa_question_answer
        where partition_day=regexp_replace(DATE_SUB(current_date,1) ,'-','')
        and ai_question_id=2
    )b
    on a.answer_id=b.ai_answer_id
    group by a.partition_date,a.cl_id
)t4
on mas.partition_date=t4.partition_date and mas.device_id=t4.cl_id
left join
(
    -- Step: first-level answer exposure (report_status events for id 2).
    -- Both key spellings content_id and concent_id are checked - presumably the
    -- misspelled key is what some client versions actually send, verify.
    SELECT partition_date,cl_id,count(1) as pv
    FROM online.bl_hdfs_maidian_updates
    where partition_date>='20200720' AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
    and page_name = 'ai_conversation_detail'
    and action='report_status'
    and (params['content_id']=2 or params['concent_id']=2)
    group by partition_date,cl_id
)t5
on mas.partition_date=t5.partition_date and mas.device_id=t5.cl_id
left join
(
    -- Anti-join source: devices suspected of institutional traffic inflation
    select distinct device_id
    from ml.ml_d_ct_dv_devicespam_d
    where partition_day=regexp_replace(DATE_SUB(current_date,1) ,'-','')
)spam_pv
on mas.device_id=spam_pv.device_id
left join
(
    -- Anti-join source: per day, the devices used by internal accounts
    SELECT partition_date,device_id
    FROM
    (
        -- First entry of device_list for each user and day (empty string when the
        -- list is empty)
        SELECT user_id,partition_date,
            if(size(device_list) > 0, device_list [ 0 ], '') AS device_id
        FROM online.ml_user_updates
        -- Window aligned with the report window used by mas. The earlier rolling
        -- 60-day window left report dates older than 60 days without any internal
        -- device exclusion, so historic rows changed as the window moved on.
        WHERE partition_date>='20200720'
        AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
    )a
    join
    (
        -- Doctor accounts
        SELECT distinct user_id
        FROM online.tl_hdfs_doctor_view
        WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
        UNION ALL
        -- Puppet accounts and model (classified) users
        SELECT user_id
        FROM ml.ml_c_ct_ui_user_dimen_d
        WHERE partition_day = regexp_replace(DATE_SUB(current_date,1) ,'-','')
        AND (is_puppet = 'true' or is_classifyuser = 'true')
        UNION ALL
        -- Users covered by the company intranet (staff)
        select distinct user_id
        from dim.dim_device_user_staff
        UNION ALL
        -- Users who have logged in on a device that a doctor account logged in on
        SELECT distinct uh.user_id
        FROM
        (
            SELECT user_id, v.device_id as device_id
            FROM online.ml_user_history_detail
            LATERAL VIEW EXPLODE(device_history_list) v AS device_id
            WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
        ) uh
        JOIN
        (
            SELECT device_id
            FROM online.ml_device_history_detail
            WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
            AND is_login_doctor = '1'
        ) dd
        ON uh.device_id = dd.device_id
    )b
    on a.user_id=b.user_id
    -- The user list may repeat a user_id (UNION ALL), so collapse to one row per key
    group by partition_date,device_id
)dev
on mas.partition_date=dev.partition_date and mas.device_id=dev.device_id
WHERE spam_pv.device_id IS NULL
and dev.device_id is NULL
group by mas.partition_date;
\ No newline at end of file
ADD JAR hdfs:///user/hive/share/lib/udf/hive-udf-1.0-SNAPSHOT.jar;
CREATE TEMPORARY FUNCTION convup AS 'com.gmei.hive.common.udf.UDFConvUpgrade';--base conversion function, supports conversion between arbitrary bases
CREATE TEMPORARY FUNCTION setencryption AS 'com.gmei.hive.common.udf.UDFStringSetEncryption';
-- Answer-option report for the AI self-test: for every answer title, the number of
-- times it was selected (pv) and the number of distinct devices that selected it (uv),
-- accumulated from 20200729 through yesterday.
-- Only online answers and questions count, only grey-bucket devices count, and spam
-- devices plus devices of internal accounts are removed by the two anti-joins.
-- NOTE(review): rows are grouped by answer title only, so answers of different
-- questions that share a title are merged - confirm this is intended.
SELECT
    t2.title,
    SUM(clicks.pv) AS `选择pv`,
    COUNT(DISTINCT clicks.cl_id) AS `选择uv`
FROM (
    -- Answer clicks, one row per (answer_id, question_id, device) with its click count
    SELECT
        json_map(item.result, 'string,string')['answer_id'] AS answer_id,
        json_map(item.result, 'string,string')['question_id'] AS question_id,
        cl_id,
        COUNT(1) AS pv
    FROM (
        SELECT
            cl_id,
            json_split(params['result_list']) AS result_list
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date >= '20200729'
          AND partition_date <= regexp_replace(DATE_SUB(current_date, 1), '-', '')
          AND page_name = 'ai_conversation_detail'
          AND action = 'on_click_button'
          AND params['button_name'] = 'ai_answer'
    ) ev
    LATERAL VIEW EXPLODE(ev.result_list) item AS result
    GROUP BY
        json_map(item.result, 'string,string')['answer_id'],
        json_map(item.result, 'string,string')['question_id'],
        cl_id
) clicks
INNER JOIN (
    -- Online answers as of yesterday
    SELECT id, title
    FROM tl.tl_zx_api_ai_qa_answer
    WHERE partition_day = regexp_replace(DATE_SUB(current_date, 1), '-', '')
      AND is_online = 'true'
) t2
    ON t2.id = clicks.answer_id
INNER JOIN (
    -- Online questions as of yesterday (used only to filter, its title is not output)
    SELECT id, title
    FROM tl.tl_zx_api_ai_qa_question
    WHERE partition_day = regexp_replace(DATE_SUB(current_date, 1), '-', '')
      AND is_online = 'true'
) question
    ON question.id = clicks.question_id
INNER JOIN (
    -- Grey-bucket devices that were active on any day of the report window
    SELECT DISTINCT device_id
    FROM online.ml_device_day_active_status
    WHERE partition_date >= '20200729'
      AND partition_date <= regexp_replace(DATE_SUB(current_date, 1), '-', '')
      AND active_type IN ('1','2','4')
      AND first_channel_source_type NOT IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
        ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
        ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
        ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
        ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
        ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
        ,'promotion_shike','promotion_julang_jl03','promotion_zuimei','','unknown')
      AND first_channel_source_type NOT LIKE 'promotion\_jf\_%'
      -- Grey-release bucket filter on the hashed device id
      AND substr(convup(setencryption(device_id,'sha-1'),16,10),-2,2)%20 = 0
) grey
    ON grey.device_id = clicks.cl_id
LEFT JOIN (
    -- Anti-join source: devices suspected of institutional traffic inflation
    SELECT DISTINCT device_id
    FROM ml.ml_d_ct_dv_devicespam_d
    WHERE partition_day = regexp_replace(DATE_SUB(current_date, 1), '-', '')
) spam
    ON spam.device_id = grey.device_id
LEFT JOIN (
    -- Anti-join source: devices used by internal accounts during the report window
    SELECT DISTINCT device_id
    FROM (
        -- First entry of device_list for each user and day (empty string when the
        -- list is empty)
        SELECT
            user_id,
            partition_date,
            CASE WHEN size(device_list) > 0 THEN device_list[0] ELSE '' END AS device_id
        FROM online.ml_user_updates
        WHERE partition_date >= '20200729'
          AND partition_date <= regexp_replace(DATE_SUB(current_date, 1), '-', '')
    ) user_device
    INNER JOIN (
        -- Doctor accounts
        SELECT DISTINCT user_id
        FROM online.tl_hdfs_doctor_view
        WHERE partition_date = regexp_replace(DATE_SUB(current_date, 1), '-', '')
        UNION ALL
        -- Puppet accounts and model (classified) users
        SELECT user_id
        FROM ml.ml_c_ct_ui_user_dimen_d
        WHERE partition_day = regexp_replace(DATE_SUB(current_date, 1), '-', '')
          AND (is_puppet = 'true' OR is_classifyuser = 'true')
        UNION ALL
        -- Users covered by the company intranet (staff)
        SELECT DISTINCT user_id
        FROM dim.dim_device_user_staff
        UNION ALL
        -- Users who have logged in on a device that a doctor account logged in on
        SELECT DISTINCT hist.user_id
        FROM (
            SELECT user_id, v.device_id AS device_id
            FROM online.ml_user_history_detail
            LATERAL VIEW EXPLODE(device_history_list) v AS device_id
            WHERE partition_date = regexp_replace(DATE_SUB(current_date, 1), '-', '')
        ) hist
        INNER JOIN (
            SELECT device_id
            FROM online.ml_device_history_detail
            WHERE partition_date = regexp_replace(DATE_SUB(current_date, 1), '-', '')
              AND is_login_doctor = '1'
        ) doctor_device
            ON doctor_device.device_id = hist.device_id
    ) internal_user
        ON internal_user.user_id = user_device.user_id
) internal_device
    ON internal_device.device_id = clicks.cl_id
WHERE spam.device_id IS NULL
  AND internal_device.device_id IS NULL
GROUP BY t2.title
\ No newline at end of file
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment