Commit 9c8bbd79 authored by 赵建伟's avatar 赵建伟

Merge branch 'weiyimin' into 'master'

Weiyimin

See merge request !35
parents 9b3e3517 f219b157
......@@ -69,6 +69,7 @@ FROM
when transaction_type in ('samecity_ctr') then 'samecity_ctr'
when transaction_type in ('samecity_cvr','samecity_ctcvr') then 'samecity_cvr'
when transaction_type in ('hotspot_feed') then 'hotspot_feed'
when transaction_type in ('fmctr') then 'fmctr'
END AS recommend_type,
card_id,
app_session_id
......@@ -78,7 +79,8 @@ FROM
AND is_exposure = '1' ----精准曝光
AND page_name ='home'
AND tab_name = '精选'
AND transaction_type in ('-1','ctr','smr','cvr','ctcvr','hotspot','pgc','newdata','samecity_ctr','samecity_cvr','hotspot_feed','samecity_ctcvr')
AND (transaction_type in ('-1','smr','hotspot','pgc','newdata','samecity_ctr','samecity_cvr','hotspot_feed','samecity_ctcvr','fmctr')
or (transaction_type in ('ctr','cvr','ctcvr') and card_content_type in ('qa','answer','user_post')))
AND card_content_type in ('qa','diary','user_post','answer')
group by partition_date,
case when card_content_type in ('qa','answer') then 'qa' else card_content_type end,
......@@ -90,7 +92,8 @@ FROM
when transaction_type in ('newdata') then '保量卡片'
when transaction_type in ('samecity_ctr') then 'samecity_ctr'
when transaction_type in ('samecity_cvr','samecity_ctcvr') then 'samecity_cvr'
when transaction_type in ('hotspot_feed') then 'hotspot_feed' END,
when transaction_type in ('hotspot_feed') then 'hotspot_feed'
when transaction_type in ('fmctr') then 'fmctr' END,
card_id,
app_session_id
)a
......@@ -118,6 +121,7 @@ FROM
when params['transaction_type'] in ('samecity_ctr') then 'samecity_ctr'
when params['transaction_type'] in ('samecity_cvr','samecity_ctcvr') then 'samecity_cvr'
when params['transaction_type'] in ('hotspot_feed') then 'hotspot_feed'
when params['transaction_type'] in ('fmctr') then 'fmctr'
END AS recommend_type,
params['card_id'] as card_id,
app_session_id
......@@ -126,7 +130,8 @@ FROM
AND action='on_click_card'
AND params['page_name'] ='home'
AND params['tab_name'] = '精选'
AND params['transaction_type'] in ('-1','ctr','smr','cvr','ctcvr','hotspot','pgc','newdata','samecity_ctr','samecity_cvr','hotspot_feed','samecity_ctcvr')
AND (params['transaction_type'] in ('-1','smr','hotspot','pgc','newdata','samecity_ctr','samecity_cvr','hotspot_feed','samecity_ctcvr','fmctr')
or (params['transaction_type'] in ('ctr','cvr','ctcvr') and params['card_content_type'] in ('qa','answer','user_post')))
AND params['card_content_type'] in ('qa','diary','user_post','answer')
GROUP BY partition_date,
cl_id,
......@@ -138,7 +143,8 @@ FROM
when params['transaction_type'] in ('newdata') then '保量卡片'
when params['transaction_type'] in ('samecity_ctr') then 'samecity_ctr'
when params['transaction_type'] in ('samecity_cvr','samecity_ctcvr') then 'samecity_cvr'
when params['transaction_type'] in ('hotspot_feed') then 'hotspot_feed' END,
when params['transaction_type'] in ('hotspot_feed') then 'hotspot_feed'
when params['transaction_type'] in ('fmctr') then 'fmctr' END,
params['card_id'],
app_session_id
)a
......
kyc_q2=用户兴趣引导日报及ai自测日报
ai_daily=AI自测漏斗数据
ai_qa=AI自测问答选项
kyc_qa_daily=用户兴趣引导日报
#step1_1.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online bl_hdfs_maidian_updates
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive tl tl_zx_api_ai_qa_answer
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive tl tl_zx_api_ai_qa_question
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_userextra_view
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_message_view
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_conversationuserstatus_view
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_community_message_detail
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_doctor_relation_dimen_day
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive tl tl_gm_sl_ali_virtual_phone_call_detail
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive tl tl_gm_sl_ali_virtual_phone_binding
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive tl tl_gm_sl_virtual_phone_binding
\ No newline at end of file
#step1_2.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_device_day_active_status
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive tl tl_gm_sl_lead_task_phone_binding
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive tl tl_gm_sl_lead_task
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive ml ml_d_ct_dv_devicespam_d
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive tl tl_zx_api_ai_qa_question_answer
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_user_updates
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_doctor_view
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive ml ml_c_ct_ui_user_dimen_d
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_user_history_detail
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_device_history_detail
\ No newline at end of file
#step2.job
type=command
dependencies=step1_1,step1_2,step1_3,step1_4,step1_5,step1_6,step1_7,step1_8,step1_9,step1_10,step1_11,step1_12,step1_13,step1_14,step1_15,step1_16,step1_17,step1_18,step1_19,step1_20,step1_21
command=curl -X GET http://localhost:8553/api/report/email/kyc_q2/weiyimin@igengmei.com/weiyimin@qq.com
SET mapreduce.job.queuename=data;
set role admin;
ADD JAR hdfs:///user/hive/share/lib/udf/hive-udf-1.0-SNAPSHOT.jar;
CREATE TEMPORARY FUNCTION convup AS 'com.gmei.hive.common.udf.UDFConvUpgrade';
CREATE TEMPORARY FUNCTION setencryption AS 'com.gmei.hive.common.udf.UDFStringSetEncryption';
-- AI self-test daily funnel: one output row per partition_date, from 20200720 up to yesterday.
-- Base population (mas) is the set of active devices in the gray-release bucket. Each funnel
-- step (t1..t5) is pre-aggregated to one row per (partition_date, cl_id) and LEFT JOINed to it,
-- so every uv column is a distinct device count and every pv column is a sum of per-device counts.
-- Devices flagged as spam (spam_pv) or tied to internal accounts (dev) are removed by the
-- anti-join predicates in the final WHERE clause.
-- Note: a pv column is NULL (not 0) on a day where no base device reached that step.
SELECT mas.partition_date
      -- step 1: self-test home page viewed
      ,count(distinct t1.cl_id) as `AI自测首页uv`
      ,sum(t1.pv) as `AI自测首页pv`
      -- step 2: first question shown
      ,count(distinct t5.cl_id) as `第一个问题出现uv`
      ,sum(t5.pv) as `第一个问题出现pv`
      -- step 3: first-level answer clicked
      ,count(distinct t4.cl_id) as `第一层答案点击uv`
      ,sum(t4.pv) as `第一层答案点击pv`
      -- step 4: self-test completed
      ,count(distinct t2.cl_id) as `完成AI自测uv`
      ,sum(t2.pv) as `完成AI自测pv`
      -- step 5: landing-page card clicked
      ,count(distinct t3.cl_id) as `落地页点击uv`
      ,sum(t3.pv) as `落地页点击pv`
FROM
(
    -- Base population: new (active_type 1 or 2) and returning (active_type 4) devices per day,
    -- excluding the listed acquisition channels.
    -- device_os_type and the derived active_type label are not referenced by the outer query.
    SELECT
    partition_date,device_id,device_os_type
    ,case when active_type in ('1','2') then '新增设备'
    when active_type ='4' then '老活跃设备' end as active_type
    FROM online.ml_device_day_active_status
    where partition_date>='20200720' AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
    AND active_type in ('1','2','4')
    and first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
    ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
    ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
    ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
    ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
    ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
    ,'promotion_shike','promotion_julang_jl03','promotion_zuimei','','unknown')
    AND first_channel_source_type not like 'promotion\_jf\_%'
    -- Gray-release bucket: hash the device id, convert it with convup(…,16,10) and keep devices
    -- whose last two characters modulo 20 equal 0.
    -- NOTE(review): presumably a hex-to-decimal conversion giving roughly a 5 percent sample - confirm.
    AND substr(convup(setencryption(device_id,'sha-1'),16,10),-2,2)%20 = 0
)mas
left join
(
    -- t1: self-test page views per device per day
    SELECT partition_date,cl_id,count(1) as pv
    FROM online.bl_hdfs_maidian_updates
    where partition_date>='20200720' AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
    and page_name = 'ai_conversation_detail'
    and action='page_view'
    group by partition_date,cl_id
)t1
on mas.partition_date=t1.partition_date and mas.device_id=t1.cl_id
left join
(
    -- t2: self-test completed, i.e. the chosen answer leads to recommendation cards next
    SELECT a.partition_date,a.cl_id,sum(1) as pv
    FROM
    (
        -- one row per (question, answer) entry found in the result_list of each answer click
        SELECT partition_date,json_map(t2.result,'string,string')['answer_id'] answer_id
        ,json_map(t2.result,'string,string')['question_id'] question_id
        ,cl_id
        from
        (
            SELECT partition_date,cl_id
            ,json_split(params['result_list']) as result_list
            FROM online.bl_hdfs_maidian_updates
            where partition_date>='20200720' AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
            and page_name = 'ai_conversation_detail'
            and action='on_click_button'
            and params['button_name']='ai_answer'
        )t1
        LATERAL VIEW EXPLODE(t1.result_list) t2 as result
    )a
    join
    (
        -- question/answer pairs from the latest snapshot whose next step is a recommendation card
        select ai_question_id,ai_answer_id
        from tl.tl_zx_api_ai_qa_question_answer
        where partition_day=regexp_replace(DATE_SUB(current_date,1) ,'-','')
        and next_type=1
    )b
    on a.answer_id=b.ai_answer_id and a.question_id=b.ai_question_id
    group by a.partition_date,a.cl_id
)t2
on mas.partition_date=t2.partition_date and mas.device_id=t2.cl_id
left join
(
    -- t3: landing-page card clicks per device per day
    SELECT partition_date,cl_id,count(1) as pv
    FROM online.bl_hdfs_maidian_updates
    where partition_date>='20200720' AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
    and page_name = 'ai_conversation_detail'
    and action='on_click_card'
    group by partition_date,cl_id
)t3
on mas.partition_date=t3.partition_date and mas.device_id=t3.cl_id
left join
(
    -- t4: first-level answer clicks, i.e. clicked answers that belong to ai_question_id 2
    SELECT a.partition_date,a.cl_id,count(1) as pv
    FROM
    (
        SELECT partition_date,json_map(t2.result,'string,string')['answer_id'] answer_id
        ,cl_id
        from
        (
            SELECT partition_date,cl_id
            ,json_split(params['result_list']) as result_list
            FROM online.bl_hdfs_maidian_updates
            where partition_date>='20200720' AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
            and page_name = 'ai_conversation_detail'
            and action='on_click_button'
            and params['button_name']='ai_answer'
        )t1
        LATERAL VIEW EXPLODE(t1.result_list) t2 as result
    )a
    JOIN
    (
        select ai_question_id,ai_answer_id
        from tl.tl_zx_api_ai_qa_question_answer
        where partition_day=regexp_replace(DATE_SUB(current_date,1) ,'-','')
        and ai_question_id=2
    )b
    on a.answer_id=b.ai_answer_id
    group by a.partition_date,a.cl_id
)t4
on mas.partition_date=t4.partition_date and mas.device_id=t4.cl_id
left join
(
    -- t5: first-level answer exposure (reported in the output as the first question being shown)
    SELECT partition_date,cl_id,count(1) as pv
    FROM online.bl_hdfs_maidian_updates
    where partition_date>='20200720' AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
    and page_name = 'ai_conversation_detail'
    and action='report_status'
    -- NOTE(review): concent_id looks like a misspelled variant of content_id that some
    -- events carry - confirm against the tracking spec before removing either side.
    and (params['content_id']=2 or params['concent_id']=2)
    group by partition_date,cl_id
)t5
on mas.partition_date=t5.partition_date and mas.device_id=t5.cl_id
left join
(
    -- spam_pv: devices suspected of institution-driven fake traffic (latest snapshot)
    select distinct device_id
    from ml.ml_d_ct_dv_devicespam_d
    where partition_day=regexp_replace(DATE_SUB(current_date,1) ,'-','')
)spam_pv
on mas.device_id=spam_pv.device_id
left join
(
    -- dev: per day, devices that belong to internal accounts
    SELECT partition_date,device_id
    FROM
    (
        -- first device in device_list for each user and day
        -- The window starts at the report start date so that every reported day gets the
        -- exclusion. A rolling 60-day window would stop filtering internal devices on
        -- report days older than 60 days and make historical numbers drift.
        SELECT user_id,partition_date,
        if(size(device_list) > 0, device_list [ 0 ], '') AS device_id
        FROM online.ml_user_updates
        WHERE partition_date>='20200720'
        AND partition_date<regexp_replace((current_date),'-','')
    )a
    join
    (
        -- doctor accounts
        SELECT distinct user_id
        FROM online.tl_hdfs_doctor_view
        WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
        -- puppet accounts and model users
        UNION ALL
        SELECT user_id
        FROM ml.ml_c_ct_ui_user_dimen_d
        WHERE partition_day = regexp_replace(DATE_SUB(current_date,1) ,'-','')
        AND (is_puppet = 'true' or is_classifyuser = 'true')
        UNION ALL
        -- users seen on the company intranet (staff)
        select distinct user_id
        from dim.dim_device_user_staff
        UNION ALL
        -- users who have logged in on a device that a doctor account also logged in on
        SELECT distinct t1.user_id
        FROM
        (
            SELECT user_id, v.device_id as device_id
            FROM online.ml_user_history_detail
            LATERAL VIEW EXPLODE(device_history_list) v AS device_id
            WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
        ) t1
        JOIN
        (
            SELECT device_id
            FROM online.ml_device_history_detail
            WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
            AND is_login_doctor = '1'
        ) t2
        ON t1.device_id = t2.device_id
    )b
    on a.user_id=b.user_id
    -- the UNION ALL branches can repeat a user, so collapse to one row per day and device
    group by partition_date,device_id
)dev
on mas.partition_date=dev.partition_date and mas.device_id=dev.device_id
-- anti-joins: keep only devices with no spam match and no internal-account match
WHERE spam_pv.device_id IS NULL
and dev.device_id is NULL
group by mas.partition_date;
\ No newline at end of file
SET mapreduce.job.queuename=data;
set role admin;
ADD JAR hdfs:///user/hive/share/lib/udf/hive-udf-1.0-SNAPSHOT.jar;
CREATE TEMPORARY FUNCTION convup AS 'com.gmei.hive.common.udf.UDFConvUpgrade';
-- convup is the base-conversion UDF (the original note says it supports arbitrary bases)
CREATE TEMPORARY FUNCTION setencryption AS 'com.gmei.hive.common.udf.UDFStringSetEncryption';
-- AI self-test answer options: for every online answer title, how often it was chosen (pv)
-- and by how many distinct devices (uv), from 20200729 up to yesterday.
-- Only gray-release devices are counted. Spam devices and devices tied to internal
-- accounts are removed through the anti-join predicates in the final WHERE clause.
-- The alias t2 is kept on the answer table because the first output column is read as t2.title.
SELECT
    t2.title,
    SUM(pv)               AS `选择pv`,
    COUNT(DISTINCT cl_id) AS `选择uv`
FROM
(
    -- answer clicks, one row per (answer, question, device) with the click count
    SELECT
        json_map(exploded.result, 'string,string')['answer_id']   answer_id,
        json_map(exploded.result, 'string,string')['question_id'] question_id,
        cl_id,
        COUNT(1) AS pv
    FROM
    (
        -- raw answer-button click events with their result_list split into an array
        SELECT
            cl_id,
            json_split(params['result_list']) AS result_list
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date >= '20200729'
          AND partition_date <= regexp_replace(DATE_SUB(current_date, 1), '-', '')
          AND page_name = 'ai_conversation_detail'
          AND action = 'on_click_button'
          AND params['button_name'] = 'ai_answer'
    ) clicks
    LATERAL VIEW EXPLODE(clicks.result_list) exploded AS result
    GROUP BY
        json_map(exploded.result, 'string,string')['answer_id'],
        json_map(exploded.result, 'string,string')['question_id'],
        cl_id
) picks
INNER JOIN
(
    -- answer titles from the latest snapshot, online answers only
    SELECT id, title
    FROM tl.tl_zx_api_ai_qa_answer
    WHERE partition_day = regexp_replace(DATE_SUB(current_date, 1), '-', '')
      AND is_online = 'true'
) t2
    ON picks.answer_id = t2.id
INNER JOIN
(
    -- gray-release devices that were active at least once in the report window
    SELECT DISTINCT device_id
    FROM online.ml_device_day_active_status
    WHERE partition_date >= '20200729'
      AND partition_date <= regexp_replace(DATE_SUB(current_date, 1), '-', '')
      AND active_type IN ('1', '2', '4')
      AND first_channel_source_type NOT IN (
            'yqxiu1', 'yqxiu2', 'yqxiu3', 'yqxiu4', 'yqxiu5',
            'mxyc1', 'mxyc2', 'mxyc3',
            'wanpu', 'jinshan', 'jx', 'maimai', 'zhuoyi', 'huatian', 'suopingjingling',
            'mocha', 'mizhe', 'meika', 'lamabang',
            'js-az1', 'js-az2', 'js-az3', 'js-az4', 'js-az5',
            'jfq-az1', 'jfq-az2', 'jfq-az3', 'jfq-az4', 'jfq-az5',
            'toufang1', 'toufang2', 'toufang3', 'toufang4', 'toufang5', 'toufang6',
            'TF-toufang1', 'TF-toufang2', 'TF-toufang3', 'TF-toufang4', 'TF-toufang5',
            'tf-toufang1', 'tf-toufang2', 'tf-toufang3', 'tf-toufang4', 'tf-toufang5',
            'benzhan', 'promotion_aso100', 'promotion_qianka', 'promotion_xiaoyu',
            'promotion_dianru', 'promotion_malioaso', 'promotion_malioaso-shequ',
            'promotion_shike', 'promotion_julang_jl03', 'promotion_zuimei',
            '', 'unknown'
          )
      AND first_channel_source_type NOT LIKE 'promotion\_jf\_%'
      -- gray-release bucket selection on the hashed device id
      AND substr(convup(setencryption(device_id, 'sha-1'), 16, 10), -2, 2) % 20 = 0
) gray_dev
    ON gray_dev.device_id = picks.cl_id
LEFT JOIN
(
    -- devices suspected of institution-driven fake traffic (latest snapshot)
    SELECT DISTINCT device_id
    FROM ml.ml_d_ct_dv_devicespam_d
    WHERE partition_day = regexp_replace(DATE_SUB(current_date, 1), '-', '')
) spam
    ON gray_dev.device_id = spam.device_id
LEFT JOIN
(
    -- devices that belong to internal accounts at any day of the report window
    SELECT DISTINCT device_id
    FROM
    (
        -- first device in device_list for each user and day
        SELECT
            user_id,
            partition_date,
            IF(size(device_list) > 0, device_list[0], '') AS device_id
        FROM online.ml_user_updates
        WHERE partition_date >= '20200729'
          AND partition_date <= regexp_replace(DATE_SUB(current_date, 1), '-', '')
    ) usr_dev
    INNER JOIN
    (
        -- doctor accounts
        SELECT DISTINCT user_id
        FROM online.tl_hdfs_doctor_view
        WHERE partition_date = regexp_replace(DATE_SUB(current_date, 1), '-', '')
        UNION ALL
        -- puppet accounts and model users
        SELECT user_id
        FROM ml.ml_c_ct_ui_user_dimen_d
        WHERE partition_day = regexp_replace(DATE_SUB(current_date, 1), '-', '')
          AND (is_puppet = 'true' OR is_classifyuser = 'true')
        UNION ALL
        -- users seen on the company intranet (staff)
        SELECT DISTINCT user_id
        FROM dim.dim_device_user_staff
        UNION ALL
        -- users who have logged in on a device that a doctor account also logged in on
        SELECT DISTINCT usr_hist.user_id
        FROM
        (
            SELECT
                user_id,
                hist.device_id AS device_id
            FROM online.ml_user_history_detail
            LATERAL VIEW EXPLODE(device_history_list) hist AS device_id
            WHERE partition_date = regexp_replace(DATE_SUB(current_date, 1), '-', '')
        ) usr_hist
        INNER JOIN
        (
            SELECT device_id
            FROM online.ml_device_history_detail
            WHERE partition_date = regexp_replace(DATE_SUB(current_date, 1), '-', '')
              AND is_login_doctor = '1'
        ) doc_dev
            ON usr_hist.device_id = doc_dev.device_id
    ) internal_usr
        ON usr_dev.user_id = internal_usr.user_id
) internal_dev
    ON picks.cl_id = internal_dev.device_id
-- anti-joins: keep only devices with no spam match and no internal-account match
WHERE spam.device_id IS NULL
  AND internal_dev.device_id IS NULL
GROUP BY t2.title;
\ No newline at end of file
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment