Commit 81a61076 authored by yindanlei's avatar yindanlei

add fangan_day report codes

parent 5727192e
fangan_day=方案灰度数据
\ No newline at end of file
-- ***************************************************************
-- * Script name:
-- * Purpose: scheme (fangan) gray-release data
-- * Business name: pm
-- * Input data:
-- * Author: yindanlei@igengmei.com
-- * Last updated: 2020-07-24
-- ***************************************************************
-- Session settings
SET mapreduce.job.queuename=data;
-- Work inside the pm database
USE pm;
-- Create the daily result table (one row per day_id and grey_type), partitioned by snapshot day
CREATE TABLE IF NOT EXISTS pm.tl_pm_fangan_d
(
    day_id         STRING  COMMENT '{"chs_name":"日期","description":"","etl":"","value":"","remark":""}',
    grey_type      STRING  COMMENT '{"chs_name":"灰度类型","description":"","etl":"","value":"","remark":""}',
    DAU_730        BIGINT  COMMENT '{"chs_name":"7.30及以上版本DAU","description":"","etl":"","value":"","remark":""}',
    new_DAU        BIGINT  COMMENT '{"chs_name":"新用户DAU","description":"","etl":"","value":"","remark":""}',
    old_DAU        BIGINT  COMMENT '{"chs_name":"老用户DAU","description":"","etl":"","value":"","remark":""}',
    quanzhong_DAU  BIGINT  COMMENT '{"chs_name":"权重日活","description":"","etl":"","value":"","remark":""}',
    liucun         DOUBLE  COMMENT '{"chs_name":"次日留存率(%)","description":"","etl":"","value":"","remark":""}',
    wel_pv         BIGINT  COMMENT '{"chs_name":"商详PV","description":"","etl":"","value":"","remark":""}',
    wel_uv         BIGINT  COMMENT '{"chs_name":"商详UV","description":"","etl":"","value":"","remark":""}',
    pay_num        BIGINT  COMMENT '{"chs_name":"支付订单数","description":"","etl":"","value":"","remark":""}',
    valid_dev_num  BIGINT  COMMENT '{"chs_name":"有效线索设备数","description":"","etl":"","value":"","remark":""}',
    valid_pv       BIGINT  COMMENT '{"chs_name":"有效线索人次","description":"","etl":"","value":"","remark":""}'
)
COMMENT '方案灰度数据'
PARTITIONED BY (PARTITION_DAY STRING COMMENT '分区日期')
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
    COLLECTION ITEMS TERMINATED BY '\002'
    MAP KEYS TERMINATED BY '\003'
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
\ No newline at end of file
-- Session settings: queue, map/reduce container memory, map-join conversion, reducer count
SET mapreduce.job.queuename=data;
SET mapreduce.map.memory.mb=8192;
SET mapreduce.map.java.opts=-Xmx8000m;
SET mapreduce.reduce.memory.mb=8192;
SET mapreduce.reduce.java.opts=-Xmx8000m;
set hive.auto.convert.join=true;
SET mapred.reduce.tasks=20;
SET role admin;
-- Register the in-house UDF used for gray-group bucketing
-- (presumably a base-conversion function, called below as convup(value, 16, 10) - TODO confirm)
ADD JAR hdfs:///user/hive/share/lib/udf/hive-udf-1.0-SNAPSHOT.jar;
CREATE TEMPORARY FUNCTION convup AS 'com.gmei.hive.common.udf.UDFConvUpgrade';
-- Rebuild the whole history (2020-07-24 through yesterday) into the target snapshot partition.
-- Output grain: one row per calendar day (day_id) and gray group (grey_type).
INSERT OVERWRITE TABLE pm.tl_pm_fangan_d PARTITION (PARTITION_DAY = ${partition_day})
SELECT
    t1.partition_date AS day_id
    ,grey_type
    ,count(DISTINCT t2.device_id) AS DAU_730
    ,count(DISTINCT CASE WHEN device_type = '新增' THEN t2.device_id END) AS new_DAU
    ,count(DISTINCT CASE WHEN device_type = '老活' THEN t2.device_id END) AS old_DAU
    -- Weighted DAU: new devices count for one tenth, returning devices count in full
    ,round(count(DISTINCT CASE WHEN device_type = '新增' THEN t2.device_id END)/10+count(DISTINCT CASE WHEN device_type = '老活' THEN t2.device_id END),0) AS quanzhong_DAU
    -- Next-day retention (%): t only matches rows for the day after t1.partition_date
    ,round(count(DISTINCT t.device_id)/count(DISTINCT t2.device_id)*100,2) AS liucun
    ,sum(wel_pv) AS wel_pv
    ,count(DISTINCT CASE WHEN wel_pv IS NOT NULL AND wel_pv <> 0 THEN t3.cl_id END) AS wel_uv
    ,sum(pay_num) AS pay_num
    ,count(DISTINCT t5.device_id) AS valid_dev_num
    ,sum(clue_num) AS valid_pv
FROM
(-- Daily active devices, labelled with device type and gray group
    SELECT
        concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) AS partition_date
        ,device_id
        ,CASE WHEN active_type = '4' THEN '老活'
              WHEN active_type IN ('1','2') THEN '新增' END AS device_type
        -- Gray group is decided by the last character of convup(sha1(device_id),16,10): 0-4 is group A, anything else is group B
        ,CASE WHEN substr(convup(sha1(device_id),16,10),-1) IN ('0','1','2','3','4') THEN '测试组A(方案库)' ELSE '测试组B(无方案库)' END AS grey_type
    FROM online.ml_device_day_active_status
    WHERE partition_date>='20200724' AND partition_date<=regexp_replace(date_sub(current_date(),1),'-','')
      AND active_type IN ('1','2','4')
)t1
JOIN
(-- Devices that reported an app version 7.30 or later on that day
    SELECT
        concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) AS partition_date
        ,device_id
    FROM
    (
        SELECT partition_date,app_version_list,device_id
        FROM online.ml_device_updates
        WHERE partition_date>='20200724' AND partition_date<=regexp_replace(date_sub(current_date(),1),'-','')
    )a
    LATERAL VIEW EXPLODE (app_version_list) t AS app_version
    -- NOTE(review): major version is pinned to 7, so 8.x and later are excluded - confirm this is intended
    WHERE int(split(t.app_version,'\\.')[1]) >= 30
      AND int(split(t.app_version,'\\.')[0]) = 7
    GROUP BY partition_date,device_id
)t2
ON t1.partition_date = t2.partition_date
AND t1.device_id = t2.device_id
LEFT JOIN
(-- Activity used for next-day retention, de-duplicated to one row per device and day
    SELECT
        device_id
        ,concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) AS partition_date
    FROM online.ml_device_day_active_status
    WHERE partition_date>='20200724' AND partition_date<=regexp_replace(date_sub(current_date(),1),'-','')
    GROUP BY device_id,partition_date
)t
-- The next-day condition must live in the ON clause: joining on device_id alone repeats
-- every DAU row once per active day of the device and inflates the sum() metrics above
ON t1.device_id = t.device_id
AND cast(date_add(t1.partition_date,1) AS string) = t.partition_date
LEFT JOIN
(-- Welfare detail page views per device and day, restricted to app version 7.30 or later
    SELECT
        concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) AS partition_date
        ,cl_id
        ,count(1) AS wel_pv
    FROM online.bl_hdfs_maidian_updates
    WHERE partition_date>='20200724' AND partition_date<=regexp_replace(date_sub(current_date(),1),'-','')
      AND page_name = 'welfare_detail'
      AND action = 'page_view'
      AND int(split(app_version,'\\.')[1]) >= 30
      AND int(split(app_version,'\\.')[0]) = 7
    GROUP BY partition_date,cl_id
)t3
ON t1.partition_date = t3.partition_date
AND t1.device_id = t3.cl_id
LEFT JOIN
(-- Paid orders of pure users per device and pay date, read from the latest order snapshot
    SELECT
        device_id
        ,SUBSTR(pay_time, 1, 10) AS pay_date
        ,count(DISTINCT order_id) AS pay_num
    FROM online.ml_trade_order_detail_day
    WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','')
      AND REGEXP_REPLACE(SUBSTR(pay_time, 1, 10), '-', '') >= '20200724'
      AND REGEXP_REPLACE(SUBSTR(pay_time, 1, 10), '-', '') <=regexp_replace(date_sub(current_date(),1),'-','')
      AND is_pure_user = 'true'
    GROUP BY device_id,SUBSTR(pay_time, 1, 10)
)t4
ON t1.partition_date = t4.pay_date
AND t1.device_id = t4.device_id
LEFT JOIN
(-- Valid clues per device and day.
 -- A clue is one (user, merchant, month) combination with a valid consult, a paid order or a
 -- call of at least 30 seconds. It is dated at the earliest of those three first-event dates.
    SELECT
        from_unixtime(unix_timestamp(T1.DATA_DAY,'yyyyMMdd'),'yyyy-MM-dd') dt
        ,device_id
        ,count(1) AS clue_num
    FROM
    (
        SELECT
            NVL(T1.USER_ID,T2.USER_ID) AS USER_ID,
            NVL(T1.merchant_id,T2.merchant_id) AS merchant_id,
            NVL(T1.DATA_MONTH,T2.CALL_MONTH) AS DATA_MONTH,
            -- 99999999 is a sentinel that sorts after every real yyyyMMdd date
            least(NVL(DATA_DAY,'99999999'),NVL(FIRST_CALL_DATE,'99999999')) AS DATA_DAY,
            NVL(T1.CONSULT_NUM,0) AS CONSULT_NUM,
            NVL(T1.PAY_NUM,0) AS PAY_NUM,
            NVL(T2.call_num,0) AS CALL_NUM
        FROM
        (-- Consults and payments merged per user, merchant and month
            SELECT
                NVL(T3.USER_ID,T4.USER_ID) AS USER_ID,
                NVL(T3.merchant_id,T4.merchant_id) AS merchant_id,
                NVL(T3.CONSULT_MONTH,T4.PAY_MONTH) AS DATA_MONTH,
                least(NVL(first_consult_date,'99999999'),NVL(FIRST_PAY_DATE,'99999999')) AS DATA_DAY,
                NVL(T3.CONSULT_NUM,0) AS CONSULT_NUM,
                NVL(T4.PAY_NUM,0) AS PAY_NUM
            FROM
            (-- First valid consult date per user, merchant and month
                SELECT
                    user_id
                    ,merchant_id
                    ,substr(partition_date,1,6) AS consult_month
                    ,min(partition_date) AS first_consult_date
                    ,1 AS consult_num
                FROM online.AL_COMMUNITY_USER_VALID_CONSULT_DAY
                WHERE partition_date BETWEEN '20200701' AND regexp_replace(date_sub(current_date(),1),'-','')
                  AND is_valid_consult='true'
                  AND merchant_id IS NOT NULL
                GROUP BY user_id,merchant_id,substr(partition_date,1,6)
            )T3
            FULL JOIN
            (-- First pay date per user, merchant and month (pure users only)
                SELECT
                    T.USER_ID,
                    T.merchant_id,
                    regexp_replace(SUBSTR(T.PAY_DATE,1,7),'-','') AS PAY_MONTH,
                    MIN(regexp_replace(SUBSTR(T.PAY_DATE,1,10),'-','')) AS FIRST_PAY_DATE,
                    1 AS PAY_NUM
                FROM ONLINE.ML_TRADE_ORDER_DETAIL_DAY T
                WHERE T.PARTITION_DATE = regexp_replace(date_sub(current_date(),1),'-','')
                  AND T.PAY_DATE BETWEEN '2020-07-01' AND date_sub(current_date(),1)
                  AND T.is_pure_user='true'
                GROUP BY T.USER_ID,T.merchant_id,regexp_replace(SUBSTR(T.PAY_DATE,1,7),'-','')
            ) T4
            ON T3.USER_ID = T4.USER_ID AND T3.merchant_id = T4.merchant_id AND T3.CONSULT_MONTH = T4.PAY_MONTH
            GROUP BY NVL(T3.USER_ID,T4.USER_ID),NVL(T3.merchant_id,T4.merchant_id),NVL(T3.CONSULT_MONTH,T4.PAY_MONTH),
                     least(NVL(first_consult_date,'99999999'),NVL(FIRST_PAY_DATE,'99999999')),NVL(T3.CONSULT_NUM,0),NVL(T4.PAY_NUM,0)
        )T1
        FULL JOIN
        (-- First qualifying call date per user, merchant and month
            SELECT
                user_id
                ,merchant_id
                -- a.partition_date is already yyyyMMdd, so the month key is its first 6 characters.
                -- It has to be 6 characters to be comparable with DATA_MONTH in the join below.
                ,substr(a.partition_date,1,6) AS CALL_MONTH
                ,MIN(a.partition_date) AS FIRST_CALL_DATE
                ,1 AS call_num
            FROM
            (-- Call records lasting at least 30 seconds, dated by call_time
                SELECT sub_id,REGEXP_REPLACE(SUBSTR(call_time,0,10), '-', '') AS partition_date
                FROM tl.tl_gm_sl_ali_virtual_phone_call_detail
                WHERE partition_day=regexp_replace(date_sub(current_date(),1),'-','')
                  AND REGEXP_REPLACE(SUBSTR(call_time,0,10), '-', '')>='20200701'
                  AND REGEXP_REPLACE(SUBSTR(call_time,0,10), '-', '')<=regexp_replace(date_sub(current_date(),1),'-','')
                  AND unix_timestamp(substr(start_time,1,19))+30<=unix_timestamp(substr(release_time,1,19))
            )a
            LEFT JOIN
            (-- Ali virtual phone binding records
                SELECT id,sub_id
                FROM tl.tl_gm_sl_ali_virtual_phone_binding
                WHERE partition_day=regexp_replace(date_sub(current_date(),1),'-','')
            )b
            ON a.sub_id = b.sub_id
            LEFT JOIN
            (
                SELECT id,platform_binding_id
                FROM tl.tl_gm_sl_virtual_phone_binding
                WHERE partition_day=regexp_replace(date_sub(current_date(),1),'-','')
            )c
            ON b.id = c.platform_binding_id
            LEFT JOIN
            (
                SELECT phone_binding_id,lead_task_id
                FROM tl.tl_gm_sl_lead_task_phone_binding
                WHERE partition_day=regexp_replace(date_sub(current_date(),1),'-','')
            )d
            ON c.id = d.phone_binding_id
            JOIN
            (-- Lead tasks with source 2 (per the original note: user-authorized phone call)
                SELECT id,user_id,merchant_id
                FROM tl.tl_gm_sl_lead_task
                WHERE partition_day=regexp_replace(date_sub(current_date(),1),'-','')
                  AND source='2'
            )e
            ON d.lead_task_id = e.id
            GROUP BY user_id,merchant_id,substr(a.partition_date,1,6)
        )T2
        ON T1.USER_ID = T2.USER_ID AND T1.merchant_id = T2.merchant_id AND T1.DATA_MONTH = T2.CALL_MONTH
    )T1
    JOIN
    (-- Map each user and day to the first entry of device_list (empty string when the list is empty)
        SELECT
            user_id
            ,partition_date
            ,if(size(device_list) > 0, device_list [ 0 ], '') AS device_id
        FROM online.ml_user_updates
        WHERE partition_date>='20200701' AND partition_date<=regexp_replace(date_sub(current_date(),1),'-','')
    )T2
    ON T1.USER_ID = T2.USER_ID AND T1.DATA_DAY = T2.PARTITION_DATE
    GROUP BY from_unixtime(unix_timestamp(T1.DATA_DAY,'yyyyMMdd'),'yyyy-MM-dd'),device_id
)t5
ON t1.partition_date = t5.dt
AND t1.device_id = t5.device_id
GROUP BY t1.partition_date,grey_type
#step1_1.job
# Command job: runs waitsuccess.sh with arguments "hive online ml_device_day_active_status".
# Presumably blocks until that upstream Hive table is ready for the day - TODO confirm against waitsuccess.sh.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_device_day_active_status
\ No newline at end of file
#step1_10.job
# Command job: runs waitsuccess.sh with arguments "hive tl tl_gm_sl_lead_task".
# Presumably blocks until that upstream Hive table is ready for the day - TODO confirm against waitsuccess.sh.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive tl tl_gm_sl_lead_task
\ No newline at end of file
#step1_11.job
# Command job: runs waitsuccess.sh with arguments "hive online ml_user_updates".
# Presumably blocks until that upstream Hive table is ready for the day - TODO confirm against waitsuccess.sh.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_user_updates
\ No newline at end of file
#step1_2.job
# Command job: runs waitsuccess.sh with arguments "hive online ml_device_updates".
# Presumably blocks until that upstream Hive table is ready for the day - TODO confirm against waitsuccess.sh.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_device_updates
\ No newline at end of file
#step1_3.job
# Command job: runs waitsuccess.sh with arguments "hive online bl_hdfs_maidian_updates".
# Presumably blocks until that upstream Hive table is ready for the day - TODO confirm against waitsuccess.sh.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online bl_hdfs_maidian_updates
\ No newline at end of file
#step1_4.job
# Command job: runs waitsuccess.sh with arguments "hive online ml_trade_order_detail_day".
# Presumably blocks until that upstream Hive table is ready for the day - TODO confirm against waitsuccess.sh.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_trade_order_detail_day
\ No newline at end of file
#step1_5.job
# Command job: runs waitsuccess.sh with arguments "hive online AL_COMMUNITY_USER_VALID_CONSULT_DAY".
# Presumably blocks until that upstream Hive table is ready for the day - TODO confirm against waitsuccess.sh.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online AL_COMMUNITY_USER_VALID_CONSULT_DAY
\ No newline at end of file
#step1_6.job
# Command job: runs waitsuccess.sh with arguments "hive tl tl_gm_sl_ali_virtual_phone_call_detail".
# Presumably blocks until that upstream Hive table is ready for the day - TODO confirm against waitsuccess.sh.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive tl tl_gm_sl_ali_virtual_phone_call_detail
\ No newline at end of file
#step1_7.job
# Command job: runs waitsuccess.sh with arguments "hive tl tl_gm_sl_ali_virtual_phone_binding".
# Presumably blocks until that upstream Hive table is ready for the day - TODO confirm against waitsuccess.sh.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive tl tl_gm_sl_ali_virtual_phone_binding
\ No newline at end of file
#step1_8.job
# Command job: runs waitsuccess.sh with arguments "hive tl tl_gm_sl_virtual_phone_binding".
# Presumably blocks until that upstream Hive table is ready for the day - TODO confirm against waitsuccess.sh.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive tl tl_gm_sl_virtual_phone_binding
\ No newline at end of file
#step1_9.job
# Command job: runs waitsuccess.sh with arguments "hive tl tl_gm_sl_lead_task_phone_binding".
# Presumably blocks until that upstream Hive table is ready for the day - TODO confirm against waitsuccess.sh.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive tl tl_gm_sl_lead_task_phone_binding
\ No newline at end of file
#step2.job
# Command job: runs the shared "hive" shell wrapper with the report name fangan_day.
# Declared to depend on all eleven step1_* jobs, so it is scheduled only after they finish.
# NOTE(review): presumably the wrapper executes the fangan_day ETL script - confirm against the wrapper.
type=command
dependencies=step1_1,step1_2,step1_3,step1_4,step1_5,step1_6,step1_7,step1_8,step1_9,step1_10,step1_11
command=sh /home/bi/bi-report/lib/shell/hive fangan_day
\ No newline at end of file
-- Report query: scheme gray-release data.
-- Reads the snapshot partition written for yesterday and exposes each metric under its Chinese display name.
SELECT
    d.day_id        AS `日期`,
    d.grey_type     AS `灰度类型`,
    d.DAU_730       AS `7.30及以上版本DAU`,
    d.new_DAU       AS `新用户DAU`,
    d.old_DAU       AS `老用户DAU`,
    d.quanzhong_DAU AS `权重日活`,
    d.liucun        AS `次日留存率(%)`,
    d.wel_pv        AS `商详PV`,
    d.wel_uv        AS `商详UV`,
    d.pay_num       AS `支付订单数`,
    d.valid_dev_num AS `有效线索设备数`,
    d.valid_pv      AS `有效线索人次`
FROM pm.tl_pm_fangan_d d
WHERE d.partition_day = regexp_replace(DATE_SUB(current_date, 1), '-', '')
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment