Commit 9d976098 authored by yindanlei's avatar yindanlei

add fangan_day report codes

parent 81a61076
......@@ -19,15 +19,17 @@ CREATE TABLE IF NOT EXISTS pm.tl_pm_fangan_d
day_id string comment '{"chs_name":"日期","description":"","etl":"","value":"","remark":""}',
grey_type string comment '{"chs_name":"灰度类型","description":"","etl":"","value":"","remark":""}',
DAU_730 BIGINT comment '{"chs_name":"7.30及以上版本DAU","description":"","etl":"","value":"","remark":""}',
new_DAU BIGINT comment '{"chs_name":"新用户DAU","description":"","etl":"","value":"","remark":""}',
old_DAU BIGINT comment '{"chs_name":"老用户DAU","description":"","etl":"","value":"","remark":""}',
new_DAU BIGINT comment '{"chs_name":"新DAU","description":"","etl":"","value":"","remark":""}',
old_DAU BIGINT comment '{"chs_name":"老DAU","description":"","etl":"","value":"","remark":""}',
quanzhong_DAU BIGINT comment '{"chs_name":"权重日活","description":"","etl":"","value":"","remark":""}',
liucun DOUBLE comment '{"chs_name":"次日留存率(%)","description":"","etl":"","value":"","remark":""}',
wel_pv BIGINT comment '{"chs_name":"商详PV","description":"","etl":"","value":"","remark":""}',
wel_uv BIGINT comment '{"chs_name":"商详UV","description":"","etl":"","value":"","remark":""}',
pay_num BIGINT comment '{"chs_name":"支付订单数","description":"","etl":"","value":"","remark":""}',
valid_dev_num BIGINT comment '{"chs_name":"有效线索设备数","description":"","etl":"","value":"","remark":""}',
valid_pv BIGINT comment '{"chs_name":"有效线索人次","description":"","etl":"","value":"","remark":""}'
valid_pv BIGINT comment '{"chs_name":"有效线索人次","description":"","etl":"","value":"","remark":""}',
true_valid_dev_num BIGINT comment '{"chs_name":"真实用户有效线索设备数","description":"","etl":"","value":"","remark":""}',
true_valid_pv BIGINT comment '{"chs_name":"真实用户有效线索人次","description":"","etl":"","value":"","remark":""}'
)comment '方案灰度数据'
PARTITIONED BY (PARTITION_DAY STRING comment '分区日期')
ROW FORMAT DELIMITED
......
......@@ -15,51 +15,77 @@ INSERT OVERWRITE TABLE pm.tl_pm_fangan_d PARTITION (PARTITION_DAY = ${partition_
SELECT t1.partition_date as day_id
,grey_type
,count(DISTINCT t2.device_id) as DAU_730
,count(DISTINCT CASE WHEN device_type = '新增' THEN t2.device_id END) as new_DAU
,count(DISTINCT CASE WHEN device_type = '老活' THEN t2.device_id END) as old_DAU
,round(count(DISTINCT CASE WHEN device_type = '新增' THEN t2.device_id END)/10+count(DISTINCT CASE WHEN device_type = '老活' THEN t2.device_id END),0) as quanzhong_DAU
,round(count(DISTINCT case WHEN date_add(t1.partition_date,1) = t.partition_date then t.device_id end)/count(DISTINCT t2.device_id)*100,2) as liucun
,count(DISTINCT t1.device_id) as DAU_730
,count(DISTINCT CASE WHEN device_type = '新增' THEN t1.device_id END) as new_DAU
,count(DISTINCT CASE WHEN device_type = '老活' THEN t1.device_id END) as old_DAU
,round(count(DISTINCT CASE WHEN device_type = '老活' AND device_os_type = 'android' AND channel_type = 'AI' THEN t1.device_id END)*0.16
+count(DISTINCT CASE WHEN device_type = '老活' AND device_os_type = 'android' AND channel_type = '医美' THEN t1.device_id END)*0.70
+count(DISTINCT CASE WHEN device_type = '新增' AND device_os_type = 'android' AND channel_type = 'AI' THEN t1.device_id END)*0.10
+count(DISTINCT CASE WHEN device_type = '新增' AND device_os_type = 'android' AND channel_type = '医美' THEN t1.device_id END)*0.20
+count(DISTINCT CASE WHEN device_type = '老活' AND device_os_type = 'ios' AND channel_type = 'AI' THEN t1.device_id END)*0.35
+count(DISTINCT CASE WHEN device_type = '老活' AND device_os_type = 'ios' AND channel_type = '医美' THEN t1.device_id END)*1.00
+count(DISTINCT CASE WHEN device_type = '新增' AND device_os_type = 'ios' AND channel_type = 'AI' THEN t1.device_id END)*0.21
+count(DISTINCT CASE WHEN device_type = '新增' AND device_os_type = 'ios' AND channel_type = '医美' THEN t1.device_id END)*0.21,0) as quanzhong_DAU
,round(sum(t1.rent_num)/count(DISTINCT t1.device_id)*100,2) as liucun
,sum(wel_pv) as wel_pv
,count(DISTINCT CASE WHEN wel_pv is not NULL and wel_pv <> 0 THEN t3.cl_id END) as wel_uv
,sum(pay_num) as pay_num
,count(DISTINCT t5.device_id) as valid_dev_num
,sum(clue_num) as valid_pv
,sum(t5.clue_num) as valid_pv
,count(DISTINCT t6.device_id) as true_valid_dev_num
,sum(t6.clue_num) as true_valid_pv
FROM
(---dau数据
SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) as partition_date,device_id
( SELECT t1.partition_date,t1.device_id,device_type,device_os_type,channel_type,grey_type,count(DISTINCT CASE WHEN date_add(t1.partition_date,1) = t.partition_date THEN t.device_id END) as rent_num
FROM
(---dau数据
SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) as partition_date,device_id,device_os_type
,case WHEN active_type = '4' THEN '老活'
WHEN active_type in ('1','2') then '新增' END as device_type
,CASE WHEN substr(convup(sha1(device_id),16,10),-1) in ('0','1','2','3','4') THEN '测试组A(方案库)' else '测试组B(无方案库)' END as grey_type
,CASE WHEN (partition_date >= '20200725' AND partition_date <= '20200727' AND substr(convup(sha1(device_id),16,10),-1) in ('0','1'))
or (partition_date = '20200724' AND substr(convup(sha1(device_id),16,10),-1) in ('0','1','2','3','4'))
or (partition_date >= '20200728' AND substr(convup(sha1(device_id),16,10),-1) in ('0','1','2','3','4')) THEN '测试组A(方案库)'
else '测试组B(无方案库)' END as grey_type
,CASE WHEN (partition_date>='20190601' and tmp.col2 = 'AI')
or (partition_date < '20200301' AND partition_date>='20190601' and first_channel_source_type like 'promotion_toutiao_jy%')
or (partition_date>='20200601' and ((first_channel_source_type like 'promotion_toutiao_jy%') or (first_channel_source_type like 'dyand%') or (first_channel_source_type like 'douyin%')))
THEN 'AI' ELSE '医美' END as channel_type
FROM online.ml_device_day_active_status
LEFT JOIN
(SELECT col1,col2 --col1:子渠道,col2:是否属于AI,col3:标识
FROM pm.tl_pm_ydl
WHERE col3='0204_danlei_channel')tmp
on first_channel_source_type=tmp.col1
where partition_date>='20200724' AND partition_date<=regexp_replace(date_sub(current_date(),1),'-','')
AND active_type in ('1','2','4')
)t1
)t1
JOIN
(--7.30.0 版本以上dau数据
JOIN
(--7.30.0 版本以上dau数据
SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) as partition_date,device_id
FROM
(
SELECT partition_date,app_version_list,device_id
FROM online.ml_device_updates
where partition_date>='20200724' AND partition_date<=regexp_replace(date_sub(current_date(),1),'-','')
WHERE partition_date>='20200724' AND partition_date<=regexp_replace(date_sub(current_date(),1),'-','')
)a
LATERAL VIEW EXPLODE (app_version_list) t as app_version
WHERE int(split(t.app_version,'\\.')[1]) >= 30 --版本限制为7.30.0以上
and int(split(t.app_version,'\\.')[0]) = 7
GROUP BY partition_date,device_id
)t2
)t2
ON t1.partition_date = t2.partition_date
AND t1.device_id = t2.device_id
LEFT JOIN
(
LEFT JOIN
(
SELECT device_id
,concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) as partition_date
FROM online.ml_device_day_active_status
where partition_date>='20200724' AND partition_date<=regexp_replace(date_sub(current_date(),1),'-','')
)t
on t1.device_id = t.device_id
WHERE partition_date >= '20200724'
and partition_date <= regexp_replace(date_sub(current_date(),1),'-','')
)t
on t1.device_id = t.device_id
GROUP BY t1.partition_date,t1.device_id,device_type,grey_type,device_os_type,channel_type
)t1
LEFT JOIN
(--商详页pv
......@@ -81,7 +107,7 @@ LEFT JOIN
FROM online.ml_trade_order_detail_day --美购交易表
WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','')
AND REGEXP_REPLACE(SUBSTR(pay_time, 1, 10), '-', '') >= '20200724'
AND REGEXP_REPLACE(SUBSTR(pay_time, 1, 10), '-', '') <=regexp_replace(date_sub(current_date(),1),'-','')
AND REGEXP_REPLACE(SUBSTR(pay_time, 1, 10), '-', '') <= regexp_replace(date_sub(current_date(),1),'-','')
AND is_pure_user = 'true' --取纯用户
GROUP BY device_id,SUBSTR(pay_time, 1, 10)
)t4
......@@ -120,7 +146,7 @@ LEFT JOIN
,1 AS consult_num
FROM online.AL_COMMUNITY_USER_VALID_CONSULT_DAY
WHERE
partition_date between '20200701' AND regexp_replace(date_sub(current_date(),1),'-','')
partition_date between '20200724' AND regexp_replace(date_sub(current_date(),1),'-','')
AND is_valid_consult='true'
AND merchant_id IS NOT NULL
GROUP BY user_id,merchant_id,substr(partition_date,1,6)
......@@ -136,7 +162,7 @@ LEFT JOIN
FROM ONLINE.ML_TRADE_ORDER_DETAIL_DAY T
WHERE
T.PARTITION_DATE = regexp_replace(date_sub(current_date(),1),'-','')
AND T.PAY_DATE between '2020-07-01' AND date_sub(current_date(),1)
AND T.PAY_DATE between '2020-07-24' AND date_sub(current_date(),1)
AND T.is_pure_user='true'
GROUP BY T.USER_ID,T.merchant_id,regexp_replace(SUBSTR(T.PAY_DATE,1,7),'-','')
) T4
......@@ -147,16 +173,16 @@ LEFT JOIN
FULL JOIN
(
SELECT user_id,merchant_id,regexp_replace(SUBSTR(a.partition_date,1,7),'-','') AS CALL_MONTH,
SELECT user_id,merchant_id,regexp_replace(SUBSTR(a.partition_date,1,6),'-','') AS CALL_MONTH,
MIN(partition_date) as FIRST_CALL_DATE,1 AS call_num
FROM
(
SELECT sub_id,REGEXP_REPLACE(SUBSTR(call_time,0,10), '-', '') as partition_date
FROM tl.tl_gm_sl_ali_virtual_phone_call_detail --通话记录表,call_type呼叫类型
WHERE partition_day=regexp_replace(date_sub(current_date(),1),'-','')
AND REGEXP_REPLACE(SUBSTR(call_time,0,10), '-', '')>='20200701'
AND REGEXP_REPLACE(SUBSTR(call_time,0,10), '-', '')>='20200724'
AND REGEXP_REPLACE(SUBSTR(call_time,0,10), '-', '')<=regexp_replace(date_sub(current_date(),1),'-','')
AND unix_timestamp(substr(start_time,1,19))+30<=unix_timestamp(substr(release_time,1,19)) --通话时长大于30秒
AND start_time+ interval 30 second <= release_time --通话时长大于30秒
)a
LEFT JOIN
(
......@@ -187,7 +213,7 @@ LEFT JOIN
AND source='2' --用户行为电话授权
)e
ON d.lead_task_id = e.id
GROUP BY user_id,merchant_id,regexp_replace(SUBSTR(a.partition_date,1,7),'-','')
GROUP BY user_id,merchant_id,regexp_replace(SUBSTR(a.partition_date,1,6),'-','')
)T2
ON T1.USER_ID = T2.USER_ID AND T1.merchant_id = T2.merchant_id AND T1.DATA_MONTH = T2.CALL_MONTH
)T1
......@@ -196,13 +222,187 @@ LEFT JOIN
SELECT user_id,partition_date,
if(size(device_list) > 0, device_list [ 0 ], '') AS device_id
FROM online.ml_user_updates
WHERE partition_date>='20200701' AND partition_date<=regexp_replace(date_sub(current_date(),1),'-','')
WHERE partition_date>='20200724' AND partition_date<=regexp_replace(date_sub(current_date(),1),'-','')
)T2
ON T1.USER_ID = T2.USER_ID AND T1.DATA_DAY = T2.PARTITION_DATE
GROUP BY from_unixtime(unix_timestamp(T1.DATA_DAY,'yyyyMMdd'),'yyyy-MM-dd'),device_id
)t5
ON t1.partition_date = t5.dt
AND t1.device_id = t5.device_id
-- t6: number of valid clues per (day, device), restricted to real users.
-- A clue is one (user, merchant, month) combination that has a valid consult, a paid order,
-- or a qualifying phone call. It is dated by the earliest of those three events in the month.
-- Rows are dropped when the user appears in the exclusion list T2 (doctor accounts,
-- puppet or classify users, staff, users seen on a doctor-login device), or when the
-- device has no row in ml_device_day_active_status with active_type 1, 2 or 4 on that day.
-- The outer query reads t6.device_id and t6.clue_num for true_valid_dev_num and true_valid_pv.
LEFT JOIN
(-- valid-clue data by day
SELECT
-- DATA_DAY is yyyyMMdd, reformat it to yyyy-MM-dd so it can be compared with t1.partition_date
from_unixtime(unix_timestamp(T1.DATA_DAY,'yyyyMMdd'),'yyyy-MM-dd') dt,dev.device_id,count(1) AS clue_num
FROM
(
-- Merge the consult/pay side (inner T1) with the call side (T2) per user, merchant and month.
-- 99999999 is a sentinel for a missing side so that least() picks the date that exists.
SELECT NVL(T1.USER_ID,T2.USER_ID) AS USER_ID,
NVL(T1.merchant_id,T2.merchant_id) AS merchant_id,
NVL(T1.DATA_MONTH,T2.CALL_MONTH) AS DATA_MONTH,
least(NVL(DATA_DAY,'99999999'),NVL(FIRST_CALL_DATE,'99999999')) AS DATA_DAY,
NVL(T1.CONSULT_NUM,0) AS CONSULT_NUM,
NVL(T1.PAY_NUM,0) AS PAY_NUM,
NVL(T2.call_num,0) AS CALL_NUM
FROM
(
-- Consults (T3) full-joined with paid orders (T4) on user, merchant and month.
SELECT
NVL(T3.USER_ID,T4.USER_ID) AS USER_ID,
NVL(T3.merchant_id,T4.merchant_id) AS merchant_id,
NVL(T3.CONSULT_MONTH,T4.PAY_MONTH) AS DATA_MONTH,
least(NVL(first_consult_date,'99999999'),NVL(FIRST_PAY_DATE,'99999999')) AS DATA_DAY,
NVL(T3.CONSULT_NUM,0) AS CONSULT_NUM,
NVL(T4.PAY_NUM,0) AS PAY_NUM
FROM
(
-- One row per user, merchant and month (yyyyMM) with the first valid-consult day.
SELECT
user_id
,merchant_id
,substr(partition_date,1,6) AS consult_month
,min(partition_date) AS first_consult_date
,1 AS consult_num
FROM online.AL_COMMUNITY_USER_VALID_CONSULT_DAY
WHERE
partition_date between '20200724' AND regexp_replace(date_sub(current_date(),1),'-','')
AND is_valid_consult='true'
AND merchant_id IS NOT NULL
GROUP BY user_id,merchant_id,substr(partition_date,1,6)
)T3
FULL JOIN
(
-- One row per user, merchant and month with the first pay day, read from the
-- partition of yesterday only. PAY_DATE is compared against dashed dates, so the
-- dashes are stripped to get yyyyMM and yyyyMMdd values comparable with T3.
SELECT
T.USER_ID,
T.merchant_id,
regexp_replace(SUBSTR(T.PAY_DATE,1,7),'-','') AS PAY_MONTH,
MIN(regexp_replace(SUBSTR(T.PAY_DATE,1,10),'-','')) AS FIRST_PAY_DATE,
1 AS PAY_NUM
FROM ONLINE.ML_TRADE_ORDER_DETAIL_DAY T
WHERE
T.PARTITION_DATE = regexp_replace(date_sub(current_date(),1),'-','')
AND T.PAY_DATE between '2020-07-24' AND date_sub(current_date(),1)
AND T.is_pure_user='true'
GROUP BY T.USER_ID,T.merchant_id,regexp_replace(SUBSTR(T.PAY_DATE,1,7),'-','')
) T4
ON T3.USER_ID = T4.USER_ID AND T3.merchant_id = T4.merchant_id AND T3.CONSULT_MONTH = T4.PAY_MONTH
-- Grouping by every selected expression, no aggregates are computed here.
GROUP BY NVL(T3.USER_ID,T4.USER_ID),NVL(T3.merchant_id,T4.merchant_id),NVL(T3.CONSULT_MONTH,T4.PAY_MONTH),
least(NVL(first_consult_date,'99999999'),NVL(FIRST_PAY_DATE,'99999999')),NVL(T3.CONSULT_NUM,0),NVL(T4.PAY_NUM,0)
)T1
FULL JOIN
(
-- One row per user, merchant and month with the first qualifying call day.
-- a.partition_date already has its dashes removed in subquery a, so the
-- regexp_replace here changes nothing and SUBSTR 1..6 yields yyyyMM.
SELECT user_id,merchant_id,regexp_replace(SUBSTR(a.partition_date,1,6),'-','') AS CALL_MONTH,
MIN(partition_date) as FIRST_CALL_DATE,1 AS call_num
FROM
(
SELECT sub_id,REGEXP_REPLACE(SUBSTR(call_time,0,10), '-', '') as partition_date
FROM tl.tl_gm_sl_ali_virtual_phone_call_detail -- call record table, call_type is the call type
WHERE partition_day=regexp_replace(date_sub(current_date(),1),'-','')
AND REGEXP_REPLACE(SUBSTR(call_time,0,10), '-', '')>='20200724'
AND REGEXP_REPLACE(SUBSTR(call_time,0,10), '-', '')<=regexp_replace(date_sub(current_date(),1),'-','')
-- NOTE(review): the types of start_time and release_time are not visible here - confirm the
-- engine accepts interval arithmetic on them.
AND start_time+ interval 30 second <= release_time -- call duration of 30 seconds or more
)a
-- Resolve the call to a lead task: call -> Ali binding -> platform binding -> task binding -> lead task.
LEFT JOIN
(
SELECT id,sub_id
FROM tl.tl_gm_sl_ali_virtual_phone_binding -- Ali record table
WHERE partition_day=regexp_replace(date_sub(current_date(),1),'-','')
)b
ON a.sub_id = b.sub_id
LEFT JOIN
(
SELECT id,platform_binding_id
FROM tl.tl_gm_sl_virtual_phone_binding
WHERE partition_day=regexp_replace(date_sub(current_date(),1),'-','')
)c
ON b.id = c.platform_binding_id
LEFT JOIN
(
SELECT phone_binding_id,lead_task_id
FROM tl.tl_gm_sl_lead_task_phone_binding
WHERE partition_day=regexp_replace(date_sub(current_date(),1),'-','')
)d
ON c.id = d.phone_binding_id
-- Inner join: only calls that resolve to a lead task with source 2 are kept.
JOIN
(
SELECT id,user_id,merchant_id
FROM tl.tl_gm_sl_lead_task -- lead task table (a row is written once the user taps to authorize)
WHERE partition_day=regexp_replace(date_sub(current_date(),1),'-','')
AND source='2' -- phone authorization initiated by user action
)e
ON d.lead_task_id = e.id
GROUP BY user_id,merchant_id,regexp_replace(SUBSTR(a.partition_date,1,6),'-','')
)T2
ON T1.USER_ID = T2.USER_ID AND T1.merchant_id = T2.merchant_id AND T1.DATA_MONTH = T2.CALL_MONTH
)T1
-- Exclusion list of user ids. It is used as an anti-join through the WHERE clause
-- at the end of this subquery.
LEFT JOIN
(
SELECT user_id
FROM
(
-- doctor accounts
SELECT user_id
FROM online.tl_hdfs_doctor_view
WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','')
-- puppet (sock-puppet) accounts / model users
UNION ALL
SELECT user_id
FROM ml.ml_c_ct_ui_user_dimen_d
WHERE partition_day = regexp_replace(date_sub(current_date(),1),'-','')
AND (is_puppet = 'true' or is_classifyuser = 'true')
UNION ALL
-- Gengmei intranet (internal) users, this table is read without a partition filter
SELECT user_id
FROM dim.dim_device_user_staff
UNION ALL
-- users who have logged in on a doctor device
SELECT t1.user_id
FROM
(
SELECT user_id, v.device_id as device_id
FROM online.ml_user_history_detail
LATERAL VIEW EXPLODE(device_history_list) v AS device_id
WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','')
) t1
JOIN
(
SELECT device_id
FROM online.ml_device_history_detail
WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','')
AND is_login_doctor = '1'
) t2
ON t1.device_id = t2.device_id
)a
-- UNION ALL can repeat a user, collapse to one row per user_id
GROUP BY user_id
)T2
ON T1.user_id=T2.user_id
-- Map the user to a device for the clue day: first element of device_list, empty string when the list is empty.
LEFT JOIN
(
SELECT t.user_id,partition_date,
if(size(t.device_list) > 0, device_list [ 0 ], '') device_id
FROM online.ml_user_updates t
WHERE t.partition_date >= '20200724'
and t.partition_date <=regexp_replace(date_sub(current_date(),1),'-','')
)T3
ON T1.user_id = T3.user_id
AND T1.DATA_DAY = T3.partition_date
-- Inner join on the T3 device and day, so clue rows without a T3 match are dropped here as well.
JOIN
( -- 1. drop devices coming from bot-traffic and offer-wall channels
-- NOTE(review): the only filter visible here is active_type in 1, 2, 4 - confirm that this
-- is what excludes those channels.
SELECT partition_date,device_id
FROM online.ml_device_day_active_status
where partition_date >= '20200724'
AND partition_date <=regexp_replace(date_sub(current_date(),1),'-','')
AND active_type in ('1','2','4')
)dev
on dev.device_id = T3.device_id
AND dev.partition_date = T3.partition_date
-- Anti-join: keep only clues whose user is absent from the exclusion list T2.
WHERE (T2.user_id is null or T2.user_id = '')
GROUP BY from_unixtime(unix_timestamp(T1.DATA_DAY,'yyyyMMdd'),'yyyy-MM-dd'),dev.device_id
)t6
ON t1.partition_date = t6.dt
AND t1.device_id = t6.device_id
GROUP BY t1.partition_date,grey_type
#step1_12.job
# Command job that runs waitsuccess.sh with the arguments: hive online tl_hdfs_doctor_view.
# step2 lists step1_12 in its dependencies, so step2 starts only after this command exits.
# NOTE(review): presumably waitsuccess.sh blocks until that Hive table is ready - confirm in the script.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_doctor_view
\ No newline at end of file
#step1_13.job
# Command job that runs waitsuccess.sh with the arguments: hive ml ml_c_ct_ui_user_dimen_d.
# step2 lists step1_13 in its dependencies, so step2 starts only after this command exits.
# NOTE(review): presumably waitsuccess.sh blocks until that Hive table is ready - confirm in the script.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive ml ml_c_ct_ui_user_dimen_d
\ No newline at end of file
#step1_14.job
# Command job that runs waitsuccess.sh with the arguments: hive dim dim_device_user_staff.
# step2 lists step1_14 in its dependencies, so step2 starts only after this command exits.
# NOTE(review): presumably waitsuccess.sh blocks until that Hive table is ready - confirm in the script.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive dim dim_device_user_staff
\ No newline at end of file
#step1_15.job
# Command job that runs waitsuccess.sh with the arguments: hive online ml_user_history_detail.
# step2 lists step1_15 in its dependencies, so step2 starts only after this command exits.
# NOTE(review): presumably waitsuccess.sh blocks until that Hive table is ready - confirm in the script.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_user_history_detail
\ No newline at end of file
#step1_16.job
# Command job that runs waitsuccess.sh with the arguments: hive online ml_device_history_detail.
# step2 lists step1_16 in its dependencies, so step2 starts only after this command exits.
# NOTE(review): presumably waitsuccess.sh blocks until that Hive table is ready - confirm in the script.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_device_history_detail
\ No newline at end of file
#step2.job
type=command
dependencies=step1_1,step1_2,step1_3,step1_4,step1_5,step1_6,step1_7,step1_8,step1_9,step1_10,step1_11
dependencies=step1_1,step1_2,step1_3,step1_4,step1_5,step1_6,step1_7,step1_8,step1_9,step1_10,step1_11,step1_12,step1_13,step1_14,step1_15,step1_16
command=sh /home/bi/bi-report/lib/shell/hive fangan_day
\ No newline at end of file
......@@ -4,8 +4,8 @@ SELECT
day_id AS `日期`
,grey_type AS `灰度类型`
,DAU_730 AS `7.30及以上版本DAU`
,new_DAU AS `新用户DAU`
,old_DAU AS `老用户DAU`
,new_DAU AS `新DAU`
,old_DAU AS `老DAU`
,quanzhong_DAU AS `权重日活`
,liucun AS `次日留存率(%)`
,wel_pv AS `商详PV`
......@@ -13,5 +13,7 @@ SELECT
,pay_num AS `支付订单数`
,valid_dev_num AS `有效线索设备数`
,valid_pv AS `有效线索人次`
,true_valid_dev_num AS `真实用户有效线索设备数`
,true_valid_pv AS `真实用户有效线索人次`
FROM pm.tl_pm_fangan_d
where partition_day=regexp_replace(DATE_SUB(current_date,1) ,'-','')
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment