Commit 236accba authored by 赵建伟

Merge branch 'weiyimin' into 'master'

Weiyimin

See merge request !50
parents d9f061d4 04a89108
-- Hive session settings applied before the statements that follow in this script.
-- Submit MapReduce jobs to the queue named data.
SET mapreduce.job.queuename=data;
-- Map tasks: 8192 MB container with an 8000 MB JVM heap.
-- NOTE(review): that leaves only 192 MB of non-heap headroom inside the container - confirm this is intended.
SET mapreduce.map.memory.mb=8192;
SET mapreduce.map.java.opts=-Xmx8000m;
-- Reduce tasks: same container and heap sizes as the map tasks.
SET mapreduce.reduce.memory.mb=8192;
SET mapreduce.reduce.java.opts=-Xmx8000m;
-- Turn on automatic join conversion.
set hive.auto.convert.join=true;
-- Use a fixed count of 20 reduce tasks.
SET mapred.reduce.tasks=20;
-- Switch the session to the admin role.
SET role admin;
......@@ -138,7 +144,7 @@ FROM
partition_date
,device_id,device_os_type
,array(case when active_type in ('1','2') then '新增设备'
when active_type ='4' then '老活跃设备' end,'合计' as active_type
when active_type ='4' then '老活跃设备' end,'合计') as active_type
,array(case when substr(convup(setencryption(device_id,'sha-1'),16,10),-2,2)%20 = 0 then 'kyc灰度' else '非灰' end,'合计') as grey_type
FROM online.ml_device_day_active_status
where partition_date>=REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
......
clue_by_channel=线索pv
welfare_by_channel=核心页pv
wel_clue_by_channel=分渠道3日及7日内核心页和线索pv
\ No newline at end of file
#step1_1.job
# Command job that runs waitsuccess.sh with the arguments: hive online bl_hdfs_maidian_updates
# NOTE(review): presumably blocks until that Hive table is ready - confirm against the script.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online bl_hdfs_maidian_updates
\ No newline at end of file
#step1_10.job
# NOTE(review): job name inferred from the step1_1..step1_10 dependency list of step2 - confirm it matches the file name.
# Command job that runs waitsuccess.sh with the arguments: hive online ml_user_updates
# NOTE(review): presumably blocks until that Hive table is ready - confirm against the script.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_user_updates
\ No newline at end of file
#step1_2.job
# Command job that runs waitsuccess.sh with the arguments: hive online ml_device_day_active_status
# NOTE(review): presumably blocks until that Hive table is ready - confirm against the script.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_device_day_active_status
\ No newline at end of file
#step1_3.job
# Command job that runs waitsuccess.sh with the arguments: hive ml ml_d_ct_dv_devicespam_d
# NOTE(review): presumably blocks until that Hive table is ready - confirm against the script.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive ml ml_d_ct_dv_devicespam_d
\ No newline at end of file
#step1_4.job
# NOTE(review): job name inferred from the step1_1..step1_10 dependency list of step2 - confirm it matches the file name.
# Command job that runs waitsuccess.sh with the arguments: hive ml ml_c_et_msg_conversation_dimen_inc_d
# NOTE(review): presumably blocks until that Hive table is ready - confirm against the script.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive ml ml_c_et_msg_conversation_dimen_inc_d
\ No newline at end of file
#step1_5.job
# NOTE(review): job name inferred from the step1_1..step1_10 dependency list of step2 - confirm it matches the file name.
# Command job that runs waitsuccess.sh with the arguments: hive online ML_TRADE_ORDER_DETAIL_DAY
# NOTE(review): presumably blocks until that Hive table is ready - confirm against the script.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ML_TRADE_ORDER_DETAIL_DAY
\ No newline at end of file
#step1_6.job
# NOTE(review): job name inferred from the step1_1..step1_10 dependency list of step2 - confirm it matches the file name.
# Command job that runs waitsuccess.sh with the arguments: hive tl tl_gm_sl_ali_virtual_phone_binding
# NOTE(review): presumably blocks until that Hive table is ready - confirm against the script.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive tl tl_gm_sl_ali_virtual_phone_binding
\ No newline at end of file
#step1_7.job
# NOTE(review): job name inferred from the step1_1..step1_10 dependency list of step2 - confirm it matches the file name.
# Command job that runs waitsuccess.sh with the arguments: hive tl tl_gm_sl_virtual_phone_binding
# NOTE(review): presumably blocks until that Hive table is ready - confirm against the script.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive tl tl_gm_sl_virtual_phone_binding
\ No newline at end of file
#step1_8.job
# NOTE(review): job name inferred from the step1_1..step1_10 dependency list of step2 - confirm it matches the file name.
# Command job that runs waitsuccess.sh with the arguments: hive tl tl_gm_sl_lead_task_phone_binding
# NOTE(review): presumably blocks until that Hive table is ready - confirm against the script.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive tl tl_gm_sl_lead_task_phone_binding
\ No newline at end of file
#step1_9.job
# NOTE(review): job name inferred from the step1_1..step1_10 dependency list of step2 - confirm it matches the file name.
# Command job that runs waitsuccess.sh with the arguments: hive tl tl_gm_sl_lead_task
# NOTE(review): presumably blocks until that Hive table is ready - confirm against the script.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive tl tl_gm_sl_lead_task
\ No newline at end of file
#step2.job
# Runs after the ten step1_* jobs listed in dependencies have finished.
# Issues an HTTP GET to the local report service on port 8553. The URL path carries the report
# name (wel_clue_by_channel), a comma-separated address list, and one further address.
# NOTE(review): the two address segments are presumably recipients and cc - confirm against the API.
# NOTE(review): curl without --fail exits 0 on HTTP error responses, so a failed report may not fail this job - confirm.
# NOTE(review): the report queries also read tl.tl_gm_sl_ali_virtual_phone_call_detail and
# online.tl_hdfs_ios_idfa_tmp, and no wait job for either table is visible here - confirm whether one is needed.
type=command
dependencies=step1_1,step1_2,step1_3,step1_4,step1_5,step1_6,step1_7,step1_8,step1_9,step1_10
command=curl -X GET http://localhost:8553/api/report/email/wel_clue_by_channel/huchunhe@igengmei.com,wangjun@igengmei.com,zhaoyang@igengmei.com/weiyimin@igengmei.com
\ No newline at end of file
-- Month-to-date report, one row per first-active date, channel and platform.
-- Counts the distinct devices first seen that day and sums their lead counts on the
-- first-active day, within 3 days (day 0 to day 2) and within 7 days (day 0 to day 6).
-- The window runs from the first day of the month of yesterday up to yesterday.
SELECT
    device.first_active_date AS `日期`,
    device.channel AS `渠道`,
    device.device_os_type AS `平台`,
    COUNT(DISTINCT device.device_id) AS `当天新活量`,
    SUM(IF(valid.date_day = device.first_active_date, valid.clue_num, NULL)) AS validUV_1,
    SUM(IF(valid.date_day >= device.first_active_date
           AND valid.date_day <= date_add(device.first_active_date, 2),
           valid.clue_num, NULL)) AS validUV_3,
    SUM(IF(valid.date_day >= device.first_active_date
           AND valid.date_day <= date_add(device.first_active_date, 6),
           valid.clue_num, NULL)) AS validUV_7
FROM (
    -- One row per device and first-active date, with its resolved channel.
    -- The channel of the earliest IDFA record wins over first_channel_source_type when one exists.
    SELECT
        new_device.first_active_date,
        new_device.device_id,
        new_device.device_os_type,
        COALESCE(ios_device.channel, new_device.first_channel_source_type) AS channel
    FROM (
        -- Devices with active_type 1 or 2 in the window, minus the excluded channels.
        -- partition_date (yyyyMMdd) is reformatted to yyyy-MM-dd.
        SELECT
            device_id,
            device_os_type,
            first_channel_source_type,
            concat_ws('-', substr(partition_date,1,4), substr(partition_date,5,2), substr(partition_date,7,2)) AS first_active_date
        FROM online.ml_device_day_active_status
        WHERE partition_date >= REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
          AND partition_date <= regexp_replace(date_sub(current_date(),1),'-','')
          AND active_type IN ('1','2')
          AND first_channel_source_type NOT IN (
                'yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3',
                'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang',
                'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1',
                'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4',
                'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100',
                'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ',
                'promotion_shike','promotion_julang_jl03','promotion_zuimei','','unknown'
              )
          AND first_channel_source_type NOT LIKE 'promotion\_jf\_%'
    ) new_device
    LEFT JOIN (
        -- Earliest record per IDFA. Device table used to tell apart detailed App Store channel names.
        SELECT
            channel,
            idfa
        FROM (
            SELECT
                channel,
                idfa,
                row_number() OVER (PARTITION BY idfa ORDER BY active_time ASC) AS rn
            FROM online.tl_hdfs_ios_idfa_tmp
        ) ranked_idfa
        WHERE rn = 1
    ) ios_device
        ON ios_device.idfa = new_device.device_id
    -- No aggregates here: the GROUP BY only removes duplicate rows.
    GROUP BY
        new_device.first_active_date,
        new_device.device_id,
        new_device.device_os_type,
        COALESCE(ios_device.channel, new_device.first_channel_source_type)
) device
LEFT JOIN (
    -- Valid leads per day (user granularity), mapped to a device id.
    SELECT
        user_device.device_id,
        from_unixtime(unix_timestamp(clue.date_day,'yyyyMMdd'),'yyyy-MM-dd') AS date_day,
        count(1) AS clue_num
    FROM (
        -- One row per user, merchant and month. date_day is the earliest of the first
        -- consultation, first payment and first call dates. 99999999 stands in for a missing date.
        SELECT
            COALESCE(consult_pay.user_id, calls.user_id) AS user_id,
            COALESCE(consult_pay.merchant_id, calls.merchant_id) AS merchant_id,
            COALESCE(consult_pay.date_month, calls.call_month) AS date_month,
            least(COALESCE(consult_pay.date_day,'99999999'), COALESCE(calls.first_call_date,'99999999')) AS date_day,
            COALESCE(consult_pay.consult_num, 0) AS consult_num,
            COALESCE(consult_pay.pay_num, 0) AS pay_num,
            COALESCE(calls.call_num, 0) AS call_num
        FROM (
            -- Consultations and payments merged per user, merchant and month.
            SELECT
                COALESCE(consult.user_id, pay.user_id) AS user_id,
                COALESCE(consult.merchant_id, pay.merchant_id) AS merchant_id,
                COALESCE(consult.consult_month, pay.pay_month) AS date_month,
                least(COALESCE(consult.first_consult_date,'99999999'), COALESCE(pay.first_pay_date,'99999999')) AS date_day,
                COALESCE(consult.consult_num, 0) AS consult_num,
                COALESCE(pay.pay_num, 0) AS pay_num
            FROM (
                -- Private-message table of the new warehouse: first valid conversation day per month.
                SELECT
                    user_id,
                    merchant_id,
                    SUBSTR(partition_day,1,6) AS consult_month,
                    min(partition_day) AS first_consult_date,
                    1 AS consult_num
                FROM ml.ml_c_et_msg_conversation_dimen_inc_d
                WHERE partition_day >= REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
                  AND partition_day <= regexp_replace(date_sub(current_date,1),'-','')
                  AND is_valid = 'true'
                GROUP BY
                    user_id,
                    merchant_id,
                    SUBSTR(partition_day,1,6)
            ) consult
            FULL JOIN (
                -- Order detail partition of yesterday: first pay day per month, rows with is_pure_user true only.
                SELECT
                    paid_order.user_id,
                    paid_order.merchant_id,
                    regexp_replace(SUBSTR(paid_order.pay_date,1,7),'-','') AS pay_month,
                    MIN(regexp_replace(SUBSTR(paid_order.pay_date,1,10),'-','')) AS first_pay_date,
                    1 AS pay_num
                FROM ONLINE.ML_TRADE_ORDER_DETAIL_DAY paid_order
                WHERE paid_order.partition_date = regexp_replace(date_sub(current_date(),1),'-','')
                  AND regexp_replace(SUBSTR(paid_order.pay_date,1,10),'-','') >= REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
                  AND regexp_replace(SUBSTR(paid_order.pay_date,1,10),'-','') <= regexp_replace(date_sub(current_date,1),'-','')
                  AND paid_order.is_pure_user = 'true'
                GROUP BY
                    paid_order.user_id,
                    paid_order.merchant_id,
                    regexp_replace(SUBSTR(paid_order.pay_date,1,7),'-','')
            ) pay
                ON consult.user_id = pay.user_id
               AND consult.merchant_id = pay.merchant_id
               AND consult.consult_month = pay.pay_month
            -- No aggregates here: the GROUP BY only removes duplicate rows.
            GROUP BY
                COALESCE(consult.user_id, pay.user_id),
                COALESCE(consult.merchant_id, pay.merchant_id),
                COALESCE(consult.consult_month, pay.pay_month),
                least(COALESCE(consult.first_consult_date,'99999999'), COALESCE(pay.first_pay_date,'99999999')),
                COALESCE(consult.consult_num, 0),
                COALESCE(pay.pay_num, 0)
        ) consult_pay
        FULL JOIN (
            -- Calls resolved to user and merchant through the binding chain
            -- (call -> ali binding -> phone binding -> lead task binding -> lead task).
            SELECT
                lead_task.user_id,
                lead_task.merchant_id,
                regexp_replace(SUBSTR(call_log.partition_date,1,6),'-','') AS call_month,
                MIN(call_log.partition_date) AS first_call_date,
                1 AS call_num
            FROM (
                -- Call record table (call_type there is the call type).
                SELECT
                    sub_id,
                    REGEXP_REPLACE(SUBSTR(call_time,0,10), '-', '') AS partition_date
                FROM tl.tl_gm_sl_ali_virtual_phone_call_detail
                WHERE partition_day = regexp_replace(date_sub(current_date(),1),'-','')
                  AND REGEXP_REPLACE(SUBSTR(call_time,0,10), '-', '') >= REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
                  AND REGEXP_REPLACE(SUBSTR(call_time,0,10), '-', '') <= regexp_replace(date_sub(current_date,1),'-','')
                  -- Original note: call duration over 30 seconds.
                  -- NOTE(review): the predicate only checks start_time < release_time - confirm the intended threshold.
                  AND start_time < release_time
            ) call_log
            LEFT JOIN (
                -- Ali record table.
                SELECT
                    id,
                    sub_id
                FROM tl.tl_gm_sl_ali_virtual_phone_binding
                WHERE partition_day = regexp_replace(date_sub(current_date(),1),'-','')
            ) ali_binding
                ON call_log.sub_id = ali_binding.sub_id
            LEFT JOIN (
                SELECT
                    id,
                    platform_binding_id
                FROM tl.tl_gm_sl_virtual_phone_binding
                WHERE partition_day = regexp_replace(date_sub(current_date(),1),'-','')
            ) phone_binding
                ON ali_binding.id = phone_binding.platform_binding_id
            LEFT JOIN (
                SELECT
                    phone_binding_id,
                    lead_task_id
                FROM tl.tl_gm_sl_lead_task_phone_binding
                WHERE partition_day = regexp_replace(date_sub(current_date(),1),'-','')
            ) task_binding
                ON phone_binding.id = task_binding.phone_binding_id
            JOIN (
                -- Lead task table (a row is recorded after the user taps to authorize).
                -- source 2 means phone authorization triggered by user action.
                SELECT
                    id,
                    user_id,
                    merchant_id
                FROM tl.tl_gm_sl_lead_task
                WHERE partition_day = regexp_replace(date_sub(current_date(),1),'-','')
                  AND source = '2'
            ) lead_task
                ON task_binding.lead_task_id = lead_task.id
            GROUP BY
                lead_task.user_id,
                lead_task.merchant_id,
                regexp_replace(SUBSTR(call_log.partition_date,1,6),'-','')
        ) calls
            ON consult_pay.user_id = calls.user_id
           AND consult_pay.merchant_id = calls.merchant_id
           AND consult_pay.date_month = calls.call_month
    ) clue
    JOIN (
        -- User snapshot per day. The first entry of device_list is taken as the device id,
        -- or an empty string when the list is empty.
        SELECT
            user_id,
            partition_date,
            IF(size(device_list) > 0, device_list[0], '') AS device_id
        FROM online.ml_user_updates
        WHERE partition_date >= REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
          AND partition_date <= regexp_replace(date_sub(current_date(),1),'-','')
    ) user_device
        ON clue.user_id = user_device.user_id
       AND clue.date_day = user_device.partition_date
    GROUP BY
        user_device.device_id,
        from_unixtime(unix_timestamp(clue.date_day,'yyyyMMdd'),'yyyy-MM-dd')
) valid
    ON device.device_id = valid.device_id
GROUP BY
    device.first_active_date,
    device.channel,
    device.device_os_type
\ No newline at end of file
-- Month-to-date report, one row per first-active date, channel and platform.
-- Counts the distinct devices first seen that day and sums their page views of the
-- welfare_detail, organization_detail and expert_detail pages on the first-active day,
-- within 3 days (day 0 to day 2) and within 7 days (day 0 to day 6).
-- The window runs from the first day of the month of yesterday up to yesterday.
SELECT
    device.first_active_date AS `日期`,
    device.channel AS `渠道`,
    device.device_os_type AS `平台`,
    COUNT(DISTINCT device.device_id) AS `当天新活量`,
    SUM(IF(pv.action_date = device.first_active_date, pv.wel_pv, NULL)) AS meigouPV_1,
    SUM(IF(pv.action_date >= device.first_active_date
           AND pv.action_date <= date_add(device.first_active_date, 2),
           pv.wel_pv, NULL)) AS meigouPV_3,
    SUM(IF(pv.action_date >= device.first_active_date
           AND pv.action_date <= date_add(device.first_active_date, 6),
           pv.wel_pv, NULL)) AS meigouPV_7
FROM (
    -- One row per device and first-active date, with its resolved channel.
    -- The channel of the earliest IDFA record wins over first_channel_source_type when one exists.
    SELECT
        new_device.first_active_date,
        new_device.device_id,
        new_device.device_os_type,
        COALESCE(ios_device.channel, new_device.first_channel_source_type) AS channel
    FROM (
        -- Devices with active_type 1 or 2 in the window, minus the excluded channels.
        -- partition_date (yyyyMMdd) is reformatted to yyyy-MM-dd.
        SELECT
            device_id,
            device_os_type,
            first_channel_source_type,
            concat_ws('-', substr(partition_date,1,4), substr(partition_date,5,2), substr(partition_date,7,2)) AS first_active_date
        FROM online.ml_device_day_active_status
        WHERE partition_date >= REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
          AND partition_date <= regexp_replace(date_sub(current_date(),1),'-','')
          AND active_type IN ('1','2')
          AND first_channel_source_type NOT IN (
                'yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3',
                'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang',
                'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1',
                'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4',
                'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100',
                'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ',
                'promotion_shike','promotion_julang_jl03','promotion_zuimei','','unknown'
              )
          AND first_channel_source_type NOT LIKE 'promotion\_jf\_%'
    ) new_device
    LEFT JOIN (
        -- Earliest record per IDFA. Device table used to tell apart detailed App Store channel names.
        SELECT
            channel,
            idfa
        FROM (
            SELECT
                channel,
                idfa,
                row_number() OVER (PARTITION BY idfa ORDER BY active_time ASC) AS rn
            FROM online.tl_hdfs_ios_idfa_tmp
        ) ranked_idfa
        WHERE rn = 1
    ) ios_device
        ON ios_device.idfa = new_device.device_id
    -- No aggregates here: the GROUP BY only removes duplicate rows.
    GROUP BY
        new_device.first_active_date,
        new_device.device_id,
        new_device.device_os_type,
        COALESCE(ios_device.channel, new_device.first_channel_source_type)
) device
LEFT JOIN (
    -- Page-view count per day and client id, after dropping devices listed in the spam table.
    SELECT
        page_view.action_date,
        page_view.cl_id,
        count(1) AS wel_pv
    FROM (
        -- page_view events on the three tracked pages. partition_date is reformatted to yyyy-MM-dd.
        SELECT
            concat_ws('-', substr(partition_date,0,4), substr(partition_date,5,2), substr(partition_date,7,2)) AS action_date,
            cl_id
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date >= REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
          AND partition_date <= regexp_replace(date_sub(current_date(),1),'-','')
          AND page_name IN ('welfare_detail','organization_detail','expert_detail')
          AND action = 'page_view'
    ) page_view
    LEFT JOIN (
        -- 2. Remove PV and UV suspected of being inflated by institutions.
        SELECT
            device_id
        FROM ml.ml_d_ct_dv_devicespam_d
        WHERE partition_day = regexp_replace(date_sub(current_date(),1),'-','')
    ) spam_pv
        ON page_view.cl_id = spam_pv.device_id
    -- Anti-join: keep only client ids with no match in the spam table.
    WHERE spam_pv.device_id IS NULL
    GROUP BY
        page_view.action_date,
        page_view.cl_id
) pv
    ON device.device_id = pv.cl_id
GROUP BY
    device.first_active_date,
    device.channel,
    device.device_os_type
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment