Commit ffbe21dc authored by 魏艺敏's avatar 魏艺敏

update codes

parent 7542f649
......@@ -88,4 +88,4 @@ SELECT
,per_article_pv AS `专栏PV/专栏UV`
,article_stay AS `专栏单设备时长(m)`
FROM pm.tl_pm_content_d
where partition_day=regexp_replace((current_date - interval '1' day),'-','')
where partition_day=regexp_replace(DATE_SUB(current_date,1) ,'-','')
INSERT OVERWRITE TABLE tl_pm_content_d PARTITION (PARTITION_DAY = regexp_replace((current_date - interval '1' day),'-',''))
INSERT OVERWRITE TABLE tl_pm_content_d PARTITION (PARTITION_DAY = #partition_day)
SELECT
T1.partition_date AS day_id
,T1.device_type AS device_os_type
,T1.device_os_type AS device_os_type
,T1.active_type AS active_type
,T1.channel AS is_ai_channel
,COALESCE(T1.dau,0) AS dau
......@@ -90,22 +90,22 @@ SELECT
,COALESCE(T3.article_stay,0) AS article_stay
FROM
(--基础维度/dau
SELECT partition_date,device_type,active_type,t2.channel,sum(dau) AS dau
SELECT partition_date,device_os_type,active_type,t2.channel,sum(dau) AS dau
FROM
(
SELECT partition_date
,device_os_type AS device_type
,device_os_type
,CASE WHEN active_type = '4' THEN '老活跃设备'
WHEN active_type IN ('1','2') THEN '新增设备' END AS active_type
,array(CASE WHEN tmp.time = 'AI' or (partition_date < 20200301 AND first_channel_source_type like 'promotion_toutiao_jy%') THEN 'AI' ELSE '其他' END , '合计') as channel
,array(CASE WHEN tmp.col2 = 'AI' or (partition_date < 20200301 AND first_channel_source_type like 'promotion_toutiao_jy%') THEN 'AI' ELSE '其他' END , '合计') as channel
,count(1) AS dau
FROM online.ml_device_day_active_status
LEFT JOIN
(SELECT phone,time
FROM offline.tmp_zhx_20191227
WHERE flag='0204_danlei_channel')tmp
on first_channel_source_type=tmp.phone
WHERE partition_date>=regexp_replace((current_date - interval '60' day),'-','')
(SELECT col1,col2 --col1:子渠道,col2:是否属于AI,col3:标识
FROM pm.tl_pm_ydl
WHERE col3='0204_danlei_channel')tmp
on first_channel_source_type=tmp.col1
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND active_type IN ('1','2','4')
AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
......@@ -118,15 +118,15 @@ FROM
AND first_channel_source_type not LIKE 'promotion\_jf\_%'
GROUP BY partition_date,device_os_type,CASE WHEN active_type = '4' THEN '老活跃设备'
WHEN active_type IN ('1','2') THEN '新增设备' END,
CASE WHEN tmp.time = 'AI' or (partition_date < 20200301 AND first_channel_source_type like 'promotion_toutiao_jy%') THEN 'AI' ELSE '其他' END
CASE WHEN tmp.col2 = 'AI' or (partition_date < 20200301 AND first_channel_source_type like 'promotion_toutiao_jy%') THEN 'AI' ELSE '其他' END
)t1
LATERAL VIEW explode(t1.channel) t2 AS channel
GROUP BY partition_date,device_type,active_type,t2.channel
GROUP BY partition_date,device_os_type,active_type,t2.channel
)T1
LEFT JOIN
(--内容uv/pv
SELECT partition_date
,device_type
,device_os_type
,active_type
,t4.channel
,count(cl_id) as neirong_pv
......@@ -149,22 +149,22 @@ LEFT JOIN
,count(distinct CASE WHEN page_name IN ('wiki_detail','product_detail','wiki_brand','wiki_collect') THEN cl_id END) AS wiki_uv
FROM
(
SELECT t1.partition_date,device_type,active_type,channel,t2.cl_id,t2.page_name
SELECT t1.partition_date,device_os_type,active_type,channel,t2.cl_id,t2.page_name
FROM
(
SELECT partition_date
,device_os_type AS device_type
,device_os_type
,CASE WHEN active_type = '4' THEN '老活跃设备'
WHEN active_type IN ('1','2') THEN '新增设备' END AS active_type
,array(CASE WHEN tmp.time = 'AI' or (partition_date < 20200301 AND first_channel_source_type like 'promotion_toutiao_jy%') THEN 'AI' ELSE '其他' END , '合计') as channel
,array(CASE WHEN tmp.col2 = 'AI' or (partition_date < 20200301 AND first_channel_source_type like 'promotion_toutiao_jy%') THEN 'AI' ELSE '其他' END , '合计') as channel
,device_id
FROM online.ml_device_day_active_status
LEFT JOIN
(SELECT phone,time
FROM offline.tmp_zhx_20191227
WHERE flag='0204_danlei_channel')tmp
on first_channel_source_type=tmp.phone
WHERE partition_date>=regexp_replace((current_date - interval '60' day),'-','')
(SELECT col1,col2 --col1:子渠道,col2:是否属于AI,col3:标识
FROM pm.tl_pm_ydl
WHERE col3='0204_danlei_channel')tmp
on first_channel_source_type=tmp.col1
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND active_type IN ('1','2','4')
AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
......@@ -180,7 +180,7 @@ LEFT JOIN
(
SELECT partition_date,page_name,cl_id
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= regexp_replace((current_date - interval '60' day),'-','')
WHERE partition_date >= regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date < regexp_replace((current_date),'-','')
AND action = 'page_view'
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail',
......@@ -192,23 +192,23 @@ LEFT JOIN
( -- 去掉疑似机构刷量的PV和UV
select distinct device_id
from ml.ml_d_ct_dv_devicespam_d
where partition_day=regexp_replace((current_date - interval '1' day),'-','')
where partition_day=regexp_replace(DATE_SUB(current_date,1) ,'-','')
)spam_pv
on t2.cl_id=spam_pv.device_id
WHERE spam_pv.device_id IS NULL
)t3
LATERAL VIEW explode(t3.channel) t4 AS channel
GROUP BY partition_date,device_type,active_type,t4.channel
GROUP BY partition_date,device_os_type,active_type,t4.channel
)T2
ON T1.partition_date=T2.partition_date
AND T1.device_type=T2.device_type
AND T1.device_os_type=T2.device_os_type
AND T1.active_type=T2.active_type
AND T1.channel=T2.channel
LEFT JOIN
(--内容浏览时长
SELECT partition_date
,device_type
,device_os_type
,active_type
,t4.channel
,round(sum(page_stay)/count(distinct cl_id)/60,4) as neirong_stay
......@@ -230,22 +230,22 @@ LEFT JOIN
count(distinct CASE WHEN page_name IN ('wiki_detail','product_detail','wiki_brand','wiki_collect') THEN cl_id END)/60,4) AS wiki_stay
FROM
(
SELECT t1.partition_date,device_type,active_type,channel,t2.cl_id,t2.page_name,t2.page_stay
SELECT t1.partition_date,device_os_type,active_type,channel,t2.cl_id,t2.page_name,t2.page_stay
FROM
(
SELECT partition_date
,device_os_type AS device_type
,device_os_type
,CASE WHEN active_type = '4' THEN '老活跃设备'
WHEN active_type IN ('1','2') THEN '新增设备' END AS active_type
,array(CASE WHEN tmp.time = 'AI' or (partition_date < 20200301 AND first_channel_source_type like 'promotion_toutiao_jy%') THEN 'AI' ELSE '其他' END , '合计') as channel
,array(CASE WHEN tmp.col2 = 'AI' or (partition_date < 20200301 AND first_channel_source_type like 'promotion_toutiao_jy%') THEN 'AI' ELSE '其他' END , '合计') as channel
,device_id
FROM online.ml_device_day_active_status
LEFT JOIN
(SELECT phone,time
FROM offline.tmp_zhx_20191227
WHERE flag='0204_danlei_channel')tmp
on first_channel_source_type=tmp.phone
WHERE partition_date>=regexp_replace((current_date - interval '60' day),'-','')
(SELECT col1,col2 --col1:子渠道,col2:是否属于AI,col3:标识
FROM pm.tl_pm_ydl
WHERE col3='0204_danlei_channel')tmp
on first_channel_source_type=tmp.col1
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND active_type IN ('1','2','4')
AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
......@@ -263,7 +263,7 @@ LEFT JOIN
(
SELECT partition_date,page_name,cl_id,page_stay
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= regexp_replace((current_date - interval '60' day),'-','')
WHERE partition_date >= regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date < regexp_replace((current_date),'-','')
AND action = 'page_view'
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail',
......@@ -276,45 +276,45 @@ LEFT JOIN
( -- 去掉疑似机构刷量的PV和UV
select distinct device_id
from ml.ml_d_ct_dv_devicespam_d
where partition_day=regexp_replace((current_date - interval '1' day),'-','')
where partition_day=regexp_replace(DATE_SUB(current_date,1) ,'-','')
)spam_pv
on t2.cl_id=spam_pv.device_id
WHERE spam_pv.device_id IS NULL
)t3
LATERAL VIEW explode(t3.channel) t4 AS channel
GROUP BY partition_date,device_type,active_type,t4.channel
GROUP BY partition_date,device_os_type,active_type,t4.channel
)T3
ON T1.partition_date=T3.partition_date
AND T1.device_type=T3.device_type
AND T1.device_os_type=T3.device_os_type
AND T1.active_type=T3.active_type
AND T1.channel=T3.channel
LEFT JOIN
(--内容用户留存
SELECT regexp_replace(partition_date,'-','') AS partition_date
,device_type,active_type,t5.channel
,device_os_type,active_type,t5.channel
,int(count(DISTINCT CASE WHEN date_add(partition_date,1) = retention_date THEN device_id END)) AS retention_num1
,int(count(DISTINCT CASE WHEN date_add(partition_date,6) = retention_date THEN device_id END)) AS retention_num7
,int(count(DISTINCT CASE WHEN date_add(partition_date,29) = retention_date THEN device_id END)) AS retention_num30
FROM
(
SELECT t1.partition_date,device_type,active_type,channel,t3.device_id,t3.partition_date as retention_date
SELECT t1.partition_date,device_os_type,active_type,channel,t3.device_id,t3.partition_date as retention_date
FROM
(
SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) AS partition_date
,device_os_type AS device_type
,device_os_type
,CASE WHEN active_type = '4' THEN '老活跃设备'
WHEN active_type IN ('1','2') THEN '新增设备' END AS active_type
,array(CASE WHEN tmp.time = 'AI' or (partition_date < 20200301 AND first_channel_source_type like 'promotion_toutiao_jy%') THEN 'AI' ELSE '其他' END , '合计') as channel
,array(CASE WHEN tmp.col2 = 'AI' or (partition_date < 20200301 AND first_channel_source_type like 'promotion_toutiao_jy%') THEN 'AI' ELSE '其他' END , '合计') as channel
,device_id
FROM online.ml_device_day_active_status
LEFT JOIN
(SELECT phone,time
FROM offline.tmp_zhx_20191227
WHERE flag='0204_danlei_channel')tmp
on first_channel_source_type=tmp.phone
WHERE partition_date>=regexp_replace((current_date - interval '60' day),'-','')
(SELECT col1,col2 --col1:子渠道,col2:是否属于AI,col3:标识
FROM pm.tl_pm_ydl
WHERE col3='0204_danlei_channel')tmp
on first_channel_source_type=tmp.col1
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND active_type IN ('1','2','4')
AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
......@@ -332,7 +332,7 @@ LEFT JOIN
SELECT cl_id
,concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) AS partition_date
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= regexp_replace((current_date - interval '60' day),'-','')
WHERE partition_date >= regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date < regexp_replace((current_date),'-','')
AND action = 'page_view'
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail',
......@@ -346,7 +346,7 @@ LEFT JOIN
( -- 去掉疑似机构刷量的PV和UV
select distinct device_id
from ml.ml_d_ct_dv_devicespam_d
where partition_day=regexp_replace((current_date - interval '1' day),'-','')
where partition_day=regexp_replace(DATE_SUB(current_date,1) ,'-','')
)spam_pv
on t2.cl_id=spam_pv.device_id
LEFT JOIN
......@@ -354,46 +354,46 @@ LEFT JOIN
SELECT device_id
,concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) AS partition_date
FROM online.ml_device_day_active_status
WHERE partition_date >=regexp_replace((current_date - interval '60' day),'-','')
WHERE partition_date >=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date < regexp_replace((current_date),'-','')
)t3
ON t2.cl_id=t3.device_id
WHERE spam_pv.device_id IS NULL
)t4
LATERAL VIEW explode(t4.channel) t5 AS channel
GROUP BY regexp_replace(partition_date,'-',''),device_type,active_type,t5.channel
GROUP BY regexp_replace(partition_date,'-',''),device_os_type,active_type,t5.channel
)T4
ON T1.partition_date=T4.partition_date
AND T1.device_type=T4.device_type
AND T1.device_os_type=T4.device_os_type
AND T1.active_type=T4.active_type
AND T1.channel=T4.channel
LEFT JOIN
(--内容用户单设备app时长(m)
SELECT partition_date
,device_type
,device_os_type
,active_type
,t5.channel
,round(sum(use_duration)/count(distinct cl_id)/60,4) as app_duration
,round(avg(open_times),4) as avg_opentimes
FROM
(
SELECT t1.partition_date,device_type,active_type,channel,t2.cl_id,t3.use_duration,t3.open_times
SELECT t1.partition_date,device_os_type,active_type,channel,t2.cl_id,t3.use_duration,t3.open_times
FROM
(
SELECT partition_date
,device_os_type AS device_type
,device_os_type
,CASE WHEN active_type = '4' THEN '老活跃设备'
WHEN active_type IN ('1','2') THEN '新增设备' END AS active_type
,array(CASE WHEN tmp.time = 'AI' or (partition_date < 20200301 AND first_channel_source_type like 'promotion_toutiao_jy%') THEN 'AI' ELSE '其他' END , '合计') as channel
,array(CASE WHEN tmp.col2 = 'AI' or (partition_date < 20200301 AND first_channel_source_type like 'promotion_toutiao_jy%') THEN 'AI' ELSE '其他' END , '合计') as channel
,device_id
FROM online.ml_device_day_active_status
LEFT JOIN
(SELECT phone,time
FROM offline.tmp_zhx_20191227
WHERE flag='0204_danlei_channel')tmp
on first_channel_source_type=tmp.phone
WHERE partition_date>=regexp_replace((current_date - interval '60' day),'-','')
(SELECT col1,col2 --col1:子渠道,col2:是否属于AI,col3:标识
FROM pm.tl_pm_ydl
WHERE col3='0204_danlei_channel')tmp
on first_channel_source_type=tmp.col1
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND active_type IN ('1','2','4')
AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
......@@ -410,7 +410,7 @@ LEFT JOIN
(--内容用户
SELECT partition_date,cl_id
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= regexp_replace((current_date - interval '60' day),'-','')
WHERE partition_date >= regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date < regexp_replace((current_date),'-','')
AND action = 'page_view'
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail',
......@@ -423,14 +423,14 @@ LEFT JOIN
( -- 去掉疑似机构刷量的PV和UV
select distinct device_id
from ml.ml_d_ct_dv_devicespam_d
where partition_day=regexp_replace((current_date - interval '1' day),'-','')
where partition_day=regexp_replace(DATE_SUB(current_date,1) ,'-','')
)spam_pv
on t2.cl_id=spam_pv.device_id
LEFT JOIN
(
SELECT partition_date,device_id,use_duration,open_times
FROM online.ml_device_updates
WHERE partition_date>=regexp_replace((current_date - interval '60' day),'-','')
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND use_duration > 0 and use_duration < 86400
)t3
......@@ -439,17 +439,17 @@ LEFT JOIN
WHERE spam_pv.device_id IS NULL
)t4
LATERAL VIEW explode(t4.channel) t5 AS channel
GROUP BY partition_date,device_type,active_type,t5.channel
GROUP BY partition_date,device_os_type,active_type,t5.channel
)T5
ON T1.partition_date=T5.partition_date
AND T1.device_type=T5.device_type
AND T1.device_os_type=T5.device_os_type
AND T1.active_type=T5.active_type
AND T1.channel=T5.channel
LEFT JOIN
(--不同来源进入内容uv/pv
SELECT partition_date
,device_type
,device_os_type
,active_type
,t4.channel
,count(CASE WHEN referrer='search' THEN cl_id END) AS search_pv
......@@ -478,22 +478,22 @@ LEFT JOIN
FROM
(
SELECT t1.partition_date,device_type,active_type,channel,t2.cl_id,t2.referrer
SELECT t1.partition_date,device_os_type,active_type,channel,t2.cl_id,t2.referrer
FROM
(
SELECT partition_date
,device_os_type AS device_type
,device_os_type
,CASE WHEN active_type = '4' THEN '老活跃设备'
WHEN active_type IN ('1','2') THEN '新增设备' END AS active_type
,array(CASE WHEN tmp.time = 'AI' or (partition_date < 20200301 AND first_channel_source_type like 'promotion_toutiao_jy%') THEN 'AI' ELSE '其他' END , '合计') as channel
,array(CASE WHEN tmp.col2 = 'AI' or (partition_date < 20200301 AND first_channel_source_type like 'promotion_toutiao_jy%') THEN 'AI' ELSE '其他' END , '合计') as channel
,device_id
FROM online.ml_device_day_active_status
LEFT JOIN
(SELECT phone,time
FROM offline.tmp_zhx_20191227
WHERE flag='0204_danlei_channel')tmp
on first_channel_source_type=tmp.phone
WHERE partition_date>=regexp_replace((current_date - interval '60' day),'-','')
(SELECT col1,col2 --col1:子渠道,col2:是否属于AI,col3:标识
FROM pm.tl_pm_ydl
WHERE col3='0204_danlei_channel')tmp
on first_channel_source_type=tmp.col1
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND active_type IN ('1','2','4')
AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
......@@ -520,7 +520,7 @@ LEFT JOIN
when referrer in ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail') then 'content'
else null end as referrer
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= regexp_replace((current_date - interval '60' day),'-','')
WHERE partition_date >= regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date < regexp_replace((current_date),'-','')
and action='page_view'
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail',
......@@ -531,7 +531,7 @@ LEFT JOIN
--首页feeds进入内容(首页非策略卡片点击)
SELECT partition_date,cl_id,'feeds' as referrer
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= regexp_replace((current_date - interval '60' day),'-','')
WHERE partition_date >= regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date < regexp_replace((current_date),'-','')
AND page_name = 'home'
AND action = 'on_click_card'
......@@ -543,7 +543,7 @@ LEFT JOIN
--首页feeds进入内容(首页非策略卡片点击) 7.8.0版本前的埋点
SELECT partition_date,cl_id,'feeds' as referrer
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >=regexp_replace((current_date - interval '60' day),'-','')
WHERE partition_date >=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date < regexp_replace((current_date),'-','')
AND page_name = 'home'
AND action in ( 'on_click_diary_card','on_click_answer_card','on_click_question_card','on_click_topic_card','on_click_live_card')
......@@ -554,7 +554,7 @@ LEFT JOIN
--推荐进入内容(首页策略卡片点击),5月7日新增transaction_type类型
SELECT partition_date,cl_id,'recommend' as referrer
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= regexp_replace((current_date - interval '60' day),'-','')
WHERE partition_date >= regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date < regexp_replace((current_date),'-','')
AND page_name = 'home'
AND action = 'on_click_card'
......@@ -566,7 +566,7 @@ LEFT JOIN
--推荐进入内容(首页策略卡片点击) 7.8.0版本前的埋点
SELECT partition_date,cl_id,'feeds' as referrer
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= regexp_replace((current_date - interval '60' day),'-','')
WHERE partition_date >= regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date < regexp_replace((current_date),'-','')
AND page_name = 'home'
AND action in ('on_click_diary_card','on_click_answer_card','on_click_question_card','on_click_topic_card','on_click_live_card')
......@@ -578,45 +578,45 @@ LEFT JOIN
( -- 去掉疑似机构刷量的PV和UV
select distinct device_id
from ml.ml_d_ct_dv_devicespam_d
where partition_day=regexp_replace((current_date - interval '1' day),'-','')
where partition_day=regexp_replace(DATE_SUB(current_date,1) ,'-','')
)spam_pv
on t2.cl_id=spam_pv.device_id
WHERE spam_pv.device_id IS NULL
)t3
LATERAL VIEW explode(t3.channel) t4 AS channel
GROUP BY partition_date,device_type,active_type,t4.channel
GROUP BY partition_date,device_os_type,active_type,t4.channel
)T6
ON T1.partition_date=T6.partition_date
AND T1.device_type=T6.device_type
AND T1.device_os_type=T6.device_os_type
AND T1.active_type=T6.active_type
AND T1.channel=T6.channel
LEFT JOIN
(--真实发帖数
SELECT partition_date
,device_type
,device_os_type
,active_type
,t7.channel
,count(distinct id) as num
FROM
(
SELECT t1.partition_date,device_type,active_type,channel,t3.id
SELECT t1.partition_date,device_os_type,active_type,channel,t3.id
FROM
(
SELECT partition_date
,device_os_type AS device_type
,device_os_type
,CASE WHEN active_type = '4' THEN '老活跃设备'
WHEN active_type IN ('1','2') THEN '新增设备' END AS active_type
,array(CASE WHEN tmp.time = 'AI' or (partition_date < 20200301 AND first_channel_source_type like 'promotion_toutiao_jy%') THEN 'AI' ELSE '其他' END , '合计') as channel
,array(CASE WHEN tmp.col2 = 'AI' or (partition_date < 20200301 AND first_channel_source_type like 'promotion_toutiao_jy%') THEN 'AI' ELSE '其他' END , '合计') as channel
,device_id
FROM online.ml_device_day_active_status
LEFT JOIN
(SELECT phone,time
FROM offline.tmp_zhx_20191227
WHERE flag='0204_danlei_channel')tmp
on first_channel_source_type=tmp.phone
WHERE partition_date>=regexp_replace((current_date - interval '60' day),'-','')
(SELECT col1,col2 --col1:子渠道,col2:是否属于AI,col3:标识
FROM pm.tl_pm_ydl
WHERE col3='0204_danlei_channel')tmp
on first_channel_source_type=tmp.col1
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND active_type IN ('1','2','4')
AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
......@@ -634,7 +634,7 @@ LEFT JOIN
SELECT user_id,partition_date,
if(size(device_list) > 0, device_list [ 0 ], '') AS device_id
FROM online.ml_user_updates
WHERE partition_date>=regexp_replace((current_date - interval '60' day),'-','')
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
)t2
ON t1.partition_date=t2.partition_date
......@@ -645,10 +645,10 @@ LEFT JOIN
--新增帖子
SELECT user_id,id,regexp_replace(substr(create_time,1,10),'-','') as create_date
FROM online.tl_hdfs_api_tractate_view --发帖情况表
WHERE partition_date = regexp_replace((current_date - interval '1' day),'-','')
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND is_online='true'
AND platform in ('1','7') --更美用户发的以及打卡的(去除hera后台,爬虫抓取的,kyc自动回复的)
AND regexp_replace(substr(create_time,1,10),'-','')>=regexp_replace((current_date - interval '60' day),'-','')
AND regexp_replace(substr(create_time,1,10),'-','')>=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND regexp_replace(substr(create_time,1,10),'-','')< regexp_replace((current_date),'-','')
UNION ALL
......@@ -658,15 +658,15 @@ LEFT JOIN
(
SELECT user_id,id,regexp_replace(substr(created_time,1,10),'-','') as create_date
FROM online.tl_hdfs_diary_view
WHERE partition_date = regexp_replace((current_date - interval '1' day),'-','')
and regexp_replace(substr(created_time,1,10),'-','') >= regexp_replace((current_date - interval '60' day),'-','')
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
and regexp_replace(substr(created_time,1,10),'-','') >= regexp_replace(DATE_SUB(current_date,60) ,'-','')
and regexp_replace(substr(created_time,1,10),'-','') < regexp_replace((current_date),'-','')
)a
JOIN
( --取非空日记
SELECT diary_id
FROM online.tl_hdfs_problem_view
WHERE partition_date = regexp_replace((current_date - interval '1' day),'-','')
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND is_spam = 'false'
)b
ON a.id = b.diary_id
......@@ -675,8 +675,8 @@ LEFT JOIN
--新增日记贴
SELECT user_id,id,regexp_replace(substr(created_time,1,10),'-','') as create_date
FROM online.tl_hdfs_problem_view
WHERE partition_date = regexp_replace((current_date - interval '1' day),'-','')
AND regexp_replace(substr(created_time, 1, 10), '-', '') >= regexp_replace((current_date - interval '60' day),'-','')
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND regexp_replace(substr(created_time, 1, 10), '-', '') >= regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND regexp_replace(substr(created_time, 1, 10), '-', '') < regexp_replace((current_date),'-','')
AND is_spam = 'false'
AND diary_id is not null
......@@ -685,8 +685,8 @@ LEFT JOIN
--新增问题数
SELECT user_id,id,regexp_replace(substr(create_time,1,10),'-','') as create_date
FROM online.tl_hdfs_question_view
WHERE partition_date = regexp_replace((current_date - interval '1' day),'-','')
AND regexp_replace(substr(create_time, 1, 10), '-', '') >=regexp_replace((current_date - interval '60' day),'-','')
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND regexp_replace(substr(create_time, 1, 10), '-', '') >=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND regexp_replace(substr(create_time, 1, 10), '-', '') < regexp_replace((current_date),'-','')
AND is_spam = 'false'
AND platform= '99' --更美用户发的(去除hera后台,爬虫抓取的,kyc自动回复的)
......@@ -695,8 +695,8 @@ LEFT JOIN
--新增回答数
SELECT user_id,id,regexp_replace(substr(create_time,1,10),'-','') as create_date
FROM online.tl_hdfs_answer_view
WHERE partition_date = regexp_replace((current_date - interval '1' day),'-','')
AND regexp_replace(substr(create_time, 1, 10), '-', '') >= regexp_replace((current_date - interval '60' day),'-','')
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND regexp_replace(substr(create_time, 1, 10), '-', '') >= regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND regexp_replace(substr(create_time, 1, 10), '-', '') < regexp_replace((current_date),'-','')
AND is_spam = 'false'
AND platform= '99' --更美用户发的(去除hera后台,爬虫抓取的,kyc自动回复的)
......@@ -711,7 +711,7 @@ LEFT JOIN
(
SELECT partition_date,user_id,action
FROM online.bl_hdfs_operation_updates
WHERE partition_date>=regexp_replace((current_date - interval '60' day),'-','')
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date< regexp_replace((current_date),'-','')
)a
JOIN
......@@ -731,13 +731,13 @@ LEFT JOIN
( --医生账号
SELECT distinct user_id
FROM online.tl_hdfs_doctor_view
WHERE partition_date = regexp_replace((current_date - interval '1' day),'-','')
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
--马甲账号/模特用户
UNION ALL
SELECT user_id
FROM ml.ml_c_ct_ui_user_dimen_d
WHERE partition_day = regexp_replace((current_date - interval '1' day),'-','')
WHERE partition_day = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND (is_puppet = 'true' or is_classifyuser = 'true')
UNION ALL
......@@ -753,13 +753,13 @@ LEFT JOIN
SELECT user_id, v.device_id as device_id
FROM online.ml_user_history_detail
LATERAL VIEW EXPLODE(device_history_list) v AS device_id
WHERE partition_date = regexp_replace((current_date - interval '1' day),'-','')
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
) t1
JOIN
(
SELECT device_id
FROM online.ml_device_history_detail
WHERE partition_date = regexp_replace((current_date - interval '1' day),'-','')
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND is_login_doctor = '1'
) t2
ON t1.device_id = t2.device_id
......@@ -768,38 +768,38 @@ LEFT JOIN
where (t5.user_id is null or t5.user_id = '')
)t6
LATERAL VIEW explode(t6.channel) t7 AS channel
GROUP BY partition_date,device_type,active_type,t7.channel
GROUP BY partition_date,device_os_type,active_type,t7.channel
)T7
ON T1.partition_date=T7.partition_date
AND T1.device_type=T7.device_type
AND T1.device_os_type=T7.device_os_type
AND T1.active_type=T7.active_type
AND T1.channel=T7.channel
LEFT JOIN
(--真实评论数
SELECT partition_date
,device_type
,device_os_type
,active_type
,t7.channel
,count(distinct id) as num
FROM
(
SELECT t1.partition_date,device_type,active_type,channel,t3.id,t3.type
SELECT t1.partition_date,device_os_type,active_type,channel,t3.id,t3.type
FROM
(
SELECT partition_date
,device_os_type AS device_type
,device_os_type
,CASE WHEN active_type = '4' THEN '老活跃设备'
WHEN active_type IN ('1','2') THEN '新增设备' END AS active_type
,array(CASE WHEN tmp.time = 'AI' or (partition_date < 20200301 AND first_channel_source_type like 'promotion_toutiao_jy%') THEN 'AI' ELSE '其他' END , '合计') as channel
,array(CASE WHEN tmp.col2 = 'AI' or (partition_date < 20200301 AND first_channel_source_type like 'promotion_toutiao_jy%') THEN 'AI' ELSE '其他' END , '合计') as channel
,device_id
FROM online.ml_device_day_active_status
LEFT JOIN
(SELECT phone,time
FROM offline.tmp_zhx_20191227
WHERE flag='0204_danlei_channel')tmp
on first_channel_source_type=tmp.phone
WHERE partition_date>=regexp_replace((current_date - interval '60' day),'-','')
(SELECT col1,col2 --col1:子渠道,col2:是否属于AI,col3:标识
FROM pm.tl_pm_ydl
WHERE col3='0204_danlei_channel')tmp
on first_channel_source_type=tmp.col1
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND active_type IN ('1','2','4')
AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
......@@ -817,7 +817,7 @@ LEFT JOIN
SELECT user_id,partition_date,
if(size(device_list) > 0, device_list [ 0 ], '') AS device_id
FROM online.ml_user_updates
WHERE partition_date>=regexp_replace((current_date - interval '60' day),'-','')
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
)t2
ON t1.partition_date=t2.partition_date
......@@ -830,17 +830,17 @@ LEFT JOIN
(
SELECT user_id,regexp_replace(substr(reply_date,1,10),'-','') as reply_date,problem_id,id
FROM online.tl_hdfs_topicreply_view
WHERE partition_date = regexp_replace((current_date - interval '1' day),'-','')
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND is_spam = 'false' --排除疑似广告
-- and diary_id is not null 这个表的diary_id有问题,需要join problem表来判断是不是属于日记
and regexp_replace(substr(reply_date,1,10),'-','') >= regexp_replace((current_date - interval '60' day),'-','')
and regexp_replace(substr(reply_date,1,10),'-','') >= regexp_replace(DATE_SUB(current_date,60) ,'-','')
and regexp_replace(substr(reply_date,1,10),'-','') < regexp_replace((current_date),'-','')
)t1
JOIN
(
SELECT id,diary_id
FROM online.tl_hdfs_problem_view
WHERE partition_date = regexp_replace((current_date - interval '1' day),'-','')
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
group by id,diary_id
)t2
on t2.id=t1.problem_id
......@@ -855,17 +855,17 @@ LEFT JOIN
(
SELECT user_id,regexp_replace(substr(create_time,1,10),'-','') as reply_date,answer_id,id
FROM online.tl_hdfs_answer_reply_view
WHERE partition_date = regexp_replace((current_date - interval '1' day),'-','')
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND (is_fake is NULL or is_fake = 'false')
AND answer_id is not NULL
and regexp_replace(substr(create_time,1,10),'-','') >= regexp_replace((current_date - interval '60' day),'-','')
and regexp_replace(substr(create_time,1,10),'-','') >= regexp_replace(DATE_SUB(current_date,60) ,'-','')
and regexp_replace(substr(create_time,1,10),'-','') < regexp_replace((current_date),'-','')
)t1
JOIN
(
SELECT id,question_id
FROM online.tl_hdfs_answer_view
WHERE partition_date =regexp_replace((current_date - interval '1' day),'-','')
WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
group by id,question_id
)t2
ON t2.id = t1.answer_id
......@@ -875,8 +875,8 @@ LEFT JOIN
--有评论过用户帖的设备
SELECT user_id,regexp_replace(substr(create_time,1,10),'-','') as reply_date,id,'tractate_reply' as type
FROM online.tl_hdfs_api_tractate_reply_view
WHERE partition_date =regexp_replace((current_date - interval '1' day),'-','')
and regexp_replace(substr(create_time,1,10),'-','') >=regexp_replace((current_date - interval '60' day),'-','')
WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
and regexp_replace(substr(create_time,1,10),'-','') >=regexp_replace(DATE_SUB(current_date,60) ,'-','')
and regexp_replace(substr(create_time,1,10),'-','') < regexp_replace((current_date),'-','')
)t3
ON t2.partition_date = t3.reply_date
......@@ -889,7 +889,7 @@ LEFT JOIN
(
SELECT partition_date,user_id,action
FROM online.bl_hdfs_operation_updates
WHERE partition_date>=regexp_replace((current_date - interval '60' day),'-','')
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date< regexp_replace((current_date),'-','')
)a
JOIN
......@@ -909,13 +909,13 @@ LEFT JOIN
( --医生账号
SELECT distinct user_id
FROM online.tl_hdfs_doctor_view
WHERE partition_date = regexp_replace((current_date - interval '1' day),'-','')
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
--马甲账号/模特用户
UNION ALL
SELECT user_id
FROM ml.ml_c_ct_ui_user_dimen_d
WHERE partition_day = regexp_replace((current_date - interval '1' day),'-','')
WHERE partition_day = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND (is_puppet = 'true' or is_classifyuser = 'true')
UNION ALL
......@@ -931,13 +931,13 @@ LEFT JOIN
SELECT user_id, v.device_id as device_id
FROM online.ml_user_history_detail
LATERAL VIEW EXPLODE(device_history_list) v AS device_id
WHERE partition_date = regexp_replace((current_date - interval '1' day),'-','')
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
) t1
JOIN
(
SELECT device_id
FROM online.ml_device_history_detail
WHERE partition_date = regexp_replace((current_date - interval '1' day),'-','')
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND is_login_doctor = '1'
) t2
ON t1.device_id = t2.device_id
......@@ -946,17 +946,17 @@ LEFT JOIN
where (t5.user_id is null or t5.user_id = '')
)t6
LATERAL VIEW explode(t6.channel) t7 AS channel
GROUP BY partition_date,device_type,active_type,t7.channel
GROUP BY partition_date,device_os_type,active_type,t7.channel
)T8
ON T1.partition_date=T8.partition_date
AND T1.device_type=T8.device_type
AND T1.device_os_type=T8.device_os_type
AND T1.active_type=T8.active_type
AND T1.channel=T8.channel
LEFT JOIN
(--部分页面的单设备页面浏览时长
SELECT partition_date
,device_type
,device_os_type
,active_type
,t5.channel
,round(sum(CASE WHEN page_name like 'search%' THEN page_stay else 0 END)/count(distinct cl_id)/60,4) AS search_stay
......@@ -969,22 +969,22 @@ LEFT JOIN
FROM
(
SELECT t1.partition_date,device_type,active_type,channel,t2.cl_id,t3.page_name,t3.page_stay
SELECT t1.partition_date,device_os_type,active_type,channel,t2.cl_id,t3.page_name,t3.page_stay
FROM
(
SELECT partition_date
,device_os_type AS device_type
,device_os_type
,CASE WHEN active_type = '4' THEN '老活跃设备'
WHEN active_type IN ('1','2') THEN '新增设备' END AS active_type
,array(CASE WHEN tmp.time = 'AI' or (partition_date < 20200301 AND first_channel_source_type like 'promotion_toutiao_jy%') THEN 'AI' ELSE '其他' END , '合计') as channel
,array(CASE WHEN tmp.col2 = 'AI' or (partition_date < 20200301 AND first_channel_source_type like 'promotion_toutiao_jy%') THEN 'AI' ELSE '其他' END , '合计') as channel
,device_id
FROM online.ml_device_day_active_status
LEFT JOIN
(SELECT phone,time
FROM offline.tmp_zhx_20191227
WHERE flag='0204_danlei_channel')tmp
on first_channel_source_type=tmp.phone
WHERE partition_date>=regexp_replace((current_date - interval '60' day),'-','')
(SELECT col1,col2 --col1:子渠道,col2:是否属于AI,col3:标识
FROM pm.tl_pm_ydl
WHERE col3='0204_danlei_channel')tmp
on first_channel_source_type=tmp.col1
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND active_type IN ('1','2','4')
AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
......@@ -1002,7 +1002,7 @@ LEFT JOIN
(--内容用户
SELECT partition_date,cl_id
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= regexp_replace((current_date - interval '60' day),'-','')
WHERE partition_date >= regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date < regexp_replace((current_date),'-','')
AND action = 'page_view'
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail',
......@@ -1017,7 +1017,7 @@ LEFT JOIN
(--部分页面的停留时长
SELECT partition_date,cl_id,page_name,page_stay
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= regexp_replace((current_date - interval '60' day),'-','')
WHERE partition_date >= regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date < regexp_replace((current_date),'-','')
AND action = 'page_view'
AND (page_name like 'search%' or page_name IN ('welfare_detail','question_detail','report_result','face_scan'
......@@ -1032,17 +1032,17 @@ LEFT JOIN
( -- 去掉疑似机构刷量的PV和UV
select distinct device_id
from ml.ml_d_ct_dv_devicespam_d
where partition_day=regexp_replace((current_date - interval '1' day),'-','')
where partition_day=regexp_replace(DATE_SUB(current_date,1) ,'-','')
)spam_pv
on t2.cl_id=spam_pv.device_id
WHERE spam_pv.device_id IS NULL
)t4
LATERAL VIEW explode(t4.channel) t5 AS channel
GROUP BY partition_date,device_type,active_type,t5.channel
GROUP BY partition_date,device_os_type,active_type,t5.channel
)T9
ON T1.partition_date=T9.partition_date
AND T1.device_type=T9.device_type
AND T1.device_os_type=T9.device_os_type
AND T1.active_type=T9.active_type
AND T1.channel=T9.channel
ORDER BY T1.partition_date desc,T1.device_type,T1.active_type,T1.channel
ORDER BY day_id desc,device_os_type,active_type,is_ai_channel
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment