Commit ffbe21dc authored by 魏艺敏

update codes

parent 7542f649
...@@ -88,4 +88,4 @@ SELECT ...@@ -88,4 +88,4 @@ SELECT
,per_article_pv AS `专栏PV/专栏UV` ,per_article_pv AS `专栏PV/专栏UV`
,article_stay AS `专栏单设备时长(m)` ,article_stay AS `专栏单设备时长(m)`
FROM pm.tl_pm_content_d FROM pm.tl_pm_content_d
where partition_day=regexp_replace((current_date - interval '1' day),'-','') where partition_day=regexp_replace(DATE_SUB(current_date,1) ,'-','')
INSERT OVERWRITE TABLE tl_pm_content_d PARTITION (PARTITION_DAY = regexp_replace((current_date - interval '1' day),'-','')) INSERT OVERWRITE TABLE tl_pm_content_d PARTITION (PARTITION_DAY = #partition_day)
SELECT SELECT
T1.partition_date AS day_id T1.partition_date AS day_id
,T1.device_type AS device_os_type ,T1.device_os_type AS device_os_type
,T1.active_type AS active_type ,T1.active_type AS active_type
,T1.channel AS is_ai_channel ,T1.channel AS is_ai_channel
,COALESCE(T1.dau,0) AS dau ,COALESCE(T1.dau,0) AS dau
...@@ -90,22 +90,22 @@ SELECT ...@@ -90,22 +90,22 @@ SELECT
,COALESCE(T3.article_stay,0) AS article_stay ,COALESCE(T3.article_stay,0) AS article_stay
FROM FROM
(--基础维度/dau (--基础维度/dau
SELECT partition_date,device_type,active_type,t2.channel,sum(dau) AS dau SELECT partition_date,device_os_type,active_type,t2.channel,sum(dau) AS dau
FROM FROM
( (
SELECT partition_date SELECT partition_date
,device_os_type AS device_type ,device_os_type
,CASE WHEN active_type = '4' THEN '老活跃设备' ,CASE WHEN active_type = '4' THEN '老活跃设备'
WHEN active_type IN ('1','2') THEN '新增设备' END AS active_type WHEN active_type IN ('1','2') THEN '新增设备' END AS active_type
,array(CASE WHEN tmp.time = 'AI' or (partition_date < 20200301 AND first_channel_source_type like 'promotion_toutiao_jy%') THEN 'AI' ELSE '其他' END , '合计') as channel ,array(CASE WHEN tmp.col2 = 'AI' or (partition_date < 20200301 AND first_channel_source_type like 'promotion_toutiao_jy%') THEN 'AI' ELSE '其他' END , '合计') as channel
,count(1) AS dau ,count(1) AS dau
FROM online.ml_device_day_active_status FROM online.ml_device_day_active_status
LEFT JOIN LEFT JOIN
(SELECT phone,time (SELECT col1,col2 --col1:子渠道,col2:是否属于AI,col3:标识
FROM offline.tmp_zhx_20191227 FROM pm.tl_pm_ydl
WHERE flag='0204_danlei_channel')tmp WHERE col3='0204_danlei_channel')tmp
on first_channel_source_type=tmp.phone on first_channel_source_type=tmp.col1
WHERE partition_date>=regexp_replace((current_date - interval '60' day),'-','') WHERE partition_date>=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date<regexp_replace((current_date),'-','') AND partition_date<regexp_replace((current_date),'-','')
AND active_type IN ('1','2','4') AND active_type IN ('1','2','4')
AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3' AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
...@@ -118,15 +118,15 @@ FROM ...@@ -118,15 +118,15 @@ FROM
AND first_channel_source_type not LIKE 'promotion\_jf\_%' AND first_channel_source_type not LIKE 'promotion\_jf\_%'
GROUP BY partition_date,device_os_type,CASE WHEN active_type = '4' THEN '老活跃设备' GROUP BY partition_date,device_os_type,CASE WHEN active_type = '4' THEN '老活跃设备'
WHEN active_type IN ('1','2') THEN '新增设备' END, WHEN active_type IN ('1','2') THEN '新增设备' END,
CASE WHEN tmp.time = 'AI' or (partition_date < 20200301 AND first_channel_source_type like 'promotion_toutiao_jy%') THEN 'AI' ELSE '其他' END CASE WHEN tmp.col2 = 'AI' or (partition_date < 20200301 AND first_channel_source_type like 'promotion_toutiao_jy%') THEN 'AI' ELSE '其他' END
)t1 )t1
LATERAL VIEW explode(t1.channel) t2 AS channel LATERAL VIEW explode(t1.channel) t2 AS channel
GROUP BY partition_date,device_type,active_type,t2.channel GROUP BY partition_date,device_os_type,active_type,t2.channel
)T1 )T1
LEFT JOIN LEFT JOIN
(--内容uv/pv (--内容uv/pv
SELECT partition_date SELECT partition_date
,device_type ,device_os_type
,active_type ,active_type
,t4.channel ,t4.channel
,count(cl_id) as neirong_pv ,count(cl_id) as neirong_pv
...@@ -149,22 +149,22 @@ LEFT JOIN ...@@ -149,22 +149,22 @@ LEFT JOIN
,count(distinct CASE WHEN page_name IN ('wiki_detail','product_detail','wiki_brand','wiki_collect') THEN cl_id END) AS wiki_uv ,count(distinct CASE WHEN page_name IN ('wiki_detail','product_detail','wiki_brand','wiki_collect') THEN cl_id END) AS wiki_uv
FROM FROM
( (
SELECT t1.partition_date,device_type,active_type,channel,t2.cl_id,t2.page_name SELECT t1.partition_date,device_os_type,active_type,channel,t2.cl_id,t2.page_name
FROM FROM
( (
SELECT partition_date SELECT partition_date
,device_os_type AS device_type ,device_os_type
,CASE WHEN active_type = '4' THEN '老活跃设备' ,CASE WHEN active_type = '4' THEN '老活跃设备'
WHEN active_type IN ('1','2') THEN '新增设备' END AS active_type WHEN active_type IN ('1','2') THEN '新增设备' END AS active_type
,array(CASE WHEN tmp.time = 'AI' or (partition_date < 20200301 AND first_channel_source_type like 'promotion_toutiao_jy%') THEN 'AI' ELSE '其他' END , '合计') as channel ,array(CASE WHEN tmp.col2 = 'AI' or (partition_date < 20200301 AND first_channel_source_type like 'promotion_toutiao_jy%') THEN 'AI' ELSE '其他' END , '合计') as channel
,device_id ,device_id
FROM online.ml_device_day_active_status FROM online.ml_device_day_active_status
LEFT JOIN LEFT JOIN
(SELECT phone,time (SELECT col1,col2 --col1:子渠道,col2:是否属于AI,col3:标识
FROM offline.tmp_zhx_20191227 FROM pm.tl_pm_ydl
WHERE flag='0204_danlei_channel')tmp WHERE col3='0204_danlei_channel')tmp
on first_channel_source_type=tmp.phone on first_channel_source_type=tmp.col1
WHERE partition_date>=regexp_replace((current_date - interval '60' day),'-','') WHERE partition_date>=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date<regexp_replace((current_date),'-','') AND partition_date<regexp_replace((current_date),'-','')
AND active_type IN ('1','2','4') AND active_type IN ('1','2','4')
AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3' AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
...@@ -180,7 +180,7 @@ LEFT JOIN ...@@ -180,7 +180,7 @@ LEFT JOIN
( (
SELECT partition_date,page_name,cl_id SELECT partition_date,page_name,cl_id
FROM online.bl_hdfs_maidian_updates FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= regexp_replace((current_date - interval '60' day),'-','') WHERE partition_date >= regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date < regexp_replace((current_date),'-','') AND partition_date < regexp_replace((current_date),'-','')
AND action = 'page_view' AND action = 'page_view'
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail', AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail',
...@@ -192,23 +192,23 @@ LEFT JOIN ...@@ -192,23 +192,23 @@ LEFT JOIN
( -- 去掉疑似机构刷量的PV和UV ( -- 去掉疑似机构刷量的PV和UV
select distinct device_id select distinct device_id
from ml.ml_d_ct_dv_devicespam_d from ml.ml_d_ct_dv_devicespam_d
where partition_day=regexp_replace((current_date - interval '1' day),'-','') where partition_day=regexp_replace(DATE_SUB(current_date,1) ,'-','')
)spam_pv )spam_pv
on t2.cl_id=spam_pv.device_id on t2.cl_id=spam_pv.device_id
WHERE spam_pv.device_id IS NULL WHERE spam_pv.device_id IS NULL
)t3 )t3
LATERAL VIEW explode(t3.channel) t4 AS channel LATERAL VIEW explode(t3.channel) t4 AS channel
GROUP BY partition_date,device_type,active_type,t4.channel GROUP BY partition_date,device_os_type,active_type,t4.channel
)T2 )T2
ON T1.partition_date=T2.partition_date ON T1.partition_date=T2.partition_date
AND T1.device_type=T2.device_type AND T1.device_os_type=T2.device_os_type
AND T1.active_type=T2.active_type AND T1.active_type=T2.active_type
AND T1.channel=T2.channel AND T1.channel=T2.channel
LEFT JOIN LEFT JOIN
(--内容浏览时长 (--内容浏览时长
SELECT partition_date SELECT partition_date
,device_type ,device_os_type
,active_type ,active_type
,t4.channel ,t4.channel
,round(sum(page_stay)/count(distinct cl_id)/60,4) as neirong_stay ,round(sum(page_stay)/count(distinct cl_id)/60,4) as neirong_stay
...@@ -230,22 +230,22 @@ LEFT JOIN ...@@ -230,22 +230,22 @@ LEFT JOIN
count(distinct CASE WHEN page_name IN ('wiki_detail','product_detail','wiki_brand','wiki_collect') THEN cl_id END)/60,4) AS wiki_stay count(distinct CASE WHEN page_name IN ('wiki_detail','product_detail','wiki_brand','wiki_collect') THEN cl_id END)/60,4) AS wiki_stay
FROM FROM
( (
SELECT t1.partition_date,device_type,active_type,channel,t2.cl_id,t2.page_name,t2.page_stay SELECT t1.partition_date,device_os_type,active_type,channel,t2.cl_id,t2.page_name,t2.page_stay
FROM FROM
( (
SELECT partition_date SELECT partition_date
,device_os_type AS device_type ,device_os_type
,CASE WHEN active_type = '4' THEN '老活跃设备' ,CASE WHEN active_type = '4' THEN '老活跃设备'
WHEN active_type IN ('1','2') THEN '新增设备' END AS active_type WHEN active_type IN ('1','2') THEN '新增设备' END AS active_type
,array(CASE WHEN tmp.time = 'AI' or (partition_date < 20200301 AND first_channel_source_type like 'promotion_toutiao_jy%') THEN 'AI' ELSE '其他' END , '合计') as channel ,array(CASE WHEN tmp.col2 = 'AI' or (partition_date < 20200301 AND first_channel_source_type like 'promotion_toutiao_jy%') THEN 'AI' ELSE '其他' END , '合计') as channel
,device_id ,device_id
FROM online.ml_device_day_active_status FROM online.ml_device_day_active_status
LEFT JOIN LEFT JOIN
(SELECT phone,time (SELECT col1,col2 --col1:子渠道,col2:是否属于AI,col3:标识
FROM offline.tmp_zhx_20191227 FROM pm.tl_pm_ydl
WHERE flag='0204_danlei_channel')tmp WHERE col3='0204_danlei_channel')tmp
on first_channel_source_type=tmp.phone on first_channel_source_type=tmp.col1
WHERE partition_date>=regexp_replace((current_date - interval '60' day),'-','') WHERE partition_date>=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date<regexp_replace((current_date),'-','') AND partition_date<regexp_replace((current_date),'-','')
AND active_type IN ('1','2','4') AND active_type IN ('1','2','4')
AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3' AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
...@@ -263,7 +263,7 @@ LEFT JOIN ...@@ -263,7 +263,7 @@ LEFT JOIN
( (
SELECT partition_date,page_name,cl_id,page_stay SELECT partition_date,page_name,cl_id,page_stay
FROM online.bl_hdfs_maidian_updates FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= regexp_replace((current_date - interval '60' day),'-','') WHERE partition_date >= regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date < regexp_replace((current_date),'-','') AND partition_date < regexp_replace((current_date),'-','')
AND action = 'page_view' AND action = 'page_view'
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail', AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail',
...@@ -276,45 +276,45 @@ LEFT JOIN ...@@ -276,45 +276,45 @@ LEFT JOIN
( -- 去掉疑似机构刷量的PV和UV ( -- 去掉疑似机构刷量的PV和UV
select distinct device_id select distinct device_id
from ml.ml_d_ct_dv_devicespam_d from ml.ml_d_ct_dv_devicespam_d
where partition_day=regexp_replace((current_date - interval '1' day),'-','') where partition_day=regexp_replace(DATE_SUB(current_date,1) ,'-','')
)spam_pv )spam_pv
on t2.cl_id=spam_pv.device_id on t2.cl_id=spam_pv.device_id
WHERE spam_pv.device_id IS NULL WHERE spam_pv.device_id IS NULL
)t3 )t3
LATERAL VIEW explode(t3.channel) t4 AS channel LATERAL VIEW explode(t3.channel) t4 AS channel
GROUP BY partition_date,device_type,active_type,t4.channel GROUP BY partition_date,device_os_type,active_type,t4.channel
)T3 )T3
ON T1.partition_date=T3.partition_date ON T1.partition_date=T3.partition_date
AND T1.device_type=T3.device_type AND T1.device_os_type=T3.device_os_type
AND T1.active_type=T3.active_type AND T1.active_type=T3.active_type
AND T1.channel=T3.channel AND T1.channel=T3.channel
LEFT JOIN LEFT JOIN
(--内容用户留存 (--内容用户留存
SELECT regexp_replace(partition_date,'-','') AS partition_date SELECT regexp_replace(partition_date,'-','') AS partition_date
,device_type,active_type,t5.channel ,device_os_type,active_type,t5.channel
,int(count(DISTINCT CASE WHEN date_add(partition_date,1) = retention_date THEN device_id END)) AS retention_num1 ,int(count(DISTINCT CASE WHEN date_add(partition_date,1) = retention_date THEN device_id END)) AS retention_num1
,int(count(DISTINCT CASE WHEN date_add(partition_date,6) = retention_date THEN device_id END)) AS retention_num7 ,int(count(DISTINCT CASE WHEN date_add(partition_date,6) = retention_date THEN device_id END)) AS retention_num7
,int(count(DISTINCT CASE WHEN date_add(partition_date,29) = retention_date THEN device_id END)) AS retention_num30 ,int(count(DISTINCT CASE WHEN date_add(partition_date,29) = retention_date THEN device_id END)) AS retention_num30
FROM FROM
( (
SELECT t1.partition_date,device_type,active_type,channel,t3.device_id,t3.partition_date as retention_date SELECT t1.partition_date,device_os_type,active_type,channel,t3.device_id,t3.partition_date as retention_date
FROM FROM
( (
SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) AS partition_date SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) AS partition_date
,device_os_type AS device_type ,device_os_type
,CASE WHEN active_type = '4' THEN '老活跃设备' ,CASE WHEN active_type = '4' THEN '老活跃设备'
WHEN active_type IN ('1','2') THEN '新增设备' END AS active_type WHEN active_type IN ('1','2') THEN '新增设备' END AS active_type
,array(CASE WHEN tmp.time = 'AI' or (partition_date < 20200301 AND first_channel_source_type like 'promotion_toutiao_jy%') THEN 'AI' ELSE '其他' END , '合计') as channel ,array(CASE WHEN tmp.col2 = 'AI' or (partition_date < 20200301 AND first_channel_source_type like 'promotion_toutiao_jy%') THEN 'AI' ELSE '其他' END , '合计') as channel
,device_id ,device_id
FROM online.ml_device_day_active_status FROM online.ml_device_day_active_status
LEFT JOIN LEFT JOIN
(SELECT phone,time (SELECT col1,col2 --col1:子渠道,col2:是否属于AI,col3:标识
FROM offline.tmp_zhx_20191227 FROM pm.tl_pm_ydl
WHERE flag='0204_danlei_channel')tmp WHERE col3='0204_danlei_channel')tmp
on first_channel_source_type=tmp.phone on first_channel_source_type=tmp.col1
WHERE partition_date>=regexp_replace((current_date - interval '60' day),'-','') WHERE partition_date>=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date<regexp_replace((current_date),'-','') AND partition_date<regexp_replace((current_date),'-','')
AND active_type IN ('1','2','4') AND active_type IN ('1','2','4')
AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3' AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
...@@ -332,7 +332,7 @@ LEFT JOIN ...@@ -332,7 +332,7 @@ LEFT JOIN
SELECT cl_id SELECT cl_id
,concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) AS partition_date ,concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) AS partition_date
FROM online.bl_hdfs_maidian_updates FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= regexp_replace((current_date - interval '60' day),'-','') WHERE partition_date >= regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date < regexp_replace((current_date),'-','') AND partition_date < regexp_replace((current_date),'-','')
AND action = 'page_view' AND action = 'page_view'
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail', AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail',
...@@ -346,7 +346,7 @@ LEFT JOIN ...@@ -346,7 +346,7 @@ LEFT JOIN
( -- 去掉疑似机构刷量的PV和UV ( -- 去掉疑似机构刷量的PV和UV
select distinct device_id select distinct device_id
from ml.ml_d_ct_dv_devicespam_d from ml.ml_d_ct_dv_devicespam_d
where partition_day=regexp_replace((current_date - interval '1' day),'-','') where partition_day=regexp_replace(DATE_SUB(current_date,1) ,'-','')
)spam_pv )spam_pv
on t2.cl_id=spam_pv.device_id on t2.cl_id=spam_pv.device_id
LEFT JOIN LEFT JOIN
...@@ -354,46 +354,46 @@ LEFT JOIN ...@@ -354,46 +354,46 @@ LEFT JOIN
SELECT device_id SELECT device_id
,concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) AS partition_date ,concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) AS partition_date
FROM online.ml_device_day_active_status FROM online.ml_device_day_active_status
WHERE partition_date >=regexp_replace((current_date - interval '60' day),'-','') WHERE partition_date >=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date < regexp_replace((current_date),'-','') AND partition_date < regexp_replace((current_date),'-','')
)t3 )t3
ON t2.cl_id=t3.device_id ON t2.cl_id=t3.device_id
WHERE spam_pv.device_id IS NULL WHERE spam_pv.device_id IS NULL
)t4 )t4
LATERAL VIEW explode(t4.channel) t5 AS channel LATERAL VIEW explode(t4.channel) t5 AS channel
GROUP BY regexp_replace(partition_date,'-',''),device_type,active_type,t5.channel GROUP BY regexp_replace(partition_date,'-',''),device_os_type,active_type,t5.channel
)T4 )T4
ON T1.partition_date=T4.partition_date ON T1.partition_date=T4.partition_date
AND T1.device_type=T4.device_type AND T1.device_os_type=T4.device_os_type
AND T1.active_type=T4.active_type AND T1.active_type=T4.active_type
AND T1.channel=T4.channel AND T1.channel=T4.channel
LEFT JOIN LEFT JOIN
(--内容用户单设备app时长(m) (--内容用户单设备app时长(m)
SELECT partition_date SELECT partition_date
,device_type ,device_os_type
,active_type ,active_type
,t5.channel ,t5.channel
,round(sum(use_duration)/count(distinct cl_id)/60,4) as app_duration ,round(sum(use_duration)/count(distinct cl_id)/60,4) as app_duration
,round(avg(open_times),4) as avg_opentimes ,round(avg(open_times),4) as avg_opentimes
FROM FROM
( (
SELECT t1.partition_date,device_type,active_type,channel,t2.cl_id,t3.use_duration,t3.open_times SELECT t1.partition_date,device_os_type,active_type,channel,t2.cl_id,t3.use_duration,t3.open_times
FROM FROM
( (
SELECT partition_date SELECT partition_date
,device_os_type AS device_type ,device_os_type
,CASE WHEN active_type = '4' THEN '老活跃设备' ,CASE WHEN active_type = '4' THEN '老活跃设备'
WHEN active_type IN ('1','2') THEN '新增设备' END AS active_type WHEN active_type IN ('1','2') THEN '新增设备' END AS active_type
,array(CASE WHEN tmp.time = 'AI' or (partition_date < 20200301 AND first_channel_source_type like 'promotion_toutiao_jy%') THEN 'AI' ELSE '其他' END , '合计') as channel ,array(CASE WHEN tmp.col2 = 'AI' or (partition_date < 20200301 AND first_channel_source_type like 'promotion_toutiao_jy%') THEN 'AI' ELSE '其他' END , '合计') as channel
,device_id ,device_id
FROM online.ml_device_day_active_status FROM online.ml_device_day_active_status
LEFT JOIN LEFT JOIN
(SELECT phone,time (SELECT col1,col2 --col1:子渠道,col2:是否属于AI,col3:标识
FROM offline.tmp_zhx_20191227 FROM pm.tl_pm_ydl
WHERE flag='0204_danlei_channel')tmp WHERE col3='0204_danlei_channel')tmp
on first_channel_source_type=tmp.phone on first_channel_source_type=tmp.col1
WHERE partition_date>=regexp_replace((current_date - interval '60' day),'-','') WHERE partition_date>=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date<regexp_replace((current_date),'-','') AND partition_date<regexp_replace((current_date),'-','')
AND active_type IN ('1','2','4') AND active_type IN ('1','2','4')
AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3' AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
...@@ -410,7 +410,7 @@ LEFT JOIN ...@@ -410,7 +410,7 @@ LEFT JOIN
(--内容用户 (--内容用户
SELECT partition_date,cl_id SELECT partition_date,cl_id
FROM online.bl_hdfs_maidian_updates FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= regexp_replace((current_date - interval '60' day),'-','') WHERE partition_date >= regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date < regexp_replace((current_date),'-','') AND partition_date < regexp_replace((current_date),'-','')
AND action = 'page_view' AND action = 'page_view'
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail', AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail',
...@@ -423,14 +423,14 @@ LEFT JOIN ...@@ -423,14 +423,14 @@ LEFT JOIN
( -- 去掉疑似机构刷量的PV和UV ( -- 去掉疑似机构刷量的PV和UV
select distinct device_id select distinct device_id
from ml.ml_d_ct_dv_devicespam_d from ml.ml_d_ct_dv_devicespam_d
where partition_day=regexp_replace((current_date - interval '1' day),'-','') where partition_day=regexp_replace(DATE_SUB(current_date,1) ,'-','')
)spam_pv )spam_pv
on t2.cl_id=spam_pv.device_id on t2.cl_id=spam_pv.device_id
LEFT JOIN LEFT JOIN
( (
SELECT partition_date,device_id,use_duration,open_times SELECT partition_date,device_id,use_duration,open_times
FROM online.ml_device_updates FROM online.ml_device_updates
WHERE partition_date>=regexp_replace((current_date - interval '60' day),'-','') WHERE partition_date>=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date<regexp_replace((current_date),'-','') AND partition_date<regexp_replace((current_date),'-','')
AND use_duration > 0 and use_duration < 86400 AND use_duration > 0 and use_duration < 86400
)t3 )t3
...@@ -439,17 +439,17 @@ LEFT JOIN ...@@ -439,17 +439,17 @@ LEFT JOIN
WHERE spam_pv.device_id IS NULL WHERE spam_pv.device_id IS NULL
)t4 )t4
LATERAL VIEW explode(t4.channel) t5 AS channel LATERAL VIEW explode(t4.channel) t5 AS channel
GROUP BY partition_date,device_type,active_type,t5.channel GROUP BY partition_date,device_os_type,active_type,t5.channel
)T5 )T5
ON T1.partition_date=T5.partition_date ON T1.partition_date=T5.partition_date
AND T1.device_type=T5.device_type AND T1.device_os_type=T5.device_os_type
AND T1.active_type=T5.active_type AND T1.active_type=T5.active_type
AND T1.channel=T5.channel AND T1.channel=T5.channel
LEFT JOIN LEFT JOIN
(--不同来源进入内容uv/pv (--不同来源进入内容uv/pv
SELECT partition_date SELECT partition_date
,device_type ,device_os_type
,active_type ,active_type
,t4.channel ,t4.channel
,count(CASE WHEN referrer='search' THEN cl_id END) AS search_pv ,count(CASE WHEN referrer='search' THEN cl_id END) AS search_pv
...@@ -478,22 +478,22 @@ LEFT JOIN ...@@ -478,22 +478,22 @@ LEFT JOIN
FROM FROM
( (
SELECT t1.partition_date,device_type,active_type,channel,t2.cl_id,t2.referrer SELECT t1.partition_date,device_os_type,active_type,channel,t2.cl_id,t2.referrer
FROM FROM
( (
SELECT partition_date SELECT partition_date
,device_os_type AS device_type ,device_os_type
,CASE WHEN active_type = '4' THEN '老活跃设备' ,CASE WHEN active_type = '4' THEN '老活跃设备'
WHEN active_type IN ('1','2') THEN '新增设备' END AS active_type WHEN active_type IN ('1','2') THEN '新增设备' END AS active_type
,array(CASE WHEN tmp.time = 'AI' or (partition_date < 20200301 AND first_channel_source_type like 'promotion_toutiao_jy%') THEN 'AI' ELSE '其他' END , '合计') as channel ,array(CASE WHEN tmp.col2 = 'AI' or (partition_date < 20200301 AND first_channel_source_type like 'promotion_toutiao_jy%') THEN 'AI' ELSE '其他' END , '合计') as channel
,device_id ,device_id
FROM online.ml_device_day_active_status FROM online.ml_device_day_active_status
LEFT JOIN LEFT JOIN
(SELECT phone,time (SELECT col1,col2 --col1:子渠道,col2:是否属于AI,col3:标识
FROM offline.tmp_zhx_20191227 FROM pm.tl_pm_ydl
WHERE flag='0204_danlei_channel')tmp WHERE col3='0204_danlei_channel')tmp
on first_channel_source_type=tmp.phone on first_channel_source_type=tmp.col1
WHERE partition_date>=regexp_replace((current_date - interval '60' day),'-','') WHERE partition_date>=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date<regexp_replace((current_date),'-','') AND partition_date<regexp_replace((current_date),'-','')
AND active_type IN ('1','2','4') AND active_type IN ('1','2','4')
AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3' AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
...@@ -520,7 +520,7 @@ LEFT JOIN ...@@ -520,7 +520,7 @@ LEFT JOIN
when referrer in ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail') then 'content' when referrer in ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail') then 'content'
else null end as referrer else null end as referrer
FROM online.bl_hdfs_maidian_updates FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= regexp_replace((current_date - interval '60' day),'-','') WHERE partition_date >= regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date < regexp_replace((current_date),'-','') AND partition_date < regexp_replace((current_date),'-','')
and action='page_view' and action='page_view'
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail', AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail',
...@@ -531,7 +531,7 @@ LEFT JOIN ...@@ -531,7 +531,7 @@ LEFT JOIN
--首页feeds进入内容(首页非策略卡片点击) --首页feeds进入内容(首页非策略卡片点击)
SELECT partition_date,cl_id,'feeds' as referrer SELECT partition_date,cl_id,'feeds' as referrer
FROM online.bl_hdfs_maidian_updates FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= regexp_replace((current_date - interval '60' day),'-','') WHERE partition_date >= regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date < regexp_replace((current_date),'-','') AND partition_date < regexp_replace((current_date),'-','')
AND page_name = 'home' AND page_name = 'home'
AND action = 'on_click_card' AND action = 'on_click_card'
...@@ -543,7 +543,7 @@ LEFT JOIN ...@@ -543,7 +543,7 @@ LEFT JOIN
--首页feeds进入内容(首页非策略卡片点击) 7.8.0版本前的埋点 --首页feeds进入内容(首页非策略卡片点击) 7.8.0版本前的埋点
SELECT partition_date,cl_id,'feeds' as referrer SELECT partition_date,cl_id,'feeds' as referrer
FROM online.bl_hdfs_maidian_updates FROM online.bl_hdfs_maidian_updates
WHERE partition_date >=regexp_replace((current_date - interval '60' day),'-','') WHERE partition_date >=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date < regexp_replace((current_date),'-','') AND partition_date < regexp_replace((current_date),'-','')
AND page_name = 'home' AND page_name = 'home'
AND action in ( 'on_click_diary_card','on_click_answer_card','on_click_question_card','on_click_topic_card','on_click_live_card') AND action in ( 'on_click_diary_card','on_click_answer_card','on_click_question_card','on_click_topic_card','on_click_live_card')
...@@ -554,7 +554,7 @@ LEFT JOIN ...@@ -554,7 +554,7 @@ LEFT JOIN
--推荐进入内容(首页策略卡片点击),5月7日新增transaction_type类型 --推荐进入内容(首页策略卡片点击),5月7日新增transaction_type类型
SELECT partition_date,cl_id,'recommend' as referrer SELECT partition_date,cl_id,'recommend' as referrer
FROM online.bl_hdfs_maidian_updates FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= regexp_replace((current_date - interval '60' day),'-','') WHERE partition_date >= regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date < regexp_replace((current_date),'-','') AND partition_date < regexp_replace((current_date),'-','')
AND page_name = 'home' AND page_name = 'home'
AND action = 'on_click_card' AND action = 'on_click_card'
...@@ -566,7 +566,7 @@ LEFT JOIN ...@@ -566,7 +566,7 @@ LEFT JOIN
--推荐进入内容(首页策略卡片点击) 7.8.0版本前的埋点 --推荐进入内容(首页策略卡片点击) 7.8.0版本前的埋点
SELECT partition_date,cl_id,'feeds' as referrer SELECT partition_date,cl_id,'feeds' as referrer
FROM online.bl_hdfs_maidian_updates FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= regexp_replace((current_date - interval '60' day),'-','') WHERE partition_date >= regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date < regexp_replace((current_date),'-','') AND partition_date < regexp_replace((current_date),'-','')
AND page_name = 'home' AND page_name = 'home'
AND action in ('on_click_diary_card','on_click_answer_card','on_click_question_card','on_click_topic_card','on_click_live_card') AND action in ('on_click_diary_card','on_click_answer_card','on_click_question_card','on_click_topic_card','on_click_live_card')
...@@ -578,45 +578,45 @@ LEFT JOIN ...@@ -578,45 +578,45 @@ LEFT JOIN
( -- 去掉疑似机构刷量的PV和UV ( -- 去掉疑似机构刷量的PV和UV
select distinct device_id select distinct device_id
from ml.ml_d_ct_dv_devicespam_d from ml.ml_d_ct_dv_devicespam_d
where partition_day=regexp_replace((current_date - interval '1' day),'-','') where partition_day=regexp_replace(DATE_SUB(current_date,1) ,'-','')
)spam_pv )spam_pv
on t2.cl_id=spam_pv.device_id on t2.cl_id=spam_pv.device_id
WHERE spam_pv.device_id IS NULL WHERE spam_pv.device_id IS NULL
)t3 )t3
LATERAL VIEW explode(t3.channel) t4 AS channel LATERAL VIEW explode(t3.channel) t4 AS channel
GROUP BY partition_date,device_type,active_type,t4.channel GROUP BY partition_date,device_os_type,active_type,t4.channel
)T6 )T6
ON T1.partition_date=T6.partition_date ON T1.partition_date=T6.partition_date
AND T1.device_type=T6.device_type AND T1.device_os_type=T6.device_os_type
AND T1.active_type=T6.active_type AND T1.active_type=T6.active_type
AND T1.channel=T6.channel AND T1.channel=T6.channel
LEFT JOIN LEFT JOIN
(--真实发帖数 (--真实发帖数
SELECT partition_date SELECT partition_date
,device_type ,device_os_type
,active_type ,active_type
,t7.channel ,t7.channel
,count(distinct id) as num ,count(distinct id) as num
FROM FROM
( (
SELECT t1.partition_date,device_type,active_type,channel,t3.id SELECT t1.partition_date,device_os_type,active_type,channel,t3.id
FROM FROM
( (
SELECT partition_date SELECT partition_date
,device_os_type AS device_type ,device_os_type
,CASE WHEN active_type = '4' THEN '老活跃设备' ,CASE WHEN active_type = '4' THEN '老活跃设备'
WHEN active_type IN ('1','2') THEN '新增设备' END AS active_type WHEN active_type IN ('1','2') THEN '新增设备' END AS active_type
,array(CASE WHEN tmp.time = 'AI' or (partition_date < 20200301 AND first_channel_source_type like 'promotion_toutiao_jy%') THEN 'AI' ELSE '其他' END , '合计') as channel ,array(CASE WHEN tmp.col2 = 'AI' or (partition_date < 20200301 AND first_channel_source_type like 'promotion_toutiao_jy%') THEN 'AI' ELSE '其他' END , '合计') as channel
,device_id ,device_id
FROM online.ml_device_day_active_status FROM online.ml_device_day_active_status
LEFT JOIN LEFT JOIN
(SELECT phone,time (SELECT col1,col2 --col1:子渠道,col2:是否属于AI,col3:标识
FROM offline.tmp_zhx_20191227 FROM pm.tl_pm_ydl
WHERE flag='0204_danlei_channel')tmp WHERE col3='0204_danlei_channel')tmp
on first_channel_source_type=tmp.phone on first_channel_source_type=tmp.col1
WHERE partition_date>=regexp_replace((current_date - interval '60' day),'-','') WHERE partition_date>=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date<regexp_replace((current_date),'-','') AND partition_date<regexp_replace((current_date),'-','')
AND active_type IN ('1','2','4') AND active_type IN ('1','2','4')
AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3' AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
...@@ -634,7 +634,7 @@ LEFT JOIN ...@@ -634,7 +634,7 @@ LEFT JOIN
SELECT user_id,partition_date, SELECT user_id,partition_date,
if(size(device_list) > 0, device_list [ 0 ], '') AS device_id if(size(device_list) > 0, device_list [ 0 ], '') AS device_id
FROM online.ml_user_updates FROM online.ml_user_updates
WHERE partition_date>=regexp_replace((current_date - interval '60' day),'-','') WHERE partition_date>=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date<regexp_replace((current_date),'-','') AND partition_date<regexp_replace((current_date),'-','')
)t2 )t2
ON t1.partition_date=t2.partition_date ON t1.partition_date=t2.partition_date
...@@ -645,10 +645,10 @@ LEFT JOIN ...@@ -645,10 +645,10 @@ LEFT JOIN
--新增帖子 --新增帖子
SELECT user_id,id,regexp_replace(substr(create_time,1,10),'-','') as create_date SELECT user_id,id,regexp_replace(substr(create_time,1,10),'-','') as create_date
FROM online.tl_hdfs_api_tractate_view --发帖情况表 FROM online.tl_hdfs_api_tractate_view --发帖情况表
WHERE partition_date = regexp_replace((current_date - interval '1' day),'-','') WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND is_online='true' AND is_online='true'
AND platform in ('1','7') --更美用户发的以及打卡的(去除hera后台,爬虫抓取的,kyc自动回复的) AND platform in ('1','7') --更美用户发的以及打卡的(去除hera后台,爬虫抓取的,kyc自动回复的)
AND regexp_replace(substr(create_time,1,10),'-','')>=regexp_replace((current_date - interval '60' day),'-','') AND regexp_replace(substr(create_time,1,10),'-','')>=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND regexp_replace(substr(create_time,1,10),'-','')< regexp_replace((current_date),'-','') AND regexp_replace(substr(create_time,1,10),'-','')< regexp_replace((current_date),'-','')
UNION ALL UNION ALL
...@@ -658,15 +658,15 @@ LEFT JOIN ...@@ -658,15 +658,15 @@ LEFT JOIN
( (
SELECT user_id,id,regexp_replace(substr(created_time,1,10),'-','') as create_date SELECT user_id,id,regexp_replace(substr(created_time,1,10),'-','') as create_date
FROM online.tl_hdfs_diary_view FROM online.tl_hdfs_diary_view
WHERE partition_date = regexp_replace((current_date - interval '1' day),'-','') WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
and regexp_replace(substr(created_time,1,10),'-','') >= regexp_replace((current_date - interval '60' day),'-','') and regexp_replace(substr(created_time,1,10),'-','') >= regexp_replace(DATE_SUB(current_date,60) ,'-','')
and regexp_replace(substr(created_time,1,10),'-','') < regexp_replace((current_date),'-','') and regexp_replace(substr(created_time,1,10),'-','') < regexp_replace((current_date),'-','')
)a )a
JOIN JOIN
( --取非空日记 ( --取非空日记
SELECT diary_id SELECT diary_id
FROM online.tl_hdfs_problem_view FROM online.tl_hdfs_problem_view
WHERE partition_date = regexp_replace((current_date - interval '1' day),'-','') WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND is_spam = 'false' AND is_spam = 'false'
)b )b
ON a.id = b.diary_id ON a.id = b.diary_id
...@@ -675,8 +675,8 @@ LEFT JOIN ...@@ -675,8 +675,8 @@ LEFT JOIN
--新增日记贴 --新增日记贴
SELECT user_id,id,regexp_replace(substr(created_time,1,10),'-','') as create_date SELECT user_id,id,regexp_replace(substr(created_time,1,10),'-','') as create_date
FROM online.tl_hdfs_problem_view FROM online.tl_hdfs_problem_view
WHERE partition_date = regexp_replace((current_date - interval '1' day),'-','') WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND regexp_replace(substr(created_time, 1, 10), '-', '') >= regexp_replace((current_date - interval '60' day),'-','') AND regexp_replace(substr(created_time, 1, 10), '-', '') >= regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND regexp_replace(substr(created_time, 1, 10), '-', '') < regexp_replace((current_date),'-','') AND regexp_replace(substr(created_time, 1, 10), '-', '') < regexp_replace((current_date),'-','')
AND is_spam = 'false' AND is_spam = 'false'
AND diary_id is not null AND diary_id is not null
...@@ -685,8 +685,8 @@ LEFT JOIN ...@@ -685,8 +685,8 @@ LEFT JOIN
--新增问题数 --新增问题数
SELECT user_id,id,regexp_replace(substr(create_time,1,10),'-','') as create_date SELECT user_id,id,regexp_replace(substr(create_time,1,10),'-','') as create_date
FROM online.tl_hdfs_question_view FROM online.tl_hdfs_question_view
WHERE partition_date = regexp_replace((current_date - interval '1' day),'-','') WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND regexp_replace(substr(create_time, 1, 10), '-', '') >=regexp_replace((current_date - interval '60' day),'-','') AND regexp_replace(substr(create_time, 1, 10), '-', '') >=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND regexp_replace(substr(create_time, 1, 10), '-', '') < regexp_replace((current_date),'-','') AND regexp_replace(substr(create_time, 1, 10), '-', '') < regexp_replace((current_date),'-','')
AND is_spam = 'false' AND is_spam = 'false'
AND platform= '99' --更美用户发的(去除hera后台,爬虫抓取的,kyc自动回复的) AND platform= '99' --更美用户发的(去除hera后台,爬虫抓取的,kyc自动回复的)
...@@ -695,8 +695,8 @@ LEFT JOIN ...@@ -695,8 +695,8 @@ LEFT JOIN
--新增回答数 --新增回答数
SELECT user_id,id,regexp_replace(substr(create_time,1,10),'-','') as create_date SELECT user_id,id,regexp_replace(substr(create_time,1,10),'-','') as create_date
FROM online.tl_hdfs_answer_view FROM online.tl_hdfs_answer_view
WHERE partition_date = regexp_replace((current_date - interval '1' day),'-','') WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND regexp_replace(substr(create_time, 1, 10), '-', '') >= regexp_replace((current_date - interval '60' day),'-','') AND regexp_replace(substr(create_time, 1, 10), '-', '') >= regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND regexp_replace(substr(create_time, 1, 10), '-', '') < regexp_replace((current_date),'-','') AND regexp_replace(substr(create_time, 1, 10), '-', '') < regexp_replace((current_date),'-','')
AND is_spam = 'false' AND is_spam = 'false'
AND platform= '99' --更美用户发的(去除hera后台,爬虫抓取的,kyc自动回复的) AND platform= '99' --更美用户发的(去除hera后台,爬虫抓取的,kyc自动回复的)
...@@ -711,7 +711,7 @@ LEFT JOIN ...@@ -711,7 +711,7 @@ LEFT JOIN
( (
SELECT partition_date,user_id,action SELECT partition_date,user_id,action
FROM online.bl_hdfs_operation_updates FROM online.bl_hdfs_operation_updates
WHERE partition_date>=regexp_replace((current_date - interval '60' day),'-','') WHERE partition_date>=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date< regexp_replace((current_date),'-','') AND partition_date< regexp_replace((current_date),'-','')
)a )a
JOIN JOIN
...@@ -731,13 +731,13 @@ LEFT JOIN ...@@ -731,13 +731,13 @@ LEFT JOIN
( --医生账号 ( --医生账号
SELECT distinct user_id SELECT distinct user_id
FROM online.tl_hdfs_doctor_view FROM online.tl_hdfs_doctor_view
WHERE partition_date = regexp_replace((current_date - interval '1' day),'-','') WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
--马甲账号/模特用户 --马甲账号/模特用户
UNION ALL UNION ALL
SELECT user_id SELECT user_id
FROM ml.ml_c_ct_ui_user_dimen_d FROM ml.ml_c_ct_ui_user_dimen_d
WHERE partition_day = regexp_replace((current_date - interval '1' day),'-','') WHERE partition_day = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND (is_puppet = 'true' or is_classifyuser = 'true') AND (is_puppet = 'true' or is_classifyuser = 'true')
UNION ALL UNION ALL
...@@ -753,13 +753,13 @@ LEFT JOIN ...@@ -753,13 +753,13 @@ LEFT JOIN
SELECT user_id, v.device_id as device_id SELECT user_id, v.device_id as device_id
FROM online.ml_user_history_detail FROM online.ml_user_history_detail
LATERAL VIEW EXPLODE(device_history_list) v AS device_id LATERAL VIEW EXPLODE(device_history_list) v AS device_id
WHERE partition_date = regexp_replace((current_date - interval '1' day),'-','') WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
) t1 ) t1
JOIN JOIN
( (
SELECT device_id SELECT device_id
FROM online.ml_device_history_detail FROM online.ml_device_history_detail
WHERE partition_date = regexp_replace((current_date - interval '1' day),'-','') WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND is_login_doctor = '1' AND is_login_doctor = '1'
) t2 ) t2
ON t1.device_id = t2.device_id ON t1.device_id = t2.device_id
...@@ -768,38 +768,38 @@ LEFT JOIN ...@@ -768,38 +768,38 @@ LEFT JOIN
where (t5.user_id is null or t5.user_id = '') where (t5.user_id is null or t5.user_id = '')
)t6 )t6
LATERAL VIEW explode(t6.channel) t7 AS channel LATERAL VIEW explode(t6.channel) t7 AS channel
GROUP BY partition_date,device_type,active_type,t7.channel GROUP BY partition_date,device_os_type,active_type,t7.channel
)T7 )T7
ON T1.partition_date=T7.partition_date ON T1.partition_date=T7.partition_date
AND T1.device_type=T7.device_type AND T1.device_os_type=T7.device_os_type
AND T1.active_type=T7.active_type AND T1.active_type=T7.active_type
AND T1.channel=T7.channel AND T1.channel=T7.channel
LEFT JOIN LEFT JOIN
(--真实评论数 (--真实评论数
SELECT partition_date SELECT partition_date
,device_type ,device_os_type
,active_type ,active_type
,t7.channel ,t7.channel
,count(distinct id) as num ,count(distinct id) as num
FROM FROM
( (
SELECT t1.partition_date,device_type,active_type,channel,t3.id,t3.type SELECT t1.partition_date,device_os_type,active_type,channel,t3.id,t3.type
FROM FROM
( (
SELECT partition_date SELECT partition_date
,device_os_type AS device_type ,device_os_type
,CASE WHEN active_type = '4' THEN '老活跃设备' ,CASE WHEN active_type = '4' THEN '老活跃设备'
WHEN active_type IN ('1','2') THEN '新增设备' END AS active_type WHEN active_type IN ('1','2') THEN '新增设备' END AS active_type
,array(CASE WHEN tmp.time = 'AI' or (partition_date < 20200301 AND first_channel_source_type like 'promotion_toutiao_jy%') THEN 'AI' ELSE '其他' END , '合计') as channel ,array(CASE WHEN tmp.col2 = 'AI' or (partition_date < 20200301 AND first_channel_source_type like 'promotion_toutiao_jy%') THEN 'AI' ELSE '其他' END , '合计') as channel
,device_id ,device_id
FROM online.ml_device_day_active_status FROM online.ml_device_day_active_status
LEFT JOIN LEFT JOIN
(SELECT phone,time (SELECT col1,col2 --col1:子渠道,col2:是否属于AI,col3:标识
FROM offline.tmp_zhx_20191227 FROM pm.tl_pm_ydl
WHERE flag='0204_danlei_channel')tmp WHERE col3='0204_danlei_channel')tmp
on first_channel_source_type=tmp.phone on first_channel_source_type=tmp.col1
WHERE partition_date>=regexp_replace((current_date - interval '60' day),'-','') WHERE partition_date>=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date<regexp_replace((current_date),'-','') AND partition_date<regexp_replace((current_date),'-','')
AND active_type IN ('1','2','4') AND active_type IN ('1','2','4')
AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3' AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
...@@ -817,7 +817,7 @@ LEFT JOIN ...@@ -817,7 +817,7 @@ LEFT JOIN
SELECT user_id,partition_date, SELECT user_id,partition_date,
if(size(device_list) > 0, device_list [ 0 ], '') AS device_id if(size(device_list) > 0, device_list [ 0 ], '') AS device_id
FROM online.ml_user_updates FROM online.ml_user_updates
WHERE partition_date>=regexp_replace((current_date - interval '60' day),'-','') WHERE partition_date>=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date<regexp_replace((current_date),'-','') AND partition_date<regexp_replace((current_date),'-','')
)t2 )t2
ON t1.partition_date=t2.partition_date ON t1.partition_date=t2.partition_date
...@@ -830,17 +830,17 @@ LEFT JOIN ...@@ -830,17 +830,17 @@ LEFT JOIN
( (
SELECT user_id,regexp_replace(substr(reply_date,1,10),'-','') as reply_date,problem_id,id SELECT user_id,regexp_replace(substr(reply_date,1,10),'-','') as reply_date,problem_id,id
FROM online.tl_hdfs_topicreply_view FROM online.tl_hdfs_topicreply_view
WHERE partition_date = regexp_replace((current_date - interval '1' day),'-','') WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND is_spam = 'false' --排除疑似广告 AND is_spam = 'false' --排除疑似广告
-- and diary_id is not null 这个表的diary_id有问题,需要join problem表来判断是不是属于日记 -- and diary_id is not null 这个表的diary_id有问题,需要join problem表来判断是不是属于日记
and regexp_replace(substr(reply_date,1,10),'-','') >= regexp_replace((current_date - interval '60' day),'-','') and regexp_replace(substr(reply_date,1,10),'-','') >= regexp_replace(DATE_SUB(current_date,60) ,'-','')
and regexp_replace(substr(reply_date,1,10),'-','') < regexp_replace((current_date),'-','') and regexp_replace(substr(reply_date,1,10),'-','') < regexp_replace((current_date),'-','')
)t1 )t1
JOIN JOIN
( (
SELECT id,diary_id SELECT id,diary_id
FROM online.tl_hdfs_problem_view FROM online.tl_hdfs_problem_view
WHERE partition_date = regexp_replace((current_date - interval '1' day),'-','') WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
group by id,diary_id group by id,diary_id
)t2 )t2
on t2.id=t1.problem_id on t2.id=t1.problem_id
...@@ -855,17 +855,17 @@ LEFT JOIN ...@@ -855,17 +855,17 @@ LEFT JOIN
( (
SELECT user_id,regexp_replace(substr(create_time,1,10),'-','') as reply_date,answer_id,id SELECT user_id,regexp_replace(substr(create_time,1,10),'-','') as reply_date,answer_id,id
FROM online.tl_hdfs_answer_reply_view FROM online.tl_hdfs_answer_reply_view
WHERE partition_date = regexp_replace((current_date - interval '1' day),'-','') WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND (is_fake is NULL or is_fake = 'false') AND (is_fake is NULL or is_fake = 'false')
AND answer_id is not NULL AND answer_id is not NULL
and regexp_replace(substr(create_time,1,10),'-','') >= regexp_replace((current_date - interval '60' day),'-','') and regexp_replace(substr(create_time,1,10),'-','') >= regexp_replace(DATE_SUB(current_date,60) ,'-','')
and regexp_replace(substr(create_time,1,10),'-','') < regexp_replace((current_date),'-','') and regexp_replace(substr(create_time,1,10),'-','') < regexp_replace((current_date),'-','')
)t1 )t1
JOIN JOIN
( (
SELECT id,question_id SELECT id,question_id
FROM online.tl_hdfs_answer_view FROM online.tl_hdfs_answer_view
WHERE partition_date =regexp_replace((current_date - interval '1' day),'-','') WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
group by id,question_id group by id,question_id
)t2 )t2
ON t2.id = t1.answer_id ON t2.id = t1.answer_id
...@@ -875,8 +875,8 @@ LEFT JOIN ...@@ -875,8 +875,8 @@ LEFT JOIN
--有评论过用户帖的设备 --有评论过用户帖的设备
SELECT user_id,regexp_replace(substr(create_time,1,10),'-','') as reply_date,id,'tractate_reply' as type SELECT user_id,regexp_replace(substr(create_time,1,10),'-','') as reply_date,id,'tractate_reply' as type
FROM online.tl_hdfs_api_tractate_reply_view FROM online.tl_hdfs_api_tractate_reply_view
WHERE partition_date =regexp_replace((current_date - interval '1' day),'-','') WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
and regexp_replace(substr(create_time,1,10),'-','') >=regexp_replace((current_date - interval '60' day),'-','') and regexp_replace(substr(create_time,1,10),'-','') >=regexp_replace(DATE_SUB(current_date,60) ,'-','')
and regexp_replace(substr(create_time,1,10),'-','') < regexp_replace((current_date),'-','') and regexp_replace(substr(create_time,1,10),'-','') < regexp_replace((current_date),'-','')
)t3 )t3
ON t2.partition_date = t3.reply_date ON t2.partition_date = t3.reply_date
...@@ -889,7 +889,7 @@ LEFT JOIN ...@@ -889,7 +889,7 @@ LEFT JOIN
( (
SELECT partition_date,user_id,action SELECT partition_date,user_id,action
FROM online.bl_hdfs_operation_updates FROM online.bl_hdfs_operation_updates
WHERE partition_date>=regexp_replace((current_date - interval '60' day),'-','') WHERE partition_date>=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date< regexp_replace((current_date),'-','') AND partition_date< regexp_replace((current_date),'-','')
)a )a
JOIN JOIN
...@@ -909,13 +909,13 @@ LEFT JOIN ...@@ -909,13 +909,13 @@ LEFT JOIN
( --医生账号 ( --医生账号
SELECT distinct user_id SELECT distinct user_id
FROM online.tl_hdfs_doctor_view FROM online.tl_hdfs_doctor_view
WHERE partition_date = regexp_replace((current_date - interval '1' day),'-','') WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
--马甲账号/模特用户 --马甲账号/模特用户
UNION ALL UNION ALL
SELECT user_id SELECT user_id
FROM ml.ml_c_ct_ui_user_dimen_d FROM ml.ml_c_ct_ui_user_dimen_d
WHERE partition_day = regexp_replace((current_date - interval '1' day),'-','') WHERE partition_day = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND (is_puppet = 'true' or is_classifyuser = 'true') AND (is_puppet = 'true' or is_classifyuser = 'true')
UNION ALL UNION ALL
...@@ -931,13 +931,13 @@ LEFT JOIN ...@@ -931,13 +931,13 @@ LEFT JOIN
SELECT user_id, v.device_id as device_id SELECT user_id, v.device_id as device_id
FROM online.ml_user_history_detail FROM online.ml_user_history_detail
LATERAL VIEW EXPLODE(device_history_list) v AS device_id LATERAL VIEW EXPLODE(device_history_list) v AS device_id
WHERE partition_date = regexp_replace((current_date - interval '1' day),'-','') WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
) t1 ) t1
JOIN JOIN
( (
SELECT device_id SELECT device_id
FROM online.ml_device_history_detail FROM online.ml_device_history_detail
WHERE partition_date = regexp_replace((current_date - interval '1' day),'-','') WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND is_login_doctor = '1' AND is_login_doctor = '1'
) t2 ) t2
ON t1.device_id = t2.device_id ON t1.device_id = t2.device_id
...@@ -946,17 +946,17 @@ LEFT JOIN ...@@ -946,17 +946,17 @@ LEFT JOIN
where (t5.user_id is null or t5.user_id = '') where (t5.user_id is null or t5.user_id = '')
)t6 )t6
LATERAL VIEW explode(t6.channel) t7 AS channel LATERAL VIEW explode(t6.channel) t7 AS channel
GROUP BY partition_date,device_type,active_type,t7.channel GROUP BY partition_date,device_os_type,active_type,t7.channel
)T8 )T8
ON T1.partition_date=T8.partition_date ON T1.partition_date=T8.partition_date
AND T1.device_type=T8.device_type AND T1.device_os_type=T8.device_os_type
AND T1.active_type=T8.active_type AND T1.active_type=T8.active_type
AND T1.channel=T8.channel AND T1.channel=T8.channel
LEFT JOIN LEFT JOIN
(--部分页面的单设备页面浏览时长 (--部分页面的单设备页面浏览时长
SELECT partition_date SELECT partition_date
,device_type ,device_os_type
,active_type ,active_type
,t5.channel ,t5.channel
,round(sum(CASE WHEN page_name like 'search%' THEN page_stay else 0 END)/count(distinct cl_id)/60,4) AS search_stay ,round(sum(CASE WHEN page_name like 'search%' THEN page_stay else 0 END)/count(distinct cl_id)/60,4) AS search_stay
...@@ -969,22 +969,22 @@ LEFT JOIN ...@@ -969,22 +969,22 @@ LEFT JOIN
FROM FROM
( (
SELECT t1.partition_date,device_type,active_type,channel,t2.cl_id,t3.page_name,t3.page_stay SELECT t1.partition_date,device_os_type,active_type,channel,t2.cl_id,t3.page_name,t3.page_stay
FROM FROM
( (
SELECT partition_date SELECT partition_date
,device_os_type AS device_type ,device_os_type
,CASE WHEN active_type = '4' THEN '老活跃设备' ,CASE WHEN active_type = '4' THEN '老活跃设备'
WHEN active_type IN ('1','2') THEN '新增设备' END AS active_type WHEN active_type IN ('1','2') THEN '新增设备' END AS active_type
,array(CASE WHEN tmp.time = 'AI' or (partition_date < 20200301 AND first_channel_source_type like 'promotion_toutiao_jy%') THEN 'AI' ELSE '其他' END , '合计') as channel ,array(CASE WHEN tmp.col2 = 'AI' or (partition_date < 20200301 AND first_channel_source_type like 'promotion_toutiao_jy%') THEN 'AI' ELSE '其他' END , '合计') as channel
,device_id ,device_id
FROM online.ml_device_day_active_status FROM online.ml_device_day_active_status
LEFT JOIN LEFT JOIN
(SELECT phone,time (SELECT col1,col2 --col1:子渠道,col2:是否属于AI,col3:标识
FROM offline.tmp_zhx_20191227 FROM pm.tl_pm_ydl
WHERE flag='0204_danlei_channel')tmp WHERE col3='0204_danlei_channel')tmp
on first_channel_source_type=tmp.phone on first_channel_source_type=tmp.col1
WHERE partition_date>=regexp_replace((current_date - interval '60' day),'-','') WHERE partition_date>=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date<regexp_replace((current_date),'-','') AND partition_date<regexp_replace((current_date),'-','')
AND active_type IN ('1','2','4') AND active_type IN ('1','2','4')
AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3' AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
...@@ -1002,7 +1002,7 @@ LEFT JOIN ...@@ -1002,7 +1002,7 @@ LEFT JOIN
(--内容用户 (--内容用户
SELECT partition_date,cl_id SELECT partition_date,cl_id
FROM online.bl_hdfs_maidian_updates FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= regexp_replace((current_date - interval '60' day),'-','') WHERE partition_date >= regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date < regexp_replace((current_date),'-','') AND partition_date < regexp_replace((current_date),'-','')
AND action = 'page_view' AND action = 'page_view'
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail', AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail',
...@@ -1017,7 +1017,7 @@ LEFT JOIN ...@@ -1017,7 +1017,7 @@ LEFT JOIN
(--部分页面的停留时长 (--部分页面的停留时长
SELECT partition_date,cl_id,page_name,page_stay SELECT partition_date,cl_id,page_name,page_stay
FROM online.bl_hdfs_maidian_updates FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= regexp_replace((current_date - interval '60' day),'-','') WHERE partition_date >= regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date < regexp_replace((current_date),'-','') AND partition_date < regexp_replace((current_date),'-','')
AND action = 'page_view' AND action = 'page_view'
AND (page_name like 'search%' or page_name IN ('welfare_detail','question_detail','report_result','face_scan' AND (page_name like 'search%' or page_name IN ('welfare_detail','question_detail','report_result','face_scan'
...@@ -1032,17 +1032,17 @@ LEFT JOIN ...@@ -1032,17 +1032,17 @@ LEFT JOIN
( -- 去掉疑似机构刷量的PV和UV ( -- 去掉疑似机构刷量的PV和UV
select distinct device_id select distinct device_id
from ml.ml_d_ct_dv_devicespam_d from ml.ml_d_ct_dv_devicespam_d
where partition_day=regexp_replace((current_date - interval '1' day),'-','') where partition_day=regexp_replace(DATE_SUB(current_date,1) ,'-','')
)spam_pv )spam_pv
on t2.cl_id=spam_pv.device_id on t2.cl_id=spam_pv.device_id
WHERE spam_pv.device_id IS NULL WHERE spam_pv.device_id IS NULL
)t4 )t4
LATERAL VIEW explode(t4.channel) t5 AS channel LATERAL VIEW explode(t4.channel) t5 AS channel
GROUP BY partition_date,device_type,active_type,t5.channel GROUP BY partition_date,device_os_type,active_type,t5.channel
)T9 )T9
ON T1.partition_date=T9.partition_date ON T1.partition_date=T9.partition_date
AND T1.device_type=T9.device_type AND T1.device_os_type=T9.device_os_type
AND T1.active_type=T9.active_type AND T1.active_type=T9.active_type
AND T1.channel=T9.channel AND T1.channel=T9.channel
ORDER BY T1.partition_date desc,T1.device_type,T1.active_type,T1.channel ORDER BY day_id desc,device_os_type,active_type,is_ai_channel
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment