Commit 4c221097 authored by 魏艺敏's avatar 魏艺敏

add daily_content_data codes

parent 8c3d46f7
......@@ -6,4 +6,5 @@ meigou-detail-page=美购详情页
meigou-detail-page-dispense-pv=美购详情页分发pv
meigou-detail-page-dispense-uv=美购详情页分发uv
meigou-detail-page-source-pv=美购详情页来源pv
meigou-detail-page-source-uv=美购详情页来源uv
\ No newline at end of file
meigou-detail-page-source-uv=美购详情页来源uv
daily_content_data=内容日报-新
\ No newline at end of file
#step1_1.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_device_day_active_status
\ No newline at end of file
#step1_1.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_answer_view
\ No newline at end of file
#step1_1.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_topicreply_view
\ No newline at end of file
#step1_1.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_answer_reply_view
\ No newline at end of file
#step1_1.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_api_tractate_reply_view
\ No newline at end of file
#step1_1.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online bl_hdfs_maidian_updates
\ No newline at end of file
#step1_1.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_device_updates
\ No newline at end of file
#step1_1.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_user_updates
\ No newline at end of file
#step1_1.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online bl_hdfs_operation_updates
\ No newline at end of file
#step1_1.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_api_tractate_view
\ No newline at end of file
#step1_1.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_diary_view
\ No newline at end of file
#step1_1.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_problem_view
\ No newline at end of file
#step1_1.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_question_view
\ No newline at end of file
#step2.job
type=command
dependencies=step1_1,step1_2,step1_3,step1_4,step1_5,step1_6,step1_7,step1_8,step1_9,step1_10,step1_11,step1_12,step1_13
command=curl -X GET http://localhost:8553/api/report/sendEmail/weiyimin@igengmei.com/zhaojianwei@igengmei.com/daily_content_data
\ No newline at end of file
--内容日报
SELECT
T1.partition_date AS `日期`
,T1.device_type AS `设备类型`
,T1.active_type AS `活跃类型`
,T1.channel AS `渠道`
,COALESCE(T1.dau,0) AS `DAU`
,COALESCE(T2.neirong_uv,0) AS `内容UV`
,COALESCE(T2.neirong_pv,0) AS `内容PV`
,COALESCE(ROUND(T2.neirong_uv/T1.dau,4),0) AS `内容UV/DAU`
,COALESCE(ROUND(T2.neirong_pv/T2.neirong_uv,4),0) AS `内容PV/内容UV`
,COALESCE(CONCAT(ROUND(T4.retention_num1/T2.neirong_uv*100,4),'%'),0) AS `内容用户APP次留`
,COALESCE(CONCAT(ROUND(T4.retention_num7/T2.neirong_uv*100,4),'%'),0) AS `内容用户APP7留`
,COALESCE(CONCAT(ROUND(T4.retention_num30/T2.neirong_uv*100,4),'%'),0) AS `内容用户APP30留`
,COALESCE(T5.app_duration,0) AS `内容用户单设备App时长(m)`
,COALESCE(T3.neirong_stay,0) AS `内容用户单设备内容时长(m)`
,COALESCE(T5.avg_opentimes,0) AS `内容用户单设备打开次数`
,COALESCE(T9.search_stay,0) AS `内容用户搜索相关页面单设备页面时长(m)`
,COALESCE(T9.welfare_stay,0) AS `内容用户美购详情页单设备页面时长(m)`
,COALESCE(T9.question_stay,0) AS `内容用户问题详情页单设备页面时长(m)`
,COALESCE(T9.ai_related_stay,0) AS `内容用户AI相关页面单设备页面时长(m)`
,COALESCE(T9.diary_stay,0) AS `内容用户日记详情页单设备页面时长(m)`
,COALESCE(T9.home_stay,0) AS `内容用户首页单设备页面时长(m)`
,COALESCE(T9.conv_stay,0) AS `内容用户咨询相关页面单设备页面时长(m)`
,COALESCE(ROUND(T6.recommend_uv/T2.neirong_uv,4),0) AS `首页feeds推荐进入内容UV/内容UV`
,COALESCE(ROUND(T6.recommend_pv/T2.neirong_uv,4),0) AS `首页feeds推荐进入内容PV/内容UV`
,COALESCE(ROUND(T6.feeds_uv/T2.neirong_uv,4),0) AS `首页feeds非推荐进入内容UV/内容UV`
,COALESCE(ROUND(T6.feeds_pv/T2.neirong_uv,4),0) AS `首页feeds非推荐进入内容PV/内容UV`
,COALESCE(ROUND(T6.search_uv/T2.neirong_uv,4),0) AS `搜索进入内容UV/内容UV`
,COALESCE(ROUND(T6.search_pv/T2.neirong_uv,4),0) AS `搜索进入内容PV/内容UV`
,COALESCE(ROUND(T6.zone_uv/T2.neirong_uv,4),0) AS `内容聚合页进入内容UV/内容UV`
,COALESCE(ROUND(T6.zone_pv/T2.neirong_uv,4),0) AS `内容聚合页进入内容PV/内容UV`
,COALESCE(ROUND(T6.content_uv/T2.neirong_uv,4),0) AS `内容详情页推荐板块进入内容UV/内容UV`
,COALESCE(ROUND(T6.content_pv/T2.neirong_uv,4),0) AS `内容详情页推荐板块进入内容PV/内容UV`
,COALESCE(ROUND(T6.blank_uv/T2.neirong_uv,4),0) AS `无来源页面(大多数为push)进入内容UV/内容UV`
,COALESCE(ROUND(T6.blank_pv/T2.neirong_uv,4),0) AS `无来源页面(大多数为push)进入内容PV/内容UV`
,COALESCE(ROUND(T6.comment_uv/T2.neirong_uv,4),0) AS `评论列表页进入内容UV/内容UV`
,COALESCE(ROUND(T6.comment_pv/T2.neirong_uv,4),0) AS `评论列表页进入内容PV/内容UV`
,COALESCE(ROUND(T6.org_uv/T2.neirong_uv,4),0) AS `医生医院主页进入内容UV/内容UV`
,COALESCE(ROUND(T6.org_pv/T2.neirong_uv,4),0) AS `医生医院主页进入内容PV/内容UV`
,COALESCE(ROUND(T6.category_uv/T2.neirong_uv,4),0) AS `品类聚合页进入内容UV/内容UV`
,COALESCE(ROUND(T6.category_pv/T2.neirong_uv,4),0) AS `品类聚合页进入内容PV/内容UV`
,COALESCE(ROUND(T6.my_diary_uv/T2.neirong_uv,4),0) AS `我的日记页进入内容UV/内容UV`
,COALESCE(ROUND(T6.my_diary_pv/T2.neirong_uv,4),0) AS `我的日记页进入内容PV/内容UV`
,COALESCE(ROUND(T6.ai_uv/T2.neirong_uv,4),0) AS `AI报告页进入内容UV/内容UV`
,COALESCE(ROUND(T6.ai_pv/T2.neirong_uv,4),0) AS `AI报告页进入内容PV/内容UV`
,COALESCE(T7.num,0) AS `真实发帖数`
,COALESCE(T8.num,0) AS `真实评论数`
,COALESCE(T2.diary_uv,0) AS `日记UV`
,COALESCE(T2.diary_pv,0) AS `日记PV`
,COALESCE(ROUND(T2.diary_uv/T2.neirong_uv,4),0) AS `日记UV/内容UV`
,COALESCE(ROUND(T2.diary_pv/T2.diary_uv,4),0) AS `日记PV/日记UV`
,COALESCE(T3.diary_stay,0) AS `日记单设备时长(m)`
,COALESCE(T2.post_uv,0) AS `帖子UV`
,COALESCE(T2.post_pv,0) AS `帖子PV`
,COALESCE(ROUND(T2.post_uv/T2.neirong_uv,4),0) AS `帖子UV/内容UV`
,COALESCE(ROUND(T2.post_pv/T2.post_uv,4),0) AS `帖子PV/帖子UV`
,COALESCE(T3.post_stay,0) AS `帖子单设备时长(m)`
,COALESCE(T2.question_uv,0) AS `问题UV`
,COALESCE(T2.question_pv,0) AS `问题PV`
,COALESCE(ROUND(T2.question_uv/T2.neirong_uv,4),0) AS `问题UV/内容UV`
,COALESCE(ROUND(T2.question_pv/T2.question_uv,4),0) AS `问题PV/问题UV`
,COALESCE(T3.question_stay,0) AS `问题单设备时长(m)`
,COALESCE(T2.question_answer_uv,0) AS `问答UV`
,COALESCE(T2.question_answer_pv,0) AS `问答PV`
,COALESCE(ROUND(T2.question_answer_uv/T2.neirong_uv,4),0) AS `问答UV/内容UV`
,COALESCE(ROUND(T2.question_answer_pv/T2.question_answer_uv,4),0) AS `问答PV/问答UV`
,COALESCE(T3.question_answer_stay,0) AS `问答单设备时长(m)`
,COALESCE(T2.answer_uv,0) AS `回答UV`
,COALESCE(T2.answer_pv,0) AS `回答PV`
,COALESCE(ROUND(T2.answer_uv/T2.neirong_uv,4),0) AS `回答UV/内容UV`
,COALESCE(ROUND(T2.answer_pv/T2.answer_uv,4),0) AS `回答PV/回答UV`
,COALESCE(T3.answer_stay,0) AS `回答单设备时长(m)`
,COALESCE(T2.video_uv,0) AS `视频UV`
,COALESCE(T2.video_pv,0) AS `视频PV`
,COALESCE(ROUND(T2.video_uv/T2.neirong_uv,4),0) AS `视频UV/内容UV`
,COALESCE(ROUND(T2.video_pv/T2.video_uv,4),0) AS `视频PV/视频UV`
,COALESCE(T3.video_stay,0) AS `视频单设备时长(m)`
,COALESCE(T2.wiki_uv,0) AS `百科UV`
,COALESCE(T2.wiki_pv,0) AS `百科PV`
,COALESCE(ROUND(T2.wiki_uv/T2.neirong_uv,4),0) AS `百科UV/内容UV`
,COALESCE(ROUND(T2.wiki_pv/T2.wiki_uv,4),0) AS `百科PV/百科UV`
,COALESCE(T3.wiki_stay,0) AS `百科单设备时长(m)`
,COALESCE(T2.article_uv,0) AS `专栏UV`
,COALESCE(T2.article_pv,0) AS `专栏PV`
,COALESCE(ROUND(T2.article_uv/T2.neirong_uv,4),0) AS `专栏UV/内容UV`
,COALESCE(ROUND(T2.article_pv/T2.article_uv,4),0) AS `专栏PV/专栏UV`
,COALESCE(T3.article_stay,0) AS `专栏单设备时长(m)`
FROM
(--基础维度/dau
SELECT partition_date,device_type,active_type,t2.channel,sum(dau) AS dau
FROM
(
SELECT partition_date
,device_os_type AS device_type
,CASE WHEN active_type = '4' THEN '老活跃设备'
WHEN active_type IN ('1','2') THEN '新增设备' END AS active_type
,array(CASE WHEN tmp.time = 'AI' or (partition_date < 20200301 AND first_channel_source_type like 'promotion_toutiao_jy%') THEN 'AI' ELSE '其他' END , '合计') as channel
,count(1) AS dau
FROM online.ml_device_day_active_status
LEFT JOIN
(SELECT phone,time
FROM offline.tmp_zhx_20191227
WHERE flag='0204_danlei_channel')tmp
on first_channel_source_type=tmp.phone
WHERE partition_date>=regexp_replace((current_date - interval '60' day),'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND active_type IN ('1','2','4')
AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
,'promotion_shike','promotion_julang_jl03','promotion_zuimei','','unknown')
AND first_channel_source_type not LIKE 'promotion\_jf\_%'
GROUP BY partition_date,device_os_type,CASE WHEN active_type = '4' THEN '老活跃设备'
WHEN active_type IN ('1','2') THEN '新增设备' END,
CASE WHEN tmp.time = 'AI' or (partition_date < 20200301 AND first_channel_source_type like 'promotion_toutiao_jy%') THEN 'AI' ELSE '其他' END
)t1
LATERAL VIEW explode(t1.channel) t2 AS channel
GROUP BY partition_date,device_type,active_type,t2.channel
)T1
LEFT JOIN
(--内容uv/pv
SELECT partition_date
,device_type
,active_type
,t4.channel
,count(cl_id) as neirong_pv
,count(distinct cl_id) as neirong_uv
,count(CASE WHEN page_name IN ('diary_detail','topic_detail') THEN cl_id END) AS diary_pv
,count(distinct CASE WHEN page_name IN ('diary_detail','topic_detail') THEN cl_id END) AS diary_uv
,count(CASE WHEN page_name IN ('post_detail','user_post_detail','doctor_post_detail') THEN cl_id END) AS post_pv
,count(distinct CASE WHEN page_name IN ('post_detail','user_post_detail','doctor_post_detail') THEN cl_id END) AS post_uv
,count(CASE WHEN page_name ='question_detail' THEN cl_id END) AS question_pv
,count(distinct CASE WHEN page_name ='question_detail' THEN cl_id END) AS question_uv
,count(CASE WHEN page_name ='question_answer_detail' THEN cl_id END) AS question_answer_pv
,count(distinct CASE WHEN page_name ='question_answer_detail' THEN cl_id END) AS question_answer_uv
,count(CASE WHEN page_name='answer_detail' THEN cl_id END) AS answer_pv
,count(distinct CASE WHEN page_name='answer_detail' THEN cl_id END) AS answer_uv
,count(CASE WHEN page_name='video_steep' THEN cl_id END) AS video_pv
,count(distinct CASE WHEN page_name='video_steep' THEN cl_id END) AS video_uv
,count(CASE WHEN page_name='article_detail' THEN cl_id END) AS article_pv
,count(distinct CASE WHEN page_name='article_detail' THEN cl_id END) AS article_uv
,count(CASE WHEN page_name IN ('wiki_detail','product_detail','wiki_brand','wiki_collect') THEN cl_id END) AS wiki_pv
,count(distinct CASE WHEN page_name IN ('wiki_detail','product_detail','wiki_brand','wiki_collect') THEN cl_id END) AS wiki_uv
FROM
(
SELECT t1.partition_date,device_type,active_type,channel,t2.cl_id,t2.page_name
FROM
(
SELECT partition_date
,device_os_type AS device_type
,CASE WHEN active_type = '4' THEN '老活跃设备'
WHEN active_type IN ('1','2') THEN '新增设备' END AS active_type
,array(CASE WHEN tmp.time = 'AI' or (partition_date < 20200301 AND first_channel_source_type like 'promotion_toutiao_jy%') THEN 'AI' ELSE '其他' END , '合计') as channel
,device_id
FROM online.ml_device_day_active_status
LEFT JOIN
(SELECT phone,time
FROM offline.tmp_zhx_20191227
WHERE flag='0204_danlei_channel')tmp
on first_channel_source_type=tmp.phone
WHERE partition_date>=regexp_replace((current_date - interval '60' day),'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND active_type IN ('1','2','4')
AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
,'promotion_shike','promotion_julang_jl03','promotion_zuimei')
AND first_channel_source_type not LIKE 'promotion\_jf\_%'
)t1
JOIN
(
SELECT partition_date,page_name,cl_id
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= regexp_replace((current_date - interval '60' day),'-','')
AND partition_date < regexp_replace((current_date),'-','')
AND action = 'page_view'
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail',
'video_steep','article_detail','wiki_detail','product_detail','wiki_brand','wiki_collect')
)t2
ON t1.partition_date=t2.partition_date
AND t1.device_id=t2.cl_id
LEFT JOIN
( -- 去掉疑似机构刷量的PV和UV
select distinct device_id
from ml.ml_d_ct_dv_devicespam_d
where partition_day='${partition_date}'
)spam_pv
on t2.cl_id=spam_pv.device_id
WHERE spam_pv.device_id IS NULL
)t3
LATERAL VIEW explode(t3.channel) t4 AS channel
GROUP BY partition_date,device_type,active_type,t4.channel
)T2
ON T1.partition_date=T2.partition_date
AND T1.device_type=T2.device_type
AND T1.active_type=T2.active_type
AND T1.channel=T2.channel
LEFT JOIN
(--内容浏览时长
SELECT partition_date
,device_type
,active_type
,t4.channel
,round(sum(page_stay)/count(distinct cl_id)/60,4) as neirong_stay
,round(sum(CASE WHEN page_name IN ('diary_detail','topic_detail') THEN page_stay else 0 END)/
count(distinct CASE WHEN page_name IN ('diary_detail','topic_detail') THEN cl_id END)/60,4) AS diary_stay
,round(sum(CASE WHEN page_name IN ('post_detail','user_post_detail','doctor_post_detail') THEN page_stay else 0 END)/
count(distinct CASE WHEN page_name IN ('post_detail','user_post_detail','doctor_post_detail') THEN cl_id END)/60,4) AS post_stay
,round(sum(CASE WHEN page_name ='question_detail' THEN page_stay else 0 END)/
count(distinct CASE WHEN page_name ='question_detail' THEN cl_id END)/60,4) AS question_stay
,round(sum(CASE WHEN page_name ='question_answer_detail' THEN page_stay else 0 END)/
count(distinct CASE WHEN page_name ='question_answer_detail' THEN cl_id END)/60,4) AS question_answer_stay
,round(sum(CASE WHEN page_name ='answer_detail' THEN page_stay else 0 END)/
count(distinct CASE WHEN page_name ='answer_detail' THEN cl_id END)/60,4) AS answer_stay
,round(sum(CASE WHEN page_name ='video_steep' THEN page_stay else 0 END)/
count(distinct CASE WHEN page_name ='video_steep' THEN cl_id END)/60,4) AS video_stay
,round(sum(CASE WHEN page_name ='article_detail' THEN page_stay else 0 END)/
count(distinct CASE WHEN page_name ='article_detail' THEN cl_id END)/60,4) AS article_stay
,round(sum(CASE WHEN page_name IN ('wiki_detail','product_detail','wiki_brand','wiki_collect') THEN page_stay else 0 END)/
count(distinct CASE WHEN page_name IN ('wiki_detail','product_detail','wiki_brand','wiki_collect') THEN cl_id END)/60,4) AS wiki_stay
FROM
(
SELECT t1.partition_date,device_type,active_type,channel,t2.cl_id,t2.page_name,t2.page_stay
FROM
(
SELECT partition_date
,device_os_type AS device_type
,CASE WHEN active_type = '4' THEN '老活跃设备'
WHEN active_type IN ('1','2') THEN '新增设备' END AS active_type
,array(CASE WHEN tmp.time = 'AI' or (partition_date < 20200301 AND first_channel_source_type like 'promotion_toutiao_jy%') THEN 'AI' ELSE '其他' END , '合计') as channel
,device_id
FROM online.ml_device_day_active_status
LEFT JOIN
(SELECT phone,time
FROM offline.tmp_zhx_20191227
WHERE flag='0204_danlei_channel')tmp
on first_channel_source_type=tmp.phone
WHERE partition_date>=regexp_replace((current_date - interval '60' day),'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND active_type IN ('1','2','4')
AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
,'promotion_shike','promotion_julang_jl03','promotion_zuimei')
AND first_channel_source_type not LIKE 'promotion\_jf\_%'
)t1
JOIN
(
SELECT partition_date,page_name,cl_id,page_stay
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= regexp_replace((current_date - interval '60' day),'-','')
AND partition_date < regexp_replace((current_date),'-','')
AND action = 'page_view'
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail',
'video_steep','article_detail','wiki_detail','product_detail','wiki_brand','wiki_collect')
AND page_stay>=0 AND page_stay<1000
)t2
ON t1.partition_date=t2.partition_date
AND t1.device_id=t2.cl_id
LEFT JOIN
( -- 去掉疑似机构刷量的PV和UV
select distinct device_id
from ml.ml_d_ct_dv_devicespam_d
where partition_day='${partition_date}'
)spam_pv
on t2.cl_id=spam_pv.device_id
WHERE spam_pv.device_id IS NULL
)t3
LATERAL VIEW explode(t3.channel) t4 AS channel
GROUP BY partition_date,device_type,active_type,t4.channel
)T3
ON T1.partition_date=T3.partition_date
AND T1.device_type=T3.device_type
AND T1.active_type=T3.active_type
AND T1.channel=T3.channel
LEFT JOIN
(--内容用户留存
SELECT regexp_replace(partition_date,'-','') AS partition_date
,device_type,active_type,t5.channel
,int(count(DISTINCT CASE WHEN date_add(partition_date,1) = retention_date THEN device_id END)) AS retention_num1
,int(count(DISTINCT CASE WHEN date_add(partition_date,6) = retention_date THEN device_id END)) AS retention_num7
,int(count(DISTINCT CASE WHEN date_add(partition_date,29) = retention_date THEN device_id END)) AS retention_num30
FROM
(
SELECT t1.partition_date,device_type,active_type,channel,t3.device_id,t3.partition_date as retention_date
FROM
(
SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) AS partition_date
,device_os_type AS device_type
,CASE WHEN active_type = '4' THEN '老活跃设备'
WHEN active_type IN ('1','2') THEN '新增设备' END AS active_type
,array(CASE WHEN tmp.time = 'AI' or (partition_date < 20200301 AND first_channel_source_type like 'promotion_toutiao_jy%') THEN 'AI' ELSE '其他' END , '合计') as channel
,device_id
FROM online.ml_device_day_active_status
LEFT JOIN
(SELECT phone,time
FROM offline.tmp_zhx_20191227
WHERE flag='0204_danlei_channel')tmp
on first_channel_source_type=tmp.phone
WHERE partition_date>=regexp_replace((current_date - interval '60' day),'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND active_type IN ('1','2','4')
AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
,'promotion_shike','promotion_julang_jl03','promotion_zuimei')
AND first_channel_source_type not LIKE 'promotion\_jf\_%'
)t1
JOIN
(--内容用户
SELECT cl_id
,concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) AS partition_date
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= regexp_replace((current_date - interval '60' day),'-','')
AND partition_date < regexp_replace((current_date),'-','')
AND action = 'page_view'
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail',
'video_steep','article_detail','wiki_detail','product_detail','wiki_brand','wiki_collect')
GROUP BY cl_id,concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2))
)t2
ON t1.partition_date=t2.partition_date
AND t1.device_id=t2.cl_id
LEFT JOIN
( -- 去掉疑似机构刷量的PV和UV
select distinct device_id
from ml.ml_d_ct_dv_devicespam_d
where partition_day='${partition_date}'
)spam_pv
on t2.cl_id=spam_pv.device_id
WHERE spam_pv.device_id IS NULL
LEFT JOIN
(--活跃设备
SELECT device_id
,concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) AS partition_date
FROM online.ml_device_day_active_status
WHERE partition_date >=regexp_replace((current_date - interval '60' day),'-','')
AND partition_date < regexp_replace((current_date),'-','')
)t3
ON t2.cl_id=t3.device_id
WHERE spam_pv.cl_id IS NULL
)t4
LATERAL VIEW explode(t4.channel) t5 AS channel
GROUP BY regexp_replace(partition_date,'-',''),device_type,active_type,t5.channel
)T4
ON T1.partition_date=T4.partition_date
AND T1.device_type=T4.device_type
AND T1.active_type=T4.active_type
AND T1.channel=T4.channel
LEFT JOIN
(--内容用户单设备app时长(m)
SELECT partition_date
,device_type
,active_type
,t5.channel
,round(sum(use_duration)/count(distinct cl_id)/60,4) as app_duration
,round(avg(open_times),4) as avg_opentimes
FROM
(
SELECT t1.partition_date,device_type,active_type,channel,t2.cl_id,t3.use_duration,t3.open_times
FROM
(
SELECT partition_date
,device_os_type AS device_type
,CASE WHEN active_type = '4' THEN '老活跃设备'
WHEN active_type IN ('1','2') THEN '新增设备' END AS active_type
,array(CASE WHEN tmp.time = 'AI' or (partition_date < 20200301 AND first_channel_source_type like 'promotion_toutiao_jy%') THEN 'AI' ELSE '其他' END , '合计') as channel
,device_id
FROM online.ml_device_day_active_status
LEFT JOIN
(SELECT phone,time
FROM offline.tmp_zhx_20191227
WHERE flag='0204_danlei_channel')tmp
on first_channel_source_type=tmp.phone
WHERE partition_date>=regexp_replace((current_date - interval '60' day),'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND active_type IN ('1','2','4')
AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
,'promotion_shike','promotion_julang_jl03','promotion_zuimei')
AND first_channel_source_type not LIKE 'promotion\_jf\_%'
)t1
JOIN
(--内容用户
SELECT partition_date,cl_id
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= regexp_replace((current_date - interval '60' day),'-','')
AND partition_date < regexp_replace((current_date),'-','')
AND action = 'page_view'
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail',
'video_steep','article_detail','wiki_detail','product_detail','wiki_brand','wiki_collect')
GROUP BY partition_date,cl_id
)t2
ON t1.partition_date=t2.partition_date
AND t1.device_id=t2.cl_id
LEFT JOIN
( -- 去掉疑似机构刷量的PV和UV
select distinct device_id
from ml.ml_d_ct_dv_devicespam_d
where partition_day='${partition_date}'
)spam_pv
on t2.cl_id=spam_pv.device_id
LEFT JOIN
(
SELECT partition_date,device_id,use_duration,open_times
FROM online.ml_device_updates
WHERE partition_date>=regexp_replace((current_date - interval '60' day),'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND use_duration > 0 and use_duration < 86400
)t3
on t2.partition_date=t3.partition_date
AND t2.cl_id=t3.device_id
WHERE spam_pv.cl_id IS NULL
)t4
LATERAL VIEW explode(t4.channel) t5 AS channel
GROUP BY partition_date,device_type,active_type,t5.channel
)T5
ON T1.partition_date=T5.partition_date
AND T1.device_type=T5.device_type
AND T1.active_type=T5.active_type
AND T1.channel=T5.channel
LEFT JOIN
(--不同来源进入内容uv/pv
SELECT partition_date
,device_type
,active_type
,t4.channel
,count(CASE WHEN referrer='search' THEN cl_id END) AS search_pv
,count(distinct CASE WHEN referrer='search' THEN cl_id END) AS search_uv
,count(CASE WHEN referrer='zone_v3' THEN cl_id END) AS zone_pv
,count(distinct CASE WHEN referrer='zone_v3' THEN cl_id END) AS zone_uv
,count(CASE WHEN referrer='feeds' THEN cl_id END) AS feeds_pv
,count(distinct CASE WHEN referrer='feeds' THEN cl_id END) AS feeds_uv
,count(CASE WHEN referrer='recommend' THEN cl_id END) AS recommend_pv
,count(distinct CASE WHEN referrer='recommend' THEN cl_id END) AS recommend_uv
,count(CASE WHEN referrer='content' THEN cl_id END) as content_pv
,count(distinct CASE WHEN referrer='content' THEN cl_id END) as content_uv
,count(CASE WHEN referrer='blank' THEN cl_id END) as blank_pv
,count(distinct CASE WHEN referrer='blank' THEN cl_id END) as blank_uv
,count(CASE WHEN referrer='comment' THEN cl_id END) as comment_pv
,count(distinct CASE WHEN referrer='comment' THEN cl_id END) as comment_uv
,count(CASE WHEN referrer='org' THEN cl_id END) as org_pv
,count(distinct CASE WHEN referrer='org' THEN cl_id END) as org_uv
,count(CASE WHEN referrer='category' THEN cl_id END) as category_pv
,count(distinct CASE WHEN referrer='category' THEN cl_id END) as category_uv
,count(CASE WHEN referrer='my_diary' THEN cl_id END) as my_diary_pv
,count(distinct CASE WHEN referrer='my_diary' THEN cl_id END) as my_diary_uv
,count(CASE WHEN referrer='ai' THEN cl_id END) as ai_pv
,count(distinct CASE WHEN referrer='ai' THEN cl_id END) as ai_uv
FROM
(
SELECT t1.partition_date,device_type,active_type,channel,t2.cl_id,t2.referrer
FROM
(
SELECT partition_date
,device_os_type AS device_type
,CASE WHEN active_type = '4' THEN '老活跃设备'
WHEN active_type IN ('1','2') THEN '新增设备' END AS active_type
,array(CASE WHEN tmp.time = 'AI' or (partition_date < 20200301 AND first_channel_source_type like 'promotion_toutiao_jy%') THEN 'AI' ELSE '其他' END , '合计') as channel
,device_id
FROM online.ml_device_day_active_status
LEFT JOIN
(SELECT phone,time
FROM offline.tmp_zhx_20191227
WHERE flag='0204_danlei_channel')tmp
on first_channel_source_type=tmp.phone
WHERE partition_date>=regexp_replace((current_date - interval '60' day),'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND active_type IN ('1','2','4')
AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
,'promotion_shike','promotion_julang_jl03','promotion_zuimei')
AND first_channel_source_type not LIKE 'promotion\_jf\_%'
)t1
LEFT JOIN
( --不同页面进入内容
SELECT partition_date,cl_id
,case when referrer like 'search_result%' then 'search'
when referrer ='' then 'blank'
when referrer = 'zone_v3' then 'zone_v3'
when referrer='all_case_service_comment' then 'comment'
when referrer in ('organization_detail','expert_detail') then 'org'
when referrer = 'category' then 'category'
when referrer ='my_diary' then 'my_diary'
when referrer in ('face_detect_result','report_result') then 'ai'
when referrer in ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail') then 'content'
else null end as referrer
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= regexp_replace((current_date - interval '60' day),'-','')
AND partition_date < regexp_replace((current_date),'-','')
and action='page_view'
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail',
'video_steep','article_detail','wiki_detail','product_detail','wiki_brand','wiki_collect')
UNION ALL
--首页feeds进入内容(首页非策略卡片点击)
SELECT partition_date,cl_id,'feeds' as referrer
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= regexp_replace((current_date - interval '60' day),'-','')
AND partition_date < regexp_replace((current_date),'-','')
AND page_name = 'home'
AND action = 'on_click_card'
AND params['transaction_type'] not in ('-1','ctr','smr','newdata')
AND params['card_content_type'] IN ('diary','diary_topic','user_post','doctor_post','question','answer','qa','live','article')
UNION ALL
--首页feeds进入内容(首页非策略卡片点击) 7.8.0版本前的埋点
SELECT partition_date,cl_id,'feeds' as referrer
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >=regexp_replace((current_date - interval '60' day),'-','')
AND partition_date < regexp_replace((current_date),'-','')
AND page_name = 'home'
AND action in ( 'on_click_diary_card','on_click_answer_card','on_click_question_card','on_click_topic_card','on_click_live_card')
AND params['transaction_type'] not in ('-1','ctr','smr','newdata')
UNION ALL
--推荐进入内容(首页策略卡片点击),5月7日新增transaction_type类型
SELECT partition_date,cl_id,'recommend' as referrer
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= regexp_replace((current_date - interval '60' day),'-','')
AND partition_date < regexp_replace((current_date),'-','')
AND page_name = 'home'
AND action = 'on_click_card'
AND params['transaction_type'] in ('-1','ctr','smr','newdata')
AND params['card_content_type'] IN ('diary','diary_topic','user_post','doctor_post','question','answer','qa','live','article')
UNION ALL
--推荐进入内容(首页策略卡片点击) 7.8.0版本前的埋点
SELECT partition_date,cl_id,'feeds' as referrer
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= regexp_replace((current_date - interval '60' day),'-','')
AND partition_date < regexp_replace((current_date),'-','')
AND page_name = 'home'
AND action in ('on_click_diary_card','on_click_answer_card','on_click_question_card','on_click_topic_card','on_click_live_card')
AND params['transaction_type'] in ('-1','ctr','smr','newdata')
)t2
ON t1.partition_date=t2.partition_date
AND t1.device_id=t2.cl_id
LEFT JOIN
( -- 去掉疑似机构刷量的PV和UV
select distinct device_id
from ml.ml_d_ct_dv_devicespam_d
where partition_day='${partition_date}'
)spam_pv
on t2.cl_id=spam_pv.device_id
WHERE spam_pv.device_id IS NULL
)t3
LATERAL VIEW explode(t3.channel) t4 AS channel
GROUP BY partition_date,device_type,active_type,t4.channel
)T6
ON T1.partition_date=T6.partition_date
AND T1.device_type=T6.device_type
AND T1.active_type=T6.active_type
AND T1.channel=T6.channel
LEFT JOIN
(--真实发帖数
SELECT partition_date
,device_type
,active_type
,t7.channel
,count(distinct id) as num
FROM
(
SELECT t1.partition_date,device_type,active_type,channel,t3.id
FROM
(
SELECT partition_date
,device_os_type AS device_type
,CASE WHEN active_type = '4' THEN '老活跃设备'
WHEN active_type IN ('1','2') THEN '新增设备' END AS active_type
,array(CASE WHEN tmp.time = 'AI' or (partition_date < 20200301 AND first_channel_source_type like 'promotion_toutiao_jy%') THEN 'AI' ELSE '其他' END , '合计') as channel
,device_id
FROM online.ml_device_day_active_status
LEFT JOIN
(SELECT phone,time
FROM offline.tmp_zhx_20191227
WHERE flag='0204_danlei_channel')tmp
on first_channel_source_type=tmp.phone
WHERE partition_date>=regexp_replace((current_date - interval '60' day),'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND active_type IN ('1','2','4')
AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
,'promotion_shike','promotion_julang_jl03','promotion_zuimei')
AND first_channel_source_type not LIKE 'promotion\_jf\_%'
)t1
LEFT JOIN
(--找出user_id当天活跃的第一个设备id
SELECT user_id,partition_date,
if(size(device_list) > 0, device_list [ 0 ], '') AS device_id
FROM online.ml_user_updates
WHERE partition_date>=regexp_replace((current_date - interval '60' day),'-','')
AND partition_date<regexp_replace((current_date),'-','')
)t2
ON t1.partition_date=t2.partition_date
AND t1.device_id=t2.device_id
LEFT JOIN
(--通过user_id,找到发帖情况
--新增帖子
SELECT user_id,id,regexp_replace(substr(create_time,1,10),'-','') as create_date
FROM online.tl_hdfs_api_tractate_view --发帖情况表
WHERE partition_date = regexp_replace((current_date - interval '1' day),'-','')
AND is_online='true'
AND platform= '1' --更美用户发的(去除hera后台,爬虫抓取的,kyc自动回复的)
AND regexp_replace(substr(create_time,1,10),'-','')>=regexp_replace((current_date - interval '60' day),'-','')
AND regexp_replace(substr(create_time,1,10),'-','')< regexp_replace((current_date),'-','')
UNION ALL
--新增日记本
SELECT a.user_id,a.id,a.create_date
FROM
(
SELECT user_id,id,regexp_replace(substr(created_time,1,10),'-','') as create_date
FROM online.tl_hdfs_diary_view
WHERE partition_date = regexp_replace((current_date - interval '1' day),'-','')
and regexp_replace(substr(created_time,1,10),'-','') >= regexp_replace((current_date - interval '60' day),'-','')
and regexp_replace(substr(created_time,1,10),'-','') < regexp_replace((current_date),'-','')
)a
JOIN
( --取非空日记
SELECT diary_id
FROM online.tl_hdfs_problem_view
WHERE partition_date = regexp_replace((current_date - interval '1' day),'-','')
AND is_spam = 'false'
)b
ON a.id = b.diary_id
UNION ALL
--新增日记贴
SELECT user_id,id,regexp_replace(substr(created_time,1,10),'-','') as create_date
FROM online.tl_hdfs_problem_view
WHERE partition_date = regexp_replace((current_date - interval '1' day),'-','')
AND regexp_replace(substr(created_time, 1, 10), '-', '') >= regexp_replace((current_date - interval '60' day),'-','')
AND regexp_replace(substr(created_time, 1, 10), '-', '') < regexp_replace((current_date),'-','')
AND is_spam = 'false'
AND diary_id is not null
UNION ALL
--新增问题数
SELECT user_id,id,regexp_replace(substr(create_time,1,10),'-','') as create_date
FROM online.tl_hdfs_question_view
WHERE partition_date = regexp_replace((current_date - interval '1' day),'-','')
AND regexp_replace(substr(create_time, 1, 10), '-', '') >=regexp_replace((current_date - interval '60' day),'-','')
AND regexp_replace(substr(create_time, 1, 10), '-', '') < regexp_replace((current_date),'-','')
AND is_spam = 'false'
AND platform= '99' --更美用户发的(去除hera后台,爬虫抓取的,kyc自动回复的)
UNION ALL
--新增回答数
SELECT user_id,id,regexp_replace(substr(create_time,1,10),'-','') as create_date
FROM online.tl_hdfs_answer_view
WHERE partition_date = regexp_replace((current_date - interval '1' day),'-','')
AND regexp_replace(substr(create_time, 1, 10), '-', '') >= regexp_replace((current_date - interval '60' day),'-','')
AND regexp_replace(substr(create_time, 1, 10), '-', '') < regexp_replace((current_date),'-','')
AND is_spam = 'false'
AND platform= '99' --更美用户发的(去除hera后台,爬虫抓取的,kyc自动回复的)
)t3
ON t2.partition_date = t3.create_date
AND t2.user_id = t3.user_id
LEFT SEMI JOIN
(--参考mars后台代码增加这一限制,限制用户是在app进行的回复
SELECT a.partition_date,user_id
FROM
(
SELECT partition_date, user_id,device_id
FROM online.bl_hdfs_operation_updates
WHERE partition_date>=regexp_replace((current_date - interval '60' day),'-','')
AND partition_date< regexp_replace((current_date),'-','')
AND action like '%reply%'
AND action not like '%vote%'
)a
JOIN
(
SELECT partition_date,device_id
FROM online.ml_device_day_active_status
WHERE partition_date>=regexp_replace((current_date - interval '60' day),'-','')
AND partition_date< regexp_replace((current_date),'-','')
)b
on a.device_id=b.device_id
and a.partition_date=b.partition_date
group by a.partition_date,user_id
)t4
on t3.user_id=t4.user_id
and t3.reply_date=t4.partition_date
LEFT JOIN
( --医生账号
SELECT distinct user_id
FROM online.tl_hdfs_doctor_view
WHERE partition_date = regexp_replace((current_date - interval '1' day),'-','')
UNION ALL
--马甲账号
SELECT distinct user_id
FROM online.ml_user_history_detail
WHERE partition_date = regexp_replace((current_date - interval '1' day),'-','')
AND is_puppet = 'true'
UNION ALL
--模特用户
select user_id
from online.tl_api_classifyuser_view
where partition_date = regexp_replace((current_date - interval '1' day),'-','')
UNION ALL
--公司内网覆盖用户
select distinct user_id
from dim.dim_device_user_staff
UNION ALL
--登陆过医生设备
SELECT distinct t1.user_id
FROM
(
SELECT user_id, v.device_id as device_id
FROM online.ml_user_history_detail
LATERAL VIEW EXPLODE(device_history_list) v AS device_id
WHERE partition_date = regexp_replace((current_date - interval '1' day),'-','')
) t1
JOIN
(
SELECT device_id
FROM online.ml_device_history_detail
WHERE partition_date = regexp_replace((current_date - interval '1' day),'-','')
AND is_login_doctor = '1'
) t2
ON t1.device_id = t2.device_id
)t5
ON t3.user_id=t5.user_id
where (t5.user_id is null or t5.user_id = '')
)t6
LATERAL VIEW explode(t6.channel) t7 AS channel
GROUP BY partition_date,device_type,active_type,t7.channel
)T7
ON T1.partition_date=T7.partition_date
AND T1.device_type=T7.device_type
AND T1.active_type=T7.active_type
AND T1.channel=T7.channel
LEFT JOIN
(--真实评论数
SELECT partition_date
,device_type
,active_type
,t7.channel
,count(distinct id) as num
FROM
(
SELECT t1.partition_date,device_type,active_type,channel,t3.id,t3.type
FROM
(
SELECT partition_date
,device_os_type AS device_type
,CASE WHEN active_type = '4' THEN '老活跃设备'
WHEN active_type IN ('1','2') THEN '新增设备' END AS active_type
,array(CASE WHEN tmp.time = 'AI' or (partition_date < 20200301 AND first_channel_source_type like 'promotion_toutiao_jy%') THEN 'AI' ELSE '其他' END , '合计') as channel
,device_id
FROM online.ml_device_day_active_status
LEFT JOIN
(SELECT phone,time
FROM offline.tmp_zhx_20191227
WHERE flag='0204_danlei_channel')tmp
on first_channel_source_type=tmp.phone
WHERE partition_date>=regexp_replace((current_date - interval '60' day),'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND active_type IN ('1','2','4')
AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
,'promotion_shike','promotion_julang_jl03','promotion_zuimei')
AND first_channel_source_type not LIKE 'promotion\_jf\_%'
)t1
LEFT JOIN
(--找出user_id当天活跃的第一个设备id
SELECT user_id,partition_date,
if(size(device_list) > 0, device_list [ 0 ], '') AS device_id
FROM online.ml_user_updates
WHERE partition_date>=regexp_replace((current_date - interval '60' day),'-','')
AND partition_date<regexp_replace((current_date),'-','')
)t2
ON t1.partition_date=t2.partition_date
AND t1.device_id=t2.device_id
LEFT JOIN
( --有评论过日记帖的设备,排除疑似广告
SELECT t1.user_id,reply_date,t1.id,'topic_reply' as type
FROM
(
SELECT user_id,regexp_replace(substr(reply_date,1,10),'-','') as reply_date,problem_id,id
FROM online.tl_hdfs_topicreply_view
WHERE partition_date = regexp_replace((current_date - interval '1' day),'-','')
AND is_spam = 'false' --排除疑似广告
-- and diary_id is not null 这个表的diary_id有问题,需要join problem表来判断是不是属于日记
and regexp_replace(substr(reply_date,1,10),'-','') >= regexp_replace((current_date - interval '60' day),'-','')
and regexp_replace(substr(reply_date,1,10),'-','') < regexp_replace((current_date),'-','')
)t1
JOIN
(
SELECT id,diary_id
FROM online.tl_hdfs_problem_view
WHERE partition_date = regexp_replace((current_date - interval '1' day),'-','')
group by id,diary_id
)t2
on t2.id=t1.problem_id
--group by t1.user_id,reply_date
UNION ALL
--有评论过回答的设备,排除疑似广告
SELECT
t1.user_id,t1.reply_date,t1.id as id,'answer_reply' as type
FROM
(
SELECT user_id,regexp_replace(substr(create_time,1,10),'-','') as reply_date,answer_id,id
FROM online.tl_hdfs_answer_reply_view
WHERE partition_date = regexp_replace((current_date - interval '1' day),'-','')
AND is_spam = 'false' --排除疑似广告
and regexp_replace(substr(create_time,1,10),'-','') >= regexp_replace((current_date - interval '60' day),'-','')
and regexp_replace(substr(create_time,1,10),'-','') < regexp_replace((current_date),'-','')
)t1
JOIN
(
SELECT id,question_id
FROM online.tl_hdfs_answer_view
WHERE partition_date =regexp_replace((current_date - interval '1' day),'-','')
group by id,question_id
)t2
ON t2.id = t1.answer_id
UNION ALL
--有评论过用户帖的设备
SELECT user_id,regexp_replace(substr(create_time,1,10),'-','') as reply_date,id,'tractate_reply' as type
FROM online.tl_hdfs_api_tractate_reply_view
WHERE partition_date =regexp_replace((current_date - interval '1' day),'-','')
and regexp_replace(substr(create_time,1,10),'-','') >=regexp_replace((current_date - interval '60' day),'-','')
and regexp_replace(substr(create_time,1,10),'-','') < regexp_replace((current_date),'-','')
)t3
ON t2.partition_date = t3.reply_date
AND t2.user_id = t3.user_id
LEFT SEMI JOIN
(--参考mars后台代码增加这一限制,限制用户是在app进行的回复
SELECT a.partition_date,user_id
FROM
(
SELECT partition_date, user_id,device_id
FROM online.bl_hdfs_operation_updates
WHERE partition_date>=regexp_replace((current_date - interval '60' day),'-','')
AND partition_date< regexp_replace((current_date),'-','')
AND action like '%reply%'
AND action not like '%vote%'
)a
JOIN
(
SELECT partition_date,device_id
FROM online.ml_device_day_active_status
WHERE partition_date>=regexp_replace((current_date - interval '60' day),'-','')
AND partition_date< regexp_replace((current_date),'-','')
)b
on a.device_id=b.device_id
and a.partition_date=b.partition_date
group by a.partition_date,user_id
)t4
on t3.user_id=t4.user_id
and t3.reply_date=t4.partition_date
LEFT JOIN
( --医生账号
SELECT distinct user_id
FROM online.tl_hdfs_doctor_view
WHERE partition_date = regexp_replace((current_date - interval '1' day),'-','')
UNION ALL
--马甲账号
SELECT distinct user_id
FROM online.ml_user_history_detail
WHERE partition_date = regexp_replace((current_date - interval '1' day),'-','')
AND is_puppet = 'true'
UNION ALL
--模特用户
select distinct user_id
from online.tl_api_classifyuser_view
where partition_date = regexp_replace((current_date - interval '1' day),'-','')
UNION ALL
--公司内网覆盖用户
select distinct user_id
from dim.dim_device_user_staff
UNION ALL
--登陆过医生设备
SELECT distinct t1.user_id
FROM
(
SELECT user_id, v.device_id as device_id
FROM online.ml_user_history_detail
LATERAL VIEW EXPLODE(device_history_list) v AS device_id
WHERE partition_date = regexp_replace((current_date - interval '1' day),'-','')
) t1
JOIN
(
SELECT device_id
FROM online.ml_device_history_detail
WHERE partition_date = regexp_replace((current_date - interval '1' day),'-','')
AND is_login_doctor = '1'
) t2
ON t1.device_id = t2.device_id
)t5
ON t3.user_id=t5.user_id
where (t5.user_id is null or t5.user_id = '')
)t6
LATERAL VIEW explode(t6.channel) t7 AS channel
GROUP BY partition_date,device_type,active_type,t7.channel
)T8
ON T1.partition_date=T8.partition_date
AND T1.device_type=T8.device_type
AND T1.active_type=T8.active_type
AND T1.channel=T8.channel
LEFT JOIN
(--部分页面的单设备页面浏览时长
SELECT partition_date
,device_type
,active_type
,t5.channel
,round(sum(CASE WHEN page_name like 'search%' THEN page_stay else 0 END)/count(distinct cl_id)/60,4) AS search_stay
,round(sum(CASE WHEN page_name = 'welfare_detail' THEN page_stay else 0 END)/count(distinct cl_id)/60,4) AS welfare_stay
,round(sum(CASE WHEN page_name = 'question_detail' THEN page_stay else 0 END)/count(distinct cl_id)/60,4) AS question_stay
,round(sum(CASE WHEN page_name in ('report_result','face_scan','face_detect_result','face_scan_loading','face_institute_report') THEN page_stay else 0 END)/count(distinct cl_id)/60,4) AS ai_related_stay
,round(sum(CASE WHEN page_name = 'diary_detail' THEN page_stay else 0 END)/count(distinct cl_id)/60,4) AS diary_stay
,round(sum(CASE WHEN page_name = 'home' THEN page_stay else 0 END)/count(distinct cl_id)/60,4) AS home_stay
,round(sum(CASE WHEN page_name in ('conversation_detail','consult_home') THEN page_stay else 0 END)/count(distinct cl_id)/60,4) AS conv_stay
FROM
(
SELECT t1.partition_date,device_type,active_type,channel,t2.cl_id,t3.page_name,t3.page_stay
FROM
(
SELECT partition_date
,device_os_type AS device_type
,CASE WHEN active_type = '4' THEN '老活跃设备'
WHEN active_type IN ('1','2') THEN '新增设备' END AS active_type
,array(CASE WHEN tmp.time = 'AI' or (partition_date < 20200301 AND first_channel_source_type like 'promotion_toutiao_jy%') THEN 'AI' ELSE '其他' END , '合计') as channel
,device_id
FROM online.ml_device_day_active_status
LEFT JOIN
(SELECT phone,time
FROM offline.tmp_zhx_20191227
WHERE flag='0204_danlei_channel')tmp
on first_channel_source_type=tmp.phone
WHERE partition_date>=regexp_replace((current_date - interval '60' day),'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND active_type IN ('1','2','4')
AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
,'promotion_shike','promotion_julang_jl03','promotion_zuimei')
AND first_channel_source_type not LIKE 'promotion\_jf\_%'
)t1
JOIN
(--内容用户
SELECT partition_date,cl_id
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= regexp_replace((current_date - interval '60' day),'-','')
AND partition_date < regexp_replace((current_date),'-','')
AND action = 'page_view'
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail',
'video_steep','article_detail','wiki_detail','product_detail','wiki_brand','wiki_collect')
GROUP BY partition_date,cl_id
)t2
ON t1.partition_date=t2.partition_date
AND t1.device_id=t2.cl_id
LEFT JOIN
(--部分页面的停留时长
SELECT partition_date,cl_id,page_name,page_stay
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= regexp_replace((current_date - interval '60' day),'-','')
AND partition_date < regexp_replace((current_date),'-','')
AND action = 'page_view'
AND (page_name like 'search%' or page_name IN ('welfare_detail','question_detail','report_result','face_scan'
,'face_detect_result','face_scan_loading','face_institute_report','diary_detail','home','conversation_detail','consult_home') )
AND page_name!='search_result'--android埋点会在上报search_result_more时重复上报search_result的埋点,导致page_stay重复计算
AND page_stay>=0 AND page_stay<1000
)t3
ON t2.partition_date=t3.partition_date
AND t2.cl_id=t3.cl_id
LEFT JOIN
( -- 去掉疑似机构刷量的PV和UV
select distinct device_id
from ml.ml_d_ct_dv_devicespam_d
where partition_day='${partition_date}'
)spam_pv
on t2.cl_id=spam_pv.device_id
WHERE spam_pv.device_id IS NULL
)t4
LATERAL VIEW explode(t4.channel) t5 AS channel
GROUP BY partition_date,device_type,active_type,t5.channel
)T9
ON T1.partition_date=T9.partition_date
AND T1.device_type=T9.device_type
AND T1.active_type=T9.active_type
AND T1.channel=T9.channel
ORDER BY T1.partition_date desc,T1.device_type,T1.active_type,T1.channel
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment