Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
B
bi-report
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
data
bi-report
Commits
4c221097
Commit
4c221097
authored
May 15, 2020
by
魏艺敏
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add daily_content_data codes
parent
8c3d46f7
Hide whitespace changes
Inline
Side-by-side
Showing
17 changed files
with
1127 additions
and
2 deletions
+1127
-2
en-cn.properties
conf/en-cn.properties
+3
-2
daily_content_data.zip
jobs/daily_content_data/daily_content_data.zip
+0
-0
step1_1.job
jobs/daily_content_data/step1_1.job
+4
-0
step1_10.job
jobs/daily_content_data/step1_10.job
+4
-0
step1_11.job
jobs/daily_content_data/step1_11.job
+4
-0
step1_12.job
jobs/daily_content_data/step1_12.job
+4
-0
step1_13.job
jobs/daily_content_data/step1_13.job
+4
-0
step1_2.job
jobs/daily_content_data/step1_2.job
+4
-0
step1_3.job
jobs/daily_content_data/step1_3.job
+4
-0
step1_4.job
jobs/daily_content_data/step1_4.job
+4
-0
step1_5.job
jobs/daily_content_data/step1_5.job
+4
-0
step1_6.job
jobs/daily_content_data/step1_6.job
+4
-0
step1_7.job
jobs/daily_content_data/step1_7.job
+4
-0
step1_8.job
jobs/daily_content_data/step1_8.job
+4
-0
step1_9.job
jobs/daily_content_data/step1_9.job
+4
-0
step2.job
jobs/daily_content_data/step2.job
+5
-0
daily_content_data.sql
sqls/daily_content_data/daily_content_data.sql
+1067
-0
No files found.
conf/en-cn.properties
View file @
4c221097
...
...
@@ -6,4 +6,5 @@ meigou-detail-page=美购详情页
meigou-detail-page-dispense-pv
=
美购详情页分发pv
meigou-detail-page-dispense-uv
=
美购详情页分发uv
meigou-detail-page-source-pv
=
美购详情页来源pv
meigou-detail-page-source-uv
=
美购详情页来源uv
\ No newline at end of file
meigou-detail-page-source-uv
=
美购详情页来源uv
daily_content_data
=
内容日报-新
\ No newline at end of file
jobs/daily_content_data/daily_content_data.zip
0 → 100644
View file @
4c221097
File added
jobs/daily_content_data/step1_1.job
0 → 100644
View file @
4c221097
#step1_1.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_device_day_active_status
\ No newline at end of file
jobs/daily_content_data/step1_10.job
0 → 100644
View file @
4c221097
#step1_1.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_answer_view
\ No newline at end of file
jobs/daily_content_data/step1_11.job
0 → 100644
View file @
4c221097
#step1_1.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_topicreply_view
\ No newline at end of file
jobs/daily_content_data/step1_12.job
0 → 100644
View file @
4c221097
#step1_1.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_answer_reply_view
\ No newline at end of file
jobs/daily_content_data/step1_13.job
0 → 100644
View file @
4c221097
#step1_1.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_api_tractate_reply_view
\ No newline at end of file
jobs/daily_content_data/step1_2.job
0 → 100644
View file @
4c221097
#step1_1.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online bl_hdfs_maidian_updates
\ No newline at end of file
jobs/daily_content_data/step1_3.job
0 → 100644
View file @
4c221097
#step1_1.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_device_updates
\ No newline at end of file
jobs/daily_content_data/step1_4.job
0 → 100644
View file @
4c221097
#step1_1.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_user_updates
\ No newline at end of file
jobs/daily_content_data/step1_5.job
0 → 100644
View file @
4c221097
#step1_1.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online bl_hdfs_operation_updates
\ No newline at end of file
jobs/daily_content_data/step1_6.job
0 → 100644
View file @
4c221097
#step1_1.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_api_tractate_view
\ No newline at end of file
jobs/daily_content_data/step1_7.job
0 → 100644
View file @
4c221097
#step1_1.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_diary_view
\ No newline at end of file
jobs/daily_content_data/step1_8.job
0 → 100644
View file @
4c221097
#step1_1.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_problem_view
\ No newline at end of file
jobs/daily_content_data/step1_9.job
0 → 100644
View file @
4c221097
#step1_1.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_question_view
\ No newline at end of file
jobs/daily_content_data/step2.job
0 → 100644
View file @
4c221097
#step2.job
type=command
dependencies=step1_1,step1_2,step1_3,step1_4,step1_5,step1_6,step1_7,step1_8,step1_9,step1_10,step1_11,step1_12,step1_13
command=curl -X GET http://localhost:8553/api/report/sendEmail/weiyimin@igengmei.com/zhaojianwei@igengmei.com/daily_content_data
\ No newline at end of file
sqls/daily_content_data/daily_content_data.sql
0 → 100644
View file @
4c221097
--内容日报
SELECT
T1
.
partition_date
AS
`日期`
,
T1
.
device_type
AS
`设备类型`
,
T1
.
active_type
AS
`活跃类型`
,
T1
.
channel
AS
`渠道`
,
COALESCE
(
T1
.
dau
,
0
)
AS
`DAU`
,
COALESCE
(
T2
.
neirong_uv
,
0
)
AS
`内容UV`
,
COALESCE
(
T2
.
neirong_pv
,
0
)
AS
`内容PV`
,
COALESCE
(
ROUND
(
T2
.
neirong_uv
/
T1
.
dau
,
4
),
0
)
AS
`内容UV/DAU`
,
COALESCE
(
ROUND
(
T2
.
neirong_pv
/
T2
.
neirong_uv
,
4
),
0
)
AS
`内容PV/内容UV`
,
COALESCE
(
CONCAT
(
ROUND
(
T4
.
retention_num1
/
T2
.
neirong_uv
*
100
,
4
),
'%'
),
0
)
AS
`内容用户APP次留`
,
COALESCE
(
CONCAT
(
ROUND
(
T4
.
retention_num7
/
T2
.
neirong_uv
*
100
,
4
),
'%'
),
0
)
AS
`内容用户APP7留`
,
COALESCE
(
CONCAT
(
ROUND
(
T4
.
retention_num30
/
T2
.
neirong_uv
*
100
,
4
),
'%'
),
0
)
AS
`内容用户APP30留`
,
COALESCE
(
T5
.
app_duration
,
0
)
AS
`内容用户单设备App时长(m)`
,
COALESCE
(
T3
.
neirong_stay
,
0
)
AS
`内容用户单设备内容时长(m)`
,
COALESCE
(
T5
.
avg_opentimes
,
0
)
AS
`内容用户单设备打开次数`
,
COALESCE
(
T9
.
search_stay
,
0
)
AS
`内容用户搜索相关页面单设备页面时长(m)`
,
COALESCE
(
T9
.
welfare_stay
,
0
)
AS
`内容用户美购详情页单设备页面时长(m)`
,
COALESCE
(
T9
.
question_stay
,
0
)
AS
`内容用户问题详情页单设备页面时长(m)`
,
COALESCE
(
T9
.
ai_related_stay
,
0
)
AS
`内容用户AI相关页面单设备页面时长(m)`
,
COALESCE
(
T9
.
diary_stay
,
0
)
AS
`内容用户日记详情页单设备页面时长(m)`
,
COALESCE
(
T9
.
home_stay
,
0
)
AS
`内容用户首页单设备页面时长(m)`
,
COALESCE
(
T9
.
conv_stay
,
0
)
AS
`内容用户咨询相关页面单设备页面时长(m)`
,
COALESCE
(
ROUND
(
T6
.
recommend_uv
/
T2
.
neirong_uv
,
4
),
0
)
AS
`首页feeds推荐进入内容UV/内容UV`
,
COALESCE
(
ROUND
(
T6
.
recommend_pv
/
T2
.
neirong_uv
,
4
),
0
)
AS
`首页feeds推荐进入内容PV/内容UV`
,
COALESCE
(
ROUND
(
T6
.
feeds_uv
/
T2
.
neirong_uv
,
4
),
0
)
AS
`首页feeds非推荐进入内容UV/内容UV`
,
COALESCE
(
ROUND
(
T6
.
feeds_pv
/
T2
.
neirong_uv
,
4
),
0
)
AS
`首页feeds非推荐进入内容PV/内容UV`
,
COALESCE
(
ROUND
(
T6
.
search_uv
/
T2
.
neirong_uv
,
4
),
0
)
AS
`搜索进入内容UV/内容UV`
,
COALESCE
(
ROUND
(
T6
.
search_pv
/
T2
.
neirong_uv
,
4
),
0
)
AS
`搜索进入内容PV/内容UV`
,
COALESCE
(
ROUND
(
T6
.
zone_uv
/
T2
.
neirong_uv
,
4
),
0
)
AS
`内容聚合页进入内容UV/内容UV`
,
COALESCE
(
ROUND
(
T6
.
zone_pv
/
T2
.
neirong_uv
,
4
),
0
)
AS
`内容聚合页进入内容PV/内容UV`
,
COALESCE
(
ROUND
(
T6
.
content_uv
/
T2
.
neirong_uv
,
4
),
0
)
AS
`内容详情页推荐板块进入内容UV/内容UV`
,
COALESCE
(
ROUND
(
T6
.
content_pv
/
T2
.
neirong_uv
,
4
),
0
)
AS
`内容详情页推荐板块进入内容PV/内容UV`
,
COALESCE
(
ROUND
(
T6
.
blank_uv
/
T2
.
neirong_uv
,
4
),
0
)
AS
`无来源页面(大多数为push)进入内容UV/内容UV`
,
COALESCE
(
ROUND
(
T6
.
blank_pv
/
T2
.
neirong_uv
,
4
),
0
)
AS
`无来源页面(大多数为push)进入内容PV/内容UV`
,
COALESCE
(
ROUND
(
T6
.
comment_uv
/
T2
.
neirong_uv
,
4
),
0
)
AS
`评论列表页进入内容UV/内容UV`
,
COALESCE
(
ROUND
(
T6
.
comment_pv
/
T2
.
neirong_uv
,
4
),
0
)
AS
`评论列表页进入内容PV/内容UV`
,
COALESCE
(
ROUND
(
T6
.
org_uv
/
T2
.
neirong_uv
,
4
),
0
)
AS
`医生医院主页进入内容UV/内容UV`
,
COALESCE
(
ROUND
(
T6
.
org_pv
/
T2
.
neirong_uv
,
4
),
0
)
AS
`医生医院主页进入内容PV/内容UV`
,
COALESCE
(
ROUND
(
T6
.
category_uv
/
T2
.
neirong_uv
,
4
),
0
)
AS
`品类聚合页进入内容UV/内容UV`
,
COALESCE
(
ROUND
(
T6
.
category_pv
/
T2
.
neirong_uv
,
4
),
0
)
AS
`品类聚合页进入内容PV/内容UV`
,
COALESCE
(
ROUND
(
T6
.
my_diary_uv
/
T2
.
neirong_uv
,
4
),
0
)
AS
`我的日记页进入内容UV/内容UV`
,
COALESCE
(
ROUND
(
T6
.
my_diary_pv
/
T2
.
neirong_uv
,
4
),
0
)
AS
`我的日记页进入内容PV/内容UV`
,
COALESCE
(
ROUND
(
T6
.
ai_uv
/
T2
.
neirong_uv
,
4
),
0
)
AS
`AI报告页进入内容UV/内容UV`
,
COALESCE
(
ROUND
(
T6
.
ai_pv
/
T2
.
neirong_uv
,
4
),
0
)
AS
`AI报告页进入内容PV/内容UV`
,
COALESCE
(
T7
.
num
,
0
)
AS
`真实发帖数`
,
COALESCE
(
T8
.
num
,
0
)
AS
`真实评论数`
,
COALESCE
(
T2
.
diary_uv
,
0
)
AS
`日记UV`
,
COALESCE
(
T2
.
diary_pv
,
0
)
AS
`日记PV`
,
COALESCE
(
ROUND
(
T2
.
diary_uv
/
T2
.
neirong_uv
,
4
),
0
)
AS
`日记UV/内容UV`
,
COALESCE
(
ROUND
(
T2
.
diary_pv
/
T2
.
diary_uv
,
4
),
0
)
AS
`日记PV/日记UV`
,
COALESCE
(
T3
.
diary_stay
,
0
)
AS
`日记单设备时长(m)`
,
COALESCE
(
T2
.
post_uv
,
0
)
AS
`帖子UV`
,
COALESCE
(
T2
.
post_pv
,
0
)
AS
`帖子PV`
,
COALESCE
(
ROUND
(
T2
.
post_uv
/
T2
.
neirong_uv
,
4
),
0
)
AS
`帖子UV/内容UV`
,
COALESCE
(
ROUND
(
T2
.
post_pv
/
T2
.
post_uv
,
4
),
0
)
AS
`帖子PV/帖子UV`
,
COALESCE
(
T3
.
post_stay
,
0
)
AS
`帖子单设备时长(m)`
,
COALESCE
(
T2
.
question_uv
,
0
)
AS
`问题UV`
,
COALESCE
(
T2
.
question_pv
,
0
)
AS
`问题PV`
,
COALESCE
(
ROUND
(
T2
.
question_uv
/
T2
.
neirong_uv
,
4
),
0
)
AS
`问题UV/内容UV`
,
COALESCE
(
ROUND
(
T2
.
question_pv
/
T2
.
question_uv
,
4
),
0
)
AS
`问题PV/问题UV`
,
COALESCE
(
T3
.
question_stay
,
0
)
AS
`问题单设备时长(m)`
,
COALESCE
(
T2
.
question_answer_uv
,
0
)
AS
`问答UV`
,
COALESCE
(
T2
.
question_answer_pv
,
0
)
AS
`问答PV`
,
COALESCE
(
ROUND
(
T2
.
question_answer_uv
/
T2
.
neirong_uv
,
4
),
0
)
AS
`问答UV/内容UV`
,
COALESCE
(
ROUND
(
T2
.
question_answer_pv
/
T2
.
question_answer_uv
,
4
),
0
)
AS
`问答PV/问答UV`
,
COALESCE
(
T3
.
question_answer_stay
,
0
)
AS
`问答单设备时长(m)`
,
COALESCE
(
T2
.
answer_uv
,
0
)
AS
`回答UV`
,
COALESCE
(
T2
.
answer_pv
,
0
)
AS
`回答PV`
,
COALESCE
(
ROUND
(
T2
.
answer_uv
/
T2
.
neirong_uv
,
4
),
0
)
AS
`回答UV/内容UV`
,
COALESCE
(
ROUND
(
T2
.
answer_pv
/
T2
.
answer_uv
,
4
),
0
)
AS
`回答PV/回答UV`
,
COALESCE
(
T3
.
answer_stay
,
0
)
AS
`回答单设备时长(m)`
,
COALESCE
(
T2
.
video_uv
,
0
)
AS
`视频UV`
,
COALESCE
(
T2
.
video_pv
,
0
)
AS
`视频PV`
,
COALESCE
(
ROUND
(
T2
.
video_uv
/
T2
.
neirong_uv
,
4
),
0
)
AS
`视频UV/内容UV`
,
COALESCE
(
ROUND
(
T2
.
video_pv
/
T2
.
video_uv
,
4
),
0
)
AS
`视频PV/视频UV`
,
COALESCE
(
T3
.
video_stay
,
0
)
AS
`视频单设备时长(m)`
,
COALESCE
(
T2
.
wiki_uv
,
0
)
AS
`百科UV`
,
COALESCE
(
T2
.
wiki_pv
,
0
)
AS
`百科PV`
,
COALESCE
(
ROUND
(
T2
.
wiki_uv
/
T2
.
neirong_uv
,
4
),
0
)
AS
`百科UV/内容UV`
,
COALESCE
(
ROUND
(
T2
.
wiki_pv
/
T2
.
wiki_uv
,
4
),
0
)
AS
`百科PV/百科UV`
,
COALESCE
(
T3
.
wiki_stay
,
0
)
AS
`百科单设备时长(m)`
,
COALESCE
(
T2
.
article_uv
,
0
)
AS
`专栏UV`
,
COALESCE
(
T2
.
article_pv
,
0
)
AS
`专栏PV`
,
COALESCE
(
ROUND
(
T2
.
article_uv
/
T2
.
neirong_uv
,
4
),
0
)
AS
`专栏UV/内容UV`
,
COALESCE
(
ROUND
(
T2
.
article_pv
/
T2
.
article_uv
,
4
),
0
)
AS
`专栏PV/专栏UV`
,
COALESCE
(
T3
.
article_stay
,
0
)
AS
`专栏单设备时长(m)`
FROM
(
--基础维度/dau
SELECT
partition_date
,
device_type
,
active_type
,
t2
.
channel
,
sum
(
dau
)
AS
dau
FROM
(
SELECT
partition_date
,
device_os_type
AS
device_type
,
CASE
WHEN
active_type
=
'4'
THEN
'老活跃设备'
WHEN
active_type
IN
(
'1'
,
'2'
)
THEN
'新增设备'
END
AS
active_type
,
array
(
CASE
WHEN
tmp
.
time
=
'AI'
or
(
partition_date
<
20200301
AND
first_channel_source_type
like
'promotion_toutiao_jy%'
)
THEN
'AI'
ELSE
'其他'
END
,
'合计'
)
as
channel
,
count
(
1
)
AS
dau
FROM
online
.
ml_device_day_active_status
LEFT
JOIN
(
SELECT
phone
,
time
FROM
offline
.
tmp_zhx_20191227
WHERE
flag
=
'0204_danlei_channel'
)
tmp
on
first_channel_source_type
=
tmp
.
phone
WHERE
partition_date
>=
regexp_replace
((
current_date
-
interval
'60'
day
),
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
AND
active_type
IN
(
'1'
,
'2'
,
'4'
)
AND
first_channel_source_type
not
IN
(
'yqxiu1'
,
'yqxiu2'
,
'yqxiu3'
,
'yqxiu4'
,
'yqxiu5'
,
'mxyc1'
,
'mxyc2'
,
'mxyc3'
,
'wanpu'
,
'jinshan'
,
'jx'
,
'maimai'
,
'zhuoyi'
,
'huatian'
,
'suopingjingling'
,
'mocha'
,
'mizhe'
,
'meika'
,
'lamabang'
,
'js-az1'
,
'js-az2'
,
'js-az3'
,
'js-az4'
,
'js-az5'
,
'jfq-az1'
,
'jfq-az2'
,
'jfq-az3'
,
'jfq-az4'
,
'jfq-az5'
,
'toufang1'
,
'toufang2'
,
'toufang3'
,
'toufang4'
,
'toufang5'
,
'toufang6'
,
'TF-toufang1'
,
'TF-toufang2'
,
'TF-toufang3'
,
'TF-toufang4'
,
'TF-toufang5'
,
'tf-toufang1'
,
'tf-toufang2'
,
'tf-toufang3'
,
'tf-toufang4'
,
'tf-toufang5'
,
'benzhan'
,
'promotion_aso100'
,
'promotion_qianka'
,
'promotion_xiaoyu'
,
'promotion_dianru'
,
'promotion_malioaso'
,
'promotion_malioaso-shequ'
,
'promotion_shike'
,
'promotion_julang_jl03'
,
'promotion_zuimei'
,
''
,
'unknown'
)
AND
first_channel_source_type
not
LIKE
'promotion
\_
jf
\_
%'
GROUP
BY
partition_date
,
device_os_type
,
CASE
WHEN
active_type
=
'4'
THEN
'老活跃设备'
WHEN
active_type
IN
(
'1'
,
'2'
)
THEN
'新增设备'
END
,
CASE
WHEN
tmp
.
time
=
'AI'
or
(
partition_date
<
20200301
AND
first_channel_source_type
like
'promotion_toutiao_jy%'
)
THEN
'AI'
ELSE
'其他'
END
)
t1
LATERAL
VIEW
explode
(
t1
.
channel
)
t2
AS
channel
GROUP
BY
partition_date
,
device_type
,
active_type
,
t2
.
channel
)
T1
LEFT
JOIN
(
--内容uv/pv
SELECT
partition_date
,
device_type
,
active_type
,
t4
.
channel
,
count
(
cl_id
)
as
neirong_pv
,
count
(
distinct
cl_id
)
as
neirong_uv
,
count
(
CASE
WHEN
page_name
IN
(
'diary_detail'
,
'topic_detail'
)
THEN
cl_id
END
)
AS
diary_pv
,
count
(
distinct
CASE
WHEN
page_name
IN
(
'diary_detail'
,
'topic_detail'
)
THEN
cl_id
END
)
AS
diary_uv
,
count
(
CASE
WHEN
page_name
IN
(
'post_detail'
,
'user_post_detail'
,
'doctor_post_detail'
)
THEN
cl_id
END
)
AS
post_pv
,
count
(
distinct
CASE
WHEN
page_name
IN
(
'post_detail'
,
'user_post_detail'
,
'doctor_post_detail'
)
THEN
cl_id
END
)
AS
post_uv
,
count
(
CASE
WHEN
page_name
=
'question_detail'
THEN
cl_id
END
)
AS
question_pv
,
count
(
distinct
CASE
WHEN
page_name
=
'question_detail'
THEN
cl_id
END
)
AS
question_uv
,
count
(
CASE
WHEN
page_name
=
'question_answer_detail'
THEN
cl_id
END
)
AS
question_answer_pv
,
count
(
distinct
CASE
WHEN
page_name
=
'question_answer_detail'
THEN
cl_id
END
)
AS
question_answer_uv
,
count
(
CASE
WHEN
page_name
=
'answer_detail'
THEN
cl_id
END
)
AS
answer_pv
,
count
(
distinct
CASE
WHEN
page_name
=
'answer_detail'
THEN
cl_id
END
)
AS
answer_uv
,
count
(
CASE
WHEN
page_name
=
'video_steep'
THEN
cl_id
END
)
AS
video_pv
,
count
(
distinct
CASE
WHEN
page_name
=
'video_steep'
THEN
cl_id
END
)
AS
video_uv
,
count
(
CASE
WHEN
page_name
=
'article_detail'
THEN
cl_id
END
)
AS
article_pv
,
count
(
distinct
CASE
WHEN
page_name
=
'article_detail'
THEN
cl_id
END
)
AS
article_uv
,
count
(
CASE
WHEN
page_name
IN
(
'wiki_detail'
,
'product_detail'
,
'wiki_brand'
,
'wiki_collect'
)
THEN
cl_id
END
)
AS
wiki_pv
,
count
(
distinct
CASE
WHEN
page_name
IN
(
'wiki_detail'
,
'product_detail'
,
'wiki_brand'
,
'wiki_collect'
)
THEN
cl_id
END
)
AS
wiki_uv
FROM
(
SELECT
t1
.
partition_date
,
device_type
,
active_type
,
channel
,
t2
.
cl_id
,
t2
.
page_name
FROM
(
SELECT
partition_date
,
device_os_type
AS
device_type
,
CASE
WHEN
active_type
=
'4'
THEN
'老活跃设备'
WHEN
active_type
IN
(
'1'
,
'2'
)
THEN
'新增设备'
END
AS
active_type
,
array
(
CASE
WHEN
tmp
.
time
=
'AI'
or
(
partition_date
<
20200301
AND
first_channel_source_type
like
'promotion_toutiao_jy%'
)
THEN
'AI'
ELSE
'其他'
END
,
'合计'
)
as
channel
,
device_id
FROM
online
.
ml_device_day_active_status
LEFT
JOIN
(
SELECT
phone
,
time
FROM
offline
.
tmp_zhx_20191227
WHERE
flag
=
'0204_danlei_channel'
)
tmp
on
first_channel_source_type
=
tmp
.
phone
WHERE
partition_date
>=
regexp_replace
((
current_date
-
interval
'60'
day
),
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
AND
active_type
IN
(
'1'
,
'2'
,
'4'
)
AND
first_channel_source_type
not
IN
(
'yqxiu1'
,
'yqxiu2'
,
'yqxiu3'
,
'yqxiu4'
,
'yqxiu5'
,
'mxyc1'
,
'mxyc2'
,
'mxyc3'
,
'wanpu'
,
'jinshan'
,
'jx'
,
'maimai'
,
'zhuoyi'
,
'huatian'
,
'suopingjingling'
,
'mocha'
,
'mizhe'
,
'meika'
,
'lamabang'
,
'js-az1'
,
'js-az2'
,
'js-az3'
,
'js-az4'
,
'js-az5'
,
'jfq-az1'
,
'jfq-az2'
,
'jfq-az3'
,
'jfq-az4'
,
'jfq-az5'
,
'toufang1'
,
'toufang2'
,
'toufang3'
,
'toufang4'
,
'toufang5'
,
'toufang6'
,
'TF-toufang1'
,
'TF-toufang2'
,
'TF-toufang3'
,
'TF-toufang4'
,
'TF-toufang5'
,
'tf-toufang1'
,
'tf-toufang2'
,
'tf-toufang3'
,
'tf-toufang4'
,
'tf-toufang5'
,
'benzhan'
,
'promotion_aso100'
,
'promotion_qianka'
,
'promotion_xiaoyu'
,
'promotion_dianru'
,
'promotion_malioaso'
,
'promotion_malioaso-shequ'
,
'promotion_shike'
,
'promotion_julang_jl03'
,
'promotion_zuimei'
)
AND
first_channel_source_type
not
LIKE
'promotion
\_
jf
\_
%'
)
t1
JOIN
(
SELECT
partition_date
,
page_name
,
cl_id
FROM
online
.
bl_hdfs_maidian_updates
WHERE
partition_date
>=
regexp_replace
((
current_date
-
interval
'60'
day
),
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
AND
action
=
'page_view'
AND
page_name
IN
(
'diary_detail'
,
'topic_detail'
,
'post_detail'
,
'user_post_detail'
,
'doctor_post_detail'
,
'question_detail'
,
'answer_detail'
,
'question_answer_detail'
,
'video_steep'
,
'article_detail'
,
'wiki_detail'
,
'product_detail'
,
'wiki_brand'
,
'wiki_collect'
)
)
t2
ON
t1
.
partition_date
=
t2
.
partition_date
AND
t1
.
device_id
=
t2
.
cl_id
LEFT
JOIN
(
-- 去掉疑似机构刷量的PV和UV
select
distinct
device_id
from
ml
.
ml_d_ct_dv_devicespam_d
where
partition_day
=
'${partition_date}'
)
spam_pv
on
t2
.
cl_id
=
spam_pv
.
device_id
WHERE
spam_pv
.
device_id
IS
NULL
)
t3
LATERAL
VIEW
explode
(
t3
.
channel
)
t4
AS
channel
GROUP
BY
partition_date
,
device_type
,
active_type
,
t4
.
channel
)
T2
ON
T1
.
partition_date
=
T2
.
partition_date
AND
T1
.
device_type
=
T2
.
device_type
AND
T1
.
active_type
=
T2
.
active_type
AND
T1
.
channel
=
T2
.
channel
LEFT
JOIN
(
--内容浏览时长
SELECT
partition_date
,
device_type
,
active_type
,
t4
.
channel
,
round
(
sum
(
page_stay
)
/
count
(
distinct
cl_id
)
/
60
,
4
)
as
neirong_stay
,
round
(
sum
(
CASE
WHEN
page_name
IN
(
'diary_detail'
,
'topic_detail'
)
THEN
page_stay
else
0
END
)
/
count
(
distinct
CASE
WHEN
page_name
IN
(
'diary_detail'
,
'topic_detail'
)
THEN
cl_id
END
)
/
60
,
4
)
AS
diary_stay
,
round
(
sum
(
CASE
WHEN
page_name
IN
(
'post_detail'
,
'user_post_detail'
,
'doctor_post_detail'
)
THEN
page_stay
else
0
END
)
/
count
(
distinct
CASE
WHEN
page_name
IN
(
'post_detail'
,
'user_post_detail'
,
'doctor_post_detail'
)
THEN
cl_id
END
)
/
60
,
4
)
AS
post_stay
,
round
(
sum
(
CASE
WHEN
page_name
=
'question_detail'
THEN
page_stay
else
0
END
)
/
count
(
distinct
CASE
WHEN
page_name
=
'question_detail'
THEN
cl_id
END
)
/
60
,
4
)
AS
question_stay
,
round
(
sum
(
CASE
WHEN
page_name
=
'question_answer_detail'
THEN
page_stay
else
0
END
)
/
count
(
distinct
CASE
WHEN
page_name
=
'question_answer_detail'
THEN
cl_id
END
)
/
60
,
4
)
AS
question_answer_stay
,
round
(
sum
(
CASE
WHEN
page_name
=
'answer_detail'
THEN
page_stay
else
0
END
)
/
count
(
distinct
CASE
WHEN
page_name
=
'answer_detail'
THEN
cl_id
END
)
/
60
,
4
)
AS
answer_stay
,
round
(
sum
(
CASE
WHEN
page_name
=
'video_steep'
THEN
page_stay
else
0
END
)
/
count
(
distinct
CASE
WHEN
page_name
=
'video_steep'
THEN
cl_id
END
)
/
60
,
4
)
AS
video_stay
,
round
(
sum
(
CASE
WHEN
page_name
=
'article_detail'
THEN
page_stay
else
0
END
)
/
count
(
distinct
CASE
WHEN
page_name
=
'article_detail'
THEN
cl_id
END
)
/
60
,
4
)
AS
article_stay
,
round
(
sum
(
CASE
WHEN
page_name
IN
(
'wiki_detail'
,
'product_detail'
,
'wiki_brand'
,
'wiki_collect'
)
THEN
page_stay
else
0
END
)
/
count
(
distinct
CASE
WHEN
page_name
IN
(
'wiki_detail'
,
'product_detail'
,
'wiki_brand'
,
'wiki_collect'
)
THEN
cl_id
END
)
/
60
,
4
)
AS
wiki_stay
FROM
(
SELECT
t1
.
partition_date
,
device_type
,
active_type
,
channel
,
t2
.
cl_id
,
t2
.
page_name
,
t2
.
page_stay
FROM
(
SELECT
partition_date
,
device_os_type
AS
device_type
,
CASE
WHEN
active_type
=
'4'
THEN
'老活跃设备'
WHEN
active_type
IN
(
'1'
,
'2'
)
THEN
'新增设备'
END
AS
active_type
,
array
(
CASE
WHEN
tmp
.
time
=
'AI'
or
(
partition_date
<
20200301
AND
first_channel_source_type
like
'promotion_toutiao_jy%'
)
THEN
'AI'
ELSE
'其他'
END
,
'合计'
)
as
channel
,
device_id
FROM
online
.
ml_device_day_active_status
LEFT
JOIN
(
SELECT
phone
,
time
FROM
offline
.
tmp_zhx_20191227
WHERE
flag
=
'0204_danlei_channel'
)
tmp
on
first_channel_source_type
=
tmp
.
phone
WHERE
partition_date
>=
regexp_replace
((
current_date
-
interval
'60'
day
),
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
AND
active_type
IN
(
'1'
,
'2'
,
'4'
)
AND
first_channel_source_type
not
IN
(
'yqxiu1'
,
'yqxiu2'
,
'yqxiu3'
,
'yqxiu4'
,
'yqxiu5'
,
'mxyc1'
,
'mxyc2'
,
'mxyc3'
,
'wanpu'
,
'jinshan'
,
'jx'
,
'maimai'
,
'zhuoyi'
,
'huatian'
,
'suopingjingling'
,
'mocha'
,
'mizhe'
,
'meika'
,
'lamabang'
,
'js-az1'
,
'js-az2'
,
'js-az3'
,
'js-az4'
,
'js-az5'
,
'jfq-az1'
,
'jfq-az2'
,
'jfq-az3'
,
'jfq-az4'
,
'jfq-az5'
,
'toufang1'
,
'toufang2'
,
'toufang3'
,
'toufang4'
,
'toufang5'
,
'toufang6'
,
'TF-toufang1'
,
'TF-toufang2'
,
'TF-toufang3'
,
'TF-toufang4'
,
'TF-toufang5'
,
'tf-toufang1'
,
'tf-toufang2'
,
'tf-toufang3'
,
'tf-toufang4'
,
'tf-toufang5'
,
'benzhan'
,
'promotion_aso100'
,
'promotion_qianka'
,
'promotion_xiaoyu'
,
'promotion_dianru'
,
'promotion_malioaso'
,
'promotion_malioaso-shequ'
,
'promotion_shike'
,
'promotion_julang_jl03'
,
'promotion_zuimei'
)
AND
first_channel_source_type
not
LIKE
'promotion
\_
jf
\_
%'
)
t1
JOIN
(
SELECT
partition_date
,
page_name
,
cl_id
,
page_stay
FROM
online
.
bl_hdfs_maidian_updates
WHERE
partition_date
>=
regexp_replace
((
current_date
-
interval
'60'
day
),
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
AND
action
=
'page_view'
AND
page_name
IN
(
'diary_detail'
,
'topic_detail'
,
'post_detail'
,
'user_post_detail'
,
'doctor_post_detail'
,
'question_detail'
,
'answer_detail'
,
'question_answer_detail'
,
'video_steep'
,
'article_detail'
,
'wiki_detail'
,
'product_detail'
,
'wiki_brand'
,
'wiki_collect'
)
AND
page_stay
>=
0
AND
page_stay
<
1000
)
t2
ON
t1
.
partition_date
=
t2
.
partition_date
AND
t1
.
device_id
=
t2
.
cl_id
LEFT
JOIN
(
-- 去掉疑似机构刷量的PV和UV
select
distinct
device_id
from
ml
.
ml_d_ct_dv_devicespam_d
where
partition_day
=
'${partition_date}'
)
spam_pv
on
t2
.
cl_id
=
spam_pv
.
device_id
WHERE
spam_pv
.
device_id
IS
NULL
)
t3
LATERAL
VIEW
explode
(
t3
.
channel
)
t4
AS
channel
GROUP
BY
partition_date
,
device_type
,
active_type
,
t4
.
channel
)
T3
ON
T1
.
partition_date
=
T3
.
partition_date
AND
T1
.
device_type
=
T3
.
device_type
AND
T1
.
active_type
=
T3
.
active_type
AND
T1
.
channel
=
T3
.
channel
LEFT
JOIN
(
--内容用户留存
SELECT
regexp_replace
(
partition_date
,
'-'
,
''
)
AS
partition_date
,
device_type
,
active_type
,
t5
.
channel
,
int
(
count
(
DISTINCT
CASE
WHEN
date_add
(
partition_date
,
1
)
=
retention_date
THEN
device_id
END
))
AS
retention_num1
,
int
(
count
(
DISTINCT
CASE
WHEN
date_add
(
partition_date
,
6
)
=
retention_date
THEN
device_id
END
))
AS
retention_num7
,
int
(
count
(
DISTINCT
CASE
WHEN
date_add
(
partition_date
,
29
)
=
retention_date
THEN
device_id
END
))
AS
retention_num30
FROM
(
SELECT
t1
.
partition_date
,
device_type
,
active_type
,
channel
,
t3
.
device_id
,
t3
.
partition_date
as
retention_date
FROM
(
SELECT
concat_ws
(
'-'
,
substr
(
partition_date
,
1
,
4
),
substr
(
partition_date
,
5
,
2
),
substr
(
partition_date
,
7
,
2
))
AS
partition_date
,
device_os_type
AS
device_type
,
CASE
WHEN
active_type
=
'4'
THEN
'老活跃设备'
WHEN
active_type
IN
(
'1'
,
'2'
)
THEN
'新增设备'
END
AS
active_type
,
array
(
CASE
WHEN
tmp
.
time
=
'AI'
or
(
partition_date
<
20200301
AND
first_channel_source_type
like
'promotion_toutiao_jy%'
)
THEN
'AI'
ELSE
'其他'
END
,
'合计'
)
as
channel
,
device_id
FROM
online
.
ml_device_day_active_status
LEFT
JOIN
(
SELECT
phone
,
time
FROM
offline
.
tmp_zhx_20191227
WHERE
flag
=
'0204_danlei_channel'
)
tmp
on
first_channel_source_type
=
tmp
.
phone
WHERE
partition_date
>=
regexp_replace
((
current_date
-
interval
'60'
day
),
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
AND
active_type
IN
(
'1'
,
'2'
,
'4'
)
AND
first_channel_source_type
not
IN
(
'yqxiu1'
,
'yqxiu2'
,
'yqxiu3'
,
'yqxiu4'
,
'yqxiu5'
,
'mxyc1'
,
'mxyc2'
,
'mxyc3'
,
'wanpu'
,
'jinshan'
,
'jx'
,
'maimai'
,
'zhuoyi'
,
'huatian'
,
'suopingjingling'
,
'mocha'
,
'mizhe'
,
'meika'
,
'lamabang'
,
'js-az1'
,
'js-az2'
,
'js-az3'
,
'js-az4'
,
'js-az5'
,
'jfq-az1'
,
'jfq-az2'
,
'jfq-az3'
,
'jfq-az4'
,
'jfq-az5'
,
'toufang1'
,
'toufang2'
,
'toufang3'
,
'toufang4'
,
'toufang5'
,
'toufang6'
,
'TF-toufang1'
,
'TF-toufang2'
,
'TF-toufang3'
,
'TF-toufang4'
,
'TF-toufang5'
,
'tf-toufang1'
,
'tf-toufang2'
,
'tf-toufang3'
,
'tf-toufang4'
,
'tf-toufang5'
,
'benzhan'
,
'promotion_aso100'
,
'promotion_qianka'
,
'promotion_xiaoyu'
,
'promotion_dianru'
,
'promotion_malioaso'
,
'promotion_malioaso-shequ'
,
'promotion_shike'
,
'promotion_julang_jl03'
,
'promotion_zuimei'
)
AND
first_channel_source_type
not
LIKE
'promotion
\_
jf
\_
%'
)
t1
JOIN
(
--内容用户
SELECT
cl_id
,
concat_ws
(
'-'
,
substr
(
partition_date
,
1
,
4
),
substr
(
partition_date
,
5
,
2
),
substr
(
partition_date
,
7
,
2
))
AS
partition_date
FROM
online
.
bl_hdfs_maidian_updates
WHERE
partition_date
>=
regexp_replace
((
current_date
-
interval
'60'
day
),
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
AND
action
=
'page_view'
AND
page_name
IN
(
'diary_detail'
,
'topic_detail'
,
'post_detail'
,
'user_post_detail'
,
'doctor_post_detail'
,
'question_detail'
,
'answer_detail'
,
'question_answer_detail'
,
'video_steep'
,
'article_detail'
,
'wiki_detail'
,
'product_detail'
,
'wiki_brand'
,
'wiki_collect'
)
GROUP
BY
cl_id
,
concat_ws
(
'-'
,
substr
(
partition_date
,
1
,
4
),
substr
(
partition_date
,
5
,
2
),
substr
(
partition_date
,
7
,
2
))
)
t2
ON
t1
.
partition_date
=
t2
.
partition_date
AND
t1
.
device_id
=
t2
.
cl_id
LEFT
JOIN
(
-- 去掉疑似机构刷量的PV和UV
select
distinct
device_id
from
ml
.
ml_d_ct_dv_devicespam_d
where
partition_day
=
'${partition_date}'
)
spam_pv
on
t2
.
cl_id
=
spam_pv
.
device_id
WHERE
spam_pv
.
device_id
IS
NULL
LEFT
JOIN
(
--活跃设备
SELECT
device_id
,
concat_ws
(
'-'
,
substr
(
partition_date
,
1
,
4
),
substr
(
partition_date
,
5
,
2
),
substr
(
partition_date
,
7
,
2
))
AS
partition_date
FROM
online
.
ml_device_day_active_status
WHERE
partition_date
>=
regexp_replace
((
current_date
-
interval
'60'
day
),
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
)
t3
ON
t2
.
cl_id
=
t3
.
device_id
WHERE
spam_pv
.
cl_id
IS
NULL
)
t4
LATERAL
VIEW
explode
(
t4
.
channel
)
t5
AS
channel
GROUP
BY
regexp_replace
(
partition_date
,
'-'
,
''
),
device_type
,
active_type
,
t5
.
channel
)
T4
ON
T1
.
partition_date
=
T4
.
partition_date
AND
T1
.
device_type
=
T4
.
device_type
AND
T1
.
active_type
=
T4
.
active_type
AND
T1
.
channel
=
T4
.
channel
LEFT
JOIN
(
--内容用户单设备app时长(m)
SELECT
partition_date
,
device_type
,
active_type
,
t5
.
channel
,
round
(
sum
(
use_duration
)
/
count
(
distinct
cl_id
)
/
60
,
4
)
as
app_duration
,
round
(
avg
(
open_times
),
4
)
as
avg_opentimes
FROM
(
SELECT
t1
.
partition_date
,
device_type
,
active_type
,
channel
,
t2
.
cl_id
,
t3
.
use_duration
,
t3
.
open_times
FROM
(
SELECT
partition_date
,
device_os_type
AS
device_type
,
CASE
WHEN
active_type
=
'4'
THEN
'老活跃设备'
WHEN
active_type
IN
(
'1'
,
'2'
)
THEN
'新增设备'
END
AS
active_type
,
array
(
CASE
WHEN
tmp
.
time
=
'AI'
or
(
partition_date
<
20200301
AND
first_channel_source_type
like
'promotion_toutiao_jy%'
)
THEN
'AI'
ELSE
'其他'
END
,
'合计'
)
as
channel
,
device_id
FROM
online
.
ml_device_day_active_status
LEFT
JOIN
(
SELECT
phone
,
time
FROM
offline
.
tmp_zhx_20191227
WHERE
flag
=
'0204_danlei_channel'
)
tmp
on
first_channel_source_type
=
tmp
.
phone
WHERE
partition_date
>=
regexp_replace
((
current_date
-
interval
'60'
day
),
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
AND
active_type
IN
(
'1'
,
'2'
,
'4'
)
AND
first_channel_source_type
not
IN
(
'yqxiu1'
,
'yqxiu2'
,
'yqxiu3'
,
'yqxiu4'
,
'yqxiu5'
,
'mxyc1'
,
'mxyc2'
,
'mxyc3'
,
'wanpu'
,
'jinshan'
,
'jx'
,
'maimai'
,
'zhuoyi'
,
'huatian'
,
'suopingjingling'
,
'mocha'
,
'mizhe'
,
'meika'
,
'lamabang'
,
'js-az1'
,
'js-az2'
,
'js-az3'
,
'js-az4'
,
'js-az5'
,
'jfq-az1'
,
'jfq-az2'
,
'jfq-az3'
,
'jfq-az4'
,
'jfq-az5'
,
'toufang1'
,
'toufang2'
,
'toufang3'
,
'toufang4'
,
'toufang5'
,
'toufang6'
,
'TF-toufang1'
,
'TF-toufang2'
,
'TF-toufang3'
,
'TF-toufang4'
,
'TF-toufang5'
,
'tf-toufang1'
,
'tf-toufang2'
,
'tf-toufang3'
,
'tf-toufang4'
,
'tf-toufang5'
,
'benzhan'
,
'promotion_aso100'
,
'promotion_qianka'
,
'promotion_xiaoyu'
,
'promotion_dianru'
,
'promotion_malioaso'
,
'promotion_malioaso-shequ'
,
'promotion_shike'
,
'promotion_julang_jl03'
,
'promotion_zuimei'
)
AND
first_channel_source_type
not
LIKE
'promotion
\_
jf
\_
%'
)
t1
JOIN
(
--内容用户
SELECT
partition_date
,
cl_id
FROM
online
.
bl_hdfs_maidian_updates
WHERE
partition_date
>=
regexp_replace
((
current_date
-
interval
'60'
day
),
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
AND
action
=
'page_view'
AND
page_name
IN
(
'diary_detail'
,
'topic_detail'
,
'post_detail'
,
'user_post_detail'
,
'doctor_post_detail'
,
'question_detail'
,
'answer_detail'
,
'question_answer_detail'
,
'video_steep'
,
'article_detail'
,
'wiki_detail'
,
'product_detail'
,
'wiki_brand'
,
'wiki_collect'
)
GROUP
BY
partition_date
,
cl_id
)
t2
ON
t1
.
partition_date
=
t2
.
partition_date
AND
t1
.
device_id
=
t2
.
cl_id
LEFT
JOIN
(
-- 去掉疑似机构刷量的PV和UV
select
distinct
device_id
from
ml
.
ml_d_ct_dv_devicespam_d
where
partition_day
=
'${partition_date}'
)
spam_pv
on
t2
.
cl_id
=
spam_pv
.
device_id
LEFT
JOIN
(
SELECT
partition_date
,
device_id
,
use_duration
,
open_times
FROM
online
.
ml_device_updates
WHERE
partition_date
>=
regexp_replace
((
current_date
-
interval
'60'
day
),
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
AND
use_duration
>
0
and
use_duration
<
86400
)
t3
on
t2
.
partition_date
=
t3
.
partition_date
AND
t2
.
cl_id
=
t3
.
device_id
WHERE
spam_pv
.
cl_id
IS
NULL
)
t4
LATERAL
VIEW
explode
(
t4
.
channel
)
t5
AS
channel
GROUP
BY
partition_date
,
device_type
,
active_type
,
t5
.
channel
)
T5
ON
T1
.
partition_date
=
T5
.
partition_date
AND
T1
.
device_type
=
T5
.
device_type
AND
T1
.
active_type
=
T5
.
active_type
AND
T1
.
channel
=
T5
.
channel
LEFT
JOIN
(
--不同来源进入内容uv/pv
SELECT
partition_date
,
device_type
,
active_type
,
t4
.
channel
,
count
(
CASE
WHEN
referrer
=
'search'
THEN
cl_id
END
)
AS
search_pv
,
count
(
distinct
CASE
WHEN
referrer
=
'search'
THEN
cl_id
END
)
AS
search_uv
,
count
(
CASE
WHEN
referrer
=
'zone_v3'
THEN
cl_id
END
)
AS
zone_pv
,
count
(
distinct
CASE
WHEN
referrer
=
'zone_v3'
THEN
cl_id
END
)
AS
zone_uv
,
count
(
CASE
WHEN
referrer
=
'feeds'
THEN
cl_id
END
)
AS
feeds_pv
,
count
(
distinct
CASE
WHEN
referrer
=
'feeds'
THEN
cl_id
END
)
AS
feeds_uv
,
count
(
CASE
WHEN
referrer
=
'recommend'
THEN
cl_id
END
)
AS
recommend_pv
,
count
(
distinct
CASE
WHEN
referrer
=
'recommend'
THEN
cl_id
END
)
AS
recommend_uv
,
count
(
CASE
WHEN
referrer
=
'content'
THEN
cl_id
END
)
as
content_pv
,
count
(
distinct
CASE
WHEN
referrer
=
'content'
THEN
cl_id
END
)
as
content_uv
,
count
(
CASE
WHEN
referrer
=
'blank'
THEN
cl_id
END
)
as
blank_pv
,
count
(
distinct
CASE
WHEN
referrer
=
'blank'
THEN
cl_id
END
)
as
blank_uv
,
count
(
CASE
WHEN
referrer
=
'comment'
THEN
cl_id
END
)
as
comment_pv
,
count
(
distinct
CASE
WHEN
referrer
=
'comment'
THEN
cl_id
END
)
as
comment_uv
,
count
(
CASE
WHEN
referrer
=
'org'
THEN
cl_id
END
)
as
org_pv
,
count
(
distinct
CASE
WHEN
referrer
=
'org'
THEN
cl_id
END
)
as
org_uv
,
count
(
CASE
WHEN
referrer
=
'category'
THEN
cl_id
END
)
as
category_pv
,
count
(
distinct
CASE
WHEN
referrer
=
'category'
THEN
cl_id
END
)
as
category_uv
,
count
(
CASE
WHEN
referrer
=
'my_diary'
THEN
cl_id
END
)
as
my_diary_pv
,
count
(
distinct
CASE
WHEN
referrer
=
'my_diary'
THEN
cl_id
END
)
as
my_diary_uv
,
count
(
CASE
WHEN
referrer
=
'ai'
THEN
cl_id
END
)
as
ai_pv
,
count
(
distinct
CASE
WHEN
referrer
=
'ai'
THEN
cl_id
END
)
as
ai_uv
FROM
(
SELECT
t1
.
partition_date
,
device_type
,
active_type
,
channel
,
t2
.
cl_id
,
t2
.
referrer
FROM
(
SELECT
partition_date
,
device_os_type
AS
device_type
,
CASE
WHEN
active_type
=
'4'
THEN
'老活跃设备'
WHEN
active_type
IN
(
'1'
,
'2'
)
THEN
'新增设备'
END
AS
active_type
,
array
(
CASE
WHEN
tmp
.
time
=
'AI'
or
(
partition_date
<
20200301
AND
first_channel_source_type
like
'promotion_toutiao_jy%'
)
THEN
'AI'
ELSE
'其他'
END
,
'合计'
)
as
channel
,
device_id
FROM
online
.
ml_device_day_active_status
LEFT
JOIN
(
SELECT
phone
,
time
FROM
offline
.
tmp_zhx_20191227
WHERE
flag
=
'0204_danlei_channel'
)
tmp
on
first_channel_source_type
=
tmp
.
phone
WHERE
partition_date
>=
regexp_replace
((
current_date
-
interval
'60'
day
),
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
AND
active_type
IN
(
'1'
,
'2'
,
'4'
)
AND
first_channel_source_type
not
IN
(
'yqxiu1'
,
'yqxiu2'
,
'yqxiu3'
,
'yqxiu4'
,
'yqxiu5'
,
'mxyc1'
,
'mxyc2'
,
'mxyc3'
,
'wanpu'
,
'jinshan'
,
'jx'
,
'maimai'
,
'zhuoyi'
,
'huatian'
,
'suopingjingling'
,
'mocha'
,
'mizhe'
,
'meika'
,
'lamabang'
,
'js-az1'
,
'js-az2'
,
'js-az3'
,
'js-az4'
,
'js-az5'
,
'jfq-az1'
,
'jfq-az2'
,
'jfq-az3'
,
'jfq-az4'
,
'jfq-az5'
,
'toufang1'
,
'toufang2'
,
'toufang3'
,
'toufang4'
,
'toufang5'
,
'toufang6'
,
'TF-toufang1'
,
'TF-toufang2'
,
'TF-toufang3'
,
'TF-toufang4'
,
'TF-toufang5'
,
'tf-toufang1'
,
'tf-toufang2'
,
'tf-toufang3'
,
'tf-toufang4'
,
'tf-toufang5'
,
'benzhan'
,
'promotion_aso100'
,
'promotion_qianka'
,
'promotion_xiaoyu'
,
'promotion_dianru'
,
'promotion_malioaso'
,
'promotion_malioaso-shequ'
,
'promotion_shike'
,
'promotion_julang_jl03'
,
'promotion_zuimei'
)
AND
first_channel_source_type
not
LIKE
'promotion
\_
jf
\_
%'
)
t1
LEFT
JOIN
(
--不同页面进入内容
SELECT
partition_date
,
cl_id
,
case
when
referrer
like
'search_result%'
then
'search'
when
referrer
=
''
then
'blank'
when
referrer
=
'zone_v3'
then
'zone_v3'
when
referrer
=
'all_case_service_comment'
then
'comment'
when
referrer
in
(
'organization_detail'
,
'expert_detail'
)
then
'org'
when
referrer
=
'category'
then
'category'
when
referrer
=
'my_diary'
then
'my_diary'
when
referrer
in
(
'face_detect_result'
,
'report_result'
)
then
'ai'
when
referrer
in
(
'diary_detail'
,
'topic_detail'
,
'post_detail'
,
'user_post_detail'
,
'doctor_post_detail'
,
'question_detail'
,
'answer_detail'
,
'question_answer_detail'
)
then
'content'
else
null
end
as
referrer
FROM
online
.
bl_hdfs_maidian_updates
WHERE
partition_date
>=
regexp_replace
((
current_date
-
interval
'60'
day
),
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
and
action
=
'page_view'
AND
page_name
IN
(
'diary_detail'
,
'topic_detail'
,
'post_detail'
,
'user_post_detail'
,
'doctor_post_detail'
,
'question_detail'
,
'answer_detail'
,
'question_answer_detail'
,
'video_steep'
,
'article_detail'
,
'wiki_detail'
,
'product_detail'
,
'wiki_brand'
,
'wiki_collect'
)
UNION
ALL
--首页feeds进入内容(首页非策略卡片点击)
SELECT
partition_date
,
cl_id
,
'feeds'
as
referrer
FROM
online
.
bl_hdfs_maidian_updates
WHERE
partition_date
>=
regexp_replace
((
current_date
-
interval
'60'
day
),
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
AND
page_name
=
'home'
AND
action
=
'on_click_card'
AND
params
[
'transaction_type'
]
not
in
(
'-1'
,
'ctr'
,
'smr'
,
'newdata'
)
AND
params
[
'card_content_type'
]
IN
(
'diary'
,
'diary_topic'
,
'user_post'
,
'doctor_post'
,
'question'
,
'answer'
,
'qa'
,
'live'
,
'article'
)
UNION
ALL
--首页feeds进入内容(首页非策略卡片点击) 7.8.0版本前的埋点
SELECT
partition_date
,
cl_id
,
'feeds'
as
referrer
FROM
online
.
bl_hdfs_maidian_updates
WHERE
partition_date
>=
regexp_replace
((
current_date
-
interval
'60'
day
),
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
AND
page_name
=
'home'
AND
action
in
(
'on_click_diary_card'
,
'on_click_answer_card'
,
'on_click_question_card'
,
'on_click_topic_card'
,
'on_click_live_card'
)
AND
params
[
'transaction_type'
]
not
in
(
'-1'
,
'ctr'
,
'smr'
,
'newdata'
)
UNION
ALL
--推荐进入内容(首页策略卡片点击),5月7日新增transaction_type类型
SELECT
partition_date
,
cl_id
,
'recommend'
as
referrer
FROM
online
.
bl_hdfs_maidian_updates
WHERE
partition_date
>=
regexp_replace
((
current_date
-
interval
'60'
day
),
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
AND
page_name
=
'home'
AND
action
=
'on_click_card'
AND
params
[
'transaction_type'
]
in
(
'-1'
,
'ctr'
,
'smr'
,
'newdata'
)
AND
params
[
'card_content_type'
]
IN
(
'diary'
,
'diary_topic'
,
'user_post'
,
'doctor_post'
,
'question'
,
'answer'
,
'qa'
,
'live'
,
'article'
)
UNION
ALL
--推荐进入内容(首页策略卡片点击) 7.8.0版本前的埋点
SELECT
partition_date
,
cl_id
,
'feeds'
as
referrer
FROM
online
.
bl_hdfs_maidian_updates
WHERE
partition_date
>=
regexp_replace
((
current_date
-
interval
'60'
day
),
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
AND
page_name
=
'home'
AND
action
in
(
'on_click_diary_card'
,
'on_click_answer_card'
,
'on_click_question_card'
,
'on_click_topic_card'
,
'on_click_live_card'
)
AND
params
[
'transaction_type'
]
in
(
'-1'
,
'ctr'
,
'smr'
,
'newdata'
)
)
t2
ON
t1
.
partition_date
=
t2
.
partition_date
AND
t1
.
device_id
=
t2
.
cl_id
LEFT
JOIN
(
-- 去掉疑似机构刷量的PV和UV
select
distinct
device_id
from
ml
.
ml_d_ct_dv_devicespam_d
where
partition_day
=
'${partition_date}'
)
spam_pv
on
t2
.
cl_id
=
spam_pv
.
device_id
WHERE
spam_pv
.
device_id
IS
NULL
)
t3
LATERAL
VIEW
explode
(
t3
.
channel
)
t4
AS
channel
GROUP
BY
partition_date
,
device_type
,
active_type
,
t4
.
channel
)
T6
ON
T1
.
partition_date
=
T6
.
partition_date
AND
T1
.
device_type
=
T6
.
device_type
AND
T1
.
active_type
=
T6
.
active_type
AND
T1
.
channel
=
T6
.
channel
LEFT
JOIN
(
--真实发帖数
SELECT
partition_date
,
device_type
,
active_type
,
t7
.
channel
,
count
(
distinct
id
)
as
num
FROM
(
SELECT
t1
.
partition_date
,
device_type
,
active_type
,
channel
,
t3
.
id
FROM
(
SELECT
partition_date
,
device_os_type
AS
device_type
,
CASE
WHEN
active_type
=
'4'
THEN
'老活跃设备'
WHEN
active_type
IN
(
'1'
,
'2'
)
THEN
'新增设备'
END
AS
active_type
,
array
(
CASE
WHEN
tmp
.
time
=
'AI'
or
(
partition_date
<
20200301
AND
first_channel_source_type
like
'promotion_toutiao_jy%'
)
THEN
'AI'
ELSE
'其他'
END
,
'合计'
)
as
channel
,
device_id
FROM
online
.
ml_device_day_active_status
LEFT
JOIN
(
SELECT
phone
,
time
FROM
offline
.
tmp_zhx_20191227
WHERE
flag
=
'0204_danlei_channel'
)
tmp
on
first_channel_source_type
=
tmp
.
phone
WHERE
partition_date
>=
regexp_replace
((
current_date
-
interval
'60'
day
),
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
AND
active_type
IN
(
'1'
,
'2'
,
'4'
)
AND
first_channel_source_type
not
IN
(
'yqxiu1'
,
'yqxiu2'
,
'yqxiu3'
,
'yqxiu4'
,
'yqxiu5'
,
'mxyc1'
,
'mxyc2'
,
'mxyc3'
,
'wanpu'
,
'jinshan'
,
'jx'
,
'maimai'
,
'zhuoyi'
,
'huatian'
,
'suopingjingling'
,
'mocha'
,
'mizhe'
,
'meika'
,
'lamabang'
,
'js-az1'
,
'js-az2'
,
'js-az3'
,
'js-az4'
,
'js-az5'
,
'jfq-az1'
,
'jfq-az2'
,
'jfq-az3'
,
'jfq-az4'
,
'jfq-az5'
,
'toufang1'
,
'toufang2'
,
'toufang3'
,
'toufang4'
,
'toufang5'
,
'toufang6'
,
'TF-toufang1'
,
'TF-toufang2'
,
'TF-toufang3'
,
'TF-toufang4'
,
'TF-toufang5'
,
'tf-toufang1'
,
'tf-toufang2'
,
'tf-toufang3'
,
'tf-toufang4'
,
'tf-toufang5'
,
'benzhan'
,
'promotion_aso100'
,
'promotion_qianka'
,
'promotion_xiaoyu'
,
'promotion_dianru'
,
'promotion_malioaso'
,
'promotion_malioaso-shequ'
,
'promotion_shike'
,
'promotion_julang_jl03'
,
'promotion_zuimei'
)
AND
first_channel_source_type
not
LIKE
'promotion
\_
jf
\_
%'
)
t1
LEFT
JOIN
(
--找出user_id当天活跃的第一个设备id
SELECT
user_id
,
partition_date
,
if
(
size
(
device_list
)
>
0
,
device_list
[
0
],
''
)
AS
device_id
FROM
online
.
ml_user_updates
WHERE
partition_date
>=
regexp_replace
((
current_date
-
interval
'60'
day
),
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
)
t2
ON
t1
.
partition_date
=
t2
.
partition_date
AND
t1
.
device_id
=
t2
.
device_id
LEFT
JOIN
(
--通过user_id,找到发帖情况
--新增帖子
SELECT
user_id
,
id
,
regexp_replace
(
substr
(
create_time
,
1
,
10
),
'-'
,
''
)
as
create_date
FROM
online
.
tl_hdfs_api_tractate_view
--发帖情况表
WHERE
partition_date
=
regexp_replace
((
current_date
-
interval
'1'
day
),
'-'
,
''
)
AND
is_online
=
'true'
AND
platform
=
'1'
--更美用户发的(去除hera后台,爬虫抓取的,kyc自动回复的)
AND
regexp_replace
(
substr
(
create_time
,
1
,
10
),
'-'
,
''
)
>=
regexp_replace
((
current_date
-
interval
'60'
day
),
'-'
,
''
)
AND
regexp_replace
(
substr
(
create_time
,
1
,
10
),
'-'
,
''
)
<
regexp_replace
((
current_date
),
'-'
,
''
)
UNION
ALL
--新增日记本
SELECT
a
.
user_id
,
a
.
id
,
a
.
create_date
FROM
(
SELECT
user_id
,
id
,
regexp_replace
(
substr
(
created_time
,
1
,
10
),
'-'
,
''
)
as
create_date
FROM
online
.
tl_hdfs_diary_view
WHERE
partition_date
=
regexp_replace
((
current_date
-
interval
'1'
day
),
'-'
,
''
)
and
regexp_replace
(
substr
(
created_time
,
1
,
10
),
'-'
,
''
)
>=
regexp_replace
((
current_date
-
interval
'60'
day
),
'-'
,
''
)
and
regexp_replace
(
substr
(
created_time
,
1
,
10
),
'-'
,
''
)
<
regexp_replace
((
current_date
),
'-'
,
''
)
)
a
JOIN
(
--取非空日记
SELECT
diary_id
FROM
online
.
tl_hdfs_problem_view
WHERE
partition_date
=
regexp_replace
((
current_date
-
interval
'1'
day
),
'-'
,
''
)
AND
is_spam
=
'false'
)
b
ON
a
.
id
=
b
.
diary_id
UNION
ALL
--新增日记贴
SELECT
user_id
,
id
,
regexp_replace
(
substr
(
created_time
,
1
,
10
),
'-'
,
''
)
as
create_date
FROM
online
.
tl_hdfs_problem_view
WHERE
partition_date
=
regexp_replace
((
current_date
-
interval
'1'
day
),
'-'
,
''
)
AND
regexp_replace
(
substr
(
created_time
,
1
,
10
),
'-'
,
''
)
>=
regexp_replace
((
current_date
-
interval
'60'
day
),
'-'
,
''
)
AND
regexp_replace
(
substr
(
created_time
,
1
,
10
),
'-'
,
''
)
<
regexp_replace
((
current_date
),
'-'
,
''
)
AND
is_spam
=
'false'
AND
diary_id
is
not
null
UNION
ALL
--新增问题数
SELECT
user_id
,
id
,
regexp_replace
(
substr
(
create_time
,
1
,
10
),
'-'
,
''
)
as
create_date
FROM
online
.
tl_hdfs_question_view
WHERE
partition_date
=
regexp_replace
((
current_date
-
interval
'1'
day
),
'-'
,
''
)
AND
regexp_replace
(
substr
(
create_time
,
1
,
10
),
'-'
,
''
)
>=
regexp_replace
((
current_date
-
interval
'60'
day
),
'-'
,
''
)
AND
regexp_replace
(
substr
(
create_time
,
1
,
10
),
'-'
,
''
)
<
regexp_replace
((
current_date
),
'-'
,
''
)
AND
is_spam
=
'false'
AND
platform
=
'99'
--更美用户发的(去除hera后台,爬虫抓取的,kyc自动回复的)
UNION
ALL
--新增回答数
SELECT
user_id
,
id
,
regexp_replace
(
substr
(
create_time
,
1
,
10
),
'-'
,
''
)
as
create_date
FROM
online
.
tl_hdfs_answer_view
WHERE
partition_date
=
regexp_replace
((
current_date
-
interval
'1'
day
),
'-'
,
''
)
AND
regexp_replace
(
substr
(
create_time
,
1
,
10
),
'-'
,
''
)
>=
regexp_replace
((
current_date
-
interval
'60'
day
),
'-'
,
''
)
AND
regexp_replace
(
substr
(
create_time
,
1
,
10
),
'-'
,
''
)
<
regexp_replace
((
current_date
),
'-'
,
''
)
AND
is_spam
=
'false'
AND
platform
=
'99'
--更美用户发的(去除hera后台,爬虫抓取的,kyc自动回复的)
)
t3
ON
t2
.
partition_date
=
t3
.
create_date
AND
t2
.
user_id
=
t3
.
user_id
LEFT
SEMI
JOIN
(
--参考mars后台代码增加这一限制,限制用户是在app进行的回复
SELECT
a
.
partition_date
,
user_id
FROM
(
SELECT
partition_date
,
user_id
,
device_id
FROM
online
.
bl_hdfs_operation_updates
WHERE
partition_date
>=
regexp_replace
((
current_date
-
interval
'60'
day
),
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
AND
action
like
'%reply%'
AND
action
not
like
'%vote%'
)
a
JOIN
(
SELECT
partition_date
,
device_id
FROM
online
.
ml_device_day_active_status
WHERE
partition_date
>=
regexp_replace
((
current_date
-
interval
'60'
day
),
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
)
b
on
a
.
device_id
=
b
.
device_id
and
a
.
partition_date
=
b
.
partition_date
group
by
a
.
partition_date
,
user_id
)
t4
on
t3
.
user_id
=
t4
.
user_id
and
t3
.
reply_date
=
t4
.
partition_date
LEFT
JOIN
(
--医生账号
SELECT
distinct
user_id
FROM
online
.
tl_hdfs_doctor_view
WHERE
partition_date
=
regexp_replace
((
current_date
-
interval
'1'
day
),
'-'
,
''
)
UNION
ALL
--马甲账号
SELECT
distinct
user_id
FROM
online
.
ml_user_history_detail
WHERE
partition_date
=
regexp_replace
((
current_date
-
interval
'1'
day
),
'-'
,
''
)
AND
is_puppet
=
'true'
UNION
ALL
--模特用户
select
user_id
from
online
.
tl_api_classifyuser_view
where
partition_date
=
regexp_replace
((
current_date
-
interval
'1'
day
),
'-'
,
''
)
UNION
ALL
--公司内网覆盖用户
select
distinct
user_id
from
dim
.
dim_device_user_staff
UNION
ALL
--登陆过医生设备
SELECT
distinct
t1
.
user_id
FROM
(
SELECT
user_id
,
v
.
device_id
as
device_id
FROM
online
.
ml_user_history_detail
LATERAL
VIEW
EXPLODE
(
device_history_list
)
v
AS
device_id
WHERE
partition_date
=
regexp_replace
((
current_date
-
interval
'1'
day
),
'-'
,
''
)
)
t1
JOIN
(
SELECT
device_id
FROM
online
.
ml_device_history_detail
WHERE
partition_date
=
regexp_replace
((
current_date
-
interval
'1'
day
),
'-'
,
''
)
AND
is_login_doctor
=
'1'
)
t2
ON
t1
.
device_id
=
t2
.
device_id
)
t5
ON
t3
.
user_id
=
t5
.
user_id
where
(
t5
.
user_id
is
null
or
t5
.
user_id
=
''
)
)
t6
LATERAL
VIEW
explode
(
t6
.
channel
)
t7
AS
channel
GROUP
BY
partition_date
,
device_type
,
active_type
,
t7
.
channel
)
T7
ON
T1
.
partition_date
=
T7
.
partition_date
AND
T1
.
device_type
=
T7
.
device_type
AND
T1
.
active_type
=
T7
.
active_type
AND
T1
.
channel
=
T7
.
channel
LEFT
JOIN
(
--真实评论数
SELECT
partition_date
,
device_type
,
active_type
,
t7
.
channel
,
count
(
distinct
id
)
as
num
FROM
(
SELECT
t1
.
partition_date
,
device_type
,
active_type
,
channel
,
t3
.
id
,
t3
.
type
FROM
(
SELECT
partition_date
,
device_os_type
AS
device_type
,
CASE
WHEN
active_type
=
'4'
THEN
'老活跃设备'
WHEN
active_type
IN
(
'1'
,
'2'
)
THEN
'新增设备'
END
AS
active_type
,
array
(
CASE
WHEN
tmp
.
time
=
'AI'
or
(
partition_date
<
20200301
AND
first_channel_source_type
like
'promotion_toutiao_jy%'
)
THEN
'AI'
ELSE
'其他'
END
,
'合计'
)
as
channel
,
device_id
FROM
online
.
ml_device_day_active_status
LEFT
JOIN
(
SELECT
phone
,
time
FROM
offline
.
tmp_zhx_20191227
WHERE
flag
=
'0204_danlei_channel'
)
tmp
on
first_channel_source_type
=
tmp
.
phone
WHERE
partition_date
>=
regexp_replace
((
current_date
-
interval
'60'
day
),
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
AND
active_type
IN
(
'1'
,
'2'
,
'4'
)
AND
first_channel_source_type
not
IN
(
'yqxiu1'
,
'yqxiu2'
,
'yqxiu3'
,
'yqxiu4'
,
'yqxiu5'
,
'mxyc1'
,
'mxyc2'
,
'mxyc3'
,
'wanpu'
,
'jinshan'
,
'jx'
,
'maimai'
,
'zhuoyi'
,
'huatian'
,
'suopingjingling'
,
'mocha'
,
'mizhe'
,
'meika'
,
'lamabang'
,
'js-az1'
,
'js-az2'
,
'js-az3'
,
'js-az4'
,
'js-az5'
,
'jfq-az1'
,
'jfq-az2'
,
'jfq-az3'
,
'jfq-az4'
,
'jfq-az5'
,
'toufang1'
,
'toufang2'
,
'toufang3'
,
'toufang4'
,
'toufang5'
,
'toufang6'
,
'TF-toufang1'
,
'TF-toufang2'
,
'TF-toufang3'
,
'TF-toufang4'
,
'TF-toufang5'
,
'tf-toufang1'
,
'tf-toufang2'
,
'tf-toufang3'
,
'tf-toufang4'
,
'tf-toufang5'
,
'benzhan'
,
'promotion_aso100'
,
'promotion_qianka'
,
'promotion_xiaoyu'
,
'promotion_dianru'
,
'promotion_malioaso'
,
'promotion_malioaso-shequ'
,
'promotion_shike'
,
'promotion_julang_jl03'
,
'promotion_zuimei'
)
AND
first_channel_source_type
not
LIKE
'promotion
\_
jf
\_
%'
)
t1
LEFT
JOIN
(
--找出user_id当天活跃的第一个设备id
SELECT
user_id
,
partition_date
,
if
(
size
(
device_list
)
>
0
,
device_list
[
0
],
''
)
AS
device_id
FROM
online
.
ml_user_updates
WHERE
partition_date
>=
regexp_replace
((
current_date
-
interval
'60'
day
),
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
)
t2
ON
t1
.
partition_date
=
t2
.
partition_date
AND
t1
.
device_id
=
t2
.
device_id
LEFT
JOIN
(
--有评论过日记帖的设备,排除疑似广告
SELECT
t1
.
user_id
,
reply_date
,
t1
.
id
,
'topic_reply'
as
type
FROM
(
SELECT
user_id
,
regexp_replace
(
substr
(
reply_date
,
1
,
10
),
'-'
,
''
)
as
reply_date
,
problem_id
,
id
FROM
online
.
tl_hdfs_topicreply_view
WHERE
partition_date
=
regexp_replace
((
current_date
-
interval
'1'
day
),
'-'
,
''
)
AND
is_spam
=
'false'
--排除疑似广告
-- and diary_id is not null 这个表的diary_id有问题,需要join problem表来判断是不是属于日记
and
regexp_replace
(
substr
(
reply_date
,
1
,
10
),
'-'
,
''
)
>=
regexp_replace
((
current_date
-
interval
'60'
day
),
'-'
,
''
)
and
regexp_replace
(
substr
(
reply_date
,
1
,
10
),
'-'
,
''
)
<
regexp_replace
((
current_date
),
'-'
,
''
)
)
t1
JOIN
(
SELECT
id
,
diary_id
FROM
online
.
tl_hdfs_problem_view
WHERE
partition_date
=
regexp_replace
((
current_date
-
interval
'1'
day
),
'-'
,
''
)
group
by
id
,
diary_id
)
t2
on
t2
.
id
=
t1
.
problem_id
--group by t1.user_id,reply_date
UNION
ALL
--有评论过回答的设备,排除疑似广告
SELECT
t1
.
user_id
,
t1
.
reply_date
,
t1
.
id
as
id
,
'answer_reply'
as
type
FROM
(
SELECT
user_id
,
regexp_replace
(
substr
(
create_time
,
1
,
10
),
'-'
,
''
)
as
reply_date
,
answer_id
,
id
FROM
online
.
tl_hdfs_answer_reply_view
WHERE
partition_date
=
regexp_replace
((
current_date
-
interval
'1'
day
),
'-'
,
''
)
AND
is_spam
=
'false'
--排除疑似广告
and
regexp_replace
(
substr
(
create_time
,
1
,
10
),
'-'
,
''
)
>=
regexp_replace
((
current_date
-
interval
'60'
day
),
'-'
,
''
)
and
regexp_replace
(
substr
(
create_time
,
1
,
10
),
'-'
,
''
)
<
regexp_replace
((
current_date
),
'-'
,
''
)
)
t1
JOIN
(
SELECT
id
,
question_id
FROM
online
.
tl_hdfs_answer_view
WHERE
partition_date
=
regexp_replace
((
current_date
-
interval
'1'
day
),
'-'
,
''
)
group
by
id
,
question_id
)
t2
ON
t2
.
id
=
t1
.
answer_id
UNION
ALL
--有评论过用户帖的设备
SELECT
user_id
,
regexp_replace
(
substr
(
create_time
,
1
,
10
),
'-'
,
''
)
as
reply_date
,
id
,
'tractate_reply'
as
type
FROM
online
.
tl_hdfs_api_tractate_reply_view
WHERE
partition_date
=
regexp_replace
((
current_date
-
interval
'1'
day
),
'-'
,
''
)
and
regexp_replace
(
substr
(
create_time
,
1
,
10
),
'-'
,
''
)
>=
regexp_replace
((
current_date
-
interval
'60'
day
),
'-'
,
''
)
and
regexp_replace
(
substr
(
create_time
,
1
,
10
),
'-'
,
''
)
<
regexp_replace
((
current_date
),
'-'
,
''
)
)
t3
ON
t2
.
partition_date
=
t3
.
reply_date
AND
t2
.
user_id
=
t3
.
user_id
LEFT
SEMI
JOIN
(
--参考mars后台代码增加这一限制,限制用户是在app进行的回复
SELECT
a
.
partition_date
,
user_id
FROM
(
SELECT
partition_date
,
user_id
,
device_id
FROM
online
.
bl_hdfs_operation_updates
WHERE
partition_date
>=
regexp_replace
((
current_date
-
interval
'60'
day
),
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
AND
action
like
'%reply%'
AND
action
not
like
'%vote%'
)
a
JOIN
(
SELECT
partition_date
,
device_id
FROM
online
.
ml_device_day_active_status
WHERE
partition_date
>=
regexp_replace
((
current_date
-
interval
'60'
day
),
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
)
b
on
a
.
device_id
=
b
.
device_id
and
a
.
partition_date
=
b
.
partition_date
group
by
a
.
partition_date
,
user_id
)
t4
on
t3
.
user_id
=
t4
.
user_id
and
t3
.
reply_date
=
t4
.
partition_date
LEFT
JOIN
(
--医生账号
SELECT
distinct
user_id
FROM
online
.
tl_hdfs_doctor_view
WHERE
partition_date
=
regexp_replace
((
current_date
-
interval
'1'
day
),
'-'
,
''
)
UNION
ALL
--马甲账号
SELECT
distinct
user_id
FROM
online
.
ml_user_history_detail
WHERE
partition_date
=
regexp_replace
((
current_date
-
interval
'1'
day
),
'-'
,
''
)
AND
is_puppet
=
'true'
UNION
ALL
--模特用户
select
distinct
user_id
from
online
.
tl_api_classifyuser_view
where
partition_date
=
regexp_replace
((
current_date
-
interval
'1'
day
),
'-'
,
''
)
UNION
ALL
--公司内网覆盖用户
select
distinct
user_id
from
dim
.
dim_device_user_staff
UNION
ALL
--登陆过医生设备
SELECT
distinct
t1
.
user_id
FROM
(
SELECT
user_id
,
v
.
device_id
as
device_id
FROM
online
.
ml_user_history_detail
LATERAL
VIEW
EXPLODE
(
device_history_list
)
v
AS
device_id
WHERE
partition_date
=
regexp_replace
((
current_date
-
interval
'1'
day
),
'-'
,
''
)
)
t1
JOIN
(
SELECT
device_id
FROM
online
.
ml_device_history_detail
WHERE
partition_date
=
regexp_replace
((
current_date
-
interval
'1'
day
),
'-'
,
''
)
AND
is_login_doctor
=
'1'
)
t2
ON
t1
.
device_id
=
t2
.
device_id
)
t5
ON
t3
.
user_id
=
t5
.
user_id
where
(
t5
.
user_id
is
null
or
t5
.
user_id
=
''
)
)
t6
LATERAL
VIEW
explode
(
t6
.
channel
)
t7
AS
channel
GROUP
BY
partition_date
,
device_type
,
active_type
,
t7
.
channel
)
T8
ON
T1
.
partition_date
=
T8
.
partition_date
AND
T1
.
device_type
=
T8
.
device_type
AND
T1
.
active_type
=
T8
.
active_type
AND
T1
.
channel
=
T8
.
channel
LEFT
JOIN
(
--部分页面的单设备页面浏览时长
SELECT
partition_date
,
device_type
,
active_type
,
t5
.
channel
,
round
(
sum
(
CASE
WHEN
page_name
like
'search%'
THEN
page_stay
else
0
END
)
/
count
(
distinct
cl_id
)
/
60
,
4
)
AS
search_stay
,
round
(
sum
(
CASE
WHEN
page_name
=
'welfare_detail'
THEN
page_stay
else
0
END
)
/
count
(
distinct
cl_id
)
/
60
,
4
)
AS
welfare_stay
,
round
(
sum
(
CASE
WHEN
page_name
=
'question_detail'
THEN
page_stay
else
0
END
)
/
count
(
distinct
cl_id
)
/
60
,
4
)
AS
question_stay
,
round
(
sum
(
CASE
WHEN
page_name
in
(
'report_result'
,
'face_scan'
,
'face_detect_result'
,
'face_scan_loading'
,
'face_institute_report'
)
THEN
page_stay
else
0
END
)
/
count
(
distinct
cl_id
)
/
60
,
4
)
AS
ai_related_stay
,
round
(
sum
(
CASE
WHEN
page_name
=
'diary_detail'
THEN
page_stay
else
0
END
)
/
count
(
distinct
cl_id
)
/
60
,
4
)
AS
diary_stay
,
round
(
sum
(
CASE
WHEN
page_name
=
'home'
THEN
page_stay
else
0
END
)
/
count
(
distinct
cl_id
)
/
60
,
4
)
AS
home_stay
,
round
(
sum
(
CASE
WHEN
page_name
in
(
'conversation_detail'
,
'consult_home'
)
THEN
page_stay
else
0
END
)
/
count
(
distinct
cl_id
)
/
60
,
4
)
AS
conv_stay
FROM
(
SELECT
t1
.
partition_date
,
device_type
,
active_type
,
channel
,
t2
.
cl_id
,
t3
.
page_name
,
t3
.
page_stay
FROM
(
SELECT
partition_date
,
device_os_type
AS
device_type
,
CASE
WHEN
active_type
=
'4'
THEN
'老活跃设备'
WHEN
active_type
IN
(
'1'
,
'2'
)
THEN
'新增设备'
END
AS
active_type
,
array
(
CASE
WHEN
tmp
.
time
=
'AI'
or
(
partition_date
<
20200301
AND
first_channel_source_type
like
'promotion_toutiao_jy%'
)
THEN
'AI'
ELSE
'其他'
END
,
'合计'
)
as
channel
,
device_id
FROM
online
.
ml_device_day_active_status
LEFT
JOIN
(
SELECT
phone
,
time
FROM
offline
.
tmp_zhx_20191227
WHERE
flag
=
'0204_danlei_channel'
)
tmp
on
first_channel_source_type
=
tmp
.
phone
WHERE
partition_date
>=
regexp_replace
((
current_date
-
interval
'60'
day
),
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
AND
active_type
IN
(
'1'
,
'2'
,
'4'
)
AND
first_channel_source_type
not
IN
(
'yqxiu1'
,
'yqxiu2'
,
'yqxiu3'
,
'yqxiu4'
,
'yqxiu5'
,
'mxyc1'
,
'mxyc2'
,
'mxyc3'
,
'wanpu'
,
'jinshan'
,
'jx'
,
'maimai'
,
'zhuoyi'
,
'huatian'
,
'suopingjingling'
,
'mocha'
,
'mizhe'
,
'meika'
,
'lamabang'
,
'js-az1'
,
'js-az2'
,
'js-az3'
,
'js-az4'
,
'js-az5'
,
'jfq-az1'
,
'jfq-az2'
,
'jfq-az3'
,
'jfq-az4'
,
'jfq-az5'
,
'toufang1'
,
'toufang2'
,
'toufang3'
,
'toufang4'
,
'toufang5'
,
'toufang6'
,
'TF-toufang1'
,
'TF-toufang2'
,
'TF-toufang3'
,
'TF-toufang4'
,
'TF-toufang5'
,
'tf-toufang1'
,
'tf-toufang2'
,
'tf-toufang3'
,
'tf-toufang4'
,
'tf-toufang5'
,
'benzhan'
,
'promotion_aso100'
,
'promotion_qianka'
,
'promotion_xiaoyu'
,
'promotion_dianru'
,
'promotion_malioaso'
,
'promotion_malioaso-shequ'
,
'promotion_shike'
,
'promotion_julang_jl03'
,
'promotion_zuimei'
)
AND
first_channel_source_type
not
LIKE
'promotion
\_
jf
\_
%'
)
t1
JOIN
(
--内容用户
SELECT
partition_date
,
cl_id
FROM
online
.
bl_hdfs_maidian_updates
WHERE
partition_date
>=
regexp_replace
((
current_date
-
interval
'60'
day
),
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
AND
action
=
'page_view'
AND
page_name
IN
(
'diary_detail'
,
'topic_detail'
,
'post_detail'
,
'user_post_detail'
,
'doctor_post_detail'
,
'question_detail'
,
'answer_detail'
,
'question_answer_detail'
,
'video_steep'
,
'article_detail'
,
'wiki_detail'
,
'product_detail'
,
'wiki_brand'
,
'wiki_collect'
)
GROUP
BY
partition_date
,
cl_id
)
t2
ON
t1
.
partition_date
=
t2
.
partition_date
AND
t1
.
device_id
=
t2
.
cl_id
LEFT
JOIN
(
--部分页面的停留时长
SELECT
partition_date
,
cl_id
,
page_name
,
page_stay
FROM
online
.
bl_hdfs_maidian_updates
WHERE
partition_date
>=
regexp_replace
((
current_date
-
interval
'60'
day
),
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
AND
action
=
'page_view'
AND
(
page_name
like
'search%'
or
page_name
IN
(
'welfare_detail'
,
'question_detail'
,
'report_result'
,
'face_scan'
,
'face_detect_result'
,
'face_scan_loading'
,
'face_institute_report'
,
'diary_detail'
,
'home'
,
'conversation_detail'
,
'consult_home'
)
)
AND
page_name
!=
'search_result'
--android埋点会在上报search_result_more时重复上报search_result的埋点,导致page_stay重复计算
AND
page_stay
>=
0
AND
page_stay
<
1000
)
t3
ON
t2
.
partition_date
=
t3
.
partition_date
AND
t2
.
cl_id
=
t3
.
cl_id
LEFT
JOIN
(
-- 去掉疑似机构刷量的PV和UV
select
distinct
device_id
from
ml
.
ml_d_ct_dv_devicespam_d
where
partition_day
=
'${partition_date}'
)
spam_pv
on
t2
.
cl_id
=
spam_pv
.
device_id
WHERE
spam_pv
.
device_id
IS
NULL
)
t4
LATERAL
VIEW
explode
(
t4
.
channel
)
t5
AS
channel
GROUP
BY
partition_date
,
device_type
,
active_type
,
t5
.
channel
)
T9
ON
T1
.
partition_date
=
T9
.
partition_date
AND
T1
.
device_type
=
T9
.
device_type
AND
T1
.
active_type
=
T9
.
active_type
AND
T1
.
channel
=
T9
.
channel
ORDER
BY
T1
.
partition_date
desc
,
T1
.
device_type
,
T1
.
active_type
,
T1
.
channel
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment