Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
B
bi-report
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
data
bi-report
Commits
2d0eff76
Commit
2d0eff76
authored
Oct 20, 2020
by
魏艺敏
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
push codes
parent
34b7d118
Show whitespace changes
Inline
Side-by-side
Showing
15 changed files
with
225 additions
and
201 deletions
+225
-201
en-cn.properties
pm/daily_content/en-cn.properties
+2
-0
create_daily_content_retention.sql
pm/daily_content/etl/create_daily_content_retention.sql
+40
-0
daily_content.sql
pm/daily_content/etl/daily_content.sql
+160
-27
ai_content_by_month.sql
pm/daily_content/report/ai_content_by_month.sql
+3
-3
ai_content_detail.sql
pm/daily_content/report/ai_content_detail.sql
+4
-3
home_content_retention.sql
pm/daily_content/report/home_content_retention.sql
+16
-0
en-cn.properties
pm/daily_content_retention/en-cn.properties
+0
-1
daily_content_retention.zip
pm/daily_content_retention/job/daily_content_retention.zip
+0
-0
step1_1.job
pm/daily_content_retention/job/step1_1.job
+0
-4
step1_2.job
pm/daily_content_retention/job/step1_2.job
+0
-4
step1_3.job
pm/daily_content_retention/job/step1_3.job
+0
-4
step1_4.job
pm/daily_content_retention/job/step1_4.job
+0
-4
step2.job
pm/daily_content_retention/job/step2.job
+0
-5
readme.txt
pm/daily_content_retention/readme.txt
+0
-0
daily_content_retention.sql
...aily_content_retention/report/daily_content_retention.sql
+0
-146
No files found.
pm/daily_content/en-cn.properties
View file @
2d0eff76
...
...
@@ -3,3 +3,4 @@ home_content_detail=首页内容数据-分日明细
home_content_by_month
=
首页内容数据-月均
ai_content_detail
=
ai内容数据-分日明细
ai_content_by_month
=
ai内容数据-月均
home_content_retention
=
分类用户次留
\ No newline at end of file
pm/daily_content/etl/create_daily_content_retention.sql
0 → 100644
View file @
2d0eff76
-- ***************************************************************
-- * Script name :
-- * Purpose     : content daily report - simplified version - for 思璟
-- * Business    : pm
-- * Input data  :
-- * Author      : weiyimin@igengmei.com
-- * Updated     :
-- ***************************************************************

-- Session setting: submit jobs to the data queue.
SET mapreduce.job.queuename=data;

-- Switch to the pm database.
USE pm;

-- Create the managed table that holds the per-segment home-feed retention report.
-- One row per day_id / device_os_type / active_type / channel.
-- NOTE(review): the rate and quality columns are declared STRING although they
-- carry numeric ratios - confirm whether DOUBLE was intended before changing.
CREATE TABLE IF NOT EXISTS pm.tl_pm_content_retention (
    day_id                            STRING COMMENT '{"chs_name":"当天日期","description":"","etl":"","value":"","remark":""}',
    device_os_type                    STRING COMMENT '{"chs_name":"设备类型","description":"","etl":"","value":"","remark":""}',
    active_type                       STRING COMMENT '{"chs_name":"活跃类型","description":"","etl":"","value":"","remark":""}',
    channel                           STRING COMMENT '{"chs_name":"渠道","description":"","etl":"","value":"","remark":""}',
    dau                               BIGINT COMMENT '{"chs_name":"日活","description":"","etl":"","value":"","remark":""}',
    retention_rate                    STRING COMMENT '{"chs_name":"次留率","description":"","etl":"","value":"","remark":""}',
    home_good_click_uv                BIGINT COMMENT '{"chs_name":"首页good click设备数","description":"","etl":"","value":"","remark":""}',
    home_good_click_quality           STRING COMMENT '{"chs_name":"首页gc用户次留率/全站次留率","description":"","etl":"","value":"","remark":""}',
    home_ungood_click_uv              BIGINT COMMENT '{"chs_name":"点击首页卡片但非gc设备数","description":"","etl":"","value":"","remark":""}',
    home_ungood_click_quality         STRING COMMENT '{"chs_name":"点击首页卡片但非gc设备次留率/全站次留率","description":"","etl":"","value":"","remark":""}',
    no_click_uv                       BIGINT COMMENT '{"chs_name":"未点击首页feed卡片设备数","description":"","etl":"","value":"","remark":""}',
    no_click_uv_quality               STRING COMMENT '{"chs_name":"未点击首页feed卡片设备次留率/全站次留率","description":"","etl":"","value":"","remark":""}',
    home_good_click_retention_quality STRING COMMENT '{"chs_name":"当天点击首页feed卡片,且次日依旧点击的次留率/全站次留率","description":"","etl":"","value":"","remark":""}'
)
COMMENT '内容日报-分用户次留'
PARTITIONED BY (PARTITION_DAY STRING COMMENT '分区日期')
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
    COLLECTION ITEMS TERMINATED BY '\002'
    MAP KEYS TERMINATED BY '\003'
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
\ No newline at end of file
pm/daily_content/etl/daily_content.sql
View file @
2d0eff76
...
...
@@ -53,9 +53,9 @@ FROM
LEFT
JOIN
(
SELECT
code
,
is_spam
,
is_ai_channel
,
partition_day
FROM
DIM
.
DIM_AI_CHANNEL_ZP_NEW
WHERE
partition_day
>=
'20190701'
and
partition_day
<
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
))
tmp
WHERE
partition_day
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
))
tmp
on
first_channel_source_type
=
tmp
.
code
and
m
.
partition_date
=
tmp
.
partition_day
WHERE
partition_date
>=
'20190701'
AND
partition_date
<
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
WHERE
partition_date
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
AND
active_type
IN
(
'1'
,
'2'
,
'4'
)
AND
first_channel_source_type
not
IN
(
'yqxiu1'
,
'yqxiu2'
,
'yqxiu3'
,
'yqxiu4'
,
'yqxiu5'
,
'mxyc1'
,
'mxyc2'
,
'mxyc3'
,
'wanpu'
,
'jinshan'
,
'jx'
,
'maimai'
,
'zhuoyi'
,
'huatian'
,
'suopingjingling'
,
'mocha'
,
'mizhe'
,
'meika'
,
'lamabang'
...
...
@@ -75,7 +75,7 @@ FROM
SELECT
device_id
,
concat_ws
(
'-'
,
substr
(
partition_date
,
1
,
4
),
substr
(
partition_date
,
5
,
2
),
substr
(
partition_date
,
7
,
2
))
AS
partition_date
FROM
online
.
ml_device_day_active_status
WHERE
partition_date
>=
'20190701'
AND
partition_date
<
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
WHERE
partition_date
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
)
t2
ON
t1
.
device_id
=
t2
.
device_id
left
join
...
...
@@ -115,9 +115,9 @@ left join
LEFT
JOIN
(
SELECT
code
,
is_spam
,
is_ai_channel
,
partition_day
FROM
DIM
.
DIM_AI_CHANNEL_ZP_NEW
WHERE
partition_day
>=
'20190701'
and
partition_day
<
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
))
tmp
WHERE
partition_day
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
))
tmp
on
first_channel_source_type
=
tmp
.
code
and
m
.
partition_date
=
tmp
.
partition_day
WHERE
partition_date
>=
'20190701'
AND
partition_date
<
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
WHERE
partition_date
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
AND
active_type
IN
(
'1'
,
'2'
,
'4'
)
AND
first_channel_source_type
not
IN
(
'yqxiu1'
,
'yqxiu2'
,
'yqxiu3'
,
'yqxiu4'
,
'yqxiu5'
,
'mxyc1'
,
'mxyc2'
,
'mxyc3'
,
'wanpu'
,
'jinshan'
,
'jx'
,
'maimai'
,
'zhuoyi'
,
'huatian'
,
'suopingjingling'
,
'mocha'
,
'mizhe'
,
'meika'
,
'lamabang'
...
...
@@ -133,40 +133,41 @@ left join
lateral
view
explode
(
mas
.
channel
)
v
as
channel
)
t1
left
join
(
SELECT
partition_date
,
cl_id
,
count
(
distinct
time_str
)
as
pv
from
online
.
bl_hdfs_maidian_updates
WHERE
partition_date
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
AND
action
=
'on_click_card'
AND
params
[
'page_name'
]
=
'home'
and
params
[
'card_content_type'
]
in
(
'diary'
,
'user_post'
,
'answer'
,
'qa'
)
--首页内容卡片点击,未限制tab
GROUP
BY
partition_date
,
cl_id
)
t3
ON
t1
.
partition_date
=
t3
.
partition_date
AND
t1
.
device_id
=
t3
.
cl_id
left
join
(
SELECT
partition_date
,
cl_id
,
count
(
distinct
time_str
)
as
pv
FROM
online
.
bl_hdfs_maidian_updates
WHERE
partition_date
>=
'20190701'
AND
partition_date
<
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
WHERE
partition_date
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
and
referrer
=
'home'
AND
action
=
'page_view'
and
params
[
'is_push'
]
=
0
-- and params['is_first']=1
AND
page_name
IN
(
'diary_detail'
,
'topic_detail'
,
'post_detail'
,
'user_post_detail'
,
'doctor_post_detail'
,
'question_detail'
,
'answer_detail'
,
'question_answer_detail'
,
'video_steep'
,
'article_detail'
,
'wiki_detail'
,
'product_detail'
,
'wiki_brand'
,
'wiki_collect'
,
'welfare_detail'
)
--9月11日新增了可以跳转商详页评论的日记卡片
and
page_stay
>=
20
group
by
partition_date
,
cl_id
)
t2
ON
t1
.
partition_date
=
t2
.
partition_date
AND
t1
.
device_id
=
t2
.
cl_id
left
join
(
SELECT
partition_date
,
cl_id
,
count
(
distinct
time_str
)
as
pv
from
online
.
bl_hdfs_maidian_updates
WHERE
partition_date
>=
'20190701'
AND
partition_date
<=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
AND
action
=
'on_click_card'
AND
params
[
'page_name'
]
=
'home'
and
params
[
'card_content_type'
]
in
(
'diary'
,
'user_post'
,
'answer'
,
'qa'
)
--首页内容卡片点击,未限制tab
GROUP
BY
partition_date
,
cl_id
)
t3
ON
t1
.
partition_date
=
t3
.
partition_date
AND
t1
.
device_id
=
t3
.
cl_id
ON
t3
.
partition_date
=
t2
.
partition_date
AND
t3
.
cl_id
=
t2
.
cl_id
left
join
(
SELECT
partition_date
,
cl_id
,
sum
(
page_stay
)
as
page_stay
FROM
online
.
bl_hdfs_maidian_updates
WHERE
partition_date
>=
'20190701'
AND
partition_date
<
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
WHERE
partition_date
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
AND
action
=
'page_view'
AND
page_name
IN
(
'diary_detail'
,
'topic_detail'
,
'post_detail'
,
'user_post_detail'
,
'doctor_post_detail'
,
'question_detail'
,
'answer_detail'
,
'question_answer_detail'
,
'video_steep'
,
'article_detail'
,
'wiki_detail'
,
'product_detail'
,
'wiki_brand'
,
'wiki_collect'
)
...
...
@@ -215,9 +216,9 @@ left join
LEFT
JOIN
(
SELECT
code
,
is_spam
,
is_ai_channel
,
partition_day
FROM
DIM
.
DIM_AI_CHANNEL_ZP_NEW
WHERE
partition_day
>=
'20190701'
and
partition_day
<
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
))
tmp
WHERE
partition_day
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
))
tmp
on
first_channel_source_type
=
tmp
.
code
and
m
.
partition_date
=
tmp
.
partition_day
WHERE
partition_date
>=
'20190701'
AND
partition_date
<
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
WHERE
partition_date
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
AND
active_type
IN
(
'1'
,
'2'
,
'4'
)
AND
first_channel_source_type
not
IN
(
'yqxiu1'
,
'yqxiu2'
,
'yqxiu3'
,
'yqxiu4'
,
'yqxiu5'
,
'mxyc1'
,
'mxyc2'
,
'mxyc3'
,
'wanpu'
,
'jinshan'
,
'jx'
,
'maimai'
,
'zhuoyi'
,
'huatian'
,
'suopingjingling'
,
'mocha'
,
'mizhe'
,
'meika'
,
'lamabang'
...
...
@@ -238,7 +239,7 @@ left join
,
cl_id
,
count
(
distinct
time_str
)
as
pv
FROM
online
.
bl_hdfs_maidian_updates
WHERE
partition_date
>=
'20190701'
AND
partition_date
<
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
WHERE
partition_date
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
and
referrer
in
(
'report_result'
,
'face_detect_result'
,
'float_tag_detail'
)
AND
action
=
'page_view'
-- and params['is_first']=1
...
...
@@ -253,7 +254,7 @@ left join
(
SELECT
cl_id
,
partition_date
,
count
(
distinct
time_str
)
as
pv
FROM
online
.
bl_hdfs_maidian_updates
WHERE
partition_date
>=
'20190701'
and
partition_date
<
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
WHERE
partition_date
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
AND
action
=
'on_click_card'
and
page_name
in
(
'report_result'
,
'face_detect_result'
,
'float_tag_detail'
)
--ai测颜值、ai测肤质、模拟整形结果页
AND
params
[
'card_content_type'
]
in
(
'diary'
,
'user_post'
,
'answer'
,
'qa'
)
...
...
@@ -265,7 +266,7 @@ left join
(
SELECT
cl_id
,
partition_date
,
count
(
distinct
time_str
)
as
pv
FROM
online
.
bl_hdfs_maidian_updates
WHERE
partition_date
>=
'20190701'
and
partition_date
<
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
WHERE
partition_date
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
AND
action
=
'page_view'
and
page_name
in
(
'report_result'
,
'face_detect_result'
,
'float_tag_detail'
)
--ai测颜值、ai测肤质、模拟整形结果页
-- AND params['card_content_type'] in ('diary','user_post','answer','qa')
...
...
@@ -287,4 +288,136 @@ left join
on
t1
.
partition_date
=
t3
.
partition_date
and
t1
.
active_type
=
t3
.
active_type
and
t1
.
device_os_type
=
t3
.
device_os_type
and
t1
.
channel
=
t3
.
channel
\ No newline at end of file
and
t1
.
channel
=
t3
.
channel
;
-- Rebuild one partition of pm.tl_pm_content_retention.
-- For each day, OS, active type and channel (each dimension also rolled up into
-- a 合计 total row) this counts active devices, splits them by home-feed
-- behaviour (good click / clicked without good click / no click) and expresses
-- the next-day retention of every group relative to the overall retention rate.
-- Blacklisted (abnormal) devices are excluded.
--
-- Fix: no_click_uv_retention_num used the predicate of the clicked-without-
-- good-click group (t4 present, t3 absent). It now uses the same predicate as
-- no_click_uv (t4 absent and t3 absent), so no_click_uv_quality divides a
-- numerator and a denominator that describe the same devices.
--
-- NOTE(review): t1, t3 and t4 read the partition of current_date minus 1, and
-- t2 and t5 read that same single partition, yet both are joined on
-- date_add(partition_date, 1). As written those two joins cannot match inside
-- one run, which leaves retention_num and home_good_click_uv_2 at 0. Confirm
-- which day is the cohort day and which is the return day before relying on
-- the retention columns.
INSERT OVERWRITE TABLE pm.tl_pm_content_retention
PARTITION (PARTITION_DAY = ${partition_day})
SELECT
    day_id,
    device_os_type,
    active_type,
    channel,
    dau,
    -- Overall next-day retention in percent, NULL results are stored as 0.
    COALESCE(ROUND(retention_num / dau * 100, 2), 0) AS retention_rate,
    home_good_click_uv,
    -- Every quality column is (group retention rate) / (overall retention rate).
    COALESCE(ROUND(home_good_click_retention_num * dau / home_good_click_uv / retention_num, 2), 0) AS home_good_click_quality,
    home_ungood_click_uv,
    COALESCE(ROUND(home_ungood_click_retention_num * dau / home_ungood_click_uv / retention_num, 2), 0) AS home_ungood_click_quality,
    no_click_uv,
    COALESCE(ROUND(no_click_uv_retention_num * dau / no_click_uv / retention_num, 2), 0) AS no_click_uv_quality,
    COALESCE(ROUND(home_good_click_uv_2 * dau / home_good_click_uv / retention_num, 2), 0) AS home_good_click_retention_quality
FROM (
    SELECT
        regexp_replace(substr(t1.partition_date, 1, 10), '-', '') AS day_id,
        device_os_type,
        active_type,
        channel,
        COUNT(DISTINCT t1.device_id) AS dau,
        COUNT(DISTINCT t2.device_id) AS retention_num,
        COUNT(DISTINCT t3.cl_id) AS home_good_click_uv,
        COUNT(DISTINCT t5.cl_id) AS home_good_click_uv_2,
        -- Good-click devices that are also present in t2.
        COUNT(DISTINCT CASE WHEN t3.cl_id IS NOT NULL THEN t2.device_id END) AS home_good_click_retention_num,
        -- Devices with a home card click but no good click, and those of them present in t2.
        COUNT(DISTINCT CASE WHEN t4.cl_id IS NOT NULL AND t3.cl_id IS NULL THEN t4.cl_id END) AS home_ungood_click_uv,
        COUNT(DISTINCT CASE WHEN t4.cl_id IS NOT NULL AND t3.cl_id IS NULL THEN t2.device_id END) AS home_ungood_click_retention_num,
        -- Devices with neither a card click nor a good click, and those of them present in t2.
        COUNT(DISTINCT CASE WHEN t4.cl_id IS NULL AND t3.cl_id IS NULL THEN t1.device_id END) AS no_click_uv,
        COUNT(DISTINCT CASE WHEN t4.cl_id IS NULL AND t3.cl_id IS NULL THEN t2.device_id END) AS no_click_uv_retention_num
    FROM (
        -- t1: active devices of the day, fanned out so that every device appears
        -- once under its own value and once under 合计 for each dimension.
        SELECT
            partition_date,
            a.device_os_type,
            b.active_type,
            device_id,
            v.channel
        FROM (
            SELECT
                -- yyyyMMdd -> yyyy-MM-dd so that date_add can be applied later.
                concat_ws('-', substr(partition_date, 1, 4), substr(partition_date, 5, 2), substr(partition_date, 7, 2)) AS partition_date,
                array(device_os_type, '合计') AS device_os_type,
                array(
                    CASE
                        WHEN active_type = '4' THEN '老活跃设备'
                        WHEN active_type IN ('1', '2') THEN '新增设备'
                    END,
                    '合计'
                ) AS active_type,
                device_id,
                array(
                    CASE WHEN tmp.is_ai_channel = 'true' THEN 'AI' ELSE '其他' END,
                    '合计'
                ) AS channel
            FROM online.ml_device_day_active_status m
            LEFT JOIN (
                SELECT code, is_spam, is_ai_channel, partition_day
                FROM DIM.DIM_AI_CHANNEL_ZP_NEW
                WHERE partition_day = regexp_replace(DATE_SUB(current_date, 1), '-', '')
            ) tmp
                ON first_channel_source_type = tmp.code
                AND m.partition_date = tmp.partition_day
            WHERE partition_date = regexp_replace(DATE_SUB(current_date, 1), '-', '')
                AND active_type IN ('1', '2', '4')
                -- Channels excluded from the report.
                AND first_channel_source_type NOT IN (
                    'yqxiu1', 'yqxiu2', 'yqxiu3', 'yqxiu4', 'yqxiu5',
                    'mxyc1', 'mxyc2', 'mxyc3',
                    'wanpu', 'jinshan', 'jx', 'maimai', 'zhuoyi', 'huatian',
                    'suopingjingling', 'mocha', 'mizhe', 'meika', 'lamabang',
                    'js-az1', 'js-az2', 'js-az3', 'js-az4', 'js-az5',
                    'jfq-az1', 'jfq-az2', 'jfq-az3', 'jfq-az4', 'jfq-az5',
                    'toufang1', 'toufang2', 'toufang3', 'toufang4', 'toufang5', 'toufang6',
                    'TF-toufang1', 'TF-toufang2', 'TF-toufang3', 'TF-toufang4', 'TF-toufang5',
                    'tf-toufang1', 'tf-toufang2', 'tf-toufang3', 'tf-toufang4', 'tf-toufang5',
                    'benzhan',
                    'promotion_aso100', 'promotion_qianka', 'promotion_xiaoyu', 'promotion_dianru',
                    'promotion_malioaso', 'promotion_malioaso-shequ', 'promotion_shike',
                    'promotion_julang_jl03', 'promotion_zuimei',
                    '', 'unknown'
                )
                AND first_channel_source_type NOT LIKE 'promotion\_jf\_%'
        ) mas
        LATERAL VIEW explode(mas.device_os_type) a AS device_os_type
        LATERAL VIEW explode(mas.active_type) b AS active_type
        LATERAL VIEW explode(mas.channel) v AS channel
    ) t1
    LEFT JOIN (
        -- t2: active devices, matched to t1 one day later (see NOTE at the top).
        SELECT
            device_id,
            concat_ws('-', substr(partition_date, 1, 4), substr(partition_date, 5, 2), substr(partition_date, 7, 2)) AS partition_date
        FROM online.ml_device_day_active_status
        WHERE partition_date = regexp_replace(DATE_SUB(current_date, 1), '-', '')
    ) t2
        ON t1.device_id = t2.device_id
        AND date_add(t1.partition_date, 1) = t2.partition_date
    LEFT JOIN (
        -- t4: devices that clicked a content card on the home page.
        SELECT
            concat_ws('-', substr(partition_date, 1, 4), substr(partition_date, 5, 2), substr(partition_date, 7, 2)) AS partition_date,
            cl_id,
            COUNT(DISTINCT time_str) AS pv
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date = regexp_replace(DATE_SUB(current_date, 1), '-', '')
            AND action = 'on_click_card'
            AND page_name = 'home'
            AND params['card_content_type'] IN ('diary', 'user_post', 'answer', 'qa')
        GROUP BY partition_date, cl_id
    ) t4
        ON t1.partition_date = t4.partition_date
        AND t1.device_id = t4.cl_id
    LEFT JOIN (
        -- t3: good clicks, meaning detail page views coming from home with a
        -- page_stay of at least 20. Joined through t4, so a good click only
        -- counts for devices that also have a card click.
        SELECT
            concat_ws('-', substr(partition_date, 1, 4), substr(partition_date, 5, 2), substr(partition_date, 7, 2)) AS partition_date,
            cl_id,
            COUNT(DISTINCT time_str) AS pv
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date = regexp_replace(DATE_SUB(current_date, 1), '-', '')
            AND referrer = 'home'
            AND action = 'page_view'
            -- On iOS a page view opened from a push click also reports referrer
            -- home, so push traffic is excluded (some periods may have bad data).
            AND params['is_push'] = 0
            AND page_name IN (
                'diary_detail', 'topic_detail', 'post_detail', 'user_post_detail',
                'doctor_post_detail', 'question_detail', 'answer_detail',
                'question_answer_detail', 'video_steep', 'article_detail',
                'wiki_detail', 'product_detail', 'wiki_brand', 'wiki_collect',
                'welfare_detail'
            )
            AND page_stay >= 20
        GROUP BY partition_date, cl_id
    ) t3
        ON t4.partition_date = t3.partition_date
        AND t4.cl_id = t3.cl_id
    LEFT JOIN (
        -- t5: good clicks again, matched to t3 one day later (see NOTE at the top).
        -- Unlike t3 this subquery has no is_push filter.
        SELECT
            concat_ws('-', substr(partition_date, 1, 4), substr(partition_date, 5, 2), substr(partition_date, 7, 2)) AS partition_date,
            cl_id,
            COUNT(DISTINCT time_str) AS pv
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date = regexp_replace(DATE_SUB(current_date, 1), '-', '')
            AND referrer = 'home'
            AND action = 'page_view'
            AND page_name IN (
                'diary_detail', 'topic_detail', 'post_detail', 'user_post_detail',
                'doctor_post_detail', 'question_detail', 'answer_detail',
                'question_answer_detail', 'video_steep', 'article_detail',
                'wiki_detail', 'product_detail', 'wiki_brand', 'wiki_collect',
                'welfare_detail'
            )
            AND page_stay >= 20
        GROUP BY partition_date, cl_id
    ) t5
        ON t3.cl_id = t5.cl_id
        AND date_add(t3.partition_date, 1) = t5.partition_date
    LEFT JOIN (
        -- Blacklisted devices, removed by the anti-join in the WHERE clause below.
        SELECT DISTINCT device_id
        FROM ML.ML_D_CT_DV_DEVICECLEAN_DIMEN_D
        WHERE PARTITION_DAY = regexp_replace(DATE_SUB(current_date, 1), '-', '')
            AND is_abnormal_device = 'true'
    ) spam_pv
        ON t1.device_id = spam_pv.device_id
    WHERE spam_pv.device_id IS NULL
    GROUP BY
        t1.partition_date,
        device_os_type,
        active_type,
        channel
) t1
\ No newline at end of file
pm/daily_content/report/ai_content_by_month.sql
View file @
2d0eff76
...
...
@@ -8,9 +8,9 @@ SELECT
,
ai_report_card_click_uv
AS
`ai结果页内容卡片点击uv`
,
ai_report_card_click_pv
AS
`ai结果页内容卡片点击pv`
,
ai_report_good_click
AS
`来源于ai结果页的good click`
,
CONCAT
(
ROUND
(
ai_report_card_click_uv
/
ai_report_uv
*
100
,
2
),
'%'
)
AS
`ai结果页内容卡片点击uv/
DAU
`
,
CONCAT
(
ROUND
(
ai_report_card_click_pv
/
ai_report_uv
*
100
,
2
),
'%'
)
AS
`ai结果页内容卡片点击pv/
DAU
`
,
CONCAT
(
ROUND
(
ai_report_good_click
/
ai_report_uv
*
100
,
2
),
'%'
)
AS
`来源于ai结果页的good click/
DAU
`
,
CONCAT
(
ROUND
(
ai_report_card_click_uv
/
ai_report_uv
*
100
,
2
),
'%'
)
AS
`ai结果页内容卡片点击uv/
ai结果页uv
`
,
CONCAT
(
ROUND
(
ai_report_card_click_pv
/
ai_report_uv
*
100
,
2
),
'%'
)
AS
`ai结果页内容卡片点击pv/
ai结果页uv
`
,
CONCAT
(
ROUND
(
ai_report_good_click
/
ai_report_uv
*
100
,
2
),
'%'
)
AS
`来源于ai结果页的good click/
ai结果页uv
`
FROM
(
select
substr
(
day_id
,
1
,
6
)
as
month
...
...
pm/daily_content/report/ai_content_detail.sql
View file @
2d0eff76
...
...
@@ -8,9 +8,9 @@ SELECT
,
ai_report_card_click_uv
AS
`ai结果页内容卡片点击uv`
,
ai_report_card_click_pv
AS
`ai结果页内容卡片点击pv`
,
ai_report_good_click
AS
`来源于ai结果页的good click`
,
NVL
(
CONCAT
(
ROUND
(
ai_report_card_click_uv
/
ai_report_uv
*
100
,
2
),
'%'
),
0
)
AS
`ai结果页内容卡片点击uv/
DAU
`
,
NVL
(
CONCAT
(
ROUND
(
ai_report_card_click_pv
/
ai_report_uv
*
100
,
2
),
'%'
),
0
)
AS
`ai结果页内容卡片点击pv/
DAU
`
,
NVL
(
CONCAT
(
ROUND
(
ai_report_good_click
/
ai_report_uv
*
100
,
2
),
'%'
),
0
)
AS
`来源于ai结果页的good click/
DAU
`
,
NVL
(
CONCAT
(
ROUND
(
ai_report_card_click_uv
/
ai_report_uv
*
100
,
2
),
'%'
),
0
)
AS
`ai结果页内容卡片点击uv/
ai结果页uv
`
,
NVL
(
CONCAT
(
ROUND
(
ai_report_card_click_pv
/
ai_report_uv
*
100
,
2
),
'%'
),
0
)
AS
`ai结果页内容卡片点击pv/
ai结果页uv
`
,
NVL
(
CONCAT
(
ROUND
(
ai_report_good_click
/
ai_report_uv
*
100
,
2
),
'%'
),
0
)
AS
`来源于ai结果页的good click/
ai结果页uv
`
FROM
pm
.
tl_pm_content_v3
where
partition_day
>=
'20201018'
and
partition_day
<=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
order
by
`日期`
,
`系统`
,
`活跃`
,
`渠道`
\ No newline at end of file
pm/daily_content/report/home_content_retention.sql
0 → 100644
View file @
2d0eff76
-- Monthly averages of the home-feed retention metrics per OS, active type and
-- channel, read from pm.tl_pm_content_retention.
-- Quality values equal to 0 are turned into NULL so that avg ignores them.
SELECT
    substr(day_id, 1, 6) AS `日期`,
    device_os_type AS `系统`,
    active_type AS `活跃`,
    channel AS `渠道`,
    round(avg(home_good_click_uv), 0) AS `首页good click设备数`,
    round(avg(if(home_good_click_quality = 0, NULL, home_good_click_quality)), 2) AS `首页gc用户次留率/全站次留率`,
    round(avg(home_ungood_click_uv), 0) AS `点击首页卡片但非gc设备数`,
    round(avg(if(home_ungood_click_quality = 0, NULL, home_ungood_click_quality)), 2) AS `点击首页卡片但非gc设备次留率/全站次留率`,
    round(avg(no_click_uv), 0) AS `未点击首页feed卡片设备数`,
    round(avg(if(no_click_uv_quality = 0, NULL, no_click_uv_quality)), 2) AS `未点击首页feed卡片设备次留率/全站次留率`,
    round(avg(if(home_good_click_retention_quality = 0, NULL, home_good_click_retention_quality)), 2) AS `当天点击首页feed卡片,且次日依旧点击的次留率/全站次留率`
FROM pm.tl_pm_content_retention
WHERE partition_day >= '20201018'
    AND partition_day <= regexp_replace(DATE_SUB(current_date, 1), '-', '')
GROUP BY
    substr(day_id, 1, 6),
    device_os_type,
    active_type,
    channel
ORDER BY
    `日期`,
    `系统`,
    `活跃`,
    `渠道`
\ No newline at end of file
pm/daily_content_retention/en-cn.properties
deleted
100644 → 0
View file @
34b7d118
daily_content_retention
=
内容日报-次留率
pm/daily_content_retention/job/daily_content_retention.zip
deleted
100644 → 0
View file @
34b7d118
File deleted
pm/daily_content_retention/job/step1_1.job
deleted
100644 → 0
View file @
34b7d118
#step1_1.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online bl_hdfs_maidian_updates
\ No newline at end of file
pm/daily_content_retention/job/step1_2.job
deleted
100644 → 0
View file @
34b7d118
#step1_2.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_device_day_active_status
\ No newline at end of file
pm/daily_content_retention/job/step1_3.job
deleted
100644 → 0
View file @
34b7d118
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive ml ML_D_CT_DV_DEVICECLEAN_DIMEN_D
\ No newline at end of file
pm/daily_content_retention/job/step1_4.job
deleted
100644 → 0
View file @
34b7d118
#step1_4.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive DIM DIM_AI_CHANNEL_ZP_NEW
\ No newline at end of file
pm/daily_content_retention/job/step2.job
deleted
100644 → 0
View file @
34b7d118
#step2.job
type=command
dependencies=step1_1,step1_2,step1_3,step1_4
command=curl -X GET http://localhost:8553/api/report/email/daily_content_retention/weiyimin@igengmei.com/hanyingyue@igengmei.com
\ No newline at end of file
pm/daily_content_retention/readme.txt
deleted
100644 → 0
View file @
34b7d118
pm/daily_content_retention/report/daily_content_retention.sql
deleted
100644 → 0
View file @
34b7d118
-- Monthly averages of home-feed retention metrics per OS, active type and
-- channel, computed directly from the event tables for every day from
-- 20190701 up to the day before current_date.
-- Layers: the innermost query (alias t1) counts devices per day and segment,
-- the middle query (alias t2) turns the counts into rates and quality ratios,
-- and the outer query averages them per month. Values equal to 0 are turned
-- into NULL before averaging so that avg ignores them.
--
-- Fix: no_click_uv_retention_num used the predicate of the clicked-without-
-- good-click group (t4 present, t3 absent). It now uses the same predicate as
-- no_click_uv (t4 absent and t3 absent), so no_click_uv_quality divides a
-- numerator and a denominator that describe the same devices.
SELECT
    substr(day_id, 1, 6) AS month,
    device_os_type,
    active_type,
    channel,
    round(avg(dau), 0) AS dau,
    round(avg(if(retention_rate = 0, NULL, retention_rate)), 2) AS retention_rate,
    round(avg(home_good_click_uv), 0) AS home_good_click_uv,
    round(avg(if(home_good_click_quality = 0, NULL, home_good_click_quality)), 2) AS home_good_click_quality,
    round(avg(home_ungood_click_uv), 0) AS home_ungood_click_uv,
    round(avg(if(home_ungood_click_quality = 0, NULL, home_ungood_click_quality)), 2) AS home_ungood_click_quality,
    round(avg(no_click_uv), 0) AS no_click_uv,
    round(avg(if(no_click_uv_quality = 0, NULL, no_click_uv_quality)), 2) AS no_click_uv_quality,
    round(avg(if(home_good_click_retention_quality = 0, NULL, home_good_click_retention_quality)), 2) AS home_good_click_retention_quality
FROM (
    SELECT
        day_id,
        device_os_type,
        active_type,
        channel,
        dau,
        -- Overall next-day retention in percent, NULL results are stored as 0.
        COALESCE(ROUND(retention_num / dau * 100, 2), 0) AS retention_rate,
        home_good_click_uv,
        -- Every quality column is (group retention rate) / (overall retention rate).
        COALESCE(ROUND(home_good_click_retention_num * dau / home_good_click_uv / retention_num, 2), 0) AS home_good_click_quality,
        home_ungood_click_uv,
        COALESCE(ROUND(home_ungood_click_retention_num * dau / home_ungood_click_uv / retention_num, 2), 0) AS home_ungood_click_quality,
        no_click_uv,
        COALESCE(ROUND(no_click_uv_retention_num * dau / no_click_uv / retention_num, 2), 0) AS no_click_uv_quality,
        COALESCE(ROUND(home_good_click_uv_2 * dau / home_good_click_uv / retention_num, 2), 0) AS home_good_click_retention_quality
    FROM (
        SELECT
            regexp_replace(substr(t1.partition_date, 1, 10), '-', '') AS day_id,
            device_os_type,
            active_type,
            channel,
            COUNT(DISTINCT t1.device_id) AS dau,
            COUNT(DISTINCT t2.device_id) AS retention_num,
            COUNT(DISTINCT t3.cl_id) AS home_good_click_uv,
            COUNT(DISTINCT t5.cl_id) AS home_good_click_uv_2,
            -- Good-click devices that are active again the next day.
            COUNT(DISTINCT CASE WHEN t3.cl_id IS NOT NULL THEN t2.device_id END) AS home_good_click_retention_num,
            -- Devices with a home card click but no good click, and their next-day returners.
            COUNT(DISTINCT CASE WHEN t4.cl_id IS NOT NULL AND t3.cl_id IS NULL THEN t4.cl_id END) AS home_ungood_click_uv,
            COUNT(DISTINCT CASE WHEN t4.cl_id IS NOT NULL AND t3.cl_id IS NULL THEN t2.device_id END) AS home_ungood_click_retention_num,
            -- Devices with neither a card click nor a good click, and their next-day returners.
            COUNT(DISTINCT CASE WHEN t4.cl_id IS NULL AND t3.cl_id IS NULL THEN t1.device_id END) AS no_click_uv,
            COUNT(DISTINCT CASE WHEN t4.cl_id IS NULL AND t3.cl_id IS NULL THEN t2.device_id END) AS no_click_uv_retention_num
        FROM (
            -- t1: active devices per day, fanned out so that every device appears
            -- once under its own value and once under 合计 for each dimension.
            SELECT
                partition_date,
                a.device_os_type,
                b.active_type,
                device_id,
                v.channel
            FROM (
                SELECT
                    -- yyyyMMdd -> yyyy-MM-dd so that date_add can be applied later.
                    concat_ws('-', substr(partition_date, 1, 4), substr(partition_date, 5, 2), substr(partition_date, 7, 2)) AS partition_date,
                    array(device_os_type, '合计') AS device_os_type,
                    array(
                        CASE
                            WHEN active_type = '4' THEN '老活跃设备'
                            WHEN active_type IN ('1', '2') THEN '新增设备'
                        END,
                        '合计'
                    ) AS active_type,
                    device_id,
                    array(
                        CASE WHEN tmp.is_ai_channel = 'true' THEN 'AI' ELSE '其他' END,
                        '合计'
                    ) AS channel
                FROM online.ml_device_day_active_status m
                LEFT JOIN (
                    SELECT code, is_spam, is_ai_channel, partition_day
                    FROM DIM.DIM_AI_CHANNEL_ZP_NEW
                    WHERE partition_day >= '20190701'
                        AND partition_day <= regexp_replace(DATE_SUB(current_date, 1), '-', '')
                ) tmp
                    ON first_channel_source_type = tmp.code
                    AND m.partition_date = tmp.partition_day
                WHERE partition_date >= '20190701'
                    AND partition_date <= regexp_replace(DATE_SUB(current_date, 1), '-', '')
                    AND active_type IN ('1', '2', '4')
                    -- Channels excluded from the report.
                    AND first_channel_source_type NOT IN (
                        'yqxiu1', 'yqxiu2', 'yqxiu3', 'yqxiu4', 'yqxiu5',
                        'mxyc1', 'mxyc2', 'mxyc3',
                        'wanpu', 'jinshan', 'jx', 'maimai', 'zhuoyi', 'huatian',
                        'suopingjingling', 'mocha', 'mizhe', 'meika', 'lamabang',
                        'js-az1', 'js-az2', 'js-az3', 'js-az4', 'js-az5',
                        'jfq-az1', 'jfq-az2', 'jfq-az3', 'jfq-az4', 'jfq-az5',
                        'toufang1', 'toufang2', 'toufang3', 'toufang4', 'toufang5', 'toufang6',
                        'TF-toufang1', 'TF-toufang2', 'TF-toufang3', 'TF-toufang4', 'TF-toufang5',
                        'tf-toufang1', 'tf-toufang2', 'tf-toufang3', 'tf-toufang4', 'tf-toufang5',
                        'benzhan',
                        'promotion_aso100', 'promotion_qianka', 'promotion_xiaoyu', 'promotion_dianru',
                        'promotion_malioaso', 'promotion_malioaso-shequ', 'promotion_shike',
                        'promotion_julang_jl03', 'promotion_zuimei',
                        '', 'unknown'
                    )
                    AND first_channel_source_type NOT LIKE 'promotion\_jf\_%'
            ) mas
            LATERAL VIEW explode(mas.device_os_type) a AS device_os_type
            LATERAL VIEW explode(mas.active_type) b AS active_type
            LATERAL VIEW explode(mas.channel) v AS channel
        ) t1
        LEFT JOIN (
            -- t2: active devices, matched to t1 one day later (next-day retention).
            SELECT
                device_id,
                concat_ws('-', substr(partition_date, 1, 4), substr(partition_date, 5, 2), substr(partition_date, 7, 2)) AS partition_date
            FROM online.ml_device_day_active_status
            WHERE partition_date >= '20190701'
                AND partition_date <= regexp_replace(DATE_SUB(current_date, 1), '-', '')
        ) t2
            ON t1.device_id = t2.device_id
            AND date_add(t1.partition_date, 1) = t2.partition_date
        LEFT JOIN (
            -- t3: good clicks, meaning detail page views coming from home with a
            -- page_stay of at least 20.
            SELECT
                concat_ws('-', substr(partition_date, 1, 4), substr(partition_date, 5, 2), substr(partition_date, 7, 2)) AS partition_date,
                cl_id,
                COUNT(DISTINCT time_str) AS pv
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= '20190701'
                AND partition_date <= regexp_replace(DATE_SUB(current_date, 1), '-', '')
                AND referrer = 'home'
                AND action = 'page_view'
                AND page_name IN (
                    'diary_detail', 'topic_detail', 'post_detail', 'user_post_detail',
                    'doctor_post_detail', 'question_detail', 'answer_detail',
                    'question_answer_detail', 'video_steep', 'article_detail',
                    'wiki_detail', 'product_detail', 'wiki_brand', 'wiki_collect',
                    'welfare_detail'
                )
                AND page_stay >= 20
            GROUP BY partition_date, cl_id
        ) t3
            ON t1.partition_date = t3.partition_date
            AND t1.device_id = t3.cl_id
        LEFT JOIN (
            -- t4: devices that clicked a content card on the home page.
            SELECT
                concat_ws('-', substr(partition_date, 1, 4), substr(partition_date, 5, 2), substr(partition_date, 7, 2)) AS partition_date,
                cl_id,
                COUNT(DISTINCT time_str) AS pv
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= '20190701'
                AND partition_date <= regexp_replace(DATE_SUB(current_date, 1), '-', '')
                AND action = 'on_click_card'
                AND page_name = 'home'
                AND params['card_content_type'] IN ('diary', 'user_post', 'answer', 'qa')
            GROUP BY partition_date, cl_id
        ) t4
            ON t1.partition_date = t4.partition_date
            AND t1.device_id = t4.cl_id
        LEFT JOIN (
            -- t5: good clicks again, matched to t3 one day later.
            SELECT
                concat_ws('-', substr(partition_date, 1, 4), substr(partition_date, 5, 2), substr(partition_date, 7, 2)) AS partition_date,
                cl_id,
                COUNT(DISTINCT time_str) AS pv
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= '20190701'
                AND partition_date <= regexp_replace(DATE_SUB(current_date, 1), '-', '')
                AND referrer = 'home'
                AND action = 'page_view'
                AND page_name IN (
                    'diary_detail', 'topic_detail', 'post_detail', 'user_post_detail',
                    'doctor_post_detail', 'question_detail', 'answer_detail',
                    'question_answer_detail', 'video_steep', 'article_detail',
                    'wiki_detail', 'product_detail', 'wiki_brand', 'wiki_collect',
                    'welfare_detail'
                )
                AND page_stay >= 20
            GROUP BY partition_date, cl_id
        ) t5
            ON t3.cl_id = t5.cl_id
            AND date_add(t3.partition_date, 1) = t5.partition_date
        LEFT JOIN (
            -- Blacklisted devices, removed by the anti-join in the WHERE clause below.
            SELECT DISTINCT device_id
            FROM ML.ML_D_CT_DV_DEVICECLEAN_DIMEN_D
            WHERE PARTITION_DAY = regexp_replace(DATE_SUB(current_date, 1), '-', '')
                AND is_abnormal_device = 'true'
        ) spam_pv
            ON t1.device_id = spam_pv.device_id
        WHERE spam_pv.device_id IS NULL
        GROUP BY
            t1.partition_date,
            device_os_type,
            active_type,
            channel
    ) t1
) t2
GROUP BY
    substr(day_id, 1, 6),
    device_os_type,
    active_type,
    channel
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment