Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
B
bi-report
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
data
bi-report
Commits
2d0eff76
Commit
2d0eff76
authored
Oct 20, 2020
by
魏艺敏
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
push codes
parent
34b7d118
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
15 changed files
with
65 additions
and
174 deletions
+65
-174
en-cn.properties
pm/daily_content/en-cn.properties
+2
-0
create_daily_content_retention.sql
pm/daily_content/etl/create_daily_content_retention.sql
+40
-0
daily_content.sql
pm/daily_content/etl/daily_content.sql
+0
-0
ai_content_by_month.sql
pm/daily_content/report/ai_content_by_month.sql
+3
-3
ai_content_detail.sql
pm/daily_content/report/ai_content_detail.sql
+4
-3
home_content_retention.sql
pm/daily_content/report/home_content_retention.sql
+16
-0
en-cn.properties
pm/daily_content_retention/en-cn.properties
+0
-1
daily_content_retention.zip
pm/daily_content_retention/job/daily_content_retention.zip
+0
-0
step1_1.job
pm/daily_content_retention/job/step1_1.job
+0
-4
step1_2.job
pm/daily_content_retention/job/step1_2.job
+0
-4
step1_3.job
pm/daily_content_retention/job/step1_3.job
+0
-4
step1_4.job
pm/daily_content_retention/job/step1_4.job
+0
-4
step2.job
pm/daily_content_retention/job/step2.job
+0
-5
readme.txt
pm/daily_content_retention/readme.txt
+0
-0
daily_content_retention.sql
...aily_content_retention/report/daily_content_retention.sql
+0
-146
No files found.
pm/daily_content/en-cn.properties
View file @
2d0eff76
...
...
@@ -3,3 +3,4 @@ home_content_detail=首页内容数据-分日明细
home_content_by_month
=
首页内容数据-月均
ai_content_detail
=
ai内容数据-分日明细
ai_content_by_month
=
ai内容数据-月均
home_content_retention
=
分类用户次留
\ No newline at end of file
pm/daily_content/etl/create_daily_content_retention.sql
0 → 100644
View file @
2d0eff76
--***************************************************************
--* Script name  :
--* Purpose      : Content daily report - simplified version - for Sijing
--* Business line: pm
--* Input data   :
--* Author       : weiyimin@igengmei.com
--* Last updated :
--***************************************************************

-- Session setting: submit this session's MapReduce jobs to the `data` queue.
SET mapreduce.job.queuename=data;

-- Work in the pm database (the table below is also fully qualified).
USE pm;

-- Create the managed table that stores the per-day, per-segment retention
-- metrics of the content daily report. IF NOT EXISTS makes the script safe
-- to re-run. Column COMMENT values are JSON metadata strings and are kept
-- exactly as authored.
-- NOTE(review): the rate/quality columns are declared STRING although they
-- hold numeric ratios; downstream reports rely on implicit casts. Changing
-- the type would alter the table contract, so it is left untouched here.
CREATE TABLE IF NOT EXISTS pm.tl_pm_content_retention (
    day_id                            STRING COMMENT '{"chs_name":"当天日期","description":"","etl":"","value":"","remark":""}',
    device_os_type                    STRING COMMENT '{"chs_name":"设备类型","description":"","etl":"","value":"","remark":""}',
    active_type                       STRING COMMENT '{"chs_name":"活跃类型","description":"","etl":"","value":"","remark":""}',
    channel                           STRING COMMENT '{"chs_name":"渠道","description":"","etl":"","value":"","remark":""}',
    dau                               BIGINT COMMENT '{"chs_name":"日活","description":"","etl":"","value":"","remark":""}',
    retention_rate                    STRING COMMENT '{"chs_name":"次留率","description":"","etl":"","value":"","remark":""}',
    home_good_click_uv                BIGINT COMMENT '{"chs_name":"首页good click设备数","description":"","etl":"","value":"","remark":""}',
    home_good_click_quality           STRING COMMENT '{"chs_name":"首页gc用户次留率/全站次留率","description":"","etl":"","value":"","remark":""}',
    home_ungood_click_uv              BIGINT COMMENT '{"chs_name":"点击首页卡片但非gc设备数","description":"","etl":"","value":"","remark":""}',
    home_ungood_click_quality         STRING COMMENT '{"chs_name":"点击首页卡片但非gc设备次留率/全站次留率","description":"","etl":"","value":"","remark":""}',
    no_click_uv                       BIGINT COMMENT '{"chs_name":"未点击首页feed卡片设备数","description":"","etl":"","value":"","remark":""}',
    no_click_uv_quality               STRING COMMENT '{"chs_name":"未点击首页feed卡片设备次留率/全站次留率","description":"","etl":"","value":"","remark":""}',
    home_good_click_retention_quality STRING COMMENT '{"chs_name":"当天点击首页feed卡片,且次日依旧点击的次留率/全站次留率","description":"","etl":"","value":"","remark":""}'
)
COMMENT '内容日报-分用户次留'
PARTITIONED BY (
    PARTITION_DAY STRING COMMENT '分区日期'
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
    COLLECTION ITEMS TERMINATED BY '\002'
    MAP KEYS TERMINATED BY '\003'
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
\ No newline at end of file
pm/daily_content/etl/daily_content.sql
View file @
2d0eff76
This diff is collapsed.
Click to expand it.
pm/daily_content/report/ai_content_by_month.sql
View file @
2d0eff76
...
...
@@ -8,9 +8,9 @@ SELECT
,
ai_report_card_click_uv
AS
`ai结果页内容卡片点击uv`
,
ai_report_card_click_pv
AS
`ai结果页内容卡片点击pv`
,
ai_report_good_click
AS
`来源于ai结果页的good click`
,
CONCAT
(
ROUND
(
ai_report_card_click_uv
/
ai_report_uv
*
100
,
2
),
'%'
)
AS
`ai结果页内容卡片点击uv/
DAU
`
,
CONCAT
(
ROUND
(
ai_report_card_click_pv
/
ai_report_uv
*
100
,
2
),
'%'
)
AS
`ai结果页内容卡片点击pv/
DAU
`
,
CONCAT
(
ROUND
(
ai_report_good_click
/
ai_report_uv
*
100
,
2
),
'%'
)
AS
`来源于ai结果页的good click/
DAU
`
,
CONCAT
(
ROUND
(
ai_report_card_click_uv
/
ai_report_uv
*
100
,
2
),
'%'
)
AS
`ai结果页内容卡片点击uv/
ai结果页uv
`
,
CONCAT
(
ROUND
(
ai_report_card_click_pv
/
ai_report_uv
*
100
,
2
),
'%'
)
AS
`ai结果页内容卡片点击pv/
ai结果页uv
`
,
CONCAT
(
ROUND
(
ai_report_good_click
/
ai_report_uv
*
100
,
2
),
'%'
)
AS
`来源于ai结果页的good click/
ai结果页uv
`
FROM
(
select
substr
(
day_id
,
1
,
6
)
as
month
...
...
pm/daily_content/report/ai_content_detail.sql
View file @
2d0eff76
...
...
@@ -8,9 +8,9 @@ SELECT
,
ai_report_card_click_uv
AS
`ai结果页内容卡片点击uv`
,
ai_report_card_click_pv
AS
`ai结果页内容卡片点击pv`
,
ai_report_good_click
AS
`来源于ai结果页的good click`
,
NVL
(
CONCAT
(
ROUND
(
ai_report_card_click_uv
/
ai_report_uv
*
100
,
2
),
'%'
),
0
)
AS
`ai结果页内容卡片点击uv/
DAU
`
,
NVL
(
CONCAT
(
ROUND
(
ai_report_card_click_pv
/
ai_report_uv
*
100
,
2
),
'%'
),
0
)
AS
`ai结果页内容卡片点击pv/
DAU
`
,
NVL
(
CONCAT
(
ROUND
(
ai_report_good_click
/
ai_report_uv
*
100
,
2
),
'%'
),
0
)
AS
`来源于ai结果页的good click/
DAU
`
,
NVL
(
CONCAT
(
ROUND
(
ai_report_card_click_uv
/
ai_report_uv
*
100
,
2
),
'%'
),
0
)
AS
`ai结果页内容卡片点击uv/
ai结果页uv
`
,
NVL
(
CONCAT
(
ROUND
(
ai_report_card_click_pv
/
ai_report_uv
*
100
,
2
),
'%'
),
0
)
AS
`ai结果页内容卡片点击pv/
ai结果页uv
`
,
NVL
(
CONCAT
(
ROUND
(
ai_report_good_click
/
ai_report_uv
*
100
,
2
),
'%'
),
0
)
AS
`来源于ai结果页的good click/
ai结果页uv
`
FROM
pm
.
tl_pm_content_v3
where
partition_day
>=
'20201018'
and
partition_day
<=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
order
by
`日期`
,
`系统`
,
`活跃`
,
`渠道`
\ No newline at end of file
pm/daily_content/report/home_content_retention.sql
0 → 100644
View file @
2d0eff76
-- Home-feed retention report: averages the daily rows of
-- pm.tl_pm_content_retention per (first six characters of day_id, OS,
-- activity type, channel).
-- NOTE(review): day_id is presumably yyyyMMdd, which would make the first
-- output column a month key even though it is labelled as a date - confirm.
SELECT
    substr(day_id, 1, 6) AS `日期`,
    device_os_type       AS `系统`,
    active_type          AS `活跃`,
    channel              AS `渠道`,

    -- Device counts: plain average, rounded to a whole number.
    round(avg(home_good_click_uv), 0) AS `首页good click设备数`,

    -- Quality ratios: a stored 0 is turned into NULL so that avg() skips it
    -- instead of letting it pull the average down.
    round(
        avg(
            CASE
                WHEN home_good_click_quality = 0 THEN NULL
                ELSE home_good_click_quality
            END
        ),
        2
    ) AS `首页gc用户次留率/全站次留率`,

    round(avg(home_ungood_click_uv), 0) AS `点击首页卡片但非gc设备数`,

    round(
        avg(
            CASE
                WHEN home_ungood_click_quality = 0 THEN NULL
                ELSE home_ungood_click_quality
            END
        ),
        2
    ) AS `点击首页卡片但非gc设备次留率/全站次留率`,

    round(avg(no_click_uv), 0) AS `未点击首页feed卡片设备数`,

    round(
        avg(
            CASE
                WHEN no_click_uv_quality = 0 THEN NULL
                ELSE no_click_uv_quality
            END
        ),
        2
    ) AS `未点击首页feed卡片设备次留率/全站次留率`,

    round(
        avg(
            CASE
                WHEN home_good_click_retention_quality = 0 THEN NULL
                ELSE home_good_click_retention_quality
            END
        ),
        2
    ) AS `当天点击首页feed卡片,且次日依旧点击的次留率/全站次留率`
FROM pm.tl_pm_content_retention
WHERE
    -- Fixed lower bound; upper bound is yesterday formatted as yyyyMMdd.
    partition_day >= '20201018'
    AND partition_day <= regexp_replace(DATE_SUB(current_date, 1), '-', '')
GROUP BY
    substr(day_id, 1, 6),
    device_os_type,
    active_type,
    channel
ORDER BY
    `日期`,
    `系统`,
    `活跃`,
    `渠道`
\ No newline at end of file
pm/daily_content_retention/en-cn.properties
deleted
100644 → 0
View file @
34b7d118
daily_content_retention
=
内容日报-次留率
pm/daily_content_retention/job/daily_content_retention.zip
deleted
100644 → 0
View file @
34b7d118
File deleted
pm/daily_content_retention/job/step1_1.job
deleted
100644 → 0
View file @
34b7d118
#step1_1.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online bl_hdfs_maidian_updates
\ No newline at end of file
pm/daily_content_retention/job/step1_2.job
deleted
100644 → 0
View file @
34b7d118
#step1_2.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_device_day_active_status
\ No newline at end of file
pm/daily_content_retention/job/step1_3.job
deleted
100644 → 0
View file @
34b7d118
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive ml ML_D_CT_DV_DEVICECLEAN_DIMEN_D
\ No newline at end of file
pm/daily_content_retention/job/step1_4.job
deleted
100644 → 0
View file @
34b7d118
#step1_4.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive DIM DIM_AI_CHANNEL_ZP_NEW
\ No newline at end of file
pm/daily_content_retention/job/step2.job
deleted
100644 → 0
View file @
34b7d118
#step2.job
type=command
dependencies=step1_1,step1_2,step1_3,step1_4
command=curl -X GET http://localhost:8553/api/report/email/daily_content_retention/weiyimin@igengmei.com/hanyingyue@igengmei.com
\ No newline at end of file
pm/daily_content_retention/readme.txt
deleted
100644 → 0
View file @
34b7d118
pm/daily_content_retention/report/daily_content_retention.sql
deleted
100644 → 0
View file @
34b7d118
-- Content daily report - next-day retention, averaged per month.
--
-- Layers (innermost first):
--   1. daily_counts : per day / OS / activity type / channel, distinct-device
--                     counts for DAU, next-day retained devices and the
--                     home-feed click segments (good click, click without
--                     good click, no click).
--   2. daily_rates  : turns the counts into a retention rate (percent) and
--                     "quality" ratios (segment retention rate divided by the
--                     overall retention rate). Undefined ratios become 0.
--   3. outer query  : averages the daily rows per month; 0 ratios are mapped
--                     back to NULL so that avg() ignores them.
--
-- Fix versus the previous revision: no_click_uv_retention_num used the
-- "clicked a card but no good click" predicate (copy of
-- home_ungood_click_retention_num). It now uses the same predicate as
-- no_click_uv, so no_click_uv_quality really describes no-click devices.
SELECT
    substr(day_id, 1, 6) AS month,
    device_os_type,
    active_type,
    channel,
    round(avg(dau), 0) AS dau,
    round(avg(if(retention_rate = 0, NULL, retention_rate)), 2) AS retention_rate,
    round(avg(home_good_click_uv), 0) AS home_good_click_uv,
    round(avg(if(home_good_click_quality = 0, NULL, home_good_click_quality)), 2) AS home_good_click_quality,
    round(avg(home_ungood_click_uv), 0) AS home_ungood_click_uv,
    round(avg(if(home_ungood_click_quality = 0, NULL, home_ungood_click_quality)), 2) AS home_ungood_click_quality,
    round(avg(no_click_uv), 0) AS no_click_uv,
    round(avg(if(no_click_uv_quality = 0, NULL, no_click_uv_quality)), 2) AS no_click_uv_quality,
    round(avg(if(home_good_click_retention_quality = 0, NULL, home_good_click_retention_quality)), 2) AS home_good_click_retention_quality
FROM (
    SELECT
        day_id,
        device_os_type,
        active_type,
        channel,
        dau,
        -- Next-day retention rate in percent.
        COALESCE(ROUND(retention_num / dau * 100, 2), 0) AS retention_rate,
        home_good_click_uv,
        -- (segment retained / segment size) / (retained / dau), rearranged.
        COALESCE(ROUND(home_good_click_retention_num * dau / home_good_click_uv / retention_num, 2), 0) AS home_good_click_quality,
        home_ungood_click_uv,
        COALESCE(ROUND(home_ungood_click_retention_num * dau / home_ungood_click_uv / retention_num, 2), 0) AS home_ungood_click_quality,
        no_click_uv,
        COALESCE(ROUND(no_click_uv_retention_num * dau / no_click_uv / retention_num, 2), 0) AS no_click_uv_quality,
        COALESCE(ROUND(home_good_click_uv_2 * dau / home_good_click_uv / retention_num, 2), 0) AS home_good_click_retention_quality
    FROM (
        SELECT
            regexp_replace(substr(active.partition_date, 1, 10), '-', '') AS day_id,
            device_os_type,
            active_type,
            channel,
            count(DISTINCT active.device_id) AS dau,
            count(DISTINCT retained.device_id) AS retention_num,
            count(DISTINCT good_click.cl_id) AS home_good_click_uv,
            -- Devices with a good click on the day AND on the following day.
            count(DISTINCT good_click_next.cl_id) AS home_good_click_uv_2,
            count(DISTINCT CASE
                               WHEN good_click.cl_id IS NOT NULL
                               THEN retained.device_id
                           END) AS home_good_click_retention_num,
            count(DISTINCT CASE
                               WHEN card_click.cl_id IS NOT NULL AND good_click.cl_id IS NULL
                               THEN card_click.cl_id
                           END) AS home_ungood_click_uv,
            count(DISTINCT CASE
                               WHEN card_click.cl_id IS NOT NULL AND good_click.cl_id IS NULL
                               THEN retained.device_id
                           END) AS home_ungood_click_retention_num,
            count(DISTINCT CASE
                               WHEN card_click.cl_id IS NULL AND good_click.cl_id IS NULL
                               THEN active.device_id
                           END) AS no_click_uv,
            -- Same segment predicate as no_click_uv (previously a copy of the
            -- "ungood click" predicate, which produced a wrong numerator).
            count(DISTINCT CASE
                               WHEN card_click.cl_id IS NULL AND good_click.cl_id IS NULL
                               THEN retained.device_id
                           END) AS no_click_uv_retention_num
        FROM (
            -- Active devices of the day. Each dimension is an array of
            -- (actual value, '合计') which the lateral views explode so every
            -- device also contributes to the "total" rows.
            SELECT
                partition_date,
                a.device_os_type,
                b.active_type,
                device_id,
                v.channel
            FROM (
                SELECT
                    concat_ws('-', substr(partition_date, 1, 4), substr(partition_date, 5, 2), substr(partition_date, 7, 2)) AS partition_date,
                    array(device_os_type, '合计') AS device_os_type,
                    array(
                        CASE
                            WHEN active_type = '4' THEN '老活跃设备'
                            WHEN active_type IN ('1', '2') THEN '新增设备'
                        END,
                        '合计'
                    ) AS active_type,
                    device_id,
                    array(
                        CASE
                            WHEN tmp.is_ai_channel = 'true' THEN 'AI'
                            ELSE '其他'
                        END,
                        '合计'
                    ) AS channel
                FROM online.ml_device_day_active_status m
                LEFT JOIN (
                    -- Channel dimension of the same day (AI channel flag).
                    SELECT
                        code,
                        is_spam,
                        is_ai_channel,
                        partition_day
                    FROM DIM.DIM_AI_CHANNEL_ZP_NEW
                    WHERE partition_day >= '20190701'
                      AND partition_day <= regexp_replace(DATE_SUB(current_date, 1), '-', '')
                ) tmp
                    ON first_channel_source_type = tmp.code
                   AND m.partition_date = tmp.partition_day
                WHERE partition_date >= '20190701'
                  AND partition_date <= regexp_replace(DATE_SUB(current_date, 1), '-', '')
                  AND active_type IN ('1', '2', '4')
                  -- Excluded first-channel sources.
                  AND first_channel_source_type NOT IN (
                        'yqxiu1', 'yqxiu2', 'yqxiu3', 'yqxiu4', 'yqxiu5',
                        'mxyc1', 'mxyc2', 'mxyc3',
                        'wanpu', 'jinshan', 'jx', 'maimai', 'zhuoyi', 'huatian',
                        'suopingjingling', 'mocha', 'mizhe', 'meika', 'lamabang',
                        'js-az1', 'js-az2', 'js-az3', 'js-az4', 'js-az5',
                        'jfq-az1', 'jfq-az2', 'jfq-az3', 'jfq-az4', 'jfq-az5',
                        'toufang1', 'toufang2', 'toufang3', 'toufang4', 'toufang5', 'toufang6',
                        'TF-toufang1', 'TF-toufang2', 'TF-toufang3', 'TF-toufang4', 'TF-toufang5',
                        'tf-toufang1', 'tf-toufang2', 'tf-toufang3', 'tf-toufang4', 'tf-toufang5',
                        'benzhan',
                        'promotion_aso100', 'promotion_qianka', 'promotion_xiaoyu',
                        'promotion_dianru', 'promotion_malioaso', 'promotion_malioaso-shequ',
                        'promotion_shike', 'promotion_julang_jl03', 'promotion_zuimei',
                        '', 'unknown'
                      )
                  AND first_channel_source_type NOT LIKE 'promotion\_jf\_%'
            ) mas
            LATERAL VIEW explode(mas.device_os_type) a AS device_os_type
            LATERAL VIEW explode(mas.active_type) b AS active_type
            LATERAL VIEW explode(mas.channel) v AS channel
        ) active
        LEFT JOIN (
            -- Active devices; matched on day + 1 to detect next-day retention.
            SELECT
                device_id,
                concat_ws('-', substr(partition_date, 1, 4), substr(partition_date, 5, 2), substr(partition_date, 7, 2)) AS partition_date
            FROM online.ml_device_day_active_status
            WHERE partition_date >= '20190701'
              AND partition_date <= regexp_replace(DATE_SUB(current_date, 1), '-', '')
        ) retained
            ON active.device_id = retained.device_id
           AND date_add(active.partition_date, 1) = retained.partition_date
        LEFT JOIN (
            -- "Good click": a detail page opened from home and viewed for at
            -- least 20 (page_stay units).
            SELECT
                concat_ws('-', substr(partition_date, 1, 4), substr(partition_date, 5, 2), substr(partition_date, 7, 2)) AS partition_date,
                cl_id,
                count(DISTINCT time_str) AS pv
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= '20190701'
              AND partition_date <= regexp_replace(DATE_SUB(current_date, 1), '-', '')
              AND referrer = 'home'
              AND action = 'page_view'
              AND page_name IN (
                    'diary_detail', 'topic_detail', 'post_detail', 'user_post_detail',
                    'doctor_post_detail', 'question_detail', 'answer_detail',
                    'question_answer_detail', 'video_steep', 'article_detail',
                    'wiki_detail', 'product_detail', 'wiki_brand', 'wiki_collect',
                    'welfare_detail'
                  )
              AND page_stay >= 20
            GROUP BY
                partition_date,
                cl_id
        ) good_click
            ON active.partition_date = good_click.partition_date
           AND active.device_id = good_click.cl_id
        LEFT JOIN (
            -- Any click on a home-feed content card.
            SELECT
                concat_ws('-', substr(partition_date, 1, 4), substr(partition_date, 5, 2), substr(partition_date, 7, 2)) AS partition_date,
                cl_id,
                count(DISTINCT time_str) AS pv
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= '20190701'
              AND partition_date <= regexp_replace(DATE_SUB(current_date, 1), '-', '')
              AND action = 'on_click_card'
              AND page_name = 'home'
              AND params['card_content_type'] IN ('diary', 'user_post', 'answer', 'qa')
            GROUP BY
                partition_date,
                cl_id
        ) card_click
            ON active.partition_date = card_click.partition_date
           AND active.device_id = card_click.cl_id
        LEFT JOIN (
            -- Good clicks again, joined on day + 1 of the first good click to
            -- find devices that good-click on two consecutive days.
            SELECT
                concat_ws('-', substr(partition_date, 1, 4), substr(partition_date, 5, 2), substr(partition_date, 7, 2)) AS partition_date,
                cl_id,
                count(DISTINCT time_str) AS pv
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= '20190701'
              AND partition_date <= regexp_replace(DATE_SUB(current_date, 1), '-', '')
              AND referrer = 'home'
              AND action = 'page_view'
              AND page_name IN (
                    'diary_detail', 'topic_detail', 'post_detail', 'user_post_detail',
                    'doctor_post_detail', 'question_detail', 'answer_detail',
                    'question_answer_detail', 'video_steep', 'article_detail',
                    'wiki_detail', 'product_detail', 'wiki_brand', 'wiki_collect',
                    'welfare_detail'
                  )
              AND page_stay >= 20
            GROUP BY
                partition_date,
                cl_id
        ) good_click_next
            ON good_click.cl_id = good_click_next.cl_id
           AND date_add(good_click.partition_date, 1) = good_click_next.partition_date
        LEFT JOIN (
            -- Blacklisted devices (flagged abnormal in yesterday's snapshot);
            -- removed through the anti-join filter in the WHERE clause below.
            SELECT DISTINCT
                device_id
            FROM ML.ML_D_CT_DV_DEVICECLEAN_DIMEN_D
            WHERE PARTITION_DAY = regexp_replace(DATE_SUB(current_date, 1), '-', '')
              AND is_abnormal_device = 'true'
        ) spam_pv
            ON active.device_id = spam_pv.device_id
        WHERE spam_pv.device_id IS NULL
        GROUP BY
            active.partition_date,
            device_os_type,
            active_type,
            channel
    ) daily_counts
) daily_rates
GROUP BY
    substr(day_id, 1, 6),
    device_os_type,
    active_type,
    channel
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment