Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
B
bi-report
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
data
bi-report
Commits
ffbe21dc
Commit
ffbe21dc
authored
May 28, 2020
by
魏艺敏
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update codes
parent
7542f649
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
152 additions
and
152 deletions
+152
-152
daily_content_data.sql
sqls/daily_content_data/daily_content_data.sql
+1
-1
daily_content_data_insert.sql
sqls/daily_content_data_insert/daily_content_data_insert.sql
+151
-151
No files found.
sqls/daily_content_data/daily_content_data.sql
View file @
ffbe21dc
...
...
@@ -88,4 +88,4 @@ SELECT
,
per_article_pv
AS
`专栏PV/专栏UV`
,
article_stay
AS
`专栏单设备时长(m)`
FROM
pm
.
tl_pm_content_d
where
partition_day
=
regexp_replace
(
(
current_date
-
interval
'1'
day
)
,
'-'
,
''
)
where
partition_day
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
sqls/daily_content_data_insert/daily_content_data_insert.sql
View file @
ffbe21dc
INSERT
OVERWRITE
TABLE
tl_pm_content_d
PARTITION
(
PARTITION_DAY
=
regexp_replace
((
current_date
-
interval
'1'
day
),
'-'
,
''
)
)
INSERT
OVERWRITE
TABLE
tl_pm_content_d
PARTITION
(
PARTITION_DAY
=
#
partition_day
)
SELECT
T1
.
partition_date
AS
day_id
,
T1
.
device_type
AS
device_os_type
,
T1
.
device_
os_
type
AS
device_os_type
,
T1
.
active_type
AS
active_type
,
T1
.
channel
AS
is_ai_channel
,
COALESCE
(
T1
.
dau
,
0
)
AS
dau
...
...
@@ -90,22 +90,22 @@ SELECT
,
COALESCE
(
T3
.
article_stay
,
0
)
AS
article_stay
FROM
(
--基础维度/dau
SELECT
partition_date
,
device_type
,
active_type
,
t2
.
channel
,
sum
(
dau
)
AS
dau
SELECT
partition_date
,
device_
os_
type
,
active_type
,
t2
.
channel
,
sum
(
dau
)
AS
dau
FROM
(
SELECT
partition_date
,
device_os_type
AS
device_type
,
device_os_type
,
CASE
WHEN
active_type
=
'4'
THEN
'老活跃设备'
WHEN
active_type
IN
(
'1'
,
'2'
)
THEN
'新增设备'
END
AS
active_type
,
array
(
CASE
WHEN
tmp
.
time
=
'AI'
or
(
partition_date
<
20200301
AND
first_channel_source_type
like
'promotion_toutiao_jy%'
)
THEN
'AI'
ELSE
'其他'
END
,
'合计'
)
as
channel
,
array
(
CASE
WHEN
tmp
.
col2
=
'AI'
or
(
partition_date
<
20200301
AND
first_channel_source_type
like
'promotion_toutiao_jy%'
)
THEN
'AI'
ELSE
'其他'
END
,
'合计'
)
as
channel
,
count
(
1
)
AS
dau
FROM
online
.
ml_device_day_active_status
LEFT
JOIN
(
SELECT
phone
,
time
FROM
offline
.
tmp_zhx_20191227
WHERE
flag
=
'0204_danlei_channel'
)
tmp
on
first_channel_source_type
=
tmp
.
phone
WHERE
partition_date
>=
regexp_replace
(
(
current_date
-
interval
'60'
day
)
,
'-'
,
''
)
(
SELECT
col1
,
col2
--col1:子渠道,col2:是否属于AI,col3:标识
FROM
pm
.
tl_pm_ydl
WHERE
col3
=
'0204_danlei_channel'
)
tmp
on
first_channel_source_type
=
tmp
.
col1
WHERE
partition_date
>=
regexp_replace
(
DATE_SUB
(
current_date
,
60
)
,
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
AND
active_type
IN
(
'1'
,
'2'
,
'4'
)
AND
first_channel_source_type
not
IN
(
'yqxiu1'
,
'yqxiu2'
,
'yqxiu3'
,
'yqxiu4'
,
'yqxiu5'
,
'mxyc1'
,
'mxyc2'
,
'mxyc3'
...
...
@@ -118,15 +118,15 @@ FROM
AND
first_channel_source_type
not
LIKE
'promotion
\_
jf
\_
%'
GROUP
BY
partition_date
,
device_os_type
,
CASE
WHEN
active_type
=
'4'
THEN
'老活跃设备'
WHEN
active_type
IN
(
'1'
,
'2'
)
THEN
'新增设备'
END
,
CASE
WHEN
tmp
.
time
=
'AI'
or
(
partition_date
<
20200301
AND
first_channel_source_type
like
'promotion_toutiao_jy%'
)
THEN
'AI'
ELSE
'其他'
END
CASE
WHEN
tmp
.
col2
=
'AI'
or
(
partition_date
<
20200301
AND
first_channel_source_type
like
'promotion_toutiao_jy%'
)
THEN
'AI'
ELSE
'其他'
END
)
t1
LATERAL
VIEW
explode
(
t1
.
channel
)
t2
AS
channel
GROUP
BY
partition_date
,
device_type
,
active_type
,
t2
.
channel
GROUP
BY
partition_date
,
device_
os_
type
,
active_type
,
t2
.
channel
)
T1
LEFT
JOIN
(
--内容uv/pv
SELECT
partition_date
,
device_type
,
device_
os_
type
,
active_type
,
t4
.
channel
,
count
(
cl_id
)
as
neirong_pv
...
...
@@ -149,22 +149,22 @@ LEFT JOIN
,
count
(
distinct
CASE
WHEN
page_name
IN
(
'wiki_detail'
,
'product_detail'
,
'wiki_brand'
,
'wiki_collect'
)
THEN
cl_id
END
)
AS
wiki_uv
FROM
(
SELECT
t1
.
partition_date
,
device_type
,
active_type
,
channel
,
t2
.
cl_id
,
t2
.
page_name
SELECT
t1
.
partition_date
,
device_
os_
type
,
active_type
,
channel
,
t2
.
cl_id
,
t2
.
page_name
FROM
(
SELECT
partition_date
,
device_os_type
AS
device_type
,
device_os_type
,
CASE
WHEN
active_type
=
'4'
THEN
'老活跃设备'
WHEN
active_type
IN
(
'1'
,
'2'
)
THEN
'新增设备'
END
AS
active_type
,
array
(
CASE
WHEN
tmp
.
time
=
'AI'
or
(
partition_date
<
20200301
AND
first_channel_source_type
like
'promotion_toutiao_jy%'
)
THEN
'AI'
ELSE
'其他'
END
,
'合计'
)
as
channel
,
array
(
CASE
WHEN
tmp
.
col2
=
'AI'
or
(
partition_date
<
20200301
AND
first_channel_source_type
like
'promotion_toutiao_jy%'
)
THEN
'AI'
ELSE
'其他'
END
,
'合计'
)
as
channel
,
device_id
FROM
online
.
ml_device_day_active_status
LEFT
JOIN
(
SELECT
phone
,
time
FROM
offline
.
tmp_zhx_20191227
WHERE
flag
=
'0204_danlei_channel'
)
tmp
on
first_channel_source_type
=
tmp
.
phone
WHERE
partition_date
>=
regexp_replace
(
(
current_date
-
interval
'60'
day
)
,
'-'
,
''
)
(
SELECT
col1
,
col2
--col1:子渠道,col2:是否属于AI,col3:标识
FROM
pm
.
tl_pm_ydl
WHERE
col3
=
'0204_danlei_channel'
)
tmp
on
first_channel_source_type
=
tmp
.
col1
WHERE
partition_date
>=
regexp_replace
(
DATE_SUB
(
current_date
,
60
)
,
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
AND
active_type
IN
(
'1'
,
'2'
,
'4'
)
AND
first_channel_source_type
not
IN
(
'yqxiu1'
,
'yqxiu2'
,
'yqxiu3'
,
'yqxiu4'
,
'yqxiu5'
,
'mxyc1'
,
'mxyc2'
,
'mxyc3'
...
...
@@ -180,7 +180,7 @@ LEFT JOIN
(
SELECT
partition_date
,
page_name
,
cl_id
FROM
online
.
bl_hdfs_maidian_updates
WHERE
partition_date
>=
regexp_replace
(
(
current_date
-
interval
'60'
day
)
,
'-'
,
''
)
WHERE
partition_date
>=
regexp_replace
(
DATE_SUB
(
current_date
,
60
)
,
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
AND
action
=
'page_view'
AND
page_name
IN
(
'diary_detail'
,
'topic_detail'
,
'post_detail'
,
'user_post_detail'
,
'doctor_post_detail'
,
'question_detail'
,
'answer_detail'
,
'question_answer_detail'
,
...
...
@@ -192,23 +192,23 @@ LEFT JOIN
(
-- 去掉疑似机构刷量的PV和UV
select
distinct
device_id
from
ml
.
ml_d_ct_dv_devicespam_d
where
partition_day
=
regexp_replace
(
(
current_date
-
interval
'1'
day
)
,
'-'
,
''
)
where
partition_day
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
)
spam_pv
on
t2
.
cl_id
=
spam_pv
.
device_id
WHERE
spam_pv
.
device_id
IS
NULL
)
t3
LATERAL
VIEW
explode
(
t3
.
channel
)
t4
AS
channel
GROUP
BY
partition_date
,
device_type
,
active_type
,
t4
.
channel
GROUP
BY
partition_date
,
device_
os_
type
,
active_type
,
t4
.
channel
)
T2
ON
T1
.
partition_date
=
T2
.
partition_date
AND
T1
.
device_
type
=
T2
.
device
_type
AND
T1
.
device_
os_type
=
T2
.
device_os
_type
AND
T1
.
active_type
=
T2
.
active_type
AND
T1
.
channel
=
T2
.
channel
LEFT
JOIN
(
--内容浏览时长
SELECT
partition_date
,
device_type
,
device_
os_
type
,
active_type
,
t4
.
channel
,
round
(
sum
(
page_stay
)
/
count
(
distinct
cl_id
)
/
60
,
4
)
as
neirong_stay
...
...
@@ -230,22 +230,22 @@ LEFT JOIN
count
(
distinct
CASE
WHEN
page_name
IN
(
'wiki_detail'
,
'product_detail'
,
'wiki_brand'
,
'wiki_collect'
)
THEN
cl_id
END
)
/
60
,
4
)
AS
wiki_stay
FROM
(
SELECT
t1
.
partition_date
,
device_type
,
active_type
,
channel
,
t2
.
cl_id
,
t2
.
page_name
,
t2
.
page_stay
SELECT
t1
.
partition_date
,
device_
os_
type
,
active_type
,
channel
,
t2
.
cl_id
,
t2
.
page_name
,
t2
.
page_stay
FROM
(
SELECT
partition_date
,
device_os_type
AS
device_type
,
device_os_type
,
CASE
WHEN
active_type
=
'4'
THEN
'老活跃设备'
WHEN
active_type
IN
(
'1'
,
'2'
)
THEN
'新增设备'
END
AS
active_type
,
array
(
CASE
WHEN
tmp
.
time
=
'AI'
or
(
partition_date
<
20200301
AND
first_channel_source_type
like
'promotion_toutiao_jy%'
)
THEN
'AI'
ELSE
'其他'
END
,
'合计'
)
as
channel
,
array
(
CASE
WHEN
tmp
.
col2
=
'AI'
or
(
partition_date
<
20200301
AND
first_channel_source_type
like
'promotion_toutiao_jy%'
)
THEN
'AI'
ELSE
'其他'
END
,
'合计'
)
as
channel
,
device_id
FROM
online
.
ml_device_day_active_status
LEFT
JOIN
(
SELECT
phone
,
time
FROM
offline
.
tmp_zhx_20191227
WHERE
flag
=
'0204_danlei_channel'
)
tmp
on
first_channel_source_type
=
tmp
.
phone
WHERE
partition_date
>=
regexp_replace
(
(
current_date
-
interval
'60'
day
)
,
'-'
,
''
)
(
SELECT
col1
,
col2
--col1:子渠道,col2:是否属于AI,col3:标识
FROM
pm
.
tl_pm_ydl
WHERE
col3
=
'0204_danlei_channel'
)
tmp
on
first_channel_source_type
=
tmp
.
col1
WHERE
partition_date
>=
regexp_replace
(
DATE_SUB
(
current_date
,
60
)
,
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
AND
active_type
IN
(
'1'
,
'2'
,
'4'
)
AND
first_channel_source_type
not
IN
(
'yqxiu1'
,
'yqxiu2'
,
'yqxiu3'
,
'yqxiu4'
,
'yqxiu5'
,
'mxyc1'
,
'mxyc2'
,
'mxyc3'
...
...
@@ -263,7 +263,7 @@ LEFT JOIN
(
SELECT
partition_date
,
page_name
,
cl_id
,
page_stay
FROM
online
.
bl_hdfs_maidian_updates
WHERE
partition_date
>=
regexp_replace
(
(
current_date
-
interval
'60'
day
)
,
'-'
,
''
)
WHERE
partition_date
>=
regexp_replace
(
DATE_SUB
(
current_date
,
60
)
,
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
AND
action
=
'page_view'
AND
page_name
IN
(
'diary_detail'
,
'topic_detail'
,
'post_detail'
,
'user_post_detail'
,
'doctor_post_detail'
,
'question_detail'
,
'answer_detail'
,
'question_answer_detail'
,
...
...
@@ -276,45 +276,45 @@ LEFT JOIN
(
-- 去掉疑似机构刷量的PV和UV
select
distinct
device_id
from
ml
.
ml_d_ct_dv_devicespam_d
where
partition_day
=
regexp_replace
(
(
current_date
-
interval
'1'
day
)
,
'-'
,
''
)
where
partition_day
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
)
spam_pv
on
t2
.
cl_id
=
spam_pv
.
device_id
WHERE
spam_pv
.
device_id
IS
NULL
)
t3
LATERAL
VIEW
explode
(
t3
.
channel
)
t4
AS
channel
GROUP
BY
partition_date
,
device_type
,
active_type
,
t4
.
channel
GROUP
BY
partition_date
,
device_
os_
type
,
active_type
,
t4
.
channel
)
T3
ON
T1
.
partition_date
=
T3
.
partition_date
AND
T1
.
device_
type
=
T3
.
device
_type
AND
T1
.
device_
os_type
=
T3
.
device_os
_type
AND
T1
.
active_type
=
T3
.
active_type
AND
T1
.
channel
=
T3
.
channel
LEFT
JOIN
(
--内容用户留存
SELECT
regexp_replace
(
partition_date
,
'-'
,
''
)
AS
partition_date
,
device_type
,
active_type
,
t5
.
channel
,
device_
os_
type
,
active_type
,
t5
.
channel
,
int
(
count
(
DISTINCT
CASE
WHEN
date_add
(
partition_date
,
1
)
=
retention_date
THEN
device_id
END
))
AS
retention_num1
,
int
(
count
(
DISTINCT
CASE
WHEN
date_add
(
partition_date
,
6
)
=
retention_date
THEN
device_id
END
))
AS
retention_num7
,
int
(
count
(
DISTINCT
CASE
WHEN
date_add
(
partition_date
,
29
)
=
retention_date
THEN
device_id
END
))
AS
retention_num30
FROM
(
SELECT
t1
.
partition_date
,
device_type
,
active_type
,
channel
,
t3
.
device_id
,
t3
.
partition_date
as
retention_date
SELECT
t1
.
partition_date
,
device_
os_
type
,
active_type
,
channel
,
t3
.
device_id
,
t3
.
partition_date
as
retention_date
FROM
(
SELECT
concat_ws
(
'-'
,
substr
(
partition_date
,
1
,
4
),
substr
(
partition_date
,
5
,
2
),
substr
(
partition_date
,
7
,
2
))
AS
partition_date
,
device_os_type
AS
device_type
,
device_os_type
,
CASE
WHEN
active_type
=
'4'
THEN
'老活跃设备'
WHEN
active_type
IN
(
'1'
,
'2'
)
THEN
'新增设备'
END
AS
active_type
,
array
(
CASE
WHEN
tmp
.
time
=
'AI'
or
(
partition_date
<
20200301
AND
first_channel_source_type
like
'promotion_toutiao_jy%'
)
THEN
'AI'
ELSE
'其他'
END
,
'合计'
)
as
channel
,
array
(
CASE
WHEN
tmp
.
col2
=
'AI'
or
(
partition_date
<
20200301
AND
first_channel_source_type
like
'promotion_toutiao_jy%'
)
THEN
'AI'
ELSE
'其他'
END
,
'合计'
)
as
channel
,
device_id
FROM
online
.
ml_device_day_active_status
LEFT
JOIN
(
SELECT
phone
,
time
FROM
offline
.
tmp_zhx_20191227
WHERE
flag
=
'0204_danlei_channel'
)
tmp
on
first_channel_source_type
=
tmp
.
phone
WHERE
partition_date
>=
regexp_replace
(
(
current_date
-
interval
'60'
day
)
,
'-'
,
''
)
(
SELECT
col1
,
col2
--col1:子渠道,col2:是否属于AI,col3:标识
FROM
pm
.
tl_pm_ydl
WHERE
col3
=
'0204_danlei_channel'
)
tmp
on
first_channel_source_type
=
tmp
.
col1
WHERE
partition_date
>=
regexp_replace
(
DATE_SUB
(
current_date
,
60
)
,
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
AND
active_type
IN
(
'1'
,
'2'
,
'4'
)
AND
first_channel_source_type
not
IN
(
'yqxiu1'
,
'yqxiu2'
,
'yqxiu3'
,
'yqxiu4'
,
'yqxiu5'
,
'mxyc1'
,
'mxyc2'
,
'mxyc3'
...
...
@@ -332,7 +332,7 @@ LEFT JOIN
SELECT
cl_id
,
concat_ws
(
'-'
,
substr
(
partition_date
,
1
,
4
),
substr
(
partition_date
,
5
,
2
),
substr
(
partition_date
,
7
,
2
))
AS
partition_date
FROM
online
.
bl_hdfs_maidian_updates
WHERE
partition_date
>=
regexp_replace
(
(
current_date
-
interval
'60'
day
)
,
'-'
,
''
)
WHERE
partition_date
>=
regexp_replace
(
DATE_SUB
(
current_date
,
60
)
,
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
AND
action
=
'page_view'
AND
page_name
IN
(
'diary_detail'
,
'topic_detail'
,
'post_detail'
,
'user_post_detail'
,
'doctor_post_detail'
,
'question_detail'
,
'answer_detail'
,
'question_answer_detail'
,
...
...
@@ -346,7 +346,7 @@ LEFT JOIN
(
-- 去掉疑似机构刷量的PV和UV
select
distinct
device_id
from
ml
.
ml_d_ct_dv_devicespam_d
where
partition_day
=
regexp_replace
(
(
current_date
-
interval
'1'
day
)
,
'-'
,
''
)
where
partition_day
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
)
spam_pv
on
t2
.
cl_id
=
spam_pv
.
device_id
LEFT
JOIN
...
...
@@ -354,46 +354,46 @@ LEFT JOIN
SELECT
device_id
,
concat_ws
(
'-'
,
substr
(
partition_date
,
1
,
4
),
substr
(
partition_date
,
5
,
2
),
substr
(
partition_date
,
7
,
2
))
AS
partition_date
FROM
online
.
ml_device_day_active_status
WHERE
partition_date
>=
regexp_replace
(
(
current_date
-
interval
'60'
day
)
,
'-'
,
''
)
WHERE
partition_date
>=
regexp_replace
(
DATE_SUB
(
current_date
,
60
)
,
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
)
t3
ON
t2
.
cl_id
=
t3
.
device_id
WHERE
spam_pv
.
device_id
IS
NULL
)
t4
LATERAL
VIEW
explode
(
t4
.
channel
)
t5
AS
channel
GROUP
BY
regexp_replace
(
partition_date
,
'-'
,
''
),
device_type
,
active_type
,
t5
.
channel
GROUP
BY
regexp_replace
(
partition_date
,
'-'
,
''
),
device_
os_
type
,
active_type
,
t5
.
channel
)
T4
ON
T1
.
partition_date
=
T4
.
partition_date
AND
T1
.
device_
type
=
T4
.
device
_type
AND
T1
.
device_
os_type
=
T4
.
device_os
_type
AND
T1
.
active_type
=
T4
.
active_type
AND
T1
.
channel
=
T4
.
channel
LEFT
JOIN
(
--内容用户单设备app时长(m)
SELECT
partition_date
,
device_type
,
device_
os_
type
,
active_type
,
t5
.
channel
,
round
(
sum
(
use_duration
)
/
count
(
distinct
cl_id
)
/
60
,
4
)
as
app_duration
,
round
(
avg
(
open_times
),
4
)
as
avg_opentimes
FROM
(
SELECT
t1
.
partition_date
,
device_type
,
active_type
,
channel
,
t2
.
cl_id
,
t3
.
use_duration
,
t3
.
open_times
SELECT
t1
.
partition_date
,
device_
os_
type
,
active_type
,
channel
,
t2
.
cl_id
,
t3
.
use_duration
,
t3
.
open_times
FROM
(
SELECT
partition_date
,
device_os_type
AS
device_type
,
device_os_type
,
CASE
WHEN
active_type
=
'4'
THEN
'老活跃设备'
WHEN
active_type
IN
(
'1'
,
'2'
)
THEN
'新增设备'
END
AS
active_type
,
array
(
CASE
WHEN
tmp
.
time
=
'AI'
or
(
partition_date
<
20200301
AND
first_channel_source_type
like
'promotion_toutiao_jy%'
)
THEN
'AI'
ELSE
'其他'
END
,
'合计'
)
as
channel
,
array
(
CASE
WHEN
tmp
.
col2
=
'AI'
or
(
partition_date
<
20200301
AND
first_channel_source_type
like
'promotion_toutiao_jy%'
)
THEN
'AI'
ELSE
'其他'
END
,
'合计'
)
as
channel
,
device_id
FROM
online
.
ml_device_day_active_status
LEFT
JOIN
(
SELECT
phone
,
time
FROM
offline
.
tmp_zhx_20191227
WHERE
flag
=
'0204_danlei_channel'
)
tmp
on
first_channel_source_type
=
tmp
.
phone
WHERE
partition_date
>=
regexp_replace
(
(
current_date
-
interval
'60'
day
)
,
'-'
,
''
)
(
SELECT
col1
,
col2
--col1:子渠道,col2:是否属于AI,col3:标识
FROM
pm
.
tl_pm_ydl
WHERE
col3
=
'0204_danlei_channel'
)
tmp
on
first_channel_source_type
=
tmp
.
col1
WHERE
partition_date
>=
regexp_replace
(
DATE_SUB
(
current_date
,
60
)
,
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
AND
active_type
IN
(
'1'
,
'2'
,
'4'
)
AND
first_channel_source_type
not
IN
(
'yqxiu1'
,
'yqxiu2'
,
'yqxiu3'
,
'yqxiu4'
,
'yqxiu5'
,
'mxyc1'
,
'mxyc2'
,
'mxyc3'
...
...
@@ -410,7 +410,7 @@ LEFT JOIN
(
--内容用户
SELECT
partition_date
,
cl_id
FROM
online
.
bl_hdfs_maidian_updates
WHERE
partition_date
>=
regexp_replace
(
(
current_date
-
interval
'60'
day
)
,
'-'
,
''
)
WHERE
partition_date
>=
regexp_replace
(
DATE_SUB
(
current_date
,
60
)
,
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
AND
action
=
'page_view'
AND
page_name
IN
(
'diary_detail'
,
'topic_detail'
,
'post_detail'
,
'user_post_detail'
,
'doctor_post_detail'
,
'question_detail'
,
'answer_detail'
,
'question_answer_detail'
,
...
...
@@ -423,14 +423,14 @@ LEFT JOIN
(
-- 去掉疑似机构刷量的PV和UV
select
distinct
device_id
from
ml
.
ml_d_ct_dv_devicespam_d
where
partition_day
=
regexp_replace
(
(
current_date
-
interval
'1'
day
)
,
'-'
,
''
)
where
partition_day
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
)
spam_pv
on
t2
.
cl_id
=
spam_pv
.
device_id
LEFT
JOIN
(
SELECT
partition_date
,
device_id
,
use_duration
,
open_times
FROM
online
.
ml_device_updates
WHERE
partition_date
>=
regexp_replace
(
(
current_date
-
interval
'60'
day
)
,
'-'
,
''
)
WHERE
partition_date
>=
regexp_replace
(
DATE_SUB
(
current_date
,
60
)
,
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
AND
use_duration
>
0
and
use_duration
<
86400
)
t3
...
...
@@ -439,17 +439,17 @@ LEFT JOIN
WHERE
spam_pv
.
device_id
IS
NULL
)
t4
LATERAL
VIEW
explode
(
t4
.
channel
)
t5
AS
channel
GROUP
BY
partition_date
,
device_type
,
active_type
,
t5
.
channel
GROUP
BY
partition_date
,
device_
os_
type
,
active_type
,
t5
.
channel
)
T5
ON
T1
.
partition_date
=
T5
.
partition_date
AND
T1
.
device_
type
=
T5
.
device
_type
AND
T1
.
device_
os_type
=
T5
.
device_os
_type
AND
T1
.
active_type
=
T5
.
active_type
AND
T1
.
channel
=
T5
.
channel
LEFT
JOIN
(
--不同来源进入内容uv/pv
SELECT
partition_date
,
device_type
,
device_
os_
type
,
active_type
,
t4
.
channel
,
count
(
CASE
WHEN
referrer
=
'search'
THEN
cl_id
END
)
AS
search_pv
...
...
@@ -478,22 +478,22 @@ LEFT JOIN
FROM
(
SELECT
t1
.
partition_date
,
device_type
,
active_type
,
channel
,
t2
.
cl_id
,
t2
.
referrer
SELECT
t1
.
partition_date
,
device_
os_
type
,
active_type
,
channel
,
t2
.
cl_id
,
t2
.
referrer
FROM
(
SELECT
partition_date
,
device_os_type
AS
device_type
,
device_os_type
,
CASE
WHEN
active_type
=
'4'
THEN
'老活跃设备'
WHEN
active_type
IN
(
'1'
,
'2'
)
THEN
'新增设备'
END
AS
active_type
,
array
(
CASE
WHEN
tmp
.
time
=
'AI'
or
(
partition_date
<
20200301
AND
first_channel_source_type
like
'promotion_toutiao_jy%'
)
THEN
'AI'
ELSE
'其他'
END
,
'合计'
)
as
channel
,
array
(
CASE
WHEN
tmp
.
col2
=
'AI'
or
(
partition_date
<
20200301
AND
first_channel_source_type
like
'promotion_toutiao_jy%'
)
THEN
'AI'
ELSE
'其他'
END
,
'合计'
)
as
channel
,
device_id
FROM
online
.
ml_device_day_active_status
LEFT
JOIN
(
SELECT
phone
,
time
FROM
offline
.
tmp_zhx_20191227
WHERE
flag
=
'0204_danlei_channel'
)
tmp
on
first_channel_source_type
=
tmp
.
phone
WHERE
partition_date
>=
regexp_replace
(
(
current_date
-
interval
'60'
day
)
,
'-'
,
''
)
(
SELECT
col1
,
col2
--col1:子渠道,col2:是否属于AI,col3:标识
FROM
pm
.
tl_pm_ydl
WHERE
col3
=
'0204_danlei_channel'
)
tmp
on
first_channel_source_type
=
tmp
.
col1
WHERE
partition_date
>=
regexp_replace
(
DATE_SUB
(
current_date
,
60
)
,
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
AND
active_type
IN
(
'1'
,
'2'
,
'4'
)
AND
first_channel_source_type
not
IN
(
'yqxiu1'
,
'yqxiu2'
,
'yqxiu3'
,
'yqxiu4'
,
'yqxiu5'
,
'mxyc1'
,
'mxyc2'
,
'mxyc3'
...
...
@@ -520,7 +520,7 @@ LEFT JOIN
when
referrer
in
(
'diary_detail'
,
'topic_detail'
,
'post_detail'
,
'user_post_detail'
,
'doctor_post_detail'
,
'question_detail'
,
'answer_detail'
,
'question_answer_detail'
)
then
'content'
else
null
end
as
referrer
FROM
online
.
bl_hdfs_maidian_updates
WHERE
partition_date
>=
regexp_replace
(
(
current_date
-
interval
'60'
day
)
,
'-'
,
''
)
WHERE
partition_date
>=
regexp_replace
(
DATE_SUB
(
current_date
,
60
)
,
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
and
action
=
'page_view'
AND
page_name
IN
(
'diary_detail'
,
'topic_detail'
,
'post_detail'
,
'user_post_detail'
,
'doctor_post_detail'
,
'question_detail'
,
'answer_detail'
,
'question_answer_detail'
,
...
...
@@ -531,7 +531,7 @@ LEFT JOIN
--首页feeds进入内容(首页非策略卡片点击)
SELECT
partition_date
,
cl_id
,
'feeds'
as
referrer
FROM
online
.
bl_hdfs_maidian_updates
WHERE
partition_date
>=
regexp_replace
(
(
current_date
-
interval
'60'
day
)
,
'-'
,
''
)
WHERE
partition_date
>=
regexp_replace
(
DATE_SUB
(
current_date
,
60
)
,
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
AND
page_name
=
'home'
AND
action
=
'on_click_card'
...
...
@@ -543,7 +543,7 @@ LEFT JOIN
--首页feeds进入内容(首页非策略卡片点击) 7.8.0版本前的埋点
SELECT
partition_date
,
cl_id
,
'feeds'
as
referrer
FROM
online
.
bl_hdfs_maidian_updates
WHERE
partition_date
>=
regexp_replace
(
(
current_date
-
interval
'60'
day
)
,
'-'
,
''
)
WHERE
partition_date
>=
regexp_replace
(
DATE_SUB
(
current_date
,
60
)
,
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
AND
page_name
=
'home'
AND
action
in
(
'on_click_diary_card'
,
'on_click_answer_card'
,
'on_click_question_card'
,
'on_click_topic_card'
,
'on_click_live_card'
)
...
...
@@ -554,7 +554,7 @@ LEFT JOIN
--推荐进入内容(首页策略卡片点击),5月7日新增transaction_type类型
SELECT
partition_date
,
cl_id
,
'recommend'
as
referrer
FROM
online
.
bl_hdfs_maidian_updates
WHERE
partition_date
>=
regexp_replace
(
(
current_date
-
interval
'60'
day
)
,
'-'
,
''
)
WHERE
partition_date
>=
regexp_replace
(
DATE_SUB
(
current_date
,
60
)
,
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
AND
page_name
=
'home'
AND
action
=
'on_click_card'
...
...
@@ -566,7 +566,7 @@ LEFT JOIN
--推荐进入内容(首页策略卡片点击) 7.8.0版本前的埋点
SELECT
partition_date
,
cl_id
,
'feeds'
as
referrer
FROM
online
.
bl_hdfs_maidian_updates
WHERE
partition_date
>=
regexp_replace
(
(
current_date
-
interval
'60'
day
)
,
'-'
,
''
)
WHERE
partition_date
>=
regexp_replace
(
DATE_SUB
(
current_date
,
60
)
,
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
AND
page_name
=
'home'
AND
action
in
(
'on_click_diary_card'
,
'on_click_answer_card'
,
'on_click_question_card'
,
'on_click_topic_card'
,
'on_click_live_card'
)
...
...
@@ -578,45 +578,45 @@ LEFT JOIN
(
-- 去掉疑似机构刷量的PV和UV
select
distinct
device_id
from
ml
.
ml_d_ct_dv_devicespam_d
where
partition_day
=
regexp_replace
(
(
current_date
-
interval
'1'
day
)
,
'-'
,
''
)
where
partition_day
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
)
spam_pv
on
t2
.
cl_id
=
spam_pv
.
device_id
WHERE
spam_pv
.
device_id
IS
NULL
)
t3
LATERAL
VIEW
explode
(
t3
.
channel
)
t4
AS
channel
GROUP
BY
partition_date
,
device_type
,
active_type
,
t4
.
channel
GROUP
BY
partition_date
,
device_
os_
type
,
active_type
,
t4
.
channel
)
T6
ON
T1
.
partition_date
=
T6
.
partition_date
AND
T1
.
device_
type
=
T6
.
device
_type
AND
T1
.
device_
os_type
=
T6
.
device_os
_type
AND
T1
.
active_type
=
T6
.
active_type
AND
T1
.
channel
=
T6
.
channel
LEFT
JOIN
(
--真实发帖数
SELECT
partition_date
,
device_type
,
device_
os_
type
,
active_type
,
t7
.
channel
,
count
(
distinct
id
)
as
num
FROM
(
SELECT
t1
.
partition_date
,
device_type
,
active_type
,
channel
,
t3
.
id
SELECT
t1
.
partition_date
,
device_
os_
type
,
active_type
,
channel
,
t3
.
id
FROM
(
SELECT
partition_date
,
device_os_type
AS
device_type
,
device_os_type
,
CASE
WHEN
active_type
=
'4'
THEN
'老活跃设备'
WHEN
active_type
IN
(
'1'
,
'2'
)
THEN
'新增设备'
END
AS
active_type
,
array
(
CASE
WHEN
tmp
.
time
=
'AI'
or
(
partition_date
<
20200301
AND
first_channel_source_type
like
'promotion_toutiao_jy%'
)
THEN
'AI'
ELSE
'其他'
END
,
'合计'
)
as
channel
,
array
(
CASE
WHEN
tmp
.
col2
=
'AI'
or
(
partition_date
<
20200301
AND
first_channel_source_type
like
'promotion_toutiao_jy%'
)
THEN
'AI'
ELSE
'其他'
END
,
'合计'
)
as
channel
,
device_id
FROM
online
.
ml_device_day_active_status
LEFT
JOIN
(
SELECT
phone
,
time
FROM
offline
.
tmp_zhx_20191227
WHERE
flag
=
'0204_danlei_channel'
)
tmp
on
first_channel_source_type
=
tmp
.
phone
WHERE
partition_date
>=
regexp_replace
(
(
current_date
-
interval
'60'
day
)
,
'-'
,
''
)
(
SELECT
col1
,
col2
--col1:子渠道,col2:是否属于AI,col3:标识
FROM
pm
.
tl_pm_ydl
WHERE
col3
=
'0204_danlei_channel'
)
tmp
on
first_channel_source_type
=
tmp
.
col1
WHERE
partition_date
>=
regexp_replace
(
DATE_SUB
(
current_date
,
60
)
,
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
AND
active_type
IN
(
'1'
,
'2'
,
'4'
)
AND
first_channel_source_type
not
IN
(
'yqxiu1'
,
'yqxiu2'
,
'yqxiu3'
,
'yqxiu4'
,
'yqxiu5'
,
'mxyc1'
,
'mxyc2'
,
'mxyc3'
...
...
@@ -634,7 +634,7 @@ LEFT JOIN
SELECT
user_id
,
partition_date
,
if
(
size
(
device_list
)
>
0
,
device_list
[
0
],
''
)
AS
device_id
FROM
online
.
ml_user_updates
WHERE
partition_date
>=
regexp_replace
(
(
current_date
-
interval
'60'
day
)
,
'-'
,
''
)
WHERE
partition_date
>=
regexp_replace
(
DATE_SUB
(
current_date
,
60
)
,
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
)
t2
ON
t1
.
partition_date
=
t2
.
partition_date
...
...
@@ -645,10 +645,10 @@ LEFT JOIN
--新增帖子
SELECT
user_id
,
id
,
regexp_replace
(
substr
(
create_time
,
1
,
10
),
'-'
,
''
)
as
create_date
FROM
online
.
tl_hdfs_api_tractate_view
--发帖情况表
WHERE
partition_date
=
regexp_replace
(
(
current_date
-
interval
'1'
day
)
,
'-'
,
''
)
WHERE
partition_date
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
AND
is_online
=
'true'
AND
platform
in
(
'1'
,
'7'
)
--更美用户发的以及打卡的(去除hera后台,爬虫抓取的,kyc自动回复的)
AND
regexp_replace
(
substr
(
create_time
,
1
,
10
),
'-'
,
''
)
>=
regexp_replace
(
(
current_date
-
interval
'60'
day
)
,
'-'
,
''
)
AND
regexp_replace
(
substr
(
create_time
,
1
,
10
),
'-'
,
''
)
>=
regexp_replace
(
DATE_SUB
(
current_date
,
60
)
,
'-'
,
''
)
AND
regexp_replace
(
substr
(
create_time
,
1
,
10
),
'-'
,
''
)
<
regexp_replace
((
current_date
),
'-'
,
''
)
UNION
ALL
...
...
@@ -658,15 +658,15 @@ LEFT JOIN
(
SELECT
user_id
,
id
,
regexp_replace
(
substr
(
created_time
,
1
,
10
),
'-'
,
''
)
as
create_date
FROM
online
.
tl_hdfs_diary_view
WHERE
partition_date
=
regexp_replace
(
(
current_date
-
interval
'1'
day
)
,
'-'
,
''
)
and
regexp_replace
(
substr
(
created_time
,
1
,
10
),
'-'
,
''
)
>=
regexp_replace
(
(
current_date
-
interval
'60'
day
)
,
'-'
,
''
)
WHERE
partition_date
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
and
regexp_replace
(
substr
(
created_time
,
1
,
10
),
'-'
,
''
)
>=
regexp_replace
(
DATE_SUB
(
current_date
,
60
)
,
'-'
,
''
)
and
regexp_replace
(
substr
(
created_time
,
1
,
10
),
'-'
,
''
)
<
regexp_replace
((
current_date
),
'-'
,
''
)
)
a
JOIN
(
--取非空日记
SELECT
diary_id
FROM
online
.
tl_hdfs_problem_view
WHERE
partition_date
=
regexp_replace
(
(
current_date
-
interval
'1'
day
)
,
'-'
,
''
)
WHERE
partition_date
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
AND
is_spam
=
'false'
)
b
ON
a
.
id
=
b
.
diary_id
...
...
@@ -675,8 +675,8 @@ LEFT JOIN
--新增日记贴
SELECT
user_id
,
id
,
regexp_replace
(
substr
(
created_time
,
1
,
10
),
'-'
,
''
)
as
create_date
FROM
online
.
tl_hdfs_problem_view
WHERE
partition_date
=
regexp_replace
(
(
current_date
-
interval
'1'
day
)
,
'-'
,
''
)
AND
regexp_replace
(
substr
(
created_time
,
1
,
10
),
'-'
,
''
)
>=
regexp_replace
(
(
current_date
-
interval
'60'
day
)
,
'-'
,
''
)
WHERE
partition_date
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
AND
regexp_replace
(
substr
(
created_time
,
1
,
10
),
'-'
,
''
)
>=
regexp_replace
(
DATE_SUB
(
current_date
,
60
)
,
'-'
,
''
)
AND
regexp_replace
(
substr
(
created_time
,
1
,
10
),
'-'
,
''
)
<
regexp_replace
((
current_date
),
'-'
,
''
)
AND
is_spam
=
'false'
AND
diary_id
is
not
null
...
...
@@ -685,8 +685,8 @@ LEFT JOIN
--新增问题数
SELECT
user_id
,
id
,
regexp_replace
(
substr
(
create_time
,
1
,
10
),
'-'
,
''
)
as
create_date
FROM
online
.
tl_hdfs_question_view
WHERE
partition_date
=
regexp_replace
(
(
current_date
-
interval
'1'
day
)
,
'-'
,
''
)
AND
regexp_replace
(
substr
(
create_time
,
1
,
10
),
'-'
,
''
)
>=
regexp_replace
(
(
current_date
-
interval
'60'
day
)
,
'-'
,
''
)
WHERE
partition_date
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
AND
regexp_replace
(
substr
(
create_time
,
1
,
10
),
'-'
,
''
)
>=
regexp_replace
(
DATE_SUB
(
current_date
,
60
)
,
'-'
,
''
)
AND
regexp_replace
(
substr
(
create_time
,
1
,
10
),
'-'
,
''
)
<
regexp_replace
((
current_date
),
'-'
,
''
)
AND
is_spam
=
'false'
AND
platform
=
'99'
--更美用户发的(去除hera后台,爬虫抓取的,kyc自动回复的)
...
...
@@ -695,8 +695,8 @@ LEFT JOIN
--新增回答数
SELECT
user_id
,
id
,
regexp_replace
(
substr
(
create_time
,
1
,
10
),
'-'
,
''
)
as
create_date
FROM
online
.
tl_hdfs_answer_view
WHERE
partition_date
=
regexp_replace
(
(
current_date
-
interval
'1'
day
)
,
'-'
,
''
)
AND
regexp_replace
(
substr
(
create_time
,
1
,
10
),
'-'
,
''
)
>=
regexp_replace
(
(
current_date
-
interval
'60'
day
)
,
'-'
,
''
)
WHERE
partition_date
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
AND
regexp_replace
(
substr
(
create_time
,
1
,
10
),
'-'
,
''
)
>=
regexp_replace
(
DATE_SUB
(
current_date
,
60
)
,
'-'
,
''
)
AND
regexp_replace
(
substr
(
create_time
,
1
,
10
),
'-'
,
''
)
<
regexp_replace
((
current_date
),
'-'
,
''
)
AND
is_spam
=
'false'
AND
platform
=
'99'
--更美用户发的(去除hera后台,爬虫抓取的,kyc自动回复的)
...
...
@@ -711,7 +711,7 @@ LEFT JOIN
(
SELECT
partition_date
,
user_id
,
action
FROM
online
.
bl_hdfs_operation_updates
WHERE
partition_date
>=
regexp_replace
(
(
current_date
-
interval
'60'
day
)
,
'-'
,
''
)
WHERE
partition_date
>=
regexp_replace
(
DATE_SUB
(
current_date
,
60
)
,
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
)
a
JOIN
...
...
@@ -731,13 +731,13 @@ LEFT JOIN
(
--医生账号
SELECT
distinct
user_id
FROM
online
.
tl_hdfs_doctor_view
WHERE
partition_date
=
regexp_replace
(
(
current_date
-
interval
'1'
day
)
,
'-'
,
''
)
WHERE
partition_date
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
--马甲账号/模特用户
UNION
ALL
SELECT
user_id
FROM
ml
.
ml_c_ct_ui_user_dimen_d
WHERE
partition_day
=
regexp_replace
(
(
current_date
-
interval
'1'
day
)
,
'-'
,
''
)
WHERE
partition_day
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
AND
(
is_puppet
=
'true'
or
is_classifyuser
=
'true'
)
UNION
ALL
...
...
@@ -753,13 +753,13 @@ LEFT JOIN
SELECT
user_id
,
v
.
device_id
as
device_id
FROM
online
.
ml_user_history_detail
LATERAL
VIEW
EXPLODE
(
device_history_list
)
v
AS
device_id
WHERE
partition_date
=
regexp_replace
(
(
current_date
-
interval
'1'
day
)
,
'-'
,
''
)
WHERE
partition_date
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
)
t1
JOIN
(
SELECT
device_id
FROM
online
.
ml_device_history_detail
WHERE
partition_date
=
regexp_replace
(
(
current_date
-
interval
'1'
day
)
,
'-'
,
''
)
WHERE
partition_date
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
AND
is_login_doctor
=
'1'
)
t2
ON
t1
.
device_id
=
t2
.
device_id
...
...
@@ -768,38 +768,38 @@ LEFT JOIN
where
(
t5
.
user_id
is
null
or
t5
.
user_id
=
''
)
)
t6
LATERAL
VIEW
explode
(
t6
.
channel
)
t7
AS
channel
GROUP
BY
partition_date
,
device_type
,
active_type
,
t7
.
channel
GROUP
BY
partition_date
,
device_
os_
type
,
active_type
,
t7
.
channel
)
T7
ON
T1
.
partition_date
=
T7
.
partition_date
AND
T1
.
device_
type
=
T7
.
device
_type
AND
T1
.
device_
os_type
=
T7
.
device_os
_type
AND
T1
.
active_type
=
T7
.
active_type
AND
T1
.
channel
=
T7
.
channel
LEFT
JOIN
(
--真实评论数
SELECT
partition_date
,
device_type
,
device_
os_
type
,
active_type
,
t7
.
channel
,
count
(
distinct
id
)
as
num
FROM
(
SELECT
t1
.
partition_date
,
device_type
,
active_type
,
channel
,
t3
.
id
,
t3
.
type
SELECT
t1
.
partition_date
,
device_
os_
type
,
active_type
,
channel
,
t3
.
id
,
t3
.
type
FROM
(
SELECT
partition_date
,
device_os_type
AS
device_type
,
device_os_type
,
CASE
WHEN
active_type
=
'4'
THEN
'老活跃设备'
WHEN
active_type
IN
(
'1'
,
'2'
)
THEN
'新增设备'
END
AS
active_type
,
array
(
CASE
WHEN
tmp
.
time
=
'AI'
or
(
partition_date
<
20200301
AND
first_channel_source_type
like
'promotion_toutiao_jy%'
)
THEN
'AI'
ELSE
'其他'
END
,
'合计'
)
as
channel
,
array
(
CASE
WHEN
tmp
.
col2
=
'AI'
or
(
partition_date
<
20200301
AND
first_channel_source_type
like
'promotion_toutiao_jy%'
)
THEN
'AI'
ELSE
'其他'
END
,
'合计'
)
as
channel
,
device_id
FROM
online
.
ml_device_day_active_status
LEFT
JOIN
(
SELECT
phone
,
time
FROM
offline
.
tmp_zhx_20191227
WHERE
flag
=
'0204_danlei_channel'
)
tmp
on
first_channel_source_type
=
tmp
.
phone
WHERE
partition_date
>=
regexp_replace
(
(
current_date
-
interval
'60'
day
)
,
'-'
,
''
)
(
SELECT
col1
,
col2
--col1:子渠道,col2:是否属于AI,col3:标识
FROM
pm
.
tl_pm_ydl
WHERE
col3
=
'0204_danlei_channel'
)
tmp
on
first_channel_source_type
=
tmp
.
col1
WHERE
partition_date
>=
regexp_replace
(
DATE_SUB
(
current_date
,
60
)
,
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
AND
active_type
IN
(
'1'
,
'2'
,
'4'
)
AND
first_channel_source_type
not
IN
(
'yqxiu1'
,
'yqxiu2'
,
'yqxiu3'
,
'yqxiu4'
,
'yqxiu5'
,
'mxyc1'
,
'mxyc2'
,
'mxyc3'
...
...
@@ -817,7 +817,7 @@ LEFT JOIN
SELECT
user_id
,
partition_date
,
if
(
size
(
device_list
)
>
0
,
device_list
[
0
],
''
)
AS
device_id
FROM
online
.
ml_user_updates
WHERE
partition_date
>=
regexp_replace
(
(
current_date
-
interval
'60'
day
)
,
'-'
,
''
)
WHERE
partition_date
>=
regexp_replace
(
DATE_SUB
(
current_date
,
60
)
,
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
)
t2
ON
t1
.
partition_date
=
t2
.
partition_date
...
...
@@ -830,17 +830,17 @@ LEFT JOIN
(
SELECT
user_id
,
regexp_replace
(
substr
(
reply_date
,
1
,
10
),
'-'
,
''
)
as
reply_date
,
problem_id
,
id
FROM
online
.
tl_hdfs_topicreply_view
WHERE
partition_date
=
regexp_replace
(
(
current_date
-
interval
'1'
day
)
,
'-'
,
''
)
WHERE
partition_date
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
AND
is_spam
=
'false'
--排除疑似广告
-- and diary_id is not null 这个表的diary_id有问题,需要join problem表来判断是不是属于日记
and
regexp_replace
(
substr
(
reply_date
,
1
,
10
),
'-'
,
''
)
>=
regexp_replace
(
(
current_date
-
interval
'60'
day
)
,
'-'
,
''
)
and
regexp_replace
(
substr
(
reply_date
,
1
,
10
),
'-'
,
''
)
>=
regexp_replace
(
DATE_SUB
(
current_date
,
60
)
,
'-'
,
''
)
and
regexp_replace
(
substr
(
reply_date
,
1
,
10
),
'-'
,
''
)
<
regexp_replace
((
current_date
),
'-'
,
''
)
)
t1
JOIN
(
SELECT
id
,
diary_id
FROM
online
.
tl_hdfs_problem_view
WHERE
partition_date
=
regexp_replace
(
(
current_date
-
interval
'1'
day
)
,
'-'
,
''
)
WHERE
partition_date
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
group
by
id
,
diary_id
)
t2
on
t2
.
id
=
t1
.
problem_id
...
...
@@ -855,17 +855,17 @@ LEFT JOIN
(
SELECT
user_id
,
regexp_replace
(
substr
(
create_time
,
1
,
10
),
'-'
,
''
)
as
reply_date
,
answer_id
,
id
FROM
online
.
tl_hdfs_answer_reply_view
WHERE
partition_date
=
regexp_replace
(
(
current_date
-
interval
'1'
day
)
,
'-'
,
''
)
WHERE
partition_date
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
AND
(
is_fake
is
NULL
or
is_fake
=
'false'
)
AND
answer_id
is
not
NULL
and
regexp_replace
(
substr
(
create_time
,
1
,
10
),
'-'
,
''
)
>=
regexp_replace
(
(
current_date
-
interval
'60'
day
)
,
'-'
,
''
)
and
regexp_replace
(
substr
(
create_time
,
1
,
10
),
'-'
,
''
)
>=
regexp_replace
(
DATE_SUB
(
current_date
,
60
)
,
'-'
,
''
)
and
regexp_replace
(
substr
(
create_time
,
1
,
10
),
'-'
,
''
)
<
regexp_replace
((
current_date
),
'-'
,
''
)
)
t1
JOIN
(
SELECT
id
,
question_id
FROM
online
.
tl_hdfs_answer_view
WHERE
partition_date
=
regexp_replace
(
(
current_date
-
interval
'1'
day
)
,
'-'
,
''
)
WHERE
partition_date
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
group
by
id
,
question_id
)
t2
ON
t2
.
id
=
t1
.
answer_id
...
...
@@ -875,8 +875,8 @@ LEFT JOIN
--有评论过用户帖的设备
SELECT
user_id
,
regexp_replace
(
substr
(
create_time
,
1
,
10
),
'-'
,
''
)
as
reply_date
,
id
,
'tractate_reply'
as
type
FROM
online
.
tl_hdfs_api_tractate_reply_view
WHERE
partition_date
=
regexp_replace
(
(
current_date
-
interval
'1'
day
)
,
'-'
,
''
)
and
regexp_replace
(
substr
(
create_time
,
1
,
10
),
'-'
,
''
)
>=
regexp_replace
(
(
current_date
-
interval
'60'
day
)
,
'-'
,
''
)
WHERE
partition_date
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
and
regexp_replace
(
substr
(
create_time
,
1
,
10
),
'-'
,
''
)
>=
regexp_replace
(
DATE_SUB
(
current_date
,
60
)
,
'-'
,
''
)
and
regexp_replace
(
substr
(
create_time
,
1
,
10
),
'-'
,
''
)
<
regexp_replace
((
current_date
),
'-'
,
''
)
)
t3
ON
t2
.
partition_date
=
t3
.
reply_date
...
...
@@ -889,7 +889,7 @@ LEFT JOIN
(
SELECT
partition_date
,
user_id
,
action
FROM
online
.
bl_hdfs_operation_updates
WHERE
partition_date
>=
regexp_replace
(
(
current_date
-
interval
'60'
day
)
,
'-'
,
''
)
WHERE
partition_date
>=
regexp_replace
(
DATE_SUB
(
current_date
,
60
)
,
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
)
a
JOIN
...
...
@@ -909,13 +909,13 @@ LEFT JOIN
(
--医生账号
SELECT
distinct
user_id
FROM
online
.
tl_hdfs_doctor_view
WHERE
partition_date
=
regexp_replace
(
(
current_date
-
interval
'1'
day
)
,
'-'
,
''
)
WHERE
partition_date
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
--马甲账号/模特用户
UNION
ALL
SELECT
user_id
FROM
ml
.
ml_c_ct_ui_user_dimen_d
WHERE
partition_day
=
regexp_replace
(
(
current_date
-
interval
'1'
day
)
,
'-'
,
''
)
WHERE
partition_day
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
AND
(
is_puppet
=
'true'
or
is_classifyuser
=
'true'
)
UNION
ALL
...
...
@@ -931,13 +931,13 @@ LEFT JOIN
SELECT
user_id
,
v
.
device_id
as
device_id
FROM
online
.
ml_user_history_detail
LATERAL
VIEW
EXPLODE
(
device_history_list
)
v
AS
device_id
WHERE
partition_date
=
regexp_replace
(
(
current_date
-
interval
'1'
day
)
,
'-'
,
''
)
WHERE
partition_date
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
)
t1
JOIN
(
SELECT
device_id
FROM
online
.
ml_device_history_detail
WHERE
partition_date
=
regexp_replace
(
(
current_date
-
interval
'1'
day
)
,
'-'
,
''
)
WHERE
partition_date
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
AND
is_login_doctor
=
'1'
)
t2
ON
t1
.
device_id
=
t2
.
device_id
...
...
@@ -946,17 +946,17 @@ LEFT JOIN
where
(
t5
.
user_id
is
null
or
t5
.
user_id
=
''
)
)
t6
LATERAL
VIEW
explode
(
t6
.
channel
)
t7
AS
channel
GROUP
BY
partition_date
,
device_type
,
active_type
,
t7
.
channel
GROUP
BY
partition_date
,
device_
os_
type
,
active_type
,
t7
.
channel
)
T8
ON
T1
.
partition_date
=
T8
.
partition_date
AND
T1
.
device_
type
=
T8
.
device
_type
AND
T1
.
device_
os_type
=
T8
.
device_os
_type
AND
T1
.
active_type
=
T8
.
active_type
AND
T1
.
channel
=
T8
.
channel
LEFT
JOIN
(
--部分页面的单设备页面浏览时长
SELECT
partition_date
,
device_type
,
device_
os_
type
,
active_type
,
t5
.
channel
,
round
(
sum
(
CASE
WHEN
page_name
like
'search%'
THEN
page_stay
else
0
END
)
/
count
(
distinct
cl_id
)
/
60
,
4
)
AS
search_stay
...
...
@@ -969,22 +969,22 @@ LEFT JOIN
FROM
(
SELECT
t1
.
partition_date
,
device_type
,
active_type
,
channel
,
t2
.
cl_id
,
t3
.
page_name
,
t3
.
page_stay
SELECT
t1
.
partition_date
,
device_
os_
type
,
active_type
,
channel
,
t2
.
cl_id
,
t3
.
page_name
,
t3
.
page_stay
FROM
(
SELECT
partition_date
,
device_os_type
AS
device_type
,
device_os_type
,
CASE
WHEN
active_type
=
'4'
THEN
'老活跃设备'
WHEN
active_type
IN
(
'1'
,
'2'
)
THEN
'新增设备'
END
AS
active_type
,
array
(
CASE
WHEN
tmp
.
time
=
'AI'
or
(
partition_date
<
20200301
AND
first_channel_source_type
like
'promotion_toutiao_jy%'
)
THEN
'AI'
ELSE
'其他'
END
,
'合计'
)
as
channel
,
array
(
CASE
WHEN
tmp
.
col2
=
'AI'
or
(
partition_date
<
20200301
AND
first_channel_source_type
like
'promotion_toutiao_jy%'
)
THEN
'AI'
ELSE
'其他'
END
,
'合计'
)
as
channel
,
device_id
FROM
online
.
ml_device_day_active_status
LEFT
JOIN
(
SELECT
phone
,
time
FROM
offline
.
tmp_zhx_20191227
WHERE
flag
=
'0204_danlei_channel'
)
tmp
on
first_channel_source_type
=
tmp
.
phone
WHERE
partition_date
>=
regexp_replace
(
(
current_date
-
interval
'60'
day
)
,
'-'
,
''
)
(
SELECT
col1
,
col2
--col1:子渠道,col2:是否属于AI,col3:标识
FROM
pm
.
tl_pm_ydl
WHERE
col3
=
'0204_danlei_channel'
)
tmp
on
first_channel_source_type
=
tmp
.
col1
WHERE
partition_date
>=
regexp_replace
(
DATE_SUB
(
current_date
,
60
)
,
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
AND
active_type
IN
(
'1'
,
'2'
,
'4'
)
AND
first_channel_source_type
not
IN
(
'yqxiu1'
,
'yqxiu2'
,
'yqxiu3'
,
'yqxiu4'
,
'yqxiu5'
,
'mxyc1'
,
'mxyc2'
,
'mxyc3'
...
...
@@ -1002,7 +1002,7 @@ LEFT JOIN
(
--内容用户
SELECT
partition_date
,
cl_id
FROM
online
.
bl_hdfs_maidian_updates
WHERE
partition_date
>=
regexp_replace
(
(
current_date
-
interval
'60'
day
)
,
'-'
,
''
)
WHERE
partition_date
>=
regexp_replace
(
DATE_SUB
(
current_date
,
60
)
,
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
AND
action
=
'page_view'
AND
page_name
IN
(
'diary_detail'
,
'topic_detail'
,
'post_detail'
,
'user_post_detail'
,
'doctor_post_detail'
,
'question_detail'
,
'answer_detail'
,
'question_answer_detail'
,
...
...
@@ -1017,7 +1017,7 @@ LEFT JOIN
(
--部分页面的停留时长
SELECT
partition_date
,
cl_id
,
page_name
,
page_stay
FROM
online
.
bl_hdfs_maidian_updates
WHERE
partition_date
>=
regexp_replace
(
(
current_date
-
interval
'60'
day
)
,
'-'
,
''
)
WHERE
partition_date
>=
regexp_replace
(
DATE_SUB
(
current_date
,
60
)
,
'-'
,
''
)
AND
partition_date
<
regexp_replace
((
current_date
),
'-'
,
''
)
AND
action
=
'page_view'
AND
(
page_name
like
'search%'
or
page_name
IN
(
'welfare_detail'
,
'question_detail'
,
'report_result'
,
'face_scan'
...
...
@@ -1032,17 +1032,17 @@ LEFT JOIN
(
-- 去掉疑似机构刷量的PV和UV
select
distinct
device_id
from
ml
.
ml_d_ct_dv_devicespam_d
where
partition_day
=
regexp_replace
(
(
current_date
-
interval
'1'
day
)
,
'-'
,
''
)
where
partition_day
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
)
spam_pv
on
t2
.
cl_id
=
spam_pv
.
device_id
WHERE
spam_pv
.
device_id
IS
NULL
)
t4
LATERAL
VIEW
explode
(
t4
.
channel
)
t5
AS
channel
GROUP
BY
partition_date
,
device_type
,
active_type
,
t5
.
channel
GROUP
BY
partition_date
,
device_
os_
type
,
active_type
,
t5
.
channel
)
T9
ON
T1
.
partition_date
=
T9
.
partition_date
AND
T1
.
device_
type
=
T9
.
device
_type
AND
T1
.
device_
os_type
=
T9
.
device_os
_type
AND
T1
.
active_type
=
T9
.
active_type
AND
T1
.
channel
=
T9
.
channel
ORDER
BY
T1
.
partition_date
desc
,
T1
.
device_type
,
T1
.
active_type
,
T1
.
channel
ORDER
BY
day_id
desc
,
device_os_type
,
active_type
,
is_ai_
channel
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment