Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
B
bi-report
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
data
bi-report
Commits
8a90473c
Commit
8a90473c
authored
Jun 28, 2020
by
魏艺敏
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update codes
parent
9db36996
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
636 additions
and
2 deletions
+636
-2
search_related.zip
jobs/search_related/search_related.zip
+0
-0
step2.job
jobs/search_related/step2.job
+2
-2
search_related_insert.sql
sqls/search_related_insert/search_related_insert.sql
+634
-0
No files found.
jobs/search_related/search_related.zip
View file @
8a90473c
No preview for this file type
jobs/search_related/step2.job
View file @
8a90473c
#step2.job
#step2.job
type=command
type=command
dependencies=step1_1,step1_2,step1_3,step1_4,step1_5
dependencies=step1_1,step1_2,step1_3,step1_4,step1_5
command=/home/bi/bi-report/shells/search_related/search_related.sh
command=/home/bi/bi-report/lib/shell/hive.sh search_related_insert
\ No newline at end of file
\ No newline at end of file
sqls/search_related_insert/search_related_insert.sql
0 → 100644
View file @
8a90473c
-- Session settings applied before the report INSERT statements below.

-- Queue the MapReduce jobs are submitted to.
SET mapreduce.job.queuename=data;

-- Map tasks: 8192 MB task memory, 8000 MB maximum JVM heap.
SET mapreduce.map.memory.mb=8192;
SET mapreduce.map.java.opts=-Xmx8000m;

-- Reduce tasks: same memory sizing as the map tasks.
SET mapreduce.reduce.memory.mb=8192;
SET mapreduce.reduce.java.opts=-Xmx8000m;

-- Allow Hive to convert eligible joins into map-side joins.
SET hive.auto.convert.join=true;

-- Use 20 reducers.
SET mapred.reduce.tasks=20;

-- Run the statements below under the admin role.
SET role admin;
-- Search-entrance click report for yesterday.
--
-- Output columns (positional): day_id, device_os_type, active_type,
-- search_entrance, click_search_pv, click_search_uv.
--
-- Every qualifying click event is labelled with the search entrance it came
-- from, then emitted twice via explode(): once under its own entrance and once
-- under the '合计' (total) entrance. PV and UV are aggregated AFTER the explode
-- so that the '合计' UV is a true distinct-device count. Summing per-entrance
-- distinct counts (the previous approach) counted a device once per entrance
-- it used and therefore overstated the total UV.
--
-- NOTE(review): the target partition comes from ${partition_day}, but the data
-- is always filtered on current_date - 1. A backfill run with another
-- partition_day would store yesterday's data under that partition — confirm
-- this is intended.
INSERT OVERWRITE TABLE pm.tl_pm_search_click_path_d PARTITION (PARTITION_DAY = ${partition_day})
SELECT
    e.partition_date        AS day_id,
    e.device_os_type        AS device_os_type,
    e.active_type           AS active_type,
    v.search_entrance       AS search_entrance,
    count(e.cl_id)          AS click_search_pv,
    count(DISTINCT e.cl_id) AS click_search_uv
FROM (
    -- One row per click event of a non-blacklisted active device, labelled
    -- with its search entrance (NULL when the event matches no entrance).
    SELECT
        dev.partition_date,
        dev.device_os_type,
        dev.active_type,
        evt.cl_id,
        CASE
            WHEN evt.action = 'on_click_navbar_search' AND evt.page_name = 'home'
                THEN '首页搜索框'
            WHEN evt.action = 'on_click_navbar_search' AND evt.page_name = 'welfare_home'
                THEN '美购首页搜索框'
            WHEN evt.action = 'on_click_navbar_search' AND evt.page_name = 'category'
                THEN '品类聚合页搜索框'
            WHEN evt.action = 'on_click_navbar_search' AND evt.page_name = 'welfare_list'
                 AND evt.referrer_link = '["home","category"]'
                THEN '来自品类聚合的美购列表页搜索框'
            WHEN evt.action = 'on_click_navbar_search' AND evt.page_name = 'welfare_list'
                 AND evt.referrer_link = '["welfare_home"]'
                THEN '来自美购首页的美购列表页搜索框'
            WHEN evt.action IN ('do_search', 'on_click_navbar_search')
                 AND evt.page_name IN ('diary_detail', 'topic_detail', 'post_detail',
                                       'user_post_detail', 'doctor_post_detail',
                                       'question_detail', 'answer_detail',
                                       'question_answer_detail')
                THEN '内容详情页搜索框'
            WHEN evt.action = '详情页高亮词' THEN '详情页高亮词搜索'
            WHEN evt.action = '大家都在看'   THEN '首页feed大家都在看'
            WHEN evt.action = '热搜词'       THEN '美购首页热搜词'
            ELSE NULL
        END AS search_entrance
    FROM (
        -- Yesterday's active devices, bucketed into old-active ('4') and
        -- new ('1', '2') devices, excluding blacklisted acquisition channels.
        SELECT
            partition_date,
            device_os_type,
            CASE
                WHEN active_type = '4' THEN '老活跃设备'
                WHEN active_type IN ('1', '2') THEN '新增设备'
            END AS active_type,
            device_id
        FROM online.ml_device_day_active_status
        WHERE partition_date = regexp_replace(date_sub(current_date, 1), '-', '')
          AND active_type IN ('1', '2', '4')
          AND first_channel_source_type NOT IN (
                'yqxiu1', 'yqxiu2', 'yqxiu3', 'yqxiu4', 'yqxiu5', 'mxyc1', 'mxyc2', 'mxyc3',
                'wanpu', 'jinshan', 'jx', 'maimai', 'zhuoyi', 'huatian', 'suopingjingling',
                'mocha', 'mizhe', 'meika', 'lamabang',
                'js-az1', 'js-az2', 'js-az3', 'js-az4', 'js-az5',
                'jfq-az1', 'jfq-az2', 'jfq-az3', 'jfq-az4', 'jfq-az5',
                'toufang1', 'toufang2', 'toufang3', 'toufang4', 'toufang5', 'toufang6',
                'TF-toufang1', 'TF-toufang2', 'TF-toufang3', 'TF-toufang4', 'TF-toufang5',
                'tf-toufang1', 'tf-toufang2', 'tf-toufang3', 'tf-toufang4', 'tf-toufang5',
                'benzhan', 'promotion_aso100', 'promotion_qianka', 'promotion_xiaoyu',
                'promotion_dianru', 'promotion_malioaso', 'promotion_malioaso-shequ',
                'promotion_shike', 'promotion_julang_jl03', 'promotion_zuimei')
          AND first_channel_source_type NOT LIKE 'promotion\_jf\_%'
    ) dev
    JOIN (
        -- Search-box clicks and submitted searches.
        SELECT
            partition_date,
            cl_id,
            page_name,
            action,
            params['referrer_link'] AS referrer_link
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date = regexp_replace(date_sub(current_date, 1), '-', '')
          AND action IN ('do_search', 'on_click_navbar_search')

        UNION ALL

        -- Search-word cards in the home feed.
        -- page_name = 'home' is deliberately not filtered: Android sends an
        -- empty page_name for this event.
        SELECT
            partition_date,
            cl_id,
            NULL         AS page_name,
            '大家都在看' AS action,
            NULL         AS referrer_link
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date = regexp_replace(date_sub(current_date, 1), '-', '')
          AND action = 'on_click_card'
          AND params['in_page_pos'] = '猜你喜欢'
          AND params['tab_name'] = '精选'
          AND params['card_type'] = 'search_word'

        UNION ALL

        -- Hot-search-word cards on the welfare home page.
        SELECT
            partition_date,
            cl_id,
            page_name,
            '热搜词' AS action,
            NULL     AS referrer_link
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date = regexp_replace(date_sub(current_date, 1), '-', '')
          AND action = 'on_click_card'
          AND page_name = 'welfare_home'
          AND params['card_type'] = 'search_word'
          AND params['in_page_pos'] = '大家都在搜'

        UNION ALL

        -- Highlighted-word cards.
        SELECT
            partition_date,
            cl_id,
            NULL           AS page_name,
            '详情页高亮词' AS action,
            NULL           AS referrer_link
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date = regexp_replace(date_sub(current_date, 1), '-', '')
          AND action = 'on_click_card'
          AND params['card_type'] = 'highlight_word'
    ) evt
      ON dev.partition_date = evt.partition_date
     AND dev.device_id = evt.cl_id
) e
-- Emit each event under its own entrance and under the '合计' total.
LATERAL VIEW explode(array(e.search_entrance, '合计')) v AS search_entrance
-- Events that match no known entrance are dropped (also from the total).
WHERE e.search_entrance IS NOT NULL
GROUP BY
    e.partition_date,
    e.device_os_type,
    e.active_type,
    v.search_entrance
;
-- Search volume per keyword and input type for yesterday.
--
-- Output columns (positional): day_id, query, input_type, search_pv.
--
-- Searches are first counted per (day, query, input type), then every count is
-- emitted twice via explode(): once under its own input type and once under
-- '全部' (all). PV is additive, so summing after the explode is exact.
--
-- The unused count(distinct cl_id) that the inner aggregation used to compute
-- has been removed; it was never read and only added a distinct aggregation.
--
-- NOTE(review): the target partition comes from ${partition_day}, but the data
-- is always filtered on current_date - 1 — confirm backfills are not expected
-- to work through partition_day alone.
INSERT OVERWRITE TABLE pm.tl_pm_search_input_type_d PARTITION (PARTITION_DAY = ${partition_day})
SELECT
    agg.partition_date     AS day_id,
    agg.query              AS query,
    x.input_type           AS input_type,
    sum(agg.all_search_pv) AS search_pv
FROM (
    SELECT
        click.partition_date,
        click.query,
        -- '联想' and '聚合模块' are reported together as 'sug点击'.
        array(
            CASE WHEN click.input_type IN ('联想', '聚合模块') THEN 'sug点击' ELSE click.input_type END,
            '全部'
        ) AS input_types,
        count(click.cl_id) AS all_search_pv
    FROM (
        -- Submitted searches; '热门' and 'related_search' are reported as '发现'.
        SELECT
            cl_id,
            partition_date,
            CASE
                WHEN params['input_type'] IN ('热门', 'related_search') THEN '发现'
                ELSE params['input_type']
            END AS input_type,
            params['query'] AS query
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date = regexp_replace(date_sub(current_date, 1), '-', '')
          AND action IN ('do_search', 'search_result_click_search')

        UNION ALL

        -- Search-word cards in the home feed.
        -- page_name = 'home' is deliberately not filtered: Android sends an
        -- empty page_name for this event.
        SELECT
            cl_id,
            partition_date,
            '首页精选大家都在看' AS input_type,
            params['card_name']  AS query
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date = regexp_replace(date_sub(current_date, 1), '-', '')
          AND action = 'on_click_card'
          AND params['in_page_pos'] = '猜你喜欢'
          AND params['tab_name'] = '精选'
          AND params['card_type'] = 'search_word'

        UNION ALL

        -- Hot-search-word cards on the welfare home page.
        SELECT
            cl_id,
            partition_date,
            '美购首页热搜词'    AS input_type,
            params['card_name'] AS query
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date = regexp_replace(date_sub(current_date, 1), '-', '')
          AND action = 'on_click_card'
          AND page_name = 'welfare_home'
          AND params['card_type'] = 'search_word'
          AND params['in_page_pos'] = '大家都在搜'

        UNION ALL

        -- Highlighted-word cards; only rows whose second app_version component
        -- is at least 27 are counted.
        SELECT
            cl_id,
            partition_date,
            '详情页高亮词'      AS input_type,
            params['card_name'] AS query
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date = regexp_replace(date_sub(current_date, 1), '-', '')
          AND int(split(app_version, '\\.')[1]) >= 27
          AND action = 'on_click_card'
          AND params['card_type'] = 'highlight_word'
    ) click
    JOIN (
        -- Devices whose first acquisition channel is not blacklisted.
        SELECT device_id
        FROM online.ml_device_history_detail
        WHERE partition_date = regexp_replace(date_sub(current_date, 1), '-', '')
          AND first_channel_source_type NOT IN (
                'yqxiu1', 'yqxiu2', 'yqxiu3', 'yqxiu4', 'yqxiu5', 'mxyc1', 'mxyc2', 'mxyc3',
                'wanpu', 'jinshan', 'jx', 'maimai', 'zhuoyi', 'huatian', 'suopingjingling',
                'mocha', 'mizhe', 'meika', 'lamabang',
                'js-az1', 'js-az2', 'js-az3', 'js-az4', 'js-az5',
                'jfq-az1', 'jfq-az2', 'jfq-az3', 'jfq-az4', 'jfq-az5',
                'toufang1', 'toufang2', 'toufang3', 'toufang4', 'toufang5', 'toufang6',
                'TF-toufang1', 'TF-toufang2', 'TF-toufang3', 'TF-toufang4', 'TF-toufang5',
                'tf-toufang1', 'tf-toufang2', 'tf-toufang3', 'tf-toufang4', 'tf-toufang5',
                'benzhan', 'promotion_aso100', 'promotion_qianka', 'promotion_xiaoyu',
                'promotion_dianru', 'promotion_malioaso', 'promotion_malioaso-shequ',
                'promotion_shike', 'promotion_julang_jl03', 'promotion_zuimei')
          AND first_channel_source_type NOT LIKE 'promotion\_jf\_%'
    ) dev_channel
      ON dev_channel.device_id = click.cl_id
    GROUP BY
        click.partition_date,
        click.query,
        CASE WHEN click.input_type IN ('联想', '聚合模块') THEN 'sug点击' ELSE click.input_type END
) agg
LATERAL VIEW explode(agg.input_types) x AS input_type
GROUP BY
    agg.partition_date,
    agg.query,
    x.input_type
;
-- Daily search-keyword report: for yesterday, the 300 most-searched keywords
-- with their search, result-page-view and result-click PV/UV, split by result
-- tab ('综合' = general, '美购' = welfare, '日记' = diary).
--
-- Changes versus the previous version of this statement:
--   * The spam anti-join is keyed on the tab-search row's cl_id. It used to be
--     keyed on the page-view row's cl_id, which is NULL whenever the LEFT JOIN
--     to page views found no match, so spam devices without a page view were
--     never filtered out of the search_*_pv / search_*_uv columns.
--   * The top-300 ranking has a tie-breaker (query) so reruns pick the same
--     keywords when several keywords share the same search PV.
--   * The header comment matches the code (yesterday only, top 300).
--
-- NOTE(review): the target partition comes from ${partition_day}, but the data
-- is always filtered on current_date - 1 — confirm backfills are not expected
-- to work through partition_day alone.
-- NOTE(review): search_pv / search_uv of the top-300 list are not spam
-- filtered, unlike the per-tab metrics — confirm this is intended.
INSERT OVERWRITE TABLE pm.tl_pm_search_result_click_rate_d PARTITION (PARTITION_DAY = ${partition_day})
SELECT
    kw.partition_date                AS day_id,
    kw.query                         AS query,
    NVL(kw.search_pv, 0)             AS search_pv,
    NVL(kw.search_uv, 0)             AS search_uv,
    NVL(m.search_more_pv, 0)         AS search_more_pv,
    NVL(m.page_more_pv, 0)           AS page_more_pv,
    NVL(m.more_click_pv, 0)          AS more_click_pv,
    NVL(m.more_click_10_pv, 0)       AS more_click_10_pv,
    NVL(m.more_click_5_diary_pv, 0)  AS more_click_5_diary_pv,
    NVL(m.more_click_5_answer_pv, 0) AS more_click_5_answer_pv,
    NVL(m.more_click_5_topic_pv, 0)  AS more_click_5_topic_pv,
    NVL(m.search_wel_pv, 0)          AS search_wel_pv,
    NVL(m.page_wel_pv, 0)            AS page_wel_pv,
    NVL(m.wel_click_pv, 0)           AS wel_click_pv,
    NVL(m.wel_click_10_pv, 0)        AS wel_click_10_pv,
    NVL(m.wel_click_5_pv, 0)         AS wel_click_5_pv,
    NVL(m.search_diary_pv, 0)        AS search_diary_pv,
    NVL(m.page_diary_pv, 0)          AS page_diary_pv,
    NVL(m.diary_click_pv, 0)         AS diary_click_pv,
    NVL(m.diary_click_10_pv, 0)      AS diary_click_10_pv,
    NVL(m.diary_click_5_pv, 0)       AS diary_click_5_pv,
    NVL(m.search_more_uv, 0)         AS search_more_uv,
    NVL(m.page_more_uv, 0)           AS page_more_uv,
    NVL(m.more_click_uv, 0)          AS more_click_uv,
    NVL(m.more_click_10_uv, 0)       AS more_click_10_uv,
    NVL(m.more_click_5_diary_uv, 0)  AS more_click_5_diary_uv,
    NVL(m.more_click_5_answer_uv, 0) AS more_click_5_answer_uv,
    NVL(m.more_click_5_topic_uv, 0)  AS more_click_5_topic_uv,
    NVL(m.search_wel_uv, 0)          AS search_wel_uv,
    NVL(m.page_wel_uv, 0)            AS page_wel_uv,
    NVL(m.wel_click_uv, 0)           AS wel_click_uv,
    NVL(m.wel_click_10_uv, 0)        AS wel_click_10_uv,
    NVL(m.wel_click_5_uv, 0)         AS wel_click_5_uv,
    NVL(m.search_diary_uv, 0)        AS search_diary_uv,
    NVL(m.page_diary_uv, 0)          AS page_diary_uv,
    NVL(m.diary_click_uv, 0)         AS diary_click_uv,
    NVL(m.diary_click_10_uv, 0)      AS diary_click_10_uv,
    NVL(m.diary_click_5_uv, 0)       AS diary_click_5_uv
FROM (
    -- Total search volume per keyword; keep the 300 most-searched keywords.
    SELECT
        partition_date,
        query,
        search_pv,
        search_uv
    FROM (
        SELECT
            ev.partition_date,
            ev.query,
            count(ev.cl_id)          AS search_pv,
            count(DISTINCT ev.cl_id) AS search_uv,
            row_number() OVER (
                PARTITION BY ev.partition_date
                ORDER BY count(ev.cl_id) DESC, ev.query
            ) AS rn
        FROM (
            -- Submitted searches.
            SELECT
                partition_date,
                params['query'] AS query,
                cl_id
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date = regexp_replace(date_sub(current_date, 1), '-', '')
              AND action IN ('do_search', 'search_result_click_search')

            UNION ALL

            -- Search-word cards in the home feed.
            -- page_name = 'home' is deliberately not filtered: Android sends
            -- an empty page_name for this event.
            SELECT
                partition_date,
                params['card_name'] AS query,
                cl_id
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date = regexp_replace(date_sub(current_date, 1), '-', '')
              AND action = 'on_click_card'
              AND params['in_page_pos'] = '猜你喜欢'
              AND params['tab_name'] = '精选'
              AND params['card_type'] = 'search_word'

            UNION ALL

            -- Hot-search-word cards on the welfare home page.
            SELECT
                partition_date,
                params['card_name'] AS query,
                cl_id
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date = regexp_replace(date_sub(current_date, 1), '-', '')
              AND action = 'on_click_card'
              AND page_name = 'welfare_home'
              AND params['card_type'] = 'search_word'
              AND params['in_page_pos'] = '大家都在搜'

            UNION ALL

            -- Highlighted-word cards; only rows whose second app_version
            -- component is at least 27 are counted.
            SELECT
                partition_date,
                params['card_name'] AS query,
                cl_id
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date = regexp_replace(date_sub(current_date, 1), '-', '')
              AND int(split(app_version, '\\.')[1]) >= 27
              AND action = 'on_click_card'
              AND params['card_type'] = 'highlight_word'
        ) ev
        JOIN (
            -- Devices whose first acquisition channel is not blacklisted.
            SELECT device_id
            FROM online.ml_device_history_detail
            WHERE partition_date = regexp_replace(date_sub(current_date, 1), '-', '')
              AND first_channel_source_type NOT IN (
                    'yqxiu1', 'yqxiu2', 'yqxiu3', 'yqxiu4', 'yqxiu5', 'mxyc1', 'mxyc2', 'mxyc3',
                    'wanpu', 'jinshan', 'jx', 'maimai', 'zhuoyi', 'huatian', 'suopingjingling',
                    'mocha', 'mizhe', 'meika', 'lamabang',
                    'js-az1', 'js-az2', 'js-az3', 'js-az4', 'js-az5',
                    'jfq-az1', 'jfq-az2', 'jfq-az3', 'jfq-az4', 'jfq-az5',
                    'toufang1', 'toufang2', 'toufang3', 'toufang4', 'toufang5', 'toufang6',
                    'TF-toufang1', 'TF-toufang2', 'TF-toufang3', 'TF-toufang4', 'TF-toufang5',
                    'tf-toufang1', 'tf-toufang2', 'tf-toufang3', 'tf-toufang4', 'tf-toufang5',
                    'benzhan', 'promotion_aso100', 'promotion_qianka', 'promotion_xiaoyu',
                    'promotion_dianru', 'promotion_malioaso', 'promotion_malioaso-shequ',
                    'promotion_shike', 'promotion_julang_jl03', 'promotion_zuimei')
              AND first_channel_source_type NOT LIKE 'promotion\_jf\_%'
        ) ok_dev
          ON ev.cl_id = ok_dev.device_id
        GROUP BY
            ev.partition_date,
            ev.query
    ) ranked
    WHERE rn <= 300
) kw
LEFT JOIN (
    -- Per-keyword tab search, result page view and result click PV/UV.
    -- Every joined subquery is pre-aggregated to one row per
    -- (day, query, device, tab), so the LEFT JOINs do not multiply rows.
    SELECT
        srch.partition_date,
        srch.query,
        -- Searches per tab.
        sum(CASE WHEN srch.tab_name = '综合' THEN srch.pv END)               AS search_more_pv,
        count(DISTINCT CASE WHEN srch.tab_name = '综合' THEN srch.cl_id END) AS search_more_uv,
        sum(CASE WHEN srch.tab_name = '美购' THEN srch.pv END)               AS search_wel_pv,
        count(DISTINCT CASE WHEN srch.tab_name = '美购' THEN srch.cl_id END) AS search_wel_uv,
        sum(CASE WHEN srch.tab_name = '日记' THEN srch.pv END)               AS search_diary_pv,
        count(DISTINCT CASE WHEN srch.tab_name = '日记' THEN srch.cl_id END) AS search_diary_uv,
        -- Result page views per tab.
        sum(CASE WHEN pg.page_name = '综合' THEN pg.pv END)                  AS page_more_pv,
        count(DISTINCT CASE WHEN pg.page_name = '综合' THEN pg.cl_id END)    AS page_more_uv,
        sum(CASE WHEN pg.page_name = '美购' THEN pg.pv END)                  AS page_wel_pv,
        count(DISTINCT CASE WHEN pg.page_name = '美购' THEN pg.cl_id END)    AS page_wel_uv,
        sum(CASE WHEN pg.page_name = '日记' THEN pg.pv END)                  AS page_diary_pv,
        count(DISTINCT CASE WHEN pg.page_name = '日记' THEN pg.cl_id END)    AS page_diary_uv,
        -- Result card clicks per tab, any position.
        sum(CASE WHEN clk.page_name = '综合' THEN clk.pv END)                AS more_click_pv,
        count(DISTINCT CASE WHEN clk.page_name = '综合' THEN clk.cl_id END)  AS more_click_uv,
        sum(CASE WHEN clk.page_name = '美购' THEN clk.pv END)                AS wel_click_pv,
        count(DISTINCT CASE WHEN clk.page_name = '美购' THEN clk.cl_id END)  AS wel_click_uv,
        sum(CASE WHEN clk.page_name = '日记' THEN clk.pv END)                AS diary_click_pv,
        count(DISTINCT CASE WHEN clk.page_name = '日记' THEN clk.cl_id END)  AS diary_click_uv,
        -- Result card clicks per tab, positions 0-9.
        sum(CASE WHEN clk10.page_name = '综合' THEN clk10.pv END)               AS more_click_10_pv,
        count(DISTINCT CASE WHEN clk10.page_name = '综合' THEN clk10.cl_id END) AS more_click_10_uv,
        sum(CASE WHEN clk10.page_name = '美购' THEN clk10.pv END)               AS wel_click_10_pv,
        count(DISTINCT CASE WHEN clk10.page_name = '美购' THEN clk10.cl_id END) AS wel_click_10_uv,
        sum(CASE WHEN clk10.page_name = '日记' THEN clk10.pv END)               AS diary_click_10_pv,
        count(DISTINCT CASE WHEN clk10.page_name = '日记' THEN clk10.cl_id END) AS diary_click_10_uv,
        -- Welfare / diary tab card clicks, positions 0-4.
        sum(CASE WHEN clk5.page_name = '美购' THEN clk5.pv END)                 AS wel_click_5_pv,
        count(DISTINCT CASE WHEN clk5.page_name = '美购' THEN clk5.cl_id END)   AS wel_click_5_uv,
        sum(CASE WHEN clk5.page_name = '日记' THEN clk5.pv END)                 AS diary_click_5_pv,
        count(DISTINCT CASE WHEN clk5.page_name = '日记' THEN clk5.cl_id END)   AS diary_click_5_uv,
        -- General tab: clicks on the first five exposed diary / answer / post cards.
        sum(CASE WHEN d5.page_name = '综合' THEN d5.pv END)                  AS more_click_5_diary_pv,
        count(DISTINCT CASE WHEN d5.page_name = '综合' THEN d5.cl_id END)    AS more_click_5_diary_uv,
        sum(CASE WHEN a5.page_name = '综合' THEN a5.pv END)                  AS more_click_5_answer_pv,
        count(DISTINCT CASE WHEN a5.page_name = '综合' THEN a5.cl_id END)    AS more_click_5_answer_uv,
        sum(CASE WHEN p5.page_name = '综合' THEN p5.pv END)                  AS more_click_5_topic_pv,
        count(DISTINCT CASE WHEN p5.page_name = '综合' THEN p5.cl_id END)    AS more_click_5_topic_uv
    FROM (
        -- Searches per tab.
        SELECT
            partition_date,
            params['query'] AS query,
            cl_id,
            params['tab']   AS tab_name,
            count(1)        AS pv
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date = regexp_replace(date_sub(current_date, 1), '-', '')
          AND params['tab'] IN ('综合', '美购', '日记')
          AND action IN ('search_result_click_tab', 'search_result_click_search')
        GROUP BY
            partition_date,
            params['query'],
            cl_id,
            params['tab']
    ) srch
    LEFT JOIN (
        -- Result page views.
        SELECT
            partition_date,
            get_json_object(params['extra_param'], '$.query') AS query,
            cl_id,
            CASE
                WHEN page_name = 'search_result_more'    THEN '综合'
                WHEN page_name = 'search_result_welfare' THEN '美购'
                WHEN page_name = 'search_result_diary'   THEN '日记'
            END AS page_name,
            count(1) AS pv
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date = regexp_replace(date_sub(current_date, 1), '-', '')
          AND page_name IN ('search_result_more', 'search_result_welfare', 'search_result_diary')
          AND action = 'page_view'
        GROUP BY
            partition_date,
            get_json_object(params['extra_param'], '$.query'),
            cl_id,
            CASE
                WHEN page_name = 'search_result_more'    THEN '综合'
                WHEN page_name = 'search_result_welfare' THEN '美购'
                WHEN page_name = 'search_result_diary'   THEN '日记'
            END
    ) pg
      ON srch.partition_date = pg.partition_date
     AND srch.query = pg.query
     AND srch.cl_id = pg.cl_id
     AND srch.tab_name = pg.page_name
    LEFT JOIN (
        -- Content card clicks, any position.
        SELECT
            partition_date,
            params['query'] AS query,
            cl_id,
            CASE
                WHEN page_name = 'search_result_more'    THEN '综合'
                WHEN page_name = 'search_result_welfare' THEN '美购'
                WHEN page_name = 'search_result_diary'   THEN '日记'
            END AS page_name,
            count(1) AS pv
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date = regexp_replace(date_sub(current_date, 1), '-', '')
          AND action IN ('search_result_click_infomation_item', 'on_click_topic_card',
                         'on_click_diary_card', 'search_result_welfare_click_item')
          AND page_name IN ('search_result_more', 'search_result_welfare', 'search_result_diary')
        GROUP BY
            partition_date,
            params['query'],
            cl_id,
            CASE
                WHEN page_name = 'search_result_more'    THEN '综合'
                WHEN page_name = 'search_result_welfare' THEN '美购'
                WHEN page_name = 'search_result_diary'   THEN '日记'
            END
    ) clk
      ON pg.partition_date = clk.partition_date
     AND pg.query = clk.query
     AND pg.cl_id = clk.cl_id
     AND pg.page_name = clk.page_name
    LEFT JOIN (
        -- Content card clicks on the first ten positions.
        SELECT
            partition_date,
            params['query'] AS query,
            cl_id,
            CASE
                WHEN page_name = 'search_result_more'    THEN '综合'
                WHEN page_name = 'search_result_welfare' THEN '美购'
                WHEN page_name = 'search_result_diary'   THEN '日记'
            END AS page_name,
            count(1) AS pv
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date = regexp_replace(date_sub(current_date, 1), '-', '')
          AND action IN ('search_result_click_infomation_item', 'on_click_topic_card',
                         'on_click_diary_card', 'search_result_welfare_click_item')
          AND params['position'] IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)
          AND page_name IN ('search_result_more', 'search_result_welfare', 'search_result_diary')
        GROUP BY
            partition_date,
            params['query'],
            cl_id,
            CASE
                WHEN page_name = 'search_result_more'    THEN '综合'
                WHEN page_name = 'search_result_welfare' THEN '美购'
                WHEN page_name = 'search_result_diary'   THEN '日记'
            END
    ) clk10
      ON pg.partition_date = clk10.partition_date
     AND pg.query = clk10.query
     AND pg.cl_id = clk10.cl_id
     AND pg.page_name = clk10.page_name
    LEFT JOIN (
        -- Diary / welfare tab card clicks on the first five positions.
        SELECT
            partition_date,
            params['query'] AS query,
            cl_id,
            CASE
                WHEN page_name = 'search_result_welfare' THEN '美购'
                WHEN page_name = 'search_result_diary'   THEN '日记'
            END AS page_name,
            count(1) AS pv
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date = regexp_replace(date_sub(current_date, 1), '-', '')
          AND (action = 'on_click_diary_card' AND page_name = 'search_result_diary'
               OR action = 'search_result_welfare_click_item' AND page_name = 'search_result_welfare')
          AND params['position'] IN (0, 1, 2, 3, 4)
        GROUP BY
            partition_date,
            params['query'],
            cl_id,
            CASE
                WHEN page_name = 'search_result_welfare' THEN '美购'
                WHEN page_name = 'search_result_diary'   THEN '日记'
            END
    ) clk5
      ON pg.partition_date = clk5.partition_date
     AND pg.query = clk5.query
     AND pg.cl_id = clk5.cl_id
     AND pg.page_name = clk5.page_name
    LEFT JOIN (
        -- General tab: clicks landing on one of the five lowest exposed
        -- positions of diary cards for the same query.
        SELECT
            c.partition_date,
            c.query,
            c.cl_id,
            '综合'   AS page_name,
            count(1) AS pv
        FROM (
            SELECT
                partition_date,
                query,
                position
            FROM (
                SELECT
                    partition_date,
                    params['query']                AS query,
                    cast(absolute_position AS int) AS position,
                    row_number() OVER (
                        PARTITION BY params['query'], partition_date
                        ORDER BY cast(absolute_position AS int) ASC
                    ) AS rn
                FROM online.ml_community_precise_exposure_detail
                WHERE partition_date = regexp_replace(date_sub(current_date, 1), '-', '')
                  AND page_name = 'search_result_more'
                  -- As of version 7745 the action was renamed to page_precise_exposure.
                  AND action IN ('page_precise_exposure', 'home_choiceness_card_exposure')
                  -- Precise exposures only.
                  AND is_exposure = '1'
                  AND card_content_type = 'diary'
                GROUP BY
                    partition_date,
                    params['query'],
                    cast(absolute_position AS int)
            ) a
            WHERE rn <= 5
        ) b
        JOIN (
            SELECT
                partition_date,
                params['query']    AS query,
                cl_id,
                params['position'] AS position
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date = regexp_replace(date_sub(current_date, 1), '-', '')
              AND page_name = 'search_result_more'
              AND (action = 'search_result_click_infomation_item'
                       AND params['business_type'] IN ('diary', '0')
                   OR action = 'on_click_diary_card')
        ) c
          ON b.partition_date = c.partition_date
         AND b.query = c.query
         AND b.position = c.position
        GROUP BY
            c.partition_date,
            c.query,
            c.cl_id
    ) d5
      ON pg.partition_date = d5.partition_date
     AND pg.query = d5.query
     AND pg.cl_id = d5.cl_id
     AND pg.page_name = d5.page_name
    LEFT JOIN (
        -- General tab: same as above for answer cards.
        SELECT
            c.partition_date,
            c.query,
            c.cl_id,
            '综合'   AS page_name,
            count(1) AS pv
        FROM (
            SELECT
                partition_date,
                query,
                position
            FROM (
                SELECT
                    partition_date,
                    params['query']                AS query,
                    cast(absolute_position AS int) AS position,
                    row_number() OVER (
                        PARTITION BY params['query'], partition_date
                        ORDER BY cast(absolute_position AS int) ASC
                    ) AS rn
                FROM online.ml_community_precise_exposure_detail
                WHERE partition_date = regexp_replace(date_sub(current_date, 1), '-', '')
                  AND page_name = 'search_result_more'
                  -- As of version 7745 the action was renamed to page_precise_exposure.
                  AND action IN ('page_precise_exposure', 'home_choiceness_card_exposure')
                  -- Precise exposures only.
                  AND is_exposure = '1'
                  AND card_content_type = 'answer'
                GROUP BY
                    partition_date,
                    params['query'],
                    cast(absolute_position AS int)
            ) a
            WHERE rn <= 5
        ) b
        JOIN (
            SELECT
                partition_date,
                params['query']    AS query,
                cl_id,
                params['position'] AS position
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date = regexp_replace(date_sub(current_date, 1), '-', '')
              AND page_name = 'search_result_more'
              AND action = 'search_result_click_infomation_item'
              AND params['business_type'] IN ('answer', '5')
        ) c
          ON b.partition_date = c.partition_date
         AND b.query = c.query
         AND b.position = c.position
        GROUP BY
            c.partition_date,
            c.query,
            c.cl_id
    ) a5
      ON pg.partition_date = a5.partition_date
     AND pg.query = a5.query
     AND pg.cl_id = a5.cl_id
     AND pg.page_name = a5.page_name
    LEFT JOIN (
        -- General tab: same as above for post cards.
        SELECT
            c.partition_date,
            c.query,
            c.cl_id,
            '综合'   AS page_name,
            count(1) AS pv
        FROM (
            SELECT
                partition_date,
                query,
                position
            FROM (
                SELECT
                    partition_date,
                    params['query']                AS query,
                    cast(absolute_position AS int) AS position,
                    row_number() OVER (
                        PARTITION BY params['query'], partition_date
                        ORDER BY cast(absolute_position AS int) ASC
                    ) AS rn
                FROM online.ml_community_precise_exposure_detail
                WHERE partition_date = regexp_replace(date_sub(current_date, 1), '-', '')
                  AND page_name = 'search_result_more'
                  -- As of version 7745 the action was renamed to page_precise_exposure.
                  AND action IN ('page_precise_exposure', 'home_choiceness_card_exposure')
                  -- Precise exposures only.
                  AND is_exposure = '1'
                  AND card_content_type = 'user_post'
                GROUP BY
                    partition_date,
                    params['query'],
                    cast(absolute_position AS int)
            ) a
            WHERE rn <= 5
        ) b
        JOIN (
            SELECT
                partition_date,
                params['query']    AS query,
                cl_id,
                params['position'] AS position
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date = regexp_replace(date_sub(current_date, 1), '-', '')
              AND page_name = 'search_result_more'
              AND (action = 'search_result_click_infomation_item'
                       AND params['business_type'] IN ('post', 'user_post', 'doctor_post', '11')
                   OR action = 'on_click_topic_card')
        ) c
          ON b.partition_date = c.partition_date
         AND b.query = c.query
         AND b.position = c.position
        GROUP BY
            c.partition_date,
            c.query,
            c.cl_id
    ) p5
      ON pg.partition_date = p5.partition_date
     AND pg.query = p5.query
     AND pg.cl_id = p5.cl_id
     AND pg.page_name = p5.page_name
    JOIN (
        -- Devices whose first acquisition channel is not blacklisted.
        SELECT device_id
        FROM online.ml_device_history_detail
        WHERE partition_date = regexp_replace(date_sub(current_date, 1), '-', '')
          AND first_channel_source_type NOT IN (
                'yqxiu1', 'yqxiu2', 'yqxiu3', 'yqxiu4', 'yqxiu5', 'mxyc1', 'mxyc2', 'mxyc3',
                'wanpu', 'jinshan', 'jx', 'maimai', 'zhuoyi', 'huatian', 'suopingjingling',
                'mocha', 'mizhe', 'meika', 'lamabang',
                'js-az1', 'js-az2', 'js-az3', 'js-az4', 'js-az5',
                'jfq-az1', 'jfq-az2', 'jfq-az3', 'jfq-az4', 'jfq-az5',
                'toufang1', 'toufang2', 'toufang3', 'toufang4', 'toufang5', 'toufang6',
                'TF-toufang1', 'TF-toufang2', 'TF-toufang3', 'TF-toufang4', 'TF-toufang5',
                'tf-toufang1', 'tf-toufang2', 'tf-toufang3', 'tf-toufang4', 'tf-toufang5',
                'benzhan', 'promotion_aso100', 'promotion_qianka', 'promotion_xiaoyu',
                'promotion_dianru', 'promotion_malioaso', 'promotion_malioaso-shequ',
                'promotion_shike', 'promotion_julang_jl03', 'promotion_zuimei')
          AND first_channel_source_type NOT LIKE 'promotion\_jf\_%'
    ) dev
      ON srch.cl_id = dev.device_id
    LEFT JOIN (
        -- Devices flagged as suspected spam; their PV and UV are excluded.
        SELECT DISTINCT device_id
        FROM ml.ml_d_ct_dv_devicespam_d
        WHERE partition_day = regexp_replace(date_sub(current_date, 1), '-', '')
    ) spam
      -- Keyed on the driving row (srch), not on pg: pg.cl_id is NULL when the
      -- device has no matching page view, which would let spam devices through.
      ON srch.cl_id = spam.device_id
    WHERE spam.device_id IS NULL
    GROUP BY
        srch.partition_date,
        srch.query
) m
  ON kw.partition_date = m.partition_date
 AND kw.query = m.query
ORDER BY
    day_id DESC,
    search_pv DESC,
    query
;
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment