Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
B
bi-report
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
data
bi-report
Commits
d40c9c2b
Commit
d40c9c2b
authored
Jul 03, 2020
by
魏艺敏
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update codes
parent
1d6e5108
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
378 additions
and
115 deletions
+378
-115
daily_recommend_strategy.zip
jobs/daily_recommend_strategy/daily_recommend_strategy.zip
+0
-0
step2.job
jobs/daily_recommend_strategy/step2.job
+2
-2
daily_recommend_strategy_insert.sql
...ommend_stategy_insert/daily_recommend_strategy_insert.sql
+376
-0
create_meigou_source-pv.sql
tables/create_meigou_source-pv.sql
+0
-113
No files found.
jobs/daily_recommend_strategy/daily_recommend_strategy.zip
View file @
d40c9c2b
No preview for this file type
jobs/daily_recommend_strategy/step2.job
View file @
d40c9c2b
#step2.job
#step2.job
type=command
type=command
dependencies=step1_1,step1_2,step1_3,step1_4,step1_5,step1_6,step1_7,step1_8,step1_9
dependencies=step1_1,step1_2,step1_3,step1_4,step1_5,step1_6,step1_7,step1_8,step1_9
command=/home/bi/bi-report/shells/daily_recommend_strategy/daily_recommend_strategy.sh
command=/home/bi/bi-report/lib/shell/hive.sh daily_recommend_strategy_insert
\ No newline at end of file
\ No newline at end of file
sqls/daily_recommend_stategy_insert/daily_recommend_strategy_insert.sql
0 → 100644
View file @
d40c9c2b
-- Session configuration for the daily recommend-strategy load.
-- Run the job in the queue named data.
SET mapreduce.job.queuename=data;
-- Container size and JVM heap for map and reduce tasks.
-- NOTE(review): the 8000m heap leaves under 200 MB of the 8192 MB container
-- for non-heap memory - confirm this margin is intentional.
SET mapreduce.map.memory.mb=8192;
SET mapreduce.map.java.opts=-Xmx8000m;
SET mapreduce.reduce.memory.mb=8192;
SET mapreduce.reduce.java.opts=-Xmx8000m;
-- Let Hive convert eligible joins automatically.
SET hive.auto.convert.join=true;
SET mapred.reduce.tasks=20;
SET role admin;
-- Register the in-house UDF jar and the convup function.
-- NOTE(review): convup is not referenced by the INSERT statement in this
-- script - confirm whether the registration is still needed.
ADD JAR hdfs:///user/hive/share/lib/udf/hive-udf-1.0-SNAPSHOT.jar;
CREATE TEMPORARY FUNCTION convup AS 'com.gmei.hive.common.udf.UDFConvUpgrade';
-- Overwrites one PARTITION_DAY partition of pm.tl_pm_recommend_strategy_d with
-- card exposure, click and follow-up metrics grouped by day, device OS,
-- active type, card content type and recommend type.
-- NOTE(review): the partition value comes from the partition_day placeholder
-- while every source filter is derived from current_date minus one day -
-- presumably the launcher always passes that same day, confirm before backfills.
INSERT OVERWRITE TABLE pm.tl_pm_recommend_strategy_d
PARTITION (PARTITION_DAY = $partition_day)
SELECT
    t1.partition_date AS day_id,
    t1.device_os_type AS device_os_type,
    t1.active_type AS active_type,
    t2.card_content_type AS card_content_type,
    t2.recommend_type AS recommend_type,
    NVL(SUM(t3.session_pv), 0) AS card_click,
    NVL(SUM(t2.session_pv), 0) AS card_exposure,
    -- Total stay divided by distinct devices with a stay record, then by 60,
    -- rounded to two decimals.
    NVL(ROUND(SUM(page_stay) / COUNT(DISTINCT t4.cl_id) / 60, 2), 0) AS avg_page_stay,
    NVL(SUM(navbar_pv), 0) AS navbar_search,
    NVL(SUM(highlight_pv), 0) AS highlight_word,
    NVL(SUM(self_wel_pv), 0) AS self_welfare_card,
    -- Clicks on the associated product cards must be excluded.
    NVL(SUM(recom_wel_pv), 0) - NVL(SUM(self_wel_pv), 0) AS recommend_welfare_card,
    NVL(SUM(recom_content_pv), 0) AS recommend_content_card,
    NULL AS recommend_special_card,
    NULL AS transfer_card,
    NULL AS video_consultation
FROM
(
    -- Devices active on the previous day, labelled by active type, with the
    -- listed channel sources filtered out.
    -- NOTE(review): NOT IN and NOT LIKE also drop rows whose
    -- first_channel_source_type is NULL - confirm that is intended.
    SELECT
        partition_date,
        device_os_type,
        CASE
            WHEN active_type = '4' THEN '老活'
            WHEN active_type IN ('1', '2') THEN '新增'
        END AS active_type,
        device_id
    FROM online.ml_device_day_active_status
    WHERE partition_date >= regexp_replace(DATE_SUB(current_date, 1), '-', '')
        AND partition_date < regexp_replace((current_date), '-', '')
        AND active_type IN ('1', '2', '4')
        AND first_channel_source_type NOT IN (
            'yqxiu1', 'yqxiu2', 'yqxiu3', 'yqxiu4', 'yqxiu5',
            'mxyc1', 'mxyc2', 'mxyc3',
            'wanpu', 'jinshan', 'jx', 'maimai', 'zhuoyi', 'huatian',
            'suopingjingling', 'mocha', 'mizhe', 'meika', 'lamabang',
            'js-az1', 'js-az2', 'js-az3', 'js-az4', 'js-az5',
            'jfq-az1', 'jfq-az2', 'jfq-az3', 'jfq-az4', 'jfq-az5',
            'toufang1', 'toufang2', 'toufang3', 'toufang4', 'toufang5', 'toufang6',
            'TF-toufang1', 'TF-toufang2', 'TF-toufang3', 'TF-toufang4', 'TF-toufang5',
            'tf-toufang1', 'tf-toufang2', 'tf-toufang3', 'tf-toufang4', 'tf-toufang5',
            'benzhan',
            'promotion_aso100', 'promotion_qianka', 'promotion_xiaoyu',
            'promotion_dianru', 'promotion_malioaso', 'promotion_malioaso-shequ',
            'promotion_shike', 'promotion_julang_jl03', 'promotion_zuimei'
        )
        AND first_channel_source_type NOT LIKE 'promotion\_jf\_%'
) t1
JOIN
(
    -- Precise exposure, deduplicated by card id and session id: one row per
    -- (partition_date, card_content_type, cl_id, recommend_type, card_id)
    -- carrying the number of distinct app sessions.
    SELECT
        partition_date,
        card_content_type,
        cl_id,
        recommend_type,
        card_id,
        COUNT(DISTINCT app_session_id) AS session_pv
    FROM
    (
        SELECT
            partition_date,
            cl_id,
            -- answer cards are folded into qa
            CASE
                WHEN card_content_type IN ('qa', 'answer') THEN 'qa'
                ELSE card_content_type
            END AS card_content_type,
            -- map the raw transaction_type onto the reported recommend type
            CASE
                WHEN transaction_type = 'ctr' THEN 'ctr预估'
                WHEN transaction_type = 'cvr' THEN 'cvr预估'
                WHEN transaction_type IN ('-1', 'smr') THEN 'smr'
                WHEN transaction_type IN ('pgc', 'hotspot') THEN '热点卡片'
                WHEN transaction_type = 'newdata' THEN '保量卡片'
            END AS recommend_type,
            card_id,
            app_session_id
        FROM online.ml_community_precise_exposure_detail
        WHERE partition_date >= regexp_replace(DATE_SUB(current_date, 1), '-', '')
            AND partition_date < regexp_replace((current_date), '-', '')
            -- from version 7745 the action was renamed to page_precise_exposure
            AND action IN ('page_precise_exposure', 'home_choiceness_card_exposure')
            -- precise exposure only
            AND is_exposure = '1'
            AND page_name = 'home'
            AND tab_name = '精选'
            AND transaction_type IN ('-1', 'ctr', 'smr', 'cvr', 'hotspot', 'pgc', 'newdata')
            AND card_content_type IN ('qa', 'diary', 'user_post', 'answer')
        GROUP BY
            partition_date,
            CASE
                WHEN card_content_type IN ('qa', 'answer') THEN 'qa'
                ELSE card_content_type
            END,
            cl_id,
            CASE
                WHEN transaction_type = 'ctr' THEN 'ctr预估'
                WHEN transaction_type = 'cvr' THEN 'cvr预估'
                WHEN transaction_type IN ('-1', 'smr') THEN 'smr'
                WHEN transaction_type IN ('pgc', 'hotspot') THEN '热点卡片'
                WHEN transaction_type = 'newdata' THEN '保量卡片'
            END,
            card_id,
            app_session_id
    ) a
    GROUP BY
        partition_date,
        card_content_type,
        cl_id,
        recommend_type,
        card_id
) t2
    ON t1.device_id = t2.cl_id
    AND t1.partition_date = t2.partition_date
LEFT JOIN
(
    -- Card clicks, deduplicated by card id and session id: one row per
    -- (partition_date, card_content_type, cl_id, recommend_type, card_id)
    -- carrying the number of distinct app sessions.
    SELECT
        partition_date,
        card_content_type,
        cl_id,
        recommend_type,
        card_id,
        COUNT(DISTINCT app_session_id) AS session_pv
    FROM
    (
        SELECT
            partition_date,
            cl_id,
            -- answer cards are folded into qa
            CASE
                WHEN params['card_content_type'] IN ('qa', 'answer') THEN 'qa'
                ELSE params['card_content_type']
            END AS card_content_type,
            -- map the raw transaction_type onto the reported recommend type
            CASE
                WHEN params['transaction_type'] = 'ctr' THEN 'ctr预估'
                WHEN params['transaction_type'] = 'cvr' THEN 'cvr预估'
                WHEN params['transaction_type'] IN ('-1', 'smr') THEN 'smr'
                WHEN params['transaction_type'] IN ('pgc', 'hotspot') THEN '热点卡片'
                WHEN params['transaction_type'] = 'newdata' THEN '保量卡片'
            END AS recommend_type,
            params['card_id'] AS card_id,
            app_session_id
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date >= regexp_replace(DATE_SUB(current_date, 1), '-', '')
            AND partition_date < regexp_replace((current_date), '-', '')
            AND action = 'on_click_card'
            AND params['page_name'] = 'home'
            AND params['tab_name'] = '精选'
            AND params['transaction_type'] IN ('-1', 'ctr', 'smr', 'cvr', 'hotspot', 'pgc', 'newdata')
            AND params['card_content_type'] IN ('qa', 'diary', 'user_post', 'answer')
        GROUP BY
            partition_date,
            cl_id,
            CASE
                WHEN params['card_content_type'] IN ('qa', 'answer') THEN 'qa'
                ELSE params['card_content_type']
            END,
            CASE
                WHEN params['transaction_type'] = 'ctr' THEN 'ctr预估'
                WHEN params['transaction_type'] = 'cvr' THEN 'cvr预估'
                WHEN params['transaction_type'] IN ('-1', 'smr') THEN 'smr'
                WHEN params['transaction_type'] IN ('pgc', 'hotspot') THEN '热点卡片'
                WHEN params['transaction_type'] = 'newdata' THEN '保量卡片'
            END,
            params['card_id'],
            app_session_id
    ) a
    GROUP BY
        partition_date,
        card_content_type,
        cl_id,
        recommend_type,
        card_id
) t3
    ON t2.partition_date = t3.partition_date
    AND t2.cl_id = t3.cl_id
    AND t2.card_id = t3.card_id
    AND t2.card_content_type = t3.card_content_type
    AND t2.recommend_type = t3.recommend_type
LEFT JOIN
(
    -- Page view duration: summed page_stay per device, business id and content
    -- type for detail pages whose referrer is home. Only stays in the range
    -- 0 <= page_stay < 1000 are kept.
    SELECT
        partition_date,
        cl_id,
        business_id,
        -- collapse the detail page names onto the card content types
        CASE
            WHEN page_name IN ('diary_detail', 'topic_detail') THEN 'diary'
            WHEN page_name IN ('post_detail', 'user_post_detail', 'doctor_post_detail') THEN 'user_post'
            WHEN page_name IN ('question_detail', 'answer_detail', 'question_answer_detail') THEN 'qa'
            ELSE NULL
        END AS page_name,
        SUM(page_stay) AS page_stay
    FROM online.bl_hdfs_maidian_updates
    WHERE partition_date >= regexp_replace(DATE_SUB(current_date, 1), '-', '')
        AND partition_date < regexp_replace((current_date), '-', '')
        AND action = 'page_view'
        AND page_name IN (
            'diary_detail', 'topic_detail',
            'post_detail', 'user_post_detail', 'doctor_post_detail',
            'question_detail', 'answer_detail', 'question_answer_detail'
        )
        AND referrer = 'home'
        AND page_stay >= 0
        AND page_stay < 1000
    GROUP BY
        partition_date,
        cl_id,
        business_id,
        CASE
            WHEN page_name IN ('diary_detail', 'topic_detail') THEN 'diary'
            WHEN page_name IN ('post_detail', 'user_post_detail', 'doctor_post_detail') THEN 'user_post'
            WHEN page_name IN ('question_detail', 'answer_detail', 'question_answer_detail') THEN 'qa'
            ELSE NULL
        END
) t4
    ON t4.partition_date = t3.partition_date
    AND t4.cl_id = t3.cl_id
    AND t4.business_id = t3.card_id
    AND t4.page_name = t3.card_content_type
LEFT JOIN
(
    -- Search box and search click behaviour: event count per device, business
    -- id and content type on detail pages reached from home.
    SELECT
        partition_date,
        cl_id,
        business_id,
        -- collapse the detail page names onto the card content types
        CASE
            WHEN page_name IN ('diary_detail', 'topic_detail') THEN 'diary'
            WHEN page_name IN ('post_detail', 'user_post_detail', 'doctor_post_detail') THEN 'user_post'
            WHEN page_name IN ('question_detail', 'answer_detail', 'question_answer_detail') THEN 'qa'
            ELSE NULL
        END AS page_name,
        COUNT(1) AS navbar_pv
    FROM online.bl_hdfs_maidian_updates
    WHERE partition_date >= regexp_replace(DATE_SUB(current_date, 1), '-', '')
        AND partition_date < regexp_replace((current_date), '-', '')
        AND action IN ('on_click_navbar_search', 'do_search')
        AND page_name IN (
            'diary_detail', 'topic_detail',
            'post_detail', 'user_post_detail', 'doctor_post_detail',
            'question_detail', 'answer_detail', 'question_answer_detail'
        )
        -- either the direct referrer is home, or the last element of the
        -- referrer_link array is home
        AND (
            referrer = 'home'
            OR (
                params['referrer_link'] LIKE '%[%'
                AND json_split(params['referrer_link'])[size(json_split(params['referrer_link'])) - 1] = 'home'
            )
        )
    GROUP BY
        partition_date,
        cl_id,
        business_id,
        CASE
            WHEN page_name IN ('diary_detail', 'topic_detail') THEN 'diary'
            WHEN page_name IN ('post_detail', 'user_post_detail', 'doctor_post_detail') THEN 'user_post'
            WHEN page_name IN ('question_detail', 'answer_detail', 'question_answer_detail') THEN 'qa'
            ELSE NULL
        END
) t5
    ON t5.partition_date = t3.partition_date
    AND t5.cl_id = t3.cl_id
    AND t5.business_id = t3.card_id
    AND t5.page_name = t3.card_content_type
LEFT JOIN
(
    -- Highlight-word clicks: event count per device, business id and content
    -- type on detail pages reached from home.
    SELECT
        partition_date,
        cl_id,
        business_id,
        -- collapse the detail page names onto the card content types
        CASE
            WHEN page_name IN ('diary_detail', 'topic_detail') THEN 'diary'
            WHEN page_name IN ('post_detail', 'user_post_detail', 'doctor_post_detail') THEN 'user_post'
            WHEN page_name IN ('question_detail', 'answer_detail', 'question_answer_detail') THEN 'qa'
            ELSE NULL
        END AS page_name,
        COUNT(1) AS highlight_pv
    FROM online.bl_hdfs_maidian_updates
    WHERE partition_date >= regexp_replace(DATE_SUB(current_date, 1), '-', '')
        AND partition_date < regexp_replace((current_date), '-', '')
        AND action = 'on_click_card'
        AND params['card_type'] = 'highlight_word'
        AND page_name IN (
            'diary_detail', 'topic_detail',
            'post_detail', 'user_post_detail', 'doctor_post_detail',
            'question_detail', 'answer_detail', 'question_answer_detail'
        )
        -- either the direct referrer is home, or the last element of the
        -- referrer_link array is home
        AND (
            referrer = 'home'
            OR (
                params['referrer_link'] LIKE '%[%'
                AND json_split(params['referrer_link'])[size(json_split(params['referrer_link'])) - 1] = 'home'
            )
        )
    GROUP BY
        partition_date,
        cl_id,
        business_id,
        CASE
            WHEN page_name IN ('diary_detail', 'topic_detail') THEN 'diary'
            WHEN page_name IN ('post_detail', 'user_post_detail', 'doctor_post_detail') THEN 'user_post'
            WHEN page_name IN ('question_detail', 'answer_detail', 'question_answer_detail') THEN 'qa'
            ELSE NULL
        END
) t6
    ON t6.partition_date = t3.partition_date
    AND t6.cl_id = t3.cl_id
    AND t6.business_id = t3.card_id
    AND t6.page_name = t3.card_content_type
LEFT JOIN
(
    -- Associated welfare (meigou) cards: distinct app sessions with a service
    -- card click per device, business id and content type.
    SELECT
        partition_date,
        cl_id,
        business_id,
        page_name,
        COUNT(DISTINCT app_session_id) AS self_wel_pv
    FROM
    (
        SELECT
            partition_date,
            cl_id,
            business_id,
            app_session_id,
            params['card_id'] AS card_id,
            -- collapse the detail page names onto the card content types
            CASE
                WHEN page_name IN ('diary_detail', 'topic_detail') THEN 'diary'
                WHEN page_name IN ('post_detail', 'user_post_detail', 'doctor_post_detail') THEN 'user_post'
                WHEN page_name IN ('question_detail', 'answer_detail', 'question_answer_detail') THEN 'qa'
                ELSE NULL
            END AS page_name,
            COUNT(1) AS pv
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date >= regexp_replace(DATE_SUB(current_date, 1), '-', '')
            AND partition_date < regexp_replace((current_date), '-', '')
            -- card position or origin markers that qualify a click
            AND (
                get_json_object(params['extra_param'], '$.type') = '交互栏'
                OR get_json_object(params['extra_param'], '$.jump_from') = 'msg_link'
                OR params['in_page_pos'] = 'top'
                OR params['in_page_pos'] = 'bottom'
            )
            AND action = 'on_click_card'
            AND params['card_content_type'] = 'service'
            -- only diary and topic detail pages pass this filter, so the CASE
            -- above can only yield diary here
            AND page_name IN ('diary_detail', 'topic_detail')
            -- either the direct referrer is home, or the last element of the
            -- referrer_link array is home
            AND (
                referrer = 'home'
                OR (
                    params['referrer_link'] LIKE '%[%'
                    AND json_split(params['referrer_link'])[size(json_split(params['referrer_link'])) - 1] = 'home'
                )
            )
        GROUP BY
            partition_date,
            cl_id,
            business_id,
            app_session_id,
            params['card_id'],
            CASE
                WHEN page_name IN ('diary_detail', 'topic_detail') THEN 'diary'
                WHEN page_name IN ('post_detail', 'user_post_detail', 'doctor_post_detail') THEN 'user_post'
                WHEN page_name IN ('question_detail', 'answer_detail', 'question_answer_detail') THEN 'qa'
                ELSE NULL
            END
    ) a
    GROUP BY
        partition_date,
        cl_id,
        business_id,
        page_name
) t7
    ON t7.partition_date = t3.partition_date
    AND t7.cl_id = t3.cl_id
    AND t7.business_id = t3.card_id
    AND t7.page_name = t3.card_content_type
LEFT JOIN
(
    -- Recommended welfare (meigou) cards. Welfare items consumed by the author
    -- need to be excluded, which the outer projection does by subtracting
    -- self_wel_pv. Counts distinct app sessions per device, business id and
    -- content type.
    SELECT
        partition_date,
        cl_id,
        business_id,
        page_name,
        COUNT(DISTINCT app_session_id) AS recom_wel_pv
    FROM
    (
        SELECT
            partition_date,
            cl_id,
            business_id,
            app_session_id,
            params['card_id'] AS card_id,
            -- collapse the detail page names onto the card content types
            CASE
                WHEN page_name IN ('diary_detail', 'topic_detail') THEN 'diary'
                WHEN page_name IN ('post_detail', 'user_post_detail', 'doctor_post_detail') THEN 'user_post'
                WHEN page_name IN ('question_detail', 'answer_detail', 'question_answer_detail') THEN 'qa'
                ELSE NULL
            END AS page_name,
            COUNT(1) AS service_pv
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date >= regexp_replace(DATE_SUB(current_date, 1), '-', '')
            AND partition_date < regexp_replace((current_date), '-', '')
            -- service card clicks, or clicks on the unfold button
            -- (parentheses spell out the AND-before-OR precedence)
            AND (
                (action = 'on_click_card' AND params['card_content_type'] = 'service')
                OR (action = 'on_click_button' AND params['button_name'] = 'unfold')
            )
            AND page_name IN (
                'diary_detail', 'topic_detail',
                'post_detail', 'user_post_detail', 'doctor_post_detail',
                'question_detail', 'answer_detail', 'question_answer_detail'
            )
            -- either the direct referrer is home, or the last element of the
            -- referrer_link array is home
            AND (
                referrer = 'home'
                OR (
                    params['referrer_link'] LIKE '%[%'
                    AND json_split(params['referrer_link'])[size(json_split(params['referrer_link'])) - 1] = 'home'
                )
            )
        GROUP BY
            partition_date,
            cl_id,
            business_id,
            app_session_id,
            params['card_id'],
            CASE
                WHEN page_name IN ('diary_detail', 'topic_detail') THEN 'diary'
                WHEN page_name IN ('post_detail', 'user_post_detail', 'doctor_post_detail') THEN 'user_post'
                WHEN page_name IN ('question_detail', 'answer_detail', 'question_answer_detail') THEN 'qa'
                ELSE NULL
            END
    ) a
    GROUP BY
        partition_date,
        cl_id,
        business_id,
        page_name
) t8
    ON t8.partition_date = t3.partition_date
    AND t8.cl_id = t3.cl_id
    AND t8.business_id = t3.card_id
    AND t8.page_name = t3.card_content_type
LEFT JOIN
(
    -- Recommended content cards: distinct app sessions with a content card
    -- click per device, business id and content type.
    SELECT
        partition_date,
        cl_id,
        business_id,
        page_name,
        COUNT(DISTINCT app_session_id) AS recom_content_pv
    FROM
    (
        SELECT
            partition_date,
            cl_id,
            business_id,
            app_session_id,
            params['card_id'] AS card_id,
            -- collapse the detail page names onto the card content types
            CASE
                WHEN page_name IN ('diary_detail', 'topic_detail') THEN 'diary'
                WHEN page_name IN ('post_detail', 'user_post_detail', 'doctor_post_detail') THEN 'user_post'
                WHEN page_name IN ('question_detail', 'answer_detail', 'question_answer_detail') THEN 'qa'
                ELSE NULL
            END AS page_name,
            COUNT(1) AS service_pv
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date >= regexp_replace(DATE_SUB(current_date, 1), '-', '')
            AND partition_date < regexp_replace((current_date), '-', '')
            AND action = 'on_click_card'
            AND params['card_content_type'] IN ('qa', 'diary', 'user_post', 'answer')
            AND page_name IN (
                'diary_detail', 'topic_detail',
                'post_detail', 'user_post_detail', 'doctor_post_detail',
                'question_detail', 'answer_detail', 'question_answer_detail'
            )
            -- either the direct referrer is home, or the last element of the
            -- referrer_link array is home
            AND (
                referrer = 'home'
                OR (
                    params['referrer_link'] LIKE '%[%'
                    AND json_split(params['referrer_link'])[size(json_split(params['referrer_link'])) - 1] = 'home'
                )
            )
        GROUP BY
            partition_date,
            cl_id,
            business_id,
            app_session_id,
            params['card_id'],
            CASE
                WHEN page_name IN ('diary_detail', 'topic_detail') THEN 'diary'
                WHEN page_name IN ('post_detail', 'user_post_detail', 'doctor_post_detail') THEN 'user_post'
                WHEN page_name IN ('question_detail', 'answer_detail', 'question_answer_detail') THEN 'qa'
                ELSE NULL
            END
    ) a
    GROUP BY
        partition_date,
        cl_id,
        business_id,
        page_name
) t9
    ON t9.partition_date = t3.partition_date
    AND t9.cl_id = t3.cl_id
    AND t9.business_id = t3.card_id
    AND t9.page_name = t3.card_content_type
LEFT JOIN
(
    -- Devices to exclude. The outer WHERE keeps only rows with no match here.
    -- Institution order-brushing devices, i.e. cheating devices
    -- (removed from view and exposure events).
    SELECT DISTINCT device_id
    FROM ml.ml_d_ct_dv_devicespam_d
    WHERE partition_day = regexp_replace(DATE_SUB(current_date, 1), '-', '')
    UNION ALL
    -- Intranet (staff) users.
    SELECT DISTINCT device_id
    FROM dim.dim_device_user_staff
) spam_pv
    ON spam_pv.device_id = t2.cl_id
LEFT JOIN
(
    -- Devices belonging to excluded user accounts, one row per
    -- (partition_date, device_id). The outer WHERE keeps only rows with no
    -- match here.
    SELECT
        partition_date,
        device_id
    FROM
    (
        -- First device id in device_list for each user_id on that day
        -- (empty string when the list is empty).
        SELECT
            user_id,
            partition_date,
            IF(size(device_list) > 0, device_list[0], '') AS device_id
        FROM online.ml_user_updates
        WHERE partition_date >= regexp_replace(DATE_SUB(current_date, 1), '-', '')
            AND partition_date < regexp_replace((current_date), '-', '')
    ) t1
    JOIN
    (
        -- Doctor accounts.
        SELECT DISTINCT user_id
        FROM online.tl_hdfs_doctor_view
        WHERE partition_date = regexp_replace(DATE_SUB(current_date, 1), '-', '')
        UNION ALL
        -- Sock-puppet accounts and model users.
        SELECT user_id
        FROM ml.ml_c_ct_ui_user_dimen_d
        WHERE partition_day = regexp_replace(DATE_SUB(current_date, 1), '-', '')
            AND (is_puppet = 'true' OR is_classifyuser = 'true')
        UNION ALL
        -- Users covered by the company intranet.
        SELECT DISTINCT user_id
        FROM dim.dim_device_user_staff
        UNION ALL
        -- Users whose device history contains a device flagged is_login_doctor.
        SELECT DISTINCT t1.user_id
        FROM
        (
            SELECT
                user_id,
                v.device_id AS device_id
            FROM online.ml_user_history_detail
            LATERAL VIEW EXPLODE(device_history_list) v AS device_id
            WHERE partition_date = regexp_replace(DATE_SUB(current_date, 1), '-', '')
        ) t1
        JOIN
        (
            SELECT device_id
            FROM online.ml_device_history_detail
            WHERE partition_date = regexp_replace(DATE_SUB(current_date, 1), '-', '')
                AND is_login_doctor = '1'
        ) t2
            ON t1.device_id = t2.device_id
    ) t2
        ON t1.user_id = t2.user_id
    GROUP BY
        partition_date,
        device_id
) dev
    ON t2.partition_date = dev.partition_date
    AND t2.cl_id = dev.device_id
-- Anti-join: keep only rows that matched neither spam_pv nor dev.
WHERE spam_pv.device_id IS NULL
    AND dev.device_id IS NULL
GROUP BY
    t1.partition_date,
    t1.device_os_type,
    t1.active_type,
    t2.card_content_type,
    t2.recommend_type
ORDER BY
    day_id,
    device_os_type,
    active_type,
    card_content_type,
    recommend_type
;
\ No newline at end of file
tables/create_meigou_source-pv.sql
deleted
100644 → 0
View file @
1d6e5108
--***************************************************************
--* Script name:   create_pm_c_op_co_content_dimen_d.sql
--* Purpose:       content daily report
--* Business name: pm
--* Input data:
--* Author:        weiyimin@igengmei.com
--* Last updated:  2020-5-25 11:00
--* NOTE(review): the script name and purpose above do not match this file
--* (tables/create_meigou_source-pv.sql, table tl_pm_meigou_source_pv_d) -
--* presumably copied from another script, confirm and correct.
--***************************************************************
-- Set global variables and UDFs
SET mapreduce.job.queuename=data;
-- Switch to the pm database
USE pm;
-- Create the managed table pm.tl_pm_meigou_source_pv_d if it does not exist:
-- tab-delimited text storage, partitioned by PARTITION_DAY.
-- NOTE(review): the table comment and many column comments describe content
-- report metrics (for example result_wel_buy_pv and result_wel_msg_pv carry
-- retention descriptions) - they look copied from another table, confirm.
CREATE TABLE IF NOT EXISTS pm.tl_pm_meigou_source_pv_d
(
    day_id                              STRING COMMENT '{"chs_name":"当天日期","description":"","etl":"","value":"","remark":""}',
    device_os_type                      STRING COMMENT '{"chs_name":"设备类型","description":"","etl":"","value":"","remark":""}',
    active_type                         STRING COMMENT '{"chs_name":"活跃类型","description":"","etl":"","value":"","remark":""}',
    is_ai_channel                       STRING COMMENT '{"chs_name":"是否AI渠道","description":"","etl":"","value":"","remark":""}',
    welfare_pv                          INT    COMMENT '{"chs_name":"美购详情页pv","description":"","etl":"","value":"","remark":""}',
    to_welfare_pv                       INT    COMMENT '{"chs_name":"从上级页面转化到美购详情页的pv总量","description":"","etl":"","value":"","remark":""}',
    total_conversion_rate               DOUBLE COMMENT '{"chs_name":"从上级页面转化到美购详情页的pv比美购详情页总pv","description":"","etl":"","value":"","remark":""}',
    search_result_welfare_pv            INT    COMMENT '{"chs_name":"搜索结果美购页pv","description":"","etl":"","value":"","remark":""}',
    search_result_welfare_to_welfare_pv INT    COMMENT '{"chs_name":"从搜索结果美购页转化到美购详情页pv","description":"","etl":"","value":"","remark":""}',
    result_wel_add_pv                   INT    COMMENT '{"chs_name":"搜索结果美购页-转化点击加车pv","description":"","etl":"","value":"","remark":""}',
    result_wel_buy_pv                   INT    COMMENT '{"chs_name":"内容用户APP7留","description":"","etl":"","value":"","remark":""}',
    result_wel_msg_pv                   INT    COMMENT '{"chs_name":"内容用户APP30留","description":"","etl":"","value":"","remark":""}',
    avg_app_duration                    DOUBLE COMMENT '{"chs_name":"内容用户单设备App时长(m)","description":"","etl":"","value":"","remark":""}',
    avg_content_stay                    DOUBLE COMMENT '{"chs_name":"内容用户单设备内容时长(m)","description":"","etl":"","value":"","remark":""}',
    avg_open_times                      DOUBLE COMMENT '{"chs_name":"内容用户单设备打开次数","description":"","etl":"","value":"","remark":""}',
    search_related_stay                 DOUBLE COMMENT '{"chs_name":"内容用户搜索相关页面单设备页面时长(m)","description":"","etl":"","value":"","remark":""}',
    welfare_stay                        DOUBLE COMMENT '{"chs_name":"内容用户美购详情页单设备页面时长(m)","description":"","etl":"","value":"","remark":""}',
    content_question_stay               DOUBLE COMMENT '{"chs_name":"内容用户问题详情页单设备页面时长(m)","description":"","etl":"","value":"","remark":""}',
    ai_related_stay                     DOUBLE COMMENT '{"chs_name":"内容用户AI相关页面单设备页面时长(m)","description":"","etl":"","value":"","remark":""}',
    content_diary_stay                  DOUBLE COMMENT '{"chs_name":"内容用户日记详情页单设备页面时长(m)","description":"","etl":"","value":"","remark":""}',
    home_stay                           DOUBLE COMMENT '{"chs_name":"内容用户首页单设备页面时长(m)","description":"","etl":"","value":"","remark":""}',
    conv_related_stay                   DOUBLE COMMENT '{"chs_name":"内容用户咨询相关页面单设备页面时长(m)","description":"","etl":"","value":"","remark":""}',
    recommend_rate                      DOUBLE COMMENT '{"chs_name":"首页feeds推荐进入内容UV/内容UV","description":"","etl":"","value":"","remark":""}',
    per_recommend_pv                    DOUBLE COMMENT '{"chs_name":"首页feeds推荐进入内容PV/UV","description":"","etl":"","value":"","remark":""}',
    feeds_rate                          DOUBLE COMMENT '{"chs_name":"首页feeds非推荐进入内容UV/内容UV","description":"","etl":"","value":"","remark":""}',
    per_feeds_pv                        DOUBLE COMMENT '{"chs_name":"首页feeds非推荐进入内容PV/UV","description":"","etl":"","value":"","remark":""}',
    search_rate                         DOUBLE COMMENT '{"chs_name":"搜索进入内容UV/内容UV","description":"","etl":"","value":"","remark":""}',
    per_search_pv                       DOUBLE COMMENT '{"chs_name":"搜索进入内容PV/UV","description":"","etl":"","value":"","remark":""}',
    zone_rate                           DOUBLE COMMENT '{"chs_name":"内容聚合页进入内容UV/内容UV","description":"","etl":"","value":"","remark":""}',
    per_zone_pv                         DOUBLE COMMENT '{"chs_name":"内容聚合页进入内容PV/UV","description":"","etl":"","value":"","remark":""}',
    content_rate                        DOUBLE COMMENT '{"chs_name":"内容详情页推荐板块进入内容UV/内容UV","description":"","etl":"","value":"","remark":""}',
    per_from_content_pv                 DOUBLE COMMENT '{"chs_name":"内容详情页推荐板块进入内容PV/UV","description":"","etl":"","value":"","remark":""}',
    blank_rate                          DOUBLE COMMENT '{"chs_name":"无来源页面(大多数为push)进入内容UV/内容UV","description":"","etl":"","value":"","remark":""}',
    per_blank_pv                        DOUBLE COMMENT '{"chs_name":"无来源页面(大多数为push)进入内容PV/UV","description":"","etl":"","value":"","remark":""}',
    comment_rate                        DOUBLE COMMENT '{"chs_name":"评论列表页进入内容UV/内容UV","description":"","etl":"","value":"","remark":""}',
    per_comment_pv                      DOUBLE COMMENT '{"chs_name":"评论列表页进入内容PV/UV","description":"","etl":"","value":"","remark":""}',
    org_rate                            DOUBLE COMMENT '{"chs_name":"医生医院主页进入内容UV/内容UV","description":"","etl":"","value":"","remark":""}',
    per_org_pv                          DOUBLE COMMENT '{"chs_name":"医生医院主页进入内容PV/UV","description":"","etl":"","value":"","remark":""}',
    category_rate                       DOUBLE COMMENT '{"chs_name":"品类聚合页进入内容UV/内容UV","description":"","etl":"","value":"","remark":""}',
    per_category_pv                     DOUBLE COMMENT '{"chs_name":"品类聚合页进入内容PV/UV","description":"","etl":"","value":"","remark":""}',
    my_diary_rate                       DOUBLE COMMENT '{"chs_name":"我的日记页进入内容UV/内容UV","description":"","etl":"","value":"","remark":""}',
    per_my_diary_pv                     DOUBLE COMMENT '{"chs_name":"我的日记页进入内容PV/UV","description":"","etl":"","value":"","remark":""}',
    ai_rate                             DOUBLE COMMENT '{"chs_name":"AI报告页进入内容UV/内容UV","description":"","etl":"","value":"","remark":""}',
    per_ai_pv                           DOUBLE COMMENT '{"chs_name":"AI报告页进入内容PV/UV","description":"","etl":"","value":"","remark":""}',
    create_topic_num                    INT    COMMENT '{"chs_name":"真实发帖数","description":"","etl":"","value":"","remark":""}',
    create_reply_num                    INT    COMMENT '{"chs_name":"真实评论数","description":"","etl":"","value":"","remark":""}',
    diary_uv                            INT    COMMENT '{"chs_name":"日记UV","description":"","etl":"","value":"","remark":""}',
    diary_pv                            INT    COMMENT '{"chs_name":"日记PV","description":"","etl":"","value":"","remark":""}',
    diary_rate                          DOUBLE COMMENT '{"chs_name":"日记UV/内容UV","description":"","etl":"","value":"","remark":""}',
    per_diary_pv                        DOUBLE COMMENT '{"chs_name":"日记PV/日记UV","description":"","etl":"","value":"","remark":""}',
    diary_stay                          DOUBLE COMMENT '{"chs_name":"日记单设备时长(m)","description":"","etl":"","value":"","remark":""}',
    post_uv                             INT    COMMENT '{"chs_name":"帖子UV","description":"","etl":"","value":"","remark":""}',
    post_pv                             INT    COMMENT '{"chs_name":"帖子PV","description":"","etl":"","value":"","remark":""}',
    post_rate                           DOUBLE COMMENT '{"chs_name":"帖子UV/内容UV","description":"","etl":"","value":"","remark":""}',
    per_post_pv                         DOUBLE COMMENT '{"chs_name":"帖子PV/帖子UV","description":"","etl":"","value":"","remark":""}',
    post_stay                           DOUBLE COMMENT '{"chs_name":"帖子单设备时长(m)","description":"","etl":"","value":"","remark":""}',
    question_uv                         INT    COMMENT '{"chs_name":"问题UV","description":"","etl":"","value":"","remark":""}',
    question_pv                         INT    COMMENT '{"chs_name":"问题PV","description":"","etl":"","value":"","remark":""}',
    question_rate                       DOUBLE COMMENT '{"chs_name":"问题UV/内容UV","description":"","etl":"","value":"","remark":""}',
    per_question_pv                     DOUBLE COMMENT '{"chs_name":"问题PV/问题UV","description":"","etl":"","value":"","remark":""}',
    question_stay                       DOUBLE COMMENT '{"chs_name":"问题单设备时长(m)","description":"","etl":"","value":"","remark":""}',
    question_answer_uv                  INT    COMMENT '{"chs_name":"问答UV","description":"","etl":"","value":"","remark":""}',
    question_answer_pv                  INT    COMMENT '{"chs_name":"问答PV","description":"","etl":"","value":"","remark":""}',
    question_answer_rate                DOUBLE COMMENT '{"chs_name":"问答UV/问答UV","description":"","etl":"","value":"","remark":""}',
    per_question_answer_pv              DOUBLE COMMENT '{"chs_name":"问答PV/问答UV","description":"","etl":"","value":"","remark":""}',
    question_answer_stay                DOUBLE COMMENT '{"chs_name":"问答单设备时长(m)","description":"","etl":"","value":"","remark":""}',
    answer_uv                           INT    COMMENT '{"chs_name":"回答UV","description":"","etl":"","value":"","remark":""}',
    answer_pv                           INT    COMMENT '{"chs_name":"回答PV","description":"","etl":"","value":"","remark":""}',
    answer_rate                         DOUBLE COMMENT '{"chs_name":"回答UV/内容UV","description":"","etl":"","value":"","remark":""}',
    per_answer_pv                       DOUBLE COMMENT '{"chs_name":"回答PV/回答UV","description":"","etl":"","value":"","remark":""}',
    answer_stay                         DOUBLE COMMENT '{"chs_name":"回答单设备时长(m)","description":"","etl":"","value":"","remark":""}',
    video_uv                            INT    COMMENT '{"chs_name":"视频UV","description":"","etl":"","value":"","remark":""}',
    video_pv                            INT    COMMENT '{"chs_name":"视频PV","description":"","etl":"","value":"","remark":""}',
    video_rate                          DOUBLE COMMENT '{"chs_name":"视频UV/内容UV","description":"","etl":"","value":"","remark":""}',
    per_video_pv                        DOUBLE COMMENT '{"chs_name":"视频PV/视频UV","description":"","etl":"","value":"","remark":""}',
    video_stay                          DOUBLE COMMENT '{"chs_name":"视频单设备时长(m)","description":"","etl":"","value":"","remark":""}',
    wiki_uv                             INT    COMMENT '{"chs_name":"百科UV","description":"","etl":"","value":"","remark":""}',
    wiki_pv                             INT    COMMENT '{"chs_name":"百科PV","description":"","etl":"","value":"","remark":""}',
    wiki_rate                           DOUBLE COMMENT '{"chs_name":"百科UV/内容UV","description":"","etl":"","value":"","remark":""}',
    per_wiki_pv                         DOUBLE COMMENT '{"chs_name":"百科PV/百科UV","description":"","etl":"","value":"","remark":""}',
    wiki_stay                           DOUBLE COMMENT '{"chs_name":"百科单设备时长(m)","description":"","etl":"","value":"","remark":""}',
    article_uv                          INT    COMMENT '{"chs_name":"专栏UV","description":"","etl":"","value":"","remark":""}',
    article_pv                          INT    COMMENT '{"chs_name":"专栏PV","description":"","etl":"","value":"","remark":""}',
    article_rate                        DOUBLE COMMENT '{"chs_name":"专栏UV/内容UV","description":"","etl":"","value":"","remark":""}',
    per_article_pv                      DOUBLE COMMENT '{"chs_name":"专栏PV/专栏UV","description":"","etl":"","value":"","remark":""}',
    article_stay                        DOUBLE COMMENT '{"chs_name":"专栏单设备时长(m)","description":"","etl":"","value":"","remark":""}'
)
COMMENT '内容日报'
PARTITIONED BY (PARTITION_DAY STRING COMMENT '分区日期')
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
    COLLECTION ITEMS TERMINATED BY '\002'
    MAP KEYS TERMINATED BY '\003'
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE
;
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment