Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
B
bi-report
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
data
bi-report
Commits
e4ad086c
Commit
e4ad086c
authored
Sep 11, 2020
by
魏艺敏
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
push codes
parent
41445cd1
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
16 changed files
with
128 additions
and
49 deletions
+128
-49
clue_daily.sql
pm/clue_daily/etl/clue_daily.sql
+0
-0
clue_daily.sql
pm/clue_daily/report/clue_daily.sql
+1
-1
en-cn.properties
pm/daily_reply_content/en-cn.properties
+2
-0
daily_reply_content.zip
pm/daily_reply_content/job/daily_reply_content.zip
+0
-0
step1_1.job
pm/daily_reply_content/job/step1_1.job
+4
-0
step1_2.job
pm/daily_reply_content/job/step1_2.job
+4
-0
step1_3.job
pm/daily_reply_content/job/step1_3.job
+4
-0
step1_4.job
pm/daily_reply_content/job/step1_4.job
+4
-0
step1_5.job
pm/daily_reply_content/job/step1_5.job
+4
-0
step1_6.job
pm/daily_reply_content/job/step1_6.job
+4
-0
step2.job
pm/daily_reply_content/job/step2.job
+5
-0
readme.txt
pm/daily_reply_content/readme.txt
+0
-0
daily_reply_content.sql
pm/daily_reply_content/report/daily_reply_content.sql
+83
-0
daily_userpost.zip
pm/daily_userpost/job/daily_userpost.zip
+0
-0
step1_1.job
pm/daily_userpost/job/step1_1.job
+2
-2
daily_userpost.sql
pm/daily_userpost/report/daily_userpost.sql
+11
-46
No files found.
pm/clue_daily/etl/clue_daily.sql
View file @
e4ad086c
This diff is collapsed.
Click to expand it.
pm/clue_daily/report/clue_daily.sql
View file @
e4ad086c
...
...
@@ -80,5 +80,5 @@ SELECT day_id `日期`
,
valid_call_dev_qa_d
as
`当天有效电话线索设备数-需求自测`
,
valid_call_num_qa_d
as
`当天有效电话线索人次-需求自测`
FROM
pm
.
tl_pm_userclue_d
where
partition_day
in
(
'20200
901'
,
'202001001'
,
'20201101'
,
'20201201'
,
'2021010
1'
,
regexp_replace
(
date_sub
(
current_date
,
1
),
'-'
,
''
))
where
partition_day
in
(
'20200
831'
,
'20200930'
,
'20201031'
,
'20201130'
,
'2020123
1'
,
regexp_replace
(
date_sub
(
current_date
,
1
),
'-'
,
''
))
order
by
`日期`
,
`系统`
,
`新老`
,
`是否灰度`
pm/daily_reply_content/en-cn.properties
0 → 100644
View file @
e4ad086c
# NOTE(review): this file sits under pm/daily_reply_content, but the key below is
# daily_userpost and the value reads "featured-post daily report" -- presumably
# copied from the daily_userpost report. Confirm the intended key and value.
daily_userpost
=
精华帖日报
\ No newline at end of file
pm/daily_reply_content/job/daily_reply_content.zip
0 → 100644
View file @
e4ad086c
File added
pm/daily_reply_content/job/step1_1.job
0 → 100644
View file @
e4ad086c
#step1_1.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_topicreply_view
\ No newline at end of file
pm/daily_reply_content/job/step1_2.job
0 → 100644
View file @
e4ad086c
#step1_2.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_problem_view
\ No newline at end of file
pm/daily_reply_content/job/step1_3.job
0 → 100644
View file @
e4ad086c
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_answer_reply_view
\ No newline at end of file
pm/daily_reply_content/job/step1_4.job
0 → 100644
View file @
e4ad086c
#step1_4.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_api_tractate_reply_view
\ No newline at end of file
pm/daily_reply_content/job/step1_5.job
0 → 100644
View file @
e4ad086c
#step1_5.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online bl_hdfs_operation_updates
\ No newline at end of file
pm/daily_reply_content/job/step1_6.job
0 → 100644
View file @
e4ad086c
#step1_6.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive ml ML_D_CT_UI_USERCLEAN_DIMEN_D
\ No newline at end of file
pm/daily_reply_content/job/step2.job
0 → 100644
View file @
e4ad086c
#step2.job
# Calls the local report service's email endpoint for daily_reply_content.
# Declares step1_1 .. step1_6 (the waitsuccess.sh checks) as dependencies.
# NOTE(review): the path segment after the "/" that follows the comma-separated
# address list is presumably a second recipient group (e.g. cc) -- confirm
# against the report API.
type=command
dependencies=step1_1,step1_2,step1_3,step1_4,step1_5,step1_6
# Fixed: the third address previously read "dongyiming@igengmei" (missing ".com").
command=curl -X GET http://localhost:8553/api/report/email/daily_reply_content/zhaoyang@igengmei.com,zhangwen@igengmei.com,dongyiming@igengmei.com/weiyimin@igengmei.com
\ No newline at end of file
pm/daily_reply_content/readme.txt
0 → 100644
View file @
e4ad086c
pm/daily_reply_content/report/daily_reply_content.sql
0 → 100644
View file @
e4ad086c
-- Daily reply-content report.
-- Collects replies from yesterday's partition on diary posts, answers and user
-- posts, keeps only those whose author has a matching reply action for the same
-- day in online.bl_hdfs_operation_updates, and drops authors that are flagged in
-- ml.ML_D_CT_UI_USERCLEAN_DIMEN_D.
-- Output columns: content_id, type, user_id, create_time, content.
SELECT a.*
FROM (
    -- Replies on diary posts, excluding suspected ads
    SELECT
        problem.diary_id AS content_id,
        '日记贴' AS type,
        reply.user_id,
        reply.create_time,
        reply.content
    FROM (
        SELECT
            id,
            problem_id,
            user_id,
            reply_date AS create_time,
            content
        FROM online.tl_hdfs_topicreply_view
        WHERE partition_date = regexp_replace(date_sub(current_date, 1), '-', '')
          AND is_spam = 'false'  -- exclude suspected ads
          -- keep only replies dated yesterday or later
          AND regexp_replace(substr(reply_date, 1, 10), '-', '')
              >= regexp_replace(date_sub(current_date, 1), '-', '')
        GROUP BY
            id,
            problem_id,
            user_id,
            reply_date,
            content
    ) reply
    JOIN (
        -- Maps each problem id to its diary id
        SELECT
            id,
            diary_id
        FROM online.tl_hdfs_problem_view
        WHERE partition_date = regexp_replace(date_sub(current_date, 1), '-', '')
        GROUP BY
            id,
            diary_id
    ) problem
        ON problem.id = reply.problem_id
    GROUP BY
        problem.diary_id,
        reply.user_id,
        reply.create_time,
        reply.content

    UNION ALL

    -- Replies on answers, excluding suspected ads
    SELECT
        answer_id AS content_id,
        '回答' AS type,
        user_id,
        create_time,
        content
    FROM online.tl_hdfs_answer_reply_view
    WHERE partition_date = regexp_replace(date_sub(current_date, 1), '-', '')
      AND (is_fake IS NULL OR is_fake = 'false')
      AND answer_id IS NOT NULL
      AND regexp_replace(substr(create_time, 1, 10), '-', '')
          >= regexp_replace(date_sub(current_date, 1), '-', '')
    GROUP BY
        answer_id,
        user_id,
        create_time,
        content

    UNION ALL

    -- Replies on user posts (tractates)
    SELECT
        tractate_id AS content_id,
        '帖子' AS type,
        user_id,
        create_time,
        content
    FROM online.tl_hdfs_api_tractate_reply_view
    WHERE partition_date = regexp_replace(date_sub(current_date, 1), '-', '')
      AND regexp_replace(substr(create_time, 1, 10), '-', '')
          >= regexp_replace(date_sub(current_date, 1), '-', '')
    GROUP BY
        tractate_id,
        user_id,
        create_time,
        content
) a
JOIN (
    -- Restrict to users whose reply was made in the app:
    -- one row per (day, user) having an action whose type name is '回帖'
    SELECT
        ops.partition_date,
        ops.user_id
    FROM (
        SELECT
            -- yyyymmdd -> yyyy-mm-dd so it can be compared with substr(create_time, 1, 10)
            concat_ws(
                '-',
                substr(partition_date, 1, 4),
                substr(partition_date, 5, 2),
                substr(partition_date, 7, 2)
            ) AS partition_date,
            user_id,
            device_id,
            action
        FROM online.bl_hdfs_operation_updates
        -- both bounds are yesterday, so this selects a single partition
        WHERE partition_date >= regexp_replace(date_sub(current_date, 1), '-', '')
          AND partition_date <= regexp_replace(date_sub(current_date, 1), '-', '')
    ) ops
    JOIN (
        SELECT code
        FROM dim.dim_community_action_type
        WHERE communityuserbehavior_type_name = '回帖'
    ) reply_action
        ON ops.action = reply_action.code
    GROUP BY
        ops.partition_date,
        ops.user_id
) b
    ON a.user_id = b.user_id
   AND substr(a.create_time, 1, 10) = b.partition_date
LEFT JOIN (
    -- Users carrying at least one of the exclusion flags in yesterday's partition
    SELECT DISTINCT user_id
    FROM ml.ML_D_CT_UI_USERCLEAN_DIMEN_D
    WHERE PARTITION_DAY = regexp_replace(date_sub(current_date, 1), '-', '')
      AND (
          is_doctor = 'true'
          OR is_puppet = 'true'
          OR is_classify_user = 'true'
          OR is_reply_fake = 'true'
          OR is_staff = 'true'
          OR is_associated_issuedevice = 'true'
      )
) c
    ON a.user_id = c.user_id
-- anti-join: keep only replies whose author is not in the flagged set
WHERE c.user_id IS NULL
pm/daily_userpost/job/daily_userpost.zip
View file @
e4ad086c
No preview for this file type
pm/daily_userpost/job/step1_1.job
View file @
e4ad086c
#step1_1.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_api_tractate_view
\ No newline at end of file
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive tl tl_mp_api_tractate
\ No newline at end of file
pm/daily_userpost/report/daily_userpost.sql
View file @
e4ad086c
...
...
@@ -5,7 +5,7 @@
--帖子关联标签 3315
SELECT
id
as
`帖子id`
,
content
as
`帖子标题`
,
title
as
`帖子标题`
,
audit_date
as
`审核日期`
,
tag_list
as
`所有关联标签`
,
nvl
(
concat
(
round
((
nvl
(
reply_num_1
,
0
)
+
nvl
(
vote_num_1
,
0
)
+
nvl
(
favor_num_1
,
0
)
+
nvl
(
share_num_1
,
0
))
/
page_pv_1
*
100
,
2
),
'%'
),
0
)
as
`前1日互动率`
...
...
@@ -54,7 +54,7 @@ SELECT id as `帖子id`
FROM
(
SELECT
t1
.
id
,
content
,
audit_date
,
tag_list
SELECT
t1
.
id
,
title
,
audit_date
,
tag_list
,
sum
(
exp_pv
)
as
exp_pv
,
sum
(
click_pv
)
as
click_pv
,
sum
(
page_pv
)
as
page_pv
...
...
@@ -92,12 +92,12 @@ FROM
,
round
(
avg
(
case
when
t2
.
partition_date
>=
DATE_SUB
(
current_date
,
10
)
then
avg_page_stay
end
),
2
)
as
avg_page_stay_10
FROM
(
select
a
.
id
,
content
,
a
.
audit_date
,
collect_set
(
d
.
name
)
as
tag_list
select
a
.
id
,
title
,
a
.
audit_date
,
collect_set
(
d
.
name
)
as
tag_list
from
(
select
id
,
content
,
user_id
,
substr
(
audit_time
,
1
,
10
)
as
audit_date
from
online
.
tl_hdfs_api_tractate_view
where
partition_da
te
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
select
id
,
title
,
user_id
,
substr
(
audit_time
,
1
,
10
)
as
audit_date
from
tl
.
tl_mp_api_tractate
where
partition_da
y
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
and
content_level
>=
3
and
is_online
=
'true'
)
a
...
...
@@ -125,7 +125,7 @@ FROM
group
by
id
,
name
)
d
on
d
.
id
=
c
.
tag_v3_id
group
by
a
.
id
,
content
,
a
.
audit_date
group
by
a
.
id
,
title
,
a
.
audit_date
)
t1
join
(
--历史数据,指从审核时间至今的数据
...
...
@@ -203,40 +203,10 @@ FROM
-- Devices from yesterday's partition of ML.ML_D_CT_DEVICECLEAN_DIMEN_D that carry
-- at least one of the listed flags AND are marked abnormal.
-- Fixed: two comparisons used the misspelled literal 'ture' instead of 'true'.
-- Because the is_abnormal_device test is ANDed, the misspelling would make the
-- whole predicate unsatisfiable unless the column really stores 'ture'.
SELECT DISTINCT device_id
FROM ML.ML_D_CT_DEVICECLEAN_DIMEN_D
WHERE PARTITION_DAY = regexp_replace(DATE_SUB(current_date, 1), '-', '')
  AND (
      IS_MORE_USER = 'true'
      OR IS_STAFF = 'true'
      OR IS_SPAM_CHANNEL = 'true'
      OR IS_SUSPICIOUS = 'true'
      OR IS_ASSOCIATED_ISSUEUSER = 'true'
  )
  AND is_abnormal_device = 'true'
)
c
on
a
.
cl_id
=
c
.
device_id
left
join
(
SELECT
partition_date
,
device_id
FROM
(
--找出user_id当天活跃的第一个设备id
SELECT
user_id
,
partition_date
,
if
(
size
(
device_list
)
>
0
,
device_list
[
0
],
''
)
AS
device_id
FROM
online
.
ml_user_updates
WHERE
partition_date
>=
'20160101'
and
partition_date
<=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
)
a
join
(
select
distinct
user_id
from
ml
.
ML_D_CT_UI_USERCLEAN_DIMEN_D
where
PARTITION_DAY
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
and
(
is_doctor
=
'true'
or
is_puppet
=
'true'
or
is_classify_user
=
'true'
or
is_reply_fake
=
'true'
or
is_staff
=
'true'
or
is_associated_issuedevice
=
'true'
)
)
b
on
a
.
user_id
=
b
.
user_id
)
d
on
a
.
cl_id
=
d
.
device_id
and
a
.
partition_date
=
d
.
partition_date
where
c
.
device_id
is
null
and
d
.
device_id
is
null
where
c
.
device_id
is
null
group
by
nvl
(
nvl
(
a
.
card_id
,
e
.
business_id
),
f
.
business_id
),
nvl
(
nvl
(
a
.
partition_date
,
e
.
partition_date
),
f
.
partition_date
)
)
t1
full
join
...
...
@@ -350,12 +320,7 @@ FROM
select
distinct
user_id
from
ml
.
ML_D_CT_UI_USERCLEAN_DIMEN_D
where
PARTITION_DAY
=
regexp_replace
(
DATE_SUB
(
current_date
,
1
)
,
'-'
,
''
)
and
(
is_doctor
=
'true'
or
is_puppet
=
'true'
or
is_classify_user
=
'true'
or
is_reply_fake
=
'true'
or
is_staff
=
'true'
or
is_associated_issuedevice
=
'true'
)
and
is_abnormal_user
=
'true'
)
b
on
a
.
user_id
=
b
.
user_id
where
b
.
user_id
is
null
...
...
@@ -365,7 +330,7 @@ FROM
)
t2
on
t1
.
id
=
t2
.
card_id
where
t2
.
partition_date
>=
t1
.
audit_date
group
by
t1
.
id
,
content
,
audit_date
,
tag_list
group
by
t1
.
id
,
title
,
audit_date
,
tag_list
)
t4
order
by
`前10日曝光`
desc
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment