Commit e4ad086c authored by 魏艺敏's avatar 魏艺敏

push codes

parent 41445cd1
This diff is collapsed.
......@@ -80,5 +80,5 @@ SELECT day_id `日期`
,valid_call_dev_qa_d as `当天有效电话线索设备数-需求自测`
,valid_call_num_qa_d as `当天有效电话线索人次-需求自测`
FROM pm.tl_pm_userclue_d
where partition_day in ('20200901','202001001','20201101','20201201','20210101',regexp_replace(date_sub(current_date,1),'-',''))
where partition_day in ('20200831','20200930','20201031','20201130','20201231',regexp_replace(date_sub(current_date,1),'-',''))
order by `日期`,`系统`,`新老`,`是否灰度`
daily_userpost=精华帖日报
\ No newline at end of file
#step1_1.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_topicreply_view
\ No newline at end of file
#step1_2.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_problem_view
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_answer_reply_view
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_api_tractate_reply_view
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online bl_hdfs_operation_updates
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive ml ML_D_CT_UI_USERCLEAN_DIMEN_D
\ No newline at end of file
#step2.job
type=command
dependencies=step1_1,step1_2,step1_3,step1_4,step1_5,step1_6
command=curl -X GET http://localhost:8553/api/report/email/daily_reply_content/zhaoyang@igengmei.com,zhangwen@igengmei.com,dongyiming@igengmei/weiyimin@igengmei.com
\ No newline at end of file
select a.*
FROM
(
--有评论过日记帖的设备,排除疑似广告
SELECT diary_id as content_id,'日记贴' as type,user_id,create_time,content
FROM
(
SELECT id,problem_id,user_id,reply_date as create_time,content
FROM online.tl_hdfs_topicreply_view
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND is_spam = 'false' --排除疑似广告
and regexp_replace(substr(reply_date,1,10),'-','') >= regexp_replace(DATE_SUB(current_date,1) ,'-','')
group by id,problem_id,user_id,reply_date,content
)t1
JOIN
(
SELECT id,diary_id
FROM online.tl_hdfs_problem_view
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
group by id,diary_id
)t2
on t2.id=t1.problem_id
group by diary_id,user_id,create_time,content
UNION ALL
--有评论过回答的设备,排除疑似广告
SELECT answer_id as content_id,'回答' as type,user_id,create_time,content
FROM online.tl_hdfs_answer_reply_view
WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
and (is_fake is NULL or is_fake = 'false')
AND answer_id is not NULL
and regexp_replace(substr(create_time,1,10),'-','') >= regexp_replace(DATE_SUB(current_date,1) ,'-','')
group by answer_id,user_id,create_time,content
UNION ALL
--有评论过用户帖的设备
SELECT tractate_id as content_id,'帖子' as type,user_id,create_time,content
FROM online.tl_hdfs_api_tractate_reply_view
WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
and regexp_replace(substr(create_time,1,10),'-','') >= regexp_replace(DATE_SUB(current_date,1) ,'-','')
group by tractate_id,user_id,create_time,content
)a
JOIN --限制用户是在app进行的回复
(
SELECT a.partition_date,user_id
FROM
(
SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) as partition_date,user_id,device_id,action
FROM online.bl_hdfs_operation_updates
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
)a
JOIN
(
SELECT code
FROM dim.dim_community_action_type
WHERE communityuserbehavior_type_name = '回帖'
)type
ON a.action = code
GROUP BY a.partition_date,user_id
)b
ON a.user_id = b.user_id
AND substr(a.create_time,1,10)= b.partition_date
left join
(
select distinct user_id
from ml.ML_D_CT_UI_USERCLEAN_DIMEN_D
where PARTITION_DAY =regexp_replace(DATE_SUB(current_date,1) ,'-','')
and (is_doctor = 'true'
or is_puppet = 'true'
or is_classify_user = 'true'
or is_reply_fake = 'true'
or is_staff = 'true'
or is_associated_issuedevice = 'true')
)c
on a.user_id=c.user_id
where c.user_id is null
#step1_1.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_api_tractate_view
\ No newline at end of file
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive tl tl_mp_api_tractate
\ No newline at end of file
......@@ -5,7 +5,7 @@
--帖子关联标签 3315
SELECT id as `帖子id`
,content as `帖子标题`
,title as `帖子标题`
,audit_date as `审核日期`
,tag_list as `所有关联标签`
,nvl(concat(round((nvl(reply_num_1,0)+nvl(vote_num_1,0)+nvl(favor_num_1,0)+nvl(share_num_1,0))/page_pv_1*100,2),'%'),0) as `前1日互动率`
......@@ -54,7 +54,7 @@ SELECT id as `帖子id`
FROM
(
SELECT t1.id,content,audit_date,tag_list
SELECT t1.id,title,audit_date,tag_list
,sum(exp_pv) as exp_pv
,sum(click_pv) as click_pv
,sum(page_pv) as page_pv
......@@ -92,12 +92,12 @@ FROM
,round(avg(case when t2.partition_date>=DATE_SUB(current_date,10) then avg_page_stay end),2) as avg_page_stay_10
FROM
(
select a.id,content,a.audit_date,collect_set(d.name) as tag_list
select a.id,title,a.audit_date,collect_set(d.name) as tag_list
from
(
select id,content,user_id,substr(audit_time,1,10) as audit_date
from online.tl_hdfs_api_tractate_view
where partition_date= regexp_replace(DATE_SUB(current_date,1) ,'-','')
select id,title,user_id,substr(audit_time,1,10) as audit_date
from tl.tl_mp_api_tractate
where partition_day= regexp_replace(DATE_SUB(current_date,1) ,'-','')
and content_level>=3
and is_online='true'
)a
......@@ -125,7 +125,7 @@ FROM
group by id,name
)d
on d.id=c.tag_v3_id
group by a.id,content,a.audit_date
group by a.id,title,a.audit_date
)t1
join
(--历史数据,指从审核时间至今的数据
......@@ -203,40 +203,10 @@ FROM
select distinct device_id
from ML.ML_D_CT_DV_DEVICECLEAN_DIMEN_D
where PARTITION_DAY =regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND (IS_MORE_USER = 'true'
OR IS_STAFF = 'true'
OR IS_SPAM_CHANNEL = 'true'
OR IS_SUSPICIOUS = 'true'
OR IS_ASSOCIATED_ISSUEUSER = 'ture')
AND is_abnormal_device = 'ture'
)c
on a.cl_id=c.device_id
left join
(
SELECT partition_date,device_id
FROM
(--找出user_id当天活跃的第一个设备id
SELECT user_id,partition_date,
if(size(device_list) > 0, device_list [ 0 ], '') AS device_id
FROM online.ml_user_updates
WHERE partition_date >= '20160101'
and partition_date <=regexp_replace(DATE_SUB(current_date,1) ,'-','')
)a
join
(
select distinct user_id
from ml.ML_D_CT_UI_USERCLEAN_DIMEN_D
where PARTITION_DAY =regexp_replace(DATE_SUB(current_date,1) ,'-','')
and (is_doctor = 'true'
or is_puppet = 'true'
or is_classify_user = 'true'
or is_reply_fake = 'true'
or is_staff = 'true'
or is_associated_issuedevice = 'true')
)b
on a.user_id=b.user_id
)d
on a.cl_id=d.device_id and a.partition_date=d.partition_date
where c.device_id is null and d.device_id is null
where c.device_id is null
group by nvl(nvl(a.card_id,e.business_id),f.business_id),nvl(nvl(a.partition_date,e.partition_date),f.partition_date)
)t1
full join
......@@ -350,12 +320,7 @@ FROM
select distinct user_id
from ml.ML_D_CT_UI_USERCLEAN_DIMEN_D
where PARTITION_DAY =regexp_replace(DATE_SUB(current_date,1) ,'-','')
and (is_doctor = 'true'
or is_puppet = 'true'
or is_classify_user = 'true'
or is_reply_fake = 'true'
or is_staff = 'true'
or is_associated_issuedevice = 'true')
and is_abnormal_user = 'true'
)b
on a.user_id=b.user_id
where b.user_id is null
......@@ -365,7 +330,7 @@ FROM
)t2
on t1.id= t2.card_id
where t2.partition_date>=t1.audit_date
group by t1.id,content,audit_date,tag_list
group by t1.id,title,audit_date,tag_list
)t4
order by `前10日曝光` desc
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment