Commit 5ae32c83 authored by 魏艺敏

push codes

parent 2c57579c
clear_content=首页内容清洗
\ No newline at end of file
#step1_1.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_api_tractate_view
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online bl_hdfs_operation_updates
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_api_tractate_vote_view
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_api_tractate_reply_vote_view
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_api_tractate_favor_view
\ No newline at end of file
#step1_3.job
# Runs waitsuccess.sh with the arguments: hive online tl_hdfs_api_tractate_favor_view.
# NOTE(review): another wait job in this commit has the identical command (same
# database and table). Confirm whether this job was meant to wait on a different
# table, or remove the duplicate.
# NOTE(review): the step1_3.job header label is repeated across several job files
# in this commit - confirm the real file name.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_api_tractate_favor_view
\ No newline at end of file
#step1_2.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive tl tl_mp_api_tractate_tag_v3
\ No newline at end of file
#step1_3.job
# Runs waitsuccess.sh with the arguments: hive tl tl_mp_api_tractate_tag_v3.
# NOTE(review): another wait job in this commit (headed step1_2.job) has the
# identical command. Confirm whether this job was meant to wait on a different
# table, or remove the duplicate.
# NOTE(review): the step1_3.job header label is repeated across several job files
# in this commit - confirm the real file name.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive tl tl_mp_api_tractate_tag_v3
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_api_tag_3_0_view
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_community_precise_exposure_detail
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive ml ML_D_CT_DV_DEVICECLEAN_DIMEN_D
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive ml ML_D_CT_UI_USERCLEAN_DIMEN_D
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_user_updates
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_api_tractate_reply_view
\ No newline at end of file
#step2.job
# Final step of the flow: declared to depend on the fourteen jobs step1_1 .. step1_14,
# then issues an HTTP GET against the service on localhost:8553 for the
# clear_content report.
# The two trailing path segments are e-mail addresses - presumably the recipient
# and the cc/sender expected by the report service. TODO confirm against the service.
type=command
dependencies=step1_1,step1_2,step1_3,step1_4,step1_5,step1_6,step1_7,step1_8,step1_9,step1_10,step1_11,step1_12,step1_13,step1_14
command=curl -X GET http://localhost:8553/api/report/email/clear_content/weiyimin@igengmei.com/weiyimin@qq.com
\ No newline at end of file
-- Featured posts for new users (home feed content report).
-- Post-associated tag 3315 (kept from the original header - what 3315 refers to is not shown here).
--
-- Output: one row per content card (post, diary or answer) with lifetime and
-- trailing-30-day exposure, click, page-view and interaction metrics, ordered by
-- lifetime exposure descending.
--
-- Fixes in this revision:
--   * Device blacklist: the IS_ASSOCIATED_ISSUEUSER literal was misspelled, so that
--     flag could only match a row carrying the same misspelling. It now compares to true.
--   * The page-stay subquery (e) is aggregated to one row per day/card/device before
--     it is joined. It used to return one row per page-view event, which multiplied
--     the exposure, click and page-view rows and inflated their sums.
--   * The page-view join (f) now uses the coalesced partition_date, the same way it
--     already coalesces the card id and the device id.
--   * page_pv_20 counts distinct events, consistent with page_pv.
--
-- NOTE(review): the blacklist joins (c, d) are keyed on a.cl_id only, so rows that
--   exist only in e or f (no exposure row) are never filtered. Confirm intent.
-- NOTE(review): subquery a can return two rows per day/card/device (cpc and non-cpc),
--   which still duplicates the e and f measures for such keys.
-- NOTE(review): metrics are joined to content on id alone. Post, diary and answer ids
--   come from different tables - verify they cannot collide.
-- NOTE(review): the inner comment says metrics run from the audit date, but no
--   filter on audit_date is visible in this query.
SELECT card_id as `帖子id`
,type as `内容类型`
,content_level as `星级`
,case when is_cpc>0 then '是' else '否' end as `是否商业化内容`
,create_date as `上线日期`
,audit_date as `最近审核日期`
,tag_list as `所有关联标签`
,nvl(concat(round((nvl(reply_num_30,0)+nvl(vote_num_30,0)+nvl(favor_num_30,0)+nvl(share_num_30,0))/page_pv_30*100,2),'%'),0) as `前30日互动率`
,nvl(concat(round(click_pv_30/exp_pv_30*100,2),'%'),0) as `前30日ctr`
,nvl(click_pv_30,0) as `前30日点击`
,nvl(exp_pv_30,0) as `前30日曝光`
,nvl(page_pv_30,0) as `前30日浏览pv`
,nvl(reply_num_30,0) as `前30日真实评论`
,nvl(vote_num_30,0) as `前30日真实点赞`
,nvl(favor_num_30,0) as `前30日收藏`
,nvl(share_num_30,0) as `前30日转发`
,nvl(page_pv_20_30,0) as `前30日超过20秒阅读pv`
,nvl(avg_page_stay_30,0) as `前30日平均阅读时长(s)`
,nvl(concat(round((nvl(reply_num,0)+nvl(vote_num,0)+nvl(favor_num,0)+nvl(share_num,0))/page_pv*100,2),'%'),0) as `历史互动率`
,nvl(concat(round(click_pv/exp_pv*100,2),'%'),0) as `历史ctr`
,nvl(click_pv,0) as `历史点击`
,nvl(exp_pv,0) as `历史曝光`
,nvl(page_pv,0) as `历史浏览pv`
,nvl(reply_num,0) as `历史真实评论`
,nvl(vote_num,0) as `历史真实点赞`
,nvl(favor_num,0) as `历史收藏`
,nvl(share_num,0) as `历史转发`
,nvl(page_pv_20,0) as `历史超过20秒阅读pv`
,nvl(avg_page_stay,0) as `历史平均阅读时长(s)`
FROM
(
  -- Historical data, described by the author as data from the audit time until now.
  -- The _30 columns restrict the same measures to the trailing 30 days.
  SELECT t1.card_id,type,content_level,create_date,audit_date,tag_list
  ,sum(is_cpc) as is_cpc
  ,sum(exp_pv) as exp_pv
  ,sum(click_pv) as click_pv
  ,sum(page_pv) as page_pv
  ,sum(reply_num) as reply_num
  ,sum(vote_num) as vote_num
  ,sum(favor_num) as favor_num
  ,sum(share_num) as share_num
  ,sum(page_pv_20) as page_pv_20
  ,avg(avg_page_stay) as avg_page_stay
  ,sum(case when t1.partition_date>=DATE_SUB(current_date,30) then exp_pv end) as exp_pv_30
  ,sum(case when t1.partition_date>=DATE_SUB(current_date,30) then click_pv end) as click_pv_30
  ,sum(case when t1.partition_date>=DATE_SUB(current_date,30) then page_pv end) as page_pv_30
  ,sum(case when t1.partition_date>=DATE_SUB(current_date,30) then reply_num end) as reply_num_30
  ,sum(case when t1.partition_date>=DATE_SUB(current_date,30) then vote_num end) as vote_num_30
  ,sum(case when t1.partition_date>=DATE_SUB(current_date,30) then favor_num end) as favor_num_30
  ,sum(case when t1.partition_date>=DATE_SUB(current_date,30) then share_num end) as share_num_30
  ,sum(case when t1.partition_date>=DATE_SUB(current_date,30) then page_pv_20 end) as page_pv_20_30
  ,round(avg(case when t1.partition_date>=DATE_SUB(current_date,30) then avg_page_stay end),2) as avg_page_stay_30
  from
  (
    -- Daily metrics per card: traffic (inner t1) full-joined with interactions (inner t2).
    -- The yyyymmdd day key is reformatted to yyyy-mm-dd for the 30-day comparison above.
    SELECT nvl(concat_ws('-',substr(t1.partition_date,1,4),substr(t1.partition_date,5,2),substr(t1.partition_date,7,2))
    ,concat_ws('-',substr(t2.create_date,1,4),substr(t2.create_date,5,2),substr(t2.create_date,7,2))) as partition_date
    ,nvl(t1.card_id,t2.tractate_id) as card_id
    ,is_cpc,exp_pv,click_pv,page_pv,page_pv_20,avg_page_stay
    ,reply_num,vote_num,favor_num,share_num
    from
    (
      -- Traffic per card and day, after excluding blacklisted devices.
      select nvl(nvl(a.card_id,e.business_id),f.business_id) as card_id
      ,nvl(nvl(a.partition_date,e.partition_date),f.partition_date) as partition_date
      ,sum(is_cpc) as is_cpc
      ,sum(exp_pv) as exp_pv
      ,sum(click_pv) as click_pv
      ,sum(page_pv) as page_pv
      ,sum(page_pv_20) as page_pv_20
      ,round(sum(page_stay)/count(distinct e.cl_id,e.partition_date),2) as avg_page_stay
      from
      (
        -- Exposure on the home page, tab 精选
        select partition_date,card_id,cl_id,count(distinct app_session_id) as exp_pv
        ,case when transaction_type in ('cpc','advertise') then 1 else 0 end as is_cpc
        from online.ml_community_precise_exposure_detail
        where partition_date>='20160101' AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
        -- from app version 7745 the action was renamed to page_precise_exposure
        AND action in ('page_precise_exposure','home_choiceness_card_exposure')
        -- precise exposure only
        AND is_exposure = '1'
        AND page_name ='home'
        AND card_content_type in ('diary','user_post','answer')
        AND tab_name = '精选'
        group by partition_date,card_id,cl_id,case when transaction_type in ('cpc','advertise') then 1 else 0 end
      )a
      left join
      (
        -- Clicks on home page cards, tab 精选
        SELECT partition_date,params['card_id'] as card_id,cl_id,count(distinct app_session_id) as click_pv
        from online.bl_hdfs_maidian_updates
        WHERE partition_date>='20160101' AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
        AND action='on_click_card'
        AND params['page_name'] ='home'
        AND params['tab_name'] = '精选'
        GROUP BY partition_date,params['card_id'],cl_id
      )b
      on a.partition_date=b.partition_date and a.card_id=b.card_id and a.cl_id=b.cl_id
      full join
      (
        -- Reading time: total page_stay per day/card/device.
        -- The inner query de-duplicates raw events. The outer query collapses them to
        -- one row per join key so this join cannot multiply rows from a, b or f.
        SELECT partition_date,business_id,cl_id,sum(page_stay) as page_stay
        from
        (
          SELECT partition_date,cl_id,params['business_id'] as business_id,page_stay,time_str
          FROM online.bl_hdfs_maidian_updates
          WHERE partition_date >= '20160101'
          and partition_date <=regexp_replace(DATE_SUB(current_date,1) ,'-','')
          and action='page_view'
          and page_stay>=0 and page_stay<1000
          and page_name in ('post_detail','user_post_detail','doctor_post_detail')
          group by partition_date,cl_id,params['business_id'],page_stay,time_str
        )a
        group by partition_date,business_id,cl_id
      )e
      on a.partition_date=e.partition_date and a.card_id=e.business_id and a.cl_id=e.cl_id
      full join
      (
        -- Detail-page views (PV) and views with at least 20 seconds of stay
        SELECT partition_date,cl_id,params['business_id'] as business_id
        ,count(distinct time_str) as page_pv
        ,count(distinct case when page_stay>=20 then time_str end) as page_pv_20
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date >= '20160101'
        and partition_date <=regexp_replace(DATE_SUB(current_date,1) ,'-','')
        and action='page_view'
        and page_name in ('post_detail','user_post_detail','doctor_post_detail')
        group by partition_date,cl_id,params['business_id']
      )f
      -- all three keys are coalesced across a and e so stay-only rows also match
      on nvl(a.partition_date,e.partition_date)=f.partition_date and nvl(a.card_id,e.business_id)=f.business_id and nvl(a.cl_id,e.cl_id)=f.cl_id
      left join
      (
        -- Blacklisted devices from the latest device-clean partition
        select distinct device_id
        from ML.ML_D_CT_DV_DEVICECLEAN_DIMEN_D
        where PARTITION_DAY =regexp_replace(DATE_SUB(current_date,1) ,'-','')
        AND (IS_MORE_USER = 'true'
        OR IS_STAFF = 'true'
        OR IS_SPAM_CHANNEL = 'true'
        OR IS_SUSPICIOUS = 'true'
        OR IS_ASSOCIATED_ISSUEUSER = 'true')
      )c
      on a.cl_id=c.device_id
      left join
      (
        -- Devices of blacklisted users, per day
        SELECT partition_date,device_id
        FROM
        (
          -- first entry of device_list for each user and day (empty string when the list is empty)
          SELECT user_id,partition_date,
          if(size(device_list) > 0, device_list [ 0 ], '') AS device_id
          FROM online.ml_user_updates
          WHERE partition_date >= '20160101'
          and partition_date <=regexp_replace(DATE_SUB(current_date,1) ,'-','')
        )a
        join
        (
          select distinct user_id
          from ml.ML_D_CT_UI_USERCLEAN_DIMEN_D
          where PARTITION_DAY =regexp_replace(DATE_SUB(current_date,1) ,'-','')
          and (is_doctor = 'true'
          or is_puppet = 'true'
          or is_classify_user = 'true'
          or is_reply_fake = 'true'
          or is_staff = 'true'
          or is_associated_issuedevice = 'true')
        )b
        on a.user_id=b.user_id
      )d
      on a.cl_id=d.device_id and a.partition_date=d.partition_date
      -- anti-join: keep only rows with no match in either blacklist
      where c.device_id is null and d.device_id is null
      group by nvl(nvl(a.card_id,e.business_id),f.business_id),nvl(nvl(a.partition_date,e.partition_date),f.partition_date)
    )t1
    full join
    (
      -- Interactions per post and day, excluding blacklisted users
      SELECT a.tractate_id,a.create_date
      ,sum(case when type='reply' then num end) as reply_num
      ,sum(case when type='vote' then num end) as vote_num
      ,sum(case when type='favor' then num end) as favor_num
      ,sum(case when type='share' then num end) as share_num
      from
      (
        -- Real reply count
        SELECT tractate_id,create_date,a.user_id,'reply' as type,sum(reply_num) as num
        from
        (
          -- replies per post, user and creation day
          SELECT tractate_id,user_id,regexp_replace(substr(create_time,1,10),'-','') as create_date,count(distinct create_time) as reply_num
          FROM online.tl_hdfs_api_tractate_reply_view
          WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
          GROUP by tractate_id,user_id,regexp_replace(substr(create_time,1,10),'-','')
        )a
        -- keep only replies whose user logged a matching in-app reply action that day
        JOIN
        (
          SELECT a.partition_date,user_id
          FROM
          (
            SELECT partition_date,user_id,device_id,action
            FROM online.bl_hdfs_operation_updates
            WHERE partition_date>='20160101'
            AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
          )a
          JOIN
          (
            SELECT code
            FROM dim.dim_community_action_type
            WHERE communityuserbehavior_type_name = '回帖'
          )type
          ON a.action = code
          GROUP BY a.partition_date,user_id
        )b
        ON a.user_id = b.user_id
        AND a.create_date = b.partition_date
        group by tractate_id,create_date,a.user_id
        union all
        -- Real vote (like) count: votes on posts plus votes on post replies
        SELECT tractate_id,create_date,a.user_id,'vote' as type,sum(vote_num) as num
        FROM
        (
          SELECT tractate_id,user_id,regexp_replace(substr(create_time,1,10),'-','') as create_date,count(distinct create_time) as vote_num
          FROM online.tl_hdfs_api_tractate_vote_view
          WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','')
          group by tractate_id,user_id,regexp_replace(substr(create_time,1,10),'-','')
          UNION ALL
          SELECT tractate_id,user_id,regexp_replace(substr(create_time,1,10),'-','') as create_date,count(distinct create_time) as vote_num
          FROM online.tl_hdfs_api_tractate_reply_vote_view
          WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','')
          group by tractate_id,user_id,regexp_replace(substr(create_time,1,10),'-','')
        )a
        -- keep only votes whose user logged a matching in-app vote action that day
        JOIN
        (
          SELECT a.partition_date,user_id
          FROM
          (
            SELECT partition_date,user_id,device_id,action
            FROM online.bl_hdfs_operation_updates
            WHERE partition_date>='20160101'
            AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
          )a
          JOIN
          (
            SELECT code
            FROM dim.dim_community_action_type
            WHERE communityuserbehavior_type_name = '点赞'
          )type
          ON a.action = code
          GROUP BY a.partition_date,user_id
        )b
        ON a.user_id = b.user_id
        AND a.create_date = b.partition_date
        GROUP BY tractate_id,create_date,a.user_id
        union all
        -- Favorite (collect) count
        SELECT tractate_id,regexp_replace(substr(create_time,1,10),'-','') as create_date,user_id,'favor' as type,count(distinct create_time) as num
        FROM online.tl_hdfs_api_tractate_favor_view
        WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','')
        group by user_id,tractate_id,regexp_replace(substr(create_time,1,10),'-','')
        union all
        -- Share-click count
        SELECT params['business_id'] as tractate_id,partition_date as create_date,user_id,'share' as type,count(distinct time_str) as num
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date >= '20160101'
        and partition_date <=regexp_replace(DATE_SUB(current_date,1) ,'-','')
        and action='page_click_share'
        and page_name in ('post_detail','user_post_detail','doctor_post_detail')
        group by params['business_id'],partition_date,user_id
      )a
      left join
      (
        -- Blacklisted users from the latest user-clean partition
        select distinct user_id
        from ml.ML_D_CT_UI_USERCLEAN_DIMEN_D
        where PARTITION_DAY =regexp_replace(DATE_SUB(current_date,1) ,'-','')
        and (is_doctor = 'true'
        or is_puppet = 'true'
        or is_classify_user = 'true'
        or is_reply_fake = 'true'
        or is_staff = 'true'
        or is_associated_issuedevice = 'true')
      )b
      on a.user_id=b.user_id
      -- anti-join: drop interactions made by blacklisted users
      where b.user_id is null
      group by a.tractate_id,a.create_date
    )t2
    on t1.card_id=t2.tractate_id and t1.partition_date =t2.create_date
  )t1
  join
  (
    -- Content attributes (level, dates, type) plus the list of associated tag names
    select id,content_level,user_id,create_date,audit_date,type,tag_list
    FROM
    (
      select id,content_level,user_id,substr(create_time,1,10) as create_date,substr(audit_time,1,10) as audit_date,'帖子' as type
      from online.tl_hdfs_api_tractate_view
      where partition_date= regexp_replace(DATE_SUB(current_date,1) ,'-','')
      group by id,content_level,user_id,create_time,audit_time
      union all
      select id,content_level,user_id,substr(created_time,1,10) as create_date,substr(audit_time,1,10) as audit_date,'日记本' as type
      from online.tl_hdfs_diary_view
      where partition_date= regexp_replace(DATE_SUB(current_date,1) ,'-','')
      group by id,content_level,user_id,created_time,audit_time
      union all
      -- answers have no audit time, so update_time is used as an approximation
      select id,level as content_level,user_id,substr(create_time,1,10) as create_date,substr(update_time,1,10) as audit_date,'回答' as type
      from online.tl_hdfs_answer_view
      where partition_date= regexp_replace(DATE_SUB(current_date,1) ,'-','')
      group by id,level,user_id,create_time,update_time
    )a
    left join
    (
      select card_id,collect_set(tag_name) as tag_list
      from
      (
        -- diaries
        select diary_id as card_id, tag_id
        from online.tl_hdfs_diary_tags_view
        where partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
        group by diary_id, tag_id
        union all
        -- user posts
        select tractate_id as card_id,tag_id
        from online.tl_hdfs_api_tractate_tag_view
        where partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
        group by tractate_id,tag_id
        union all
        -- answers inherit the tags of their question
        select a.card_id,b.tag_id
        from
        (
          select id as card_id,question_id
          from online.tl_hdfs_answer_view
          where partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
          and is_online ='true'
        )a
        join
        (
          select question_id,tag_id
          from online.tl_hdfs_questiontag_view
          where partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
        )b
        on a.question_id =b.question_id
        group by a.card_id,b.tag_id
      )c
      join
      (
        -- old tag library
        -- NOTE(review): pinned to partition 20200212 - presumably a frozen snapshot, confirm
        select id as tag_id
        ,name as tag_name
        from online.tl_hdfs_api_tag_view
        where partition_date = '20200212'
        and tag_type in (2,3)
      )d
      on c.tag_id=d.tag_id
      group by c.card_id
    )b
    on a.id=b.card_id
  )t2
  on t1.card_id= t2.id
  group by t1.card_id,type,content_level,create_date,audit_date,tag_list
)t4
order by `历史曝光` desc
#step2.job
type=command
dependencies=step1_1,step1_2,step1_3,step1_4,step1_5,step1_6,step1_7,step1_8,step1_9,step1_10,step1_11,step1_12,step1_13,step1_14
command=curl -X GET http://localhost:8553/api/report/email/meigou_detail_page/liudi@igengmei.com,zhaoyang@igengmei.com,duanyingrong@igengmei.com,zhaowei@igengmei.com,songke@igengmei.com,wanglidan@igengmei.com/weiyimin@igengmei.com
\ No newline at end of file
command=curl -X GET http://localhost:8553/api/report/email/daily_userpost/liudi@igengmei.com,zhaoyang@igengmei.com,duanyingrong@igengmei.com,zhaowei@igengmei.com,songke@igengmei.com,wanglidan@igengmei.com/weiyimin@igengmei.com
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment