Commit e35a5b43 authored by 赵建伟's avatar 赵建伟

Merge branch 'weiyimin' into 'master'

Weiyimin

See merge request !57
parents 85f385cb 75684651
clear_content=首页内容清洗
clear_content_answer=首页内容清洗-回答
clear_content_diary=首页内容清洗-日记
\ No newline at end of file
#step1_1.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_api_tractate_view
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online bl_hdfs_operation_updates
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_api_tractate_vote_view
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_api_tractate_reply_vote_view
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_api_tractate_favor_view
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_api_tractate_favor_view
\ No newline at end of file
#step1_2.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive tl tl_mp_api_tractate_tag_v3
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive tl tl_mp_api_tractate_tag_v3
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_api_tag_3_0_view
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_community_precise_exposure_detail
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive ml ML_D_CT_DV_DEVICECLEAN_DIMEN_D
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive ml ML_D_CT_UI_USERCLEAN_DIMEN_D
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_user_updates
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_api_tractate_reply_view
\ No newline at end of file
#step2.job
# Final step of the clear_content flow. It declares a dependency on the step1_* wait jobs
# listed below and then issues an HTTP GET to the report service on localhost:8553 for the
# "clear_content" report; the two mail addresses are passed as path segments of the URL
# (presumably the recipients of the report mail - confirm against the report service).
# NOTE(review): the dependency list names step1_1 .. step1_14; confirm that a job file exists
# under each of these names, otherwise the flow cannot be resolved.
type=command
dependencies=step1_1,step1_2,step1_3,step1_4,step1_5,step1_6,step1_7,step1_8,step1_9,step1_10,step1_11,step1_12,step1_13,step1_14
command=curl -X GET http://localhost:8553/api/report/email/clear_content/weiyimin@igengmei.com/weiyimin@qq.com
\ No newline at end of file
--新手精选帖子
--帖子关联标签 3315
SELECT card_id as `日记本id`
,type as `内容类型`
,content_level as `星级`
,case when is_cpc>0 then '是' else '否' end as `是否商业化内容`
,create_date as `上线日期`
,audit_date as `最近审核日期`
,tag_list as `所有关联标签`
,nvl(concat(round((nvl(reply_num_30,0)+nvl(vote_num_30,0)+nvl(favor_num_30,0)+nvl(share_num_30,0))/page_pv_30*100,2),'%'),0) as `前30日互动率`
,nvl(concat(round(click_pv_30/exp_pv_30*100,2),'%'),0) as `前30日ctr`
,nvl(click_pv_30,0) as `前30日点击`
,nvl(exp_pv_30,0) as `前30日曝光`
,nvl(page_pv_30,0) as `前30日浏览pv`
,nvl(reply_num_30,0) as `前30日真实评论`
,nvl(vote_num_30,0) as `前30日真实点赞`
,nvl(favor_num_30,0) as `前30日收藏`
,nvl(share_num_30,0) as `前30日转发`
,nvl(page_pv_20_30,0) as `前30日超过20秒阅读pv`
,nvl(avg_page_stay_30,0) as `前30日平均阅读时长(s)`
,nvl(concat(round((nvl(reply_num,0)+nvl(vote_num,0)+nvl(favor_num,0)+nvl(share_num,0))/page_pv*100,2),'%'),0) as `历史互动率`
,nvl(concat(round(click_pv/exp_pv*100,2),'%'),0) as `历史ctr`
,nvl(click_pv,0) as `历史点击`
,nvl(exp_pv,0) as `历史曝光`
,nvl(page_pv,0) as `历史浏览pv`
,nvl(reply_num,0) as `历史真实评论`
,nvl(vote_num,0) as `历史真实点赞`
,nvl(favor_num,0) as `历史收藏`
,nvl(share_num,0) as `历史转发`
,nvl(page_pv_20,0) as `历史超过20秒阅读pv`
,nvl(avg_page_stay,0) as `历史平均阅读时长(s)`
FROM
(
--历史数据,指从审核时间至今的数据
SELECT t1.card_id,type,content_level,create_date,audit_date,tag_list
,sum(is_cpc) as is_cpc
,sum(exp_pv) as exp_pv
,sum(click_pv) as click_pv
,sum(page_pv) as page_pv
,sum(reply_num) as reply_num
,sum(vote_num) as vote_num
,sum(favor_num) as favor_num
,sum(share_num) as share_num
,sum(page_pv_20) as page_pv_20
,round(avg(avg_page_stay),2) as avg_page_stay
,sum(case when t1.partition_date>=DATE_SUB(current_date,30) then exp_pv end) as exp_pv_30
,sum(case when t1.partition_date>=DATE_SUB(current_date,30) then click_pv end) as click_pv_30
,sum(case when t1.partition_date>=DATE_SUB(current_date,30) then page_pv end) as page_pv_30
,sum(case when t1.partition_date>=DATE_SUB(current_date,30) then reply_num end) as reply_num_30
,sum(case when t1.partition_date>=DATE_SUB(current_date,30) then vote_num end) as vote_num_30
,sum(case when t1.partition_date>=DATE_SUB(current_date,30) then favor_num end) as favor_num_30
,sum(case when t1.partition_date>=DATE_SUB(current_date,30) then share_num end) as share_num_30
,sum(case when t1.partition_date>=DATE_SUB(current_date,30) then page_pv_20 end) as page_pv_20_30
,round(avg(case when t1.partition_date>=DATE_SUB(current_date,30) then avg_page_stay end),2) as avg_page_stay_30
from
(
SELECT nvl(concat_ws('-',substr(t1.partition_date,1,4),substr(t1.partition_date,5,2),substr(t1.partition_date,7,2))
,concat_ws('-',substr(t2.create_date,1,4),substr(t2.create_date,5,2),substr(t2.create_date,7,2))) as partition_date
,nvl(t1.card_id,t2.answer_id) as card_id
,is_cpc,exp_pv,click_pv,page_pv,page_pv_20,avg_page_stay
,reply_num,vote_num,favor_num,share_num
from
(
select nvl(nvl(a.card_id,e.business_id),f.business_id) as card_id
,nvl(nvl(a.partition_date,e.partition_date),f.partition_date) as partition_date
,sum(is_cpc) as is_cpc
,sum(exp_pv) as exp_pv
,sum(click_pv) as click_pv
,sum(page_pv) as page_pv
,sum(page_pv_20) as page_pv_20
,round(sum(page_stay)/count(distinct e.cl_id,e.partition_date),2) as avg_page_stay
from
(-- Exposure: distinct sessions per day / card / device that were shown an answer card on the home tab '精选'
SELECT
    partition_date,
    card_id,
    cl_id,
    COUNT(DISTINCT app_session_id) AS exp_pv,
    -- 1 when the exposure row is a cpc / advertise transaction, otherwise 0
    CASE WHEN transaction_type IN ('cpc','advertise') THEN 1 ELSE 0 END AS is_cpc
FROM online.ml_community_precise_exposure_detail
WHERE partition_date >= '20160101'
    AND partition_date <= regexp_replace(date_sub(current_date, 1), '-', '')  -- up to yesterday, dashes stripped
    AND action IN ('page_precise_exposure','home_choiceness_card_exposure')  -- the action was renamed to page_precise_exposure in version 7745
    AND is_exposure = '1'  -- precise exposure only
    AND page_name = 'home'
    AND card_content_type = 'answer'
    AND tab_name = '精选'
GROUP BY
    partition_date,
    card_id,
    cl_id,
    CASE WHEN transaction_type IN ('cpc','advertise') THEN 1 ELSE 0 END
)a
left join
(-- Clicks: distinct sessions per day / card / device with an on_click_card event on the home tab '精选'
-- NOTE(review): unlike the exposure subquery, this one has no card content type filter; it relies on
-- the join on card_id to restrict it to the exposed cards - confirm ids cannot collide across content types.
SELECT
    partition_date,
    params['card_id'] AS card_id,
    cl_id,
    COUNT(DISTINCT app_session_id) AS click_pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= '20160101'
    AND partition_date <= regexp_replace(date_sub(current_date, 1), '-', '')  -- up to yesterday, dashes stripped
    AND action = 'on_click_card'
    AND params['page_name'] = 'home'
    AND params['tab_name'] = '精选'
GROUP BY
    partition_date,
    params['card_id'],
    cl_id
)b
on a.partition_date=b.partition_date and a.card_id=b.card_id and a.cl_id=b.cl_id
full join
(-- Reading time: one row per distinct page_view event on the answer detail pages, carrying its page_stay
SELECT
    dedup.partition_date,
    dedup.business_id,
    dedup.cl_id,
    dedup.page_stay
FROM
(
    -- collapse repeated log rows: identical (day, device, content id, stay, time_str) tuples are kept once
    SELECT DISTINCT
        partition_date,
        cl_id,
        params['business_id'] AS business_id,
        page_stay,
        time_str
    FROM online.bl_hdfs_maidian_updates
    WHERE partition_date >= '20160101'
        AND partition_date <= regexp_replace(date_sub(current_date, 1), '-', '')  -- up to yesterday, dashes stripped
        AND action = 'page_view'
        AND page_stay >= 0
        AND page_stay < 1000  -- only stays in the range [0, 1000) are kept
        AND page_name IN ('question_answer_detail','answer_detail')
) dedup
)e
on a.partition_date=e.partition_date and a.card_id=e.business_id and a.cl_id=e.cl_id
full join
(-- Page views per day / device / content id on the answer detail pages.
-- page_pv    : number of distinct page_view events (deduplicated by time_str).
-- page_pv_20 : number of distinct page_view events whose page_stay is at least 20.
-- Both counters are deduplicated by time_str; previously page_pv_20 counted raw rows, so repeated
-- log rows for one view could make it larger than page_pv.
SELECT partition_date,cl_id,params['business_id'] as business_id
,count(distinct time_str) as page_pv
,count(distinct case when page_stay>=20 then time_str end) as page_pv_20
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= '20160101'
and partition_date <=regexp_replace(DATE_SUB(current_date,1) ,'-','')
and action='page_view'
and page_name in ('question_answer_detail','answer_detail')
group by partition_date,cl_id,params['business_id']
)f
on a.partition_date=f.partition_date and nvl(a.card_id,e.business_id)=f.business_id and nvl(a.cl_id,e.cl_id)=f.cl_id
left join
(
-- Devices to exclude from the statistics: every device that carries at least one of the flags
-- below in yesterday's partition of the device-clean dimension table.
select distinct device_id
from ML.ML_D_CT_DV_DEVICECLEAN_DIMEN_D
where PARTITION_DAY =regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND (IS_MORE_USER = 'true'
OR IS_STAFF = 'true'
OR IS_SPAM_CHANNEL = 'true'
OR IS_SUSPICIOUS = 'true'
OR IS_ASSOCIATED_ISSUEUSER = 'true') -- was misspelled 'ture', which cannot equal the 'true' value the other flags test for
)c
on a.cl_id=c.device_id
left join
(
-- Devices tied to flagged users: per day, the first device of each user that is flagged in
-- yesterday's partition of the user-clean dimension table.
SELECT
    usr.partition_date,
    usr.device_id
FROM
(
    -- first element of device_list for every user / day row ('' when the list has no elements)
    SELECT
        user_id,
        partition_date,
        CASE WHEN size(device_list) > 0 THEN device_list[0] ELSE '' END AS device_id
    FROM online.ml_user_updates
    WHERE partition_date >= '20160101'
        AND partition_date <= regexp_replace(date_sub(current_date, 1), '-', '')  -- up to yesterday, dashes stripped
) usr
INNER JOIN
(
    -- users carrying at least one of the flags below
    SELECT user_id
    FROM ml.ML_D_CT_UI_USERCLEAN_DIMEN_D
    WHERE PARTITION_DAY = regexp_replace(date_sub(current_date, 1), '-', '')
        AND (is_doctor = 'true'
            OR is_puppet = 'true'
            OR is_classify_user = 'true'
            OR is_reply_fake = 'true'
            OR is_staff = 'true'
            OR is_associated_issuedevice = 'true')
    GROUP BY user_id
) flagged
    ON usr.user_id = flagged.user_id
)d
on a.cl_id=d.device_id and a.partition_date=d.partition_date
where c.device_id is null and d.device_id is null
group by nvl(nvl(a.card_id,e.business_id),f.business_id),nvl(nvl(a.partition_date,e.partition_date),f.partition_date)
)t1
full join
(
SELECT a.answer_id,a.create_date
,sum(case when type='reply' then num end) as reply_num
,sum(case when type='vote' then num end) as vote_num
,sum(case when type='favor' then num end) as favor_num
,sum(case when type='share' then num end) as share_num
from
( --真实评论数
SELECT answer_id,regexp_replace(substr(create_time,1,10),'-','') as create_date,user_id,'reply' as type,count(create_time) as num
FROM online.tl_hdfs_answer_reply_view
WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','')
AND answer_id is not NULL
group by user_id,answer_id,regexp_replace(substr(create_time,1,10),'-','')
union all
--真实点赞数
SELECT answer_id,regexp_replace(substr(create_time,1,10),'-','') as create_date,user_id,'vote' as type,count(create_time) as num
FROM online.tl_hdfs_answer_vote_view
WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','')
group by answer_id,user_id,regexp_replace(substr(create_time,1,10),'-','')
union all
--关注问答
SELECT params['business_id'] as answer_id,partition_date as create_date,user_id,'favor' as type,count(distinct time_str) as num
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= '20160101'
and partition_date <=regexp_replace(DATE_SUB(current_date,1) ,'-','')
and action='on_click_button' and params['button_name']='attention'
and page_name in ('question_answer_detail')
group by params['business_id'],partition_date,user_id
union all
--收藏回答
SELECT params['business_id'] as answer_id,partition_date as create_date,user_id,'favor' as type,count(distinct time_str) as num
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= '20160101'
and partition_date <=regexp_replace(DATE_SUB(current_date,1) ,'-','')
and action='on_click_button' and params['button_name']='收藏'
and page_name in ('answer_detail')
group by params['business_id'],partition_date,user_id
union all
--点击分享数
SELECT params['business_id'] as answer_id,partition_date as create_date,user_id,'share' as type,count(distinct time_str) as num
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= '20160101'
and partition_date <=regexp_replace(DATE_SUB(current_date,1) ,'-','')
and action='page_click_share'
and page_name in ('question_answer_detail','answer_detail')
group by params['business_id'],partition_date,user_id
)a
left join
(
select distinct user_id
from ml.ML_D_CT_UI_USERCLEAN_DIMEN_D
where PARTITION_DAY =regexp_replace(DATE_SUB(current_date,1) ,'-','')
and (is_doctor = 'true'
or is_puppet = 'true'
or is_classify_user = 'true'
or is_reply_fake = 'true'
or is_staff = 'true'
or is_associated_issuedevice = 'true')
)b
on a.user_id=b.user_id
where b.user_id is null
group by a.answer_id,a.create_date
)t2
on t1.card_id=t2.answer_id and t1.partition_date =t2.create_date
)t1
join
(
select id,content_level,user_id,create_date,audit_date,type,tag_list
FROM
(
-- select id,content_level,user_id,substr(create_time,1,10) as create_date,substr(audit_time,1,10) as audit_date,'帖子' as type
-- from online.tl_hdfs_api_tractate_view
-- where partition_date= regexp_replace(DATE_SUB(current_date,1) ,'-','')
-- group by id,content_level,user_id,create_time,audit_time
--
-- union all
-- select id,content_level,user_id,substr(created_time,1,10) as create_date,substr(audit_time,1,10) as audit_date,'日记本' as type
-- from online.tl_hdfs_diary_view
-- where partition_date= regexp_replace(DATE_SUB(current_date,1) ,'-','')
-- group by id,content_level,user_id,created_time,audit_time
-- union all
--
select id,level as content_level,user_id,substr(create_time,1,10) as create_date,substr(update_time,1,10) as audit_date,'回答' as type--回答无审核时间,用更新时间近似取代
from online.tl_hdfs_answer_view
where partition_date= regexp_replace(DATE_SUB(current_date,1) ,'-','')
group by id,level,user_id,create_time,update_time
)a
left join
(
select card_id,collect_set(tag_name) as tag_list
from
(
-- select diary_id as card_id, tag_id
-- from online.tl_hdfs_diary_tags_view --日记
-- where partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
-- group by diary_id, tag_id
-- union all
--
-- select tractate_id as card_id,tag_id
-- from online.tl_hdfs_api_tractate_tag_view --用户贴
-- where partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
-- group by tractate_id,tag_id
--
-- union all
--
select a.card_id,b.tag_id
from
(
select id as card_id,question_id
from online.tl_hdfs_answer_view --回答
where partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
and is_online ='true'
)a
join
(
select question_id,tag_id
from online.tl_hdfs_questiontag_view
where partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
)b
on a.question_id =b.question_id
group by a.card_id,b.tag_id
)c
join
(
select id as tag_id
,name as tag_name
from online.tl_hdfs_api_tag_view --老标签库
where partition_date = '20200212'
and tag_type in (2,3)
)d
on c.tag_id=d.tag_id
group by c.card_id
)b
on a.id=b.card_id
)t2
on t1.card_id= t2.id
group by t1.card_id,type,content_level,create_date,audit_date,tag_list
)t4
order by `历史曝光` desc
--新手精选帖子
--帖子关联标签 3315
SELECT card_id as `日记本id`
,type as `内容类型`
,content_level as `星级`
,case when is_cpc>0 then '是' else '否' end as `是否商业化内容`
,create_date as `上线日期`
,audit_date as `最近审核日期`
,tag_list as `所有关联标签`
,nvl(concat(round((nvl(reply_num_30,0)+nvl(vote_num_30,0)+nvl(favor_num_30,0)+nvl(share_num_30,0))/page_pv_30*100,2),'%'),0) as `前30日互动率`
,nvl(concat(round(click_pv_30/exp_pv_30*100,2),'%'),0) as `前30日ctr`
,nvl(click_pv_30,0) as `前30日点击`
,nvl(exp_pv_30,0) as `前30日曝光`
,nvl(page_pv_30,0) as `前30日浏览pv`
,nvl(reply_num_30,0) as `前30日真实评论`
,nvl(vote_num_30,0) as `前30日真实点赞`
,nvl(favor_num_30,0) as `前30日收藏`
,nvl(share_num_30,0) as `前30日转发`
,nvl(page_pv_20_30,0) as `前30日超过20秒阅读pv`
,nvl(avg_page_stay_30,0) as `前30日平均阅读时长(s)`
,nvl(concat(round((nvl(reply_num,0)+nvl(vote_num,0)+nvl(favor_num,0)+nvl(share_num,0))/page_pv*100,2),'%'),0) as `历史互动率`
,nvl(concat(round(click_pv/exp_pv*100,2),'%'),0) as `历史ctr`
,nvl(click_pv,0) as `历史点击`
,nvl(exp_pv,0) as `历史曝光`
,nvl(page_pv,0) as `历史浏览pv`
,nvl(reply_num,0) as `历史真实评论`
,nvl(vote_num,0) as `历史真实点赞`
,nvl(favor_num,0) as `历史收藏`
,nvl(share_num,0) as `历史转发`
,nvl(page_pv_20,0) as `历史超过20秒阅读pv`
,nvl(avg_page_stay,0) as `历史平均阅读时长(s)`
FROM
(
--历史数据,指从审核时间至今的数据
SELECT t1.card_id,type,content_level,create_date,audit_date,tag_list
,sum(is_cpc) as is_cpc
,sum(exp_pv) as exp_pv
,sum(click_pv) as click_pv
,sum(page_pv) as page_pv
,sum(reply_num) as reply_num
,sum(vote_num) as vote_num
,sum(favor_num) as favor_num
,sum(share_num) as share_num
,sum(page_pv_20) as page_pv_20
,round(avg(avg_page_stay),2) as avg_page_stay
,sum(case when t1.partition_date>=DATE_SUB(current_date,30) then exp_pv end) as exp_pv_30
,sum(case when t1.partition_date>=DATE_SUB(current_date,30) then click_pv end) as click_pv_30
,sum(case when t1.partition_date>=DATE_SUB(current_date,30) then page_pv end) as page_pv_30
,sum(case when t1.partition_date>=DATE_SUB(current_date,30) then reply_num end) as reply_num_30
,sum(case when t1.partition_date>=DATE_SUB(current_date,30) then vote_num end) as vote_num_30
,sum(case when t1.partition_date>=DATE_SUB(current_date,30) then favor_num end) as favor_num_30
,sum(case when t1.partition_date>=DATE_SUB(current_date,30) then share_num end) as share_num_30
,sum(case when t1.partition_date>=DATE_SUB(current_date,30) then page_pv_20 end) as page_pv_20_30
,round(avg(case when t1.partition_date>=DATE_SUB(current_date,30) then avg_page_stay end),2) as avg_page_stay_30
from
(
SELECT nvl(concat_ws('-',substr(t1.partition_date,1,4),substr(t1.partition_date,5,2),substr(t1.partition_date,7,2))
,concat_ws('-',substr(t2.create_date,1,4),substr(t2.create_date,5,2),substr(t2.create_date,7,2))) as partition_date
,nvl(t1.card_id,t2.diary_id) as card_id
,is_cpc,exp_pv,click_pv,page_pv,page_pv_20,avg_page_stay
,reply_num,vote_num,favor_num,share_num
from
(
select nvl(nvl(a.card_id,e.business_id),f.business_id) as card_id
,nvl(nvl(a.partition_date,e.partition_date),f.partition_date) as partition_date
,sum(is_cpc) as is_cpc
,sum(exp_pv) as exp_pv
,sum(click_pv) as click_pv
,sum(page_pv) as page_pv
,sum(page_pv_20) as page_pv_20
,round(sum(page_stay)/count(distinct e.cl_id,e.partition_date),2) as avg_page_stay
from
(-- Exposure: distinct sessions per day / card / device that were shown a diary card on the home tab '精选'
SELECT
    partition_date,
    card_id,
    cl_id,
    COUNT(DISTINCT app_session_id) AS exp_pv,
    -- 1 when the exposure row is a cpc / advertise transaction, otherwise 0
    CASE WHEN transaction_type IN ('cpc','advertise') THEN 1 ELSE 0 END AS is_cpc
FROM online.ml_community_precise_exposure_detail
WHERE partition_date >= '20160101'
    AND partition_date <= regexp_replace(date_sub(current_date, 1), '-', '')  -- up to yesterday, dashes stripped
    AND action IN ('page_precise_exposure','home_choiceness_card_exposure')  -- the action was renamed to page_precise_exposure in version 7745
    AND is_exposure = '1'  -- precise exposure only
    AND page_name = 'home'
    AND card_content_type = 'diary'
    AND tab_name = '精选'
GROUP BY
    partition_date,
    card_id,
    cl_id,
    CASE WHEN transaction_type IN ('cpc','advertise') THEN 1 ELSE 0 END
)a
left join
(-- Clicks: distinct sessions per day / card / device with an on_click_card event on the home tab '精选'
-- NOTE(review): unlike the exposure subquery, this one has no card content type filter; it relies on
-- the join on card_id to restrict it to the exposed cards - confirm ids cannot collide across content types.
SELECT
    partition_date,
    params['card_id'] AS card_id,
    cl_id,
    COUNT(DISTINCT app_session_id) AS click_pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= '20160101'
    AND partition_date <= regexp_replace(date_sub(current_date, 1), '-', '')  -- up to yesterday, dashes stripped
    AND action = 'on_click_card'
    AND params['page_name'] = 'home'
    AND params['tab_name'] = '精选'
GROUP BY
    partition_date,
    params['card_id'],
    cl_id
)b
on a.partition_date=b.partition_date and a.card_id=b.card_id and a.cl_id=b.cl_id
full join
(-- Reading time: one row per distinct page_view event on the diary / topic detail pages, carrying its page_stay
SELECT
    dedup.partition_date,
    dedup.business_id,
    dedup.cl_id,
    dedup.page_stay
FROM
(
    -- collapse repeated log rows: identical (day, device, content id, stay, time_str) tuples are kept once
    SELECT DISTINCT
        partition_date,
        cl_id,
        params['business_id'] AS business_id,
        page_stay,
        time_str
    FROM online.bl_hdfs_maidian_updates
    WHERE partition_date >= '20160101'
        AND partition_date <= regexp_replace(date_sub(current_date, 1), '-', '')  -- up to yesterday, dashes stripped
        AND action = 'page_view'
        AND page_stay >= 0
        AND page_stay < 1000  -- only stays in the range [0, 1000) are kept
        AND page_name IN ('diary_detail','topic_detail')
) dedup
)e
on a.partition_date=e.partition_date and a.card_id=e.business_id and a.cl_id=e.cl_id
full join
(-- Page views per day / device / content id on the diary / topic detail pages.
-- page_pv    : number of distinct page_view events (deduplicated by time_str).
-- page_pv_20 : number of distinct page_view events whose page_stay is at least 20.
-- Both counters are deduplicated by time_str; previously page_pv_20 counted raw rows, so repeated
-- log rows for one view could make it larger than page_pv.
SELECT partition_date,cl_id,params['business_id'] as business_id
,count(distinct time_str) as page_pv
,count(distinct case when page_stay>=20 then time_str end) as page_pv_20
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= '20160101'
and partition_date <=regexp_replace(DATE_SUB(current_date,1) ,'-','')
and action='page_view'
and page_name in ('diary_detail','topic_detail')
group by partition_date,cl_id,params['business_id']
)f
on a.partition_date=f.partition_date and nvl(a.card_id,e.business_id)=f.business_id and nvl(a.cl_id,e.cl_id)=f.cl_id
left join
(
-- Devices to exclude from the statistics: every device that carries at least one of the flags
-- below in yesterday's partition of the device-clean dimension table.
select distinct device_id
from ML.ML_D_CT_DV_DEVICECLEAN_DIMEN_D
where PARTITION_DAY =regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND (IS_MORE_USER = 'true'
OR IS_STAFF = 'true'
OR IS_SPAM_CHANNEL = 'true'
OR IS_SUSPICIOUS = 'true'
OR IS_ASSOCIATED_ISSUEUSER = 'true') -- was misspelled 'ture', which cannot equal the 'true' value the other flags test for
)c
on a.cl_id=c.device_id
left join
(
-- Devices tied to flagged users: per day, the first device of each user that is flagged in
-- yesterday's partition of the user-clean dimension table.
SELECT
    usr.partition_date,
    usr.device_id
FROM
(
    -- first element of device_list for every user / day row ('' when the list has no elements)
    SELECT
        user_id,
        partition_date,
        CASE WHEN size(device_list) > 0 THEN device_list[0] ELSE '' END AS device_id
    FROM online.ml_user_updates
    WHERE partition_date >= '20160101'
        AND partition_date <= regexp_replace(date_sub(current_date, 1), '-', '')  -- up to yesterday, dashes stripped
) usr
INNER JOIN
(
    -- users carrying at least one of the flags below
    SELECT user_id
    FROM ml.ML_D_CT_UI_USERCLEAN_DIMEN_D
    WHERE PARTITION_DAY = regexp_replace(date_sub(current_date, 1), '-', '')
        AND (is_doctor = 'true'
            OR is_puppet = 'true'
            OR is_classify_user = 'true'
            OR is_reply_fake = 'true'
            OR is_staff = 'true'
            OR is_associated_issuedevice = 'true')
    GROUP BY user_id
) flagged
    ON usr.user_id = flagged.user_id
)d
on a.cl_id=d.device_id and a.partition_date=d.partition_date
where c.device_id is null and d.device_id is null
group by nvl(nvl(a.card_id,e.business_id),f.business_id),nvl(nvl(a.partition_date,e.partition_date),f.partition_date)
)t1
full join
(
SELECT a.diary_id,a.create_date
,sum(case when type='reply' then num end) as reply_num
,sum(case when type='vote' then num end) as vote_num
,sum(case when type='favor' then num end) as favor_num
,sum(case when type='share' then num end) as share_num
from
( --真实评论数
SELECT t2.diary_id,create_date,t1.user_id,'reply' as type,sum(reply_num) as num
FROM
(
SELECT problem_id,user_id,regexp_replace(substr(reply_date,1,10),'-','') as create_date,count(distinct reply_date) as reply_num
FROM online.tl_hdfs_topicreply_view
WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','')
group by user_id,regexp_replace(substr(reply_date,1,10),'-',''),problem_id
)t1
JOIN
(
SELECT id,diary_id
FROM online.tl_hdfs_problem_view
WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','')
group by id,diary_id
)t2
on t2.id=t1.problem_id
group by t2.diary_id,create_date,t1.user_id
union all
--真实点赞数
SELECT t1.diary_id,create_date,t1.user_id,'vote' as type,sum(vote_num) as num
FROM
(
SELECT diary_id,user_id,regexp_replace(substr(vote_time,1,10),'-','') as create_date,count(distinct vote_time) as vote_num
FROM online.tl_hdfs_diaryvote_view
WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','')
GROUP BY diary_id,user_id,regexp_replace(substr(vote_time,1,10),'-','')
)t1
JOIN
(
SELECT user_id,diary_id
FROM online.tl_hdfs_problem_view
WHERE partition_date =regexp_replace(date_sub(current_date(),1),'-','')
group by user_id,diary_id
)t2
on t1.diary_id = t2.diary_id
GROUP BY t1.diary_id,t1.user_id,t1.create_date
UNION ALL
SELECT t2.diary_id,t1.create_date,t1.user_id,'vote' as type,sum(vote_num) as num
FROM
(
SELECT topic_id,user_id,regexp_replace(substr(vote_time,1,10),'-','') as create_date,count(distinct vote_time) as vote_num
FROM online.tl_hdfs_topicvote_view
WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','')
group by topic_id,user_id,regexp_replace(substr(vote_time,1,10),'-','')
)t1
JOIN
(
SELECT id ,diary_id
from online.tl_hdfs_problem_view
WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','')
group by id,diary_id
)t2
on t2.id = t1.topic_id
GROUP BY t2.diary_id,t1.create_date,t1.user_id
UNION ALL
--收藏过日记本
SELECT diary_id,regexp_replace(substr(creatd_time,1,10),'-','') as create_date,user_id,'favor' as type,count(distinct creatd_time) as num
FROM online.tl_hdfs_diaryfavor_view
WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','')
group by user_id,diary_id,regexp_replace(substr(creatd_time,1,10),'-','')
UNION ALL
--有收藏过日记帖的用户
SELECT t1.diary_id,create_date,t2.user_id,'favor' as type,sum(favor_num) as num
FROM
(
SELECT user_id,problem_id,regexp_replace(substr(created_time,1,10),'-','') as create_date,count(distinct created_time) as favor_num
FROM online.tl_hdfs_problemfavor_view
WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','')
group by user_id,problem_id,regexp_replace(substr(created_time,1,10),'-','')
)t2
JOIN
(
SELECT id,diary_id
FROM online.tl_hdfs_problem_view
WHERE partition_date =regexp_replace(date_sub(current_date(),1),'-','')
group by id,diary_id
)t1
ON t1.id = t2.problem_id
GROUP BY t1.diary_id,create_date,t2.user_id
union all
--点击分享数
SELECT params['business_id'] as diary_id,partition_date as create_date,user_id,'share' as type,count(distinct time_str) as num
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= '20160101'
and partition_date <=regexp_replace(DATE_SUB(current_date,1) ,'-','')
and action='page_click_share'
and page_name in ('diary_detail','topic_detail')
group by params['business_id'],partition_date,user_id
)a
left join
(
select distinct user_id
from ml.ML_D_CT_UI_USERCLEAN_DIMEN_D
where PARTITION_DAY =regexp_replace(DATE_SUB(current_date,1) ,'-','')
and (is_doctor = 'true'
or is_puppet = 'true'
or is_classify_user = 'true'
or is_reply_fake = 'true'
or is_staff = 'true'
or is_associated_issuedevice = 'true')
)b
on a.user_id=b.user_id
where b.user_id is null
group by a.diary_id,a.create_date
)t2
on t1.card_id=t2.diary_id and t1.partition_date =t2.create_date
)t1
join
(
select id,content_level,user_id,create_date,audit_date,type,tag_list
FROM
(
-- select id,content_level,user_id,substr(create_time,1,10) as create_date,substr(audit_time,1,10) as audit_date,'帖子' as type
-- from online.tl_hdfs_api_tractate_view
-- where partition_date= regexp_replace(DATE_SUB(current_date,1) ,'-','')
-- group by id,content_level,user_id,create_time,audit_time
--
-- union all
select id,content_level,user_id,substr(created_time,1,10) as create_date,substr(audit_time,1,10) as audit_date,'日记本' as type
from online.tl_hdfs_diary_view
where partition_date= regexp_replace(DATE_SUB(current_date,1) ,'-','')
group by id,content_level,user_id,created_time,audit_time
-- union all
--
-- select id,level as content_level,user_id,substr(create_time,1,10) as create_date,substr(update_time,1,10) as audit_date,'回答' as type--回答无审核时间,用更新时间近似取代
-- from online.tl_hdfs_answer_view
-- where partition_date= regexp_replace(DATE_SUB(current_date,1) ,'-','')
-- group by id,level,user_id,create_time,update_time
)a
left join
(
select card_id,collect_set(tag_name) as tag_list
from
(
select diary_id as card_id, tag_id
from online.tl_hdfs_diary_tags_view --日记
where partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
group by diary_id, tag_id
-- union all
--
-- select tractate_id as card_id,tag_id
-- from online.tl_hdfs_api_tractate_tag_view --用户贴
-- where partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
-- group by tractate_id,tag_id
--
-- union all
--
-- select a.card_id,b.tag_id
-- from
-- (
-- select id as card_id,question_id
-- from online.tl_hdfs_answer_view --回答
-- where partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
-- and is_online ='true'
-- )a
-- join
-- (
-- select question_id,tag_id
-- from online.tl_hdfs_questiontag_view
-- where partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
-- )b
-- on a.question_id =b.question_id
-- group by a.card_id,b.tag_id
)c
join
(
select id as tag_id
,name as tag_name
from online.tl_hdfs_api_tag_view --老标签库
where partition_date = '20200212'
and tag_type in (2,3)
)d
on c.tag_id=d.tag_id
group by c.card_id
)b
on a.id=b.card_id
)t2
on t1.card_id= t2.id
group by t1.card_id,type,content_level,create_date,audit_date,tag_list
)t4
order by `历史曝光` desc
......@@ -248,12 +248,13 @@ FROM
SELECT partition_date,user_id,count(1) AS clue_num
FROM
(
SELECT user_id,merchant_id,MIN(REGEXP_REPLACE(SUBSTR(created_time,1,10), '-', '')) as partition_date,regexp_replace(SUBSTR(created_time,1,7),'-','') AS CALL_MONTH
SELECT user_id,merchant_id,MIN(REGEXP_REPLACE(SUBSTR(last_issue_time,1,10), '-', '')) as partition_date,regexp_replace(SUBSTR(last_issue_time,1,7),'-','') AS CALL_MONTH
FROM tl.tl_gm_sl_lead_task --线索任务表(用户点击授权后记入该表)
WHERE partition_day>=REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
AND partition_day<=regexp_replace(date_sub(current_date(),1),'-','')
AND source='2' --用户行为电话授权
GROUP BY user_id,merchant_id,regexp_replace(SUBSTR(created_time,1,7),'-','')
and (user_id is NOT NULL or USER_ID <> '')
GROUP BY user_id,merchant_id,regexp_replace(SUBSTR(last_issue_time,1,7),'-','')
)a
GROUP BY partition_date,user_id
)t2
......@@ -303,6 +304,7 @@ FROM
FROM tl.tl_gm_sl_lead_task --线索任务表(用户点击授权后记入该表)
WHERE partition_day=regexp_replace(date_sub(current_date,1),'-','')
AND source='2' --用户行为电话授权
and (user_id is NOT NULL or USER_ID <> '')
)e
ON d.lead_task_id = e.id
GROUP BY user_id,merchant_id,regexp_replace(SUBSTR(a.partition_date,1,6),'-','')
......@@ -356,6 +358,7 @@ FROM
FROM tl.tl_gm_sl_lead_task --线索任务表(用户点击授权后记入该表)
WHERE partition_day=regexp_replace(date_sub(current_date,1),'-','')
AND source='2' --用户行为电话授权
and (user_id is NOT NULL or USER_ID <> '')
)e
ON d.lead_task_id = e.id
GROUP BY user_id,merchant_id,regexp_replace(SUBSTR(a.partition_date,1,6),'-','')
......@@ -432,12 +435,13 @@ FROM
SELECT a.user_id,a.merchant_id,substr(a.partition_date,1,6) as month,min(a.partition_date) as partition_date
FROM
(
SELECT user_id,merchant_id,REGEXP_REPLACE(SUBSTR(created_time,1,10), '-', '') as partition_date
SELECT user_id,merchant_id,REGEXP_REPLACE(SUBSTR(last_issue_time,1,10), '-', '') as partition_date
FROM tl.tl_gm_sl_lead_task --线索任务表(用户点击授权后记入该表)
WHERE partition_day>=REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
AND partition_day<=regexp_replace(date_sub(current_date(),1),'-','')
AND source='2' --用户行为电话授权
GROUP BY user_id,merchant_id,regexp_replace(SUBSTR(created_time,1,10),'-','')
and (user_id is NOT NULL or USER_ID <> '')
GROUP BY user_id,merchant_id,regexp_replace(SUBSTR(last_issue_time,1,10),'-','')
)a
join
(
......@@ -516,12 +520,13 @@ FROM
ON c.id = d.phone_binding_id
JOIN
(
SELECT id,user_id,merchant_id,REGEXP_REPLACE(SUBSTR(created_time,1,10), '-', '') as partition_date
SELECT id,user_id,merchant_id,REGEXP_REPLACE(SUBSTR(last_issue_time,1,10), '-', '') as partition_date
FROM tl.tl_gm_sl_lead_task --线索任务表(用户点击授权后记入该表)
WHERE partition_day>=REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
AND partition_day<=regexp_replace(date_sub(current_date(),1),'-','')
AND source='2' --用户行为电话授权
group by id,user_id,merchant_id,created_time
and (user_id is NOT NULL or USER_ID <> '')
group by id,user_id,merchant_id,last_issue_time
)e
ON d.lead_task_id = e.id
join
......@@ -602,12 +607,13 @@ FROM
ON c.id = d.phone_binding_id
JOIN
(
SELECT id,user_id,merchant_id,REGEXP_REPLACE(SUBSTR(created_time,1,10), '-', '') as created_date
SELECT id,user_id,merchant_id,REGEXP_REPLACE(SUBSTR(last_issue_time,1,10), '-', '') as created_date
FROM tl.tl_gm_sl_lead_task --线索任务表(用户点击授权后记入该表)
WHERE partition_day>=REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
AND partition_day<=regexp_replace(date_sub(current_date(),1),'-','')
AND source='2' --用户行为电话授权
group by id,user_id,merchant_id,created_time
and (user_id is NOT NULL or USER_ID <> '')
group by id,user_id,merchant_id,last_issue_time
)e
ON d.lead_task_id = e.id
join
......@@ -667,6 +673,7 @@ FROM
where partition_date>=REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
AND partition_date <= regexp_replace(date_sub(current_date,1),'-','')
and page_name='ai_plan_consult_list'
and referrer in ('plan_service_list','ai_plan_list')
and params['button_name'] in ('send')
and action='on_click_button'
and int(split(app_version,'\\.')[1]) >= 31
......@@ -721,6 +728,7 @@ FROM
AND partition_date <= regexp_replace(date_sub(current_date,1),'-','')
and page_name='ai_plan_consult_list'
and params['button_name'] in ('send')
and referrer in ('plan_service_list','ai_plan_list')
and action='on_click_button'
and int(split(app_version,'\\.')[1]) >= 31
group by partition_date,user_id,cl_id,params['hospital_id_list']
......@@ -761,12 +769,13 @@ FROM
SELECT a.user_id,a.merchant_id,substr(a.partition_date,1,6) as month,min(a.partition_date) as partition_date
FROM
(
SELECT user_id,merchant_id,REGEXP_REPLACE(SUBSTR(created_time,1,10), '-', '') as partition_date
SELECT user_id,merchant_id,REGEXP_REPLACE(SUBSTR(last_issue_time,1,10), '-', '') as partition_date
FROM tl.tl_gm_sl_lead_task --线索任务表(用户点击授权后记入该表)
WHERE partition_day>=REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
AND partition_day<=regexp_replace(date_sub(current_date(),1),'-','')
AND source='2' --用户行为电话授权
GROUP BY user_id,merchant_id,regexp_replace(SUBSTR(created_time,1,10),'-','')
and (user_id is NOT NULL or USER_ID <> '')
GROUP BY user_id,merchant_id,regexp_replace(SUBSTR(last_issue_time,1,10),'-','')
)a
join
(
......@@ -775,7 +784,7 @@ FROM
where partition_date>=REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
AND partition_date <= regexp_replace(date_sub(current_date,1),'-','')
and params['popup_name']='bargain_phone'
and page_name='ai_plan_list'
and page_name in ('plan_service_list','ai_plan_list')
and params['button_name'] in ('confirm')
and action='on_click_button'
and int(split(app_version,'\\.')[1]) >= 31
......@@ -827,12 +836,13 @@ FROM
ON c.id = d.phone_binding_id
JOIN
(
SELECT id,user_id,merchant_id,REGEXP_REPLACE(SUBSTR(created_time,1,10), '-', '') as created_date
SELECT id,user_id,merchant_id,REGEXP_REPLACE(SUBSTR(last_issue_time,1,10), '-', '') as created_date
FROM tl.tl_gm_sl_lead_task --线索任务表(用户点击授权后记入该表)
WHERE partition_day>=REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
AND partition_day<=regexp_replace(date_sub(current_date(),1),'-','')
AND source='2' --用户行为电话授权
group by id,user_id,merchant_id,created_time
and (user_id is NOT NULL or USER_ID <> '')
group by id,user_id,merchant_id,last_issue_time
)e
ON d.lead_task_id = e.id
join
......@@ -842,7 +852,7 @@ FROM
where partition_date>=REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
AND partition_date <= regexp_replace(date_sub(current_date,1),'-','')
and params['popup_name']='bargain_phone'
and page_name='ai_plan_list'
and page_name in ('plan_service_list','ai_plan_list')
and params['button_name'] in ('confirm')
and action='on_click_button'
and int(split(app_version,'\\.')[1]) >= 31
......@@ -895,12 +905,13 @@ FROM
ON c.id = d.phone_binding_id
JOIN
(
SELECT id,user_id,merchant_id,REGEXP_REPLACE(SUBSTR(created_time,1,10), '-', '') as created_date
SELECT id,user_id,merchant_id,REGEXP_REPLACE(SUBSTR(last_issue_time,1,10), '-', '') as created_date
FROM tl.tl_gm_sl_lead_task --线索任务表(用户点击授权后记入该表)
WHERE partition_day>=REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
AND partition_day<=regexp_replace(date_sub(current_date(),1),'-','')
AND source='2' --用户行为电话授权
group by id,user_id,merchant_id,created_time
and (user_id is NOT NULL or USER_ID <> '')
group by id,user_id,merchant_id,last_issue_time
)e
ON d.lead_task_id = e.id
join
......@@ -910,7 +921,7 @@ FROM
where partition_date>=REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
AND partition_date <= regexp_replace(date_sub(current_date,1),'-','')
and params['popup_name']='bargain_phone'
and page_name='ai_plan_list'
and page_name in ('plan_service_list','ai_plan_list')
and params['button_name'] in ('confirm')
and action='on_click_button'
and int(split(app_version,'\\.')[1]) >= 31
......@@ -983,12 +994,13 @@ FROM
SELECT partition_date,user_id,count(1) AS clue_num
FROM
(
SELECT user_id,merchant_id,MIN(REGEXP_REPLACE(SUBSTR(created_time,1,10), '-', '')) as partition_date,regexp_replace(SUBSTR(created_time,1,7),'-','') AS CALL_MONTH
SELECT user_id,merchant_id,MIN(REGEXP_REPLACE(SUBSTR(last_issue_time,1,10), '-', '')) as partition_date,regexp_replace(SUBSTR(last_issue_time,1,7),'-','') AS CALL_MONTH
FROM tl.tl_gm_sl_lead_task --线索任务表(用户点击授权后记入该表)
WHERE partition_day>=REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
AND partition_day<=regexp_replace(date_sub(current_date(),1),'-','')
AND source='2' --用户行为电话授权
GROUP BY user_id,merchant_id,regexp_replace(SUBSTR(created_time,1,7),'-','')
and (user_id is NOT NULL or USER_ID <> '')
GROUP BY user_id,merchant_id,regexp_replace(SUBSTR(last_issue_time,1,7),'-','')
)a
join
(
......@@ -1051,6 +1063,7 @@ FROM
FROM tl.tl_gm_sl_lead_task --线索任务表(用户点击授权后记入该表)
WHERE partition_day=regexp_replace(date_sub(current_date,1),'-','')
AND source='2' --用户行为电话授权
and (user_id is NOT NULL or USER_ID <> '')
)e
ON d.lead_task_id = e.id
GROUP BY user_id,merchant_id,regexp_replace(SUBSTR(a.partition_date,1,6),'-','')
......@@ -1117,6 +1130,7 @@ FROM
FROM tl.tl_gm_sl_lead_task --线索任务表(用户点击授权后记入该表)
WHERE partition_day=regexp_replace(date_sub(current_date,1),'-','')
AND source='2' --用户行为电话授权
and (user_id is NOT NULL or USER_ID <> '')
)e
ON d.lead_task_id = e.id
GROUP BY user_id,merchant_id,regexp_replace(SUBSTR(a.partition_date,1,6),'-','')
......@@ -1158,12 +1172,13 @@ FROM
SELECT partition_date,user_id,count(1) AS clue_num
FROM
(
SELECT user_id,merchant_id,MIN(REGEXP_REPLACE(SUBSTR(created_time,1,10), '-', '')) as partition_date,regexp_replace(SUBSTR(created_time,1,7),'-','') AS CALL_MONTH
SELECT user_id,merchant_id,MIN(REGEXP_REPLACE(SUBSTR(last_issue_time,1,10), '-', '')) as partition_date,regexp_replace(SUBSTR(last_issue_time,1,7),'-','') AS CALL_MONTH
FROM tl.tl_gm_sl_lead_task --线索任务表(用户点击授权后记入该表)
WHERE partition_day>=REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
AND partition_day<=regexp_replace(date_sub(current_date(),1),'-','')
AND source='2' --用户行为电话授权
GROUP BY user_id,merchant_id,regexp_replace(SUBSTR(created_time,1,7),'-','')
and (user_id is NOT NULL or USER_ID <> '')
GROUP BY user_id,merchant_id,regexp_replace(SUBSTR(last_issue_time,1,7),'-','')
)a
GROUP BY partition_date,user_id
)a
......@@ -1218,6 +1233,7 @@ FROM
FROM tl.tl_gm_sl_lead_task --线索任务表(用户点击授权后记入该表)
WHERE partition_day=regexp_replace(date_sub(current_date,1),'-','')
AND source='2' --用户行为电话授权
and (user_id is NOT NULL or USER_ID <> '')
)e
ON d.lead_task_id = e.id
GROUP BY user_id,merchant_id,regexp_replace(SUBSTR(a.partition_date,1,6),'-','')
......@@ -1390,10 +1406,11 @@ LEFT JOIN
ON c.id = d.phone_binding_id
JOIN
(
SELECT id,user_id,merchant_id,REGEXP_REPLACE(SUBSTR(created_time,1,10), '-', '') as created_date
SELECT id,user_id,merchant_id,REGEXP_REPLACE(SUBSTR(last_issue_time,1,10), '-', '') as created_date
FROM tl.tl_gm_sl_lead_task --线索任务表(用户点击授权后记入该表)
WHERE partition_day=regexp_replace(date_sub(current_date,1),'-','')
AND source='2' --用户行为电话授权
and (user_id is NOT NULL or USER_ID <> '')
)e
ON d.lead_task_id = e.id
where a.partition_date<>e.created_date
......@@ -1444,10 +1461,11 @@ LEFT JOIN
ON c.id = d.phone_binding_id
JOIN
(
SELECT id,user_id,merchant_id,REGEXP_REPLACE(SUBSTR(created_time,1,10), '-', '') as created_date
SELECT id,user_id,merchant_id,REGEXP_REPLACE(SUBSTR(last_issue_time,1,10), '-', '') as created_date
FROM tl.tl_gm_sl_lead_task --线索任务表(用户点击授权后记入该表)
WHERE partition_day=regexp_replace(date_sub(current_date,1),'-','')
AND source='2' --用户行为电话授权
and (user_id is NOT NULL or USER_ID <> '')
)e
ON d.lead_task_id = e.id
where a.partition_date<>e.created_date
......@@ -1498,12 +1516,13 @@ LEFT JOIN
ON c.id = d.phone_binding_id
JOIN
(
SELECT id,user_id,merchant_id,REGEXP_REPLACE(SUBSTR(created_time,1,10), '-', '') as created_date
SELECT id,user_id,merchant_id,REGEXP_REPLACE(SUBSTR(last_issue_time,1,10), '-', '') as created_date
FROM tl.tl_gm_sl_lead_task --线索任务表(用户点击授权后记入该表)
WHERE partition_day>=REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
AND partition_day<=regexp_replace(date_sub(current_date(),1),'-','')
AND source='2' --用户行为电话授权
group by id,user_id,merchant_id,created_time
and (user_id is NOT NULL or USER_ID <> '')
group by id,user_id,merchant_id,last_issue_time
)e
ON d.lead_task_id = e.id
join
......@@ -1586,12 +1605,13 @@ LEFT JOIN
ON c.id = d.phone_binding_id
JOIN
(
SELECT id,user_id,merchant_id,REGEXP_REPLACE(SUBSTR(created_time,1,10), '-', '') as created_date
SELECT id,user_id,merchant_id,REGEXP_REPLACE(SUBSTR(last_issue_time,1,10), '-', '') as created_date
FROM tl.tl_gm_sl_lead_task --线索任务表(用户点击授权后记入该表)
WHERE partition_day>=REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
AND partition_day<=regexp_replace(date_sub(current_date(),1),'-','')
AND source='2' --用户行为电话授权
group by id,user_id,merchant_id,created_time
and (user_id is NOT NULL or USER_ID <> '')
group by id,user_id,merchant_id,last_issue_time
)e
ON d.lead_task_id = e.id
join
......@@ -1673,12 +1693,13 @@ LEFT JOIN
ON c.id = d.phone_binding_id
JOIN
(
SELECT id,user_id,merchant_id,REGEXP_REPLACE(SUBSTR(created_time,1,10), '-', '') as created_date
SELECT id,user_id,merchant_id,REGEXP_REPLACE(SUBSTR(last_issue_time,1,10), '-', '') as created_date
FROM tl.tl_gm_sl_lead_task --线索任务表(用户点击授权后记入该表)
WHERE partition_day>=REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
AND partition_day<=regexp_replace(date_sub(current_date(),1),'-','')
AND source='2' --用户行为电话授权
group by id,user_id,merchant_id,created_time
and (user_id is NOT NULL or USER_ID <> '')
group by id,user_id,merchant_id,last_issue_time
)e
ON d.lead_task_id = e.id
join
......@@ -1688,7 +1709,7 @@ LEFT JOIN
where partition_date>=REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
AND partition_date <= regexp_replace(date_sub(current_date,1),'-','')
and params['popup_name']='bargain_phone'
and page_name='ai_plan_list'
and page_name in ('plan_service_list','ai_plan_list')
and params['button_name'] in ('confirm')
and action='on_click_button'
and int(split(app_version,'\\.')[1]) >= 31
......@@ -1743,12 +1764,13 @@ LEFT JOIN
ON c.id = d.phone_binding_id
JOIN
(
SELECT id,user_id,merchant_id,REGEXP_REPLACE(SUBSTR(created_time,1,10), '-', '') as created_date
SELECT id,user_id,merchant_id,REGEXP_REPLACE(SUBSTR(last_issue_time,1,10), '-', '') as created_date
FROM tl.tl_gm_sl_lead_task --线索任务表(用户点击授权后记入该表)
WHERE partition_day>=REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
AND partition_day<=regexp_replace(date_sub(current_date(),1),'-','')
AND source='2' --用户行为电话授权
group by id,user_id,merchant_id,created_time
and (user_id is NOT NULL or USER_ID <> '')
group by id,user_id,merchant_id,last_issue_time
)e
ON d.lead_task_id = e.id
join
......@@ -1758,7 +1780,7 @@ LEFT JOIN
where partition_date>=REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
AND partition_date <= regexp_replace(date_sub(current_date,1),'-','')
and params['popup_name']='bargain_phone'
and page_name='ai_plan_list'
and page_name in ('plan_service_list','ai_plan_list')
and params['button_name'] in ('confirm')
and action='on_click_button'
and int(split(app_version,'\\.')[1]) >= 31
......@@ -1812,10 +1834,11 @@ LEFT JOIN
ON c.id = d.phone_binding_id
JOIN
(
SELECT id,user_id,merchant_id,REGEXP_REPLACE(SUBSTR(created_time,1,10), '-', '') as created_date
SELECT id,user_id,merchant_id,REGEXP_REPLACE(SUBSTR(last_issue_time,1,10), '-', '') as created_date
FROM tl.tl_gm_sl_lead_task --线索任务表(用户点击授权后记入该表)
WHERE partition_day=regexp_replace(date_sub(current_date,1),'-','')
AND source='2' --用户行为电话授权
and (user_id is NOT NULL or USER_ID <> '')
)e
ON d.lead_task_id = e.id
where a.partition_date<>e.created_date
......@@ -1880,10 +1903,11 @@ LEFT JOIN
ON c.id = d.phone_binding_id
JOIN
(
SELECT id,user_id,merchant_id,REGEXP_REPLACE(SUBSTR(created_time,1,10), '-', '') as created_date
SELECT id,user_id,merchant_id,REGEXP_REPLACE(SUBSTR(last_issue_time,1,10), '-', '') as created_date
FROM tl.tl_gm_sl_lead_task --线索任务表(用户点击授权后记入该表)
WHERE partition_day=regexp_replace(date_sub(current_date,1),'-','')
AND source='2' --用户行为电话授权
and (user_id is NOT NULL or USER_ID <> '')
)e
ON d.lead_task_id = e.id
where a.partition_date<>e.created_date
......
......@@ -80,5 +80,5 @@ SELECT day_id `日期`
,valid_call_dev_qa_d as `当天有效电话线索设备数-需求自测`
,valid_call_num_qa_d as `当天有效电话线索人次-需求自测`
FROM pm.tl_pm_userclue_d
where partition_day in ('20200901','202001001','20201101','20201201','20210101',regexp_replace(date_sub(current_date,1),'-',''))
where partition_day in ('20200831','20200930','20201031','20201130','20201231',regexp_replace(date_sub(current_date,1),'-',''))
order by `日期`,`系统`,`新老`,`是否灰度`
......@@ -619,6 +619,7 @@ LEFT JOIN
AND params['transaction_type'] not in ('-1','smr','hotspot','pgc','newdata','hotspot_feed','aistragegy','excestragegy','fixedstragegy','fixedstragegy_video')
and params['transaction_type'] not like '%ctr'
and params['transaction_type'] not like '%cvr'
and params['transaction_type'] not like '%deeplink%'
AND params['card_content_type'] IN ('diary','diary_topic','user_post','doctor_post','question','answer','qa','live','article')
UNION ALL
......@@ -633,6 +634,7 @@ LEFT JOIN
AND params['transaction_type'] not in ('-1','smr','hotspot','pgc','newdata','hotspot_feed','aistragegy','excestragegy','fixedstragegy','fixedstragegy_video')
and params['transaction_type'] not like '%ctr'
and params['transaction_type'] not like '%cvr'
and params['transaction_type'] not like '%deeplink%'
UNION ALL
......@@ -644,7 +646,7 @@ LEFT JOIN
AND page_name = 'home'
AND action = 'on_click_card'
AND (params['transaction_type'] in ('-1','smr','hotspot','pgc','newdata','hotspot_feed','aistragegy','excestragegy','fixedstragegy','fixedstragegy_video')
or params['transaction_type'] like '%ctr' or params['transaction_type'] like '%cvr')
or params['transaction_type'] like '%ctr' or params['transaction_type'] like '%cvr' or params['transaction_type'] like '%deeplink%')
AND params['card_content_type'] IN ('diary','diary_topic','user_post','doctor_post','question','answer','qa','live','article')
UNION ALL
......@@ -657,7 +659,7 @@ LEFT JOIN
AND page_name = 'home'
AND action in ('on_click_diary_card','on_click_answer_card','on_click_question_card','on_click_topic_card','on_click_live_card')
AND (params['transaction_type'] in ('-1','smr','hotspot','pgc','newdata','hotspot_feed','aistragegy','excestragegy','fixedstragegy','fixedstragegy_video')
or params['transaction_type'] like '%ctr' or params['transaction_type'] like '%cvr')
or params['transaction_type'] like '%ctr' or params['transaction_type'] like '%cvr' or params['transaction_type'] like '%deeplink%')
)t2
ON t1.partition_date=t2.partition_date
AND t1.device_id=t2.cl_id
......
#step3.job
type=command
dependencies=step2
command=curl -X GET http://localhost:8553/api/report/email/daily_content_data/liudi@igengmei.com,wangxin@igengmei.com,dengguangyu@igengmei.com,zhaoyang@igengmei.com/weiyimin@igengmei.com,zhaofei@igengmei.com,yindanlei@igengmei.com
\ No newline at end of file
command=curl -X GET http://localhost:8553/api/report/email/daily_content_data/liudi@igengmei.com,shenzheng@igengmei.com,wangxin@igengmei.com,dengguangyu@igengmei.com,zhaoyang@igengmei.com/weiyimin@igengmei.com,zhaofei@igengmei.com,yindanlei@igengmei.com
\ No newline at end of file
......@@ -81,7 +81,7 @@ INSERT OVERWRITE TABLE pm.tl_pm_channel_d PARTITION (PARTITION_DAY = ${partition
INSERT OVERWRITE TABLE pm.tl_pm_operation_d PARTITION (PARTITION_DAY = ${partition_day})
SELECT t1.partition_date as day_id
SELECT concat_ws('-',substr(t1.partition_date,1,4),substr(t1.partition_date,5,2),substr(t1.partition_date,7,2)) as day_id
,t1.device_os_type as device_os_type
,t1.device_type as active_type
,t1.channel as channel
......@@ -509,8 +509,8 @@ LEFT JOIN
AND page_code='home'
AND is_exposure='1'
AND tab_code='精选'
AND (params['transaction_type'] in ('-1','smr','hotspot','pgc','newdata','hotspot_feed','aistragegy','excestragegy','fixedstragegy','fixedstragegy_video')
or params['transaction_type'] like '%ctr' or params['transaction_type'] like '%cvr')
AND (transaction_type in ('-1','smr','hotspot','pgc','newdata','hotspot_feed','aistragegy','excestragegy','fixedstragegy','fixedstragegy_video','deeplink_strategy')
or transaction_type like '%ctr' or transaction_type like '%cvr')
AND action in ('page_precise_exposure','home_choiceness_card_exposure') --7745版本action改为page_precise_exposure
AND card_content_type in ('user_post','diary','qa','answer')
)t1
......@@ -589,7 +589,7 @@ LEFT JOIN
AND action='on_click_card'
AND page_code ='home'
AND tab_code = '精选'
AND (transaction_type in ('-1','smr','hotspot','pgc','newdata','hotspot_feed','aistragegy','excestragegy','fixedstragegy','fixedstragegy_video')
AND (transaction_type in ('-1','smr','hotspot','pgc','newdata','hotspot_feed','aistragegy','excestragegy','fixedstragegy','fixedstragegy_video','deeplink_strategy')
or transaction_type like '%ctr' or transaction_type like '%cvr')
AND card_content_type in ('diary','user_post','qa','answer')
)t1
......
#step3.job
type=command
dependencies=step2
command=curl -X GET http://localhost:8553/api/report/email/daily_operation/liudi@igengmei.com,wangxin@igengmei.com,xuepengfei@igengmei.com,zhaoyang@igengmei.com,dengguangyu@igengmei.com/weiyimin@igengmei.com
\ No newline at end of file
command=curl -X GET http://localhost:8553/api/report/email/daily_operation/liudi@igengmei.com,shenzheng@igengmei.com,wangxin@igengmei.com,xuepengfei@igengmei.com,zhaoyang@igengmei.com,dengguangyu@igengmei.com/weiyimin@igengmei.com
\ No newline at end of file
SELECT FROM_UNIXTIME(UNIX_TIMESTAMP(day_id,'yyyymmdd'),'yyyy-mm-dd') `日期`
SELECT day_id `日期`
,device_os_type `系统`
,active_type `新老`
,channel `渠道`
......@@ -70,7 +70,7 @@ SELECT FROM_UNIXTIME(UNIX_TIMESTAMP(day_id,'yyyymmdd'),'yyyy-mm-dd') `日期`
,nvl(wel_second_pv_in_neirongPV,0) as `来自内容页的商业二跳/内容pv`
,nvl(neirong_second_pv_in_neirongPV,0) as `来自内容页的内容二跳/内容pv`
FROM pm.tl_pm_operation_d
where ((partition_day ='20200824' and day_id<='20200726')
or (partition_day<regexp_replace(date_sub(current_date,1),'-','') and partition_day>'20200824' and day_id=regexp_replace(date_sub(FROM_UNIXTIME(UNIX_TIMESTAMP(partition_day,'yyyymmdd'),'yyyy-mm-dd'),29),'-',''))
or (partition_day=regexp_replace(date_sub(current_date,1),'-','') and day_id>=regexp_replace(date_sub(current_date,30),'-','')))
order by `日期`,`系统`,`新老`,`渠道`
where ((partition_day ='20200906' and day_id<='2020-08-08')
or (partition_day<regexp_replace(date_sub(current_date,1),'-','') and partition_day>'20200906' and day_id=date_sub(FROM_UNIXTIME(UNIX_TIMESTAMP(partition_day,'yyyymmdd'),'yyyy-mm-dd'),29))
or (partition_day=regexp_replace(date_sub(current_date,1),'-','') and day_id>=date_sub(current_date,30)))
order by `日期` desc,`系统`,`新老`,`渠道`
#step3.job
type=command
dependencies=step2
command=curl -X GET http://localhost:8553/api/report/email/daily_push/liweirui@igengmei.com,duanyingrong@igengmei.com,zhaowei@igengmei.com,songke@igengmei.com/zhaofei@igengmei.com,weiyimin@igengmei.com,yindanlei@igengmei.com,wangyan@igengmei.com
\ No newline at end of file
command=curl -X GET http://localhost:8553/api/report/email/daily_push/liweirui@igengmei.com,duanyingrong@igengmei.com,zhaowei@igengmei.com,songke@igengmei.com/zhaoyang@igengmei.com,weiyimin@igengmei.com,hanyingyue@igengmei.com
\ No newline at end of file
......@@ -31,9 +31,10 @@ CREATE TABLE IF NOT EXISTS pm.tl_pm_recommend_strategy_d
recommend_content_card bigint comment '{"chs_name":"","description":"","etl":"","value":"","remark":""}',
recommend_special_card bigint comment '{"chs_name":"","description":"","etl":"","value":"","remark":""}',
transfer_card bigint comment '{"chs_name":"","description":"","etl":"","value":"","remark":""}',
video_consultation bigint comment '{"chs_name":"","description":"","etl":"","value":"","remark":""}'
)comment '内容日报'
video_consultation bigint comment '{"chs_name":"","description":"","etl":"","value":"","remark":""}',
total_post_pv bigint comment '{"chs_name":"","description":"","etl":"","value":"","remark":""}',
post_click_pv bigint comment '{"chs_name":"","description":"","etl":"","value":"","remark":""}'
)comment '首页推荐策略日报'
PARTITIONED BY (PARTITION_DAY STRING comment '分区日期')
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
......
--***************************************************************
--*Script name:
--*Purpose: home-page recommendation strategy daily report
--*Business domain: pm
--*Input data:
--*Author: weiyimin@igengmei.com
--*Last updated:
--***************************************************************
-- Session settings: submit the job to the "data" MapReduce queue.
SET mapreduce.job.queuename=data;
-- Switch to the pm database (the table name below is also fully qualified).
USE pm;
-- Create the report table only if it does not exist yet; re-running this
-- script against an existing table is a no-op and does NOT alter its schema.
-- Each column comment is a JSON metadata stub (chs_name/description/etl/value/remark);
-- only the first five columns have chs_name filled in.
CREATE TABLE IF NOT EXISTS pm.tl_pm_recommend_strategy_d
(
    -- Dimension columns (chs_name: date, device type, active type, card type, recommend type)
    day_id string comment '{"chs_name":"当天日期","description":"","etl":"","value":"","remark":""}',
    device_os_type string comment '{"chs_name":"设备类型","description":"","etl":"","value":"","remark":""}',
    active_type string comment '{"chs_name":"活跃类型","description":"","etl":"","value":"","remark":""}',
    card_content_type string comment '{"chs_name":"卡片类型","description":"","etl":"","value":"","remark":""}',
    recommend_type string comment '{"chs_name":"推荐类型","description":"","etl":"","value":"","remark":""}',
    -- Metric columns (metadata stubs are still empty)
    card_click bigint comment '{"chs_name":"","description":"","etl":"","value":"","remark":""}',
    card_exposure bigint comment '{"chs_name":"","description":"","etl":"","value":"","remark":""}',
    avg_page_stay double comment '{"chs_name":"","description":"","etl":"","value":"","remark":""}',
    navbar_search bigint comment '{"chs_name":"","description":"","etl":"","value":"","remark":""}',
    highlight_word bigint comment '{"chs_name":"","description":"","etl":"","value":"","remark":""}',
    self_welfare_card bigint comment '{"chs_name":"","description":"","etl":"","value":"","remark":""}',
    recommend_welfare_card bigint comment '{"chs_name":"","description":"","etl":"","value":"","remark":""}',
    recommend_content_card bigint comment '{"chs_name":"","description":"","etl":"","value":"","remark":""}',
    recommend_special_card bigint comment '{"chs_name":"","description":"","etl":"","value":"","remark":""}',
    transfer_card bigint comment '{"chs_name":"","description":"","etl":"","value":"","remark":""}',
    video_consultation bigint comment '{"chs_name":"","description":"","etl":"","value":"","remark":""}'
)
-- Table comment matches the script purpose in the header (was the copy-pasted '内容日报').
comment '首页推荐策略日报'
-- One partition per report day; the partition column comment reads "partition date".
PARTITIONED BY (PARTITION_DAY STRING comment '分区日期')
-- Plain-text storage: tab-separated fields, newline-terminated rows;
-- '\002' / '\003' are the octal control characters used for collection items and map keys.
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
COLLECTION ITEMS TERMINATED BY '\002'
MAP KEYS TERMINATED BY '\003'
LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
\ No newline at end of file
......@@ -28,7 +28,10 @@ SELECT
NVL(sum(recom_content_pv),0) as recommend_content_card,
NULL as recommend_special_card,
NVL(sum(referral_pv),0) as transfer_card,
NVL(sum(video_pv),0) as video_consultation
NVL(sum(video_pv),0) as video_consultation,
NVL(sum(post_pv),0) as total_post_pv,
NVL(sum(post_click_pv),0) as post_click_pv
FROM
(
SELECT partition_date
......@@ -48,7 +51,7 @@ FROM
,'promotion_shike','promotion_julang_jl03','promotion_zuimei')
AND first_channel_source_type not LIKE 'promotion\_jf\_%'
)t1
JOIN
JOIN
(--精准曝光,卡片id和session_id去重
SELECT partition_date,
card_content_type,
......@@ -63,17 +66,19 @@ FROM
case when card_content_type in ('qa','answer') then 'qa'
when card_content_type in ('special_pool') then 'special' else card_content_type end as card_content_type,
CASE when transaction_type in ('fmctr') then array('fmctr','合计')
when transaction_type in ('high_quality_fmctr') then array('high_quality_fmctr','合计')
WHEN transaction_type like '%ctr' THEN array('ctr预估','合计')
WHEN transaction_type like '%cvr' THEN array('cvr预估','合计')
WHEN transaction_type in ('-1','smr') THEN array('smr','合计')
when transaction_type in ('pgc','hotspot') then array('热点卡片','合计')
when transaction_type in ('pgc','hotspot') then array('热点卡片')
when transaction_type in ('newdata') then array('保量卡片')
when transaction_type in ('hotspot_feed') then array('hotspot_feed','合计')
when transaction_type in ('aistragegy') then array('新用户AI帖优先')
when transaction_type in ('excestragegy') then array('新用户精华帖优先')
when transaction_type in ('fixedstragegy') then array('新氧新用户策略一')
when transaction_type in ('fixedstragegy_video') then array('新氧新用户策略二') end
AS recommend_type,
when transaction_type in ('aistragegy') then array('新用户AI帖优先','合计')
when transaction_type in ('excestragegy') then array('新用户精华帖优先','合计')
when transaction_type in ('FIXEDSTRATEGY') then array('新氧新用户策略一','合计')
when transaction_type in ('FIXEDSTRATEGY_VIDEO') then array('新氧新用户策略二','合计')
when transaction_type like 'deeplink%' then array('deeplink策略','合计')
end AS recommend_type,
card_id,
app_session_id
from online.ml_community_precise_exposure_detail
......@@ -82,24 +87,26 @@ FROM
AND is_exposure = '1' ----精准曝光
AND page_name ='home'
AND tab_name = '精选'
AND (transaction_type in ('-1','smr','hotspot','pgc','newdata','hotspot_feed','aistragegy','excestragegy','fixedstragegy','fixedstragegy_video')
or transaction_type like '%ctr' or transaction_type like '%cvr')
AND (transaction_type in ('-1','smr','hotspot','pgc','newdata','hotspot_feed','aistragegy','excestragegy','FIXEDSTRATEGY','FIXEDSTRATEGY_VIDEO')
or transaction_type like '%ctr' or transaction_type like '%cvr' or transaction_type like 'deeplink%')
AND card_content_type in ('qa','diary','user_post','answer','special_pool')
group by partition_date,
case when card_content_type in ('qa','answer') then 'qa'
when card_content_type in ('special_pool') then 'special' else card_content_type end,
cl_id,
CASE when transaction_type in ('fmctr') then array('fmctr','合计')
when transaction_type in ('high_quality_fmctr') then array('high_quality_fmctr','合计')
WHEN transaction_type like '%ctr' THEN array('ctr预估','合计')
WHEN transaction_type like '%cvr' THEN array('cvr预估','合计')
WHEN transaction_type in ('-1','smr') THEN array('smr','合计')
when transaction_type in ('pgc','hotspot') then array('热点卡片','合计')
when transaction_type in ('pgc','hotspot') then array('热点卡片')
when transaction_type in ('newdata') then array('保量卡片')
when transaction_type in ('hotspot_feed') then array('hotspot_feed','合计')
when transaction_type in ('aistragegy') then array('新用户AI帖优先')
when transaction_type in ('excestragegy') then array('新用户精华帖优先')
when transaction_type in ('fixedstragegy') then array('新氧新用户策略一')
when transaction_type in ('fixedstragegy_video') then array('新氧新用户策略二') end,
when transaction_type in ('aistragegy') then array('新用户AI帖优先','合计')
when transaction_type in ('excestragegy') then array('新用户精华帖优先','合计')
when transaction_type in ('FIXEDSTRATEGY') then array('新氧新用户策略一','合计')
when transaction_type in ('FIXEDSTRATEGY_VIDEO') then array('新氧新用户策略二','合计')
when transaction_type like 'deeplink%' then array('deeplink策略','合计') end,
card_id,
app_session_id
)a
......@@ -122,16 +129,18 @@ FROM
case when params['card_content_type'] in ('qa','answer') then 'qa'
when params['card_content_type'] in ('special_pool') then 'special' else params['card_content_type'] end as card_content_type,
CASE when params['transaction_type'] in ('fmctr') then array('fmctr','合计')
when params['transaction_type'] in ('high_quality_fmctr') then array('high_quality_fmctr','合计')
WHEN params['transaction_type'] like '%ctr' THEN array('ctr预估','合计')
WHEN params['transaction_type'] like '%cvr' THEN array('cvr预估','合计')
WHEN params['transaction_type'] in ('-1','smr') THEN array('smr','合计')
when params['transaction_type'] in ('pgc','hotspot') then array('热点卡片','合计')
when params['transaction_type'] in ('pgc','hotspot') then array('热点卡片')
when params['transaction_type'] in ('newdata') then array('保量卡片')
when params['transaction_type'] in ('hotspot_feed') then array('hotspot_feed','合计')
when params['transaction_type'] in ('aistragegy') then array('新用户AI帖优先')
when params['transaction_type'] in ('excestragegy') then array('新用户精华帖优先')
when params['transaction_type'] in ('fixedstragegy') then array('新氧新用户策略一')
when params['transaction_type'] in ('fixedstragegy_video') then array('新氧新用户策略二')
when params['transaction_type'] in ('aistragegy') then array('新用户AI帖优先','合计')
when params['transaction_type'] in ('excestragegy') then array('新用户精华帖优先','合计')
when params['transaction_type'] in ('FIXEDSTRATEGY') then array('新氧新用户策略一','合计')
when params['transaction_type'] in ('FIXEDSTRATEGY_VIDEO') then array('新氧新用户策略二','合计')
when params['transaction_type'] like 'deeplink%' then array('deeplink策略','合计')
end AS recommend_type,
params['card_id'] as card_id,
app_session_id
......@@ -140,24 +149,26 @@ FROM
AND action='on_click_card'
AND params['page_name'] ='home'
AND params['tab_name'] = '精选'
AND (params['transaction_type'] in ('-1','smr','hotspot','pgc','newdata','hotspot_feed','aistragegy','excestragegy','fixedstragegy','fixedstragegy_video')
or params['transaction_type'] like '%ctr' or params['transaction_type'] like '%cvr')
AND (params['transaction_type'] in ('-1','smr','hotspot','pgc','newdata','hotspot_feed','aistragegy','excestragegy','FIXEDSTRATEGY','FIXEDSTRATEGY_VIDEO')
or params['transaction_type'] like '%ctr' or params['transaction_type'] like '%cvr' or params['transaction_type'] like 'deeplink%')
AND params['card_content_type'] in ('qa','diary','user_post','answer','special_pool')
GROUP BY partition_date,
cl_id,
case when params['card_content_type'] in ('qa','answer') then 'qa'
when params['card_content_type'] in ('special_pool') then 'special' else params['card_content_type'] end,
CASE when params['transaction_type'] in ('fmctr') then array('fmctr','合计')
when params['transaction_type'] in ('high_quality_fmctr') then array('high_quality_fmctr','合计')
WHEN params['transaction_type'] like '%ctr' THEN array('ctr预估','合计')
WHEN params['transaction_type'] like '%cvr' THEN array('cvr预估','合计')
WHEN params['transaction_type'] in ('-1','smr') THEN array('smr','合计')
when params['transaction_type'] in ('pgc','hotspot') then array('热点卡片','合计')
when params['transaction_type'] in ('pgc','hotspot') then array('热点卡片')
when params['transaction_type'] in ('newdata') then array('保量卡片')
when params['transaction_type'] in ('hotspot_feed') then array('hotspot_feed','合计')
when params['transaction_type'] in ('aistragegy') then array('新用户AI帖优先')
when params['transaction_type'] in ('excestragegy') then array('新用户精华帖优先')
when params['transaction_type'] in ('fixedstragegy') then array('新氧新用户策略一')
when params['transaction_type'] in ('fixedstragegy_video') then array('新氧新用户策略二') end,
when params['transaction_type'] in ('aistragegy') then array('新用户AI帖优先','合计')
when params['transaction_type'] in ('excestragegy') then array('新用户精华帖优先','合计')
when params['transaction_type'] in ('FIXEDSTRATEGY') then array('新氧新用户策略一','合计')
when params['transaction_type'] in ('FIXEDSTRATEGY_VIDEO') then array('新氧新用户策略二','合计')
when params['transaction_type'] like 'deeplink%' then array('deeplink策略','合计') end,
params['card_id'],
app_session_id
)a
......@@ -169,7 +180,7 @@ FROM
and t2.card_id=t3.card_id
and t2.card_content_type=t3.card_content_type
and t2.recommend_type=t3.recommend_type
LEFT JOIN
LEFT JOIN
(--页面浏览时长
select partition_date,cl_id,coalesce(b.id,a.business_id) as business_id,a.page_name,page_stay
from
......@@ -205,7 +216,7 @@ FROM
and t4.cl_id=t3.cl_id
and t4.business_id=t3.card_id
and t4.page_name=t3.card_content_type
LEFT JOIN
LEFT JOIN
(--搜索框和点击行为
select partition_date,cl_id,coalesce(b.id,a.business_id) as business_id,a.page_name,navbar_pv
from
......@@ -242,7 +253,7 @@ FROM
and t5.cl_id=t3.cl_id
and t5.business_id=t3.card_id
and t5.page_name=t3.card_content_type
LEFT JOIN
LEFT JOIN
(--点击高亮词
select partition_date,cl_id,coalesce(b.id,a.business_id) as business_id,a.page_name,highlight_pv
from
......@@ -280,7 +291,7 @@ FROM
and t6.cl_id=t3.cl_id
and t6.business_id=t3.card_id
and t6.page_name=t3.card_content_type
LEFT JOIN
LEFT JOIN
(--关联的美购卡片
SELECT partition_date,cl_id,coalesce(b.id,a.business_id) as business_id,a.page_name,count(1) as self_wel_pv
FROM
......@@ -324,7 +335,7 @@ FROM
and t7.cl_id=t3.cl_id
and t7.business_id=t3.card_id
and t7.page_name=t3.card_content_type
LEFT JOIN
LEFT JOIN
(--推荐的美购卡片(需要排除作者消费的美购)
SELECT partition_date,cl_id,coalesce(b.id,a.business_id) as business_id,a.page_name,count(1) as recom_wel_pv
FROM
......@@ -364,7 +375,7 @@ FROM
and t8.cl_id=t3.cl_id
and t8.business_id=t3.card_id
and t8.page_name=t3.card_content_type
LEFT JOIN
LEFT JOIN
(--推荐的内容卡片
SELECT partition_date,cl_id,coalesce(b.id,a.business_id) as business_id,a.page_name,count(1) as recom_content_pv
FROM
......@@ -403,7 +414,7 @@ FROM
and t9.cl_id=t3.cl_id
and t9.business_id=t3.card_id
and t9.page_name=t3.card_content_type
LEFT JOIN
LEFT JOIN
(--视频面诊点击
select partition_date,cl_id,coalesce(b.id,a.business_id) as business_id,a.page_name,video_pv
from
......@@ -441,7 +452,7 @@ FROM
and t10.cl_id=t3.cl_id
and t10.business_id=t3.card_id
and t10.page_name=t3.card_content_type
LEFT JOIN
LEFT JOIN
(--转诊按钮点击
select partition_date,cl_id,coalesce(b.id,a.business_id) as business_id,a.page_name,referral_pv
from
......@@ -480,6 +491,48 @@ FROM
and t11.business_id=t3.card_id
and t11.page_name=t3.card_content_type
LEFT JOIN
(--从帖子页到帖子页
SELECT partition_date,cl_id,params['referrer_id'] as business_id,
case when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
else null end as page_name,
count(distinct params['business_id'],app_session_id) as post_pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND action='page_view'
AND page_name IN ('post_detail','user_post_detail','doctor_post_detail','custom_special')
AND (json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1]IN ('post_detail','user_post_detail','doctor_post_detail')
and json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-2]='home')
group by partition_date,cl_id,params['referrer_id'],
case when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
else null end
)t12
on t12.partition_date=t3.partition_date
and t12.cl_id=t3.cl_id
and t12.business_id=t3.card_id
and t12.page_name=t3.card_content_type
left join
(--在帖子页点击帖子
SELECT partition_date,cl_id,params['business_id'] as business_id,
case when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
else null end as page_name,
count(distinct params['card_id'],app_session_id) as post_click_pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND action='on_click_card'
and params['card_content_type'] in ('user_post')
AND page_name IN ('post_detail','user_post_detail','doctor_post_detail')
AND (referrer='home' or
(params['referrer_link'] like '%[%' and
json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1]='home'))
group by partition_date,cl_id,params['business_id'],
case when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
else null end
)t13
on t13.partition_date=t3.partition_date
and t13.cl_id=t3.cl_id
and t13.business_id=t3.card_id
and t13.page_name=t3.card_content_type
LEFT JOIN
(
select distinct device_id
from ml.ml_d_ct_dv_devicespam_d --去除机构刷单设备,即作弊设备(浏览和曝光事件去除)
......
#step3.job
type=command
dependencies=step2
command=curl -X GET http://localhost:8553/api/report/email/daily_recommend_strategy/liudi@igengmei.com,wangxin@igengmei.com,zhaoyang@igengmei.com,liweirui@igengmei.com,duanyingrong@igengmei.com,xuepengfei@igengmei.com,dengguangyu@igengmei.com,zhaowei@igengmei.com,songke@igengmei.com/weiyimin@igengmei.com
\ No newline at end of file
command=curl -X GET http://localhost:8553/api/report/email/daily_recommend_strategy/liudi@igengmei.com,shenzheng@igengmei.com,wangxin@igengmei.com,zhaoyang@igengmei.com,liweirui@igengmei.com,duanyingrong@igengmei.com,xuepengfei@igengmei.com,dengguangyu@igengmei.com,zhaowei@igengmei.com,songke@igengmei.com,wanglidan@igengmei.com/weiyimin@igengmei.com
\ No newline at end of file
......@@ -2,7 +2,7 @@ SELECT
day_id as `日期`,
device_os_type as `设备类型`,
active_type as `活跃类型`,
grey_type as `灰度`,
grey_type as `灰度类型`,
page_name as `页面`,
NVL(CONCAT(ROUND(wel_click_pv/wel_exp_pv*100,2),'%'),0) as `内容页你可能喜欢美购卡片点击PV/该类卡片曝光PV`,
NVL(CONCAT(ROUND(content_click_pv/content_exp_pv*100,2),'%'),0) as `内容页你可能喜欢内容卡片点击PV/该类卡片曝光PV`,
......@@ -16,4 +16,4 @@ SELECT
self_wel_click_pv as `关联卡片点击pv`
FROM pm.tl_pm_contentpage_ctr
WHERE partition_day>='20200730' and partition_day<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
order by `日期` desc,`设备类型`,`活跃类型`,`灰度`,`页面`
\ No newline at end of file
order by `日期` desc,`设备类型`,`活跃类型`,`灰度类型`,`页面`
\ No newline at end of file
SELECT
day_id as `日期`,
device_os_type as `设备类型`,
active_type as `活跃类型`,
card_content_type as `卡片类型`,
recommend_type as `推荐类型`,
NVL(CONCAT(ROUND((navbar_search+highlight_word+self_welfare_card+recommend_welfare_card+recommend_content_card*0.2+transfer_card+video_consultation)/card_exposure*100,2),'%'),0) as `来自首页推荐内容卡片的的有效二跳pv/首页卡片精准曝光PV`,
NVL(CONCAT(ROUND(card_click/card_exposure*100,2),'%'),0) as `首页卡片点击PV/首页卡片精准曝光PV`,
NVL(CONCAT(ROUND((navbar_search+highlight_word+self_welfare_card+recommend_welfare_card+recommend_content_card*0.2+transfer_card+video_consultation)/card_click*100,2),'%'),0) as `来自首页推荐内容卡片的的有效二跳pv/首页卡片点击PV`,
card_click as `首页卡片点击PV`,
card_exposure as `首页卡片精准曝光PV`,
(navbar_search+highlight_word+self_welfare_card+recommend_welfare_card+recommend_content_card*0.2+transfer_card+video_consultation)as `有效二跳pv`,
avg_page_stay as `来自I的单PV平均浏览时长`,
navbar_search as `来自I的搜索框+搜索按钮点击PV`,
highlight_word as `来自I的文内搜索点击PV`,
self_welfare_card as `来自I的商品卡片点击PV`,
recommend_welfare_card as `来自I的推荐商品+查看全部商品点击pv`,
recommend_content_card as `来自I的推荐内容点击pv`,
-- '未配置' as `来自I的推荐专题点击pv`,
transfer_card as `来自I的转诊点击pv`,
video_consultation as `来自I的视频面诊点击pv`FROM pm.tl_pm_recommend_strategy_d
WHERE partition_day>='20200824' and partition_day<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
--SELECT
-- day_id as `日期`,
-- device_os_type as `设备类型`,
-- active_type as `活跃类型`,
-- card_content_type as `卡片类型`,
-- recommend_type as `推荐类型`,
-- NVL(CONCAT(ROUND((navbar_search+highlight_word+self_welfare_card+recommend_welfare_card+recommend_content_card*0.2+transfer_card+video_consultation+total_post_pv-post_click_pv)/card_exposure*100,2),'%'),0) as `来自首页推荐内容卡片的的有效二跳pv/首页卡片精准曝光PV`,
-- NVL(CONCAT(ROUND(card_click/card_exposure*100,2),'%'),0) as `首页卡片点击PV/首页卡片精准曝光PV`,
-- NVL(CONCAT(ROUND((navbar_search+highlight_word+self_welfare_card+recommend_welfare_card+recommend_content_card*0.2+transfer_card+video_consultation+total_post_pv-post_click_pv)/card_click*100,2),'%'),0) as `来自首页推荐内容卡片的的有效二跳pv/首页卡片点击PV`,
-- card_click as `首页卡片点击PV`,
-- card_exposure as `首页卡片精准曝光PV`,
-- (navbar_search+highlight_word+self_welfare_card+recommend_welfare_card+recommend_content_card*0.2+transfer_card+video_consultation+total_post_pv-post_click_pv)as `有效二跳pv`,
-- avg_page_stay as `来自I的单PV平均浏览时长`,
-- navbar_search as `来自I的搜索框+搜索按钮点击PV`,
-- highlight_word as `来自I的文内搜索点击PV`,
-- self_welfare_card as `来自I的商品卡片点击PV`,
-- recommend_welfare_card as `来自I的推荐商品+查看全部商品点击pv`,
-- recommend_content_card as `来自I的推荐内容点击pv`,
---- '未配置' as `来自I的推荐专题点击pv`,
-- transfer_card as `来自I的转诊点击pv`,
-- video_consultation as `来自I的视频面诊点击pv`,
-- if(total_post_pv-post_click_pv>=0,total_post_pv-post_click_pv,0) as `帖子页链接点击pv(近似值)`
--FROM pm.tl_pm_recommend_strategy_d
--WHERE partition_day>='20200907' and partition_day<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
--and card_content_type not in ('special')
--and recommend_type not in ('新用户AI帖优先','热点卡片','新用户精华帖优先','hotspot_feed')
--order by `日期` desc ,`设备类型`,`活跃类型`,`卡片类型`,`推荐类型`;
-- Home-feed recommendation strategy report read from pm.tl_pm_recommend_strategy_d.
-- Output: one row per day / device type / active type / card type / recommend type,
-- plus a recomputed '合计' (total) row per day / device type / active type / card type.
--
-- Changes versus the previous revision:
--   * The guaranteed-volume label was filtered as '保量', while the strategy
--     labelling writes '保量卡片'. The '=保量' branch therefore matched nothing
--     and the recomputed total did not exclude those cards. The total now
--     excludes both spellings. NOTE(review): confirm '保量卡片' is the value
--     actually stored in recommend_type.
--   * The two detail branches (everything-but-保量 UNION ALL 保量-only) are merged
--     into a single filter that returns the same rows.
--   * The ratio/label projection is written once over the unioned raw metrics
--     instead of being copied into every branch.
-- The outer "select * ... b" wrapper is kept on purpose so result column labels
-- stay exactly as before.
select *
from
(
    select
        day_id as `日期`,
        device_os_type as `设备类型`,
        active_type as `活跃类型`,
        card_content_type as `卡片类型`,
        recommend_type as `推荐类型`,
        -- effective second-hop PV / exposure PV; NVL(...,0) covers NULL from division by zero
        NVL(CONCAT(ROUND((navbar_search+highlight_word+self_welfare_card+recommend_welfare_card+recommend_content_card*0.2+transfer_card+video_consultation+total_post_pv-post_click_pv)/card_exposure*100,2),'%'),0) as `来自首页推荐内容卡片的的有效二跳pv/首页卡片精准曝光PV`,
        NVL(CONCAT(ROUND(card_click/card_exposure*100,2),'%'),0) as `首页卡片点击PV/首页卡片精准曝光PV`,
        NVL(CONCAT(ROUND((navbar_search+highlight_word+self_welfare_card+recommend_welfare_card+recommend_content_card*0.2+transfer_card+video_consultation+total_post_pv-post_click_pv)/card_click*100,2),'%'),0) as `来自首页推荐内容卡片的的有效二跳pv/首页卡片点击PV`,
        card_click as `首页卡片点击PV`,
        card_exposure as `首页卡片精准曝光PV`,
        (navbar_search+highlight_word+self_welfare_card+recommend_welfare_card+recommend_content_card*0.2+transfer_card+video_consultation+total_post_pv-post_click_pv)as `有效二跳pv`,
        avg_page_stay as `来自I的单PV平均浏览时长`,
        navbar_search as `来自I的搜索框+搜索按钮点击PV`,
        highlight_word as `来自I的文内搜索点击PV`,
        self_welfare_card as `来自I的商品卡片点击PV`,
        recommend_welfare_card as `来自I的推荐商品+查看全部商品点击pv`,
        recommend_content_card as `来自I的推荐内容点击pv`,
        transfer_card as `来自I的转诊点击pv`,
        video_consultation as `来自I的视频面诊点击pv`,
        -- approximate post-page link clicks, clamped at zero
        if(total_post_pv-post_click_pv>=0,total_post_pv-post_click_pv,0) as `帖子页链接点击pv(近似值)`
    from
    (
        -- Detail rows: every recommend type except the hidden strategies and the
        -- stored '合计' rows (the total is recomputed below). Guaranteed-volume
        -- cards stay visible here as their own rows.
        select
            day_id,
            device_os_type,
            active_type,
            card_content_type,
            recommend_type,
            card_click,
            card_exposure,
            avg_page_stay,
            navbar_search,
            highlight_word,
            self_welfare_card,
            recommend_welfare_card,
            recommend_content_card,
            transfer_card,
            video_consultation,
            total_post_pv,
            post_click_pv
        from pm.tl_pm_recommend_strategy_d
        WHERE partition_day>='20200907' and partition_day<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
        and card_content_type not in ('special')
        and recommend_type not in ('新用户AI帖优先','热点卡片','新用户精华帖优先','hotspot_feed','合计')
        union all
        -- Recomputed total: sums the visible strategies, leaving out
        -- guaranteed-volume cards (both label spellings).
        select
            day_id,
            device_os_type,
            active_type,
            card_content_type,
            '合计' as recommend_type,
            sum(card_click) as card_click,
            sum(card_exposure) as card_exposure,
            round(avg(avg_page_stay),2) as avg_page_stay,
            sum(navbar_search) as navbar_search,
            sum(highlight_word) as highlight_word,
            sum(self_welfare_card) as self_welfare_card,
            sum(recommend_welfare_card) as recommend_welfare_card,
            sum(recommend_content_card) as recommend_content_card,
            sum(transfer_card) as transfer_card,
            sum(video_consultation) as video_consultation,
            sum(total_post_pv) as total_post_pv,
            sum(post_click_pv) as post_click_pv
        from pm.tl_pm_recommend_strategy_d
        WHERE partition_day>='20200907' and partition_day<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
        and card_content_type not in ('special')
        and recommend_type not in ('新用户AI帖优先','热点卡片','新用户精华帖优先','hotspot_feed','合计','保量','保量卡片')
        group by day_id,device_os_type,active_type,card_content_type
    )a
)b
order by `日期` desc ,`设备类型`,`活跃类型`,`卡片类型`,`推荐类型`;
\ No newline at end of file
daily_reply_content=每日真实评价内容
\ No newline at end of file
#step1_1.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_topicreply_view
\ No newline at end of file
#step1_2.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_problem_view
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_answer_reply_view
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_api_tractate_reply_view
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online bl_hdfs_operation_updates
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive ml ML_D_CT_UI_USERCLEAN_DIMEN_D
\ No newline at end of file
#step2.job
# Runs once all six upstream wait jobs (step1_1..step1_6) have succeeded and
# calls the local report endpoint for the daily_reply_content report.
# The last two URL path segments are e-mail address lists; every address must be
# fully qualified (the third address previously lacked its ".com" suffix).
type=command
dependencies=step1_1,step1_2,step1_3,step1_4,step1_5,step1_6
command=curl -X GET http://localhost:8553/api/report/email/daily_reply_content/zhaoyang@igengmei.com,zhangwen@igengmei.com,dongyiming@igengmei.com/weiyimin@igengmei.com
\ No newline at end of file
-- Daily reply-content report: replies on diary posts, answers and user posts
-- dated from yesterday onwards, kept only for users with a matching reply
-- action in the operation log on the same day, and with flagged accounts
-- (doctor / puppet / classified / fake-reply / staff / issue-device) removed.
-- The outer alias "a" is kept because the result is projected as a.*.
SELECT a.*
FROM
(
    -- Replies on diary posts, excluding suspected ads
    SELECT DISTINCT
        problem.diary_id AS content_id,
        '日记贴' AS type,
        reply.user_id,
        reply.create_time,
        reply.content
    FROM
    (
        SELECT DISTINCT
            id,
            problem_id,
            user_id,
            reply_date AS create_time,
            content
        FROM online.tl_hdfs_topicreply_view
        WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
          AND is_spam = 'false' -- exclude suspected ads
          AND regexp_replace(substr(reply_date,1,10),'-','') >= regexp_replace(DATE_SUB(current_date,1) ,'-','')
    ) reply
    INNER JOIN
    (
        SELECT DISTINCT
            id,
            diary_id
        FROM online.tl_hdfs_problem_view
        WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
    ) problem
        ON problem.id = reply.problem_id

    UNION ALL

    -- Replies on answers, excluding rows flagged as fake
    SELECT DISTINCT
        answer_id AS content_id,
        '回答' AS type,
        user_id,
        create_time,
        content
    FROM online.tl_hdfs_answer_reply_view
    WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
      AND (is_fake IS NULL OR is_fake = 'false')
      AND answer_id IS NOT NULL
      AND regexp_replace(substr(create_time,1,10),'-','') >= regexp_replace(DATE_SUB(current_date,1) ,'-','')

    UNION ALL

    -- Replies on user posts
    SELECT DISTINCT
        tractate_id AS content_id,
        '帖子' AS type,
        user_id,
        create_time,
        content
    FROM online.tl_hdfs_api_tractate_reply_view
    WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
      AND regexp_replace(substr(create_time,1,10),'-','') >= regexp_replace(DATE_SUB(current_date,1) ,'-','')
) a
INNER JOIN -- restrict to users whose reply was made in the app
(
    SELECT
        op.partition_date,
        op.user_id
    FROM
    (
        -- yesterday's operation log, with the partition key reformatted to yyyy-MM-dd
        SELECT
            concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) AS partition_date,
            user_id,
            device_id,
            action
        FROM online.bl_hdfs_operation_updates
        WHERE partition_date >= regexp_replace(DATE_SUB(current_date,1) ,'-','')
          AND partition_date <= regexp_replace(DATE_SUB(current_date,1) ,'-','')
    ) op
    INNER JOIN
    (
        -- action codes classified as "reply to post"
        SELECT code
        FROM dim.dim_community_action_type
        WHERE communityuserbehavior_type_name = '回帖'
    ) reply_action
        ON op.action = reply_action.code
    GROUP BY
        op.partition_date,
        op.user_id
) b
    ON a.user_id = b.user_id
   AND substr(a.create_time,1,10) = b.partition_date
LEFT JOIN
(
    -- accounts to drop from the report (anti-join via the IS NULL filter below)
    SELECT user_id
    FROM ml.ML_D_CT_UI_USERCLEAN_DIMEN_D
    WHERE PARTITION_DAY = regexp_replace(DATE_SUB(current_date,1) ,'-','')
      AND (is_doctor = 'true'
        OR is_puppet = 'true'
        OR is_classify_user = 'true'
        OR is_reply_fake = 'true'
        OR is_staff = 'true'
        OR is_associated_issuedevice = 'true')
    GROUP BY user_id
) c
    ON a.user_id = c.user_id
WHERE c.user_id IS NULL
daily_userpost=新手精选帖日报
data_by_day=每日数据汇总
key_data=当日数据关键指标
detail_data=当日数据明细
\ No newline at end of file
--***************************************************************
--*Script name:
--*Purpose: daily report table for newcomer featured posts (新手精选帖日报)
--*Business: pm
--*Input data:
--*Author: weiyimin@igengmei.com
--*Updated:
--***************************************************************
-- Global settings & UDFs
SET mapreduce.job.queuename=data;
-- Use the pm database
USE pm;
-- Create the internal (managed) table.
-- Column order matters: the table is loaded with a positional
-- INSERT OVERWRITE ... SELECT whose metric order in every window
-- (1 day / 3 days / 10 days / all history) is
--   exp, click, page, reply, vote, favor, share, avg_page_stay.
-- The exp_pv* columns are therefore declared before the click_pv* columns;
-- the previous click-before-exp order stored exposures in the click columns
-- and clicks in the exposure columns.
-- NOTE: IF NOT EXISTS leaves an already-created table untouched; an existing
-- table with the old order has to be realigned separately
-- (e.g. ALTER TABLE ... CHANGE COLUMN ... AFTER ...).
CREATE TABLE IF NOT EXISTS pm.tl_pm_userpost_d
(
post_id string comment '{"chs_name":"帖子id","description":"","etl":"","value":"","remark":""}',
title string comment '{"chs_name":"帖子标题","description":"","etl":"","value":"","remark":""}',
audit_date string comment '{"chs_name":"最新审核时间","description":"","etl":"","value":"","remark":""}',
tag_list string comment '{"chs_name":"关联标签","description":"","etl":"","value":"","remark":""}',
exp_pv_1 bigint comment '{"chs_name":"前1日曝光","description":"","etl":"","value":"","remark":""}',
click_pv_1 bigint comment '{"chs_name":"前1日点击","description":"","etl":"","value":"","remark":""}',
page_pv_1 bigint comment '{"chs_name":"前1日浏览pv","description":"","etl":"","value":"","remark":""}',
reply_num_1 bigint comment '{"chs_name":"前1日真实评论","description":"","etl":"","value":"","remark":""}',
vote_num_1 bigint comment '{"chs_name":"前1日真实点赞","description":"","etl":"","value":"","remark":""}',
favor_num_1 bigint comment '{"chs_name":"前1日收藏","description":"","etl":"","value":"","remark":""}',
share_num_1 bigint comment '{"chs_name":"前1日转发","description":"","etl":"","value":"","remark":""}',
avg_page_stay_1 double comment '{"chs_name":"前1日平均阅读时长(s)","description":"","etl":"","value":"","remark":""}',
exp_pv_3 bigint comment '{"chs_name":"前3日曝光","description":"","etl":"","value":"","remark":""}',
click_pv_3 bigint comment '{"chs_name":"前3日点击","description":"","etl":"","value":"","remark":""}',
page_pv_3 bigint comment '{"chs_name":"前3日浏览pv","description":"","etl":"","value":"","remark":""}',
reply_num_3 bigint comment '{"chs_name":"前3日真实评论","description":"","etl":"","value":"","remark":""}',
vote_num_3 bigint comment '{"chs_name":"前3日真实点赞","description":"","etl":"","value":"","remark":""}',
favor_num_3 bigint comment '{"chs_name":"前3日收藏","description":"","etl":"","value":"","remark":""}',
share_num_3 bigint comment '{"chs_name":"前3日转发","description":"","etl":"","value":"","remark":""}',
avg_page_stay_3 double comment '{"chs_name":"前3日平均阅读时长(s)","description":"","etl":"","value":"","remark":""}',
exp_pv_10 bigint comment '{"chs_name":"前10日曝光","description":"","etl":"","value":"","remark":""}',
click_pv_10 bigint comment '{"chs_name":"前10日点击","description":"","etl":"","value":"","remark":""}',
page_pv_10 bigint comment '{"chs_name":"前10日浏览pv","description":"","etl":"","value":"","remark":""}',
reply_num_10 bigint comment '{"chs_name":"前10日真实评论","description":"","etl":"","value":"","remark":""}',
vote_num_10 bigint comment '{"chs_name":"前10日真实点赞","description":"","etl":"","value":"","remark":""}',
favor_num_10 bigint comment '{"chs_name":"前10日收藏","description":"","etl":"","value":"","remark":""}',
share_num_10 bigint comment '{"chs_name":"前10日转发","description":"","etl":"","value":"","remark":""}',
avg_page_stay_10 double comment '{"chs_name":"前10日平均阅读时长(s)","description":"","etl":"","value":"","remark":""}',
exp_pv bigint comment '{"chs_name":"历史曝光","description":"","etl":"","value":"","remark":""}',
click_pv bigint comment '{"chs_name":"历史点击","description":"","etl":"","value":"","remark":""}',
page_pv bigint comment '{"chs_name":"历史浏览pv","description":"","etl":"","value":"","remark":""}',
reply_num bigint comment '{"chs_name":"历史真实评论","description":"","etl":"","value":"","remark":""}',
vote_num bigint comment '{"chs_name":"历史真实点赞","description":"","etl":"","value":"","remark":""}',
favor_num bigint comment '{"chs_name":"历史收藏","description":"","etl":"","value":"","remark":""}',
share_num bigint comment '{"chs_name":"历史转发","description":"","etl":"","value":"","remark":""}',
avg_page_stay double comment '{"chs_name":"历史平均阅读时长(s)","description":"","etl":"","value":"","remark":""}'
)comment '新手精选帖日报'
PARTITIONED BY (PARTITION_DAY STRING comment '分区日期')
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
COLLECTION ITEMS TERMINATED BY '\002'
MAP KEYS TERMINATED BY '\003'
LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
\ No newline at end of file
-- Session settings applied before the INSERT OVERWRITE into pm.tl_pm_userpost_d.
-- Submit the job to the "data" queue.
SET mapreduce.job.queuename=data;
-- Map tasks: 8192 MB of task memory, JVM heap capped at 8000 MB.
SET mapreduce.map.memory.mb=8192;
SET mapreduce.map.java.opts=-Xmx8000m;
-- Reduce tasks: same memory and heap sizing as the map side.
SET mapreduce.reduce.memory.mb=8192;
SET mapreduce.reduce.java.opts=-Xmx8000m;
-- Allow Hive to convert eligible joins into map-side joins automatically.
set hive.auto.convert.join=true;
-- Use a fixed number of reducers (20).
SET mapred.reduce.tasks=20;
-- Switch the session to the admin role.
SET role admin;
INSERT OVERWRITE TABLE pm.tl_pm_userpost_d PARTITION (PARTITION_DAY = ${partition_day})
SELECT t1.id as post_id
,title
,audit_date
,tag_list
,nvl(sum(case when t2.partition_date>=DATE_SUB(current_date,1) then exp_pv end),0) as exp_pv_1
,nvl(sum(case when t2.partition_date>=DATE_SUB(current_date,1) then click_pv end),0) as click_pv_1
,nvl(sum(case when t2.partition_date>=DATE_SUB(current_date,1) then page_pv end),0) as page_pv_1
,nvl(sum(case when t2.partition_date>=DATE_SUB(current_date,1) then reply_num end),0) as reply_num_1
,nvl(sum(case when t2.partition_date>=DATE_SUB(current_date,1) then vote_num end),0) as vote_num_1
,nvl(sum(case when t2.partition_date>=DATE_SUB(current_date,1) then favor_num end),0) as favor_num_1
,nvl(sum(case when t2.partition_date>=DATE_SUB(current_date,1) then share_num end),0) as share_num_1
,nvl(round(avg(case when t2.partition_date>=DATE_SUB(current_date,1) then avg_page_stay end),2),0) as avg_page_stay_1
,nvl(sum(case when t2.partition_date>=DATE_SUB(current_date,3) then exp_pv end),0) as exp_pv_3
,nvl(sum(case when t2.partition_date>=DATE_SUB(current_date,3) then click_pv end),0) as click_pv_3
,nvl(sum(case when t2.partition_date>=DATE_SUB(current_date,3) then page_pv end),0) as page_pv_3
,nvl(sum(case when t2.partition_date>=DATE_SUB(current_date,3) then reply_num end),0) as reply_num_3
,nvl(sum(case when t2.partition_date>=DATE_SUB(current_date,3) then vote_num end),0) as vote_num_3
,nvl(sum(case when t2.partition_date>=DATE_SUB(current_date,3) then favor_num end),0) as favor_num_3
,nvl(sum(case when t2.partition_date>=DATE_SUB(current_date,3) then share_num end),0) as share_num_3
,nvl(round(avg(case when t2.partition_date>=DATE_SUB(current_date,3) then avg_page_stay end),2),0) as avg_page_stay_3
,nvl(sum(case when t2.partition_date>=DATE_SUB(current_date,10) then exp_pv end),0) as exp_pv_10
,nvl(sum(case when t2.partition_date>=DATE_SUB(current_date,10) then click_pv end),0) as click_pv_10
,nvl(sum(case when t2.partition_date>=DATE_SUB(current_date,10) then page_pv end),0) as page_pv_10
,nvl(sum(case when t2.partition_date>=DATE_SUB(current_date,10) then reply_num end),0) as reply_num_10
,nvl(sum(case when t2.partition_date>=DATE_SUB(current_date,10) then vote_num end),0) as vote_num_10
,nvl(sum(case when t2.partition_date>=DATE_SUB(current_date,10) then favor_num end),0) as favor_num_10
,nvl(sum(case when t2.partition_date>=DATE_SUB(current_date,10) then share_num end),0) as share_num_10
,nvl(round(avg(case when t2.partition_date>=DATE_SUB(current_date,10) then avg_page_stay end),2),0) as avg_page_stay_10
,nvl(sum(exp_pv),0) as exp_pv
,nvl(sum(click_pv),0) as click_pv
,nvl(sum(page_pv),0) as page_pv
,nvl(sum(reply_num),0) as reply_num
,nvl(sum(vote_num),0) as vote_num
,nvl(sum(favor_num),0) as favor_num
,nvl(sum(share_num),0) as share_num
,nvl(round(avg(avg_page_stay),2),0) as avg_page_stay
FROM
(
select a.id,title,a.audit_date,collect_set(d.name) as tag_list
from
(
select id,title,user_id,substr(audit_time,1,10) as audit_date
from tl.tl_mp_api_tractate
where partition_day= regexp_replace(DATE_SUB(current_date,1) ,'-','')
and content_level>=3
and is_online='true'
)a
join
(
select distinct tractate_id
from tl.tl_mp_api_tractate_tag_v3
where partition_day= regexp_replace(DATE_SUB(current_date,1) ,'-','')
and tag_v3_id='3315'
)b
on a.id=b.tractate_id
join
(
select tractate_id,tag_v3_id
from tl.tl_mp_api_tractate_tag_v3
where partition_day= regexp_replace(DATE_SUB(current_date,1) ,'-','')
group by tractate_id,tag_v3_id
)c
on a.id=c.tractate_id
left join
(
select id,name
from online.tl_hdfs_api_tag_3_0_view
where partition_date= regexp_replace(DATE_SUB(current_date,1) ,'-','')
group by id,name
)d
on d.id=c.tag_v3_id
group by a.id,title,a.audit_date
)t1
join
(--历史数据,指从审核时间至今的数据
SELECT nvl(concat_ws('-',substr(t1.partition_date,1,4),substr(t1.partition_date,5,2),substr(t1.partition_date,7,2))
,concat_ws('-',substr(t2.create_date,1,4),substr(t2.create_date,5,2),substr(t2.create_date,7,2))) as partition_date
,nvl(t1.card_id,t2.tractate_id) as card_id
,exp_pv
,click_pv
,page_pv
,reply_num
,vote_num
,favor_num
,share_num
,avg_page_stay
from
(
select nvl(nvl(a.card_id,e.business_id),f.business_id) as card_id
,nvl(nvl(a.partition_date,e.partition_date),f.partition_date) as partition_date
,sum(exp_pv) as exp_pv
,sum(click_pv) as click_pv
,sum(page_pv) as page_pv
,round(sum(page_stay)/count(distinct e.cl_id,e.partition_date),2) as avg_page_stay
from
(--曝光
select partition_date,card_id,cl_id,count(distinct app_session_id) as exp_pv
from online.ml_community_precise_exposure_detail
where partition_date>='20160101' AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND action in ('page_precise_exposure','home_choiceness_card_exposure') --7745版本action改为page_precise_exposure
AND is_exposure = '1' ----精准曝光
AND page_name ='home'
AND tab_name = '精选'
group by partition_date,cl_id,card_id
)a
left join
(--点击
SELECT partition_date,params['card_id'] as card_id,cl_id,count(distinct app_session_id,cl_id) as click_pv
from online.bl_hdfs_maidian_updates
WHERE partition_date>='20160101' AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND action='on_click_card'
AND params['page_name'] ='home'
AND params['tab_name'] = '精选'
GROUP BY partition_date,params['card_id'],cl_id
)b
on a.partition_date=b.partition_date and a.card_id=b.card_id and a.cl_id=b.cl_id
full join
(--平均阅读时长
SELECT partition_date,business_id,cl_id,page_stay
from
(
SELECT partition_date,cl_id,params['business_id'] as business_id,page_stay,time_str
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= '20160101'
and partition_date <=regexp_replace(DATE_SUB(current_date,1) ,'-','')
and action='page_view'
and page_stay>=0 and page_stay<1000
and page_name in ('post_detail','user_post_detail','doctor_post_detail')
group by partition_date,cl_id,params['business_id'],page_stay,time_str
)a
)e
on a.partition_date=e.partition_date and a.card_id=e.business_id and a.cl_id=e.cl_id
full join
(--浏览pv
SELECT partition_date,cl_id,params['business_id'] as business_id,count(distinct time_str) as page_pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= '20160101'
and partition_date <=regexp_replace(DATE_SUB(current_date,1) ,'-','')
and action='page_view'
and page_name in ('post_detail','user_post_detail','doctor_post_detail')
group by partition_date,cl_id,params['business_id']
)f
on a.partition_date=f.partition_date and nvl(a.card_id,e.business_id)=f.business_id and nvl(a.cl_id,e.cl_id)=f.cl_id
left join
(
select distinct device_id
from ML.ML_D_CT_DV_DEVICECLEAN_DIMEN_D
where PARTITION_DAY =regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND is_abnormal_device = 'true'
)c
on a.cl_id=c.device_id
where c.device_id is null
group by nvl(nvl(a.card_id,e.business_id),f.business_id),nvl(nvl(a.partition_date,e.partition_date),f.partition_date)
)t1
full join
(
SELECT a.tractate_id,a.create_date
,sum(case when type='reply' then num end) as reply_num
,sum(case when type='vote' then num end) as vote_num
,sum(case when type='favor' then num end) as favor_num
,sum(case when type='share' then num end) as share_num
from
( --真实评论数
SELECT tractate_id,create_date,a.user_id,'reply' as type,sum(reply_num) as num
from
(--评论数
SELECT tractate_id,user_id,regexp_replace(substr(create_time,1,10),'-','') as create_date,count(distinct create_time) as reply_num
FROM online.tl_hdfs_api_tractate_reply_view
WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
-- and regexp_replace(substr(create_time,1,10),'-','') >= '20200101'
-- and regexp_replace(substr(create_time,1,10),'-','') < '${end_date}'
GROUP by tractate_id,user_id,regexp_replace(substr(create_time,1,10),'-','')
)a
group by tractate_id,create_date,a.user_id
union all
--真实点赞数
SELECT tractate_id,create_date,a.user_id,'vote' as type,sum(vote_num) as num
FROM
(
SELECT tractate_id,user_id,regexp_replace(substr(create_time,1,10),'-','') as create_date,count(distinct create_time) as vote_num
FROM online.tl_hdfs_api_tractate_vote_view
WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','')
-- and regexp_replace(substr(create_time,1,10),'-','') >= '20200101'
-- and regexp_replace(substr(create_time,1,10),'-','') < '${end_date}'
group by tractate_id,user_id,regexp_replace(substr(create_time,1,10),'-','')
)a
GROUP BY tractate_id,create_date,a.user_id
union all
--真实收藏数
SELECT tractate_id,regexp_replace(substr(create_time,1,10),'-','') as create_date,user_id,'favor' as type,count(distinct create_time) as num
FROM online.tl_hdfs_api_tractate_favor_view
WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','')
-- and regexp_replace(substr(create_time,1,10),'-','') >= '${start_date}'
-- and regexp_replace(substr(create_time,1,10),'-','') < '${end_date}'
group by user_id,tractate_id,regexp_replace(substr(create_time,1,10),'-','')
union all
--点击分享数
SELECT params['business_id'] as tractate_id,partition_date as create_date,user_id,'share' as type,count(distinct time_str) as num
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= '20160101'
and partition_date <=regexp_replace(DATE_SUB(current_date,1) ,'-','')
and action='page_click_share'
and page_name in ('post_detail','user_post_detail','doctor_post_detail')
group by params['business_id'],partition_date,user_id
)a
left join
(
select distinct user_id
from ml.ML_D_CT_UI_USERCLEAN_DIMEN_D
where PARTITION_DAY =regexp_replace(DATE_SUB(current_date,1) ,'-','')
and is_abnormal_user = 'true'
)b
on a.user_id=b.user_id
where b.user_id is null
group by a.tractate_id,a.create_date
)t2
on t1.card_id=t2.tractate_id and t1.partition_date =t2.create_date
)t2
on t1.id= t2.card_id
where t2.partition_date>=t1.audit_date
group by t1.id,title,audit_date,tag_list
\ No newline at end of file
#step1_1.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive tl tl_mp_api_tractate
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online bl_hdfs_operation_updates
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_api_tractate_vote_view
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_api_tractate_reply_vote_view
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_api_tractate_favor_view
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_api_tractate_favor_view
\ No newline at end of file
#step1_2.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive tl tl_mp_api_tractate_tag_v3
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive tl tl_mp_api_tractate_tag_v3
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_api_tag_3_0_view
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_community_precise_exposure_detail
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive ml ML_D_CT_DV_DEVICECLEAN_DIMEN_D
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive ml ML_D_CT_UI_USERCLEAN_DIMEN_D
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_user_updates
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_api_tractate_reply_view
\ No newline at end of file
#step2.job
type=command
dependencies=step1_1,step1_2,step1_3,step1_4,step1_5,step1_6,step1_7,step1_8,step1_9,step1_10,step1_11,step1_12,step1_13,step1_14
command=sh /home/bi/bi-report/lib/shell/hive daily_userpost
\ No newline at end of file
#step3.job
type=command
dependencies=step2
command=curl -X GET http://localhost:8553/api/report/email/daily_userpost/liudi@igengmei.com,zhaoyang@igengmei.com,shenzheng@igengmei.com,wangxin@igengmei.com,duanyingrong@igengmei.com,zhaowei@igengmei.com,songke@igengmei.com,wanglidan@igengmei.com/weiyimin@igengmei.com,hanyingyue@igengmei.com
\ No newline at end of file
-- Daily summary of user posts read from pm.tl_pm_userpost_d.
-- Output: one row per partition_day, from 20200911 through yesterday
-- (yyyyMMdd), sorted by day ascending.
--   帖子数量               : number of rows with a non-NULL post_id for the day
--   前N日ctr               : sum(click_pv_N) / sum(exp_pv_N) as a 'NN.NN%' string
--   前N日曝光              : total exposure PV over the N-day window
--   前N日平均阅读时长(s)   : mean of the per-post avg_page_stay_N values
-- The CTR columns are strings, so their NULL fallback is the string '0'
-- rather than the integer 0: both NVL arguments then share one type and the
-- query does not depend on engine-specific implicit string/int coercion.
SELECT
    partition_day AS `日期`
    ,COUNT(post_id) AS `帖子数量`
    -- previous 1 day
    ,NVL(CONCAT(ROUND(SUM(click_pv_1) / SUM(exp_pv_1) * 100, 2), '%'), '0') AS `前1日ctr`
    ,NVL(SUM(exp_pv_1), 0) AS `前1日曝光`
    ,NVL(ROUND(AVG(avg_page_stay_1), 2), 0) AS `前1日平均阅读时长(s)`
    -- previous 3 days
    ,NVL(CONCAT(ROUND(SUM(click_pv_3) / SUM(exp_pv_3) * 100, 2), '%'), '0') AS `前3日ctr`
    ,NVL(SUM(exp_pv_3), 0) AS `前3日曝光`
    ,NVL(ROUND(AVG(avg_page_stay_3), 2), 0) AS `前3日平均阅读时长(s)`
    -- previous 10 days
    ,NVL(CONCAT(ROUND(SUM(click_pv_10) / SUM(exp_pv_10) * 100, 2), '%'), '0') AS `前10日ctr`
    ,NVL(SUM(exp_pv_10), 0) AS `前10日曝光`
    ,NVL(ROUND(AVG(avg_page_stay_10), 2), 0) AS `前10日平均阅读时长(s)`
FROM pm.tl_pm_userpost_d
WHERE partition_day >= '20200911'
  AND partition_day <= REGEXP_REPLACE(DATE_SUB(current_date, 1), '-', '')  -- yesterday as yyyyMMdd
GROUP BY partition_day
ORDER BY `日期`;
\ No newline at end of file
-- Per-post detail report for yesterday's partition of pm.tl_pm_userpost_d.
-- Output: one row per table row in that partition, sorted by 10-day exposure
-- (前10日曝光) descending.
-- For each window (1 day, 3 days, 10 days, and the un-suffixed "历史"
-- historical columns) the report shows:
--   互动率   : (reply + vote + favor + share) / page_pv as a 'NN.NN%' string
--   ctr      : click_pv / exp_pv as a 'NN.NN%' string
--   followed by the raw counters and the average reading duration in seconds.
-- The rate/CTR columns are strings, so their NULL fallback is the string '0'
-- rather than the integer 0: both NVL arguments then share one type and the
-- query does not depend on engine-specific implicit string/int coercion.
SELECT
    post_id AS `帖子id`
    ,title AS `帖子标题`
    ,audit_date AS `审核日期`
    ,tag_list AS `所有关联标签`
    -- previous 1 day
    ,NVL(CONCAT(ROUND((NVL(reply_num_1, 0) + NVL(vote_num_1, 0) + NVL(favor_num_1, 0) + NVL(share_num_1, 0)) / page_pv_1 * 100, 2), '%'), '0') AS `前1日互动率`
    ,NVL(CONCAT(ROUND(click_pv_1 / exp_pv_1 * 100, 2), '%'), '0') AS `前1日ctr`
    ,NVL(click_pv_1, 0) AS `前1日点击`
    ,NVL(exp_pv_1, 0) AS `前1日曝光`
    ,NVL(page_pv_1, 0) AS `前1日浏览pv`
    ,NVL(reply_num_1, 0) AS `前1日真实评论`
    ,NVL(vote_num_1, 0) AS `前1日真实点赞`
    ,NVL(favor_num_1, 0) AS `前1日收藏`
    ,NVL(share_num_1, 0) AS `前1日转发`
    ,NVL(avg_page_stay_1, 0) AS `前1日平均阅读时长(s)`
    -- previous 3 days
    ,NVL(CONCAT(ROUND((NVL(reply_num_3, 0) + NVL(vote_num_3, 0) + NVL(favor_num_3, 0) + NVL(share_num_3, 0)) / page_pv_3 * 100, 2), '%'), '0') AS `前3日互动率`
    ,NVL(CONCAT(ROUND(click_pv_3 / exp_pv_3 * 100, 2), '%'), '0') AS `前3日ctr`
    ,NVL(click_pv_3, 0) AS `前3日点击`
    ,NVL(exp_pv_3, 0) AS `前3日曝光`
    ,NVL(page_pv_3, 0) AS `前3日浏览pv`
    ,NVL(reply_num_3, 0) AS `前3日真实评论`
    ,NVL(vote_num_3, 0) AS `前3日真实点赞`
    ,NVL(favor_num_3, 0) AS `前3日收藏`
    ,NVL(share_num_3, 0) AS `前3日转发`
    ,NVL(avg_page_stay_3, 0) AS `前3日平均阅读时长(s)`
    -- previous 10 days
    ,NVL(CONCAT(ROUND((NVL(reply_num_10, 0) + NVL(vote_num_10, 0) + NVL(favor_num_10, 0) + NVL(share_num_10, 0)) / page_pv_10 * 100, 2), '%'), '0') AS `前10日互动率`
    ,NVL(CONCAT(ROUND(click_pv_10 / exp_pv_10 * 100, 2), '%'), '0') AS `前10日ctr`
    ,NVL(click_pv_10, 0) AS `前10日点击`
    ,NVL(exp_pv_10, 0) AS `前10日曝光`
    ,NVL(page_pv_10, 0) AS `前10日浏览pv`
    ,NVL(reply_num_10, 0) AS `前10日真实评论`
    ,NVL(vote_num_10, 0) AS `前10日真实点赞`
    ,NVL(favor_num_10, 0) AS `前10日收藏`
    ,NVL(share_num_10, 0) AS `前10日转发`
    ,NVL(avg_page_stay_10, 0) AS `前10日平均阅读时长(s)`
    -- historical (un-suffixed) columns
    ,NVL(CONCAT(ROUND((NVL(reply_num, 0) + NVL(vote_num, 0) + NVL(favor_num, 0) + NVL(share_num, 0)) / page_pv * 100, 2), '%'), '0') AS `历史互动率`
    ,NVL(CONCAT(ROUND(click_pv / exp_pv * 100, 2), '%'), '0') AS `历史ctr`
    ,NVL(click_pv, 0) AS `历史点击`
    ,NVL(exp_pv, 0) AS `历史曝光`
    ,NVL(page_pv, 0) AS `历史浏览pv`
    ,NVL(reply_num, 0) AS `历史真实评论`
    ,NVL(vote_num, 0) AS `历史真实点赞`
    ,NVL(favor_num, 0) AS `历史收藏`
    ,NVL(share_num, 0) AS `历史转发`
    ,NVL(avg_page_stay, 0) AS `历史平均阅读时长(s)`
FROM pm.tl_pm_userpost_d
WHERE partition_day = REGEXP_REPLACE(DATE_SUB(current_date, 1), '-', '')  -- yesterday as yyyyMMdd
ORDER BY `前10日曝光` DESC
-- Condensed per-post report for yesterday's partition of
-- pm.tl_pm_userpost_d: CTR, exposure and average reading duration for the
-- 1-day, 3-day and 10-day windows, sorted by 10-day exposure (前10日曝光)
-- descending.
-- The CTR columns are 'NN.NN%' strings, so their NULL fallback is the string
-- '0' rather than the integer 0: both NVL arguments then share one type and
-- the query does not depend on engine-specific implicit string/int coercion.
SELECT
    post_id AS `帖子id`
    ,title AS `帖子标题`
    ,audit_date AS `审核日期`
    ,tag_list AS `所有关联标签`
    -- previous 1 day
    ,NVL(CONCAT(ROUND(click_pv_1 / exp_pv_1 * 100, 2), '%'), '0') AS `前1日ctr`
    ,NVL(exp_pv_1, 0) AS `前1日曝光`
    ,NVL(avg_page_stay_1, 0) AS `前1日平均阅读时长(s)`
    -- previous 3 days
    ,NVL(CONCAT(ROUND(click_pv_3 / exp_pv_3 * 100, 2), '%'), '0') AS `前3日ctr`
    ,NVL(exp_pv_3, 0) AS `前3日曝光`
    ,NVL(avg_page_stay_3, 0) AS `前3日平均阅读时长(s)`
    -- previous 10 days
    ,NVL(CONCAT(ROUND(click_pv_10 / exp_pv_10 * 100, 2), '%'), '0') AS `前10日ctr`
    ,NVL(exp_pv_10, 0) AS `前10日曝光`
    ,NVL(avg_page_stay_10, 0) AS `前10日平均阅读时长(s)`
FROM pm.tl_pm_userpost_d
WHERE partition_day = REGEXP_REPLACE(DATE_SUB(current_date, 1), '-', '')  -- yesterday as yyyyMMdd
ORDER BY `前10日曝光` DESC
#step2.job
type=command
dependencies=step1_1,step1_2,step1_3
command=curl -X GET http://localhost:8553/api/report/email/meigou_detail_page/liudi@igengmei.com,wangxin@igengmei.com,dailiang@igengmei.com,zhanghaoyu@igengmei.com,cp-sunyinghe@igengmei.com,zhaofei@igengmei.com/weiyimin@igengmei.com,yindanlei@igengmei.com,wangyan@igengmei.com
\ No newline at end of file
command=curl -X GET http://localhost:8553/api/report/email/meigou_detail_page/liudi@igengmei.com,wangxin@igengmei.com,dailiang@igengmei.com,zhanghaoyu@igengmei.com,cp-sunyinghe@igengmei.com,zhaofei@igengmei.com,zhaoyang@igengmei.com/weiyimin@igengmei.com,yindanlei@igengmei.com,wangyan@igengmei.com
\ No newline at end of file
#step3.job
type=command
dependencies=step2
command=curl -X GET http://localhost:8553/api/report/email/sign_daily/zhaoyang@igengmei.com,zhangwen@igengmei.com/weiyimin@igengmei.com
\ No newline at end of file
command=curl -X GET http://localhost:8553/api/report/email/sign_daily/zhaoyang@igengmei.com,shenzheng@igengmei.com,zhangwen@igengmei.com/weiyimin@igengmei.com
\ No newline at end of file
......@@ -18,7 +18,7 @@ SELECT
,first_channel_source_type
,concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) AS first_active_date
FROM online.ml_device_day_active_status
WHERE partition_date >= REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
WHERE partition_date >= REGEXP_REPLACE(trunc(date_sub(current_date(),8),'MM'),'-','')
AND partition_date <=regexp_replace(date_sub(current_date(),1),'-','')
AND active_type in ('1','2')
AND first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
......@@ -70,7 +70,7 @@ SELECT
(
SELECT user_id,merchant_id,SUBSTR(partition_day,1,6) AS consult_month,min(partition_day) as first_consult_date,1 as CONSULT_NUM
FROM ml.ml_c_et_msg_conversation_dimen_inc_d --新仓库私信表
WHERE partition_day >= REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
WHERE partition_day >= REGEXP_REPLACE(trunc(date_sub(current_date(),8),'MM'),'-','')
AND partition_day <= regexp_replace(date_sub(current_date,1),'-','')
and is_valid='true'
GROUP BY user_id,merchant_id,SUBSTR(partition_day,1,6)
......@@ -105,7 +105,7 @@ SELECT
SELECT sub_id,REGEXP_REPLACE(SUBSTR(call_time,0,10), '-', '') as partition_date
FROM tl.tl_gm_sl_ali_virtual_phone_call_detail --通话记录表,call_type呼叫类型
WHERE partition_day=regexp_replace(date_sub(current_date(),1),'-','')
AND REGEXP_REPLACE(SUBSTR(call_time,0,10), '-', '')>=REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
AND REGEXP_REPLACE(SUBSTR(call_time,0,10), '-', '')>=REGEXP_REPLACE(trunc(date_sub(current_date(),8),'MM'),'-','')
AND REGEXP_REPLACE(SUBSTR(call_time,0,10), '-', '')<=regexp_replace(date_sub(current_date,1),'-','')
AND start_time<release_time --通话时长大于30秒
)a
......@@ -136,6 +136,7 @@ SELECT
FROM tl.tl_gm_sl_lead_task --线索任务表(用户点击授权后记入该表)
WHERE partition_day=regexp_replace(date_sub(current_date(),1),'-','')
AND source='2' --用户行为电话授权
and (user_id is NOT NULL or USER_ID <> '')
)e
ON d.lead_task_id = e.id
GROUP BY user_id,merchant_id,regexp_replace(SUBSTR(a.partition_date,1,6),'-','')
......@@ -146,7 +147,7 @@ SELECT
(
SELECT user_id,partition_date,if(size(device_list) > 0, device_list [ 0 ], '') device_id
FROM online.ml_user_updates
WHERE partition_date >= REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
WHERE partition_date >= REGEXP_REPLACE(trunc(date_sub(current_date(),8),'MM'),'-','')
AND partition_date <=regexp_replace(date_sub(current_date(),1),'-','')
)t2
on t1.user_id=t2.user_id and t1.DATE_DAY=t2.partition_date
......
......@@ -18,7 +18,7 @@ SELECT
,first_channel_source_type
,concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) AS first_active_date
FROM online.ml_device_day_active_status
WHERE partition_date >= REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
WHERE partition_date >= REGEXP_REPLACE(trunc(date_sub(current_date(),8),'MM'),'-','')
AND partition_date <=regexp_replace(date_sub(current_date(),1),'-','')
AND active_type in ('1','2')
AND first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
......@@ -51,7 +51,7 @@ SELECT
(
SELECT concat_ws('-',substr(partition_date,0,4),substr(partition_date,5,2),substr(partition_date,7,2)) AS action_date,cl_id
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
WHERE partition_date >= REGEXP_REPLACE(trunc(date_sub(current_date(),8),'MM'),'-','')
AND partition_date <=regexp_replace(date_sub(current_date(),1),'-','')
AND page_name in ('welfare_detail','organization_detail','expert_detail')
AND action = 'page_view'
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment