Commit 8a90473c authored by 魏艺敏

update codes

parent 9db36996
#step2.job #step2.job
type=command type=command
dependencies=step1_1,step1_2,step1_3,step1_4,step1_5 dependencies=step1_1,step1_2,step1_3,step1_4,step1_5
command=/home/bi/bi-report/shells/search_related/search_related.sh command=/home/bi/bi-report/lib/shell/hive.sh search_related_insert
\ No newline at end of file \ No newline at end of file
-- Session settings shared by the INSERT statements that follow.
-- Submit the jobs to the queue named data.
SET mapreduce.job.queuename=data;
-- Map side: 8192 MB containers with the JVM heap capped at 8000 MB.
-- NOTE(review): the heap cap is within 192 MB of the container size - confirm
-- that this leaves enough room for non-heap memory.
SET mapreduce.map.memory.mb=8192;
SET mapreduce.map.java.opts=-Xmx8000m;
-- Reduce side: same sizing as the map side.
SET mapreduce.reduce.memory.mb=8192;
SET mapreduce.reduce.java.opts=-Xmx8000m;
-- Allow Hive to convert eligible joins into map joins.
SET hive.auto.convert.join=true;
-- Ask for 20 reduce tasks.
SET mapred.reduce.tasks=20;
-- Run the rest of the session under the admin role.
SET ROLE admin;
-- Daily search-entrance click report. Rebuilds one partition of
-- pm.tl_pm_search_click_path_d with click PV and UV per OS type, device
-- activity type and search entrance, plus a total row labelled 合计 for each
-- (day, OS type, activity type).
--
-- Fix over the previous version: UV used to be counted per entrance and then
-- summed into the 合计 row, so a device that used several entrances was
-- counted once per entrance. Events are now fanned out to their own entrance
-- and to 合计 before aggregation, which makes the 合计 UV a distinct device
-- count. PV values and per-entrance UV values are unchanged.
--
-- NOTE(review): the source date is always current_date - 1 while the target
-- partition comes from the partition_day variable - confirm the scheduler
-- always passes yesterday, otherwise a rerun writes yesterday data into a
-- different partition.
INSERT OVERWRITE TABLE pm.tl_pm_search_click_path_d PARTITION (PARTITION_DAY = ${partition_day})
SELECT n.day_id as day_id
      ,n.device_os_type as device_os_type
      ,n.active_type as active_type
      ,v.search_entrance as search_entrance
      ,count(n.cl_id) as click_search_pv
      ,count(distinct n.cl_id) as click_search_uv
FROM
(
    -- One row per classified click event, carrying both labels it counts for.
    SELECT t.day_id
          ,t.device_os_type
          ,t.active_type
          ,t.cl_id
          ,array(t.search_entrance,'合计') as entrance_labels
    FROM
    (
        SELECT t1.partition_date as day_id
              ,t1.device_os_type
              ,t1.active_type
              ,t2.cl_id
              ,case when action='on_click_navbar_search' and page_name='home' then '首页搜索框'
                    when action='on_click_navbar_search' and page_name='welfare_home' then '美购首页搜索框'
                    when action='on_click_navbar_search' and page_name='category' then '品类聚合页搜索框'
                    when action='on_click_navbar_search' and page_name='welfare_list' and referrer_link ='["home","category"]' then '来自品类聚合的美购列表页搜索框'
                    when action='on_click_navbar_search' and page_name='welfare_list' and referrer_link ='["welfare_home"]' then '来自美购首页的美购列表页搜索框'
                    when action in ('do_search','on_click_navbar_search')
                         and page_name in ('diary_detail','topic_detail','post_detail','user_post_detail'
                                          ,'doctor_post_detail','question_detail','answer_detail','question_answer_detail') then '内容详情页搜索框'
                    when action ='详情页高亮词' then '详情页高亮词搜索'
                    when action='大家都在看' then '首页feed大家都在看'
                    when action='热搜词' then '美购首页热搜词' else null end as search_entrance
        FROM
        (
            -- Devices active yesterday, labelled as new or returning, with
            -- the excluded acquisition channels removed.
            SELECT partition_date,device_os_type
                  ,case WHEN active_type = '4' THEN '老活跃设备'
                        WHEN active_type in ('1','2') then '新增设备' END as active_type
                  ,device_id
            FROM online.ml_device_day_active_status
            where partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
            AND active_type in ('1','2','4')
            and first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
            ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
            ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
            ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
            ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
            ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
            ,'promotion_shike','promotion_julang_jl03','promotion_zuimei')
            AND first_channel_source_type not like 'promotion\_jf\_%'
        )t1
        join
        (
            -- Search box events.
            SELECT partition_date,cl_id,page_name,action,params['referrer_link'] as referrer_link
            FROM online.bl_hdfs_maidian_updates
            where partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
            and action in ('do_search','on_click_navbar_search')
            union all
            -- Search-word cards in the home feed, tagged with a synthetic action.
            SELECT partition_date,cl_id,null as page_name,'大家都在看' as action, null as referrer_link
            FROM online.bl_hdfs_maidian_updates
            where partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
            AND action = 'on_click_card'
            AND params['in_page_pos']='猜你喜欢'
            AND params['tab_name']='精选'
            AND params['card_type']='search_word'
            -- AND page_name='home' is left out because page_name is empty on Android
            union all
            -- Hot search words on the welfare home page, tagged with a synthetic action.
            SELECT partition_date,cl_id,page_name,'热搜词' as action, null as referrer_link
            FROM online.bl_hdfs_maidian_updates
            where partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
            AND action = 'on_click_card'
            AND page_name='welfare_home'
            AND params['card_type'] ='search_word'
            AND params['in_page_pos']='大家都在搜'
            union all
            -- Highlighted words on detail pages, tagged with a synthetic action.
            select partition_date,cl_id,null as page_name,'详情页高亮词' as action,null as referrer_link
            from online.bl_hdfs_maidian_updates
            where partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
            and action='on_click_card'
            and params['card_type']='highlight_word'
        )t2
        on t1.partition_date=t2.partition_date and t1.device_id=t2.cl_id
    )t
    -- Events that match none of the entrance rules are not reported.
    WHERE t.search_entrance is not null
)n
LATERAL VIEW explode(n.entrance_labels) v AS search_entrance
group by n.day_id,n.device_os_type,n.active_type,v.search_entrance;
-- Daily search input-type report. Rebuilds one partition of
-- pm.tl_pm_search_input_type_d with search PV per query and input type, plus
-- a row labelled 全部 that totals every input type of a query.
--
-- Changes over the previous version:
--   * count(distinct cl_id) as all_search_uv was computed for every group but
--     never read by the outer query, so it is dropped
--   * the two input-type remappings are applied once, next to the raw field,
--     instead of being chained and repeated in SELECT and GROUP BY
-- The rows written are the same as before.
--
-- NOTE(review): the source date is always current_date - 1 while the target
-- partition comes from the partition_day variable - confirm they always agree.
INSERT OVERWRITE TABLE pm.tl_pm_search_input_type_d PARTITION (PARTITION_DAY = ${partition_day})
SELECT t1.partition_date as day_id
      ,t1.query
      ,t2.input_type
      ,sum(t1.all_search_pv) as search_pv
FROM
(
    -- PV per (day, query, input type) for devices outside the excluded channels.
    SELECT click.partition_date
          ,click.query
          ,click.input_type as input_type_group
          ,count(click.cl_id) as all_search_pv
    FROM
    (
        -- Typed or suggested searches. The raw input type is folded into the
        -- reporting buckets here.
        SELECT cl_id
              ,partition_date
              ,case when params['input_type'] in ('热门','related_search') then '发现'
                    when params['input_type'] in ('联想','聚合模块') then 'sug点击'
                    else params['input_type'] end as input_type
              ,params['query'] as query
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
        AND action in ('do_search','search_result_click_search')
        union all
        -- Search-word cards in the home feed.
        SELECT cl_id
              ,partition_date
              ,'首页精选大家都在看' as input_type
              ,params['card_name'] as query
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
        AND action = 'on_click_card'
        AND params['in_page_pos']='猜你喜欢'
        AND params['tab_name']='精选'
        AND params['card_type']='search_word'
        -- AND page_name='home' is left out because page_name is empty on Android
        union all
        -- Hot search words on the welfare home page.
        SELECT cl_id
              ,partition_date
              ,'美购首页热搜词' as input_type
              ,params['card_name'] as query
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
        AND action = 'on_click_card'
        AND page_name='welfare_home'
        AND params['card_type'] ='search_word'
        AND params['in_page_pos']='大家都在搜'
        union all
        -- Highlighted words on detail pages, limited to app versions whose
        -- second version component is at least 27.
        select cl_id
              ,partition_date
              ,'详情页高亮词' as input_type
              ,params['card_name'] as query
        from online.bl_hdfs_maidian_updates
        where partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
        and int(split(app_version,'\\.')[1]) >= 27
        and action='on_click_card'
        and params['card_type']='highlight_word'
    )click
    JOIN
    (
        -- Devices whose first channel is not in the excluded list.
        SELECT device_id
        from online.ml_device_history_detail
        WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
        and first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
        ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
        ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
        ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
        ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
        ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
        ,'promotion_shike','promotion_julang_jl03','promotion_zuimei')
        AND first_channel_source_type not like 'promotion\_jf\_%'
    )dev_channel
    on dev_channel.device_id = click.cl_id
    GROUP BY click.partition_date,click.query,click.input_type
)t1
-- Each group counts once under its own input type and once under 全部.
LATERAL VIEW explode(array(t1.input_type_group,'全部')) t2 AS input_type
GROUP BY t1.partition_date,t1.query,t2.input_type;
-- Daily search-result click-rate report. Rebuilds one partition of
-- pm.tl_pm_search_result_click_rate_d from yesterday data.
-- Outer t1 keeps the 300 most searched queries of the day. Outer t2 adds, per
-- query, the PV and UV of tab searches, result-page views and card clicks
-- (all positions, first 10, first 5) for the 综合, 美购 and 日记 result tabs.
-- The previous header comment said "last 7 days, top 500 per day", which did
-- not match the code: every source is filtered to current_date - 1 and the
-- cut-off is rank <= 300.
--
-- Fixes over the previous version:
--   * spam devices are matched on t1.cl_id. The old key t2.cl_id is NULL
--     whenever a tab search has no matching result-page view, so such rows
--     could never match the spam list and were still counted.
--   * the top-300 ranking breaks ties on query, so reruns keep the same
--     queries when several share the same search count.
--
-- NOTE(review): the source date is always current_date - 1 while the target
-- partition comes from the partition_day variable - confirm they always agree.
INSERT OVERWRITE TABLE pm.tl_pm_search_result_click_rate_d PARTITION (PARTITION_DAY = ${partition_day})
select t1.partition_date as day_id
      ,t1.query as query
      ,NVL(t1.search_pv,0) as search_pv
      ,NVL(t1.search_uv,0) as search_uv
      ,NVL(t2.search_more_pv,0) as search_more_pv
      ,NVL(t2.page_more_pv,0) as page_more_pv
      ,NVL(t2.more_click_pv,0) as more_click_pv
      ,NVL(t2.more_click_10_pv,0) as more_click_10_pv
      ,NVL(t2.more_click_5_diary_pv,0) as more_click_5_diary_pv
      ,NVL(t2.more_click_5_answer_pv,0) as more_click_5_answer_pv
      ,NVL(t2.more_click_5_topic_pv,0) as more_click_5_topic_pv
      ,NVL(t2.search_wel_pv,0) as search_wel_pv
      ,NVL(t2.page_wel_pv,0) as page_wel_pv
      ,NVL(t2.wel_click_pv,0) as wel_click_pv
      ,NVL(t2.wel_click_10_pv,0) as wel_click_10_pv
      ,NVL(t2.wel_click_5_pv,0) as wel_click_5_pv
      ,NVL(t2.search_diary_pv,0) as search_diary_pv
      ,NVL(t2.page_diary_pv,0) as page_diary_pv
      ,NVL(t2.diary_click_pv,0) as diary_click_pv
      ,NVL(t2.diary_click_10_pv,0) as diary_click_10_pv
      ,NVL(t2.diary_click_5_pv,0) as diary_click_5_pv
      ,NVL(t2.search_more_uv,0) as search_more_uv
      ,NVL(t2.page_more_uv,0) as page_more_uv
      ,NVL(t2.more_click_uv,0) as more_click_uv
      ,NVL(t2.more_click_10_uv,0) as more_click_10_uv
      ,NVL(t2.more_click_5_diary_uv,0) as more_click_5_diary_uv
      ,NVL(t2.more_click_5_answer_uv,0) as more_click_5_answer_uv
      ,NVL(t2.more_click_5_topic_uv,0) as more_click_5_topic_uv
      ,NVL(t2.search_wel_uv,0) as search_wel_uv
      ,NVL(t2.page_wel_uv,0) as page_wel_uv
      ,NVL(t2.wel_click_uv,0) as wel_click_uv
      ,NVL(t2.wel_click_10_uv,0) as wel_click_10_uv
      ,NVL(t2.wel_click_5_uv,0) as wel_click_5_uv
      ,NVL(t2.search_diary_uv,0) as search_diary_uv
      ,NVL(t2.page_diary_uv,0) as page_diary_uv
      ,NVL(t2.diary_click_uv,0) as diary_click_uv
      ,NVL(t2.diary_click_10_uv,0) as diary_click_10_uv
      ,NVL(t2.diary_click_5_uv,0) as diary_click_5_uv
from
(
    -- Total search volume: the 300 most searched queries of the day.
    select partition_date
          ,query
          ,search_pv
          ,search_uv
    from
    (
        select t1.partition_date
              ,query
              ,count(t1.cl_id) as search_pv
              ,count(distinct t1.cl_id) as search_uv
              -- query is the tie-breaker that keeps the cut-off stable between runs
              ,row_number() over(partition by partition_date order by count(t1.cl_id) desc, query) as rank
        from
        (
            -- Typed or suggested searches.
            select partition_date
                  ,params['query'] as query
                  ,cl_id
            from online.bl_hdfs_maidian_updates
            where partition_date = regexp_replace(date_sub(current_date,1),'-','')
            and action in ('do_search','search_result_click_search')
            union all
            -- Search-word cards in the home feed.
            SELECT partition_date
                  ,params['card_name'] as query
                  ,cl_id
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date = regexp_replace(date_sub(current_date,1),'-','')
            AND action = 'on_click_card'
            AND params['in_page_pos']='猜你喜欢'
            AND params['tab_name']='精选'
            AND params['card_type']='search_word'
            -- AND page_name='home' is left out because page_name is empty on Android
            union all
            -- Hot search words on the welfare home page.
            SELECT partition_date
                  ,params['card_name'] as query
                  ,cl_id
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date = regexp_replace(date_sub(current_date,1),'-','')
            AND action = 'on_click_card'
            AND page_name='welfare_home'
            AND params['card_type'] ='search_word'
            AND params['in_page_pos']='大家都在搜'
            union all
            -- Highlighted words on detail pages, limited to app versions
            -- whose second version component is at least 27.
            select partition_date
                  ,params['card_name'] as query
                  ,cl_id
            from online.bl_hdfs_maidian_updates
            where partition_date = regexp_replace(date_sub(current_date,1),'-','')
            and int(split(app_version,'\\.')[1]) >= 27
            and action='on_click_card'
            and params['card_type']='highlight_word'
        )t1
        join
        (
            -- Devices whose first channel is not in the excluded list.
            SELECT device_id
            from online.ml_device_history_detail
            WHERE partition_date = regexp_replace(date_sub(current_date,1),'-','')
            and first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
            ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
            ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
            ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
            ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
            ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
            ,'promotion_shike','promotion_julang_jl03','promotion_zuimei')
            AND first_channel_source_type not like 'promotion\_jf\_%'
        )t2
        on t1.cl_id=t2.device_id
        group by t1.partition_date,query
    )t
    where rank<=300
)t1
left join
(
    -- Per-query PV and UV of tab searches, result-page views and card clicks
    -- for the 综合, 美购 and 日记 tabs.
    select t1.partition_date,t1.query
          ,sum(case when tab_name='综合' then t1.pv end) as search_more_pv
          ,count(distinct case when tab_name='综合' then t1.cl_id end) as search_more_uv
          ,sum(case when tab_name='美购' then t1.pv end) as search_wel_pv
          ,count(distinct case when tab_name='美购' then t1.cl_id end) as search_wel_uv
          ,sum(case when tab_name='日记' then t1.pv end) as search_diary_pv
          ,count(distinct case when tab_name='日记' then t1.cl_id end) as search_diary_uv
          ,sum(case when t2.page_name='综合' then t2.pv end) as page_more_pv
          ,count(distinct case when t2.page_name='综合' then t2.cl_id end) as page_more_uv
          ,sum(case when t2.page_name='美购' then t2.pv end) as page_wel_pv
          ,count(distinct case when t2.page_name='美购' then t2.cl_id end) as page_wel_uv
          ,sum(case when t2.page_name='日记' then t2.pv end) as page_diary_pv
          ,count(distinct case when t2.page_name='日记' then t2.cl_id end) as page_diary_uv
          ,sum(case when t3.page_name='综合' then t3.pv end) as more_click_pv
          ,count(distinct case when t3.page_name='综合' then t3.cl_id end) as more_click_uv
          ,sum(case when t3.page_name='美购' then t3.pv end) as wel_click_pv
          ,count(distinct case when t3.page_name='美购' then t3.cl_id end) as wel_click_uv
          ,sum(case when t3.page_name='日记' then t3.pv end) as diary_click_pv
          ,count(distinct case when t3.page_name='日记' then t3.cl_id end) as diary_click_uv
          ,sum(case when t4.page_name='综合' then t4.pv end) as more_click_10_pv
          ,count(distinct case when t4.page_name='综合' then t4.cl_id end) as more_click_10_uv
          ,sum(case when t4.page_name='美购' then t4.pv end) as wel_click_10_pv
          ,count(distinct case when t4.page_name='美购' then t4.cl_id end) as wel_click_10_uv
          ,sum(case when t4.page_name='日记' then t4.pv end) as diary_click_10_pv
          ,count(distinct case when t4.page_name='日记' then t4.cl_id end) as diary_click_10_uv
          ,sum(case when t5.page_name='美购' then t5.pv end) as wel_click_5_pv
          ,count(distinct case when t5.page_name='美购' then t5.cl_id end) as wel_click_5_uv
          ,sum(case when t5.page_name='日记' then t5.pv end) as diary_click_5_pv
          ,count(distinct case when t5.page_name='日记' then t5.cl_id end) as diary_click_5_uv
          ,sum(case when t6.page_name='综合' then t6.pv end) as more_click_5_diary_pv
          ,count(distinct case when t6.page_name='综合' then t6.cl_id end) as more_click_5_diary_uv
          ,sum(case when t7.page_name='综合' then t7.pv end) as more_click_5_answer_pv
          ,count(distinct case when t7.page_name='综合' then t7.cl_id end) as more_click_5_answer_uv
          ,sum(case when t8.page_name='综合' then t8.pv end) as more_click_5_topic_pv
          ,count(distinct case when t8.page_name='综合' then t8.cl_id end) as more_click_5_topic_uv
    from
    (
        -- Searches per result tab, one row per (day, query, device, tab).
        select partition_date
              ,params['query'] as query
              ,cl_id
              ,params['tab'] as tab_name
              ,count(1) as pv
        from online.bl_hdfs_maidian_updates
        where partition_date = regexp_replace(date_sub(current_date,1),'-','')
        and params['tab'] in ('综合','美购','日记')
        and action in ('search_result_click_tab','search_result_click_search')
        group by partition_date,params['query'],cl_id,params['tab']
    )t1
    left join
    (
        -- Result-page views, with the page name mapped to the tab label.
        select partition_date
              ,get_json_object(params['extra_param'],'$.query') as query
              ,cl_id
              ,case when page_name='search_result_more' then '综合'
                    when page_name='search_result_welfare' then '美购'
                    when page_name='search_result_diary' then '日记' end as page_name
              ,count(1) as pv
        from online.bl_hdfs_maidian_updates
        where partition_date = regexp_replace(date_sub(current_date,1),'-','')
        and page_name in ('search_result_more','search_result_welfare','search_result_diary')
        and action ='page_view'
        group by partition_date
                ,get_json_object(params['extra_param'],'$.query')
                ,cl_id
                ,case when page_name='search_result_more' then '综合'
                      when page_name='search_result_welfare' then '美购'
                      when page_name='search_result_diary' then '日记' end
    )t2
    on t1.partition_date=t2.partition_date
    and t1.query=t2.query
    and t1.cl_id=t2.cl_id
    and t1.tab_name=t2.page_name
    left join
    (
        -- Content card clicks at any position.
        select partition_date
              ,params['query'] as query
              ,cl_id
              ,case when page_name='search_result_more' then '综合'
                    when page_name='search_result_welfare' then '美购'
                    when page_name='search_result_diary' then '日记' end as page_name
              ,count(1) as pv
        from online.bl_hdfs_maidian_updates
        where partition_date = regexp_replace(date_sub(current_date,1),'-','')
        and action in ('search_result_click_infomation_item','on_click_topic_card','on_click_diary_card','search_result_welfare_click_item')
        and page_name in ('search_result_more','search_result_welfare','search_result_diary')
        group by partition_date
                ,params['query']
                ,cl_id
                ,case when page_name='search_result_more' then '综合'
                      when page_name='search_result_welfare' then '美购'
                      when page_name='search_result_diary' then '日记' end
    )t3
    on t2.partition_date=t3.partition_date
    and t2.query=t3.query
    and t2.cl_id=t3.cl_id
    and t2.page_name=t3.page_name
    left join
    (
        -- Content card clicks at positions 0 to 9 (the first 10 cards).
        select partition_date
              ,params['query'] as query
              ,cl_id
              ,case when page_name='search_result_more' then '综合'
                    when page_name='search_result_welfare' then '美购'
                    when page_name='search_result_diary' then '日记' end as page_name
              ,count(1) as pv
        from online.bl_hdfs_maidian_updates
        where partition_date = regexp_replace(date_sub(current_date,1),'-','')
        and action in ('search_result_click_infomation_item','on_click_topic_card','on_click_diary_card','search_result_welfare_click_item')
        and params['position'] in (0,1,2,3,4,5,6,7,8,9)
        and page_name in ('search_result_more','search_result_welfare','search_result_diary')
        group by partition_date
                ,params['query']
                ,cl_id
                ,case when page_name='search_result_more' then '综合'
                      when page_name='search_result_welfare' then '美购'
                      when page_name='search_result_diary' then '日记' end
    )t4
    on t2.partition_date=t4.partition_date
    and t2.query=t4.query
    and t2.cl_id=t4.cl_id
    and t2.page_name=t4.page_name
    left join
    (
        -- Card clicks at positions 0 to 4 (the first 5 cards) on the 日记
        -- and 美购 result pages.
        select partition_date
              ,params['query'] as query
              ,cl_id
              ,case when page_name='search_result_welfare' then '美购'
                    when page_name='search_result_diary' then '日记' end as page_name
              ,count(1) as pv
        from online.bl_hdfs_maidian_updates
        where partition_date = regexp_replace(date_sub(current_date,1),'-','')
        and ( action = 'on_click_diary_card' and page_name='search_result_diary'
           or action ='search_result_welfare_click_item' and page_name='search_result_welfare')
        and params['position'] in (0,1,2,3,4)
        group by partition_date
                ,params['query']
                ,cl_id
                ,case when page_name='search_result_welfare' then '美购'
                      when page_name='search_result_diary' then '日记' end
    )t5
    on t2.partition_date=t5.partition_date
    and t2.query=t5.query
    and t2.cl_id=t5.cl_id
    and t2.page_name=t5.page_name
    left join
    (
        -- 综合 page: clicks on the first 5 diary cards. Subquery b lists, per
        -- query, the 5 lowest positions at which a diary card was exposed.
        -- Subquery c holds the diary card clicks with their position.
        select c.partition_date,c.query,c.cl_id,'综合' as page_name,count(1) as pv
        from
        (
            SELECT partition_date,query,position
            FROM
            (
                select partition_date
                      ,params['query'] as query
                      ,cast(absolute_position as int) as position
                      ,row_number() over(partition by params['query'], partition_date order by cast (absolute_position as int) asc) as rank
                from online.ml_community_precise_exposure_detail
                where partition_date = regexp_replace(date_sub(current_date,1),'-','')
                and page_name='search_result_more'
                -- from version 7745 on the action is named page_precise_exposure
                and action in ('page_precise_exposure','home_choiceness_card_exposure')
                -- precise exposures only
                and is_exposure = '1'
                and card_content_type='diary'
                group by partition_date,params['query'],cast(absolute_position as int)
            )a
            where rank<=5
        )b
        join
        (
            select partition_date
                  ,params['query'] as query
                  ,cl_id
                  ,params['position'] as position
            from online.bl_hdfs_maidian_updates
            where partition_date = regexp_replace(date_sub(current_date,1),'-','')
            and page_name='search_result_more'
            and (action ='search_result_click_infomation_item' and params['business_type'] in ('diary','0')
              or action ='on_click_diary_card')
        )c
        on b.partition_date=c.partition_date
        and b.query=c.query
        and b.position=c.position
        group by c.partition_date,c.query,c.cl_id
    )t6
    on t2.partition_date=t6.partition_date
    and t2.query=t6.query
    and t2.cl_id=t6.cl_id
    and t2.page_name=t6.page_name
    left join
    (
        -- 综合 page: clicks on the first 5 answer cards, built the same way
        -- as the diary variant.
        select c.partition_date,c.query,c.cl_id,'综合' as page_name,count(1) as pv
        from
        (
            SELECT partition_date,query,position
            FROM
            (
                select partition_date
                      ,params['query'] as query
                      ,cast(absolute_position as int) as position
                      ,row_number() over(partition by params['query'], partition_date order by cast (absolute_position as int) asc) as rank
                from online.ml_community_precise_exposure_detail
                where partition_date = regexp_replace(date_sub(current_date,1),'-','')
                and page_name='search_result_more'
                -- from version 7745 on the action is named page_precise_exposure
                and action in ('page_precise_exposure','home_choiceness_card_exposure')
                -- precise exposures only
                and is_exposure = '1'
                and card_content_type='answer'
                group by partition_date,params['query'],cast(absolute_position as int)
            )a
            where rank<=5
        )b
        join
        (
            select partition_date
                  ,params['query'] as query
                  ,cl_id
                  ,params['position'] as position
            from online.bl_hdfs_maidian_updates
            where partition_date = regexp_replace(date_sub(current_date,1),'-','')
            and page_name='search_result_more'
            and action ='search_result_click_infomation_item'
            and params['business_type'] in ('answer','5')
        )c
        on b.partition_date=c.partition_date
        and b.query=c.query
        and b.position=c.position
        group by c.partition_date,c.query,c.cl_id
    )t7
    on t2.partition_date=t7.partition_date
    and t2.query=t7.query
    and t2.cl_id=t7.cl_id
    and t2.page_name=t7.page_name
    left join
    (
        -- 综合 page: clicks on the first 5 post cards, built the same way as
        -- the diary variant.
        select c.partition_date,c.query,c.cl_id,'综合' as page_name,count(1) as pv
        from
        (
            SELECT partition_date,query,position
            FROM
            (
                select partition_date
                      ,params['query'] as query
                      ,cast(absolute_position as int) as position
                      ,row_number() over(partition by params['query'], partition_date order by cast (absolute_position as int) asc) as rank
                from online.ml_community_precise_exposure_detail
                where partition_date = regexp_replace(date_sub(current_date,1),'-','')
                and page_name='search_result_more'
                -- from version 7745 on the action is named page_precise_exposure
                and action in ('page_precise_exposure','home_choiceness_card_exposure')
                -- precise exposures only
                and is_exposure = '1'
                and card_content_type='user_post'
                group by partition_date,params['query'],cast(absolute_position as int)
            )a
            where rank<=5
        )b
        join
        (
            select partition_date
                  ,params['query'] as query
                  ,cl_id
                  ,params['position'] as position
            from online.bl_hdfs_maidian_updates
            where partition_date = regexp_replace(date_sub(current_date,1),'-','')
            and page_name='search_result_more'
            and (action ='search_result_click_infomation_item' and params['business_type'] in ('post','user_post','doctor_post','11')
              or action='on_click_topic_card')
        )c
        on b.partition_date=c.partition_date
        and b.query=c.query
        and b.position=c.position
        group by c.partition_date,c.query,c.cl_id
    )t8
    on t2.partition_date=t8.partition_date
    and t2.query=t8.query
    and t2.cl_id=t8.cl_id
    and t2.page_name=t8.page_name
    join
    (
        -- Devices whose first channel is not in the excluded list.
        select device_id
        from online.ml_device_history_detail
        WHERE partition_date = regexp_replace(date_sub(current_date,1),'-','')
        and first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
        ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
        ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
        ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
        ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
        ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
        ,'promotion_shike','promotion_julang_jl03','promotion_zuimei')
        AND first_channel_source_type not like 'promotion\_jf\_%'
    )dev
    on t1.cl_id=dev.device_id
    left join
    (
        -- Devices suspected of institutional traffic inflation. Their PV and
        -- UV are removed by the anti-join below.
        SELECT distinct device_id
        FROM ml.ml_d_ct_dv_devicespam_d
        WHERE partition_day=regexp_replace(date_sub(current_date,1),'-','')
    )spam_pv
    -- Keyed on t1.cl_id, which is never NULL here, so every spam device is caught.
    on t1.cl_id=spam_pv.device_id
    WHERE spam_pv.device_id IS NULL
    group by t1.partition_date,t1.query
)t2
on t1.partition_date=t2.partition_date
and t1.query=t2.query
order by day_id desc,search_pv desc,query;
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment