Commit 44bf63c8 authored by 魏艺敏

Merge branch 'weiyimin' into 'master'

Weiyimin

See merge request !102
parents fc701780 363c0035
......@@ -3,4 +3,5 @@ home_content_detail=首页内容数据-分日明细
home_content_by_month=首页内容数据-月均
ai_content_detail=ai内容数据-分日明细
ai_content_by_month=ai内容数据-月均
home_content_retention=分类用户次留
\ No newline at end of file
home_content_retention=分类用户次留
search_content_detail=搜索到内容的转化
\ No newline at end of file
--***************************************************************
--* Script name:
--* Purpose: content daily report (simplified version) - for 思璟
--* Business: pm
--* Input data:
--* Author: weiyimin@igengmei.com
--* Last updated:
--***************************************************************
-- Session settings: run the job on the data queue
SET mapreduce.job.queuename=data;
-- Work inside the pm database
USE pm;
-- Managed (non-external) table holding the daily search-to-content conversion metrics.
-- Counters are BIGINT. Ratios and averages are stored as STRING.
CREATE TABLE IF NOT EXISTS pm.tl_pm_search_content (
    day_id                          STRING COMMENT '{"chs_name":"当天日期","description":"","etl":"","value":"","remark":""}',
    device_os_type                  STRING COMMENT '{"chs_name":"设备类型","description":"","etl":"","value":"","remark":""}',
    active_type                     STRING COMMENT '{"chs_name":"活跃类型","description":"","etl":"","value":"","remark":""}',
    channel                         STRING COMMENT '{"chs_name":"渠道","description":"","etl":"","value":"","remark":""}',
    dau                             BIGINT COMMENT '{"chs_name":"日活","description":"","etl":"","value":"","remark":""}',
    search_uv                       BIGINT COMMENT '{"chs_name":"搜索uv","description":"","etl":"","value":"","remark":""}',
    search_uv_in_dau                STRING COMMENT '{"chs_name":"搜索uv/dau","description":"","etl":"","value":"","remark":""}',
    do_search_uv                    BIGINT COMMENT '{"chs_name":"完成搜索uv","description":"","etl":"","value":"","remark":""}',
    do_search_pv                    BIGINT COMMENT '{"chs_name":"完成搜索pv","description":"","etl":"","value":"","remark":""}',
    do_search_rate                  STRING COMMENT '{"chs_name":"完成搜索的用户比例","description":"","etl":"","value":"","remark":""}',
    search_times                    STRING COMMENT '{"chs_name":"搜索用户人均使用次数","description":"","etl":"","value":"","remark":""}',
    search_diary_to_content_uv      BIGINT COMMENT '{"chs_name":"来源于搜索日记tab的日记及帖子页uv","description":"","etl":"","value":"","remark":""}',
    search_diary_to_content_pv      BIGINT COMMENT '{"chs_name":"来源于搜索日记tab的日记及帖子页pv","description":"","etl":"","value":"","remark":""}',
    search_diary_to_content_uv_rate STRING COMMENT '{"chs_name":"来源于搜索日记tab的日记及帖子页uv/搜索uv","description":"","etl":"","value":"","remark":""}',
    search_qa_to_content_uv         BIGINT COMMENT '{"chs_name":"来源于搜索问答tab的问答页uv","description":"","etl":"","value":"","remark":""}',
    search_qa_to_content_pv         BIGINT COMMENT '{"chs_name":"来源于搜索问答tab的问答页pv","description":"","etl":"","value":"","remark":""}',
    search_qa_to_content_uv_rate    STRING COMMENT '{"chs_name":"来源于搜索问答tab的问答页uv/搜索uv","description":"","etl":"","value":"","remark":""}',
    diary_tab_ctr                   STRING COMMENT '{"chs_name":"日记tab ctr","description":"","etl":"","value":"","remark":""}',
    diary_tab_pv_per_uv             STRING COMMENT '{"chs_name":"日记tab卡片点击pv/uv","description":"","etl":"","value":"","remark":""}',
    qa_tab_ctr                      STRING COMMENT '{"chs_name":"问答tab ctr","description":"","etl":"","value":"","remark":""}',
    qa_tab_pv_per_uv                STRING COMMENT '{"chs_name":"问答tab卡片点击pv/uv","description":"","etl":"","value":"","remark":""}',
    content_pv_in_search_uv         STRING COMMENT '{"chs_name":"来源于搜索日记和问答tab的内容页PV/搜索uv","description":"","etl":"","value":"","remark":""}',
    diary_second_pv_in_search_uv    STRING COMMENT '{"chs_name":"来源于搜索日记tab的内容二跳PV/搜索uv","description":"","etl":"","value":"","remark":""}',
    qa_second_pv_in_search_uv       STRING COMMENT '{"chs_name":"来源于搜索问答tab的内容二跳PV/搜索uv","description":"","etl":"","value":"","remark":""}',
    content_pagestay_in_search_uv   STRING COMMENT '{"chs_name":"来源于搜索日记及问答tab的内容总时长/搜索uv(s)","description":"","etl":"","value":"","remark":""}',
    avg_contents                    STRING COMMENT '{"chs_name":"完成搜索用户人均阅读内容篇数","description":"","etl":"","value":"","remark":""}',
    search_retention                STRING COMMENT '{"chs_name":"功能次留","description":"","etl":"","value":"","remark":""}'
)
COMMENT '内容日报-搜索到内容的转化'
PARTITIONED BY (partition_day STRING COMMENT '分区日期')
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
    COLLECTION ITEMS TERMINATED BY '\002'
    MAP KEYS TERMINATED BY '\003'
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
\ No newline at end of file
......@@ -420,4 +420,332 @@ FROM
on t1.device_id =spam_pv.device_id
WHERE spam_pv.device_id IS NULL
group by t1.partition_date,device_os_type,active_type,channel
)t1
\ No newline at end of file
)t1;
-- Populate pm.tl_pm_search_content (search-to-content conversion for the daily content report).
-- Every source subquery below reads the day before current_date. The result is written to
-- the partition named by the partition_day job parameter.
-- NOTE(review): the read date comes from current_date while the target partition comes from
-- the job parameter. A rerun for another day would store the data of yesterday under that
-- partition. Confirm whether the filters should use the job parameter as well.
-- Select-list columns are matched to the target table by position, so their order must
-- follow the table definition.
INSERT OVERWRITE TABLE pm.tl_pm_search_content PARTITION (PARTITION_DAY = ${partition_day})
SELECT
t1.partition_date as day_id
,device_os_type
,active_type
,channel
,count(distinct t1.device_id) as `DAU`
,count(distinct case when t4.search_pv>0 then t4.cl_id end) as search_uv
,concat(round(count(distinct case when t4.search_pv>0 then t4.cl_id end)/count(distinct t1.device_id)*100,2),'%') as search_uv_in_dau
,count(distinct t2.cl_id) as do_search_uv
,sum(t2.all_search_pv) as do_search_pv
,concat(round(count(distinct t2.cl_id)/count(distinct t1.device_id)*100,2),'%') as do_search_rate
,round(sum(t2.all_search_pv)/count(distinct t2.cl_id),2) as search_times
,count(distinct case when referrer_search_diary_pv>0 then t4.cl_id end) search_diary_to_content_uv
,sum(referrer_search_diary_pv) search_diary_to_content_pv
,concat(round(count(distinct case when referrer_search_diary_pv>0 then t4.cl_id end )/count(distinct case when t4.search_pv>0 then t4.cl_id end)*100,2),'%') search_diary_to_content_uv_rate
,count(distinct case when referrer_search_qa_pv>0 then t4.cl_id end ) search_qa_to_content_uv
,sum(referrer_search_qa_pv) search_qa_to_content_pv
,concat(round(count(distinct case when referrer_search_qa_pv>0 then t4.cl_id end )/count(distinct case when t4.search_pv>0 then t4.cl_id end)*100,2),'%') search_qa_to_content_uv_rate
,concat(round(sum(diary_click_pv)/sum(diary_exp_pv)*100,2),'%') diary_tab_ctr
,round(sum(diary_click_pv)/count(distinct case when diary_click_pv>0 then t6.device_id end),2) diary_tab_pv_per_uv
,concat(round(sum(qa_click_pv)/sum(qa_exp_pv)*100,2),'%') qa_tab_ctr
,round(sum(qa_click_pv)/count(distinct case when qa_click_pv>0 then t6.device_id end),2) qa_tab_pv_per_uv
-- alias matches the target column name content_pv_in_search_uv
,round(sum(referrer_search_pv)/count(distinct case when t4.search_pv>0 then t4.cl_id end),2) as content_pv_in_search_uv
,concat(round(count(distinct case when total_diary_second_pv>0 then t4.cl_id end )/count(distinct case when t4.search_pv>0 then t4.cl_id end)*100,2),'%') diary_second_pv_in_search_uv
,concat(round(count(distinct case when total_qa_second_pv>0 then t4.cl_id end )/count(distinct case when t4.search_pv>0 then t4.cl_id end)*100,2),'%') qa_second_pv_in_search_uv
,round(sum(referrer_search_pagestay)/count(distinct case when t4.search_pv>0 then t4.cl_id end),2) as content_pagestay_in_search_uv
,round(sum(business_id_num)/count(distinct t2.cl_id),2) as avg_contents
,concat(round(count(distinct t3.cl_id)/count(distinct t2.cl_id)*100,2),'%') as search_retention
FROM
( -- t1: DAU base. Each dimension is turned into an array holding its own value plus the
  -- 合计 (total) bucket and then exploded, so a device appears once per combination.
    SELECT mas.partition_date,t2.active_type,t2.device_os_type,t2.channel,device_id
    FROM
    (
        SELECT
        concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) as partition_date
        ,m.device_id
        ,array(device_os_type ,'合计') as device_os_type
        ,array(case WHEN active_type = '4' THEN '老活'
                    WHEN active_type in ('1','2') then '新增' END ,'合计') as active_type
        ,array(CASE WHEN is_ai_channel = 'true' THEN 'AI' ELSE '其他' END , '合计') as channel
        FROM online.ml_device_day_active_status m
        LEFT JOIN
        (   -- channel dimension used to flag AI channels
            SELECT code,is_ai_channel,partition_day
            FROM DIM.DIM_AI_CHANNEL_ZP_NEW
            WHERE partition_day=regexp_replace(DATE_SUB(current_date,1) ,'-','') ) tmp
        ON m.partition_date=tmp.partition_day AND first_channel_source_type=code
        where partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
        AND active_type in ('1','2','4')
    ) mas
    LATERAL VIEW explode(mas.channel) t2 AS channel
    LATERAL VIEW explode(mas.device_os_type) t2 AS device_os_type
    LATERAL VIEW explode(mas.active_type) t2 AS active_type
)t1
left JOIN
(
    -- t2: completed-search events per device (search PV / UV)
    SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) as partition_date,cl_id
    ,count(1) as all_search_pv
    FROM
    (
        -- regular do_search events and searches issued from the result page
        SELECT cl_id,partition_date
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
        AND ((action = 'do_search' AND params['input_type']<>'everyone_watch')
        or action='search_result_click_search')
        UNION all
        -- home page, everyone-is-watching entry
        SELECT cl_id,partition_date
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
        AND action = 'do_search'
        and params['input_type']='everyone_watch'
        and params['tab']='精选'
        and page_name='home'
        AND params['query'] not in ('AI测颜值','AI测肤质') -- these two words do not open the search result page
        union all
        -- click-to-search action of the gray-release search
        SELECT cl_id,partition_date
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
        AND action = 'on_click_card'
        AND params['page_name']='search_home'
        union all
        -- alternative tracking event of the home page everyone-is-watching entry
        SELECT cl_id,partition_date
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
        AND action = 'on_click_card'
        AND params['in_page_pos']='猜你喜欢'
        -- the tab_name = 精选 filter is disabled here
        AND params['card_type']='search_word'
        AND params['card_name'] not in ('AI测颜值','AI测肤质') -- these two words do not open the search result page
        -- the page_name = home filter is left out because page_name is empty on android
        union all
        -- hot search words on the welfare home page
        SELECT cl_id,partition_date
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
        AND action = 'on_click_card'
        AND params['page_name']='welfare_home'
        AND params['card_type'] ='search_word'
        AND params['in_page_pos']='大家都在搜'
        union all
        -- searches triggered by highlighted words inside content
        SELECT cl_id,partition_date
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
        AND action = 'on_click_card'
        AND params['card_type'] ='highlight_word'
    )click
    group by partition_date,cl_id
)t2
on t1.partition_date=t2.partition_date and t1.device_id=t2.cl_id
left join
(
    -- t3: same search events as t2, used as the numerator of search_retention.
    -- NOTE(review): t3 reads the same day as t2, but the join below requires
    -- t3.partition_date = t2.partition_date + 1 day, so no row can match and
    -- search_retention never counts a retained device. Confirm the intended date window.
    SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) as partition_date,cl_id
    ,count(1) as all_search_pv
    FROM
    (
        -- regular do_search events and searches issued from the result page
        SELECT cl_id,partition_date
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
        AND ((action = 'do_search' AND params['input_type']<>'everyone_watch')
        or action='search_result_click_search')
        UNION all
        -- home page, everyone-is-watching entry
        SELECT cl_id,partition_date
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
        AND action = 'do_search'
        and params['input_type']='everyone_watch'
        and params['tab']='精选'
        and page_name='home'
        AND params['query'] not in ('AI测颜值','AI测肤质') -- these two words do not open the search result page
        union all
        -- click-to-search action of the gray-release search
        SELECT cl_id,partition_date
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
        AND action = 'on_click_card'
        AND params['page_name']='search_home'
        union all
        -- alternative tracking event of the home page everyone-is-watching entry
        SELECT cl_id,partition_date
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
        AND action = 'on_click_card'
        AND params['in_page_pos']='猜你喜欢'
        -- the tab_name = 精选 filter is disabled here
        AND params['card_type']='search_word'
        AND params['card_name'] not in ('AI测颜值','AI测肤质') -- these two words do not open the search result page
        -- the page_name = home filter is left out because page_name is empty on android
        union all
        -- hot search words on the welfare home page
        SELECT cl_id,partition_date
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
        AND action = 'on_click_card'
        AND params['page_name']='welfare_home'
        AND params['card_type'] ='search_word'
        AND params['in_page_pos']='大家都在搜'
        union all
        -- searches triggered by highlighted words inside content
        SELECT cl_id,partition_date
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
        AND action = 'on_click_card'
        AND params['card_type'] ='highlight_word'
    )click
    group by partition_date,cl_id
)t3
on date_add(t2.partition_date,1)=t3.partition_date and t2.cl_id=t3.cl_id
left JOIN
(
    -- t4: page-view PV per device for search pages and for content pages reached from search
    SELECT
    concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) as partition_date,cl_id
    ,count(CASE WHEN page_name in ('search_home','search_home_more','search_home_welfare','search_home_diary','search_home_wiki','search_home_post','search_home_hospital','search_home_doctor'
    ,'search_result_diary','search_result_doctor','search_result_hospital','search_result_more'
    ,'search_result_more_infomation','search_result_more_user','search_result_post','search_result_welfare'
    ,'search_result_wiki','search_result_question_answer') THEN page.cl_id END) as search_pv
    ,count(CASE when referrer in ('search_result_diary') and page_name in ('diary_detail','post_detail','user_post_detail','doctor_post_detail') THEN page.cl_id END) as referrer_search_diary_pv
    ,count(CASE when referrer in ('search_result_question_answer') and page_name in ('question_answer_detail','answer_detail','question_detail') THEN page.cl_id END) as referrer_search_qa_pv
    ,count(case when referrer in ('search_result_diary','search_result_question_answer') and page_name in ('diary_detail','post_detail','user_post_detail','doctor_post_detail','question_answer_detail','answer_detail','question_detail') then page.cl_id end) as referrer_search_pv
    -- page_stay is compared and summed as a number. Quoted bounds would be compared as
    -- strings when page_stay is a string column, which drops values such as 5.
    -- Only stays in the range [0, 1000) are summed.
    ,sum(CASE WHEN referrer in ('search_result_diary','search_result_question_answer') and page_name in ('diary_detail','post_detail','user_post_detail','doctor_post_detail','question_answer_detail','answer_detail','question_detail') and cast(page.page_stay as double) >= 0 and cast(page.page_stay as double) < 1000 THEN cast(page.page_stay as double) END) as referrer_search_pagestay
    FROM
    (
        SELECT cl_id,partition_date,page_name,params['referrer'] as referrer,page_stay
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
        AND action='page_view'
        AND page_name in ('search_home','search_home_more','search_home_welfare','search_home_diary','search_home_wiki','search_home_post','search_home_hospital','search_home_doctor'
        ,'diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail'
        ,'question_answer_detail','article_detail'
        ,'search_result_diary','search_result_doctor','search_result_hospital','search_result_more'
        ,'search_result_more_infomation','search_result_more_user','search_result_post','search_result_welfare'
        ,'search_result_wiki','search_result_question_answer')
    )page
    group by partition_date,cl_id
)t4
on t1.partition_date=t4.partition_date and t1.device_id=t4.cl_id
left JOIN
( -- t5: second-jump actions on content pages reached from the search result tabs.
  -- Content-card clicks are weighted by 0.2 in the totals. The other actions count fully.
    SELECT nvl(nvl(a.partition_date,b.partition_date),c.partition_date) as partition_date
    ,nvl(nvl(a.cl_id,b.cl_id),c.cl_id) as cl_id
    ,nvl(diary_navbar_pv,0)+nvl(diary_service_pv,0)+nvl(diary_content_pv,0)*0.2+nvl(diary_cons_pv,0) as total_diary_second_pv
    ,nvl(qa_navbar_pv,0)+nvl(qa_service_pv,0)+nvl(qa_content_pv,0)*0.2+nvl(qa_cons_pv,0) as total_qa_second_pv
    FROM
    (
        -- a: search-box clicks and highlighted-word clicks, deduplicated by time_str
        SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) as partition_date,cl_id
        ,count(distinct case when (referrer='search_result_diary' or json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1]='search_result_diary') then time_str end) as diary_navbar_pv
        ,count(distinct case when (referrer='search_result_question_answer' or json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1]='search_result_question_answer') then time_str end) as qa_navbar_pv
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
        AND (action in ('on_click_navbar_search','do_search') or (action='on_click_card' and params['card_type']='highlight_word'))
        AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail')
        -- the source tab is taken from referrer or from the last element of referrer_link
        AND (referrer in ('search_result_diary','search_result_question_answer') or
        (params['referrer_link'] like '%[%' and
        json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1] in ('search_result_diary','search_result_question_answer')))
        group by partition_date,cl_id
    )a
    full join
    (
        -- b: clicks on service cards and content cards, deduplicated by (card_id, app_session_id)
        SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) as partition_date,cl_id
        ,count(distinct case when (referrer='search_result_diary' or json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1]='search_result_diary') and params['card_content_type'] in ('service') then array( params['card_id'],app_session_id) end) as diary_service_pv
        ,count(distinct case when (referrer='search_result_diary' or json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1]='search_result_diary') and params['card_content_type'] in ('qa','diary','user_post','answer') then array( params['card_id'],app_session_id) end) as diary_content_pv
        ,count(distinct case when (referrer='search_result_question_answer' or json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1]='search_result_question_answer') and params['card_content_type'] in ('service') then array(params['card_id'],app_session_id) end) as qa_service_pv
        ,count(distinct case when (referrer='search_result_question_answer' or json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1]='search_result_question_answer') and params['card_content_type'] in ('qa','diary','user_post','answer') then array(params['card_id'],app_session_id) end) as qa_content_pv
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
        AND action='on_click_card'
        and params['card_content_type'] in ('service','qa','diary','user_post','answer')
        AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail')
        AND (referrer in ('search_result_diary','search_result_question_answer') or
        (params['referrer_link'] like '%[%' and
        json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1] in ('search_result_diary','search_result_question_answer')))
        group by partition_date,cl_id
    )b
    on a.partition_date=b.partition_date and a.cl_id=b.cl_id
    full join
    (
        -- c: clicks on the video consultation buttons, deduplicated by time_str
        SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) as partition_date,cl_id
        ,count(distinct case when (referrer='search_result_diary' or json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1]='search_result_diary') then time_str end ) as diary_cons_pv
        ,count(distinct case when (referrer='search_result_question_answer' or json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1]='search_result_question_answer') then time_str end ) as qa_cons_pv
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
        AND action='on_click_button'
        and params['button_name'] in ('video_interview','referral')
        AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail')
        AND (referrer in ('search_result_diary','search_result_question_answer') or
        (params['referrer_link'] like '%[%' and
        json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1] in ('search_result_diary','search_result_question_answer')))
        group by partition_date,cl_id
    )c
    on nvl(a.partition_date,b.partition_date)=c.partition_date and nvl(a.cl_id,b.cl_id)=c.cl_id
)t5
on t1.partition_date=t5.partition_date and t1.device_id=t5.cl_id
left JOIN
( -- t6: precise card exposures on the search result tabs.
  -- Both tabs deduplicate on (card_id, app_session_id), the same key the click counters
  -- in t7 use, so CTR numerator and denominator are measured the same way.
    SELECT device_id,concat_ws('-',substr(partition_day,1,4),substr(partition_day,5,2),substr(partition_day,7,2)) as partition_date
    ,count(distinct CASE WHEN page_code='search_result_diary' THEN array(card_id,app_session_id) END) as diary_exp_pv
    ,count(distinct CASE WHEN page_code='search_result_question_answer' THEN array(card_id,app_session_id) END) as qa_exp_pv
    FROM ml.mid_ml_c_et_pe_preciseexposure_dimen_d
    WHERE partition_day =regexp_replace(DATE_SUB(current_date,1) ,'-','')
    and action in ('page_precise_exposure','home_choiceness_card_exposure')
    and is_exposure = '1'
    and page_code in ('search_result_diary','search_result_question_answer')
    AND card_content_type IN ('answer','diary','user_post','doctor_post','question','qa')
    group by partition_day,device_id
)t6
on t1.partition_date=t6.partition_date and t1.device_id=t6.device_id
LEFT JOIN
( -- t7: card clicks on the search result tabs. It is joined through t6, so clicks are
  -- only kept for devices that also have an exposure row.
    SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) as partition_date,cl_id
    ,count(distinct CASE WHEN page_name='search_result_diary' THEN array(params['card_id'],app_session_id) END) as diary_click_pv
    ,count(distinct CASE WHEN page_name='search_result_question_answer' THEN array(params['card_id'],app_session_id) END) as qa_click_pv
    FROM online.bl_hdfs_maidian_updates
    WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
    AND action = 'on_click_card'
    AND params['card_content_type'] in ('answer','diary','question','qa')
    AND page_name in ('search_result_diary','search_result_question_answer')
    GROUP BY cl_id,partition_date
)t7
on t6.partition_date=t7.partition_date and t6.device_id=t7.cl_id
left JOIN
(
    -- t9: number of distinct content items (business_id) read per device from the search tabs
    SELECT cl_id,concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) as partition_date,count(DISTINCT params['business_id']) as business_id_num
    FROM online.bl_hdfs_maidian_updates
    WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
    AND action='page_view'
    AND page_name in ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail'
    ,'question_answer_detail','article_detail')
    AND referrer in ('search_result_diary','search_result_question_answer')
    AND params['business_id'] is not NULL AND params['business_id']<> ''
    GROUP BY cl_id,partition_date
)t9
on t1.partition_date=t9.partition_date and t1.device_id=t9.cl_id
left join
( -- blacklisted (abnormal) devices, removed by the anti-join in the WHERE clause
    select distinct device_id
    from ML.ML_D_CT_DV_DEVICECLEAN_DIMEN_D
    where PARTITION_DAY = regexp_replace(DATE_SUB(current_date,1) ,'-','')
    AND is_abnormal_device = 'true'
)spam_pv
on t1.device_id =spam_pv.device_id
WHERE spam_pv.device_id IS NULL
group by t1.partition_date,device_os_type,active_type,channel
#step3.job
type=command
dependencies=step2
command=curl -X GET http://localhost:8553/api/report/email/daily_content/weiyimin@igengmei.com/hanyingyue@igengmei.com,jiaqingqing@igengmei.com
\ No newline at end of file
command=curl -X GET http://localhost:8553/api/report/email/daily_content/liudi@igengmei.com,shenzheng@igengmei.com,wangxin@igengmei.com,zhaoyang@igengmei.com/weiyimin@igengmei.com,hanyingyue@igengmei.com,jiaqingqing@igengmei.com
\ No newline at end of file
......@@ -3,12 +3,12 @@ select substr(day_id,1,6) `日期`
,active_type `活跃`
,channel `渠道`
,round(avg(home_good_click_uv),0) as `首页good click设备数`
,round(avg(if(home_good_click_quality=0,NULL,home_good_click_quality)),2) as `首页gc用户次留率/全站次留率`
,concat(round(avg(if(home_good_click_quality=0,NULL,home_good_click_quality))*100,2),'%') as `首页gc用户次留率/全站次留率`
,round(avg(home_ungood_click_uv),0) as `点击首页卡片但非gc设备数`
,round(avg(if(home_ungood_click_quality=0,NULL,home_ungood_click_quality)),2) as `点击首页卡片但非gc设备次留率/全站次留率`
,concat(round(avg(if(home_ungood_click_quality=0,NULL,home_ungood_click_quality))*100,2),'%') as `点击首页卡片但非gc设备次留率/全站次留率`
,round(avg(no_click_uv),0) as `未点击首页feed卡片设备数`
,round(avg(if(no_click_uv_quality=0,NULL,no_click_uv_quality)),2) as `未点击首页feed卡片设备次留率/全站次留率`
,round(avg(if(home_good_click_retention_quality=0,NULL,home_good_click_retention_quality)),2) as `当天点击首页feed卡片,且次日依旧点击的次留率/全站次留率`
,concat(round(avg(if(no_click_uv_quality=0,NULL,no_click_uv_quality))*100,2),'%') as `未点击首页feed卡片设备次留率/全站次留率`
,concat(round(avg(if(home_good_click_retention_quality=0,NULL,home_good_click_retention_quality))*100,2),'%') as `当天点击首页feed卡片,且次日依旧点击的次留率/全站次留率`
FROM pm.tl_pm_content_retention
where partition_day>='20201018' and partition_day <= regexp_replace(DATE_SUB(current_date,1) ,'-','')
group by substr(day_id,1,6),device_os_type,active_type,channel
......
SELECT
    -- Daily detail rows of pm.tl_pm_search_content, one output column per metric.
    -- The backtick aliases are the headers shown in the report.
    day_id                          AS `日期`,
    device_os_type                  AS `平台`,
    active_type                     AS `活跃`,
    channel                         AS `渠道`,
    dau                             AS `dau`,
    search_uv                       AS `搜索uv`,
    search_uv_in_dau                AS `搜索uv/dau`,
    do_search_uv                    AS `完成搜索uv`,
    do_search_pv                    AS `完成搜索pv`,
    do_search_rate                  AS `完成搜索的用户比例`,
    search_times                    AS `搜索用户人均使用次数`,
    search_diary_to_content_uv      AS `来源于搜索日记tab的日记及帖子页uv`,
    search_diary_to_content_pv      AS `来源于搜索日记tab的日记及帖子页pv`,
    search_diary_to_content_uv_rate AS `来源于搜索日记tab的日记及帖子页uv/搜索uv`,
    search_qa_to_content_uv         AS `来源于搜索问答tab的问答页uv`,
    search_qa_to_content_pv         AS `来源于搜索问答tab的问答页pv`,
    search_qa_to_content_uv_rate    AS `来源于搜索问答tab的问答页uv/搜索uv`,
    diary_tab_ctr                   AS `日记tab ctr`,
    diary_tab_pv_per_uv             AS `日记tab卡片点击pv/uv`,
    qa_tab_ctr                      AS `问答tab ctr`,
    qa_tab_pv_per_uv                AS `问答tab卡片点击pv/uv`,
    content_pv_in_search_uv         AS `来源于搜索日记和问答tab的内容页PV/搜索uv`,
    diary_second_pv_in_search_uv    AS `来源于搜索日记tab的内容二跳PV/搜索uv`,
    qa_second_pv_in_search_uv       AS `来源于搜索问答tab的内容二跳PV/搜索uv`,
    content_pagestay_in_search_uv   AS `来源于搜索日记及问答tab的内容总时长/搜索uv(s)`,
    avg_contents                    AS `完成搜索用户人均阅读内容篇数`,
    search_retention                AS `功能次留`
FROM pm.tl_pm_search_content
-- partitions from 20201109 up to and including the day before current_date
WHERE partition_day >= '20201109'
  AND partition_day <= regexp_replace(DATE_SUB(current_date, 1), '-', '')
ORDER BY `日期`, `平台`, `活跃`, `渠道`
\ No newline at end of file
......@@ -49,12 +49,12 @@ SELECT
SELECT action_date,cl_id,count(1) as wel_pv
FROM
(
SELECT concat_ws('-',substr(partition_date,0,4),substr(partition_date,5,2),substr(partition_date,7,2)) AS action_date,cl_id
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= REGEXP_REPLACE(trunc(date_sub(current_date(),8),'MM'),'-','')
SELECT concat_ws('-',substr(partition_date,0,4),substr(partition_date,5,2),substr(partition_date,7,2)) AS action_date,cl_id
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= REGEXP_REPLACE(trunc(date_sub(current_date(),8),'MM'),'-','')
AND partition_date <=regexp_replace(date_sub(current_date(),1),'-','')
AND page_name in ('welfare_detail','organization_detail','expert_detail')
AND action = 'page_view'
AND page_name in ('welfare_detail','organization_detail','expert_detail')
AND action = 'page_view'
)a
LEFT JOIN
( -- 2.去掉疑似机构刷量的PV和UV
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment