Commit 42c1991d authored by edz's avatar edz

Merge branch 'master' of http://git.wanmeizhensuo.com/data/bi-report into hanyingyue

parents 8ea8237b a89761a8
......@@ -3,4 +3,5 @@ home_content_detail=首页内容数据-分日明细
home_content_by_month=首页内容数据-月均
ai_content_detail=ai内容数据-分日明细
ai_content_by_month=ai内容数据-月均
home_content_retention=分类用户次留
\ No newline at end of file
home_content_retention=分类用户次留
search_content_detail=搜索到内容的转化
\ No newline at end of file
......@@ -21,14 +21,14 @@ CREATE TABLE IF NOT EXISTS pm.tl_pm_content_retention
active_type string comment '{"chs_name":"活跃类型","description":"","etl":"","value":"","remark":""}',
channel string comment '{"chs_name":"渠道","description":"","etl":"","value":"","remark":""}',
dau BIGINT comment '{"chs_name":"日活","description":"","etl":"","value":"","remark":""}',
retention_rate string comment '{"chs_name":"次留率","description":"","etl":"","value":"","remark":""}',
retention_rate double comment '{"chs_name":"次留率","description":"","etl":"","value":"","remark":""}',
home_good_click_uv BIGINT comment '{"chs_name":"首页good click设备数","description":"","etl":"","value":"","remark":""}',
home_good_click_quality string comment '{"chs_name":"首页gc用户次留率/全站次留率","description":"","etl":"","value":"","remark":""}',
home_good_click_quality double comment '{"chs_name":"首页gc用户次留率/全站次留率","description":"","etl":"","value":"","remark":""}',
home_ungood_click_uv BIGINT comment '{"chs_name":"点击首页卡片但非gc设备数","description":"","etl":"","value":"","remark":""}',
home_ungood_click_quality string comment '{"chs_name":"点击首页卡片但非gc设备次留率/全站次留率","description":"","etl":"","value":"","remark":""}',
home_ungood_click_quality double comment '{"chs_name":"点击首页卡片但非gc设备次留率/全站次留率","description":"","etl":"","value":"","remark":""}',
no_click_uv BIGINT comment '{"chs_name":"未点击首页feed卡片设备数","description":"","etl":"","value":"","remark":""}',
no_click_uv_quality string comment '{"chs_name":"未点击首页feed卡片设备次留率/全站次留率","description":"","etl":"","value":"","remark":""}',
home_good_click_retention_quality string comment '{"chs_name":"当天点击首页feed卡片,且次日依旧点击的次留率/全站次留率","description":"","etl":"","value":"","remark":""}'
no_click_uv_quality double comment '{"chs_name":"未点击首页feed卡片设备次留率/全站次留率","description":"","etl":"","value":"","remark":""}',
home_good_click_retention_quality double comment '{"chs_name":"当天点击首页feed卡片,且次日依旧点击的次留率/全站次留率","description":"","etl":"","value":"","remark":""}'
)comment '内容日报-分用户次留'
PARTITIONED BY (PARTITION_DAY STRING comment '分区日期')
ROW FORMAT DELIMITED
......
--***************************************************************
--*Script name:
--*Purpose: content daily report - simplified version - for Sijing
--*Business name: pm
--*Input data:
--*Author: weiyimin@igengmei.com
--*Last updated:
--***************************************************************
--Set global variables & UDFs
SET mapreduce.job.queuename=data;
--Switch to the pm database
USE pm;
--Create the internal table pm.tl_pm_search_content (content daily report: search-to-content conversion).
--The table is stored as tab-delimited text and partitioned by PARTITION_DAY.
--Count columns are BIGINT. Ratio and average columns are declared string; the loading query
--writes several of them as already formatted text that ends with a percent sign.
--CREATE TABLE IF NOT EXISTS leaves an already existing table untouched, so editing a column
--type here does not alter a table that was created earlier.
CREATE TABLE IF NOT EXISTS pm.tl_pm_search_content
(
day_id string comment '{"chs_name":"当天日期","description":"","etl":"","value":"","remark":""}',
device_os_type string comment '{"chs_name":"设备类型","description":"","etl":"","value":"","remark":""}',
active_type string comment '{"chs_name":"活跃类型","description":"","etl":"","value":"","remark":""}',
channel string comment '{"chs_name":"渠道","description":"","etl":"","value":"","remark":""}',
dau BIGINT comment '{"chs_name":"日活","description":"","etl":"","value":"","remark":""}',
search_uv BIGINT comment '{"chs_name":"搜索uv","description":"","etl":"","value":"","remark":""}',
search_uv_in_dau string comment '{"chs_name":"搜索uv/dau","description":"","etl":"","value":"","remark":""}',
do_search_uv BIGINT comment '{"chs_name":"完成搜索uv","description":"","etl":"","value":"","remark":""}',
do_search_pv BIGINT comment '{"chs_name":"完成搜索pv","description":"","etl":"","value":"","remark":""}',
do_search_rate string comment '{"chs_name":"完成搜索的用户比例","description":"","etl":"","value":"","remark":""}',
search_times string comment '{"chs_name":"搜索用户人均使用次数","description":"","etl":"","value":"","remark":""}',
search_diary_to_content_uv BIGINT comment '{"chs_name":"来源于搜索日记tab的日记及帖子页uv","description":"","etl":"","value":"","remark":""}',
search_diary_to_content_pv BIGINT comment '{"chs_name":"来源于搜索日记tab的日记及帖子页pv","description":"","etl":"","value":"","remark":""}',
search_diary_to_content_uv_rate string comment '{"chs_name":"来源于搜索日记tab的日记及帖子页uv/搜索uv","description":"","etl":"","value":"","remark":""}',
search_qa_to_content_uv BIGINT comment '{"chs_name":"来源于搜索问答tab的问答页uv","description":"","etl":"","value":"","remark":""}',
search_qa_to_content_pv BIGINT comment '{"chs_name":"来源于搜索问答tab的问答页pv","description":"","etl":"","value":"","remark":""}',
search_qa_to_content_uv_rate string comment '{"chs_name":"来源于搜索问答tab的问答页uv/搜索uv","description":"","etl":"","value":"","remark":""}',
diary_tab_ctr string comment '{"chs_name":"日记tab ctr","description":"","etl":"","value":"","remark":""}',
diary_tab_pv_per_uv string comment '{"chs_name":"日记tab卡片点击pv/uv","description":"","etl":"","value":"","remark":""}',
qa_tab_ctr string comment '{"chs_name":"问答tab ctr","description":"","etl":"","value":"","remark":""}',
qa_tab_pv_per_uv string comment '{"chs_name":"问答tab卡片点击pv/uv","description":"","etl":"","value":"","remark":""}',
content_pv_in_search_uv string comment '{"chs_name":"来源于搜索日记和问答tab的内容页PV/搜索uv","description":"","etl":"","value":"","remark":""}',
diary_second_pv_in_search_uv string comment '{"chs_name":"来源于搜索日记tab的内容二跳PV/搜索uv","description":"","etl":"","value":"","remark":""}',
qa_second_pv_in_search_uv string comment '{"chs_name":"来源于搜索问答tab的内容二跳PV/搜索uv","description":"","etl":"","value":"","remark":""}',
content_pagestay_in_search_uv string comment '{"chs_name":"来源于搜索日记及问答tab的内容总时长/搜索uv(s)","description":"","etl":"","value":"","remark":""}',
avg_contents string comment '{"chs_name":"完成搜索用户人均阅读内容篇数","description":"","etl":"","value":"","remark":""}',
search_retention string comment '{"chs_name":"功能次留","description":"","etl":"","value":"","remark":""}'
)comment '内容日报-搜索到内容的转化'
PARTITIONED BY (PARTITION_DAY STRING comment '分区日期')
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
COLLECTION ITEMS TERMINATED BY '\002'
MAP KEYS TERMINATED BY '\003'
LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
\ No newline at end of file
......@@ -340,9 +340,9 @@ FROM
LEFT JOIN
(SELECT code,is_spam,is_ai_channel,partition_day
FROM DIM.DIM_AI_CHANNEL_ZP_NEW
WHERE partition_day =regexp_replace(DATE_SUB(current_date,1) ,'-',''))tmp
WHERE partition_day >=regexp_replace(DATE_SUB(current_date,2) ,'-','') and partition_day<=regexp_replace(DATE_SUB(current_date,1) ,'-',''))tmp
on first_channel_source_type=tmp.code and m.partition_date=tmp.partition_day
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,2) ,'-','') and partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND active_type IN ('1','2','4')
AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
......@@ -362,7 +362,7 @@ FROM
SELECT device_id
,concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) AS partition_date
FROM online.ml_device_day_active_status
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,2) ,'-','') and partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
)t2
ON t1.device_id=t2.device_id and date_add(t1.partition_date,1)=t2.partition_date
......@@ -372,7 +372,7 @@ FROM
,cl_id
,count(distinct time_str) as pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,2) ,'-','') and partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND action='on_click_card'
AND page_name ='home'
and params['card_content_type'] in ('diary','user_post','answer','qa')
......@@ -385,7 +385,7 @@ FROM
,cl_id
,count(distinct time_str) as pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,2) ,'-','') and partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
and referrer='home'
AND action = 'page_view'
and params['is_push']=0--ios从push点击进入的数据referrer也为首页,故需要去掉(可能存在个别时期的数据有问题)
......@@ -401,7 +401,7 @@ FROM
,cl_id
,count(distinct time_str) as pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,2) ,'-','') and partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
and referrer='home'
AND action = 'page_view'
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail',
......@@ -420,4 +420,332 @@ FROM
on t1.device_id =spam_pv.device_id
WHERE spam_pv.device_id IS NULL
group by t1.partition_date,device_os_type,active_type,channel
)t1
\ No newline at end of file
)t1;
-- Loads one partition of pm.tl_pm_search_content with the daily search-to-content metrics,
-- grouped by day, device OS, active type and channel.
-- INSERT ... SELECT assigns values to the target columns by position, so the expressions
-- below must stay in the column order of the table definition. Aliases are kept equal to
-- the target column names to make that mapping easy to check.
INSERT OVERWRITE TABLE pm.tl_pm_search_content PARTITION (PARTITION_DAY = ${partition_day})
SELECT
    t1.partition_date as day_id
    ,device_os_type
    ,active_type
    ,channel
    -- active devices in the group
    ,count(distinct t1.device_id) as `DAU`
    -- devices with at least one search page view (t4.search_pv > 0)
    ,count(distinct case when t4.search_pv>0 then t4.cl_id end) as search_uv
    -- search uv as a percentage of DAU
    ,concat(round(count(distinct case when t4.search_pv>0 then t4.cl_id end)/count(distinct t1.device_id)*100,2),'%') as search_uv_in_dau
    -- devices and events that completed a search (rows of t2)
    ,count(distinct t2.cl_id) as do_search_uv
    ,sum(t2.all_search_pv) as do_search_pv
    -- completed-search devices as a percentage of DAU
    ,concat(round(count(distinct t2.cl_id)/count(distinct t1.device_id)*100,2),'%') as do_search_rate
    -- completed searches per searching device
    ,round(sum(t2.all_search_pv)/count(distinct t2.cl_id),2) as search_times
    -- content pages reached from the diary tab of the search results
    ,count(distinct case when referrer_search_diary_pv>0 then t4.cl_id end) search_diary_to_content_uv
    ,sum(referrer_search_diary_pv) search_diary_to_content_pv
    ,concat(round(count(distinct case when referrer_search_diary_pv>0 then t4.cl_id end )/count(distinct case when t4.search_pv>0 then t4.cl_id end)*100,2),'%') search_diary_to_content_uv_rate
    -- content pages reached from the question-answer tab of the search results
    ,count(distinct case when referrer_search_qa_pv>0 then t4.cl_id end ) search_qa_to_content_uv
    ,sum(referrer_search_qa_pv) search_qa_to_content_pv
    ,concat(round(count(distinct case when referrer_search_qa_pv>0 then t4.cl_id end )/count(distinct case when t4.search_pv>0 then t4.cl_id end)*100,2),'%') search_qa_to_content_uv_rate
    -- card click-through rate (clicks from t7 over exposures from t6) and clicks per clicking device
    ,concat(round(sum(diary_click_pv)/sum(diary_exp_pv)*100,2),'%') diary_tab_ctr
    ,round(sum(diary_click_pv)/count(distinct case when diary_click_pv>0 then t6.device_id end),2) diary_tab_pv_per_uv
    ,concat(round(sum(qa_click_pv)/sum(qa_exp_pv)*100,2),'%') qa_tab_ctr
    ,round(sum(qa_click_pv)/count(distinct case when qa_click_pv>0 then t6.device_id end),2) qa_tab_pv_per_uv
    -- content page views that came from the diary or question-answer tab, per search uv
    -- (alias renamed from content_uv_in_search_uv to match the target column)
    ,round(sum(referrer_search_pv)/count(distinct case when t4.search_pv>0 then t4.cl_id end),2) as content_pv_in_search_uv
    -- percentage of search uv with a weighted second-hop count above zero (t5)
    ,concat(round(count(distinct case when total_diary_second_pv>0 then t4.cl_id end )/count(distinct case when t4.search_pv>0 then t4.cl_id end)*100,2),'%') diary_second_pv_in_search_uv
    ,concat(round(count(distinct case when total_qa_second_pv>0 then t4.cl_id end )/count(distinct case when t4.search_pv>0 then t4.cl_id end)*100,2),'%') qa_second_pv_in_search_uv
    -- summed page stay on content reached from search, per search uv
    ,round(sum(referrer_search_pagestay)/count(distinct case when t4.search_pv>0 then t4.cl_id end),2) as content_pagestay_in_search_uv
    -- distinct business ids read (t9) per device that completed a search
    ,round(sum(business_id_num)/count(distinct t2.cl_id),2) as avg_contents
    -- devices matched in t3 as a percentage of devices that completed a search
    ,concat(round(count(distinct t3.cl_id)/count(distinct t2.cl_id)*100,2),'%') as search_retention
FROM
( -- dau: one row per active device and per combination of the real value and the
  -- total bucket of channel, device OS and active type
    SELECT
        base.partition_date,
        lv_active.active_type,
        lv_os.device_os_type,
        lv_channel.channel,
        device_id
    FROM
    (
        SELECT
            concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) AS partition_date,
            m.device_id,
            -- every dimension becomes a two-element array: the real value plus the total bucket
            array(device_os_type ,'合计') AS device_os_type,
            array(
                CASE
                    WHEN active_type = '4' THEN '老活'
                    WHEN active_type IN ('1','2') THEN '新增'
                END,
                '合计'
            ) AS active_type,
            array(CASE WHEN is_ai_channel = 'true' THEN 'AI' ELSE '其他' END , '合计') AS channel
        FROM online.ml_device_day_active_status m
        LEFT JOIN
        (
            -- channel dimension for the same day, used to flag AI channels
            SELECT code, is_ai_channel, partition_day
            FROM DIM.DIM_AI_CHANNEL_ZP_NEW
            WHERE partition_day=regexp_replace(DATE_SUB(current_date,1) ,'-','')
        ) ai_chn
            ON m.partition_date=ai_chn.partition_day AND first_channel_source_type=code
        WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
          AND active_type IN ('1','2','4')
    ) base
    -- explode the three arrays so each device also contributes to every total bucket
    LATERAL VIEW explode(base.channel) lv_channel AS channel
    LATERAL VIEW explode(base.device_os_type) lv_os AS device_os_type
    LATERAL VIEW explode(base.active_type) lv_active AS active_type
)t1
left JOIN
(
--search pv/uv: number of completed-search events per device for current_date - 1.
--The inner UNION ALL collects the different tracking events that count as a search.
SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) as partition_date,cl_id
,count(1) as all_search_pv
FROM
(
SELECT cl_id,partition_date
FROM online.bl_hdfs_maidian_updates
WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND ((action = 'do_search' AND params['input_type']<>'everyone_watch')
or action='search_result_click_search')
UNION all
--home page everyone-is-watching module
SELECT cl_id,partition_date
FROM online.bl_hdfs_maidian_updates
WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND action = 'do_search'
and params['input_type']='everyone_watch'
and params['tab']='精选'
and page_name='home'
AND params['query'] not in ('AI测颜值','AI测肤质') --these two words do not jump to the search result page
union all
--click-to-search action of the grey-release search
SELECT cl_id,partition_date
FROM online.bl_hdfs_maidian_updates
WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND action = 'on_click_card'
AND params['page_name']='search_home'
union all
--another tracking event for the home page everyone-is-watching module
SELECT cl_id,partition_date
FROM online.bl_hdfs_maidian_updates
WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND action = 'on_click_card'
AND params['in_page_pos']='猜你喜欢'
--AND params['tab_name']='精选'
AND params['card_type']='search_word'
AND params['card_name'] not in ('AI测颜值','AI测肤质') --these two words do not jump to the search result page
--AND page_name='home' (disabled: page_name is empty on android)
union all
--hot search words on the welfare home page
SELECT cl_id,partition_date
FROM online.bl_hdfs_maidian_updates
WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND action = 'on_click_card'
AND params['page_name']='welfare_home'
AND params['card_type'] ='search_word'
AND params['in_page_pos']='大家都在搜'
union all
--search through highlighted words inside content
SELECT cl_id,partition_date
FROM online.bl_hdfs_maidian_updates
WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND action = 'on_click_card'
AND params['card_type'] ='highlight_word'
)click
group by partition_date,cl_id
)t2
on t1.partition_date=t2.partition_date and t1.device_id=t2.cl_id
left join
(
--search pv/uv with the same event definition as t2. It is joined on the day after the
--t2 day, so it is meant to identify devices that search again on the following day.
SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) as partition_date,cl_id
,count(1) as all_search_pv
FROM
(
SELECT cl_id,partition_date
FROM online.bl_hdfs_maidian_updates
WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND ((action = 'do_search' AND params['input_type']<>'everyone_watch')
or action='search_result_click_search')
UNION all
--home page everyone-is-watching module
SELECT cl_id,partition_date
FROM online.bl_hdfs_maidian_updates
WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND action = 'do_search'
and params['input_type']='everyone_watch'
and params['tab']='精选'
and page_name='home'
AND params['query'] not in ('AI测颜值','AI测肤质') --these two words do not jump to the search result page
union all
--click-to-search action of the grey-release search
SELECT cl_id,partition_date
FROM online.bl_hdfs_maidian_updates
WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND action = 'on_click_card'
AND params['page_name']='search_home'
union all
--another tracking event for the home page everyone-is-watching module
SELECT cl_id,partition_date
FROM online.bl_hdfs_maidian_updates
WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND action = 'on_click_card'
AND params['in_page_pos']='猜你喜欢'
--AND params['tab_name']='精选'
AND params['card_type']='search_word'
AND params['card_name'] not in ('AI测颜值','AI测肤质') --these two words do not jump to the search result page
--AND page_name='home' (disabled: page_name is empty on android)
union all
--hot search words on the welfare home page
SELECT cl_id,partition_date
FROM online.bl_hdfs_maidian_updates
WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND action = 'on_click_card'
AND params['page_name']='welfare_home'
AND params['card_type'] ='search_word'
AND params['in_page_pos']='大家都在搜'
union all
--search through highlighted words inside content
SELECT cl_id,partition_date
FROM online.bl_hdfs_maidian_updates
WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND action = 'on_click_card'
AND params['card_type'] ='highlight_word'
)click
group by partition_date,cl_id
)t3
--NOTE(review): t2 and t3 both read only the partition for current_date - 1, so
--date_add(t2.partition_date,1) can never equal t3.partition_date and no t3 row matches.
--The search_retention metric then never counts a retained device. A next-day retention
--needs the base day to lie at least two days back, so confirm the intended date window.
on date_add(t2.partition_date,1)=t3.partition_date and t2.cl_id=t3.cl_id
left JOIN
(
--page view pv/uv per device for current_date - 1:
--  search_pv                 views of search home and search result pages
--  referrer_search_diary_pv  diary and post detail views whose referrer is search_result_diary
--  referrer_search_qa_pv     question and answer detail views whose referrer is search_result_question_answer
--  referrer_search_pv        detail views coming from either of the two tabs
--  referrer_search_pagestay  summed page_stay of those detail views
SELECT
concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) as partition_date,cl_id
,count(CASE WHEN page_name in ('search_home','search_home_more','search_home_welfare','search_home_diary','search_home_wiki','search_home_post','search_home_hospital','search_home_doctor'
,'search_result_diary','search_result_doctor','search_result_hospital','search_result_more'
,'search_result_more_infomation','search_result_more_user','search_result_post','search_result_welfare'
,'search_result_wiki','search_result_question_answer') THEN page.cl_id END) as search_pv
,count(CASE when referrer in ('search_result_diary') and page_name in ('diary_detail','post_detail','user_post_detail','doctor_post_detail') THEN page.cl_id END) as referrer_search_diary_pv
,count(CASE when referrer in ('search_result_question_answer') and page_name in ('question_answer_detail','answer_detail','question_detail') THEN page.cl_id END) as referrer_search_qa_pv
,count(case when referrer in ('search_result_diary','search_result_question_answer') and page_name in ('diary_detail','post_detail','user_post_detail','doctor_post_detail','question_answer_detail','answer_detail','question_detail') then page.cl_id end) as referrer_search_pv
--NOTE(review): page_stay is compared with quoted literals. If the column is a string this is a
--lexicographic comparison and not a numeric range check - confirm the column type.
,sum(CASE WHEN referrer in ('search_result_diary','search_result_question_answer') and page_name in ('diary_detail','post_detail','user_post_detail','doctor_post_detail','question_answer_detail','answer_detail','question_detail') and page_stay >= '0' and page_stay < '1000' THEN page.page_stay END) as referrer_search_pagestay
FROM
(
--referrer is taken from params['referrer'] in this subquery
SELECT cl_id,partition_date,page_name,params['referrer'] as referrer,page_stay
FROM online.bl_hdfs_maidian_updates
WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND action='page_view'
AND page_name in ('search_home','search_home_more','search_home_welfare','search_home_diary','search_home_wiki','search_home_post','search_home_hospital','search_home_doctor'
,'diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail'
,'question_answer_detail','article_detail'
,'search_result_diary','search_result_doctor','search_result_hospital','search_result_more'
,'search_result_more_infomation','search_result_more_user','search_result_post','search_result_welfare'
,'search_result_wiki','search_result_question_answer')
)page
group by partition_date,cl_id
)t4
on t1.partition_date=t4.partition_date and t1.device_id=t4.cl_id
left JOIN
( --second-hop actions on content pages that were reached from the search result pages.
--Three per-device aggregates (a, b, c) are combined with full joins so that a device present
--in any of them is kept. The totals add navbar, service-card and consultation counts at full
--weight and content-card counts at a weight of 0.2.
--NOTE(review): json_split is a UDF defined outside this script. The code reads the last
--element of its result for params['referrer_link'] - presumably the most recent referrer, confirm.
SELECT nvl(nvl(a.partition_date,b.partition_date),c.partition_date) as partition_date
,nvl(nvl(a.cl_id,b.cl_id),c.cl_id) as cl_id
,nvl(diary_navbar_pv,0)+nvl(diary_service_pv,0)+nvl(diary_content_pv,0)*0.2+nvl(diary_cons_pv,0) as total_diary_second_pv
,nvl(qa_navbar_pv,0)+nvl(qa_service_pv,0)+nvl(qa_content_pv,0)*0.2+nvl(qa_cons_pv,0) as total_qa_second_pv
FROM
(
--search box clicks and highlight word clicks, counted by distinct time_str
SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) as partition_date,cl_id
,count(distinct case when (referrer='search_result_diary' or json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1]='search_result_diary') then time_str end) as diary_navbar_pv
,count(distinct case when (referrer='search_result_question_answer' or json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1]='search_result_question_answer') then time_str end) as qa_navbar_pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND (action in ('on_click_navbar_search','do_search') or (action='on_click_card' and params['card_type']='highlight_word'))
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail')
AND (referrer in ('search_result_diary','search_result_question_answer') or
(params['referrer_link'] like '%[%' and
json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1]in ('search_result_diary','search_result_question_answer')))
group by partition_date,cl_id
)a
full join
(
--clicks on service cards and content cards, counted by distinct (card_id, app_session_id)
SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) as partition_date,cl_id
,count(distinct case when (referrer='search_result_diary' or json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1]='search_result_diary') and params['card_content_type']in ('service') then array( params['card_id'],app_session_id) end) as diary_service_pv
,count(distinct case when (referrer='search_result_diary' or json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1]='search_result_diary') and params['card_content_type']in ('qa','diary','user_post','answer') then array( params['card_id'],app_session_id) end) as diary_content_pv
,count(distinct case when (referrer='search_result_question_answer' or json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1]='search_result_question_answer') and params['card_content_type']in ('service') then array(params['card_id'],app_session_id) end) as qa_service_pv
,count(distinct case when (referrer='search_result_question_answer' or json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1]='search_result_question_answer') and params['card_content_type']in ('qa','diary','user_post','answer') then array(params['card_id'],app_session_id) end) as qa_content_pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND action='on_click_card'
and params['card_content_type']in ('service','qa','diary','user_post','answer')
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail')
AND (referrer in ('search_result_diary','search_result_question_answer') or
(params['referrer_link'] like '%[%' and
json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1]in ('search_result_diary','search_result_question_answer')))
group by partition_date,cl_id
)b
on a.partition_date=b.partition_date and a.cl_id=b.cl_id
full join
(
--clicks on the video consultation and referral buttons, counted by distinct time_str
SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) as partition_date,cl_id
,count(distinct case when (referrer='search_result_diary' or json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1]='search_result_diary') then time_str end ) as diary_cons_pv
,count(distinct case when (referrer='search_result_question_answer' or json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1]='search_result_question_answer') then time_str end ) as qa_cons_pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND action='on_click_button'
and params['button_name'] in ('video_interview','referral')
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail')
AND (referrer in ('search_result_diary','search_result_question_answer') or
(params['referrer_link'] like '%[%' and
json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1]in ('search_result_diary','search_result_question_answer')))
group by partition_date,cl_id
)c
--a and b were full joined, so either side may be NULL - match c on whichever key is present
on nvl(a.partition_date,b.partition_date)=c.partition_date and nvl(a.cl_id,b.cl_id)=c.cl_id
)t5
on t1.partition_date=t5.partition_date and t1.device_id=t5.cl_id
left JOIN
(
    -- Precise card exposures on the search result pages, per device, for current_date - 1.
    -- An exposure is identified by (card_id, app_session_id) and is counted once per device.
    -- qa_exp_pv now uses DISTINCT like diary_exp_pv. It previously counted every matching row,
    -- while the click counts it is divided by (qa_tab_ctr) are distinct on the same pair.
    SELECT
        device_id
        ,concat_ws('-',substr(partition_day,1,4),substr(partition_day,5,2),substr(partition_day,7,2)) as partition_date
        ,count(distinct CASE WHEN page_code='search_result_diary' THEN array(card_id,app_session_id) END) as diary_exp_pv
        ,count(distinct CASE WHEN page_code='search_result_question_answer' THEN array(card_id,app_session_id) END) as qa_exp_pv
    FROM ml.mid_ml_c_et_pe_preciseexposure_dimen_d
    WHERE partition_day =regexp_replace(DATE_SUB(current_date,1) ,'-','')
      and action in ('page_precise_exposure','home_choiceness_card_exposure')
      and is_exposure = '1'
      and page_code in ('search_result_diary','search_result_question_answer')
      AND card_content_type IN ('answer','diary','user_post','doctor_post','question','qa')
    group by partition_day,device_id
)t6
on t1.partition_date=t6.partition_date and t1.device_id=t6.device_id
LEFT JOIN
(
    -- Card clicks on the search result pages, per device, for current_date - 1.
    -- A click is identified by (card_id, app_session_id) and counted once per device.
    SELECT
        cl_id,
        concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) AS partition_date,
        count(DISTINCT
            CASE
                WHEN page_name='search_result_diary' THEN array(params['card_id'],app_session_id)
                ELSE NULL
            END) AS diary_click_pv,
        count(DISTINCT
            CASE
                WHEN page_name='search_result_question_answer' THEN array(params['card_id'],app_session_id)
                ELSE NULL
            END) AS qa_click_pv
    FROM online.bl_hdfs_maidian_updates
    WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
      AND page_name IN ('search_result_diary','search_result_question_answer')
      AND action = 'on_click_card'
      AND params['card_content_type'] IN ('answer','diary','question','qa')
    GROUP BY partition_date, cl_id
)t7
-- joined to the exposure rows (t6) and not to t1, so clicks attach only to devices that
-- also have exposure rows
    ON t7.partition_date=t6.partition_date
   AND t7.cl_id=t6.device_id
LEFT JOIN
(
    -- Number of distinct contents read per device: distinct non-empty business_id values on
    -- content detail pages whose referrer is one of the two search result tabs.
    SELECT
        concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) AS partition_date,
        cl_id,
        count(DISTINCT params['business_id']) AS business_id_num
    FROM online.bl_hdfs_maidian_updates
    WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
      AND action='page_view'
      AND referrer IN ('search_result_diary','search_result_question_answer')
      AND page_name IN (
            'diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail',
            'question_detail','answer_detail','question_answer_detail','article_detail'
          )
      AND params['business_id'] IS NOT NULL
      AND params['business_id']<> ''
    GROUP BY partition_date, cl_id
)t9
    ON t9.partition_date=t1.partition_date
   AND t9.cl_id=t1.device_id
LEFT JOIN
(
    -- devices flagged as abnormal (blacklist) for current_date - 1, one row per device
    SELECT device_id
    FROM ML.ML_D_CT_DV_DEVICECLEAN_DIMEN_D
    WHERE PARTITION_DAY = regexp_replace(DATE_SUB(current_date,1) ,'-','')
      AND is_abnormal_device = 'true'
    GROUP BY device_id
)spam_pv
    ON spam_pv.device_id = t1.device_id
-- anti-join: keep only devices that have no match in the blacklist
WHERE spam_pv.device_id IS NULL
GROUP BY
    t1.partition_date,
    device_os_type,
    active_type,
    channel
#step3.job
type=command
dependencies=step2
command=curl -X GET http://localhost:8553/api/report/email/daily_content/weiyimin@igengmei.com/hanyingyue@igengmei.com,jiaqingqing@igengmei.com
\ No newline at end of file
command=curl -X GET http://localhost:8553/api/report/email/daily_content/liudi@igengmei.com,shenzheng@igengmei.com,wangxin@igengmei.com,zhaoyang@igengmei.com/weiyimin@igengmei.com,hanyingyue@igengmei.com,jiaqingqing@igengmei.com
\ No newline at end of file
select substr(day_id,1,6) `日期`
,device_os_type `系统`
,active_type `活跃`
,channel `渠道`
,round(avg(home_good_click_uv),0) as `首页good click设备数`
,round(avg(if(home_good_click_quality=0,NULL,home_good_click_quality)),2) as `首页gc用户次留率/全站次留率`
,concat(round(avg(if(home_good_click_quality=0,NULL,home_good_click_quality))*100,2),'%') as `首页gc用户次留率/全站次留率`
,round(avg(home_ungood_click_uv),0) as `点击首页卡片但非gc设备数`
,round(avg(if(home_ungood_click_quality=0,NULL,home_ungood_click_quality)),2) as `点击首页卡片但非gc设备次留率/全站次留率`
,concat(round(avg(if(home_ungood_click_quality=0,NULL,home_ungood_click_quality))*100,2),'%') as `点击首页卡片但非gc设备次留率/全站次留率`
,round(avg(no_click_uv),0) as `未点击首页feed卡片设备数`
,round(avg(if(no_click_uv_quality=0,NULL,no_click_uv_quality)),2) as `未点击首页feed卡片设备次留率/全站次留率`
,round(avg(if(home_good_click_retention_quality=0,NULL,home_good_click_retention_quality)),2) as `当天点击首页feed卡片,且次日依旧点击的次留率/全站次留率`
,concat(round(avg(if(no_click_uv_quality=0,NULL,no_click_uv_quality))*100,2),'%') as `未点击首页feed卡片设备次留率/全站次留率`
,concat(round(avg(if(home_good_click_retention_quality=0,NULL,home_good_click_retention_quality))*100,2),'%') as `当天点击首页feed卡片,且次日依旧点击的次留率/全站次留率`
FROM pm.tl_pm_content_retention
where partition_day>='20201018' and partition_day <= regexp_replace(DATE_SUB(current_date,1) ,'-','')
where ((partition_day='20201110' and day_id<='20201109')
or (partition_day>'20201110' and partition_day<regexp_replace(DATE_SUB(current_date,1) ,'-','')
and day_id=REGEXP_REPLACE(date_sub(concat_ws('-',substr(partition_day,1,4),substr(partition_day,5,2),substr(partition_day,7,2)),1), '-', ''))
or partition_day=regexp_replace(DATE_SUB(current_date,1) ,'-',''))
group by substr(day_id,1,6),device_os_type,active_type,channel
order by `日期`,`系统`,`活跃`,`渠道`
\ No newline at end of file
-- Search-to-content conversion report: reads pm.tl_pm_search_content for partitions from
-- 20201109 up to current_date - 1 and renames every column to its report header.
SELECT
    day_id                          AS `日期`,
    device_os_type                  AS `平台`,
    active_type                     AS `活跃`,
    channel                         AS `渠道`,
    dau                             AS `dau`,
    search_uv                       AS `搜索uv`,
    search_uv_in_dau                AS `搜索uv/dau`,
    do_search_uv                    AS `完成搜索uv`,
    do_search_pv                    AS `完成搜索pv`,
    do_search_rate                  AS `完成搜索的用户比例`,
    search_times                    AS `搜索用户人均使用次数`,
    search_diary_to_content_uv      AS `来源于搜索日记tab的日记及帖子页uv`,
    search_diary_to_content_pv      AS `来源于搜索日记tab的日记及帖子页pv`,
    search_diary_to_content_uv_rate AS `来源于搜索日记tab的日记及帖子页uv/搜索uv`,
    search_qa_to_content_uv         AS `来源于搜索问答tab的问答页uv`,
    search_qa_to_content_pv         AS `来源于搜索问答tab的问答页pv`,
    search_qa_to_content_uv_rate    AS `来源于搜索问答tab的问答页uv/搜索uv`,
    diary_tab_ctr                   AS `日记tab ctr`,
    diary_tab_pv_per_uv             AS `日记tab卡片点击pv/uv`,
    qa_tab_ctr                      AS `问答tab ctr`,
    qa_tab_pv_per_uv                AS `问答tab卡片点击pv/uv`,
    content_pv_in_search_uv         AS `来源于搜索日记和问答tab的内容页PV/搜索uv`,
    diary_second_pv_in_search_uv    AS `来源于搜索日记tab的内容二跳PV/搜索uv`,
    qa_second_pv_in_search_uv       AS `来源于搜索问答tab的内容二跳PV/搜索uv`,
    content_pagestay_in_search_uv   AS `来源于搜索日记及问答tab的内容总时长/搜索uv(s)`,
    avg_contents                    AS `完成搜索用户人均阅读内容篇数`,
    search_retention                AS `功能次留`
FROM pm.tl_pm_search_content
WHERE partition_day BETWEEN '20201109' AND regexp_replace(DATE_SUB(current_date,1) ,'-','')
ORDER BY `日期`, `平台`, `活跃`, `渠道`
\ No newline at end of file
daily_grey_recommend=策略灰度实验数据
home_grey_recommend=策略灰度实验数据
--***************************************************************
--*Script name:
--*Purpose: strategy experiment data daily report
--*Business name: pm
--*Input data:
--*Author: weiyimin@igengmei.com
--*Last updated:
--***************************************************************
--Set global variables & UDFs
SET mapreduce.job.queuename=data;
--Switch to the pm database
USE pm;
--Create the internal table pm.tl_pm_grey_recommend_d (strategy experiment data).
--The table is stored as tab-delimited text and partitioned by PARTITION_DAY.
--Dimension columns are day_id, device_os_type, active_type, grey_type and channel.
--Counts are bigint, per-device averages and durations are double, and the ratio columns
--are declared string.
--CREATE TABLE IF NOT EXISTS leaves an already existing table untouched, so editing a column
--type here does not alter a table that was created earlier.
CREATE TABLE IF NOT EXISTS pm.tl_pm_grey_recommend_d
(
day_id string comment '{"chs_name":"当天日期","description":"","etl":"","value":"","remark":""}',
device_os_type string comment '{"chs_name":"设备类型","description":"","etl":"","value":"","remark":""}',
active_type string comment '{"chs_name":"活跃类型","description":"","etl":"","value":"","remark":""}',
grey_type string comment '{"chs_name":"灰度类型","description":"","etl":"","value":"","remark":""}',
channel string comment '{"chs_name":"渠道","description":"","etl":"","value":"","remark":""}',
retention_rate string comment '{"chs_name":"次留率","description":"","etl":"","value":"","remark":""}',
ctr string comment '{"chs_name":"首页信息流综合ctr","description":"","etl":"","value":"","remark":""}',
card_exp_pv bigint comment '{"chs_name":"卡片曝光pv","description":"","etl":"","value":"","remark":""}',
card_click_pv bigint comment '{"chs_name":"卡片点击pv","description":"","etl":"","value":"","remark":""}',
exp_pv_per_uv double comment '{"chs_name":"人均卡片曝光","description":"","etl":"","value":"","remark":""}',
card_pv_per_uv double comment '{"chs_name":"人均卡片点击","description":"","etl":"","value":"","remark":""}',
card_exp_uv bigint comment '{"chs_name":"卡片曝光uv","description":"","etl":"","value":"","remark":""}',
card_click_uv bigint comment '{"chs_name":"卡片点击uv","description":"","etl":"","value":"","remark":""}',
avg_content_pagestay double comment '{"chs_name":"单内容浏览时长(m)","description":"","etl":"","value":"","remark":""}',
avg_app_duration double comment '{"chs_name":"单设备使用时长(m)","description":"","etl":"","value":"","remark":""}',
pagestay_in_app_duration string comment '{"chs_name":"内容浏览时长在app使用时长上的占比","description":"","etl":"","value":"","remark":""}',
wel_second_in_content_pv string comment '{"chs_name":"来自内容页的商业二跳/内容pv","description":"","etl":"","value":"","remark":""}',
content_second_in_content_pv string comment '{"chs_name":"来自内容页的内容二跳/内容pv","description":"","etl":"","value":"","remark":""}',
home_cardclick_uv_in_home_uv string comment '{"chs_name":"首页卡片点击uv/首页uv","description":"","etl":"","value":"","remark":""}',
home_goodclick_uv_in_homeclick_uv string comment '{"chs_name":"good click卡片数uv/首页点击卡片数uv","description":"","etl":"","value":"","remark":""}'
)comment '策略实验数据'
PARTITIONED BY (PARTITION_DAY STRING comment '分区日期')
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
COLLECTION ITEMS TERMINATED BY '\002'
MAP KEYS TERMINATED BY '\003'
LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
\ No newline at end of file
-- Session prelude for the tl_pm_grey_recommend_d load: queue, memory sizing, join and reducer
-- settings, then registration of the two UDFs used for grey bucketing.
-- Submit the MapReduce jobs to the queue named data.
SET mapreduce.job.queuename=data;
-- Map and reduce containers are sized at 8192 MB with a JVM heap cap of 8000 MB each.
-- NOTE(review): the heap cap is within 192 MB of the container size - confirm this leaves enough
-- room for non-heap memory on this cluster.
SET mapreduce.map.memory.mb=8192;
SET mapreduce.map.java.opts=-Xmx8000m;
SET mapreduce.reduce.memory.mb=8192;
SET mapreduce.reduce.java.opts=-Xmx8000m;
-- Allow Hive to convert joins against small tables into map-side joins.
set hive.auto.convert.join=true;
-- Fix the reducer count at 20.
SET mapred.reduce.tasks=20;
-- Switch the session to the admin role (presumably needed for the ADD JAR and
-- CREATE TEMPORARY FUNCTION statements below - confirm against the authorization setup).
SET role admin;
-- Load the in-house UDF jar and register the two functions this script calls:
-- convup and setencryption are used together to derive the grey bucket of a device.
ADD JAR hdfs:///user/hive/share/lib/udf/hive-udf-1.0-SNAPSHOT.jar;
CREATE TEMPORARY FUNCTION convup AS 'com.gmei.hive.common.udf.UDFConvUpgrade';
CREATE TEMPORARY FUNCTION setencryption AS 'com.gmei.hive.common.udf.UDFStringSetEncryption';
-- Rebuild one PARTITION_DAY of pm.tl_pm_grey_recommend_d.
-- Output grain: one row per (day, device OS, active type, grey type, channel) - see the GROUP BY
-- that closes this statement.
-- Ratio metrics are written as strings with a trailing percent sign. nvl(..., 0) substitutes 0 when
-- the ratio is NULL (no joined rows, or a zero denominator, which Hive division turns into NULL).
INSERT OVERWRITE TABLE pm.tl_pm_grey_recommend_d PARTITION (PARTITION_DAY = ${partition_day})
SELECT t1.partition_date as day_id
,t1.device_os_type
,t1.active_type
,t1.grey_type
,t1.channel
-- next-day retention: t1 devices that also appear in t3 on the following day / all t1 devices
,nvl(concat(round(count(distinct t3.device_id)/count(distinct t1.device_id)*100,2),'%'),0) as retention_rate
-- home feed ctr: click pv / exposure pv, both taken from t4
,nvl(concat(round(sum(t4.click_pv)/sum(t4.exp_pv)*100,2),'%'),0) as ctr
,nvl(sum(t4.exp_pv),0) as card_exp_pv
,nvl(sum(t4.click_pv),0) as card_click_pv
-- per-device averages: the denominators count only devices with a positive exposure or click pv
,nvl(round(sum(t4.exp_pv)/count(distinct case when t4.exp_pv>0 then t4.cl_id end),2),0) as exp_pv_per_uv
,nvl(round(sum(t4.click_pv)/count(distinct case when t4.click_pv>0 then t4.cl_id end),2),0) as card_pv_per_uv
,count(distinct case when t4.exp_pv>0 then t4.cl_id end) as card_exp_uv
,count(distinct case when t4.click_pv>0 then t4.cl_id end) as card_click_uv
-- durations are divided by 60 (presumably seconds to minutes, matching the (m) in the column
-- comments of the target table - confirm the unit of page_stay and use_duration)
,nvl(round(sum(t5.page_stay)/count(distinct t5.cl_id)/60,2),0) as avg_content_pagestay
,nvl(round(sum(t7.use_duration)/count(distinct t7.device_id)/60,2),0) as avg_app_duration
,nvl(concat(round(sum(t5.page_stay)/sum(t7.use_duration)*100,2),'%'),0) as pagestay_in_app_duration
-- second-hop ratios: t8 and t9 event counts over the home feed click pv from t4
,nvl(concat(round(sum(t8.pv)/sum(t4.click_pv)*100,2),'%'),0) as wel_second_in_content_pv
,nvl(concat(round(sum(t9.pv)/sum(t4.click_pv)*100,2),'%'),0) as content_second_in_content_pv
-- devices with a home card click (t10) / devices with a home page view (t11)
,nvl(concat(round(count(distinct t10.cl_id)/count(distinct t11.cl_id)*100,2),'%'),0) as home_cardclick_uv_in_home_uv
-- business_num is unqualified here - t12 is the only derived table that exposes it
,nvl(concat(round(sum(business_num)/count(distinct t10.cl_id)*100,2),'%'),0) as home_goodclick_uv_in_homeclick_uv
from
(
-- t1: base device population for the last two days (current_date - 2 and current_date - 1).
-- mas classifies each active device row, then the two LATERAL VIEW EXPLODE calls fan every row
-- out to each grey_type value and each channel value, so a device is counted both under its own
-- label and under the total label.
-- NOTE(review): one row per device per day is assumed before the explode - that holds only if
-- DIM.DIM_AI_CHANNEL_ZP_NEW has at most one row per (code, partition_day). Confirm.
select mas.partition_date,mas.device_id,device_os_type,active_type
,a.grey_type
,e.channel
from
(
-- reformat the yyyyMMdd partition value as yyyy-MM-dd
SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) as partition_date
,m.device_id,device_os_type
-- active_type 1 and 2 map to the new-device label, 4 maps to the returning-device label
,case when active_type in ('1','2') then '新增设备'
when active_type ='4' then '老活跃设备' end as active_type
-- grey bucket: last two characters of convup(setencryption(device_id, sha-1), 16, 10), modulo 20
-- (presumably the SHA-1 hash converted from base 16 to base 10 - confirm against the UDFs).
-- From 20201024 to 20201113 buckets 5 to 8 are grey, from 20201114 onward buckets 0 to 9 are grey.
-- The second array element is the total label.
,array(case when ((partition_date>='20201024' and partition_date<='20201113' and substr(convup(setencryption(m.device_id,'sha-1'),16,10),-2,2)%20 in (5,6,7,8))
or (partition_date>='20201114' and substr(convup(setencryption(m.device_id,'sha-1'),16,10),-2,2)%20 in (0,1,2,3,4,5,6,7,8,9)))then '灰度' else '非灰' end,'合计') as grey_type
-- channel label: suspicious when the device is in the rollback list (a), in pm.tl_pm_channel_d
-- for that day (b), or its first channel contains one of four listed names. Otherwise AI when
-- the channel dimension flags is_ai_channel, else the other label. Second element is the total label.
,array(CASE WHEN (a.device_id is not null or b.device_id is not null
or first_channel_source_type like '%xinyouxingkong%'
or first_channel_source_type like '%jingmeng%'
or first_channel_source_type like '%longyuzhixing%'
or first_channel_source_type like '%mailuo%') THEN '渠道可疑'
WHEN tmp.is_ai_channel='true' THEN 'AI' ELSE '其他' END , '合计') as channel
from online.ml_device_day_active_status m
LEFT JOIN
-- channel dimension for the same two days, matched on channel code and day
(SELECT code,is_spam,is_ai_channel,partition_day
FROM DIM.DIM_AI_CHANNEL_ZP_NEW
WHERE partition_day>=regexp_replace(DATE_SUB(current_date,2) ,'-','')
and partition_day<=regexp_replace(DATE_SUB(current_date,1) ,'-',''))tmp
on first_channel_source_type=tmp.code and m.partition_date=tmp.partition_day
LEFT JOIN
-- devices listed in the app-version-rollback table (only the current_date - 2 partition is read)
(SELECT DISTINCT device_id
FROM al.al_pm_ct_dv_deviceappversionrollbackfrom20190101_d
WHERE partition_day = regexp_replace(DATE_SUB(current_date,2) ,'-',''))a
ON m.device_id = a.device_id
LEFT JOIN
-- devices listed per day in pm.tl_pm_channel_d (current_date - 1 partition)
-- NOTE(review): day_id is compared with the raw yyyyMMdd partition_date - confirm day_id uses that format
(SELECT device_id,day_id
FROM pm.tl_pm_channel_d
WHERE partition_day = regexp_replace(DATE_SUB(current_date,1) ,'-','')
GROUP BY device_id,day_id)b
ON m.device_id = b.device_id AND m.partition_date = b.day_id
where partition_date>=regexp_replace(DATE_SUB(current_date,2) ,'-','')
and partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
and active_type in ('1','2','4')
-- excluded channel codes. Rows whose first_channel_source_type is NULL are dropped as well,
-- because NOT IN evaluates to NULL for them.
and m.first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
,'promotion_shike','promotion_julang_jl03','promotion_zuimei','','unknown')
-- underscores are escaped so they match literally instead of acting as LIKE wildcards
AND first_channel_source_type not like 'promotion\_jf\_%'
)mas
LATERAL VIEW EXPLODE(grey_type) a as grey_type
LATERAL VIEW EXPLODE(channel) e as channel
)t1
LEFT JOIN
(   -- t3: device activity rows used to detect a return on the following day.
    -- The window ends at current_date - 1, so t1 rows dated current_date - 1 can never match.
    SELECT
        nxt.device_id,
        concat_ws('-',
                  substr(nxt.partition_date, 1, 4),
                  substr(nxt.partition_date, 5, 2),
                  substr(nxt.partition_date, 7, 2)) AS partition_date
    FROM online.ml_device_day_active_status nxt
    WHERE nxt.partition_date BETWEEN regexp_replace(DATE_SUB(current_date, 2), '-', '')
                                 AND regexp_replace(DATE_SUB(current_date, 1), '-', '')
) t3
    ON  t3.device_id = t1.device_id
    AND t3.partition_date = date_add(t1.partition_date, 1)
left join
(
-- t4: home feed exposure pv and click pv per (day, device).
-- The inner aliases t2 and t3 are local to this derived table and unrelated to the outer t3.
-- Clicks are LEFT JOINed onto exposures by (day, device, card), so a click only counts when the
-- same card also has an exposure row for that device and day.
SELECT t2.partition_date,t2.cl_id
,sum(t2.pv) as exp_pv
,sum(t3.pv) as click_pv
from
(-- precise exposure, deduplicated by card id and session id
SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) as partition_date,
cl_id,
card_id,
count(distinct app_session_id) as pv
from online.ml_community_precise_exposure_detail
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,2) ,'-','')
and partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND action in ('page_precise_exposure','home_choiceness_card_exposure') -- the action was renamed to page_precise_exposure in version 7745
AND is_exposure = '1' -- precise exposure only
AND page_name ='home'
AND tab_name = '精选'
AND (transaction_type in ('-1','smr','hotspot','pgc','newdata','hotspot_feed','aistragegy','excestragegy')
or transaction_type like '%ctr' or transaction_type like '%cvr' or transaction_type like '%deeplink%')
AND card_content_type in ('qa','diary','user_post','answer','special_pool')
group by partition_date,
cl_id,
card_id
)t2
LEFT JOIN
(-- card clicks, deduplicated by card id and session id, with the same filters as the exposure side
SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) as partition_date,
cl_id,
params['card_id'] as card_id,
count(distinct app_session_id) as pv
from online.bl_hdfs_maidian_updates
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,2) ,'-','')
and partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND action='on_click_card'
AND params['page_name'] ='home'
AND params['tab_name'] = '精选'
AND (params['transaction_type'] in ('-1','smr','hotspot','pgc','newdata','hotspot_feed','aistragegy','excestragegy')
or params['transaction_type'] like '%ctr' or params['transaction_type'] like '%cvr' or params['transaction_type'] like '%deeplink%')
AND params['card_content_type'] in ('qa','diary','user_post','answer','special_pool')
GROUP BY partition_date,
cl_id,
params['card_id']
)t3
on t2.partition_date=t3.partition_date
and t2.cl_id=t3.cl_id
and t2.card_id=t3.card_id
group by t2.partition_date,t2.cl_id
)t4
on t1.partition_date =t4.partition_date and t1.device_id=t4.cl_id
LEFT JOIN
(   -- t5: total content page stay per device per day, for content pages whose referrer is home
    SELECT
        concat_ws('-', substr(partition_date, 1, 4), substr(partition_date, 5, 2), substr(partition_date, 7, 2)) AS partition_date,
        cl_id,
        SUM(page_stay) AS page_stay
    FROM online.bl_hdfs_maidian_updates
    WHERE partition_date BETWEEN regexp_replace(DATE_SUB(current_date, 2), '-', '')
                             AND regexp_replace(DATE_SUB(current_date, 1), '-', '')
      AND action = 'page_view'
      AND referrer = 'home'
      -- keep only stays in the half-open range from 0 up to 1000
      AND page_stay >= 0
      AND page_stay < 1000
      AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail')
    GROUP BY partition_date, cl_id
) t5
    ON  t5.partition_date = t1.partition_date
    AND t5.cl_id = t1.device_id
LEFT JOIN
(   -- t7: app usage duration rows per device per day (no aggregation is applied here)
    SELECT
        concat_ws('-',
                  substr(upd.partition_date, 1, 4),
                  substr(upd.partition_date, 5, 2),
                  substr(upd.partition_date, 7, 2)) AS partition_date,
        upd.device_id,
        upd.use_duration
    FROM online.ml_device_updates upd
    WHERE upd.partition_date BETWEEN regexp_replace(DATE_SUB(current_date, 2), '-', '')
                                 AND regexp_replace(DATE_SUB(current_date, 1), '-', '')
) t7
    ON  t7.partition_date = t1.partition_date
    AND t7.device_id = t1.device_id
LEFT JOIN
(   -- t8: per device per day, distinct event timestamps of commercial second hops
    -- (button clicks and service card clicks) made on a content detail page reached from
    -- another content detail page
    SELECT
        cl_id,
        concat_ws('-', substr(partition_date, 1, 4), substr(partition_date, 5, 2), substr(partition_date, 7, 2)) AS partition_date,
        COUNT(DISTINCT time_str) AS pv
    FROM online.bl_hdfs_maidian_updates
    WHERE partition_date BETWEEN regexp_replace(DATE_SUB(current_date, 2), '-', '')
                             AND regexp_replace(DATE_SUB(current_date, 1), '-', '')
      AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail')
      AND params['referrer'] IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail')
      AND (
              (action = 'on_click_button'
               AND params['button_name'] IN ('more_recommendations', 'video_interview', 'referral'))
           OR (action = 'on_click_card'
               AND params['card_content_type'] = 'service')
          )
    GROUP BY cl_id, partition_date
) t8
    ON  t8.partition_date = t1.partition_date
    AND t8.cl_id = t1.device_id
LEFT JOIN
(   -- t9: per device per day, distinct event timestamps of content second hops
    -- (search entry, highlighted word clicks and content card clicks) made on a content
    -- detail page reached from another content detail page
    SELECT
        cl_id,
        concat_ws('-', substr(partition_date, 1, 4), substr(partition_date, 5, 2), substr(partition_date, 7, 2)) AS partition_date,
        COUNT(DISTINCT time_str) AS pv
    FROM online.bl_hdfs_maidian_updates
    WHERE partition_date BETWEEN regexp_replace(DATE_SUB(current_date, 2), '-', '')
                             AND regexp_replace(DATE_SUB(current_date, 1), '-', '')
      AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail')
      -- path: content detail page, then search home, then a search
      AND params['referrer'] IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail')
      AND (
              action IN ('on_click_navbar_search', 'do_search')
           OR (action = 'on_click_card'
               AND (   params['card_type'] = 'highlight_word'
                    OR params['card_content_type'] IN ('qa', 'diary', 'user_post', 'answer')))
          )
    GROUP BY cl_id, partition_date
) t9
    ON  t9.partition_date = t1.partition_date
    AND t9.cl_id = t1.device_id
LEFT JOIN
(
-- t10: distinct (day, device) pairs counted as having clicked a home feed card. Two sources are
-- unioned and then deduplicated by the outer GROUP BY. The inner derived table is also aliased
-- t10 - it is local to this block.
select partition_date,cl_id
from
( -- home feed card clicks
SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) as partition_date
,cl_id
-- ,count(distinct array(params['card_id'],app_session_id)) as click_pv
FROM online.bl_hdfs_maidian_updates
where partition_date>=regexp_replace(DATE_SUB(current_date,2) ,'-','')
and partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
and action='on_click_card'
and page_name='home'
and params['card_type']='card'
group by partition_date
,cl_id
UNION ALL
-- click tracking events are sometimes lost, so page view events are used to fill the gap
SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) as partition_date
,cl_id
-- ,count(distinct array(params['card_id'],app_session_id)) as click_pv
FROM online.bl_hdfs_maidian_updates
where partition_date>=regexp_replace(DATE_SUB(current_date,2) ,'-','')
and partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
and action='page_view'
-- the page was reached from home: either referrer is home, or the last element of the
-- referrer_link list is home
-- NOTE(review): json_split is not registered in this script - presumably a permanent function
-- that turns the JSON array text into a Hive array. Confirm.
and (referrer in ('home') or
(params['referrer_link'] like '%[%' and
json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1] in ('home')))
and page_name in ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail')
group by partition_date
,cl_id
)t10
group by partition_date,cl_id
)t10
on t1.partition_date =t10.partition_date and t1.device_id=t10.cl_id
LEFT JOIN
(   -- t11: distinct (day, device) pairs with at least one home page view
    SELECT DISTINCT
        concat_ws('-', substr(partition_date, 1, 4), substr(partition_date, 5, 2), substr(partition_date, 7, 2)) AS partition_date,
        cl_id
    FROM online.bl_hdfs_maidian_updates
    WHERE partition_date BETWEEN regexp_replace(DATE_SUB(current_date, 2), '-', '')
                             AND regexp_replace(DATE_SUB(current_date, 1), '-', '')
      AND page_name = 'home'
      AND action = 'page_view'
) t11
    ON  t11.partition_date = t1.partition_date
    AND t11.cl_id = t1.device_id
LEFT JOIN
( -- good click logic for content pages
-- t12: per (day, device), the number of distinct business_id values opened from home that
-- qualify as a good click: a long page view, a second hop, a favorite, share or vote, or a
-- comment found in the business tables (b).
-- The inner aliases t1 and t2 are local to this block and unrelated to the outer t1.
select concat_ws('-',substr(a.partition_date,1,4),substr(a.partition_date,5,2),substr(a.partition_date,7,2)) as partition_date
,a.cl_id
,count(distinct a.business_id) as business_num
from
(
-- tracking events on content pages reached from home. content_type is only derived for diary,
-- post and answer pages, so the other listed page names get NULL and never match b on type.
select *,case when page_name in ('diary_detail','topic_detail') THEN 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') THEN 'post'
when page_name in ('answer_detail') THEN 'answer' end content_type
FROM ONLINE.BL_HDFS_MAIDIAN_UPDATES
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,2) ,'-','')
and partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
and (referrer='home' or
(params['referrer_link'] like '%[%' and
json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1]='home'))
and page_name in ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail',
'video_steep','article_detail')
)a
left join
(-- comment tracking events are incomplete, so comments are supplemented from the business database
-- for diaries the diary book id is used, because a card click on the home page lands on the diary card first
SELECT diary_id as content_id,'diary' as type,user_id,create_date
FROM
(
-- online, non-spam topic replies written in the last two days, with the date as yyyyMMdd
SELECT id,problem_id,user_id,regexp_replace(substr(reply_date,1,10),'-','') as create_date
FROM online.tl_hdfs_topicreply_view
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND is_spam = 'false' -- exclude suspected ads
and is_online='true'
and regexp_replace(substr(reply_date,1,10),'-','') >= regexp_replace(DATE_SUB(current_date,2) ,'-','')
and regexp_replace(substr(reply_date,1,10),'-','') <= regexp_replace(DATE_SUB(current_date,1) ,'-','')
group by id,problem_id,user_id,reply_date
)t1
JOIN
(
-- maps a topic (problem) id to its diary id
SELECT id,diary_id
FROM online.tl_hdfs_problem_view
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
group by id,diary_id
)t2
on t2.id=t1.problem_id
group by diary_id,user_id,create_date
UNION ALL
-- users who commented on an answer, excluding suspected ads
SELECT answer_id as content_id,'answer' as type,user_id,regexp_replace(substr(create_time,1,10),'-','') as create_date
FROM online.tl_hdfs_answer_reply_view
WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
and (is_fake is NULL or is_fake = 'false')
AND answer_id is not NULL
and is_online='true'
and is_spam = 'false' -- exclude suspected ads
and regexp_replace(substr(create_time,1,10),'-','') >= regexp_replace(DATE_SUB(current_date,2) ,'-','')
and regexp_replace(substr(create_time,1,10),'-','') <= regexp_replace(DATE_SUB(current_date,1) ,'-','')
group by answer_id,user_id,create_time
UNION ALL
-- users who commented on a user post
SELECT tractate_id as content_id,'post' as type,user_id,regexp_replace(substr(create_time,1,10),'-','') as create_date
FROM online.tl_hdfs_api_tractate_reply_view
WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
and regexp_replace(substr(create_time,1,10),'-','') >= regexp_replace(DATE_SUB(current_date,2) ,'-','')
and regexp_replace(substr(create_time,1,10),'-','') <= regexp_replace(DATE_SUB(current_date,1) ,'-','')
and is_online='true'
group by tractate_id,user_id,create_time
)b
-- both sides of the date comparison are yyyyMMdd here (a.partition_date is still the raw partition value)
on a.business_id=b.content_id and a.partition_date=b.create_date and a.user_id=b.user_id and a.content_type=b.type
WHERE ((action='page_view' and page_stay>=20)-- page stay of 20s or more
or action in ('on_click_navbar_search','do_search') -- second hop: search box and search button clicks
or (action='on_click_card' and params['card_type']='highlight_word')-- second hop: highlighted word
or (action='on_click_card' and params['card_content_type']in ('service','qa','diary','user_post','answer'))-- second hop: card click
or (action='on_click_button' and params['button_name'] in ('video_interview','referral'))-- second hop: referral and video consultation clicks
or (action='on_click_favor' and params['motion']='do')-- favorite click
or action='page_click_share'-- share click
or (action='on_click_vote' and params['motion']='vote')-- like (vote) click
or b.user_id is not null )-- the user commented on the content that day
group by a.partition_date,a.cl_id
)t12
on t1.partition_date=t12.partition_date and t1.device_id=t12.cl_id
LEFT JOIN
(   -- blacklist: devices flagged as abnormal in the current_date - 1 partition
    SELECT device_id
    FROM ML.ML_D_CT_DV_DEVICECLEAN_DIMEN_D
    WHERE PARTITION_DAY = regexp_replace(DATE_SUB(current_date, 1), '-', '')
      AND is_abnormal_device = 'true'
    GROUP BY device_id
) spam_pv
    ON spam_pv.device_id = t1.device_id
-- anti-join: keep only devices that are not on the blacklist
WHERE spam_pv.device_id IS NULL
-- output grain of the whole INSERT
GROUP BY
    t1.partition_date,
    t1.device_os_type,
    t1.active_type,
    t1.channel,
    t1.grey_type
#step1_1.job
# Wait step: runs waitsuccess.sh with the arguments "hive online ml_device_day_active_status".
# Presumably blocks until that table is marked as successfully loaded - confirm in waitsuccess.sh.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_device_day_active_status
\ No newline at end of file
#step1_2.job
# Wait step: runs waitsuccess.sh with the arguments "hive online ml_community_precise_exposure_detail".
# Presumably blocks until that table is marked as successfully loaded - confirm in waitsuccess.sh.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_community_precise_exposure_detail
\ No newline at end of file
#step1_3.job
# Wait step: runs waitsuccess.sh with the arguments "hive online bl_hdfs_maidian_updates".
# Presumably blocks until that table is marked as successfully loaded - confirm in waitsuccess.sh.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online bl_hdfs_maidian_updates
\ No newline at end of file
#step1_4.job
# Wait step: runs waitsuccess.sh with the arguments "hive ml ml_d_ct_dv_devicespam_d".
# NOTE(review): the tl_pm_grey_recommend_d insert script filters blacklisted devices through
# ML.ML_D_CT_DV_DEVICECLEAN_DIMEN_D, not ml_d_ct_dv_devicespam_d. If step2 runs that script,
# confirm this wait targets the intended table.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive ml ml_d_ct_dv_devicespam_d
\ No newline at end of file
#step1_5.job
# Wait step: runs waitsuccess.sh with the arguments "hive online ml_user_updates".
# NOTE(review): the tl_pm_grey_recommend_d insert script reads online.ml_device_updates and does
# not reference ml_user_updates. If step2 runs that script, confirm this wait is still needed.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_user_updates
\ No newline at end of file
#step1_6.job
# Wait step: runs waitsuccess.sh with the arguments "hive DIM DIM_AI_CHANNEL_ZP_NEW".
# Presumably blocks until that table is marked as successfully loaded - confirm in waitsuccess.sh.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive DIM DIM_AI_CHANNEL_ZP_NEW
\ No newline at end of file
#step1_7.job
# Wait step: runs waitsuccess.sh with the arguments "hive ml ml_c_ct_ui_user_dimen_d".
# NOTE(review): the tl_pm_grey_recommend_d insert script does not reference this table.
# If step2 runs that script, confirm this wait is still needed.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive ml ml_c_ct_ui_user_dimen_d
\ No newline at end of file
#step1_8.job
# Wait step: runs waitsuccess.sh with the arguments "hive online ml_user_history_detail".
# NOTE(review): the tl_pm_grey_recommend_d insert script does not reference this table.
# If step2 runs that script, confirm this wait is still needed.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_user_history_detail
\ No newline at end of file
#step1_9.job
# Wait step: runs waitsuccess.sh with the arguments "hive online ml_device_history_detail".
# NOTE(review): the tl_pm_grey_recommend_d insert script does not reference this table.
# If step2 runs that script, confirm this wait is still needed.
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_device_history_detail
\ No newline at end of file
#step2.job
# Main step: runs the shared hive launcher for the daily_grey_recommend job once all nine
# wait steps have finished.
# NOTE(review): the tl_pm_grey_recommend_d insert script also reads online.ml_device_updates,
# pm.tl_pm_channel_d, ML.ML_D_CT_DV_DEVICECLEAN_DIMEN_D and several online.tl_hdfs_*_view tables,
# none of which has a wait step here. Confirm they are ready before this step runs.
type=command
dependencies=step1_1,step1_2,step1_3,step1_4,step1_5,step1_6,step1_7,step1_8,step1_9
command=sh /home/bi/bi-report/lib/shell/hive daily_grey_recommend
\ No newline at end of file
#step3.job
# Delivery step: after step2, calls the local report service (port 8553) email endpoint for the
# daily_grey_recommend report. The URL path carries two comma-separated address lists separated
# by a slash - presumably recipients followed by cc addresses. Confirm against the report service.
type=command
dependencies=step2
command=curl -X GET http://localhost:8553/api/report/email/daily_grey_recommend/shenzheng@igengmei.com,wangxin@igengmei.com,zhaoyang@igengmei.com,duanyingrong@igengmei.com,xuepengfei@igengmei.com,wanglidan@igengmei.com/weiyimin@igengmei.com,hanyingyue@igengmei.com,jiaqingqing@igengmei.com
\ No newline at end of file
-- Report query over pm.tl_pm_grey_recommend_d. Each column is renamed to its display header.
-- Row selection (see WHERE):
--   * partition 20201112 contributes every day_id up to 2020-11-11
--     (presumably a one-off backfill partition - confirm)
--   * partitions after 20201112 and before yesterday contribute only the day before the
--     partition day
--   * the partition for yesterday contributes all of its rows
-- Rows are sorted by the dimension columns so the report layout is stable between runs.
SELECT day_id AS `日期`
,device_os_type AS `系统`
,active_type AS `活跃`
,grey_type AS `灰度`
,channel AS `渠道`
,retention_rate AS `次留率`
,ctr AS `首页信息流综合ctr`
,card_exp_pv AS `卡片曝光pv`
,card_click_pv AS `卡片点击pv`
,exp_pv_per_uv AS `人均卡片曝光`
,card_pv_per_uv AS `人均卡片点击`
,card_exp_uv AS `卡片曝光uv`
,card_click_uv AS `卡片点击uv`
,avg_content_pagestay AS `单内容浏览时长(m)`
,avg_app_duration AS `单设备使用时长(m)`
,pagestay_in_app_duration AS `内容浏览时长在app使用时长上的占比`
,wel_second_in_content_pv AS `来自内容页的商业二跳/内容pv`
,content_second_in_content_pv AS `来自内容页的内容二跳/内容pv`
,home_cardclick_uv_in_home_uv AS `首页卡片点击uv/首页uv`
,home_goodclick_uv_in_homeclick_uv AS `good click卡片数uv/首页点击卡片数uv`
from pm.tl_pm_grey_recommend_d
where ((partition_day='20201112' and day_id<='2020-11-11')
or (partition_day>'20201112' and partition_day<regexp_replace(DATE_SUB(current_date,1) ,'-','')
and day_id=date_sub(concat_ws('-',substr(partition_day,1,4),substr(partition_day,5,2),substr(partition_day,7,2)),1))
or partition_day=regexp_replace(DATE_SUB(current_date,1) ,'-',''))
order by `日期`,`系统`,`活跃`,`灰度`,`渠道`
......@@ -401,6 +401,7 @@ full join
,device_id,msg_id,'ios' as device_os_type
from bl.bl_et_bg_trackingpushlog_inc_d
where partition_day= regexp_replace(DATE_SUB(current_date,1) ,'-','')
and (device_type is null or device_type='ios')--在7.35新增了android的埋点,故7.35之后新增了device_type字段
group by partition_day,device_id,msg_id
union all
......
......@@ -67,9 +67,9 @@ FROM
cl_id,
case when card_content_type in ('qa','answer') then 'qa'
when card_content_type in ('special_pool') then 'special' else card_content_type end as card_content_type,
CASE when transaction_type in ('fmctr') then array('fmctr','合计')
CASE when transaction_type in ('fmctr','samecity_fmctr') then array('fmctr','合计')
when transaction_type in ('high_quality_fmctr') then array('high_quality_fmctr','合计')
WHEN (transaction_type like '%ctr' and transaction_type not in ('high_quality_ctr','high_quality_fmctr','fmctr')) THEN array('ctr预估','合计')
WHEN (transaction_type like '%ctr' and transaction_type not in ('high_quality_ctr','high_quality_fmctr','fmctr','samecity_fmctr') ) THEN array('ctr预估','合计')
when transaction_type in ('high_quality_ctr') then array('high_quality_ctr','合计')
WHEN transaction_type like '%cvr' THEN array('cvr预估','合计')
WHEN transaction_type in ('-1','smr') THEN array('smr','合计')
......@@ -97,9 +97,9 @@ FROM
case when card_content_type in ('qa','answer') then 'qa'
when card_content_type in ('special_pool') then 'special' else card_content_type end,
cl_id,
CASE when transaction_type in ('fmctr') then array('fmctr','合计')
CASE when transaction_type in ('fmctr','samecity_fmctr') then array('fmctr','合计')
when transaction_type in ('high_quality_fmctr') then array('high_quality_fmctr','合计')
WHEN (transaction_type like '%ctr' and transaction_type not in ('high_quality_ctr','high_quality_fmctr','fmctr')) THEN array('ctr预估','合计')
WHEN (transaction_type like '%ctr' and transaction_type not in ('high_quality_ctr','high_quality_fmctr','fmctr','samecity_fmctr')) THEN array('ctr预估','合计')
when transaction_type in ('high_quality_ctr') then array('high_quality_ctr','合计')
WHEN transaction_type like '%cvr' THEN array('cvr预估','合计')
WHEN transaction_type in ('-1','smr') THEN array('smr','合计')
......@@ -132,9 +132,9 @@ FROM
cl_id,
case when params['card_content_type'] in ('qa','answer') then 'qa'
when params['card_content_type'] in ('special_pool') then 'special' else params['card_content_type'] end as card_content_type,
CASE when params['transaction_type'] in ('fmctr') then array('fmctr','合计')
CASE when params['transaction_type'] in ('fmctr','samecity_fmctr') then array('fmctr','合计')
when params['transaction_type'] in ('high_quality_fmctr') then array('high_quality_fmctr','合计')
WHEN (params['transaction_type'] like '%ctr' and params['transaction_type'] not in ('high_quality_ctr','high_quality_fmctr','fmctr')) THEN array('ctr预估','合计')
WHEN (params['transaction_type'] like '%ctr' and params['transaction_type'] not in ('high_quality_ctr','high_quality_fmctr','fmctr','samecity_fmctr')) THEN array('ctr预估','合计')
when params['transaction_type'] in ('high_quality_ctr') then array('high_quality_ctr','合计')
WHEN params['transaction_type'] like '%cvr' THEN array('cvr预估','合计')
WHEN params['transaction_type'] in ('-1','smr') THEN array('smr','合计')
......@@ -161,9 +161,9 @@ FROM
cl_id,
case when params['card_content_type'] in ('qa','answer') then 'qa'
when params['card_content_type'] in ('special_pool') then 'special' else params['card_content_type'] end,
CASE when params['transaction_type'] in ('fmctr') then array('fmctr','合计')
CASE when params['transaction_type'] in ('fmctr','samecity_fmctr') then array('fmctr','合计')
when params['transaction_type'] in ('high_quality_fmctr') then array('high_quality_fmctr','合计')
WHEN (params['transaction_type'] like '%ctr' and params['transaction_type'] not in ('high_quality_ctr','high_quality_fmctr','fmctr')) THEN array('ctr预估','合计')
WHEN (params['transaction_type'] like '%ctr' and params['transaction_type'] not in ('high_quality_ctr','high_quality_fmctr','fmctr','samecity_fmctr')) THEN array('ctr预估','合计')
when params['transaction_type'] in ('high_quality_ctr') then array('high_quality_ctr','合计')
WHEN params['transaction_type'] like '%cvr' THEN array('cvr预估','合计')
WHEN params['transaction_type'] in ('-1','smr') THEN array('smr','合计')
......@@ -248,7 +248,7 @@ FROM
when page_name in ('custom_special') then 'special' else null end,time_str,page_stay
)a
left join
(
(--在8月份新增了内容专题卡片,需要转换下id
select id,visual_page_id,'special' as page_name
from tl.tl_zx_api_special_pool
where partition_day =regexp_replace(DATE_SUB(current_date,1) ,'-','')
......@@ -674,7 +674,7 @@ FROM
partition_date,device_id,device_os_type
,case WHEN active_type = '4' THEN '老活跃设备'
WHEN active_type in ('1','2') then '新增设备' END as active_type
, '灰度' AS grey_type
,CASE WHEN substr(md5(device_id), -1, 1) IN ('0', '1', '2', '3', '8', 'a', 'b', 'f') THEN '非灰' ELSE '灰度' END AS grey_type
FROM online.ml_device_day_active_status
where partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND active_type in ('1','2','4')
......
......@@ -4,14 +4,15 @@ select a.*
FROM
(
--有评论过日记帖的设备,排除疑似广告
SELECT diary_id as content_id,'日记贴' as type,user_id,create_time,content
SELECT t1.id as content_id,'日记贴' as type,user_id,create_time,content
FROM
(
SELECT id,problem_id,user_id,reply_date as create_time,content
FROM online.tl_hdfs_topicreply_view
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND is_spam = 'false' --排除疑似广告
and regexp_replace(substr(reply_date,1,10),'-','') >= regexp_replace(DATE_SUB(current_date,1) ,'-','')
and is_online='true'
and regexp_replace(substr(reply_date,1,10),'-','') = regexp_replace(DATE_SUB(current_date,1) ,'-','')
group by id,problem_id,user_id,reply_date,content
)t1
JOIN
......@@ -22,57 +23,60 @@ FROM
group by id,diary_id
)t2
on t2.id=t1.problem_id
group by diary_id,user_id,create_time,content
group by t1.id,user_id,create_time,content
UNION ALL
--有评论过回答的设备,排除疑似广告
SELECT answer_id as content_id,'回答' as type,user_id,create_time,content
SELECT id as content_id,'回答' as type,user_id,create_time,content
FROM online.tl_hdfs_answer_reply_view
WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
and (is_fake is NULL or is_fake = 'false')
AND answer_id is not NULL
and regexp_replace(substr(create_time,1,10),'-','') >= regexp_replace(DATE_SUB(current_date,1) ,'-','')
group by answer_id,user_id,create_time,content
and is_online='true'
and is_spam = 'false' --排除疑似广告
and regexp_replace(substr(create_time,1,10),'-','') = regexp_replace(DATE_SUB(current_date,1) ,'-','')
group by id,user_id,create_time,content
UNION ALL
--有评论过用户帖的设备
SELECT tractate_id as content_id,'帖子' as type,user_id,create_time,content
SELECT id as content_id,'帖子' as type,user_id,create_time,content
FROM online.tl_hdfs_api_tractate_reply_view
WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
and regexp_replace(substr(create_time,1,10),'-','') >= regexp_replace(DATE_SUB(current_date,1) ,'-','')
group by tractate_id,user_id,create_time,content
and regexp_replace(substr(create_time,1,10),'-','') = regexp_replace(DATE_SUB(current_date,1) ,'-','')
and is_online='true'
group by id,user_id,create_time,content
)a
JOIN --限制用户是在app进行的回复
(
SELECT a.partition_date,user_id
FROM
(
SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) as partition_date,user_id,device_id,action
FROM online.bl_hdfs_operation_updates
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
)a
JOIN
(
SELECT code
FROM dim.dim_community_action_type
WHERE communityuserbehavior_type_name = '回帖'
)type
ON a.action = code
GROUP BY a.partition_date,user_id
)b
ON a.user_id = b.user_id
AND substr(a.create_time,1,10)= b.partition_date
--JOIN --限制用户是在app进行的回复
--(
-- SELECT a.partition_date,user_id
-- FROM
-- (
-- SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) as partition_date,user_id,device_id,action
-- FROM online.bl_hdfs_operation_updates
-- WHERE partition_date>=regexp_replace(DATE_SUB(current_date,1) ,'-','')
-- AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
-- )a
-- JOIN
-- (
-- SELECT code
-- FROM dim.dim_community_action_type
-- WHERE communityuserbehavior_type_name = '回帖'
-- )type
-- ON a.action = code
-- GROUP BY a.partition_date,user_id
--)b
-- ON a.user_id = b.user_id
-- AND substr(a.create_time,1,10)= b.partition_date
left join
(
select distinct user_id
from ml.ML_D_CT_UI_USERCLEAN_DIMEN_D
where PARTITION_DAY =regexp_replace(DATE_SUB(current_date,1) ,'-','')
and is_abnormal_user = 'true'
and (is_classify_user = 'true' or is_puppet ='true')
)c
on a.user_id=c.user_id
where c.user_id is null
daily_userpost=新手精选帖日报
data_by_day=每日数据汇总
key_data=当日数据关键指标
detail_data=当日数据明细
\ No newline at end of file
daily_userpost=新手精选及更美优选帖日报
data_by_day_new=新手精选每日数据汇总
key_data_new=新手精选当日数据关键指标
detail_data_new=新手精选当日数据明细
data_by_day_old=更美优选每日数据汇总
key_data_old=更美优选当日数据关键指标
detail_data_old=更美优选当日数据明细
\ No newline at end of file
--***************************************************************
--*Script name:
--*Purpose:       "Gengmei Preferred" tag report, targeted at existing (old) users
--*Business name: pm
--*Input data:
--*Author:        weiyimin@igengmei.com
--*Updated:
--***************************************************************
--Set global variables & UDFs: run the job on the "data" MapReduce queue
SET mapreduce.job.queuename=data;
--Switch to the pm database
USE pm;
--Create the internal (managed) table if it does not exist yet.
--One row per post; the same nine metrics are stored for four windows
--(previous 1 day, previous 3 days, previous 10 days, lifetime), as described by the column comments.
CREATE TABLE IF NOT EXISTS pm.tl_pm_userpost_old_d_v3
(
-- Post attributes
post_id string comment '{"chs_name":"帖子id","description":"","etl":"","value":"","remark":""}',
title string comment '{"chs_name":"帖子标题","description":"","etl":"","value":"","remark":""}',
audit_date string comment '{"chs_name":"最新审核时间","description":"","etl":"","value":"","remark":""}',
tag_list string comment '{"chs_name":"关联标签","description":"","etl":"","value":"","remark":""}',
-- Previous-1-day window (suffix _1)
exp_pv_1 bigint comment '{"chs_name":"前1日曝光","description":"","etl":"","value":"","remark":""}',
click_pv_1 bigint comment '{"chs_name":"前1日点击","description":"","etl":"","value":"","remark":""}',
page_pv_1 bigint comment '{"chs_name":"前1日浏览pv","description":"","etl":"","value":"","remark":""}',
page_pv_20s_1 bigint comment '{"chs_name":"前1日浏览20s以上pv","description":"","etl":"","value":"","remark":""}',
reply_num_1 bigint comment '{"chs_name":"前1日真实评论","description":"","etl":"","value":"","remark":""}',
vote_num_1 bigint comment '{"chs_name":"前1日真实点赞","description":"","etl":"","value":"","remark":""}',
favor_num_1 bigint comment '{"chs_name":"前1日收藏","description":"","etl":"","value":"","remark":""}',
share_num_1 bigint comment '{"chs_name":"前1日转发","description":"","etl":"","value":"","remark":""}',
avg_page_stay_1 double comment '{"chs_name":"前1日平均阅读时长(s)","description":"","etl":"","value":"","remark":""}',
-- Previous-3-day window (suffix _3)
exp_pv_3 bigint comment '{"chs_name":"前3日曝光","description":"","etl":"","value":"","remark":""}',
click_pv_3 bigint comment '{"chs_name":"前3日点击","description":"","etl":"","value":"","remark":""}',
page_pv_3 bigint comment '{"chs_name":"前3日浏览pv","description":"","etl":"","value":"","remark":""}',
page_pv_20s_3 bigint comment '{"chs_name":"前3日浏览20s以上pv","description":"","etl":"","value":"","remark":""}',
reply_num_3 bigint comment '{"chs_name":"前3日真实评论","description":"","etl":"","value":"","remark":""}',
vote_num_3 bigint comment '{"chs_name":"前3日真实点赞","description":"","etl":"","value":"","remark":""}',
favor_num_3 bigint comment '{"chs_name":"前3日收藏","description":"","etl":"","value":"","remark":""}',
share_num_3 bigint comment '{"chs_name":"前3日转发","description":"","etl":"","value":"","remark":""}',
avg_page_stay_3 double comment '{"chs_name":"前3日平均阅读时长(s)","description":"","etl":"","value":"","remark":""}',
-- Previous-10-day window (suffix _10)
exp_pv_10 bigint comment '{"chs_name":"前10日曝光","description":"","etl":"","value":"","remark":""}',
click_pv_10 bigint comment '{"chs_name":"前10日点击","description":"","etl":"","value":"","remark":""}',
page_pv_10 bigint comment '{"chs_name":"前10日浏览pv","description":"","etl":"","value":"","remark":""}',
page_pv_20s_10 bigint comment '{"chs_name":"前10日浏览20s以上pv","description":"","etl":"","value":"","remark":""}',
reply_num_10 bigint comment '{"chs_name":"前10日真实评论","description":"","etl":"","value":"","remark":""}',
vote_num_10 bigint comment '{"chs_name":"前10日真实点赞","description":"","etl":"","value":"","remark":""}',
favor_num_10 bigint comment '{"chs_name":"前10日收藏","description":"","etl":"","value":"","remark":""}',
share_num_10 bigint comment '{"chs_name":"前10日转发","description":"","etl":"","value":"","remark":""}',
avg_page_stay_10 double comment '{"chs_name":"前10日平均阅读时长(s)","description":"","etl":"","value":"","remark":""}',
-- Lifetime ("historical") window (no suffix)
exp_pv bigint comment '{"chs_name":"历史曝光","description":"","etl":"","value":"","remark":""}',
click_pv bigint comment '{"chs_name":"历史点击","description":"","etl":"","value":"","remark":""}',
page_pv bigint comment '{"chs_name":"历史浏览pv","description":"","etl":"","value":"","remark":""}',
page_pv_20s bigint comment '{"chs_name":"历史浏览20s以上pv","description":"","etl":"","value":"","remark":""}',
reply_num bigint comment '{"chs_name":"历史真实评论","description":"","etl":"","value":"","remark":""}',
vote_num bigint comment '{"chs_name":"历史真实点赞","description":"","etl":"","value":"","remark":""}',
favor_num bigint comment '{"chs_name":"历史收藏","description":"","etl":"","value":"","remark":""}',
share_num bigint comment '{"chs_name":"历史转发","description":"","etl":"","value":"","remark":""}',
avg_page_stay double comment '{"chs_name":"历史平均阅读时长(s)","description":"","etl":"","value":"","remark":""}'
)comment '更美优选标签日报'
-- One partition per load, keyed by PARTITION_DAY
PARTITIONED BY (PARTITION_DAY STRING comment '分区日期')
-- Plain-text storage: tab-separated fields, \002 between collection items,
-- \003 between map keys, one record per line
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
COLLECTION ITEMS TERMINATED BY '\002'
MAP KEYS TERMINATED BY '\003'
LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
\ No newline at end of file
--***************************************************************
--*脚本名称:
--*功能: 线索日报
--*功能: 新手精选标签
--*业务名称: pm
--*输入数据:
--*作者: weiyimin@igengmei.com
......
......@@ -163,7 +163,7 @@ join
and page_name in ('post_detail','user_post_detail','doctor_post_detail')
group by partition_date,cl_id,params['business_id']
)f
on a.partition_date=f.partition_date and nvl(a.card_id,e.business_id)=f.business_id and nvl(a.cl_id,e.cl_id)=f.cl_id
on nvl(a.partition_date,e.partition_date)=f.partition_date and nvl(a.card_id,e.business_id)=f.business_id and nvl(a.cl_id,e.cl_id)=f.cl_id
left join
(
select distinct device_id
......@@ -171,7 +171,7 @@ join
where PARTITION_DAY =regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND is_abnormal_device = 'true'
)c
on a.cl_id=c.device_id
on nvl(nvl(a.cl_id,e.cl_id),f.cl_id)=c.device_id
where c.device_id is null
group by nvl(nvl(a.card_id,e.business_id),f.business_id),nvl(nvl(a.partition_date,e.partition_date),f.partition_date)
)t1
......@@ -245,4 +245,276 @@ join
)t2
on t1.id= t2.card_id
where t2.partition_date>=t1.audit_date
group by t1.id,title,audit_date,tag_list;
\ No newline at end of file
group by t1.id,title,audit_date,tag_list;
-- Load partition ${partition_day} of pm.tl_pm_userpost_old_d_v3.
--
-- Output: one row per post (t1) with exposure / click / page-view / interaction
-- metrics summed over four windows: previous 1, 3 and 10 days, and lifetime
-- (all daily rows dated on or after the post's audit date).
--
-- Inputs:
--   t1 : online posts with content_level >= 3 that carry tag_v3_id '3369'
--        (presumably the "Gengmei Preferred" tag - confirm), plus the set of
--        all tag names attached to each post.
--   t2 : per-post, per-day metrics = behaviour metrics (exposure, click, page
--        view, reading time) FULL JOINed with interaction metrics (reply, vote,
--        favor, share).
--
-- Exclusions applied inside t2:
--   * devices / users flagged abnormal in the ML clean-up dimension tables;
--   * devices (and the users seen on them) that appear in yesterday's
--     online.ml_device_day_active_status partition with active_type '1' or '2'
--     (presumably new activations, i.e. not "old" users - confirm).
--
-- Fix in this revision: the reading-time subquery (e) is now aggregated to one
-- row per (partition_date, business_id, cl_id) before it is joined. It used to
-- emit one row per page-view event, so every exposure / click / page-view row
-- sharing the same key was repeated once per event and exp_pv, click_pv,
-- page_pv and page_pv_20s were over-counted by the outer sum(). avg_page_stay
-- is unchanged: the total page_stay and the distinct (device, day) count are
-- the same before and after the pre-aggregation.
INSERT OVERWRITE TABLE pm.tl_pm_userpost_old_d_v3 PARTITION (PARTITION_DAY = ${partition_day})
SELECT t1.id as post_id
      ,title
      ,audit_date
      ,tag_list
      -- previous 1 day (t2.partition_date is a 'yyyy-MM-dd' string)
      ,nvl(sum(case when t2.partition_date>=DATE_SUB(current_date,1) then exp_pv end),0) as exp_pv_1
      ,nvl(sum(case when t2.partition_date>=DATE_SUB(current_date,1) then click_pv end),0) as click_pv_1
      ,nvl(sum(case when t2.partition_date>=DATE_SUB(current_date,1) then page_pv end),0) as page_pv_1
      ,nvl(sum(case when t2.partition_date>=DATE_SUB(current_date,1) then page_pv_20s end),0) as page_pv_20s_1
      ,nvl(sum(case when t2.partition_date>=DATE_SUB(current_date,1) then reply_num end),0) as reply_num_1
      ,nvl(sum(case when t2.partition_date>=DATE_SUB(current_date,1) then vote_num end),0) as vote_num_1
      ,nvl(sum(case when t2.partition_date>=DATE_SUB(current_date,1) then favor_num end),0) as favor_num_1
      ,nvl(sum(case when t2.partition_date>=DATE_SUB(current_date,1) then share_num end),0) as share_num_1
      ,nvl(round(avg(case when t2.partition_date>=DATE_SUB(current_date,1) then avg_page_stay end),2),0) as avg_page_stay_1
      -- previous 3 days
      ,nvl(sum(case when t2.partition_date>=DATE_SUB(current_date,3) then exp_pv end),0) as exp_pv_3
      ,nvl(sum(case when t2.partition_date>=DATE_SUB(current_date,3) then click_pv end),0) as click_pv_3
      ,nvl(sum(case when t2.partition_date>=DATE_SUB(current_date,3) then page_pv end),0) as page_pv_3
      ,nvl(sum(case when t2.partition_date>=DATE_SUB(current_date,3) then page_pv_20s end),0) as page_pv_20s_3
      ,nvl(sum(case when t2.partition_date>=DATE_SUB(current_date,3) then reply_num end),0) as reply_num_3
      ,nvl(sum(case when t2.partition_date>=DATE_SUB(current_date,3) then vote_num end),0) as vote_num_3
      ,nvl(sum(case when t2.partition_date>=DATE_SUB(current_date,3) then favor_num end),0) as favor_num_3
      ,nvl(sum(case when t2.partition_date>=DATE_SUB(current_date,3) then share_num end),0) as share_num_3
      ,nvl(round(avg(case when t2.partition_date>=DATE_SUB(current_date,3) then avg_page_stay end),2),0) as avg_page_stay_3
      -- previous 10 days
      ,nvl(sum(case when t2.partition_date>=DATE_SUB(current_date,10) then exp_pv end),0) as exp_pv_10
      ,nvl(sum(case when t2.partition_date>=DATE_SUB(current_date,10) then click_pv end),0) as click_pv_10
      ,nvl(sum(case when t2.partition_date>=DATE_SUB(current_date,10) then page_pv end),0) as page_pv_10
      ,nvl(sum(case when t2.partition_date>=DATE_SUB(current_date,10) then page_pv_20s end),0) as page_pv_20s_10
      ,nvl(sum(case when t2.partition_date>=DATE_SUB(current_date,10) then reply_num end),0) as reply_num_10
      ,nvl(sum(case when t2.partition_date>=DATE_SUB(current_date,10) then vote_num end),0) as vote_num_10
      ,nvl(sum(case when t2.partition_date>=DATE_SUB(current_date,10) then favor_num end),0) as favor_num_10
      ,nvl(sum(case when t2.partition_date>=DATE_SUB(current_date,10) then share_num end),0) as share_num_10
      ,nvl(round(avg(case when t2.partition_date>=DATE_SUB(current_date,10) then avg_page_stay end),2),0) as avg_page_stay_10
      -- lifetime (bounded below by the audit date in the WHERE clause)
      ,nvl(sum(exp_pv),0) as exp_pv
      ,nvl(sum(click_pv),0) as click_pv
      ,nvl(sum(page_pv),0) as page_pv
      ,nvl(sum(page_pv_20s),0) as page_pv_20s
      ,nvl(sum(reply_num),0) as reply_num
      ,nvl(sum(vote_num),0) as vote_num
      ,nvl(sum(favor_num),0) as favor_num
      ,nvl(sum(share_num),0) as share_num
      ,nvl(round(avg(avg_page_stay),2),0) as avg_page_stay
FROM
(
    -- t1: qualifying posts with the set of all their tag names
    select a.id,title,a.audit_date,collect_set(d.name) as tag_list
    from
    (
        -- online posts of content level 3 or above, from yesterday's snapshot
        select id,title,user_id,substr(audit_time,1,10) as audit_date
        from tl.tl_mp_api_tractate
        where partition_day= regexp_replace(DATE_SUB(current_date,1) ,'-','')
        and content_level>=3
        and is_online='true'
    )a
    join
    (
        -- keep only posts that carry tag 3369
        select distinct tractate_id
        from tl.tl_mp_api_tractate_tag_v3
        where partition_day= regexp_replace(DATE_SUB(current_date,1) ,'-','')
        and tag_v3_id='3369'
    )b
    on a.id=b.tractate_id
    join
    (
        -- every tag attached to each post
        select tractate_id,tag_v3_id
        from tl.tl_mp_api_tractate_tag_v3
        where partition_day= regexp_replace(DATE_SUB(current_date,1) ,'-','')
        group by tractate_id,tag_v3_id
    )c
    on a.id=c.tractate_id
    left join
    (
        -- tag id -> tag name
        select id,name
        from online.tl_hdfs_api_tag_3_0_view
        where partition_date= regexp_replace(DATE_SUB(current_date,1) ,'-','')
        group by id,name
    )d
    on d.id=c.tag_v3_id
    group by a.id,title,a.audit_date
)t1
join
(-- lifetime data: per-post daily metrics; rows before the audit date are dropped by the outer WHERE
    SELECT nvl(concat_ws('-',substr(t1.partition_date,1,4),substr(t1.partition_date,5,2),substr(t1.partition_date,7,2))
              ,concat_ws('-',substr(t2.create_date,1,4),substr(t2.create_date,5,2),substr(t2.create_date,7,2))) as partition_date
          ,nvl(t1.card_id,t2.tractate_id) as card_id
          ,exp_pv
          ,click_pv
          ,page_pv
          ,page_pv_20s
          ,reply_num
          ,vote_num
          ,favor_num
          ,share_num
          ,avg_page_stay
    from
    (
        -- behaviour metrics per post per day
        select nvl(nvl(a.card_id,e.business_id),f.business_id) as card_id
              ,nvl(nvl(a.partition_date,e.partition_date),f.partition_date) as partition_date
              ,sum(exp_pv) as exp_pv
              ,sum(click_pv) as click_pv
              ,sum(page_pv) as page_pv
              ,sum(page_pv_20s) as page_pv_20s
              ,round(sum(page_stay)/count(distinct e.cl_id,e.partition_date),2) as avg_page_stay
        from
        (-- exposure: one row per (day, device, card)
            select partition_date,card_id,cl_id,count(distinct app_session_id) as exp_pv
            from online.ml_community_precise_exposure_detail
            where partition_date>='20160101' AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
            AND action in ('page_precise_exposure','home_choiceness_card_exposure') -- from app version 7745 the action was renamed to page_precise_exposure
            AND is_exposure = '1' -- precise exposure only
            AND page_name ='home'
            AND tab_name = '精选'
            group by partition_date,cl_id,card_id
        )a
        left join
        (-- clicks: one row per (day, card, device)
            SELECT partition_date,params['card_id'] as card_id,cl_id,count(distinct app_session_id,cl_id) as click_pv
            from online.bl_hdfs_maidian_updates
            WHERE partition_date>='20160101' AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
            AND action='on_click_card'
            AND params['page_name'] ='home'
            AND params['tab_name'] = '精选'
            GROUP BY partition_date,params['card_id'],cl_id
        )b
        on a.partition_date=b.partition_date and a.card_id=b.card_id and a.cl_id=b.cl_id
        full join
        (-- reading time: total page_stay per (day, post, device).
         -- Aggregated to one row per join key so this join cannot fan out the
         -- exposure / click / page-view rows that share the key.
            SELECT partition_date,business_id,cl_id,sum(page_stay) as page_stay
            from
            (
                -- de-duplicated page-view events with a plausible stay time
                SELECT partition_date,cl_id,params['business_id'] as business_id,page_stay,time_str
                FROM online.bl_hdfs_maidian_updates
                WHERE partition_date >= '20160101'
                and partition_date <=regexp_replace(DATE_SUB(current_date,1) ,'-','')
                and action='page_view'
                and page_stay>=0 and page_stay<1000
                and page_name in ('post_detail','user_post_detail','doctor_post_detail')
                group by partition_date,cl_id,params['business_id'],page_stay,time_str
            )ev
            group by partition_date,business_id,cl_id
        )e
        on a.partition_date=e.partition_date and a.card_id=e.business_id and a.cl_id=e.cl_id
        full join
        (-- page-view pv: one row per (day, device, post)
            SELECT partition_date,cl_id,params['business_id'] as business_id
                  ,count(distinct time_str) as page_pv
                  ,count(distinct case when page_stay>=20 then time_str end) as page_pv_20s
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= '20160101'
            and partition_date <=regexp_replace(DATE_SUB(current_date,1) ,'-','')
            and action='page_view'
            and page_name in ('post_detail','user_post_detail','doctor_post_detail')
            group by partition_date,cl_id,params['business_id']
        )f
        on nvl(a.partition_date,e.partition_date)=f.partition_date and nvl(a.card_id,e.business_id)=f.business_id and nvl(a.cl_id,e.cl_id)=f.cl_id
        left join
        (
            -- devices flagged abnormal (anti-joined below)
            select distinct device_id
            from ML.ML_D_CT_DV_DEVICECLEAN_DIMEN_D
            where PARTITION_DAY =regexp_replace(DATE_SUB(current_date,1) ,'-','')
            AND is_abnormal_device = 'true'
        )c
        on nvl(nvl(a.cl_id,e.cl_id),f.cl_id)=c.device_id
        left join
        (
            -- yesterday's devices with active_type 1/2 (anti-joined below)
            select partition_date,device_id
            from online.ml_device_day_active_status
            where partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
            and active_type in ('1','2')
        )d
        on nvl(nvl(a.cl_id,e.cl_id),f.cl_id)=d.device_id and nvl(nvl(a.partition_date,e.partition_date),f.partition_date)=d.partition_date
        where c.device_id is null
        and d.device_id is null
        group by nvl(nvl(a.card_id,e.business_id),f.business_id),nvl(nvl(a.partition_date,e.partition_date),f.partition_date)
    )t1
    full join
    (
        -- interaction metrics per post per day
        SELECT a.tractate_id,a.create_date
              ,sum(case when type='reply' then num end) as reply_num
              ,sum(case when type='vote' then num end) as vote_num
              ,sum(case when type='favor' then num end) as favor_num
              ,sum(case when type='share' then num end) as share_num
        from
        ( -- real comments
            SELECT tractate_id,create_date,a.user_id,'reply' as type,sum(reply_num) as num
            from
            (-- comment count per post / user / day
                SELECT tractate_id,user_id,regexp_replace(substr(create_time,1,10),'-','') as create_date,count(distinct create_time) as reply_num
                FROM online.tl_hdfs_api_tractate_reply_view
                WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
                GROUP by tractate_id,user_id,regexp_replace(substr(create_time,1,10),'-','')
            )a
            group by tractate_id,create_date,a.user_id
            union all
            -- real votes (likes)
            SELECT tractate_id,create_date,a.user_id,'vote' as type,sum(vote_num) as num
            FROM
            (
                SELECT tractate_id,user_id,regexp_replace(substr(create_time,1,10),'-','') as create_date,count(distinct create_time) as vote_num
                FROM online.tl_hdfs_api_tractate_vote_view
                WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','')
                group by tractate_id,user_id,regexp_replace(substr(create_time,1,10),'-','')
            )a
            GROUP BY tractate_id,create_date,a.user_id
            union all
            -- real favorites
            SELECT tractate_id,regexp_replace(substr(create_time,1,10),'-','') as create_date,user_id,'favor' as type,count(distinct create_time) as num
            FROM online.tl_hdfs_api_tractate_favor_view
            WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','')
            group by user_id,tractate_id,regexp_replace(substr(create_time,1,10),'-','')
            union all
            -- share clicks
            SELECT params['business_id'] as tractate_id,partition_date as create_date,user_id,'share' as type,count(distinct time_str) as num
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= '20160101'
            and partition_date <=regexp_replace(DATE_SUB(current_date,1) ,'-','')
            and action='page_click_share'
            and page_name in ('post_detail','user_post_detail','doctor_post_detail')
            group by params['business_id'],partition_date,user_id
        )a
        left join
        (
            -- users flagged abnormal (anti-joined below)
            select distinct user_id
            from ml.ML_D_CT_UI_USERCLEAN_DIMEN_D
            where PARTITION_DAY =regexp_replace(DATE_SUB(current_date,1) ,'-','')
            and is_abnormal_user = 'true'
        )b
        on a.user_id=b.user_id
        left join
        (
            -- users seen yesterday on a device with active_type 1/2 (anti-joined below)
            select a.partition_date,user_id
            from
            (
                select partition_date,device_id
                from online.ml_device_day_active_status
                where partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
                and active_type in ('1','2')
            )a
            join
            (
                select partition_date,cl_id,user_id
                from online.bl_hdfs_maidian_updates
                where partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
                and user_id is not null and user_id<>''
                group by partition_date,cl_id,user_id
            )b
            on a.partition_date=b.partition_date and a.device_id=b.cl_id
        )c
        on a.create_date=c.partition_date and a.user_id=c.user_id
        where b.user_id is null
        and c.user_id is null
        group by a.tractate_id,a.create_date
    )t2
    -- both sides carry the day as 'yyyyMMdd' here; it is reformatted to 'yyyy-MM-dd' in the select list above
    on t1.card_id=t2.tractate_id and t1.partition_date =t2.create_date
)t2
on t1.id= t2.card_id
where t2.partition_date>=t1.audit_date
group by t1.id,title,audit_date,tag_list;
SELECT partition_day as `日期`
,count(post_id) as `帖子数量`
,count(post_id) as `新手精选帖子数量`
,nvl(sum(exp_pv_1),0) as `前1日曝光`
,nvl(concat(round(sum(click_pv_1)/sum(exp_pv_1)*100,2),'%'),0) as `前1日ctr`
,nvl(concat(round(sum(page_pv_20s_1)/sum(exp_pv_1)*100,2),'%'),0) as `日优秀浏览点击ctr`
......
SELECT
    -- Daily roll-up of pm.tl_pm_userpost_old_d_v3: one output row per partition_day.
    -- Ratios are rendered as 'NN.NN%' strings; when the ratio is NULL
    -- (e.g. zero or missing exposure) the nvl() fallback 0 is shown instead.
    partition_day AS `日期`,
    count(post_id) AS `更美优选帖子数量`,
    -- previous-1-day figures
    nvl(sum(exp_pv_1), 0) AS `前1日曝光`,
    nvl(concat(round(sum(click_pv_1) / sum(exp_pv_1) * 100, 2), '%'), 0) AS `前1日ctr`,
    nvl(concat(round(sum(page_pv_20s_1) / sum(exp_pv_1) * 100, 2), '%'), 0) AS `日优秀浏览点击ctr`,
    nvl(round(avg(avg_page_stay_1), 2), 0) AS `前1日平均阅读时长(s)`,
    -- lifetime figures
    nvl(sum(exp_pv), 0) AS `历史曝光`,
    nvl(concat(round(sum(click_pv) / sum(exp_pv) * 100, 2), '%'), 0) AS `历史ctr`,
    nvl(concat(round(sum(page_pv_20s) / sum(exp_pv) * 100, 2), '%'), 0) AS `历史优秀浏览点击ctr`,
    nvl(round(avg(avg_page_stay), 2), 0) AS `历史平均阅读时长(s)`
FROM pm.tl_pm_userpost_old_d_v3
WHERE partition_day >= '20201102'
GROUP BY partition_day
ORDER BY `日期`
\ No newline at end of file
SELECT post_id as `帖子id`
SELECT post_id as `新手精选帖子id`
,title as `帖子标题`
,audit_date as `审核日期`
,tag_list as `所有关联标签`
......
SELECT
    -- Per-post detail for yesterday's partition of pm.tl_pm_userpost_old_d_v3,
    -- sorted by previous-10-day exposure, highest first.
    -- Interaction rate = (replies + votes + favorites + shares) / page views.
    -- Ratios are rendered as 'NN.NN%' strings; a NULL ratio falls back to 0.
    post_id AS `更美优选帖子id`,
    title AS `帖子标题`,
    audit_date AS `审核日期`,
    tag_list AS `所有关联标签`,
    -- previous 1 day
    nvl(concat(round((nvl(reply_num_1, 0) + nvl(vote_num_1, 0) + nvl(favor_num_1, 0) + nvl(share_num_1, 0)) / page_pv_1 * 100, 2), '%'), 0) AS `前1日互动率`,
    nvl(concat(round(click_pv_1 / exp_pv_1 * 100, 2), '%'), 0) AS `前1日ctr`,
    nvl(concat(round(page_pv_20s_1 / page_pv_1 * 100, 2), '%'), 0) AS `前1日浏览20s以上pv/前1日总浏览pv`,
    nvl(click_pv_1, 0) AS `前1日点击`,
    nvl(exp_pv_1, 0) AS `前1日曝光`,
    nvl(page_pv_1, 0) AS `前1日浏览pv`,
    nvl(page_pv_20s_1, 0) AS `前1日浏览20s以上pv`,
    nvl(reply_num_1, 0) AS `前1日真实评论`,
    nvl(vote_num_1, 0) AS `前1日真实点赞`,
    nvl(favor_num_1, 0) AS `前1日收藏`,
    nvl(share_num_1, 0) AS `前1日转发`,
    nvl(avg_page_stay_1, 0) AS `前1日平均阅读时长(s)`,
    -- previous 3 days
    nvl(concat(round((nvl(reply_num_3, 0) + nvl(vote_num_3, 0) + nvl(favor_num_3, 0) + nvl(share_num_3, 0)) / page_pv_3 * 100, 2), '%'), 0) AS `前3日互动率`,
    nvl(concat(round(click_pv_3 / exp_pv_3 * 100, 2), '%'), 0) AS `前3日ctr`,
    nvl(concat(round(page_pv_20s_3 / page_pv_3 * 100, 2), '%'), 0) AS `前3日浏览20s以上pv/前3日总浏览pv`,
    nvl(click_pv_3, 0) AS `前3日点击`,
    nvl(exp_pv_3, 0) AS `前3日曝光`,
    nvl(page_pv_3, 0) AS `前3日浏览pv`,
    nvl(page_pv_20s_3, 0) AS `前3日浏览20s以上pv`,
    nvl(reply_num_3, 0) AS `前3日真实评论`,
    nvl(vote_num_3, 0) AS `前3日真实点赞`,
    nvl(favor_num_3, 0) AS `前3日收藏`,
    nvl(share_num_3, 0) AS `前3日转发`,
    nvl(avg_page_stay_3, 0) AS `前3日平均阅读时长(s)`,
    -- previous 10 days
    nvl(concat(round((nvl(reply_num_10, 0) + nvl(vote_num_10, 0) + nvl(favor_num_10, 0) + nvl(share_num_10, 0)) / page_pv_10 * 100, 2), '%'), 0) AS `前10日互动率`,
    nvl(concat(round(click_pv_10 / exp_pv_10 * 100, 2), '%'), 0) AS `前10日ctr`,
    nvl(concat(round(page_pv_20s_10 / page_pv_10 * 100, 2), '%'), 0) AS `前10日浏览20s以上pv/前10日总浏览pv`,
    nvl(click_pv_10, 0) AS `前10日点击`,
    nvl(exp_pv_10, 0) AS `前10日曝光`,
    nvl(page_pv_10, 0) AS `前10日浏览pv`,
    nvl(page_pv_20s_10, 0) AS `前10日浏览20s以上pv`,
    nvl(reply_num_10, 0) AS `前10日真实评论`,
    nvl(vote_num_10, 0) AS `前10日真实点赞`,
    nvl(favor_num_10, 0) AS `前10日收藏`,
    nvl(share_num_10, 0) AS `前10日转发`,
    nvl(avg_page_stay_10, 0) AS `前10日平均阅读时长(s)`,
    -- lifetime
    nvl(concat(round((nvl(reply_num, 0) + nvl(vote_num, 0) + nvl(favor_num, 0) + nvl(share_num, 0)) / page_pv * 100, 2), '%'), 0) AS `历史互动率`,
    nvl(concat(round(click_pv / exp_pv * 100, 2), '%'), 0) AS `历史ctr`,
    nvl(concat(round(page_pv_20s / page_pv * 100, 2), '%'), 0) AS `历史浏览20s以上pv/历史总浏览pv`,
    nvl(click_pv, 0) AS `历史点击`,
    nvl(exp_pv, 0) AS `历史曝光`,
    nvl(page_pv, 0) AS `历史浏览pv`,
    nvl(page_pv_20s, 0) AS `历史浏览20s以上pv`,
    nvl(reply_num, 0) AS `历史真实评论`,
    nvl(vote_num, 0) AS `历史真实点赞`,
    nvl(favor_num, 0) AS `历史收藏`,
    nvl(share_num, 0) AS `历史转发`,
    nvl(avg_page_stay, 0) AS `历史平均阅读时长(s)`
FROM pm.tl_pm_userpost_old_d_v3
WHERE partition_day = regexp_replace(date_sub(current_date, 1), '-', '')
ORDER BY `前10日曝光` DESC
SELECT post_id as `帖子id`
SELECT post_id as `新手精选帖子id`
,title as `帖子标题`
,audit_date as `审核日期`
,tag_list as `所有关联标签`
......
SELECT
    -- Per-post key metrics for yesterday's partition of pm.tl_pm_userpost_old_d_v3,
    -- sorted by previous-10-day exposure, highest first.
    -- Ratios are rendered as 'NN.NN%' strings; a NULL ratio falls back to 0.
    post_id AS `更美优选帖子id`,
    title AS `帖子标题`,
    audit_date AS `审核日期`,
    tag_list AS `所有关联标签`,
    -- previous 1 day
    nvl(concat(round(click_pv_1 / exp_pv_1 * 100, 2), '%'), 0) AS `前1日ctr`,
    nvl(concat(round(page_pv_20s_1 / page_pv_1 * 100, 2), '%'), 0) AS `前1日浏览20s以上pv/前1日总浏览pv`,
    nvl(exp_pv_1, 0) AS `前1日曝光`,
    nvl(avg_page_stay_1, 0) AS `前1日平均阅读时长(s)`,
    -- previous 3 days
    nvl(concat(round(click_pv_3 / exp_pv_3 * 100, 2), '%'), 0) AS `前3日ctr`,
    nvl(concat(round(page_pv_20s_3 / page_pv_3 * 100, 2), '%'), 0) AS `前3日浏览20s以上pv/前3日总浏览pv`,
    nvl(exp_pv_3, 0) AS `前3日曝光`,
    nvl(avg_page_stay_3, 0) AS `前3日平均阅读时长(s)`,
    -- previous 10 days
    nvl(concat(round(click_pv_10 / exp_pv_10 * 100, 2), '%'), 0) AS `前10日ctr`,
    nvl(concat(round(page_pv_20s_10 / page_pv_10 * 100, 2), '%'), 0) AS `前10日浏览20s以上pv/前10日总浏览pv`,
    nvl(exp_pv_10, 0) AS `前10日曝光`,
    nvl(avg_page_stay_10, 0) AS `前10日平均阅读时长(s)`,
    -- lifetime
    nvl(concat(round(click_pv / exp_pv * 100, 2), '%'), 0) AS `历史ctr`,
    nvl(concat(round(page_pv_20s / page_pv * 100, 2), '%'), 0) AS `历史浏览20s以上pv/历史总浏览pv`,
    nvl(exp_pv, 0) AS `历史曝光`,
    nvl(page_pv, 0) AS `历史浏览pv`,
    nvl(avg_page_stay, 0) AS `历史平均阅读时长(s)`
FROM pm.tl_pm_userpost_old_d_v3
WHERE partition_day = regexp_replace(date_sub(current_date, 1), '-', '')
ORDER BY `前10日曝光` DESC
......@@ -252,7 +252,10 @@ LEFT JOIN
and referrer='custom_special'
and ((partition_date<'20200924' and params['referrer_id']='605')
or (partition_date>='20200924' and partition_date<'20201020' and params['referrer_id']='707')
or (partition_date>='20201020' and params['referrer_id']='901'))
or (partition_date>='20201020' and partition_date <'20201027' and params['referrer_id']='901')
or (partition_date>='20201027' and partition_date <'20201103' and params['referrer_id']='939')
or (partition_date>='20201103' and partition_date <'20201113' and params['referrer_id']='982')
or (partition_date>='20201113' and partition_date <'20201121' and params['referrer_id']='1055'))
group by partition_date,
cl_id
)t3
......
clue_by_channel=线索pv
welfare_by_channel=核心页pv
wel_clue_by_channel=分渠道3日及7日内核心页和线索pv
\ No newline at end of file
wel_clue_by_channel=分渠道3日及7日内核心页和线索pv
welfare_by_channel_uv=商详页uv
\ No newline at end of file
#step2.job
type=command
dependencies=step1_1,step1_2,step1_3,step1_4,step1_5,step1_6,step1_7,step1_8,step1_9,step1_10
command=curl -X GET http://localhost:8553/api/report/email/wel_clue_by_channel/huchunhe@igengmei.com,wangjun@igengmei.com,zhaoyang@igengmei.com/weiyimin@igengmei.com
\ No newline at end of file
command=curl -X GET http://localhost:8553/api/report/email/wel_clue_by_channel/huchunhe@igengmei.com,wangjun@igengmei.com,zhaoyang@igengmei.com,xukai@igengmei.com,duanchenxia@igengmei.com/weiyimin@igengmei.com
\ No newline at end of file
......@@ -86,7 +86,7 @@ SELECT
FROM ONLINE.ML_TRADE_ORDER_DETAIL_DAY T
WHERE
T.PARTITION_DATE = regexp_replace(date_sub(current_date(),1),'-','')
AND regexp_replace(SUBSTR(T.pay_date,1,10),'-','') >= REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
AND regexp_replace(SUBSTR(T.pay_date,1,10),'-','') >= REGEXP_REPLACE(trunc(date_sub(current_date(),8),'MM'),'-','')
AND regexp_replace(SUBSTR(T.pay_date,1,10),'-','')<=regexp_replace(date_sub(current_date,1),'-','')
AND T.is_pure_user='true'
GROUP BY T.USER_ID,T.merchant_id,regexp_replace(SUBSTR(T.PAY_DATE,1,7),'-','')
......
......@@ -49,12 +49,12 @@ SELECT
SELECT action_date,cl_id,count(1) as wel_pv
FROM
(
SELECT concat_ws('-',substr(partition_date,0,4),substr(partition_date,5,2),substr(partition_date,7,2)) AS action_date,cl_id
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= REGEXP_REPLACE(trunc(date_sub(current_date(),8),'MM'),'-','')
SELECT concat_ws('-',substr(partition_date,0,4),substr(partition_date,5,2),substr(partition_date,7,2)) AS action_date,cl_id
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= REGEXP_REPLACE(trunc(date_sub(current_date(),8),'MM'),'-','')
AND partition_date <=regexp_replace(date_sub(current_date(),1),'-','')
AND page_name in ('welfare_detail','organization_detail','expert_detail')
AND action = 'page_view'
AND page_name in ('welfare_detail','organization_detail','expert_detail')
AND action = 'page_view'
)a
LEFT JOIN
( -- 2.去掉疑似机构刷量的PV和UV
......
SELECT
    -- Devices with active_type 1/2, per first-active day / channel / platform,
    -- together with how many of them viewed a welfare_detail page on that
    -- day, within 3 days (day 0..2) and within 7 days (day 0..6).
    -- The date range runs from the first day of the month that contains
    -- (today - 8 days) up to yesterday.
    first_active_date AS `日期`,
    channel AS `渠道`,
    device_os_type AS `平台`,
    count(DISTINCT device.device_id) AS `当天新活量`,
    nvl(count(DISTINCT IF(pv.action_date = device.first_active_date, pv.cl_id, NULL)), 0) AS `当日商详页uv`,
    nvl(count(DISTINCT IF(pv.action_date >= device.first_active_date AND pv.action_date <= date_add(device.first_active_date, 2), pv.cl_id, NULL)), 0) AS `3日内商详页uv`,
    nvl(count(DISTINCT IF(pv.action_date >= device.first_active_date AND pv.action_date <= date_add(device.first_active_date, 6), pv.cl_id, NULL)), 0) AS `7日内商详页uv`
FROM
(
    -- One row per (day, device, platform, channel); for devices matched in the
    -- iOS idfa table, that table's channel replaces the generic source type.
    SELECT
        first_active_date,
        device_id,
        device_os_type,
        IF(ios_device.channel IS NOT NULL, ios_device.channel, first_channel_source_type) AS channel
    FROM
    (
        SELECT
            device_id,
            device_os_type,
            first_channel_source_type,
            concat_ws('-', substr(partition_date, 1, 4), substr(partition_date, 5, 2), substr(partition_date, 7, 2)) AS first_active_date
        FROM online.ml_device_day_active_status
        WHERE partition_date >= regexp_replace(trunc(date_sub(current_date(), 8), 'MM'), '-', '')
          AND partition_date <= regexp_replace(date_sub(current_date(), 1), '-', '')
          AND active_type IN ('1', '2')
          -- channel exclusion list
          AND first_channel_source_type NOT IN (
                'yqxiu1', 'yqxiu2', 'yqxiu3', 'yqxiu4', 'yqxiu5', 'mxyc1', 'mxyc2', 'mxyc3',
                'wanpu', 'jinshan', 'jx', 'maimai', 'zhuoyi', 'huatian', 'suopingjingling', 'mocha', 'mizhe', 'meika', 'lamabang',
                'js-az1', 'js-az2', 'js-az3', 'js-az4', 'js-az5', 'jfq-az1', 'jfq-az2', 'jfq-az3', 'jfq-az4', 'jfq-az5', 'toufang1',
                'toufang2', 'toufang3', 'toufang4', 'toufang5', 'toufang6', 'TF-toufang1', 'TF-toufang2', 'TF-toufang3', 'TF-toufang4',
                'TF-toufang5', 'tf-toufang1', 'tf-toufang2', 'tf-toufang3', 'tf-toufang4', 'tf-toufang5', 'benzhan', 'promotion_aso100',
                'promotion_qianka', 'promotion_xiaoyu', 'promotion_dianru', 'promotion_malioaso', 'promotion_malioaso-shequ',
                'promotion_shike', 'promotion_julang_jl03', 'promotion_zuimei', '', 'unknown')
          AND first_channel_source_type NOT LIKE 'promotion\_jf\_%'
    ) active_dev
    LEFT JOIN
    (
        -- earliest channel per idfa (device table used to tell App Store detail channels apart)
        SELECT channel, idfa
        FROM
        (
            SELECT
                channel,
                idfa,
                row_number() OVER (PARTITION BY idfa ORDER BY active_time ASC) rn
            FROM online.tl_hdfs_ios_idfa_tmp
        ) ranked
        WHERE rn = 1
    ) ios_device
    ON ios_device.idfa = active_dev.device_id
    GROUP BY
        first_active_date,
        device_id,
        device_os_type,
        IF(ios_device.channel IS NOT NULL, ios_device.channel, first_channel_source_type)
) device
LEFT JOIN
(
    -- welfare_detail page views per (day, device), spam devices removed
    SELECT action_date, cl_id, count(1) AS wel_pv
    FROM
    (
        SELECT
            concat_ws('-', substr(partition_date, 0, 4), substr(partition_date, 5, 2), substr(partition_date, 7, 2)) AS action_date,
            cl_id
        FROM online.bl_hdfs_maidian_updates
        WHERE partition_date >= regexp_replace(trunc(date_sub(current_date(), 8), 'MM'), '-', '')
          AND partition_date <= regexp_replace(date_sub(current_date(), 1), '-', '')
          AND page_name IN ('welfare_detail')
          AND action = 'page_view'
    ) views
    LEFT JOIN
    (
        -- 2. drop PV/UV from devices suspected of institutional traffic inflation
        SELECT device_id
        FROM ml.ml_d_ct_dv_devicespam_d
        WHERE partition_day = regexp_replace(date_sub(current_date(), 1), '-', '')
    ) spam_pv
    ON views.cl_id = spam_pv.device_id
    WHERE spam_pv.device_id IS NULL
    GROUP BY action_date, cl_id
) pv
ON device.device_id = pv.cl_id
GROUP BY
    first_active_date,
    channel,
    device_os_type
\ No newline at end of file
yeji_day=业绩基础数据
\ No newline at end of file
yeji_day=分机构转化数据
\ No newline at end of file
--***************************************************************
--*脚本名称:
--*功能: 业绩基础数据
--*功能: 分机构转化数据
--*业务名称: pm
--*输入数据:
--*作者: yindanlei@igengmei.com
......@@ -17,25 +17,29 @@ USE pm;
CREATE TABLE IF NOT EXISTS pm.tl_pm_yeji_d
(
data string comment '{"chs_name":"日期","description":"","etl":"","value":"","remark":""}',
merchant_name string comment '{"chs_name":"商户名称","description":"","etl":"","value":"","remark":""}',
business_group_name string comment '{"chs_name":"商务组","description":"","etl":"","value":"","remark":""}',
business_partener_name string comment '{"chs_name":"商务名称","description":"","etl":"","value":"","remark":""}',
hospital_name string comment '{"chs_name":"机构名称","description":"","etl":"","value":"","remark":""}',
hospital_id string comment '{"chs_name":"机构id","description":"","etl":"","value":"","remark":""}',
city_name string comment '{"chs_name":"城市","description":"","etl":"","value":"","remark":""}',
hexin_exp_pv BIGINT comment '{"chs_name":"核心卡片曝光pv","description":"","etl":"","value":"","remark":""}',
service_exp_pv BIGINT comment '{"chs_name":"商品卡片曝光pv","description":"","etl":"","value":"","remark":""}',
doc_hos_exp_pv BIGINT comment '{"chs_name":"医生医院卡片曝光pv","description":"","etl":"","value":"","remark":""}',
hexin_pv BIGINT comment '{"chs_name":"核心页pv","description":"","etl":"","value":"","remark":""}',
service_pv BIGINT comment '{"chs_name":"美购页pv","description":"","etl":"","value":"","remark":""}',
service_page_pv BIGINT comment '{"chs_name":"商详页pv","description":"","etl":"","value":"","remark":""}',
doc_hos_page_pv BIGINT comment '{"chs_name":"医生医院页pv","description":"","etl":"","value":"","remark":""}',
click_pv BIGINT comment '{"chs_name":"线索点击人次","description":"","etl":"","value":"","remark":""}',
clue_num BIGINT comment '{"chs_name":"有效线索人次","description":"","etl":"","value":"","remark":""}',
discount BIGINT comment '{"chs_name":"总验证抽成","description":"","etl":"","value":"","remark":""}',
ord_num BIGINT comment '{"chs_name":"验证人次","description":"","etl":"","value":"","remark":""}',
service_price BIGINT comment '{"chs_name":"纯用户支付gmv","description":"","etl":"","value":"","remark":""}',
discount BIGINT comment '{"chs_name":"佣金","description":"","etl":"","value":"","remark":""}',
recharge_amount BIGINT comment '{"chs_name":"广告收款","description":"","etl":"","value":"","remark":""}',
chongzhi_amount BIGINT comment '{"chs_name":"广告消耗","description":"","etl":"","value":"","remark":""}',
all_amount BIGINT comment '{"chs_name":"广告总消耗(含返点)","description":"","etl":"","value":"","remark":""}',
cpc_amount BIGINT comment '{"chs_name":"cpc总消耗","description":"","etl":"","value":"","remark":""}',
cpc_chongzhi_amount BIGINT comment '{"chs_name":"cpc充值消耗","description":"","etl":"","value":"","remark":""}',
budget BIGINT comment '{"chs_name":"cpc商品有效预算","description":"","etl":"","value":"","remark":""}',
cpc_click_num BIGINT comment '{"chs_name":"cpc点击数","description":"","etl":"","value":"","remark":""}',
liulan_amount BIGINT comment '{"chs_name":"浏览收入","description":"","etl":"","value":"","remark":""}',
sixin_amount BIGINT comment '{"chs_name":"私信收入","description":"","etl":"","value":"","remark":""}',
service_price BIGINT comment '{"chs_name":"纯用户支付金额","description":"","etl":"","value":"","remark":""}'
)comment '业绩基础数据'
sixin_amount BIGINT comment '{"chs_name":"私信收入","description":"","etl":"","value":"","remark":""}'
)comment '分机构转化数据'
PARTITIONED BY (PARTITION_DAY STRING comment '分区日期')
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
......
......@@ -10,43 +10,48 @@ SET role admin;
INSERT OVERWRITE TABLE pm.tl_pm_yeji_d PARTITION (PARTITION_DAY = ${partition_day})
SELECT nvl(nvl(nvl(nvl(t1.partition_date,t2.DATA_DAY),t3.DATA_MONTH),t4.PAY_MONTH),t5.day_month) as data
,nvl(nvl(nvl(nvl(t1.merchant_name,t2.merchant_name),t3.merchant_name),t4.merchant_name),t5.merchant_name) as merchant_name
,nvl(nvl(nvl(nvl(t1.business_group_name,t2.business_group_name),t3.business_group_name),t4.business_group_name),t5.business_group_name) as business_group_name
,nvl(nvl(nvl(nvl(t1.business_partener_name,t2.business_partener_name),t3.business_partener_name),t4.business_partener_name),t5.business_partener_name) as business_partener_name
,nvl(nvl(nvl(nvl(t1.city_name,t2.city_name),t3.city_name),t4.city_name),t5.city_name) as city_name
SELECT nvl(nvl(nvl(nvl(nvl(nvl(t1.partition_date,t2.partition_date),t3.partition_date),t4.partition_date),t5.partition_date),t6.partition_date),t7.partition_date) as data
,nvl(nvl(nvl(nvl(nvl(nvl(t1.hospital_name,t2.hospital_name),t3.hospital_name),t4.hospital_name),t5.hospital_name),t6.hospital_name),t7.hospital_name) as hospital_name
,nvl(nvl(nvl(nvl(nvl(nvl(t1.hospital_id,t2.hospital_id),t3.hospital_id),t4.hospital_id),t5.hospital_id),t6.hospital_id),t7.hospital_id) as hospital_id
,nvl(nvl(nvl(nvl(nvl(nvl(t1.city_name,t2.city_name),t3.city_name),t4.city_name),t5.city_name),t6.city_name),t7.city_name) as city_name
--,nvl(nvl(nvl(nvl(t1.business_group_name,t2.business_group_name),t3.business_group_name),t4.business_group_name),t5.business_group_name) as `商务组`
,NVL(hexin_exp_pv,0) AS hexin_exp_pv
,nvl(service_exp_pv,0) as service_exp_pv
,nvl(doc_hos_exp_pv,0) as doc_hos_exp_pv
,nvl(hexin_pv,0) as hexin_pv
,nvl(service_pv,0) as service_pv
,nvl(service_page_pv ,0) as service_page_pv
,nvl(doc_hos_page_pv,0) as doc_hos_page_pv
,nvl(click_pv,0) as click_pv
,nvl(clue_num,0) as clue_num
,nvl(ord_num,0) as ord_num
,nvl(service_price,0) as service_price
,nvl(discount,0) as discount
,nvl(recharge_amount,0) as recharge_amount
,nvl(chongzhi_amount,0) as chongzhi_amount
,nvl(all_amount,0) as all_amount
,nvl(cpc_amount,0) as cpc_amount
,nvl(cpc_chongzhi_amount,0) as cpc_chongzhi_amount
,nvl(budget,0) as budget
,nvl(cpc_click_num,0) as cpc_click_num
,nvl(liulan_amount,0) as liulan_amount
,nvl(sixin_amount,0) as sixin_amount
,nvl(service_price,0) as service_price
FROM
---核心、美购页pv
---医生医院、美购页pv
(
SELECT
a.partition_date
,merchant_name
,b.business_group_name
,b.business_partener_name
,hospital_name
,hospital_id
,city_name
,SUM(doc_hos_pv) as doc_hos_page_pv
,SUM(service_pv) as service_page_pv
,SUM(hexin_pv) as hexin_pv
,SUM(service_pv) as service_pv
FROM
(
SELECT --之前是根据merchant_id算的,有重复,如果按doctor_id值不会重复
partition_date,doctor_id,SUM(pv_num) as hexin_pv,SUM(case when business_type = 'service' then pv_num end) as service_pv
partition_date,doctor_id,SUM(case when business_type in ('org','doctor') then pv_num end) as doc_hos_pv,SUM(case when business_type = 'service' then pv_num end) as service_pv,SUM(pv_num) as hexin_pv
FROM online.ml_doctor_event_indic_day
WHERE partition_date=REGEXP_REPLACE(date_sub(current_date(),1),'-','')
WHERE partition_date >= REGEXP_REPLACE(trunc(date_sub(current_date(),8),'MM'),'-','')
AND partition_date <=regexp_replace(date_sub(current_date(),1),'-','')
AND merchant_id IS NOT NULL
AND merchant_id <> ''
AND business_type IN ('service','org', 'doctor')
......@@ -59,25 +64,104 @@ FROM
( --商务归属1009更新
--!! 注意:这里的日期变量设置为当前日期的前一天,定时任务为每月第一天,故查询的为上月最后一天的归属
SELECT
doctor_id,merchant_name,business_group_name,business_partener_name,city_name
doctor_id,city_name,hospital_id,hospital_name
FROM
online.ml_doctor_relation_dimen_day
WHERE partition_date =REGEXP_REPLACE(date_sub(current_date(),1),'-','')
GROUP BY doctor_id,merchant_name,business_group_name,business_partener_name,city_name
WHERE partition_date =regexp_replace(date_sub(current_date(),1),'-','')
AND hospital_is_online='true'
GROUP BY doctor_id,city_name,hospital_id,hospital_name
)b
ON a.doctor_id=b.doctor_id
GROUP BY a.partition_date
,merchant_name
,b.business_group_name
,b.business_partener_name
,hospital_name
,hospital_id
,city_name
)t1
FULL JOIN ---医生医院、美购页曝光
(
    -- Exposure PV per day and hospital. The inner union yields one row per
    -- (partition_date, doctor_id, type) from three sources:
    --   1. service exposures from the doctor event indicator table (type = service)
    --   2. doctor-card exposures from the precise exposure detail table (type = doc_hos)
    --   3. hospital-card exposures from the same detail table, mapped to doctor ids
    --      through tl_hdfs_doctor_view (type = doc_hos)
    -- Date window: first day of the month containing (today - 8 days) through yesterday.
    SELECT
        exposure.partition_date,
        dim.hospital_name,
        dim.hospital_id,
        dim.city_name,
        SUM(CASE WHEN exposure.type = 'service' THEN exposure.exp_pv END) AS service_exp_pv,
        SUM(CASE WHEN exposure.type <> 'service' THEN exposure.exp_pv END) AS doc_hos_exp_pv,
        SUM(exposure.exp_pv) AS hexin_exp_pv
    FROM
    (
        -- 1. service exposures
        SELECT
            partition_date,
            doctor_id,
            SUM(pv_num) AS exp_pv,
            'service' AS type
        FROM online.ml_doctor_event_indic_day
        WHERE partition_date >= REGEXP_REPLACE(TRUNC(DATE_SUB(CURRENT_DATE(), 8), 'MM'), '-', '')
          AND partition_date <= REGEXP_REPLACE(DATE_SUB(CURRENT_DATE(), 1), '-', '')
          AND merchant_id IS NOT NULL
          AND merchant_id <> ''
          AND business_type = 'service'
          AND data_type = 'native_exposure'
          AND cl_id <> ''
          AND cl_id <> '0'
        GROUP BY partition_date, doctor_id, 'service'

        UNION ALL

        -- 2. doctor-card exposures (card_id is used as the doctor id)
        SELECT
            partition_date,
            card_id AS doctor_id,
            COUNT(1) AS exp_pv,
            'doc_hos' AS type
        FROM online.ml_community_precise_exposure_detail
        WHERE partition_date >= REGEXP_REPLACE(TRUNC(DATE_SUB(CURRENT_DATE(), 8), 'MM'), '-', '')
          AND partition_date <= REGEXP_REPLACE(DATE_SUB(CURRENT_DATE(), 1), '-', '')
          AND card_content_type = 'doctor'
          AND card_id <> ''
          AND cl_id <> ''
          AND cl_id <> '0'
        GROUP BY partition_date, card_id, 'doc_hos'

        UNION ALL

        -- 3. hospital-card exposures, re-keyed from hospital id to doctor id
        SELECT
            hos.partition_date,
            acct.doctor_id,
            SUM(hos.exp_pv) AS exp_pv,
            'doc_hos' AS type
        FROM
        (
            SELECT
                partition_date,
                card_id AS hospital_id,
                COUNT(1) AS exp_pv
            FROM online.ml_community_precise_exposure_detail
            WHERE partition_date >= REGEXP_REPLACE(TRUNC(DATE_SUB(CURRENT_DATE(), 8), 'MM'), '-', '')
              AND partition_date <= REGEXP_REPLACE(DATE_SUB(CURRENT_DATE(), 1), '-', '')
              AND card_content_type = 'hospital'
              AND card_id <> ''
              AND cl_id <> ''
              AND cl_id <> '0'
            GROUP BY partition_date, card_id
        ) hos
        LEFT JOIN
        (
            SELECT
                id AS doctor_id,
                hospital_id
            FROM online.tl_hdfs_doctor_view
            WHERE partition_date = REGEXP_REPLACE(DATE_SUB(CURRENT_DATE(), 1), '-', '')
              AND doctor_type = '1'
        ) acct
            ON hos.hospital_id = acct.hospital_id
        GROUP BY hos.partition_date, acct.doctor_id, 'doc_hos'
    ) exposure
    LEFT JOIN
    (
        -- Business attribution (updated 10-09).
        -- NOTE: the date here is the day before the current date. The scheduled job
        -- runs on the first day of each month, so this reads the attribution as of
        -- the last day of the previous month.
        SELECT
            doctor_id,
            city_name,
            hospital_name,
            hospital_id
        FROM online.ml_doctor_relation_dimen_day
        WHERE partition_date = REGEXP_REPLACE(DATE_SUB(CURRENT_DATE(), 1), '-', '')
          AND hospital_is_online = 'true'
        GROUP BY doctor_id, city_name, hospital_name, hospital_id
    ) dim
        ON exposure.doctor_id = dim.doctor_id
    GROUP BY
        exposure.partition_date,
        dim.hospital_name,
        dim.hospital_id,
        dim.city_name
)t2
ON t1.partition_date=t2.partition_date AND t1.hospital_name=t2.hospital_name
AND t1.hospital_id=t2.hospital_id AND t1.city_name=t2.city_name
FULL JOIN
(
SELECT DATA_DAY
,merchant_name
,b.business_group_name
,b.business_partener_name
SELECT DATA_DAY as partition_date
,hospital_name
,hospital_id
,city_name
,SUM(clue_num) as clue_num
FROM
......@@ -112,7 +196,8 @@ FULL JOIN
,1 AS consult_num
FROM ml.ML_C_ET_MSG_CONVERSATION_DIMEN_INC_D
WHERE
partition_day =REGEXP_REPLACE(date_sub(current_date(),1),'-','')
partition_day >=REGEXP_REPLACE(trunc(date_sub(current_date(),8),'MM'),'-','')
AND partition_day <= regexp_replace(date_sub(current_date(),1),'-','')
AND is_valid='true'
AND merchant_id IS NOT NULL
GROUP BY user_id,merchant_id,substr(partition_day,1,6)
......@@ -127,8 +212,9 @@ FULL JOIN
1 AS PAY_NUM
FROM ONLINE.ML_TRADE_ORDER_DETAIL_DAY T
WHERE
T.PARTITION_DATE = REGEXP_REPLACE(date_sub(current_date(),1),'-','')
AND regexp_replace(substr(T.PAY_DATE,1,10),'-','') =REGEXP_REPLACE(date_sub(current_date(),1),'-','')
T.PARTITION_DATE = regexp_replace(date_sub(current_date(),1),'-','')
AND regexp_replace(substr(T.PAY_DATE,1,10),'-','') >=REGEXP_REPLACE(trunc(date_sub(current_date(),8),'MM'),'-','')
AND regexp_replace(substr(T.PAY_DATE,1,10),'-','') <= regexp_replace(date_sub(current_date(),1),'-','')
AND T.is_pure_user='true'
GROUP BY T.USER_ID,T.merchant_id,regexp_replace(SUBSTR(T.PAY_DATE,1,7),'-','')
) T4
......@@ -146,7 +232,8 @@ FULL JOIN
SELECT sub_id,REGEXP_REPLACE(SUBSTR(call_time,0,10), '-', '') as partition_date
FROM tl.tl_gm_sl_ali_virtual_phone_call_detail --通话记录表,call_type呼叫类型
WHERE partition_day=regexp_replace(date_sub(current_date(),1),'-','')
AND REGEXP_REPLACE(SUBSTR(call_time,0,10), '-', '')=REGEXP_REPLACE(date_sub(current_date(),1),'-','')
AND REGEXP_REPLACE(SUBSTR(call_time,0,10), '-', '')>=REGEXP_REPLACE(trunc(date_sub(current_date(),8),'MM'),'-','')
AND REGEXP_REPLACE(SUBSTR(call_time,0,10), '-', '')<=regexp_replace(date_sub(current_date(),1),'-','')
--AND start_time+ interval 30 second <= release_time --通话时长大于30秒
AND release_time>start_time
)a
......@@ -154,28 +241,28 @@ FULL JOIN
(
SELECT id,sub_id
FROM tl.tl_gm_sl_ali_virtual_phone_binding --阿里记录表
WHERE partition_day=REGEXP_REPLACE(date_sub(current_date(),1),'-','')
WHERE partition_day=regexp_replace(date_sub(current_date(),1),'-','')
)b
ON a.sub_id = b.sub_id
LEFT JOIN
(
SELECT id,platform_binding_id
FROM tl.tl_gm_sl_virtual_phone_binding
WHERE partition_day=REGEXP_REPLACE(date_sub(current_date(),1),'-','')
WHERE partition_day=regexp_replace(date_sub(current_date(),1),'-','')
)c
ON b.id = c.platform_binding_id
LEFT JOIN
(
SELECT phone_binding_id,lead_task_id
FROM tl.tl_gm_sl_lead_task_phone_binding
WHERE partition_day=REGEXP_REPLACE(date_sub(current_date(),1),'-','')
WHERE partition_day=regexp_replace(date_sub(current_date(),1),'-','')
)d
ON c.id = d.phone_binding_id
JOIN
(
SELECT id,user_id,merchant_id
FROM tl.tl_gm_sl_lead_task --线索任务表(用户点击授权后记入该表)
WHERE partition_day=REGEXP_REPLACE(date_sub(current_date(),1),'-','')
WHERE partition_day=regexp_replace(date_sub(current_date(),1),'-','')
AND source='2' --用户行为电话授权
-- Keep only rows with a non-empty user id. The previous OR form reduced to a plain
-- IS NOT NULL check, so empty-string user ids were let through.
AND USER_ID IS NOT NULL AND USER_ID <> ''
)e
......@@ -191,29 +278,167 @@ FULL JOIN
( --商务归属1009更新
--!! 注意:这里的日期变量设置为当前日期的前一天,定时任务为每月第一天,故查询的为上月最后一天的归属
SELECT
merchant_id,merchant_name,business_group_name,business_partener_name,city_name
merchant_id,city_name,hospital_name,hospital_id
FROM
online.ml_doctor_relation_dimen_day
WHERE partition_date =REGEXP_REPLACE(date_sub(current_date(),1),'-','')
GROUP BY merchant_id,merchant_name,business_group_name,business_partener_name,city_name
WHERE partition_date =regexp_replace(date_sub(current_date(),1),'-','')
AND doctor_is_merchant='true'
AND hospital_is_online='true'
GROUP BY merchant_id,city_name,hospital_name,hospital_id
)b
ON t3.merchant_id=b.merchant_id
GROUP BY DATA_DAY
,merchant_name
,b.business_group_name
,b.business_partener_name
,hospital_name
,hospital_id
,city_name
)t2
ON t1.partition_date=t2.DATA_DAY AND t1.merchant_name=t2.merchant_name
AND t1.business_group_name=t2.business_group_name AND t1.business_partener_name=t2.business_partener_name
AND t1.city_name=t2.city_name
)t3
on nvl(t1.partition_date,t2.partition_date)=t3.partition_date
and nvl(t1.hospital_name,t2.hospital_name)=t3.hospital_name
AND nvl(t1.hospital_id,t2.hospital_id)=t3.hospital_id
AND nvl(t1.city_name,t2.city_name)=t3.city_name
FULL JOIN
( -- Verified-order head count per day and hospital
    -- Each (merchant, user, month) combination counts once and is attributed to the
    -- earliest validation day of that combination inside the date window
    -- (first day of the month containing today - 8 days, through yesterday).
    SELECT
        ord.validate_date AS partition_date,
        doc.hospital_name,
        doc.hospital_id,
        doc.city_name,
        SUM(ord.ord_num) AS ord_num
    FROM
    (
        SELECT
            MIN(REGEXP_REPLACE(SUBSTR(validate_time, 1, 10), '-', '')) AS validate_date,
            merchant_id,
            user_id,
            REGEXP_REPLACE(SUBSTR(validate_time, 1, 7), '-', '') AS month,
            -- Integer literal instead of a quoted string, so the outer SUM stays
            -- integral rather than relying on implicit string-to-double coercion.
            1 AS ord_num
        FROM online.ml_trade_order_detail_day
        WHERE partition_date = REGEXP_REPLACE(DATE_SUB(CURRENT_DATE(), 1), '-', '')
          AND REGEXP_REPLACE(SUBSTR(validate_time, 1, 10), '-', '') >= REGEXP_REPLACE(TRUNC(DATE_SUB(CURRENT_DATE(), 8), 'MM'), '-', '')
          AND REGEXP_REPLACE(SUBSTR(validate_time, 1, 10), '-', '') <= REGEXP_REPLACE(DATE_SUB(CURRENT_DATE(), 1), '-', '')
          AND is_pure_user = 'true' -- pure users only
        GROUP BY
            merchant_id,
            user_id,
            REGEXP_REPLACE(SUBSTR(validate_time, 1, 7), '-', '')
    ) ord
    LEFT JOIN
    (
        -- Merchant to hospital/city mapping from the latest relation snapshot,
        -- restricted to online hospitals and merchant-level doctor rows.
        SELECT
            merchant_id,
            city_name,
            hospital_name,
            hospital_id
        FROM online.ml_doctor_relation_dimen_day
        WHERE partition_date = REGEXP_REPLACE(DATE_SUB(CURRENT_DATE(), 1), '-', '')
          AND hospital_is_online = 'true'
          AND doctor_is_merchant = 'true'
        GROUP BY merchant_id, city_name, hospital_name, hospital_id
    ) doc
        ON ord.merchant_id = doc.merchant_id
    GROUP BY
        ord.validate_date,
        doc.hospital_name,
        doc.hospital_id,
        doc.city_name
)t4
on nvl(nvl(t1.partition_date,t2.partition_date),t3.partition_date)=t4.partition_date
and nvl(nvl(t1.hospital_name,t2.hospital_name),t3.hospital_name) =t4.hospital_name
AND nvl(nvl(t1.hospital_id,t2.hospital_id),t3.hospital_id)=t4.hospital_id
AND nvl(nvl(t1.city_name,t2.city_name),t3.city_name)=t4.city_name
FULL JOIN ---医生医院、美购页线索点击
(
SELECT nvl(nvl(t1.month,t2.month),t3.month) as DATA_MONTH
,nvl(nvl(t1.merchant_name,t2.merchant_name),t3.merchant_name) as merchant_name
,nvl(nvl(t1.business_group_name,t2.business_group_name),t3.business_group_name) as business_group_name
,nvl(nvl(t1.business_partener_name,t2.business_partener_name),t3.business_partener_name) as business_partener_name
SELECT
a.partition_date
,hospital_name
,hospital_id
,city_name
,count(cl_id) as click_pv
FROM
(
SELECT SUBSTR(partition_date,1,6) as month,cl_id,merchant_id,min(partition_date) as partition_date
FROM
(
SELECT partition_date,cl_id,doctor_id,SUM(click_pv) as click_pv --美购页线索点击
FROM
(
SELECT partition_date,cl_id,nvl(params['service_id'],params['business_id']) as service_id,count(1) as click_pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date>=REGEXP_REPLACE(trunc(date_sub(current_date(),8),'MM'),'-','')
AND partition_date<=regexp_replace(date_sub(current_date(),1),'-','')
AND ((action = 'welfare_multiattribute_click_buy' AND page_name = 'welfare_detail')
or action = 'welfare_detail_click_message'
or (action ='on_click_button' AND page_name ='welfare_detail' AND params['popup_name'] in ('consult_reserved','consult_call') AND params['button_name'] in ('message','call','reserved')))
AND cl_id <> ''
AND cl_id <> '0'
AND nvl(params['service_id'],params['business_id']) <>''
GROUP BY partition_date,cl_id,nvl(params['service_id'],params['business_id'])
)a
LEFT JOIN
(
SELECT id,doctor_id
FROM online.tl_meigou_service_view --美购表
where partition_date = regexp_replace(date_sub(current_date(),1),'-','')
)b
ON a.service_id=b.id
GROUP BY partition_date,cl_id,doctor_id
UNION ALL
SELECT partition_date,cl_id,params['business_id'] as doctor_id,count(1) as click_pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date>=REGEXP_REPLACE(trunc(date_sub(current_date(),8),'MM'),'-','')
AND partition_date<=regexp_replace(date_sub(current_date(),1),'-','')
AND ((action ='on_click_button' AND page_name='expert_detail' AND params['button_name'] in ('discount_pay','phone_consult','online_consult','message','call','reserved'))
or action='online_consult')
AND params['business_id'] <>''
AND cl_id <> ''
AND cl_id <> '0'
GROUP BY partition_date,cl_id,params['business_id']
UNION ALL
SELECT partition_date,cl_id,doctor_id,SUM(click_pv) as click_pv
FROM
(
SELECT partition_date,cl_id,params['business_id'] as hospital_id,count(1) as click_pv --医院页线索点击
FROM online.bl_hdfs_maidian_updates
WHERE partition_date>=REGEXP_REPLACE(trunc(date_sub(current_date(),8),'MM'),'-','')
AND partition_date<=regexp_replace(date_sub(current_date(),1),'-','')
AND action ='on_click_button' AND page_name='organization_detail' AND params['button_name'] in ('phone_consult','online_consult','message','call','reserved')
AND params['business_id'] <>''
AND cl_id <> ''
AND cl_id <> '0'
GROUP BY partition_date,cl_id,params['business_id']
)a
LEFT JOIN
(
SELECT id AS doctor_id,hospital_id
FROM online.tl_hdfs_doctor_view
WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','')
AND doctor_type = '1'
)b
ON a.hospital_id=b.hospital_id
GROUP BY partition_date,cl_id,doctor_id
)a
LEFT JOIN
(
SELECT doctor_id,merchant_id
FROM online.ml_doctor_relation_dimen_day
WHERE partition_date =regexp_replace(date_sub(current_date(),1),'-','')
AND merchant_id <>''
GROUP BY doctor_id,merchant_id
)b
ON a.doctor_id=b.doctor_id
GROUP BY SUBSTR(partition_date,1,6),cl_id,merchant_id
)a
LEFT JOIN
( --商务归属1009更新
--!! 注意:这里的日期变量设置为当前日期的前一天,定时任务为每月第一天,故查询的为上月最后一天的归属
SELECT
merchant_id,city_name,hospital_name,hospital_id
FROM
online.ml_doctor_relation_dimen_day
WHERE partition_date =regexp_replace(date_sub(current_date(),1),'-','')
AND doctor_is_merchant='true'
AND hospital_is_online='true'
GROUP BY merchant_id,city_name,hospital_name,hospital_id
)b
ON a.merchant_id=b.merchant_id
GROUP BY a.partition_date
,hospital_name
,hospital_id
,city_name
)t5
on nvl(nvl(nvl(t1.partition_date,t2.partition_date),t3.partition_date),t4.partition_date)=t5.partition_date
and nvl(nvl(nvl(t1.hospital_name,t2.hospital_name),t3.hospital_name),t4.hospital_name)=t5.hospital_name
AND nvl(nvl(nvl(t1.hospital_id,t2.hospital_id),t3.hospital_id),t4.hospital_id)=t5.hospital_id
AND nvl(nvl(nvl(t1.city_name,t2.city_name),t3.city_name),t4.city_name)=t5.city_name
FULL JOIN
(
SELECT nvl(nvl(t1.partition_date,t2.partition_date),t3.partition_date) as partition_date
,nvl(nvl(t1.hospital_name,t2.hospital_name),t3.hospital_name) as hospital_name
,nvl(nvl(t1.hospital_id,t2.hospital_id),t3.hospital_id) as hospital_id
,nvl(nvl(t1.city_name,t2.city_name),t3.city_name) as city_name
,nvl(discount,0) as discount
,nvl(recharge_amount,0) as recharge_amount
......@@ -227,75 +452,81 @@ FULL JOIN
FROM
(
--佣金(订单验证抽成+一般补单(常规补单+罚款)+买单+剔除转诊关联的订单
SELECT month,merchant_name,business_group_name,business_partener_name,city_name,SUM(money1) as discount
SELECT partition_date,city_name,hospital_id,hospital_name,SUM(money1) as discount
FROM
(
SELECT SUBSTR(REGEXP_REPLACE(SUBSTR(validate_time, 1, 10), '-', ''),1,8) as month,doctor_id,sum(discount) as money1
SELECT SUBSTR(REGEXP_REPLACE(SUBSTR(validate_time, 1, 10), '-', ''),1,8) as partition_date,doctor_id,sum(discount) as money1
FROM online.ml_trade_order_detail_day --美购交易表
WHERE partition_date = REGEXP_REPLACE(date_sub(current_date(),1),'-','')
AND REGEXP_REPLACE(SUBSTR(validate_time, 1, 10), '-', '') = REGEXP_REPLACE(date_sub(current_date(),1),'-','')
WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','')
AND REGEXP_REPLACE(SUBSTR(validate_time, 1, 10), '-', '') >= REGEXP_REPLACE(trunc(date_sub(current_date(),8),'MM'),'-','')
AND REGEXP_REPLACE(SUBSTR(validate_time, 1, 10), '-', '') <= regexp_replace(date_sub(current_date(),1),'-','')
AND status <> '3' --去除已退款
AND is_referral_order ='false' --去除转诊关联的订单
GROUP BY REGEXP_REPLACE(SUBSTR(validate_time, 1, 10), '-', ''),doctor_id
UNION ALL
SELECT SUBSTR(REGEXP_REPLACE(SUBSTR(created_time, 1, 10), '-', ''),1,8) as month,doctor_id,sum(amount) as money1
SELECT SUBSTR(REGEXP_REPLACE(SUBSTR(created_time, 1, 10), '-', ''),1,8) as partition_date,doctor_id,sum(amount) as money1
FROM online.tl_hdfs_budanluru_view --补单录入表
WHERE partition_date = REGEXP_REPLACE(date_sub(current_date(),1),'-','')
AND REGEXP_REPLACE(SUBSTR(created_time, 1, 10), '-', '') = REGEXP_REPLACE(date_sub(current_date(),1),'-','')
-- Snapshot partition: use the same yesterday expression as every other snapshot filter
-- in this statement. This line previously depended on a separate substitution variable
-- named partition_date, which nothing else in the visible statement uses.
WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','')
AND REGEXP_REPLACE(SUBSTR(created_time, 1, 10), '-', '') >= REGEXP_REPLACE(trunc(date_sub(current_date(),8),'MM'),'-','')
AND REGEXP_REPLACE(SUBSTR(created_time, 1, 10), '-', '') <= regexp_replace(date_sub(current_date(),1),'-','')
AND status = 0 --已录入
AND type in ('0','2') --常规补单和罚款
GROUP BY REGEXP_REPLACE(SUBSTR(created_time, 1, 10), '-', ''),doctor_id
UNION ALL
SELECT SUBSTR(REGEXP_REPLACE(SUBSTR(created_time, 1, 10), '-', ''),1,8) as month,doctor_id,sum(discount_cent)/100 as money1
SELECT SUBSTR(REGEXP_REPLACE(SUBSTR(created_time, 1, 10), '-', ''),1,8) as partition_date,doctor_id,sum(discount_cent)/100 as money1
FROM online.tl_maidan_order_view --买单表
WHERE partition_date = REGEXP_REPLACE(date_sub(current_date(),1),'-','')
AND REGEXP_REPLACE(SUBSTR(created_time, 1, 10), '-', '') = REGEXP_REPLACE(date_sub(current_date(),1),'-','')
-- Snapshot partition: use the same yesterday expression as every other snapshot filter
-- in this statement. This line previously depended on a separate substitution variable
-- named partition_date, which nothing else in the visible statement uses.
WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','')
AND REGEXP_REPLACE(SUBSTR(created_time, 1, 10), '-', '') >= REGEXP_REPLACE(trunc(date_sub(current_date(),8),'MM'),'-','')
AND REGEXP_REPLACE(SUBSTR(created_time, 1, 10), '-', '') <= regexp_replace(date_sub(current_date(),1),'-','')
AND status ='1'
GROUP BY REGEXP_REPLACE(SUBSTR(created_time, 1, 10), '-', ''),doctor_id
)ord
LEFT JOIN
(
SELECT doctor_id,city_name,business_group_name,business_partener_name,merchant_name
SELECT doctor_id,city_name,hospital_id,hospital_name
FROM online.ml_doctor_relation_dimen_day
WHERE partition_date=REGEXP_REPLACE(date_sub(current_date(),1),'-','')
GROUP BY doctor_id,city_name,business_group_name,business_partener_name,merchant_name
WHERE partition_date=regexp_replace(date_sub(current_date(),1),'-','')
AND hospital_is_online='true'
GROUP BY doctor_id,city_name,hospital_id,hospital_name
)doc
on ord.doctor_id = doc.doctor_id
GROUP BY month,merchant_name,business_group_name,business_partener_name,city_name
GROUP BY partition_date,city_name,hospital_id,hospital_name
)t1
FULL JOIN
( --广告充值
SELECT month,merchant_name,business_group_name,business_partener_name,city_name,SUM(recharge_amount) as recharge_amount
SELECT partition_date,hospital_name,hospital_id,city_name,SUM(recharge_amount) as recharge_amount
FROM
(
SELECT SUBSTR(partition_day,1,8) as month,merchant_id,sum(proportion_recharge_amount) as recharge_amount --当天充值金额
SELECT SUBSTR(partition_day,1,8) as partition_date,merchant_id,sum(proportion_recharge_amount) as recharge_amount --当天充值金额
FROM ml.ml_c_ct_mc_merchant_indic_d
WHERE partition_day = REGEXP_REPLACE(date_sub(current_date(),1),'-','')
WHERE partition_day >=REGEXP_REPLACE(trunc(date_sub(current_date(),8),'MM'),'-','')
AND partition_day <= regexp_replace(date_sub(current_date(),1),'-','')
GROUP BY SUBSTR(partition_day,1,8),merchant_id
)gg
LEFT JOIN
(
SELECT merchant_id,city_name,business_group_name,business_partener_name,merchant_name
SELECT merchant_id,city_name,hospital_name,hospital_id
FROM online.ml_doctor_relation_dimen_day
WHERE partition_date=REGEXP_REPLACE(date_sub(current_date(),1),'-','')
WHERE partition_date=regexp_replace(date_sub(current_date(),1),'-','')
AND doctor_is_merchant = 'true'
GROUP BY merchant_id,city_name,business_group_name,business_partener_name,merchant_name
AND hospital_is_online='true'
GROUP BY merchant_id,city_name,hospital_name,hospital_id
)doc
on gg.merchant_id = doc.merchant_id
GROUP BY month,merchant_name,business_group_name,business_partener_name,city_name
GROUP BY partition_date,hospital_id,hospital_name,city_name
)t2
on t1.month=t2.month AND t1.merchant_name=t2.merchant_name AND t1.business_group_name=t2.business_group_name AND t1.business_partener_name=t2.business_partener_name AND t1.city_name=t2.city_name
on t1.partition_date=t2.partition_date AND t1.hospital_id=t2.hospital_id AND t1.hospital_name=t2.hospital_name AND t1.city_name=t2.city_name
FULL JOIN
(--广告充值消耗(报表口径)
SELECT month,merchant_name,business_group_name,business_partener_name,city_name,SUM(all_amount) as all_amount,SUM(chongzhi_amount) as chongzhi_amount
SELECT partition_date,hospital_id,hospital_name,city_name,SUM(all_amount) as all_amount,SUM(chongzhi_amount) as chongzhi_amount
,SUM(cpc_amount) as cpc_amount,SUM(cpc_chongzhi_amount) as cpc_chongzhi_amount,SUM(cpc_click_num) as cpc_click_num,SUM(liulan_amount) as liulan_amount,SUM(sixin_amount) as sixin_amount
FROM
(
SELECT SUBSTR(partition_day,1,8) as month,merchant_id
SELECT SUBSTR(partition_day,1,8) as partition_date,merchant_id
,SUM(CASE WHEN advertise_type = 'cpc' AND advertise_calculate_type = 'cpc_flownext' THEN
proportion_expend_recharge_amount
WHEN advertise_type = 'cpt' AND advertise_calculate_type = 'cpt_schedule' THEN
......@@ -324,125 +555,106 @@ FULL JOIN
,SUM(CASE WHEN advertise_type='browse' THEN proportion_expend_recharge_amount END) as liulan_amount --浏览收入(不含返点)
,SUM(CASE WHEN advertise_type='message' THEN proportion_expend_recharge_amount END) as sixin_amount --私信收入(不含返点)
from ml.ml_c_ct_mc_merchantadclassify_indic_d
where partition_day=REGEXP_REPLACE(date_sub(current_date(),1),'-','')
where partition_day>=REGEXP_REPLACE(trunc(date_sub(current_date(),8),'MM'),'-','')
AND partition_day <=regexp_replace(date_sub(current_date(),1),'-','')
group by SUBSTR(partition_day,1,8),merchant_id
UNION ALL
-- Second arm of the UNION ALL: referral consumption per day and merchant.
-- Account-flow rows of type 16 are mapped account -> doctor -> merchant and summed
-- into chongzhi_amount. The remaining metric columns are filled with empty strings.
-- NOTE(review): UNION ALL matches columns by position and the first arm is only partly
-- visible here - confirm that its metric columns appear in this same order.
-- NOTE(review): the empty-string placeholders stand in for numeric columns, presumably
-- so they add nothing to the outer SUMs - confirm, and consider typed NULLs instead.
SELECT SUBSTR(t1.dt,1,8) as partition_date,merchant_id,sum(rechange) as chongzhi_amount,''all_amount,''cpc_amount,''cpc_chongzhi_amount,''cpc_click_num,''liulan_amount,''sixin_amount
FROM
(
-- Referral consumption per doctor and day (dt is created_time as yyyyMMdd)
SELECT
b.doctor_id,dt
,sum(rechange) as rechange --referral recharge consumption
FROM
(
-- Flow rows created inside the date window, read from the latest snapshot partition.
-- abs() makes the amount positive regardless of the stored sign.
SELECT account_id,abs(rechange) as rechange,REGEXP_REPLACE(SUBSTR(created_time, 1, 10), '-', '') as dt
FROM tl.tl_ap_account_accountflow--account flow table
WHERE partition_day=regexp_replace(date_sub(current_date(),1),'-','')
and REGEXP_REPLACE(SUBSTR(created_time, 1, 10), '-', '')>=REGEXP_REPLACE(trunc(date_sub(current_date(),8),'MM'),'-','')
and REGEXP_REPLACE(SUBSTR(created_time,1,10),'-','')<=regexp_replace(date_sub(current_date(),1),'-','')
and type='16'--referral consumption
)a
LEFT JOIN
(
-- Account id -> doctor id, de-duplicated
SELECT id,doctor_id
FROM tl.tl_ap_account_account
WHERE partition_day=regexp_replace(date_sub(current_date(),1),'-','')
GROUP BY id,doctor_id
)b
on a.account_id=b.id
GROUP BY b.doctor_id,dt
)t1
LEFT JOIN
(
-- Doctor id -> merchant id, restricted to rows flagged doctor_is_merchant
SELECT doctor_id,merchant_id
FROM online.ml_doctor_relation_dimen_day
WHERE partition_date = regexp_replace(date_sub(current_date(),1),'-','')
and doctor_is_merchant='true'
GROUP BY doctor_id,merchant_id
)t2
on t1.doctor_id=t2.doctor_id
GROUP BY SUBSTR(t1.dt,1,8),merchant_id
)gg
LEFT JOIN
(
SELECT merchant_id,city_name,business_group_name,business_partener_name,merchant_name
SELECT merchant_id,city_name,hospital_name,hospital_id
FROM online.ml_doctor_relation_dimen_day
WHERE partition_date=REGEXP_REPLACE(date_sub(current_date(),1),'-','')
WHERE partition_date=regexp_replace(date_sub(current_date(),1),'-','')
AND doctor_is_merchant = 'true'
GROUP BY merchant_id,city_name,business_group_name,business_partener_name,merchant_name
AND hospital_is_online='true'
GROUP BY merchant_id,city_name,hospital_name,hospital_id
)doc
on gg.merchant_id = doc.merchant_id
GROUP BY month,merchant_name,business_group_name,business_partener_name,city_name
GROUP BY partition_date,hospital_id,hospital_name,city_name
)t3
on nvl(t1.month,t2.month)=t3.month
and nvl(t1.merchant_name,t2.merchant_name)=t3.merchant_name
AND nvl(t1.business_group_name,t2.business_group_name)=t3.business_group_name
AND nvl(t1.business_partener_name,t2.business_partener_name)=t3.business_partener_name
on nvl(t1.partition_date,t2.partition_date)=t3.partition_date
and nvl(t1.hospital_id,t2.hospital_id)=t3.hospital_id
AND nvl(t1.hospital_name,t2.hospital_name)=t3.hospital_name
AND nvl(t1.city_name,t2.city_name)=t3.city_name
)t3
on nvl(t1.partition_date,t2.DATA_DAY)=t3.DATA_MONTH
and nvl(t1.merchant_name,t2.merchant_name)=t3.merchant_name
AND nvl(t1.business_group_name,t2.business_group_name)=t3.business_group_name
AND nvl(t1.business_partener_name,t2.business_partener_name)=t3.business_partener_name
AND nvl(t1.city_name,t2.city_name)=t3.city_name
)t6
on nvl(nvl(nvl(nvl(t1.partition_date,t2.partition_date),t3.partition_date),t4.partition_date),t5.partition_date)=t6.partition_date
and nvl(nvl(nvl(nvl(t1.hospital_name,t2.hospital_name),t3.hospital_name),t4.hospital_name),t5.hospital_name)=t6.hospital_name
AND nvl(nvl(nvl(nvl(t1.hospital_id,t2.hospital_id),t3.hospital_id),t4.hospital_id),t5.hospital_id)=t6.hospital_id
AND nvl(nvl(nvl(nvl(t1.city_name,t2.city_name),t3.city_name),t4.city_name),t5.city_name)=t6.city_name
FULL JOIN
(
SELECT SUBSTR(pay_date, 1, 8) as PAY_MONTH,merchant_name,business_group_name,business_partener_name,city_name,SUM(service_price) as service_price
( --纯用户支付gmv
SELECT pay_date as partition_date,hospital_name,hospital_id,city_name,SUM(service_price) as service_price
FROM
(
SELECT REGEXP_REPLACE(SUBSTR(pay_time, 1, 10), '-', '') as pay_date,doctor_id,sum(service_price) as service_price
FROM online.ml_trade_order_detail_day --美购交易表(梦雨新做)
WHERE partition_date = REGEXP_REPLACE(date_sub(current_date(),1),'-','')
AND REGEXP_REPLACE(SUBSTR(pay_time, 1, 10), '-', '') = REGEXP_REPLACE(date_sub(current_date(),1),'-','')
FROM online.ml_trade_order_detail_day
WHERE partition_date =regexp_replace(date_sub(current_date(),1),'-','')
AND REGEXP_REPLACE(SUBSTR(pay_time, 1, 10), '-', '') >= REGEXP_REPLACE(trunc(date_sub(current_date(),8),'MM'),'-','')
AND REGEXP_REPLACE(SUBSTR(pay_time, 1, 10), '-', '') <= regexp_replace(date_sub(current_date(),1),'-','')
AND is_pure_user = 'true' --取纯用户
GROUP BY REGEXP_REPLACE(SUBSTR(pay_time, 1, 10), '-', ''),doctor_id
)ord
LEFT JOIN
(
SELECT doctor_id,city_name,business_group_name,business_partener_name,merchant_name
SELECT doctor_id,city_name,hospital_name,hospital_id
FROM online.ml_doctor_relation_dimen_day
WHERE partition_date=REGEXP_REPLACE(date_sub(current_date(),1),'-','')
GROUP BY doctor_id,city_name,business_group_name,business_partener_name,merchant_name
WHERE partition_date=regexp_replace(date_sub(current_date(),1),'-','')
AND hospital_is_online='true'
GROUP BY doctor_id,city_name,hospital_name,hospital_id
)doc
on ord.doctor_id = doc.doctor_id
GROUP BY SUBSTR(pay_date, 1, 8),merchant_name,business_group_name,business_partener_name,city_name
)t4
on nvl(nvl(t1.partition_date,t2.DATA_DAY),t3.DATA_MONTH)=t4.PAY_MONTH
and nvl(nvl(t1.merchant_name,t2.merchant_name),t3.merchant_name) =t4.merchant_name
AND nvl(nvl(t1.business_group_name,t2.business_group_name),t3.business_group_name)=t4.business_group_name
AND nvl(nvl(t1.business_partener_name,t2.business_partener_name),t3.business_partener_name)=t4.business_partener_name
AND nvl(nvl(t1.city_name,t2.city_name),t3.city_name)=t4.city_name
GROUP BY pay_date,hospital_name,hospital_id,city_name
)t7
on nvl(nvl(nvl(nvl(nvl(t1.partition_date,t2.partition_date),t3.partition_date),t4.partition_date),t5.partition_date),t6.partition_date)=t7.partition_date
and nvl(nvl(nvl(nvl(nvl(t1.hospital_name,t2.hospital_name),t3.hospital_name),t4.hospital_name),t5.hospital_name),t6.hospital_name)=t7.hospital_name
AND nvl(nvl(nvl(nvl(nvl(t1.hospital_id,t2.hospital_id),t3.hospital_id),t4.hospital_id),t5.hospital_id),t6.hospital_id)=t7.hospital_id
AND nvl(nvl(nvl(nvl(nvl(t1.city_name,t2.city_name),t3.city_name),t4.city_name),t5.city_name),t6.city_name)=t7.city_name
order by data desc
FULL JOIN
(
SELECT SUBSTR(day_id, 1, 8) as day_month,merchant_name,business_group_name,business_partener_name,city_name,SUM(budget) as budget
FROM
(
SELECT T1.day_id,T1.merchant_doctor_id,case when merchant_budget>=tot_service_budget then tot_service_budget else merchant_budget end as budget
FROM
(
SELECT
clicklog.create_time AS day_id
,clicklog.merchant_doctor_id
,max(merchant_budget) as merchant_budget --商户预算
FROM
(
SELECT id,promote_id,price,service_budget,merchant_budget,merchant_doctor_id,regexp_replace(substr(create_time,1,10),'-','') as create_time,recharge
FROM online.tl_hdfs_cpc_clicklog_view
WHERE partition_date=REGEXP_REPLACE(date_sub(current_date(),1),'-','')
AND regexp_replace(substr(create_time,1,10),'-','')= REGEXP_REPLACE(date_sub(current_date(),1),'-','')
)clicklog
group by clicklog.create_time,clicklog.merchant_doctor_id
)T1
LEFT JOIN
(
SELECT
day_id
,merchant_doctor_id
,sum(service_budget) as tot_service_budget
FROM
(
SELECT
clicklog.create_time AS day_id
,clicklog.merchant_doctor_id,clicklog.service_id
,max(service_budget) as service_budget
FROM
(
SELECT id,promote_id,price,service_budget,merchant_budget,merchant_doctor_id,service_id,regexp_replace(substr(create_time,1,10),'-','') as create_time
FROM online.tl_hdfs_cpc_clicklog_view
WHERE partition_date=REGEXP_REPLACE(date_sub(current_date(),1),'-','')
AND regexp_replace(substr(create_time,1,10),'-','')= REGEXP_REPLACE(date_sub(current_date(),1),'-','')
)clicklog
GROUP BY clicklog.create_time,clicklog.merchant_doctor_id,clicklog.service_id
)service_budget
GROUP BY day_id,merchant_doctor_id
)T2
ON T1.day_id=T2.day_id
AND T1.merchant_doctor_id=T2.merchant_doctor_id
)ord
LEFT JOIN
(
SELECT merchant_doctor_id,merchant_name,business_group_name,business_partener_name,city_name
FROM online.ml_doctor_relation_dimen_day
WHERE partition_date=REGEXP_REPLACE(date_sub(current_date(),1),'-','')
AND doctor_is_merchant='true'
group by merchant_doctor_id,merchant_name,business_group_name,business_partener_name,city_name
)doc
on ord.merchant_doctor_id = doc.merchant_doctor_id
GROUP BY SUBSTR(day_id, 1, 8),merchant_name,business_group_name,business_partener_name,city_name
)t5
on nvl(nvl(nvl(t1.partition_date,t2.DATA_DAY),t3.DATA_MONTH),t4.PAY_MONTH)=t5.day_month
and nvl(nvl(nvl(t1.merchant_name,t2.merchant_name),t3.merchant_name),t4.merchant_name)=t5.merchant_name
AND nvl(nvl(nvl(t1.business_group_name,t2.business_group_name),t3.business_group_name),t4.business_group_name)=t5.business_group_name
AND nvl(nvl(nvl(t1.business_partener_name,t2.business_partener_name),t3.business_partener_name),t4.business_partener_name)=t5.business_partener_name
AND nvl(nvl(nvl(t1.city_name,t2.city_name),t3.city_name),t4.city_name)=t5.city_name
order by data desc
......
#step1_10.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_budanluru_view
\ No newline at end of file
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive tl tl_gm_sl_lead_task
\ No newline at end of file
#step1_11.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_maidan_order_view
\ No newline at end of file
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online bl_hdfs_maidian_updates
\ No newline at end of file
#step1_12.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive ml ml_c_ct_mc_merchant_indic_d
\ No newline at end of file
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_meigou_service_view
\ No newline at end of file
#step1_13.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive ml ml_c_ct_mc_merchantadclassify_indic_d
\ No newline at end of file
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_budanluru_view
\ No newline at end of file
#step1_14.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_cpc_clicklog_view
\ No newline at end of file
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_maidan_order_view
\ No newline at end of file
#step1_15.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive ml ml_c_ct_mc_merchant_indic_d
\ No newline at end of file
#step1_16.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive ml ml_c_ct_mc_merchantadclassify_indic_d
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive ml ML_C_ET_MSG_CONVERSATION_DIMEN_INC_D
\ No newline at end of file
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_community_precise_exposure_detail
\ No newline at end of file
#step1_4.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ML_TRADE_ORDER_DETAIL_DAY
\ No newline at end of file
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_doctor_view
\ No newline at end of file
#step1_5.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive tl tl_gm_sl_ali_virtual_phone_call_detail
\ No newline at end of file
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive ml ML_C_ET_MSG_CONVERSATION_DIMEN_INC_D
\ No newline at end of file
#step1_6.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive tl tl_gm_sl_ali_virtual_phone_binding
\ No newline at end of file
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ML_TRADE_ORDER_DETAIL_DAY
\ No newline at end of file
#step1_7.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive tl tl_gm_sl_virtual_phone_binding
\ No newline at end of file
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive tl tl_gm_sl_ali_virtual_phone_call_detail
\ No newline at end of file
#step1_8.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive tl tl_gm_sl_lead_task_phone_binding
\ No newline at end of file
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive tl tl_gm_sl_ali_virtual_phone_binding
\ No newline at end of file
#step1_9.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive tl tl_gm_sl_lead_task
\ No newline at end of file
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive tl tl_gm_sl_lead_task_phone_binding
\ No newline at end of file
#step2.job
type=command
dependencies=step1_1,step1_2,step1_3,step1_4,step1_5,step1_6,step1_7,step1_8,step1_9,step1_10,step1_11,step1_12,step1_13,step1_14
dependencies=step1_1,step1_2,step1_3,step1_4,step1_5,step1_6,step1_7,step1_8,step1_9,step1_10,step1_11,step1_12,step1_13,step1_14,step1_15,step1_16
command=sh /home/bi/bi-report/lib/shell/hive yeji_day
\ No newline at end of file
#step3.job
type=command
dependencies=step2
command=curl -X GET http://localhost:8553/api/report/email/yeji_day/jiaweijie@igengmei.com/zhanghaoyu@igengmei.com,cp-sunyinghe@igengmei.com,yindanlei@igengmei.com
\ No newline at end of file
command=curl -X GET http://localhost:8553/api/report/email/yeji_day/jiaweijie@igengmei.com/cp-sunyinghe@igengmei.com,yindanlei@igengmei.com
\ No newline at end of file
......@@ -2,23 +2,27 @@
-- Daily report query: reads pm.tl_pm_yeji_d and exposes each selected column
-- under a Chinese display alias (date, merchant/business/hospital dimensions,
-- exposure and page-view counts, clue counts, order and advertising amounts).
-- NOTE(review): discount and service_price are each selected twice under
-- different aliases; this looks like old and new revisions of the column list
-- merged together -- confirm which alias of each should remain.
SELECT
data AS `日期`
,merchant_name AS `商户名称`
,business_group_name AS `商务组`
,business_partener_name AS `商务名称`
,hospital_name AS `机构名称`
,hospital_id AS `机构id`
,city_name AS `城市`
,hexin_exp_pv AS `核心卡片曝光pv`
,service_exp_pv AS `商品卡片曝光pv`
,doc_hos_exp_pv AS `医生医院卡片曝光pv`
,hexin_pv AS `核心页pv`
,service_pv AS `美购页pv`
,service_page_pv AS `商详页pv`
,doc_hos_page_pv AS `医生医院页pv`
,click_pv AS `线索点击人次`
,clue_num AS `有效线索人次`
,discount AS `总验证抽成`
,ord_num AS `验证人次`
,service_price AS `纯用户支付gmv`
,discount AS `佣金`
,recharge_amount AS `广告收款`
,chongzhi_amount AS `广告消耗`
,all_amount AS `广告总消耗(含返点)`
,cpc_amount AS `cpc总消耗`
,cpc_chongzhi_amount AS `cpc充值消耗`
,budget AS `cpc商品有效预算`
,cpc_click_num AS `cpc点击数`
,liulan_amount AS `浏览收入`
,sixin_amount AS `私信收入`
,service_price AS `纯用户支付金额`
FROM pm.tl_pm_yeji_d
-- Keep only the partition for the previous day: current_date minus one day,
-- with the '-' separators stripped before comparing to partition_day.
where partition_day=regexp_replace(DATE_SUB(current_date,1) ,'-','')
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment