Commit 2d0eff76 authored by 魏艺敏's avatar 魏艺敏

push codes

parent 34b7d118
......@@ -3,3 +3,4 @@ home_content_detail=首页内容数据-分日明细
home_content_by_month=首页内容数据-月均
ai_content_detail=ai内容数据-分日明细
ai_content_by_month=ai内容数据-月均
home_content_retention=分类用户次留
\ No newline at end of file
--***************************************************************
--* Script name:
--* Purpose: content daily report - simplified version - for Sijing
--* Business name: pm
--* Input data:
--* Author: weiyimin@igengmei.com
--* Updated at:
--***************************************************************
-- Session settings: run on the data queue
SET mapreduce.job.queuename=data;
-- Switch to the pm database
USE pm;
-- Create the daily retention table (one row per day / device OS / active type / channel).
-- Rate and quality columns are declared as STRING, counts as BIGINT.
-- The table is partitioned by PARTITION_DAY and stored as tab-delimited text.
CREATE TABLE IF NOT EXISTS pm.tl_pm_content_retention
(
     day_id                            STRING COMMENT '{"chs_name":"当天日期","description":"","etl":"","value":"","remark":""}'
    ,device_os_type                    STRING COMMENT '{"chs_name":"设备类型","description":"","etl":"","value":"","remark":""}'
    ,active_type                       STRING COMMENT '{"chs_name":"活跃类型","description":"","etl":"","value":"","remark":""}'
    ,channel                           STRING COMMENT '{"chs_name":"渠道","description":"","etl":"","value":"","remark":""}'
    ,dau                               BIGINT COMMENT '{"chs_name":"日活","description":"","etl":"","value":"","remark":""}'
    ,retention_rate                    STRING COMMENT '{"chs_name":"次留率","description":"","etl":"","value":"","remark":""}'
    ,home_good_click_uv                BIGINT COMMENT '{"chs_name":"首页good click设备数","description":"","etl":"","value":"","remark":""}'
    ,home_good_click_quality           STRING COMMENT '{"chs_name":"首页gc用户次留率/全站次留率","description":"","etl":"","value":"","remark":""}'
    ,home_ungood_click_uv              BIGINT COMMENT '{"chs_name":"点击首页卡片但非gc设备数","description":"","etl":"","value":"","remark":""}'
    ,home_ungood_click_quality         STRING COMMENT '{"chs_name":"点击首页卡片但非gc设备次留率/全站次留率","description":"","etl":"","value":"","remark":""}'
    ,no_click_uv                       BIGINT COMMENT '{"chs_name":"未点击首页feed卡片设备数","description":"","etl":"","value":"","remark":""}'
    ,no_click_uv_quality               STRING COMMENT '{"chs_name":"未点击首页feed卡片设备次留率/全站次留率","description":"","etl":"","value":"","remark":""}'
    ,home_good_click_retention_quality STRING COMMENT '{"chs_name":"当天点击首页feed卡片,且次日依旧点击的次留率/全站次留率","description":"","etl":"","value":"","remark":""}'
)
COMMENT '内容日报-分用户次留'
PARTITIONED BY (PARTITION_DAY STRING COMMENT '分区日期')
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
    COLLECTION ITEMS TERMINATED BY '\002'
    MAP KEYS TERMINATED BY '\003'
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
\ No newline at end of file
This diff is collapsed.
......@@ -8,9 +8,9 @@ SELECT
,ai_report_card_click_uv AS `ai结果页内容卡片点击uv`
,ai_report_card_click_pv AS `ai结果页内容卡片点击pv`
,ai_report_good_click AS `来源于ai结果页的good click`
,CONCAT(ROUND(ai_report_card_click_uv/ai_report_uv*100,2),'%') AS `ai结果页内容卡片点击uv/DAU`
,CONCAT(ROUND(ai_report_card_click_pv/ai_report_uv*100,2),'%') AS `ai结果页内容卡片点击pv/DAU`
,CONCAT(ROUND(ai_report_good_click/ai_report_uv*100,2),'%') AS `来源于ai结果页的good click/DAU`
,CONCAT(ROUND(ai_report_card_click_uv/ai_report_uv*100,2),'%') AS `ai结果页内容卡片点击uv/ai结果页uv`
,CONCAT(ROUND(ai_report_card_click_pv/ai_report_uv*100,2),'%') AS `ai结果页内容卡片点击pv/ai结果页uv`
,CONCAT(ROUND(ai_report_good_click/ai_report_uv*100,2),'%') AS `来源于ai结果页的good click/ai结果页uv`
FROM
(
select substr(day_id,1,6) as month
......
......@@ -8,9 +8,9 @@ SELECT
,ai_report_card_click_uv AS `ai结果页内容卡片点击uv`
,ai_report_card_click_pv AS `ai结果页内容卡片点击pv`
,ai_report_good_click AS `来源于ai结果页的good click`
,NVL(CONCAT(ROUND(ai_report_card_click_uv/ai_report_uv*100,2),'%'),0) AS `ai结果页内容卡片点击uv/DAU`
,NVL(CONCAT(ROUND(ai_report_card_click_pv/ai_report_uv*100,2),'%'),0) AS `ai结果页内容卡片点击pv/DAU`
,NVL(CONCAT(ROUND(ai_report_good_click/ai_report_uv*100,2),'%'),0) AS `来源于ai结果页的good click/DAU`
,NVL(CONCAT(ROUND(ai_report_card_click_uv/ai_report_uv*100,2),'%'),0) AS `ai结果页内容卡片点击uv/ai结果页uv`
,NVL(CONCAT(ROUND(ai_report_card_click_pv/ai_report_uv*100,2),'%'),0) AS `ai结果页内容卡片点击pv/ai结果页uv`
,NVL(CONCAT(ROUND(ai_report_good_click/ai_report_uv*100,2),'%'),0) AS `来源于ai结果页的good click/ai结果页uv`
FROM pm.tl_pm_content_v3
where partition_day>='20201018' and partition_day <= regexp_replace(DATE_SUB(current_date,1) ,'-','')
order by `日期`,`系统`,`活跃`,`渠道`
\ No newline at end of file
-- Monthly report over pm.tl_pm_content_retention.
-- Daily rows are averaged per month (first six characters of day_id), device OS,
-- active type and channel. Quality ratios equal to 0 are turned into NULL first,
-- so AVG skips them instead of counting them as zero.
SELECT
     SUBSTR(day_id, 1, 6) AS `日期`
    ,device_os_type       AS `系统`
    ,active_type          AS `活跃`
    ,channel              AS `渠道`
    ,ROUND(AVG(home_good_click_uv), 0) AS `首页good click设备数`
    ,ROUND(AVG(CASE WHEN home_good_click_quality = 0 THEN NULL ELSE home_good_click_quality END), 2) AS `首页gc用户次留率/全站次留率`
    ,ROUND(AVG(home_ungood_click_uv), 0) AS `点击首页卡片但非gc设备数`
    ,ROUND(AVG(CASE WHEN home_ungood_click_quality = 0 THEN NULL ELSE home_ungood_click_quality END), 2) AS `点击首页卡片但非gc设备次留率/全站次留率`
    ,ROUND(AVG(no_click_uv), 0) AS `未点击首页feed卡片设备数`
    ,ROUND(AVG(CASE WHEN no_click_uv_quality = 0 THEN NULL ELSE no_click_uv_quality END), 2) AS `未点击首页feed卡片设备次留率/全站次留率`
    ,ROUND(AVG(CASE WHEN home_good_click_retention_quality = 0 THEN NULL ELSE home_good_click_retention_quality END), 2) AS `当天点击首页feed卡片,且次日依旧点击的次留率/全站次留率`
FROM pm.tl_pm_content_retention
-- Partitions from 20201018 up to and including yesterday
WHERE partition_day >= '20201018'
  AND partition_day <= regexp_replace(DATE_SUB(current_date,1) ,'-','')
GROUP BY
     SUBSTR(day_id, 1, 6)
    ,device_os_type
    ,active_type
    ,channel
ORDER BY `日期`, `系统`, `活跃`, `渠道`
\ No newline at end of file
daily_content_retention=内容日报-次留率
#step1_1.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online bl_hdfs_maidian_updates
\ No newline at end of file
#step1_2.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_device_day_active_status
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive ml ML_D_CT_DV_DEVICECLEAN_DIMEN_D
\ No newline at end of file
#step1_4.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive DIM DIM_AI_CHANNEL_ZP_NEW
\ No newline at end of file
#step2.job
type=command
dependencies=step1_1,step1_2,step1_3,step1_4
command=curl -X GET http://localhost:8553/api/report/email/daily_content_retention/weiyimin@igengmei.com/hanyingyue@igengmei.com
\ No newline at end of file
-- Monthly averages of daily next-day retention metrics, per device OS,
-- active type and channel. Every dimension is exploded into its own value plus
-- a roll-up total value, so each device contributes to the total rows as well.
--
-- Layers, innermost first:
--   1. daily distinct-device counts per segment (active, retained next day,
--      good click, plain card click, no click)
--   2. daily rates and quality ratios, where a quality ratio is the retention
--      rate of a cohort divided by the overall retention rate of the segment
--   3. monthly averages, where daily rates or ratios equal to 0 are treated as
--      missing (NULL) so that AVG skips them
select substr(day_id,1,6) as month
    ,device_os_type
    ,active_type
    ,channel
    ,round(avg(dau),0) as dau
    ,round(avg(if(retention_rate=0,NULL,retention_rate)),2) as retention_rate
    ,round(avg(home_good_click_uv),0) as home_good_click_uv
    ,round(avg(if(home_good_click_quality=0,NULL,home_good_click_quality)),2) as home_good_click_quality
    ,round(avg(home_ungood_click_uv),0) as home_ungood_click_uv
    ,round(avg(if(home_ungood_click_quality=0,NULL,home_ungood_click_quality)),2) as home_ungood_click_quality
    ,round(avg(no_click_uv),0) as no_click_uv
    ,round(avg(if(no_click_uv_quality=0,NULL,no_click_uv_quality)),2) as no_click_uv_quality
    ,round(avg(if(home_good_click_retention_quality=0,NULL,home_good_click_retention_quality)),2) as home_good_click_retention_quality
from
(
    -- Layer 2: daily rates. A NULL result (for example a NULL numerator or a
    -- zero denominator) is replaced by 0 through COALESCE.
    SELECT
        day_id
        ,device_os_type
        ,active_type
        ,channel
        ,dau
        ,COALESCE(ROUND(retention_num/dau*100,2),0) AS retention_rate
        ,home_good_click_uv
        ,COALESCE(ROUND(home_good_click_retention_num*dau/home_good_click_uv/retention_num,2),0) as home_good_click_quality
        ,home_ungood_click_uv
        ,COALESCE(ROUND(home_ungood_click_retention_num*dau/home_ungood_click_uv/retention_num,2),0) as home_ungood_click_quality
        ,no_click_uv
        ,COALESCE(ROUND(no_click_uv_retention_num*dau/no_click_uv/retention_num,2),0) as no_click_uv_quality
        ,COALESCE(ROUND(home_good_click_uv_2*dau/home_good_click_uv/retention_num,2),0) as home_good_click_retention_quality
    FROM
    (
        -- Layer 1: daily distinct-device counts per segment.
        -- t1 = active devices, t2 = same device active on the following day,
        -- t3 = good click from home, t4 = home card click,
        -- t5 = good click from home again on the following day.
        SELECT regexp_replace(substr(t1.partition_date,1,10),'-','') as day_id
            ,device_os_type
            ,active_type
            ,channel
            ,count(distinct t1.device_id) as dau
            ,count(distinct t2.device_id) as retention_num
            ,count(distinct t3.cl_id) as home_good_click_uv
            ,count(distinct t5.cl_id) as home_good_click_uv_2
            ,count(distinct case when t3.cl_id is not null then t2.device_id end) as home_good_click_retention_num
            ,count(distinct case when t4.cl_id is not null and t3.cl_id is null then t4.cl_id end) as home_ungood_click_uv
            ,count(distinct case when t4.cl_id is not null and t3.cl_id is null then t2.device_id end) as home_ungood_click_retention_num
            ,count(distinct case when t4.cl_id is null and t3.cl_id is null then t1.device_id end) as no_click_uv
            -- Retained devices among the no-click cohort. The condition must match
            -- no_click_uv above (neither a card click nor a good click). It used to
            -- repeat the home_ungood_click condition, which counted the wrong cohort.
            ,count(distinct case when t4.cl_id is null and t3.cl_id is null then t2.device_id end) as no_click_uv_retention_num
        FROM
        (
            -- Active devices per day, fanned out over each dimension value and its total
            SELECT partition_date,a.device_os_type,b.active_type,device_id,v.channel
            FROM
            (
                SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) AS partition_date
                    ,array(device_os_type,'合计') as device_os_type
                    ,array(CASE WHEN active_type = '4' THEN '老活跃设备'
                                WHEN active_type IN ('1','2') THEN '新增设备' END,'合计') AS active_type
                    ,device_id
                    ,array(CASE WHEN tmp.is_ai_channel='true' THEN 'AI' ELSE '其他' END , '合计') as channel
                FROM online.ml_device_day_active_status m
                LEFT JOIN
                (SELECT code,is_spam,is_ai_channel,partition_day
                 FROM DIM.DIM_AI_CHANNEL_ZP_NEW
                 WHERE partition_day >='20190701' and partition_day<=regexp_replace(DATE_SUB(current_date,1) ,'-',''))tmp
                on first_channel_source_type=tmp.code and m.partition_date=tmp.partition_day
                WHERE partition_date>='20190701' AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
                AND active_type IN ('1','2','4')
                -- Channel codes excluded from the report
                AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
                    ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
                    ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
                    ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
                    ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
                    ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
                    ,'promotion_shike','promotion_julang_jl03','promotion_zuimei','','unknown')
                AND first_channel_source_type not LIKE 'promotion\_jf\_%'
            )mas
            lateral view explode(mas.device_os_type ) a as device_os_type
            lateral view explode(mas.active_type ) b as active_type
            lateral view explode(mas.channel ) v as channel
        )t1
        LEFT JOIN
        (-- Active devices, matched on the following day to detect next-day retention
            SELECT device_id
                ,concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) AS partition_date
            FROM online.ml_device_day_active_status
            WHERE partition_date>='20190701' AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
        )t2
        ON t1.device_id=t2.device_id and date_add(t1.partition_date,1)=t2.partition_date
        left join
        (-- Good click: detail page view referred from home with page_stay >= 20
         -- NOTE(review): page_stay is presumably in seconds - confirm
            SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) AS partition_date
                ,cl_id
                ,count(distinct time_str) as pv
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date>='20190701' AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
            and referrer='home'
            AND action = 'page_view'
            AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail',
                'video_steep','article_detail','wiki_detail','product_detail','wiki_brand','wiki_collect','welfare_detail')
            and page_stay>=20
            group by partition_date,cl_id
        )t3
        on t1.partition_date=t3.partition_date and t1.device_id=t3.cl_id
        left join
        (-- Home card click on diary, user_post, answer or qa cards
            SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) AS partition_date
                ,cl_id
                ,count(distinct time_str) as pv
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date>='20190701' AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
            AND action='on_click_card'
            AND page_name ='home'
            and params['card_content_type'] in ('diary','user_post','answer','qa')
            group by partition_date,cl_id
        )t4
        on t1.partition_date=t4.partition_date and t1.device_id=t4.cl_id
        LEFT JOIN
        (-- Good click again on the day after the good click in t3 (same filters as t3)
            SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) AS partition_date
                ,cl_id
                ,count(distinct time_str) as pv
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date>='20190701' AND partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
            and referrer='home'
            AND action = 'page_view'
            AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail',
                'video_steep','article_detail','wiki_detail','product_detail','wiki_brand','wiki_collect','welfare_detail')
            and page_stay>=20
            group by partition_date,cl_id
        )t5
        ON t3.cl_id=t5.cl_id and date_add(t3.partition_date,1)=t5.partition_date
        left join
        ( -- Remove blacklisted devices: anti-join against devices flagged abnormal in the partition for yesterday
            select distinct device_id
            from ML.ML_D_CT_DV_DEVICECLEAN_DIMEN_D
            where PARTITION_DAY = regexp_replace(DATE_SUB(current_date,1) ,'-','')
            AND is_abnormal_device = 'true'
        )spam_pv
        on t1.device_id =spam_pv.device_id
        WHERE spam_pv.device_id IS NULL
        group by t1.partition_date,device_os_type,active_type,channel
    )t1
)t2
group by substr(day_id,1,6)
    ,device_os_type
    ,active_type
    ,channel
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment