Commit b0776c98 authored by 赵建伟's avatar 赵建伟

update codes

parent 9b677760
#!/bin/bash
# 定义变量方便修改
APP=gmall
hive=/opt/module/hive/bin/hive
hive=/opt/hive/hive-1.1.0-cdh5.16.1/bin/beeline
# 如果是输入的日期按照取输入日期;如果没输入日期取当前时间的前一天
if [ -n $1 ] ;then
log_date=$1
partition_day=$1
else
log_date=`date --date="$V_PARYMD -1 day" +%Y%m%d`
partition_day=`date --date="$V_PARYMD -1 day" +%Y%m%d`
fi
sql="
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table "$APP".dwd_order_info partition(dt)
select * from "$APP".ods_order_info
where dt='$log_date' and id is not null;
insert overwrite table "$APP".dwd_order_detail partition(dt)
select * from "$APP".ods_order_detail
where dt='$log_date' and id is not null;
insert overwrite table "$APP".dwd_user_info partition(dt)
select * from "$APP".ods_user_info
where dt='$log_date' and id is not null;
insert overwrite table "$APP".dwd_payment_info partition(dt)
select * from "$APP".ods_payment_info
where dt='$log_date' and id is not null;
insert overwrite table "$APP".dwd_sku_info partition(dt)
select
sku.id,
sku.spu_id,
sku.price,
sku.sku_name,
sku.sku_desc,
sku.weight,
sku.tm_id,
sku.category3_id,
c2.id category2_id ,
c1.id category1_id,
c3.name category3_name,
c2.name category2_name,
c1.name category1_name,
sku.create_time,
sku.dt
from
"$APP".ods_sku_info sku
join "$APP".ods_base_category3 c3 on sku.category3_id=c3.id
join "$APP".ods_base_category2 c2 on c3.category2_id=c2.id
join "$APP".ods_base_category1 c1 on c2.category1_id=c1.id
where sku.dt='$log_date' and c2.dt='$log_date'
and c3.dt='$log_date' and c1.dt='$log_date'
and sku.id is not null;
ADD JAR hdfs:///user/hive/share/lib/udf/hive-udf-1.0-SNAPSHOT.jar;
CREATE TEMPORARY FUNCTION convup AS 'com.gmei.hive.common.udf.UDFConvUpgrade';
INSERT OVERWRITE TABLE pm.tl_pm_recommend_strategy_d PARTITION (PARTITION_DAY =$partition_day)
SELECT
t1.partition_date as day_id,
t1.device_os_type as device_os_type,
t1.active_type as active_type,
t2.card_content_type as card_content_type,
t2.recommend_type as recommend_type,
NVL(concat(round(sum(t3.session_pv)/sum(t2.session_pv)*100,2),'%'),0) as home_ctr,
NVL(concat(round((NVL(sum(navbar_pv),0)+NVL(sum(highlight_pv),0)
+NVL(sum(recom_wel_pv),0)+NVL(sum(recom_content_pv),0))/sum(t2.session_pv)*100,2),'%'),0) as second_rate,
NVL(concat(round((NVL(sum(navbar_pv),0)+NVL(sum(highlight_pv),0)
+NVL(sum(recom_wel_pv),0)+NVL(sum(recom_content_pv),0))/sum(t3.session_pv)*100,2),'%'),0) as first_rate,
NVL(sum(t3.session_pv),0) as card_click,
NVL(sum(t2.session_pv),0) as card_exposure,
NVL(sum(navbar_pv),0)+NVL(sum(highlight_pv),0)+NVL(sum(self_wel_pv),0)+NVL(sum(recom_wel_pv),0)+NVL(sum(recom_content_pv),0) as total_second_click,
NVL(round(sum(page_stay)/count(distinct t4.cl_id)/60,2),0) as avg_page_stay,
NVL(sum(navbar_pv),0) as navbar_search,
NVL(sum(highlight_pv),0) as highlight_word,
NVL(sum(self_wel_pv),0) as self_welfare_card,
NVL(sum(recom_wel_pv),0)-NVL(sum(self_wel_pv),0) as recommend_welfare_card,--需要排除关联的商品卡片点击
NVL(sum(recom_content_pv),0) as recommend_content_card,
NULL as recommend_special_card,
NULL as transfer_card,
NULL as video_consultation
FROM
(
SELECT partition_date
,device_os_type
,CASE WHEN active_type = '4' THEN '老活'
WHEN active_type IN ('1','2') THEN '新增' END AS active_type
,device_id
FROM online.ml_device_day_active_status
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND active_type IN ('1','2','4')
AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
,'promotion_shike','promotion_julang_jl03','promotion_zuimei')
AND first_channel_source_type not LIKE 'promotion\_jf\_%'
)t1
JOIN
(--精准曝光,卡片id和session_id去重
SELECT partition_date,
card_content_type,
cl_id,
recommend_type,
card_id,
count(distinct app_session_id) as session_pv
FROM
(
SELECT partition_date,
cl_id,
case when card_content_type in ('qa','answer') then 'qa' else card_content_type end as card_content_type,
CASE WHEN transaction_type in ('ctr') THEN 'ctr预估'
WHEN transaction_type in ('cvr') THEN 'cvr预估'
WHEN transaction_type in ('-1','smr') THEN 'smr'
when transaction_type in ('pgc','hotspot') then '热点卡片'
when transaction_type in ('newdata') then '保量卡片'
END AS recommend_type,
card_id,
app_session_id
from online.ml_community_precise_exposure_detail
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND action in ('page_precise_exposure','home_choiceness_card_exposure') --7745版本action改为page_precise_exposure
AND is_exposure = '1' ----精准曝光
AND page_name ='home'
AND tab_name = '精选'
AND transaction_type in ('-1','ctr','smr','cvr','hotspot','pgc','newdata')
AND card_content_type in ('qa','diary','user_post','answer')
group by partition_date,
case when card_content_type in ('qa','answer') then 'qa' else card_content_type end,
cl_id,
CASE WHEN transaction_type in ('ctr') THEN 'ctr预估'
WHEN transaction_type in ('cvr') THEN 'cvr预估'
WHEN transaction_type in ('-1','smr') THEN 'smr'
when transaction_type in ('pgc','hotspot') then '热点卡片'
when transaction_type in ('newdata') then '保量卡片' END,
card_id,
app_session_id
)a
group by partition_date,card_content_type,cl_id,recommend_type,card_id
)t2
on t1.device_id=t2.cl_id and t1.partition_date=t2.partition_date
LEFT JOIN
(--卡片,卡片id和session_id去重
SELECT partition_date,
card_content_type,
cl_id,
recommend_type,
card_id,
count(distinct app_session_id) as session_pv
FROM
(
SELECT partition_date,
cl_id,
case when params['card_content_type'] in ('qa','answer') then 'qa' else params['card_content_type'] end as card_content_type,
CASE WHEN params['transaction_type'] in ('ctr') THEN 'ctr预估'
WHEN params['transaction_type'] in ('cvr') THEN 'cvr预估'
WHEN params['transaction_type'] in ('-1','smr') THEN 'smr'
when params['transaction_type'] in ('pgc','hotspot') then '热点卡片'
when params['transaction_type'] in ('newdata') then '保量卡片'
END AS recommend_type,
params['card_id'] as card_id,
app_session_id
from online.bl_hdfs_maidian_updates
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND action='on_click_card'
AND params['page_name'] ='home'
AND params['tab_name'] = '精选'
AND params['transaction_type'] in ('-1','ctr','smr','cvr','hotspot','pgc','newdata')
AND params['card_content_type'] in ('qa','diary','user_post','answer')
GROUP BY partition_date,
cl_id,
case when params['card_content_type'] in ('qa','answer') then 'qa' else params['card_content_type'] end,
CASE WHEN params['transaction_type'] in ('ctr') THEN 'ctr预估'
WHEN params['transaction_type'] in ('cvr') THEN 'cvr预估'
WHEN params['transaction_type'] in ('-1','smr') THEN 'smr'
when params['transaction_type'] in ('pgc','hotspot') then '热点卡片'
when params['transaction_type'] in ('newdata') then '保量卡片' END,
params['card_id'],
app_session_id
)a
group by partition_date,card_content_type,cl_id,recommend_type,card_id
)t3
on t2.partition_date=t3.partition_date
and t2.cl_id=t3.cl_id
and t2.card_id=t3.card_id
and t2.card_content_type=t3.card_content_type
and t2.recommend_type=t3.recommend_type
LEFT JOIN
(--页面浏览时长
SELECT partition_date,cl_id,business_id,
case when page_name in ('diary_detail','topic_detail') then 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end as page_name,
sum(page_stay) as page_stay
FROM online.bl_hdfs_maidian_updates
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND action='page_view'
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail')
AND referrer='home'
AND page_stay>=0 AND page_stay<1000
GROUP BY partition_date,cl_id,business_id,
case when page_name in ('diary_detail','topic_detail') then 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end
)t4
on t4.partition_date=t3.partition_date
and t4.cl_id=t3.cl_id
and t4.business_id=t3.card_id
and t4.page_name=t3.card_content_type
LEFT JOIN
(--搜索框和点击行为
SELECT partition_date,cl_id,business_id,
case when page_name in ('diary_detail','topic_detail') then 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end as page_name,
count(1) as navbar_pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND action in ('on_click_navbar_search','do_search')
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail')
AND (referrer='home' or
(params['referrer_link'] like '%[%' and
json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1]='home'))
group by partition_date,cl_id,business_id,
case when page_name in ('diary_detail','topic_detail') then 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end
)t5
on t5.partition_date=t3.partition_date
and t5.cl_id=t3.cl_id
and t5.business_id=t3.card_id
and t5.page_name=t3.card_content_type
LEFT JOIN
(--点击高亮词
SELECT partition_date,cl_id,business_id,
case when page_name in ('diary_detail','topic_detail') then 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end as page_name,
count(1) as highlight_pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND action='on_click_card'
and params['card_type']='highlight_word'
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail')
AND (referrer='home' or
(params['referrer_link'] like '%[%' and
json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1]='home'))
group by partition_date,cl_id,business_id,
case when page_name in ('diary_detail','topic_detail') then 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end
)t6
on t6.partition_date=t3.partition_date
and t6.cl_id=t3.cl_id
and t6.business_id=t3.card_id
and t6.page_name=t3.card_content_type
LEFT JOIN
(--关联的美购卡片
SELECT partition_date,cl_id,business_id,page_name,count(distinct app_session_id) as self_wel_pv
FROM
(
SELECT partition_date,cl_id,business_id,app_session_id,params['card_id'] as card_id,
case when page_name in ('diary_detail','topic_detail') then 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end as page_name,
count(1) as pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND (get_json_object(params['extra_param'], '$.type')='交互栏'
or get_json_object(params['extra_param'], '$.jump_from')='msg_link')
AND action='on_click_card'
and params['card_content_type']='service'
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail')
AND (referrer='home' or
(params['referrer_link'] like '%[%' and
json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1]='home'))
group by partition_date,cl_id,business_id,app_session_id,params['card_id'],
case when page_name in ('diary_detail','topic_detail') then 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end
)a
group by partition_date,cl_id,business_id,page_name
)t7
on t7.partition_date=t3.partition_date
and t7.cl_id=t3.cl_id
and t7.business_id=t3.card_id
and t7.page_name=t3.card_content_type
LEFT JOIN
(--推荐的美购卡片(需要排除作者消费的美购)
SELECT partition_date,cl_id,business_id,page_name,count(distinct app_session_id) as recom_wel_pv
FROM
(
SELECT partition_date,cl_id,business_id,app_session_id,params['card_id'] as card_id,
case when page_name in ('diary_detail','topic_detail') then 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end as page_name,
count(1) as service_pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND (action='on_click_card'and params['card_content_type']='service'
or action='on_click_button' and params['button_name']='unfold')
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail')
AND (referrer='home' or
(params['referrer_link'] like '%[%' and
json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1]='home'))
group by partition_date,cl_id,business_id,app_session_id,params['card_id'],
case when page_name in ('diary_detail','topic_detail') then 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end
)a
group by partition_date,cl_id,business_id,page_name
)t8
on t8.partition_date=t3.partition_date
and t8.cl_id=t3.cl_id
and t8.business_id=t3.card_id
and t8.page_name=t3.card_content_type
LEFT JOIN
(--推荐的内容卡片
SELECT partition_date,cl_id,business_id,page_name,count(distinct app_session_id) as recom_content_pv
FROM
(
SELECT partition_date,cl_id,business_id,app_session_id,params['card_id'] as card_id,
case when page_name in ('diary_detail','topic_detail') then 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end as page_name,
count(1) as service_pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND action='on_click_card'
and params['card_content_type'] in ('qa','diary','user_post','answer')
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail')
AND (referrer='home' or
(params['referrer_link'] like '%[%' and
json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1]='home'))
group by partition_date,cl_id,business_id,app_session_id,params['card_id'],
case when page_name in ('diary_detail','topic_detail') then 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end
)a
group by partition_date,cl_id,business_id,page_name
)t9
on t9.partition_date=t3.partition_date
and t9.cl_id=t3.cl_id
and t9.business_id=t3.card_id
and t9.page_name=t3.card_content_type
LEFT JOIN
(
select distinct device_id
from ml.ml_d_ct_dv_devicespam_d --去除机构刷单设备,即作弊设备(浏览和曝光事件去除)
WHERE partition_day=regexp_replace(DATE_SUB(current_date,1) ,'-','')
union all
select distinct device_id
from dim.dim_device_user_staff --去除内网用户
)spam_pv
on spam_pv.device_id=t2.cl_id
LEFT JOIN
(
SELECT partition_date,device_id
FROM
(--找出user_id当天活跃的第一个设备id
SELECT user_id,partition_date,
if(size(device_list) > 0, device_list [ 0 ], '') AS device_id
FROM online.ml_user_updates
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
)t1
JOIN
( --医生账号
SELECT distinct user_id
FROM online.tl_hdfs_doctor_view
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
--马甲账号/模特用户
UNION ALL
SELECT user_id
FROM ml.ml_c_ct_ui_user_dimen_d
WHERE partition_day = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND (is_puppet = 'true' or is_classifyuser = 'true')
UNION ALL
--公司内网覆盖用户
select distinct user_id
from dim.dim_device_user_staff
UNION ALL
--登陆过医生设备
SELECT distinct t1.user_id
FROM
(
SELECT user_id, v.device_id as device_id
FROM online.ml_user_history_detail
LATERAL VIEW EXPLODE(device_history_list) v AS device_id
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
) t1
JOIN
(
SELECT device_id
FROM online.ml_device_history_detail
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND is_login_doctor = '1'
) t2
ON t1.device_id = t2.device_id
)t2
on t1.user_id=t2.user_id
group by partition_date,device_id
)dev
on t2.partition_date=dev.partition_date and t2.cl_id=dev.device_id
WHERE spam_pv.device_id IS NULL
and dev.device_id is null
GROUP BY t1.partition_date,t1.device_os_type,t1.active_type,t2.card_content_type,t2.recommend_type
order by day_id,device_os_type,active_type,card_content_type,recommend_type;
"
$hive -e "$sql"
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment