Commit 86c943f2 authored by litaolemo

update

parent ba9a569c
...@@ -83,7 +83,7 @@ for t in range(0, 3): ...@@ -83,7 +83,7 @@ for t in range(0, 3):
ctr_sql = """ ctr_sql = """
SELECT SELECT
t1.partition_date as day_id, t1.partition_date as day_id,
t1.device_os_type as device_os_type, t1.device_os_type as device_os_type,
t1.active_type as active_type, t1.active_type as active_type,
...@@ -105,8 +105,7 @@ for t in range(0, 3): ...@@ -105,8 +105,7 @@ for t in range(0, 3):
NVL(sum(post_pv),0) as total_post_pv, NVL(sum(post_pv),0) as total_post_pv,
NVL(sum(post_click_pv),0) as post_click_pv NVL(sum(post_click_pv),0) as post_click_pv
FROM FROM
( (
SELECT partition_date SELECT partition_date
,device_os_type ,device_os_type
...@@ -114,7 +113,7 @@ for t in range(0, 3): ...@@ -114,7 +113,7 @@ for t in range(0, 3):
WHEN active_type IN ('1','2') THEN '新增' END AS active_type WHEN active_type IN ('1','2') THEN '新增' END AS active_type
,device_id ,device_id
FROM online.ml_device_day_active_status FROM online.ml_device_day_active_status
WHERE partition_date={partition_day} WHERE partition_date='{partition_day}'
AND active_type IN ('1','2','4') AND active_type IN ('1','2','4')
AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3' AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang' ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
...@@ -124,34 +123,8 @@ for t in range(0, 3): ...@@ -124,34 +123,8 @@ for t in range(0, 3):
,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ' ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
,'promotion_shike','promotion_julang_jl03','promotion_zuimei') ,'promotion_shike','promotion_julang_jl03','promotion_zuimei')
AND first_channel_source_type not LIKE 'promotion\_jf\_%' AND first_channel_source_type not LIKE 'promotion\_jf\_%'
)t1 JOIN )t1
JOIN
(--卡片,卡片id和session_id去重
SELECT partition_date,
cl_id,
count(distinct card_id) as session_pv0
FROM
(SELECT partition_date,
cl_id,
card_id,
app_session_id
from online.ml_community_precise_exposure_detail
WHERE partition_date={partition_day}
AND action in ('page_precise_exposure','home_choiceness_card_exposure') --7745版本action改为page_precise_exposure
AND is_exposure = '1' ----精准曝光
AND page_name ='home'
AND tab_name = '精选'
AND (transaction_type in ('-1','smr','hotspot','pgc','newdata','hotspot_feed','aistragegy','excestragegy','FIXEDSTRATEGY','FIXEDSTRATEGY_VIDEO')
or transaction_type like '%ctr' or transaction_type like '%cvr' or transaction_type like 'deeplink%')
AND card_content_type in ('qa','diary','user_post','answer','special_pool')
group by partition_date,
cl_id,
card_id,
app_session_id
)a
group by partition_date,cl_id
) t0 on t1.device_id = t0.cl_id
LEFT JOIN
(--精准曝光,卡片id和session_id去重 (--精准曝光,卡片id和session_id去重
SELECT partition_date, SELECT partition_date,
card_content_type, card_content_type,
...@@ -183,7 +156,7 @@ for t in range(0, 3): ...@@ -183,7 +156,7 @@ for t in range(0, 3):
card_id, card_id,
app_session_id app_session_id
from online.ml_community_precise_exposure_detail from online.ml_community_precise_exposure_detail
WHERE partition_date={partition_day} WHERE partition_date='{partition_day}'
AND action in ('page_precise_exposure','home_choiceness_card_exposure') --7745版本action改为page_precise_exposure AND action in ('page_precise_exposure','home_choiceness_card_exposure') --7745版本action改为page_precise_exposure
AND is_exposure = '1' ----精准曝光 AND is_exposure = '1' ----精准曝光
AND page_name ='home' AND page_name ='home'
...@@ -215,7 +188,7 @@ for t in range(0, 3): ...@@ -215,7 +188,7 @@ for t in range(0, 3):
LATERAL VIEW explode (a.recommend_type) v as recommend_type LATERAL VIEW explode (a.recommend_type) v as recommend_type
group by partition_date,card_content_type,cl_id,v.recommend_type,card_id group by partition_date,card_content_type,cl_id,v.recommend_type,card_id
)t2 )t2
on t0.cl_id=t2.cl_id and t0.partition_date=t2.partition_date on t1.device_id=t2.cl_id and t1.partition_date=t2.partition_date
LEFT JOIN LEFT JOIN
(--卡片,卡片id和session_id去重 (--卡片,卡片id和session_id去重
SELECT partition_date, SELECT partition_date,
...@@ -248,7 +221,7 @@ for t in range(0, 3): ...@@ -248,7 +221,7 @@ for t in range(0, 3):
params['card_id'] as card_id, params['card_id'] as card_id,
app_session_id app_session_id
from online.bl_hdfs_maidian_updates from online.bl_hdfs_maidian_updates
WHERE partition_date={partition_day} WHERE partition_date='{partition_day}'
AND action='on_click_card' AND action='on_click_card'
AND params['page_name'] ='home' AND params['page_name'] ='home'
AND params['tab_name'] = '精选' AND params['tab_name'] = '精选'
...@@ -297,7 +270,7 @@ for t in range(0, 3): ...@@ -297,7 +270,7 @@ for t in range(0, 3):
when page_name in ('custom_special') then 'special' else null end as page_name, when page_name in ('custom_special') then 'special' else null end as page_name,
page_stay,time_str page_stay,time_str
FROM online.bl_hdfs_maidian_updates FROM online.bl_hdfs_maidian_updates
WHERE partition_date={partition_day} WHERE partition_date='{partition_day}'
AND action='page_view' AND action='page_view'
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail','custom_special') AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail','custom_special')
AND referrer='home' AND referrer='home'
...@@ -312,7 +285,7 @@ for t in range(0, 3): ...@@ -312,7 +285,7 @@ for t in range(0, 3):
( (
select id,visual_page_id,'special' as page_name select id,visual_page_id,'special' as page_name
from tl.tl_zx_api_special_pool from tl.tl_zx_api_special_pool
where partition_day ={partition_day} where partition_day ='{partition_day}'
group by id,visual_page_id group by id,visual_page_id
)b )b
on a.business_id=b.visual_page_id and a.page_name=b.page_name on a.business_id=b.visual_page_id and a.page_name=b.page_name
...@@ -335,7 +308,7 @@ for t in range(0, 3): ...@@ -335,7 +308,7 @@ for t in range(0, 3):
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa'
when page_name in ('custom_special') then 'special' else null end as page_name,time_str,page_stay when page_name in ('custom_special') then 'special' else null end as page_name,time_str,page_stay
FROM online.bl_hdfs_maidian_updates FROM online.bl_hdfs_maidian_updates
WHERE partition_date={partition_day} WHERE partition_date='{partition_day}'
AND action='page_view' AND action='page_view'
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail','custom_special') AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail','custom_special')
AND referrer='home' AND referrer='home'
...@@ -349,7 +322,7 @@ for t in range(0, 3): ...@@ -349,7 +322,7 @@ for t in range(0, 3):
(--在8月份新增了内容专题卡片,需要转换下id (--在8月份新增了内容专题卡片,需要转换下id
select id,visual_page_id,'special' as page_name select id,visual_page_id,'special' as page_name
from tl.tl_zx_api_special_pool from tl.tl_zx_api_special_pool
where partition_day ={partition_day} where partition_day ='{partition_day}'
group by id,visual_page_id group by id,visual_page_id
)b )b
on a.business_id=b.visual_page_id and a.page_name=b.page_name on a.business_id=b.visual_page_id and a.page_name=b.page_name
...@@ -371,7 +344,7 @@ for t in range(0, 3): ...@@ -371,7 +344,7 @@ for t in range(0, 3):
when page_name in ('custom_special') then 'special' else null end as page_name, when page_name in ('custom_special') then 'special' else null end as page_name,
count(1) as navbar_pv count(1) as navbar_pv
FROM online.bl_hdfs_maidian_updates FROM online.bl_hdfs_maidian_updates
WHERE partition_date={partition_day} WHERE partition_date='{partition_day}'
AND action in ('on_click_navbar_search','do_search') AND action in ('on_click_navbar_search','do_search')
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail','custom_special') AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail','custom_special')
AND (referrer='home' or AND (referrer='home' or
...@@ -387,7 +360,7 @@ for t in range(0, 3): ...@@ -387,7 +360,7 @@ for t in range(0, 3):
( (
select id,visual_page_id,'special' as page_name select id,visual_page_id,'special' as page_name
from tl.tl_zx_api_special_pool from tl.tl_zx_api_special_pool
where partition_day ={partition_day} where partition_day ='{partition_day}'
group by id,visual_page_id group by id,visual_page_id
)b )b
on a.business_id=b.visual_page_id and a.page_name=b.page_name on a.business_id=b.visual_page_id and a.page_name=b.page_name
...@@ -408,7 +381,7 @@ for t in range(0, 3): ...@@ -408,7 +381,7 @@ for t in range(0, 3):
when page_name in ('custom_special') then 'special' else null end as page_name, when page_name in ('custom_special') then 'special' else null end as page_name,
count(1) as highlight_pv count(1) as highlight_pv
FROM online.bl_hdfs_maidian_updates FROM online.bl_hdfs_maidian_updates
WHERE partition_date={partition_day} WHERE partition_date='{partition_day}'
AND action='on_click_card' AND action='on_click_card'
and params['card_type']='highlight_word' and params['card_type']='highlight_word'
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail','custom_special') AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail','custom_special')
...@@ -425,7 +398,7 @@ for t in range(0, 3): ...@@ -425,7 +398,7 @@ for t in range(0, 3):
( (
select id,visual_page_id,'special' as page_name select id,visual_page_id,'special' as page_name
from tl.tl_zx_api_special_pool from tl.tl_zx_api_special_pool
where partition_day ={partition_day} where partition_day ='{partition_day}'
group by id,visual_page_id group by id,visual_page_id
)b )b
on a.business_id=b.visual_page_id and a.page_name=b.page_name on a.business_id=b.visual_page_id and a.page_name=b.page_name
...@@ -446,7 +419,7 @@ for t in range(0, 3): ...@@ -446,7 +419,7 @@ for t in range(0, 3):
when page_name in ('custom_special') then 'special' else null end as page_name, when page_name in ('custom_special') then 'special' else null end as page_name,
count(1) as pv count(1) as pv
FROM online.bl_hdfs_maidian_updates FROM online.bl_hdfs_maidian_updates
WHERE partition_date={partition_day} WHERE partition_date='{partition_day}'
AND (get_json_object(params['extra_param'], '$.type')='交互栏' AND (get_json_object(params['extra_param'], '$.type')='交互栏'
or get_json_object(params['extra_param'], '$.jump_from')='msg_link' or get_json_object(params['extra_param'], '$.jump_from')='msg_link'
or params['in_page_pos']='top' or params['in_page_pos']='top'
...@@ -468,7 +441,7 @@ for t in range(0, 3): ...@@ -468,7 +441,7 @@ for t in range(0, 3):
( (
select id,visual_page_id,'special' as page_name select id,visual_page_id,'special' as page_name
from tl.tl_zx_api_special_pool from tl.tl_zx_api_special_pool
where partition_day ={partition_day} where partition_day ='{partition_day}'
group by id,visual_page_id group by id,visual_page_id
)b )b
on a.business_id=b.visual_page_id and a.page_name=b.page_name on a.business_id=b.visual_page_id and a.page_name=b.page_name
...@@ -490,7 +463,7 @@ for t in range(0, 3): ...@@ -490,7 +463,7 @@ for t in range(0, 3):
when page_name in ('custom_special') then 'special' else null end as page_name, when page_name in ('custom_special') then 'special' else null end as page_name,
count(1) as service_pv count(1) as service_pv
FROM online.bl_hdfs_maidian_updates FROM online.bl_hdfs_maidian_updates
WHERE partition_date={partition_day} WHERE partition_date='{partition_day}'
AND (action='on_click_card'and params['card_content_type']='service' AND (action='on_click_card'and params['card_content_type']='service'
or action='on_click_button' and params['button_name']='unfold' and page_name in ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail') or action='on_click_button' and params['button_name']='unfold' and page_name in ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail')
or action='on_click_button' and params['button_name'] = 'more_recommendations') or action='on_click_button' and params['button_name'] = 'more_recommendations')
...@@ -508,7 +481,7 @@ for t in range(0, 3): ...@@ -508,7 +481,7 @@ for t in range(0, 3):
( (
select id,visual_page_id,'special' as page_name select id,visual_page_id,'special' as page_name
from tl.tl_zx_api_special_pool from tl.tl_zx_api_special_pool
where partition_day ={partition_day} where partition_day ='{partition_day}'
group by id,visual_page_id group by id,visual_page_id
)b )b
on a.business_id=b.visual_page_id and a.page_name=b.page_name on a.business_id=b.visual_page_id and a.page_name=b.page_name
...@@ -530,7 +503,7 @@ for t in range(0, 3): ...@@ -530,7 +503,7 @@ for t in range(0, 3):
when page_name in ('custom_special') then 'special' else null end as page_name, when page_name in ('custom_special') then 'special' else null end as page_name,
count(1) as service_pv count(1) as service_pv
FROM online.bl_hdfs_maidian_updates FROM online.bl_hdfs_maidian_updates
WHERE partition_date={partition_day} WHERE partition_date='{partition_day}'
AND action='on_click_card' AND action='on_click_card'
and params['card_content_type'] in ('qa','diary','user_post','answer') and params['card_content_type'] in ('qa','diary','user_post','answer')
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail','custom_special') AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail','custom_special')
...@@ -547,7 +520,7 @@ for t in range(0, 3): ...@@ -547,7 +520,7 @@ for t in range(0, 3):
( (
select id,visual_page_id,'special' as page_name select id,visual_page_id,'special' as page_name
from tl.tl_zx_api_special_pool from tl.tl_zx_api_special_pool
where partition_day ={partition_day} where partition_day ='{partition_day}'
group by id,visual_page_id group by id,visual_page_id
)b )b
on a.business_id=b.visual_page_id and a.page_name=b.page_name on a.business_id=b.visual_page_id and a.page_name=b.page_name
...@@ -569,7 +542,7 @@ for t in range(0, 3): ...@@ -569,7 +542,7 @@ for t in range(0, 3):
when page_name in ('custom_special') then 'special' else null end as page_name, when page_name in ('custom_special') then 'special' else null end as page_name,
count(1) as video_pv count(1) as video_pv
FROM online.bl_hdfs_maidian_updates FROM online.bl_hdfs_maidian_updates
WHERE partition_date={partition_day} WHERE partition_date='{partition_day}'
AND action='on_click_button' AND action='on_click_button'
and params['button_name']='video_interview' and params['button_name']='video_interview'
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail','custom_special') AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail','custom_special')
...@@ -586,7 +559,7 @@ for t in range(0, 3): ...@@ -586,7 +559,7 @@ for t in range(0, 3):
( (
select id,visual_page_id,'special' as page_name select id,visual_page_id,'special' as page_name
from tl.tl_zx_api_special_pool from tl.tl_zx_api_special_pool
where partition_day ={partition_day} where partition_day ='{partition_day}'
group by id,visual_page_id group by id,visual_page_id
)b )b
on a.business_id=b.visual_page_id and a.page_name=b.page_name on a.business_id=b.visual_page_id and a.page_name=b.page_name
...@@ -607,7 +580,7 @@ for t in range(0, 3): ...@@ -607,7 +580,7 @@ for t in range(0, 3):
when page_name in ('custom_special') then 'special' else null end as page_name, when page_name in ('custom_special') then 'special' else null end as page_name,
count(1) as referral_pv count(1) as referral_pv
FROM online.bl_hdfs_maidian_updates FROM online.bl_hdfs_maidian_updates
WHERE partition_date={partition_day} WHERE partition_date='{partition_day}'
AND action='on_click_button' AND action='on_click_button'
and params['button_name']='referral' and params['button_name']='referral'
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail','custom_special') AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail','custom_special')
...@@ -624,7 +597,7 @@ for t in range(0, 3): ...@@ -624,7 +597,7 @@ for t in range(0, 3):
( (
select id,visual_page_id,'special' as page_name select id,visual_page_id,'special' as page_name
from tl.tl_zx_api_special_pool from tl.tl_zx_api_special_pool
where partition_day ={partition_day} where partition_day ='{partition_day}'
group by id,visual_page_id group by id,visual_page_id
)b )b
on a.business_id=b.visual_page_id and a.page_name=b.page_name on a.business_id=b.visual_page_id and a.page_name=b.page_name
...@@ -640,7 +613,7 @@ for t in range(0, 3): ...@@ -640,7 +613,7 @@ for t in range(0, 3):
else null end as page_name, else null end as page_name,
count(distinct params['business_id'],app_session_id) as post_pv count(distinct params['business_id'],app_session_id) as post_pv
FROM online.bl_hdfs_maidian_updates FROM online.bl_hdfs_maidian_updates
WHERE partition_date={partition_day} WHERE partition_date='{partition_day}'
AND action='page_view' AND action='page_view'
AND page_name IN ('post_detail','user_post_detail','doctor_post_detail','custom_special') AND page_name IN ('post_detail','user_post_detail','doctor_post_detail','custom_special')
AND (json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1]IN ('post_detail','user_post_detail','doctor_post_detail') AND (json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1]IN ('post_detail','user_post_detail','doctor_post_detail')
...@@ -660,7 +633,7 @@ for t in range(0, 3): ...@@ -660,7 +633,7 @@ for t in range(0, 3):
else null end as page_name, else null end as page_name,
count(distinct params['card_id'],app_session_id) as post_click_pv count(distinct params['card_id'],app_session_id) as post_click_pv
FROM online.bl_hdfs_maidian_updates FROM online.bl_hdfs_maidian_updates
WHERE partition_date={partition_day} WHERE partition_date='{partition_day}'
AND action='on_click_card' AND action='on_click_card'
and params['card_content_type'] in ('user_post') and params['card_content_type'] in ('user_post')
AND page_name IN ('post_detail','user_post_detail','doctor_post_detail') AND page_name IN ('post_detail','user_post_detail','doctor_post_detail')
...@@ -681,11 +654,12 @@ for t in range(0, 3): ...@@ -681,11 +654,12 @@ for t in range(0, 3):
from ML.ML_D_CT_DV_DEVICECLEAN_DIMEN_D from ML.ML_D_CT_DV_DEVICECLEAN_DIMEN_D
where PARTITION_DAY = '{partition_day}' where PARTITION_DAY = '{partition_day}'
AND is_abnormal_device = 'true' AND is_abnormal_device = 'true'
)dev )dev
on t1.device_id=dev.device_id on t1.partition_date=dev.partition_date and t1.device_id=dev.device_id
WHERE dev.device_id is null WHERE dev.device_id is null
GROUP BY t1.partition_date,t1.device_os_type,t1.active_type,t2.card_content_type,t2.recommend_type GROUP BY t1.partition_date,t1.device_os_type,t1.active_type,t2.card_content_type,t2.recommend_type
order by day_id,device_os_type,active_type,card_content_type,recommend_type order by day_id,device_os_type,active_type,card_content_type,recommend_type
""".format(partition_day=today_str) """.format(partition_day=today_str)
ctr_df = spark.sql(ctr_sql) ctr_df = spark.sql(ctr_sql)
ctr_df.createOrReplaceTempView("temp_ctr") ctr_df.createOrReplaceTempView("temp_ctr")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment