Commit 74fa4f4d authored by litaolemo

update

parent f59b999a
......@@ -93,66 +93,13 @@ spark.sql("CREATE TEMPORARY FUNCTION arrayMerge AS 'com.gmei.hive.common.udf.UDF
# huidu_device_id_df = spark.sql(huidu_device_id_sql)
# huidu_device_id_df.createOrReplaceTempView("dev_view")
# Counts the (partition_date, cl_id, recommend_type) groups that have at least
# four precise-exposure rows on the home page's '精选' tab for a single day.
#   * The inner query's GROUP BY has no aggregates, so it only de-duplicates
#     rows on (partition_date, cl_id, content type, recommend_type array,
#     card_id, app_session_id).
#   * LATERAL VIEW explode() emits one row per label of the recommend_type
#     array, so a row labelled e.g. ('fmctr','合计') is counted under both.
# Fixes applied here:
#   * removed the dangling comma after `session_pv0`; it sat directly before
#     FROM and made the statement unparsable;
#   * the partition value is substituted as a quoted string literal, the same
#     way the other queries in this file filter on partition_date;
#   * dropped the `end_date` format argument, which the template never used.
sql_search_ctr = r"""
select count(1) from (
SELECT partition_date,
cl_id,
count(card_id) as session_pv0
FROM
(
SELECT partition_date,
cl_id,
case when card_content_type in ('qa','answer') then 'qa'
when card_content_type in ('special_pool') then 'special' else card_content_type end as card_content_type,
CASE when transaction_type in ('fmctr','samecity_fmctr') then array('fmctr','合计')
when transaction_type in ('high_quality_fmctr') then array('high_quality_fmctr','合计')
WHEN (transaction_type like '%ctr' and transaction_type not in ('high_quality_ctr','high_quality_fmctr','fmctr','samecity_fmctr') ) THEN array('ctr预估','合计')
when transaction_type in ('high_quality_ctr') then array('high_quality_ctr','合计')
WHEN transaction_type like '%cvr' THEN array('cvr预估','合计')
WHEN transaction_type in ('-1','smr') THEN array('smr','合计')
when transaction_type in ('pgc','hotspot') then array('热点卡片')
when transaction_type in ('newdata') then array('保量卡片')
when transaction_type in ('hotspot_feed') then array('hotspot_feed','合计')
when transaction_type in ('aistragegy') then array('新用户AI帖优先','合计')
when transaction_type in ('excestragegy') then array('新用户精华帖优先','合计')
when transaction_type in ('FIXEDSTRATEGY') then array('新氧新用户策略一','合计')
when transaction_type in ('FIXEDSTRATEGY_VIDEO') then array('新氧新用户策略二','合计')
when transaction_type like 'deeplink%' then array('deeplink策略','合计')
end AS recommend_type,
card_id,
app_session_id
from online.ml_community_precise_exposure_detail
WHERE partition_date='{partition_day}'
AND action in ('page_precise_exposure','home_choiceness_card_exposure') --7745版本action改为page_precise_exposure
AND is_exposure = '1' ----精准曝光
AND page_name ='home'
AND tab_name = '精选'
AND (transaction_type in ('-1','smr','hotspot','pgc','newdata','hotspot_feed','aistragegy','excestragegy','FIXEDSTRATEGY','FIXEDSTRATEGY_VIDEO')
or transaction_type like '%ctr' or transaction_type like '%cvr' or transaction_type like 'deeplink%')
AND card_content_type in ('qa','diary','user_post','answer','special_pool')
group by partition_date,
case when card_content_type in ('qa','answer') then 'qa'
when card_content_type in ('special_pool') then 'special' else card_content_type end,
cl_id,
CASE when transaction_type in ('fmctr','samecity_fmctr') then array('fmctr','合计')
when transaction_type in ('high_quality_fmctr') then array('high_quality_fmctr','合计')
WHEN (transaction_type like '%ctr' and transaction_type not in ('high_quality_ctr','high_quality_fmctr','fmctr','samecity_fmctr')) THEN array('ctr预估','合计')
when transaction_type in ('high_quality_ctr') then array('high_quality_ctr','合计')
WHEN transaction_type like '%cvr' THEN array('cvr预估','合计')
WHEN transaction_type in ('-1','smr') THEN array('smr','合计')
when transaction_type in ('pgc','hotspot') then array('热点卡片')
when transaction_type in ('newdata') then array('保量卡片')
when transaction_type in ('hotspot_feed') then array('hotspot_feed','合计')
when transaction_type in ('aistragegy') then array('新用户AI帖优先','合计')
when transaction_type in ('excestragegy') then array('新用户精华帖优先','合计')
when transaction_type in ('FIXEDSTRATEGY') then array('新氧新用户策略一','合计')
when transaction_type in ('FIXEDSTRATEGY_VIDEO') then array('新氧新用户策略二','合计')
when transaction_type like 'deeplink%' then array('deeplink策略','合计') end,
card_id,
app_session_id
)a
LATERAL VIEW explode (a.recommend_type) v as recommend_type
group by partition_date,cl_id,v.recommend_type having session_pv0 >= 4)
""".format(partition_day='20201105')
SELECT *
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= '{start_date}'
AND partition_date < '{end_date}'
AND action = 'on_click_navbar_search'
limit 20
""".format(start_date='20201105',end_date='20201025')
print(sql_search_ctr)
search_ctr_df = spark.sql(sql_search_ctr)
......
-- Search daily report - Yinghe
-- Intended output: one row per (partition_date, device_os_type, active_type,
-- channel) with DAU, completed-search UV/PV and two derived ratios.
-- `${start_date}` / `${end_date}` are placeholders, presumably substituted by
-- the reporting tool before execution; every source is filtered to the
-- half-open range [start_date, end_date).
--
-- Fixes applied in this revision:
--   * the coalesce() lists referenced aliases t4..t9, which are not part of
--     the FROM clause; they now cover only t1, t2 and t3;
--   * t2 projects the dimension columns it groups by, because the join
--     condition reads them from t2;
--   * t3 is matched against coalesce(t1.<key>, t2.<key>) so that rows that
--     exist only in t2 after the first FULL JOIN can still pair with t3.
--
-- NOTE(review): the statement still cannot resolve as written. active_type,
-- device_os_type and channel are read from t1, t2 and t3, but t1 exposes only
-- (partition_date, dau) and the `click` derived tables inside t2 and t3 do not
-- project those columns. Their derivation is not visible here, so it is left
-- as a TODO instead of being guessed.
SELECT
    partition_date as `日期`
    ,device_os_type as `平台`,active_type as `活跃类型`,channel as `渠道类型`
    ,dau as `DAU`
    ,all_search_uv as `完成搜索uv`
    ,all_search_pv as `完成搜索pv`
    -- share of active devices that completed a search, as a percentage string; '-' when dau is 0
    ,if(dau <> 0 ,concat(cast((all_search_uv/dau)*100 as decimal(18,2)),'%') , '-') as `完成搜索的用户比例`
    -- searches per searching user; '-' when there were no searching users
    ,if(all_search_uv <> 0 ,concat(cast((all_search_pv/all_search_uv) as decimal(18,2)),'') , '-') as `搜索用户人均使用次数`
FROM
(
    SELECT
        coalesce(t1.partition_date,t2.partition_date,t3.partition_date) as partition_date
        ,coalesce(t1.active_type,t2.active_type,t3.active_type) as active_type
        ,coalesce(t1.device_os_type,t2.device_os_type,t3.device_os_type) as device_os_type
        ,coalesce(t1.channel,t2.channel,t3.channel) as channel
        ,coalesce(t1.dau,0) as dau
        ,coalesce(t3.all_search_uv,0) as all_search_uv -- total search UV
        ,coalesce(t3.all_search_pv,0) as all_search_pv -- total search PV
    FROM
    ( -- DAU: distinct active devices per day
        -- NOTE(review): TODO project and group by active_type / device_os_type / channel
        -- here; the joins below read them from t1.
        SELECT mas.partition_date,count(DISTINCT mas.device_id) as dau
        FROM
        (
            SELECT
                partition_date,m.device_id
            FROM online.ml_device_day_active_status m
            -- Nothing from tmp is selected; count(DISTINCT ...) above keeps dau
            -- unaffected by any row fan-out from this join.
            LEFT JOIN
                (SELECT code,is_ai_channel,partition_day
                 FROM DIM.DIM_AI_CHANNEL_ZP_NEW
                 WHERE partition_day>= '${start_date}' AND partition_day < '${end_date}' ) tmp
                ON m.partition_date=tmp.partition_day AND first_channel_source_type=code
            where partition_date >= '${start_date}'
            AND partition_date < '${end_date}'
            AND active_type in ('1','2','4')
        ) mas
        GROUP BY mas.partition_date
    )t1
    full JOIN
    (
        -- Search initiation: search-box clicks (tracking event fixed in version 7.24)
        -- plus the other entry points unioned below.
        -- NOTE(review): none of this subquery's counters are read by the outer SELECT;
        -- it only contributes dimension keys to the join.
        SELECT
            click.partition_date as partition_date
            ,active_type
            ,device_os_type
            ,channel
            ,count(click.cl_id) as all_search_click_pv_724
            ,count(distinct click.cl_id) as all_search_click_uv_724
        FROM
        (
            -- navbar search clicks, app version 7.24 and later within major version 7
            SELECT cl_id,partition_date,action,params['page_name'] as page_name,'' as input_type,app_version
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= '${start_date}'
            AND partition_date < '${end_date}'
            AND action = 'on_click_navbar_search'
            AND (int(split(app_version,'\\.')[0]) = 7 AND int(split(app_version,'\\.')[1]) >= 24)
            UNION all
            -- searches whose input_type is the detail-page default word
            SELECT cl_id,partition_date,action,params['page_name'] as page_name,params['input_type'] as input_type,app_version
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= '${start_date}'
            AND partition_date < '${end_date}'
            AND action = 'do_search'
            AND params['input_type'] = '详情页默认词'
            UNION all
            -- do_search events with input_type 'everyone_watch' on the home page
            SELECT cl_id,partition_date,action,page_name,'旧首页-大家都在看' as input_type,app_version
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= '${start_date}'
            AND partition_date < '${end_date}'
            AND action = 'do_search'
            and params['input_type']='everyone_watch'
            and params['tab']='精选'
            and page_name='home'
            AND params['query'] not in ('AI测颜值','AI测肤质') -- these two terms do not navigate to the search results page
            union all
            -- search_word card clicks with in_page_pos '猜你喜欢'
            SELECT cl_id,partition_date,action,'home' as page_name,'新首页-猜你喜欢' as input_type,app_version
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= '${start_date}'
            AND partition_date < '${end_date}'
            AND action = 'on_click_card'
            AND params['in_page_pos']='猜你喜欢'
            --AND params['tab_name']='精选'
            AND params['card_type']='search_word'
            AND params['card_name'] not in ('AI测颜值','AI测肤质') -- these two terms do not navigate to the search results page
            --AND page_name='home'  (disabled: page_name is empty on Android)
            union all
            -- search_word card clicks on welfare_home with in_page_pos '大家都在搜'
            SELECT cl_id,partition_date,action,params['page_name'] as page_name,'美购首页-大家都在搜' as input_type,app_version
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= '${start_date}'
            AND partition_date < '${end_date}'
            AND action = 'on_click_card'
            AND params['page_name']='welfare_home'
            AND params['card_type'] ='search_word'
            AND params['in_page_pos']='大家都在搜'
            union all
            -- highlight-word card clicks
            SELECT cl_id,partition_date,action,params['page_name'] as page_name,'高亮词' as input_type,app_version
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= '${start_date}'
            AND partition_date < '${end_date}'
            AND action = 'on_click_card'
            AND params['card_type'] ='highlight_word'
        )click
        -- NOTE(review): `click` does not project active_type / device_os_type / channel.
        GROUP BY click.partition_date,active_type,device_os_type,channel
    )t2
        on t2.partition_date = t1.partition_date
        AND t2.active_type = t1.active_type
        AND t2.device_os_type = t1.device_os_type
        AND t2.channel = t1.channel
    full JOIN
    (
        -- Search PV/UV
        SELECT
            click.partition_date as partition_date
            ,active_type
            ,device_os_type
            ,channel
            ,count(click.cl_id) as all_search_pv
            ,count(distinct click.cl_id) as all_search_uv
            -- suggestion / aggregate-module searches, app version 7.26 and later within major version 7
            ,count(case when int(split(app_version,'\\.')[0]) = 7 AND int(split(app_version,'\\.')[1]) >= 26 AND input_type in ('联想','聚合模块') then click.cl_id end) as sug_search_pv_726
            ,count(distinct case when int(split(app_version,'\\.')[0]) = 7 AND int(split(app_version,'\\.')[1]) >= 26 AND input_type in ('联想','聚合模块') then click.cl_id end) as sug_search_uv_726
        FROM
        (
            -- do_search events other than 'everyone_watch', plus searches issued from the result page
            SELECT cl_id,partition_date,action,params['page_name'] as page_name,params['input_type'] as input_type,app_version,params['query'] as query
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= '${start_date}'
            AND partition_date < '${end_date}'
            AND ((action = 'do_search' AND params['input_type']<>'everyone_watch') or action='search_result_click_search')
            UNION all
            -- do_search events with input_type 'everyone_watch' on the home page
            SELECT cl_id,partition_date,action,params['page_name'] as page_name,params['input_type'] as input_type,app_version,params['query'] as query
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= '${start_date}'
            AND partition_date < '${end_date}'
            AND action = 'do_search'
            and params['input_type']='everyone_watch'
            and params['tab']='精选'
            and page_name='home'
            AND params['query'] not in ('AI测颜值','AI测肤质') -- these two terms do not navigate to the search results page
            union all
            -- card clicks on the search_home page
            SELECT cl_id,partition_date,action,'search_home' as page_name,'' as input_type,app_version,params['query'] as query
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= '${start_date}'
            AND partition_date < '${end_date}'
            AND action = 'on_click_card'
            AND params['page_name']='search_home'
            union all
            -- search_word card clicks with in_page_pos '猜你喜欢'
            SELECT cl_id,partition_date,action,'home' as page_name,'首页-猜你喜欢' as input_type,app_version,params['card_name'] as query
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= '${start_date}'
            AND partition_date < '${end_date}'
            AND action = 'on_click_card'
            AND params['in_page_pos']='猜你喜欢'
            --AND params['tab_name']='精选'
            AND params['card_type']='search_word'
            AND params['card_name'] not in ('AI测颜值','AI测肤质') -- these two terms do not navigate to the search results page
            --AND page_name='home'  (disabled: page_name is empty on Android)
            union all
            -- search_word card clicks on welfare_home with in_page_pos '大家都在搜'
            SELECT cl_id,partition_date,action,params['page_name'] as page_name,'美购首页-大家都在搜' as input_type,app_version,params['card_name'] as query
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= '${start_date}'
            AND partition_date < '${end_date}'
            AND action = 'on_click_card'
            AND params['page_name']='welfare_home'
            AND params['card_type'] ='search_word'
            AND params['in_page_pos']='大家都在搜'
            union all
            -- highlight-word card clicks
            SELECT cl_id,partition_date,action,params['page_name'] as page_name,'高亮词' as input_type,app_version,params['card_name'] as query
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= '${start_date}'
            AND partition_date < '${end_date}'
            AND action = 'on_click_card'
            AND params['card_type'] ='highlight_word'
        )click
        -- NOTE(review): `click` does not project active_type / device_os_type / channel.
        GROUP BY click.partition_date,active_type,device_os_type,channel
    )t3
        -- After the first FULL JOIN the t1 keys are NULL for rows that exist only in t2,
        -- so t3 is matched against whichever side supplied the key.
        on t3.partition_date = coalesce(t1.partition_date,t2.partition_date)
        AND t3.active_type = coalesce(t1.active_type,t2.active_type)
        AND t3.device_os_type = coalesce(t1.device_os_type,t2.device_os_type)
        AND t3.channel = coalesce(t1.channel,t2.channel)
)t
ORDER BY `日期`,`平台`,`活跃类型`,`渠道类型`
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment