Commit 53df02e0 authored by litaolemo

update

parent 3e3ba7b5
......@@ -93,108 +93,67 @@ spark.sql("CREATE TEMPORARY FUNCTION arrayMerge AS 'com.gmei.hive.common.udf.UDF
# huidu_device_id_df = spark.sql(huidu_device_id_sql)
# huidu_device_id_df.createOrReplaceTempView("dev_view")
# Spark SQL text assigned to sql_search_ctr. `.format()` fills the {start_date}
# and {end_date} placeholders with '20201018' and '20201025'; the values are
# substituted without quotes, so the SQL compares partition columns to bare numbers.
# NOTE(review): as shown here the literal holds two different queries run together:
#   1. a search PV/UV ranking over online.bl_hdfs_maidian_updates joined to
#      online.ml_device_day_active_status (ends with `limit 200`), and
#   2. a per-device exposure/session aggregate over
#      online.ml_community_precise_exposure_detail.
# This looks like the removed and added sides of a diff with the +/- markers
# lost; it is not one valid statement in this form. Confirm against the
# committed file before running or editing it.
# NOTE(review): the exposure query hard-codes partition_date='20201105' and
# contains no placeholders, so the format arguments only affect the search query.
sql_search_ctr = r"""
SELECT query,search_pv,search_uv
FROM
(
SELECT query
,count(t1.cl_id) as search_pv
,count(distinct t1.cl_id) as search_uv
FROM
(
SELECT partition_date
,params['query'] as query
,cl_id
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= {start_date}
AND partition_date < {end_date}
AND ((action = 'do_search' AND params['input_type']<>'everyone_watch') or action='search_result_click_search')
UNION ALL
SELECT partition_date,params['query'] as query,cl_id
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= {start_date}
AND partition_date < {end_date}
AND action = 'do_search'
and params['input_type']='everyone_watch'
and params['tab']='精选'
and page_name='home'
AND params['query'] not in ('AI测颜值','AI测肤质') --这两个词不跳转搜索结果页
UNION ALL
SELECT partition_date,params['query'] as query,cl_id
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= {start_date}
AND partition_date < {end_date}
AND action = 'on_click_card'
AND params['page_name']='search_home'
UNION ALL
SELECT partition_date
,params['card_name'] as query
,cl_id
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= {start_date}
AND partition_date < {end_date}
AND action = 'on_click_card'
AND params['in_page_pos']='猜你喜欢'
--AND params['tab_name']='精选'
AND params['card_type']='search_word'
AND params['card_name'] not in ('AI测颜值','AI测肤质') --这两个词不跳转搜索结果页
--AND page_name='home' android的page_name为空
UNION ALL
SELECT partition_date
,params['card_name'] as query
,cl_id
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= {start_date}
AND partition_date < {end_date}
AND action = 'on_click_card'
AND page_name='welfare_home'
AND params['card_type'] ='search_word'
AND params['in_page_pos']='大家都在搜'
UNION ALL
SELECT partition_date
,params['card_name'] as query
,cl_id
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= {start_date}
AND partition_date < {end_date}
AND int(split(app_version,'\\.')[1]) >= 27
AND action='on_click_card'
AND params['card_type']='highlight_word'
)t1
JOIN
(
SELECT partition_date,device_id,t2.active_type,t2.channel,t2.device_os_type
select count(1),avg(session_pv) from (SELECT partition_date,
card_content_type,
cl_id,
v.recommend_type,
count(distinct app_session_id) as session_pv ,
count(app_session_id) as session_pv1
FROM
(
SELECT
partition_date,m.device_id
,array(device_os_type ,'合计') as device_os_type
,array(case WHEN active_type = '4' THEN '老活'
WHEN active_type in ('1','2') then '新增' END ,'合计') as active_type
,array(CASE WHEN is_ai_channel = 'true' THEN 'AI' ELSE '其他' END , '合计') as channel
FROM online.ml_device_day_active_status m
LEFT JOIN
(SELECT code,is_ai_channel,partition_day
FROM DIM.DIM_AI_CHANNEL_ZP_NEW
WHERE partition_day>= {start_date}
AND partition_day < {end_date}) tmp
ON m.partition_date=tmp.partition_day AND first_channel_source_type=code
WHERE partition_date >= {start_date}
AND partition_date < {end_date}
AND active_type in ('1','2','4')
) mas
LATERAL VIEW explode(mas.channel) t2 AS channel
LATERAL VIEW explode(mas.device_os_type) t2 AS device_os_type
LATERAL VIEW explode(mas.active_type) t2 AS active_type
)t2
on t1.cl_id=t2.device_id AND t1.partition_date = t2.partition_date
GROUP BY query
) order by search_pv desc limit 200
SELECT partition_date,
cl_id,
case when card_content_type in ('qa','answer') then 'qa'
when card_content_type in ('special_pool') then 'special' else card_content_type end as card_content_type,
CASE when transaction_type in ('fmctr','samecity_fmctr') then array('fmctr','合计')
when transaction_type in ('high_quality_fmctr') then array('high_quality_fmctr','合计')
WHEN (transaction_type like '%ctr' and transaction_type not in ('high_quality_ctr','high_quality_fmctr','fmctr','samecity_fmctr') ) THEN array('ctr预估','合计')
when transaction_type in ('high_quality_ctr') then array('high_quality_ctr','合计')
WHEN transaction_type like '%cvr' THEN array('cvr预估','合计')
WHEN transaction_type in ('-1','smr') THEN array('smr','合计')
when transaction_type in ('pgc','hotspot') then array('热点卡片')
when transaction_type in ('newdata') then array('保量卡片')
when transaction_type in ('hotspot_feed') then array('hotspot_feed','合计')
when transaction_type in ('aistragegy') then array('新用户AI帖优先','合计')
when transaction_type in ('excestragegy') then array('新用户精华帖优先','合计')
when transaction_type in ('FIXEDSTRATEGY') then array('新氧新用户策略一','合计')
when transaction_type in ('FIXEDSTRATEGY_VIDEO') then array('新氧新用户策略二','合计')
when transaction_type like 'deeplink%' then array('deeplink策略','合计')
end AS recommend_type,
card_id,
app_session_id
from online.ml_community_precise_exposure_detail
WHERE partition_date='20201105'
AND action in ('page_precise_exposure','home_choiceness_card_exposure') --7745版本action改为page_precise_exposure
AND is_exposure = '1' ----精准曝光
AND page_name ='home'
AND tab_name = '精选'
AND (transaction_type in ('-1','smr','hotspot','pgc','newdata','hotspot_feed','aistragegy','excestragegy','FIXEDSTRATEGY','FIXEDSTRATEGY_VIDEO')
or transaction_type like '%ctr' or transaction_type like '%cvr' or transaction_type like 'deeplink%')
AND card_content_type in ('qa','diary','user_post','answer','special_pool')
group by partition_date,
case when card_content_type in ('qa','answer') then 'qa'
when card_content_type in ('special_pool') then 'special' else card_content_type end,
cl_id,
CASE when transaction_type in ('fmctr','samecity_fmctr') then array('fmctr','合计')
when transaction_type in ('high_quality_fmctr') then array('high_quality_fmctr','合计')
WHEN (transaction_type like '%ctr' and transaction_type not in ('high_quality_ctr','high_quality_fmctr','fmctr','samecity_fmctr')) THEN array('ctr预估','合计')
when transaction_type in ('high_quality_ctr') then array('high_quality_ctr','合计')
WHEN transaction_type like '%cvr' THEN array('cvr预估','合计')
WHEN transaction_type in ('-1','smr') THEN array('smr','合计')
when transaction_type in ('pgc','hotspot') then array('热点卡片')
when transaction_type in ('newdata') then array('保量卡片')
when transaction_type in ('hotspot_feed') then array('hotspot_feed','合计')
when transaction_type in ('aistragegy') then array('新用户AI帖优先','合计')
when transaction_type in ('excestragegy') then array('新用户精华帖优先','合计')
when transaction_type in ('FIXEDSTRATEGY') then array('新氧新用户策略一','合计')
when transaction_type in ('FIXEDSTRATEGY_VIDEO') then array('新氧新用户策略二','合计')
when transaction_type like 'deeplink%' then array('deeplink策略','合计') end,
card_id,
app_session_id
)a
LATERAL VIEW explode (a.recommend_type) v as recommend_type
group by partition_date,card_content_type,cl_id,v.recommend_type )
""".format(start_date='20201018',end_date='20201025')
# Print the formatted SQL text.
print(sql_search_ctr)
......@@ -205,11 +164,12 @@ sql_res = search_ctr_df.collect()
# Print a separator line, then iterate over the rows in sql_res.
# NOTE(review): the statements after the `for` header carry no indentation here,
# and an active print(res.query, ...) / es.search(...) sequence sits next to a
# print(res) followed by a commented-out copy of the same calls. This looks like
# the before and after sides of a diff shown together -- confirm which is current.
# NOTE(review): `es` and `body` are not defined in the visible code, and
# `results` is not read afterwards in the visible code.
print("-------------------------------")
for res in sql_res:
print(res.query,res.search_pv)
results = es.search(
index='gm-dbmw-diary-read',
doc_type='diary',
timeout='10s',
body=body
)
print(res)
# print(res.query,res.search_pv)
# results = es.search(
# index='gm-dbmw-diary-read',
# doc_type='diary',
# timeout='10s',
# body=body
# )
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment