Commit 80e0e071 authored by litaolemo

update

parent 73827f33
...@@ -91,41 +91,47 @@ for t in range(0, task_days): ...@@ -91,41 +91,47 @@ for t in range(0, task_days):
print("-------------------------------") print("-------------------------------")
sql = r"""SELECT sql = r"""
,t3.device_os_type as device_type SELECT t3.device_os_type as device_type
,t3.active_type as active_type ,t3.active_type as active_type
,t3.channel as channel_type ,t3.channel as channel_type
,NVL(t3.search_pv,0) as pv ,NVL(t3.search_pv,0) as pv
,NVL(t3.search_uv,0) as uv ,NVL(t3.search_uv,0) as uv
,NVL(t4.hexin_card_click_pv,0) as search_core_pv
,NVL(t4.neirong_card_click_pv,0) as search_pv
FROM FROM
( (
SELECT active_type,device_os_type,channel,search_pv,search_uv FROM SELECT query,active_type,device_os_type,channel,search_pv,search_uv
FROM
( (
SELECT active_type,device_os_type,channel SELECT query,active_type,device_os_type,channel
,count(t1.cl_id) as search_pv ,count(t1.cl_id) as search_pv
,count(distinct t1.cl_id) as search_uv ,count(distinct t1.cl_id) as search_uv
FROM FROM
( (
SELECT partition_date,cl_id SELECT partition_date
,params['query'] as query
,cl_id
FROM online.bl_hdfs_maidian_updates FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= '{yesterday_str}' WHERE partition_date >= {yesterday_str}
AND partition_date < '{today_str}' AND partition_date < {today_str}
AND action in ('do_search','search_result_click_search') AND action in ('do_search','search_result_click_search')
UNION ALL UNION ALL
SELECT cl_id SELECT partition_date,params['query'] as query,cl_id
FROM online.bl_hdfs_maidian_updates FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= '{yesterday_str}' WHERE partition_date >= {yesterday_str}
AND partition_date < '{today_str}' AND partition_date < {today_str}
AND action = 'on_click_card' AND action = 'on_click_card'
AND params['page_name']='search_home' AND params['page_name']='search_home'
UNION ALL UNION ALL
SELECT partition_date SELECT partition_date
,params['card_name'] as query
,cl_id ,cl_id
FROM online.bl_hdfs_maidian_updates FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= '{yesterday_str}' WHERE partition_date >= {yesterday_str}
AND partition_date < '{today_str}' AND partition_date < {today_str}
AND action = 'on_click_card' AND action = 'on_click_card'
AND params['in_page_pos']='猜你喜欢' AND params['in_page_pos']='猜你喜欢'
AND params['tab_name']='精选' AND params['tab_name']='精选'
...@@ -134,10 +140,11 @@ for t in range(0, task_days): ...@@ -134,10 +140,11 @@ for t in range(0, task_days):
UNION ALL UNION ALL
SELECT partition_date SELECT partition_date
,params['card_name'] as query
,cl_id ,cl_id
FROM online.bl_hdfs_maidian_updates FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= '{yesterday_str}' WHERE partition_date >= {yesterday_str}
AND partition_date < '{today_str}' AND partition_date < {today_str}
AND action = 'on_click_card' AND action = 'on_click_card'
AND page_name='welfare_home' AND page_name='welfare_home'
AND params['card_type'] ='search_word' AND params['card_type'] ='search_word'
...@@ -145,10 +152,11 @@ for t in range(0, task_days): ...@@ -145,10 +152,11 @@ for t in range(0, task_days):
UNION ALL UNION ALL
SELECT partition_date SELECT partition_date
,params['card_name'] as query
,cl_id ,cl_id
FROM online.bl_hdfs_maidian_updates FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= '{yesterday_str}' WHERE partition_date >= {yesterday_str}
AND partition_date < '{today_str}' AND partition_date < {today_str}
AND int(split(app_version,'\\.')[1]) >= 27 AND int(split(app_version,'\\.')[1]) >= 27
AND action='on_click_card' AND action='on_click_card'
AND params['card_type']='highlight_word' AND params['card_type']='highlight_word'
...@@ -168,11 +176,11 @@ for t in range(0, task_days): ...@@ -168,11 +176,11 @@ for t in range(0, task_days):
LEFT JOIN LEFT JOIN
(SELECT code,is_ai_channel,partition_day (SELECT code,is_ai_channel,partition_day
FROM DIM.DIM_AI_CHANNEL_ZP_NEW FROM DIM.DIM_AI_CHANNEL_ZP_NEW
WHERE partition_day>= '{yesterday_str}' WHERE partition_day>= {yesterday_str}
AND partition_day < '{today_str}') tmp AND partition_day < {today_str}) tmp
ON m.partition_date=tmp.partition_day AND first_channel_source_type=code ON m.partition_date=tmp.partition_day AND first_channel_source_type=code
WHERE partition_date >= '{yesterday_str}' WHERE partition_date >= {yesterday_str}
AND partition_date < '{today_str}' AND partition_date < {today_str}
AND active_type in ('1','2','4') AND active_type in ('1','2','4')
) mas ) mas
LATERAL VIEW explode(mas.channel) t2 AS channel LATERAL VIEW explode(mas.channel) t2 AS channel
...@@ -181,8 +189,6 @@ for t in range(0, task_days): ...@@ -181,8 +189,6 @@ for t in range(0, task_days):
)t2 )t2
on t1.cl_id=t2.device_id AND t1.partition_date = t2.partition_date on t1.cl_id=t2.device_id AND t1.partition_date = t2.partition_date
GROUP BY active_type,device_os_type,channel GROUP BY active_type,device_os_type,channel
)t
)t3
""".format(today_str=today_str, yesterday_str=yesterday_str, ) """.format(today_str=today_str, yesterday_str=yesterday_str, )
device_df = spark.sql(sql) device_df = spark.sql(sql)
device_df.show(1, False) device_df.show(1, False)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment