Commit 814278ac authored by litaolemo

update

parent d00049b6
...@@ -122,42 +122,42 @@ for t in range(0, task_days): ...@@ -122,42 +122,42 @@ for t in range(0, task_days):
)t2 )t2
ON t1.device_id = t2.device_id ON t1.device_id = t2.device_id
)t3 )t3
on t1.user_id=t3.user_id group by partition_date,device_id""".format(yesterday_str=yesterday_str,today_str=today_str) group by partition_date,device_id""".format(yesterday_str=yesterday_str,today_str=today_str)
print(sql_distinct_device_id) print(sql_distinct_device_id)
distinct_device_id_df = spark.sql(sql_distinct_device_id,) distinct_device_id_df = spark.sql(sql_distinct_device_id,)
distinct_device_id_df.show(1) distinct_device_id_view = distinct_device_id_df.createOrReplaceTempView("distinct_device_id_view")
sql_res = distinct_device_id_df.collect() # distinct_device_id_df.show(1)
for res in sql_res: # sql_res = distinct_device_id_df.collect()
print(res) # for res in sql_res:
# print(res)
print("-------------------------------") print("-------------------------------")
sql = r""" sql = r"""
SELECT SELECT t3.device_os_type as device_type
t3.device_os_type as device_type
,t3.active_type as active_type ,t3.active_type as active_type
,t3.channel as channel_type ,t3.channel as channel_type
---,t3.search_pv as pv ---,t3.search_pv as pv
---,t3.search_uv as uv ---,t3.search_uv as uv
---,t4.hexin_card_click_pv as search_core_pv ---,t4.hexin_card_click_pv as search_core_pv
---,t4.neirong_card_click_pv as search_pv ---,t4.neirong_card_click_pv as search_pv
,sum(t3.search_pv) as pv ,t3.search_pv) as pv
,sum(t3.search_uv) as uv ,t3.search_uv as uv
,sum(t4.hexin_card_click_pv) as search_core_pv ,t4.hexin_card_click_pv as search_core_pv
,sum(t4.neirong_card_click_pv) as search_pv ,t4.neirong_card_click_pv as search_pv
,distinct (t4.neirong_card_click_pv) as search_pv
FROM FROM
( (
SELECT query,active_type,device_os_type,channel,search_pv,search_uv SELECT ,active_type,device_os_type,channel,search_pv,search_uv
FROM FROM
( (
SELECT query,active_type,device_os_type,channel SELECT active_type,device_os_type,channel
,count(t1.cl_id) as search_pv ,count(t1.cl_id) as search_pv
,count(distinct t1.cl_id) as search_uv ,count(distinct t1.cl_id) as search_uv
FROM FROM
( (
SELECT partition_date SELECT partition_date
,params['query'] as query
,cl_id ,cl_id
FROM online.bl_hdfs_maidian_updates FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= {yesterday_str} WHERE partition_date >= {yesterday_str}
...@@ -165,7 +165,7 @@ for t in range(0, task_days): ...@@ -165,7 +165,7 @@ for t in range(0, task_days):
AND action in ('do_search','search_result_click_search') AND action in ('do_search','search_result_click_search')
UNION ALL UNION ALL
SELECT partition_date,params['query'] as query,cl_id SELECT partition_date,cl_id
FROM online.bl_hdfs_maidian_updates FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= {yesterday_str} WHERE partition_date >= {yesterday_str}
AND partition_date < {today_str} AND partition_date < {today_str}
...@@ -174,7 +174,6 @@ for t in range(0, task_days): ...@@ -174,7 +174,6 @@ for t in range(0, task_days):
UNION ALL UNION ALL
SELECT partition_date SELECT partition_date
,params['card_name'] as query
,cl_id ,cl_id
FROM online.bl_hdfs_maidian_updates FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= {yesterday_str} WHERE partition_date >= {yesterday_str}
...@@ -187,7 +186,6 @@ for t in range(0, task_days): ...@@ -187,7 +186,6 @@ for t in range(0, task_days):
UNION ALL UNION ALL
SELECT partition_date SELECT partition_date
,params['card_name'] as query
,cl_id ,cl_id
FROM online.bl_hdfs_maidian_updates FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= {yesterday_str} WHERE partition_date >= {yesterday_str}
...@@ -199,7 +197,6 @@ for t in range(0, task_days): ...@@ -199,7 +197,6 @@ for t in range(0, task_days):
UNION ALL UNION ALL
SELECT partition_date SELECT partition_date
,params['card_name'] as query
,cl_id ,cl_id
FROM online.bl_hdfs_maidian_updates FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= {yesterday_str} WHERE partition_date >= {yesterday_str}
...@@ -235,26 +232,24 @@ for t in range(0, task_days): ...@@ -235,26 +232,24 @@ for t in range(0, task_days):
LATERAL VIEW explode(mas.active_type) t2 AS active_type LATERAL VIEW explode(mas.active_type) t2 AS active_type
)t2 )t2
on t1.cl_id=t2.device_id AND t1.partition_date = t2.partition_date on t1.cl_id=t2.device_id AND t1.partition_date = t2.partition_date
GROUP BY query,active_type,device_os_type,channel GROUP BY active_type,device_os_type,channel
)t )t
)t3 )t3
LEFT JOIN LEFT JOIN
( (
SELECT t1.query,active_type,device_os_type,channel SELECT active_type,device_os_type,channel
,sum(hexin) as hexin_card_click_pv ,sum(hexin) as hexin_card_click_pv
,sum(neirong) as neirong_card_click_pv ,sum(neirong) as neirong_card_click_pv
FROM FROM
( (
SELECT NVL(t2.partition_date,t3.partition_date) as partition_date SELECT NVL(t2.partition_date,t3.partition_date) as partition_date
,NVL(t2.cl_id,t3.cl_id) as cl_id ,NVL(t2.cl_id,t3.cl_id) as cl_id
,NVL(t2.query,t3.query) as query
,NVL(t2.pv,0) as hexin ,NVL(t2.pv,0) as hexin
,NVL(t3.pv,0) as neirong ,NVL(t3.pv,0) as neirong
FROM FROM
( (
SELECT partition_date SELECT partition_date
,params['query'] as query
,cl_id ,cl_id
,count(1) as pv ,count(1) as pv
FROM online.bl_hdfs_maidian_updates FROM online.bl_hdfs_maidian_updates
...@@ -266,13 +261,11 @@ for t in range(0, task_days): ...@@ -266,13 +261,11 @@ for t in range(0, task_days):
or (action = 'on_click_card' AND params['card_content_type'] in ('service','hospital','doctor') AND page_name in ('search_result_more','search_result_welfare','search_result_hospital','search_result_doctor')) or (action = 'on_click_card' AND params['card_content_type'] in ('service','hospital','doctor') AND page_name in ('search_result_more','search_result_welfare','search_result_hospital','search_result_doctor'))
or (action = 'on_click_button' AND params['button_name'] = 'check_plan' AND page_name = 'search_result_more')) or (action = 'on_click_button' AND params['button_name'] = 'check_plan' AND page_name = 'search_result_more'))
GROUP BY partition_date GROUP BY partition_date
,params['query']
,cl_id ,cl_id
)t2 )t2
FULL JOIN FULL JOIN
( (
SELECT partition_date SELECT partition_date
,params['query'] as query
,cl_id ,cl_id
,count(1) as pv ,count(1) as pv
FROM online.bl_hdfs_maidian_updates FROM online.bl_hdfs_maidian_updates
...@@ -282,11 +275,9 @@ for t in range(0, task_days): ...@@ -282,11 +275,9 @@ for t in range(0, task_days):
AND page_name in ('search_result_more','search_result_diary','search_result_post')) AND page_name in ('search_result_more','search_result_diary','search_result_post'))
or (action = 'on_click_card' AND params['card_content_type'] in ('answer','diary') AND page_name in ('search_result_more','search_result_diary','search_result_question_answer'))) or (action = 'on_click_card' AND params['card_content_type'] in ('answer','diary') AND page_name in ('search_result_more','search_result_diary','search_result_question_answer')))
GROUP BY partition_date GROUP BY partition_date
,params['query']
,cl_id ,cl_id
)t3 )t3
on t3.partition_date=t2.partition_date on t3.partition_date=t2.partition_date
AND t3.query=t2.query
AND t3.cl_id=t2.cl_id AND t3.cl_id=t2.cl_id
)t1 )t1
JOIN JOIN
...@@ -316,9 +307,9 @@ for t in range(0, task_days): ...@@ -316,9 +307,9 @@ for t in range(0, task_days):
LATERAL VIEW explode(mas.active_type) t2 AS active_type LATERAL VIEW explode(mas.active_type) t2 AS active_type
)dev )dev
on t1.cl_id=dev.device_id and t1.partition_date = dev.partition_date on t1.cl_id=dev.device_id and t1.partition_date = dev.partition_date
GROUP BY t1.query,active_type,device_os_type,channel GROUP BY active_type,device_os_type,channel
)t4 )t4
on t3.query=t4.query and t3.active_type=t4.active_type and t3.device_os_type = t4.device_os_type AND t3.channel = t4.channel group by t3.active_type , t3.device_os_type ,t3.channel on t3.active_type=t4.active_type and t3.device_os_type = t4.device_os_type AND t3.channel = t4.channel group by t3.active_type , t3.device_os_type ,t3.channel
""".format(today_str=today_str, yesterday_str=yesterday_str, ) """.format(today_str=today_str, yesterday_str=yesterday_str, )
device_df = spark.sql(sql) device_df = spark.sql(sql)
device_df.show(1, False) device_df.show(1, False)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment