Commit 74b4a60c authored by litaolemo

update

parent 6ee9106b
...@@ -123,106 +123,106 @@ for t in range(0, task_days): ...@@ -123,106 +123,106 @@ for t in range(0, task_days):
print("-------------------------------") print("-------------------------------")
sql = r"""SELECT # sql = r"""SELECT
,t3.device_os_type as device_type # ,t3.device_os_type as device_type
,t3.active_type as active_type # ,t3.active_type as active_type
,t3.channel as channel_type # ,t3.channel as channel_type
,NVL(t3.search_pv,0) as pv # ,NVL(t3.search_pv,0) as pv
,NVL(t3.search_uv,0) as uv # ,NVL(t3.search_uv,0) as uv
FROM # FROM
( # (
SELECT active_type,device_os_type,channel,search_pv,search_uv # SELECT active_type,device_os_type,channel,search_pv,search_uv
FROM # FROM
( # (
SELECT active_type,device_os_type,channel # SELECT active_type,device_os_type,channel
,count(t1.cl_id) as search_pv # ,count(t1.cl_id) as search_pv
,count(distinct t1.cl_id) as search_uv # ,count(distinct t1.cl_id) as search_uv
FROM # FROM
( # (
SELECT partition_date # SELECT partition_date
,cl_id # ,cl_id
FROM online.bl_hdfs_maidian_updates # FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= {yesterday_str} # WHERE partition_date >= {yesterday_str}
AND partition_date < {today_str} # AND partition_date < {today_str}
AND action in ('do_search','search_result_click_search') # AND action in ('do_search','search_result_click_search')
#
UNION ALL # UNION ALL
SELECT cl_id # SELECT cl_id
FROM online.bl_hdfs_maidian_updates # FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= {yesterday_str} # WHERE partition_date >= {yesterday_str}
AND partition_date < {today_str} # AND partition_date < {today_str}
AND action = 'on_click_card' # AND action = 'on_click_card'
AND params['page_name']='search_home' # AND params['page_name']='search_home'
#
UNION ALL # UNION ALL
SELECT partition_date # SELECT partition_date
,cl_id # ,cl_id
FROM online.bl_hdfs_maidian_updates # FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= {yesterday_str} # WHERE partition_date >= {yesterday_str}
AND partition_date < {today_str} # AND partition_date < {today_str}
AND action = 'on_click_card' # AND action = 'on_click_card'
AND params['in_page_pos']='猜你喜欢' # AND params['in_page_pos']='猜你喜欢'
AND params['tab_name']='精选' # AND params['tab_name']='精选'
AND params['card_type']='search_word' # AND params['card_type']='search_word'
#
#
UNION ALL # UNION ALL
SELECT partition_date # SELECT partition_date
,cl_id # ,cl_id
FROM online.bl_hdfs_maidian_updates # FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= {yesterday_str} # WHERE partition_date >= {yesterday_str}
AND partition_date < {today_str} # AND partition_date < {today_str}
AND action = 'on_click_card' # AND action = 'on_click_card'
AND page_name='welfare_home' # AND page_name='welfare_home'
AND params['card_type'] ='search_word' # AND params['card_type'] ='search_word'
AND params['in_page_pos']='大家都在搜' # AND params['in_page_pos']='大家都在搜'
#
UNION ALL # UNION ALL
SELECT partition_date # SELECT partition_date
,cl_id # ,cl_id
FROM online.bl_hdfs_maidian_updates # FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= {yesterday_str} # WHERE partition_date >= {yesterday_str}
AND partition_date < {today_str} # AND partition_date < {today_str}
AND int(split(app_version,'\\.')[1]) >= 27 # AND int(split(app_version,'\\.')[1]) >= 27
AND action='on_click_card' # AND action='on_click_card'
AND params['card_type']='highlight_word' # AND params['card_type']='highlight_word'
)t1 # )t1
JOIN # JOIN
( # (
SELECT partition_date,device_id,t2.active_type,t2.channel,t2.device_os_type # SELECT partition_date,device_id,t2.active_type,t2.channel,t2.device_os_type
FROM # FROM
( # (
SELECT # SELECT
partition_date,m.device_id # partition_date,m.device_id
,array(device_os_type ,'合计') as device_os_type # ,array(device_os_type ,'合计') as device_os_type
,array(case WHEN active_type = '4' THEN '老活' # ,array(case WHEN active_type = '4' THEN '老活'
WHEN active_type in ('1','2') then '新增' END ,'合计') as active_type # WHEN active_type in ('1','2') then '新增' END ,'合计') as active_type
,array(CASE WHEN is_ai_channel = 'true' THEN 'AI' ELSE '其他' END , '合计') as channel # ,array(CASE WHEN is_ai_channel = 'true' THEN 'AI' ELSE '其他' END , '合计') as channel
FROM online.ml_device_day_active_status m # FROM online.ml_device_day_active_status m
LEFT JOIN # LEFT JOIN
(SELECT code,is_ai_channel,partition_day # (SELECT code,is_ai_channel,partition_day
FROM DIM.DIM_AI_CHANNEL_ZP_NEW # FROM DIM.DIM_AI_CHANNEL_ZP_NEW
WHERE partition_day>= {yesterday_str} # WHERE partition_day>= {yesterday_str}
AND partition_day < {today_str}) tmp # AND partition_day < {today_str}) tmp
ON m.partition_date=tmp.partition_day AND first_channel_source_type=code # ON m.partition_date=tmp.partition_day AND first_channel_source_type=code
WHERE partition_date >= {yesterday_str} # WHERE partition_date >= {yesterday_str}
AND partition_date < {today_str} # AND partition_date < {today_str}
AND active_type in ('1','2','4') # AND active_type in ('1','2','4')
) mas # ) mas
LATERAL VIEW explode(mas.channel) t2 AS channel # LATERAL VIEW explode(mas.channel) t2 AS channel
LATERAL VIEW explode(mas.device_os_type) t2 AS device_os_type # LATERAL VIEW explode(mas.device_os_type) t2 AS device_os_type
LATERAL VIEW explode(mas.active_type) t2 AS active_type # LATERAL VIEW explode(mas.active_type) t2 AS active_type
)t2 # )t2
on t1.cl_id=t2.device_id AND t1.partition_date = t2.partition_date # on t1.cl_id=t2.device_id AND t1.partition_date = t2.partition_date
GROUP BY active_type,device_os_type,channel # GROUP BY active_type,device_os_type,channel
)t # )t
)t3 # )t3
""".format(today_str=today_str, yesterday_str=yesterday_str, ) # """.format(today_str=today_str, yesterday_str=yesterday_str, )
device_df = spark.sql(sql) # device_df = spark.sql(sql)
device_df.show(1, False) # device_df.show(1, False)
sql_res = device_df.collect() # sql_res = device_df.collect()
for res in sql_res: # for res in sql_res:
print(res) # print(res)
# device_df.createOrReplaceTempView("data_table") # device_df.createOrReplaceTempView("data_table")
# collects_sql = """ # collects_sql = """
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment