Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
M
meta_base_code
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
黎涛
meta_base_code
Commits
74b4a60c
Commit
74b4a60c
authored
Aug 31, 2020
by
litaolemo
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update
parent
6ee9106b
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
100 additions
and
100 deletions
+100
-100
search_strategy_d.py
task/search_strategy_d.py
+100
-100
No files found.
task/search_strategy_d.py
View file @
74b4a60c
...
@@ -123,106 +123,106 @@ for t in range(0, task_days):
...
@@ -123,106 +123,106 @@ for t in range(0, task_days):
print
(
"-------------------------------"
)
print
(
"-------------------------------"
)
sql
=
r"""SELECT
#
sql = r"""SELECT
,t3.device_os_type as device_type
#
,t3.device_os_type as device_type
,t3.active_type as active_type
#
,t3.active_type as active_type
,t3.channel as channel_type
#
,t3.channel as channel_type
,NVL(t3.search_pv,0) as pv
#
,NVL(t3.search_pv,0) as pv
,NVL(t3.search_uv,0) as uv
#
,NVL(t3.search_uv,0) as uv
FROM
#
FROM
(
#
(
SELECT active_type,device_os_type,channel,search_pv,search_uv
#
SELECT active_type,device_os_type,channel,search_pv,search_uv
FROM
#
FROM
(
#
(
SELECT active_type,device_os_type,channel
#
SELECT active_type,device_os_type,channel
,count(t1.cl_id) as search_pv
#
,count(t1.cl_id) as search_pv
,count(distinct t1.cl_id) as search_uv
#
,count(distinct t1.cl_id) as search_uv
FROM
#
FROM
(
#
(
SELECT partition_date
#
SELECT partition_date
,cl_id
#
,cl_id
FROM online.bl_hdfs_maidian_updates
#
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= {yesterday_str}
# WHERE partition_date >= {yesterday_str}
AND partition_date < {today_str}
#
AND partition_date < {today_str}
AND action in ('do_search','search_result_click_search')
#
AND action in ('do_search','search_result_click_search')
#
UNION ALL
#
UNION ALL
SELECT cl_id
#
SELECT cl_id
FROM online.bl_hdfs_maidian_updates
#
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= {yesterday_str}
# WHERE partition_date >= {yesterday_str}
AND partition_date < {today_str}
#
AND partition_date < {today_str}
AND action = 'on_click_card'
#
AND action = 'on_click_card'
AND params['page_name']='search_home'
#
AND params['page_name']='search_home'
#
UNION ALL
#
UNION ALL
SELECT partition_date
#
SELECT partition_date
,cl_id
#
,cl_id
FROM online.bl_hdfs_maidian_updates
#
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= {yesterday_str}
# WHERE partition_date >= {yesterday_str}
AND partition_date < {today_str}
#
AND partition_date < {today_str}
AND action = 'on_click_card'
#
AND action = 'on_click_card'
AND params['in_page_pos']='猜你喜欢'
#
AND params['in_page_pos']='猜你喜欢'
AND params['tab_name']='精选'
#
AND params['tab_name']='精选'
AND params['card_type']='search_word'
#
AND params['card_type']='search_word'
#
#
UNION ALL
#
UNION ALL
SELECT partition_date
#
SELECT partition_date
,cl_id
#
,cl_id
FROM online.bl_hdfs_maidian_updates
#
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= {yesterday_str}
# WHERE partition_date >= {yesterday_str}
AND partition_date < {today_str}
#
AND partition_date < {today_str}
AND action = 'on_click_card'
#
AND action = 'on_click_card'
AND page_name='welfare_home'
#
AND page_name='welfare_home'
AND params['card_type'] ='search_word'
#
AND params['card_type'] ='search_word'
AND params['in_page_pos']='大家都在搜'
#
AND params['in_page_pos']='大家都在搜'
#
UNION ALL
#
UNION ALL
SELECT partition_date
#
SELECT partition_date
,cl_id
#
,cl_id
FROM online.bl_hdfs_maidian_updates
#
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= {yesterday_str}
# WHERE partition_date >= {yesterday_str}
AND partition_date < {today_str}
#
AND partition_date < {today_str}
AND int(split(app_version,'\\.')[1]) >= 27
#
AND int(split(app_version,'\\.')[1]) >= 27
AND action='on_click_card'
#
AND action='on_click_card'
AND params['card_type']='highlight_word'
#
AND params['card_type']='highlight_word'
)t1
#
)t1
JOIN
#
JOIN
(
#
(
SELECT partition_date,device_id,t2.active_type,t2.channel,t2.device_os_type
#
SELECT partition_date,device_id,t2.active_type,t2.channel,t2.device_os_type
FROM
#
FROM
(
#
(
SELECT
#
SELECT
partition_date,m.device_id
#
partition_date,m.device_id
,array(device_os_type ,'合计') as device_os_type
#
,array(device_os_type ,'合计') as device_os_type
,array(case WHEN active_type = '4' THEN '老活'
# ,array(case WHEN active_type = '4' THEN '老活'
WHEN active_type in ('1','2') then '新增' END ,'合计') as active_type
# WHEN active_type in ('1','2') then '新增' END ,'合计') as active_type
,array(CASE WHEN is_ai_channel = 'true' THEN 'AI' ELSE '其他' END , '合计') as channel
#
,array(CASE WHEN is_ai_channel = 'true' THEN 'AI' ELSE '其他' END , '合计') as channel
FROM online.ml_device_day_active_status m
#
FROM online.ml_device_day_active_status m
LEFT JOIN
# LEFT JOIN
(SELECT code,is_ai_channel,partition_day
#
(SELECT code,is_ai_channel,partition_day
FROM DIM.DIM_AI_CHANNEL_ZP_NEW
#
FROM DIM.DIM_AI_CHANNEL_ZP_NEW
WHERE partition_day>= {yesterday_str}
# WHERE partition_day>= {yesterday_str}
AND partition_day < {today_str}) tmp
#
AND partition_day < {today_str}) tmp
ON m.partition_date=tmp.partition_day AND first_channel_source_type=code
#
ON m.partition_date=tmp.partition_day AND first_channel_source_type=code
WHERE partition_date >= {yesterday_str}
# WHERE partition_date >= {yesterday_str}
AND partition_date < {today_str}
#
AND partition_date < {today_str}
AND active_type in ('1','2','4')
#
AND active_type in ('1','2','4')
) mas
#
) mas
LATERAL VIEW explode(mas.channel) t2 AS channel
#
LATERAL VIEW explode(mas.channel) t2 AS channel
LATERAL VIEW explode(mas.device_os_type) t2 AS device_os_type
#
LATERAL VIEW explode(mas.device_os_type) t2 AS device_os_type
LATERAL VIEW explode(mas.active_type) t2 AS active_type
#
LATERAL VIEW explode(mas.active_type) t2 AS active_type
)t2
#
)t2
on t1.cl_id=t2.device_id AND t1.partition_date = t2.partition_date
#
on t1.cl_id=t2.device_id AND t1.partition_date = t2.partition_date
GROUP BY active_type,device_os_type,channel
#
GROUP BY active_type,device_os_type,channel
)t
#
)t
)t3
#
)t3
"""
.
format
(
today_str
=
today_str
,
yesterday_str
=
yesterday_str
,
)
#
""".format(today_str=today_str, yesterday_str=yesterday_str, )
device_df
=
spark
.
sql
(
sql
)
#
device_df = spark.sql(sql)
device_df
.
show
(
1
,
False
)
#
device_df.show(1, False)
sql_res
=
device_df
.
collect
()
#
sql_res = device_df.collect()
for
res
in
sql_res
:
#
for res in sql_res:
print
(
res
)
#
print(res)
# device_df.createOrReplaceTempView("data_table")
# device_df.createOrReplaceTempView("data_table")
# collects_sql = """
# collects_sql = """
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment