Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
4895ca0b
Commit
4895ca0b
authored
Oct 12, 2019
by
张彦钊
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
change
parent
eb5b8bdf
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
93 additions
and
76 deletions
+93
-76
meigou.py
local/meigou.py
+93
-76
No files found.
local/meigou.py
View file @
4895ca0b
...
...
@@ -14,104 +14,130 @@ def all_click(x):
date
=
(
datetime
.
date
.
today
()
-
datetime
.
timedelta
(
days
=
x
))
.
strftime
(
"
%
Y
%
m
%
d"
)
total
.
append
(
date
)
print
(
"美购
首页美购列表卡片
点击"
)
print
(
"美购
搜索
点击"
)
tmp
=
spark
.
sql
(
"select count(*) from online.bl_hdfs_maidian_updates "
"where partition_date='{}' "
"and a
ction = 'goto_welfare_detail' and params['from'] = 'welfare_home_list_item'
"
"where partition_date='{}'
and action = 'search_result_welfare_click_item'
"
"and a
pp['version'] in ('7.15.0','7.14.0')
"
.
format
(
date
))
.
rdd
.
map
(
lambda
x
:
x
[
0
])
.
collect
()[
0
]
total
.
append
(
tmp
)
sum
=
sum
+
tmp
print
(
"美
券相关的美购列表页美购卡片点击
"
)
print
(
"美
购首页相关推荐
"
)
tmp
=
spark
.
sql
(
"select count(*) from online.bl_hdfs_maidian_updates "
"where partition_date='{}' "
"and a
ction = 'goto_welfare_detail' and params['from'] = 'coupon_welfare_list
'"
"where partition_date='{}'
and action = 'goto_welfare_detail'
"
"and a
pp['version'] in ('7.15.0','7.14.0') and params['from'] = 'welfare_home_list_item
'"
.
format
(
date
))
.
rdd
.
map
(
lambda
x
:
x
[
0
])
.
collect
()[
0
]
total
.
append
(
tmp
)
sum
=
sum
+
tmp
print
(
"新美购首页-固定ICON美购卡片点击"
)
tmp
=
spark
.
sql
(
"select count(*) from online.bl_hdfs_maidian_updates "
"where partition_date='{}' "
"and action = 'goto_welfare_detail' and params['from'] = 'welfare_list'"
home_page_sum
=
0
print
(
"首页点击'全部'icon按钮进入的列表-美购卡片点击"
)
tmp
=
spark
.
sql
(
"select count(*) from online.bl_hdfs_maidian_updates where partition_date='{}' "
"and action = 'goto_welfare_detail' and app['version'] in ('7.15.0','7.14.0') "
"and params['from'] = 'welfare_list' and params['cpc_referer'] = '6'"
.
format
(
date
))
.
rdd
.
map
(
lambda
x
:
x
[
0
])
.
collect
()[
0
]
total
.
append
(
tmp
)
sum
=
sum
+
tmp
home_page_sum
=
home_page_sum
+
tmp
print
(
"首页
-品类模块点击跳转到品类聚合
美购卡片点击"
)
print
(
"首页
点击icon进入的列表-
美购卡片点击"
)
tmp
=
spark
.
sql
(
"select count(*) from online.bl_hdfs_maidian_updates "
"where partition_date='{}' "
"and action = 'goto_welfare_detail' and params['from'] = 'category'"
"where partition_date='{}'and action = 'goto_welfare_detail' "
"and app['version'] in ('7.15.0','7.14.0') "
"and params['from'] = 'category' and params['cpc_referer'] = '19'"
.
format
(
date
))
.
rdd
.
map
(
lambda
x
:
x
[
0
])
.
collect
()[
0
]
total
.
append
(
tmp
)
sum
=
sum
+
tmp
home_page_sum
=
home_page_sum
+
tmp
total
.
append
(
home_page_sum
)
sum
=
sum
+
home_page_sum
meigou_homepage_sum
=
0
print
(
"美购首页'全部'点击"
)
tmp
=
spark
.
sql
(
"select count(*) from online.bl_hdfs_maidian_updates where partition_date='{}' "
"and action = 'goto_welfare_detail' and app['version'] in ('7.15.0','7.14.0') "
"and params['from'] = 'welfare_list' and params['cpc_referer'] = '21'"
.
format
(
date
))
.
rdd
.
map
(
lambda
x
:
x
[
0
])
.
collect
()[
0
]
meigou_homepage_sum
=
meigou_homepage_sum
+
tmp
print
(
"serach
"
)
tmp
=
spark
.
sql
(
"select count(*) from online.bl_hdfs_maidian_updates "
"
where partition_date='{}'
"
"and
action = 'search_result_welfare_click_item
'"
print
ln
(
"美购首页icon美购点击
"
)
tmp
=
spark
.
sql
(
"select count(*) from online.bl_hdfs_maidian_updates
where partition_date='{}'
"
"
and action = 'goto_welfare_detail' and app['version'] in ('7.15.0','7.14.0')
"
"and
params['from'] = 'welfare_list' and params['cpc_referer'] = '18
'"
.
format
(
date
))
.
rdd
.
map
(
lambda
x
:
x
[
0
])
.
collect
()[
0
]
total
.
append
(
tmp
)
sum
=
sum
+
tmp
meigou_homepage_sum
=
meigou_homepage_sum
+
tmp
total
.
append
(
meigou_homepage_sum
)
sum
=
sum
+
meigou_homepage_sum
total
.
append
(
sum
)
return
sum
return
total
def
cpc_click
(
x
):
cpc
=
[]
date
=
(
datetime
.
date
.
today
()
-
datetime
.
timedelta
(
days
=
x
))
.
strftime
(
"
%
Y
%
m
%
d"
)
cpc
.
append
(
date
)
total
=
[]
sum
=
0
date
=
(
datetime
.
date
.
today
()
-
datetime
.
timedelta
(
days
=
x
))
.
strftime
(
"
%
Y
%
m
%
d"
)
total
.
append
(
date
)
print
(
"美购
首页美购列表卡片
点击"
)
print
(
"美购
搜索
点击"
)
tmp
=
spark
.
sql
(
"select count(*) from online.bl_hdfs_maidian_updates "
"where partition_date='{}' "
"and action = 'goto_welfare_detail' and params['from'] = 'welfare_home_list_item' "
"and params['is_cpc'] = '1'"
"where partition_date='{}'and action = 'search_result_welfare_click_item' "
"and app['version'] in ('7.15.0','7.14.0') and params['is_cpc'] = '1'"
.
format
(
date
))
.
rdd
.
map
(
lambda
x
:
x
[
0
])
.
collect
()[
0
]
cpc
.
append
(
tmp
)
total
.
append
(
tmp
)
sum
=
sum
+
tmp
print
(
"美
券相关的美购列表页美购卡片点击
"
)
print
(
"美
购首页相关推荐
"
)
tmp
=
spark
.
sql
(
"select count(*) from online.bl_hdfs_maidian_updates "
"where partition_date='{}' "
"and a
ction = 'goto_welfare_detail' and params['from'] = 'coupon_welfare_list
' "
"where partition_date='{}'
and action = 'goto_welfare_detail'
"
"and a
pp['version'] in ('7.15.0','7.14.0') and params['from'] = 'welfare_home_list_item
' "
"and params['is_cpc'] = '1'"
.
format
(
date
))
.
rdd
.
map
(
lambda
x
:
x
[
0
])
.
collect
()[
0
]
cpc
.
append
(
tmp
)
total
.
append
(
tmp
)
sum
=
sum
+
tmp
print
(
"新美购首页-固定ICON美购卡片点击"
)
tmp
=
spark
.
sql
(
"select count(*) from online.bl_hdfs_maidian_updates "
"where partition_date='{}' "
"and action = 'goto_welfare_detail' and params['from'] = 'welfare_list' "
home_page_sum
=
0
print
(
"首页点击'全部'icon按钮进入的列表-美购卡片点击"
)
tmp
=
spark
.
sql
(
"select count(*) from online.bl_hdfs_maidian_updates where partition_date='{}' "
"and action = 'goto_welfare_detail' and app['version'] in ('7.15.0','7.14.0') "
"and params['from'] = 'welfare_list' and params['cpc_referer'] = '6' "
"and params['is_cpc'] = '1'"
.
format
(
date
))
.
rdd
.
map
(
lambda
x
:
x
[
0
])
.
collect
()[
0
]
cpc
.
append
(
tmp
)
sum
=
sum
+
tmp
home_page_sum
=
home_page_sum
+
tmp
print
(
"首页
-品类模块点击跳转到品类聚合
美购卡片点击"
)
print
(
"首页
点击icon进入的列表-
美购卡片点击"
)
tmp
=
spark
.
sql
(
"select count(*) from online.bl_hdfs_maidian_updates "
"where partition_date='{}' "
"and action = 'goto_welfare_detail' and params['from'] = 'category' "
"where partition_date='{}'and action = 'goto_welfare_detail' "
"and app['version'] in ('7.15.0','7.14.0') "
"and params['from'] = 'category' and params['cpc_referer'] = '19' "
"and params['is_cpc'] = '1'"
.
format
(
date
))
.
rdd
.
map
(
lambda
x
:
x
[
0
])
.
collect
()[
0
]
cpc
.
append
(
tmp
)
sum
=
sum
+
tmp
home_page_sum
=
home_page_sum
+
tmp
total
.
append
(
home_page_sum
)
sum
=
sum
+
home_page_sum
meigou_home_sum
=
0
print
(
"美购首页'全部'点击"
)
tmp
=
spark
.
sql
(
"select count(*) from online.bl_hdfs_maidian_updates where partition_date='{}' "
"and action = 'goto_welfare_detail' and app['version'] in ('7.15.0','7.14.0') "
"and params['from'] = 'welfare_list' and params['cpc_referer'] = '21' "
"and params['is_cpc'] = '1'"
.
format
(
date
))
.
rdd
.
map
(
lambda
x
:
x
[
0
])
.
collect
()[
0
]
meigou_home_sum
=
meigou_home_sum
+
tmp
print
(
"serach
"
)
tmp
=
spark
.
sql
(
"select count(*) from online.bl_hdfs_maidian_updates "
"
where partition_date='{}'
"
"and
action = 'search_result_welfare_click_item
' "
print
ln
(
"美购首页icon美购点击
"
)
tmp
=
spark
.
sql
(
"select count(*) from online.bl_hdfs_maidian_updates
where partition_date='{}'
"
"
and action = 'goto_welfare_detail' and app['version'] in ('7.15.0','7.14.0')
"
"and
params['from'] = 'welfare_list' and params['cpc_referer'] = '18
' "
"and params['is_cpc'] = '1'"
.
format
(
date
))
.
rdd
.
map
(
lambda
x
:
x
[
0
])
.
collect
()[
0
]
cpc
.
append
(
tmp
)
sum
=
sum
+
tmp
meigou_home_sum
=
meigou_home_sum
+
tmp
total
.
append
(
meigou_home_sum
)
sum
=
sum
+
meigou_home_sum
total
.
append
(
sum
)
return
total
cpc
.
append
(
sum
)
return
sum
...
...
@@ -125,29 +151,20 @@ if __name__ == '__main__':
.
set
(
"spark.driver.maxResultSize"
,
"8g"
)
.
set
(
"spark.sql.avro.compression.codec"
,
"snappy"
)
spark
=
SparkSession
.
builder
.
config
(
conf
=
sparkConf
)
.
enableHiveSupport
()
.
getOrCreate
()
clicks
=
[]
cpcs
=
[]
dates
=
[]
for
i
in
range
(
1
,
26
):
clicks
.
append
(
all_click
(
i
))
cpcs
.
append
(
cpc_click
(
i
))
all_list
=
[]
for
i
in
range
(
1
,
3
):
date_str
=
(
datetime
.
date
.
today
()
-
datetime
.
timedelta
(
days
=
i
))
.
strftime
(
"
%
Y
%
m
%
d"
)
dates
.
append
(
date_str
)
print
(
"clicks"
)
print
(
clicks
)
print
(
"cpcs"
)
print
(
cpcs
)
df
=
pd
.
DataFrame
({
'date'
:
dates
,
'clicks'
:
clicks
,
"cpc"
:
cpcs
})
tmp_list
=
[
date_str
]
tmp_list
.
extend
(
all_click
(
i
))
tmp_list
.
extend
(
cpc_click
(
i
))
all_list
.
append
(
tmp_list
)
df
=
pd
.
DataFrame
(
all_list
)
df
=
df
.
rename
(
columns
=
{
0
:
"搜索点击"
,
1
:
"相关推荐"
,
2
:
"首页icon"
,
3
:
"美购首页icon"
,
4
:
"总点击"
,
5
:
"搜索点击cpc"
,
6
:
"相关推荐cpc"
,
7
:
"首页icon_cpc"
,
8
:
"美购首页icon_cpc"
,
8
:
"cpc总点击"
})
df
.
to_csv
(
'/home/gmuser/cpc.csv'
,
index
=
False
)
# rdd = spark.sparkContext.parallelize(cpcs)
# df = spark.createDataFrame(rdd).toDF.toPandas()
# df.to_csv('/home/gmuser/cpc.csv',index=False)
#
# rdd = spark.sparkContext.parallelize(clicks)
# df = spark.createDataFrame(rdd).toDF.toPandas()
# df.to_csv('/home/gmuser/clicks.csv', index=False)
spark
.
stop
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment