Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
049f0087
Commit
049f0087
authored
Oct 18, 2018
by
高雅喆
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
change result format and output path
parent
a0e73dd2
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
41 additions
and
13 deletions
+41
-13
WeafareStat.scala
eda/feededa/src/main/scala/com/gmei/WeafareStat.scala
+41
-13
No files found.
eda/feededa/src/main/scala/com/gmei/WeafareStat.scala
View file @
049f0087
...
@@ -44,6 +44,7 @@ object WeafareStat {
...
@@ -44,6 +44,7 @@ object WeafareStat {
ti
.
tidbMapTable
(
dbName
=
"jerry_prod"
,
tableName
=
"data_feed_click"
)
ti
.
tidbMapTable
(
dbName
=
"jerry_prod"
,
tableName
=
"data_feed_click"
)
import
sc.implicits._
val
stat_date
=
GmeiConfig
.
getMinusNDate
(
1
)
val
stat_date
=
GmeiConfig
.
getMinusNDate
(
1
)
println
(
stat_date
)
println
(
stat_date
)
val
video_cids
=
sc
.
sql
(
val
video_cids
=
sc
.
sql
(
...
@@ -72,9 +73,9 @@ object WeafareStat {
...
@@ -72,9 +73,9 @@ object WeafareStat {
val
partition_date
=
stat_date
.
replace
(
"-"
,
""
)
val
partition_date
=
stat_date
.
replace
(
"-"
,
""
)
println
(
partition_date
)
println
(
partition_date
)
val
video_count
=
sc
.
sql
(
val
video_
meigou_
count
=
sc
.
sql
(
s
"""
s
"""
|select
count(page_name) as vd
_count
|select
'${stat_date}' as stat_date, count(page_name) as video_meigou
_count
|from online.bl_hdfs_page_view_updates pv inner join tmp1
|from online.bl_hdfs_page_view_updates pv inner join tmp1
|on pv.referrer_id = tmp1.cid_id
|on pv.referrer_id = tmp1.cid_id
|where pv.partition_date = '${partition_date}'
|where pv.partition_date = '${partition_date}'
...
@@ -82,15 +83,11 @@ object WeafareStat {
...
@@ -82,15 +83,11 @@ object WeafareStat {
|and pv.referrer='diary_detail'
|and pv.referrer='diary_detail'
"""
.
stripMargin
"""
.
stripMargin
)
)
video_count
.
show
()
video_meigou_count
.
show
()
val
output1
=
"./test_vd_cids.csv"
video_count
.
repartition
(
1
)
.
write
.
format
(
"com.databricks.spark.csv"
)
.
option
(
"header"
,
"true"
).
save
(
output1
)
val
txt_count
=
sc
.
sql
(
val
txt_
meigou_
count
=
sc
.
sql
(
s
"""
s
"""
|select
count(page_name) as txt
_count
|select
'${stat_date}' as stat_date, count(page_name) as txt_meigou
_count
|from online.bl_hdfs_page_view_updates pv inner join tmp2
|from online.bl_hdfs_page_view_updates pv inner join tmp2
|on pv.referrer_id = tmp2.cid_id
|on pv.referrer_id = tmp2.cid_id
|where pv.partition_date = '${partition_date}'
|where pv.partition_date = '${partition_date}'
...
@@ -98,11 +95,42 @@ object WeafareStat {
...
@@ -98,11 +95,42 @@ object WeafareStat {
|and pv.referrer='diary_detail'
|and pv.referrer='diary_detail'
"""
.
stripMargin
"""
.
stripMargin
)
)
txt_count
.
show
()
txt_meigou_count
.
show
()
val
output2
=
"./test_txt_cids.csv"
txt_count
.
repartition
(
1
)
val
video_clk_count
=
sc
.
sql
(
s
"""
|select '${stat_date}' as stat_date, count(cid_id) as video_clk_count
|from data_feed_click
|where cid_type = 'diary'
|and cid_id in (select cid from diary_video where stat_date='${stat_date}')
|and stat_date=''${stat_date}'
"""
.
stripMargin
)
video_clk_count
.
show
()
val
txt_clk_count
=
sc
.
sql
(
s
"""
|select '${stat_date}' as stat_date, count(cid_id) as txt_clk_count
|from data_feed_click
|where cid_type = 'diary'
|and cid_id not in (select cid from diary_video where stat_date='${stat_date}')
|and stat_date='${stat_date}'
"""
.
stripMargin
)
txt_clk_count
.
show
()
val
result
=
video_clk_count
.
join
(
video_meigou_count
,
"stat_date"
)
.
join
(
txt_clk_count
,
"stat_date"
)
.
join
(
txt_meigou_count
,
"stat_date"
)
result
.
withColumn
(
"video_rate"
,
result
.
col
(
"video_meigou_count"
)/
result
.
col
(
"video_clk_count"
))
result
.
withColumn
(
"txt_rate"
,
result
.
col
(
"txt_meigou_count"
)/
result
.
col
(
"txt_clk_count"
))
result
.
show
()
val
output
=
"/data2/model/eda/gray_stat/welfare_detail_count_fromvideo.csv"
result
.
repartition
(
1
)
.
write
.
format
(
"com.databricks.spark.csv"
)
.
write
.
format
(
"com.databricks.spark.csv"
)
.
option
(
"header"
,
"true"
).
save
(
output
2
)
.
option
(
"header"
,
"true"
).
save
(
output
)
sc
.
stop
()
sc
.
stop
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment