Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
ab86987d
Commit
ab86987d
authored
Dec 07, 2018
by
张彦钊
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
修改rdd
parent
e24c3cb0
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
32 additions
and
22 deletions
+32
-22
Data2FFM.scala
eda/feededa/src/main/scala/com/gmei/Data2FFM.scala
+32
-22
No files found.
eda/feededa/src/main/scala/com/gmei/Data2FFM.scala
View file @
ab86987d
...
...
@@ -62,22 +62,25 @@ object Data2FFM {
val
column_list
=
esmm_data
.
columns
val
esmm_pre_data
=
sc
.
sql
(
s
"""
|select device_id,y,z,stat_date,ucity_id,cid_id,clevel1_id,ccity_name
|from esmm_pre_data
"""
.
stripMargin
).
na
.
drop
()
val
esmm_pre_cids
=
esmm_pre_data
.
select
(
"cid_id"
).
distinct
().
collect
().
map
(
s
=>
s
(
0
).
toString
)
val
esmm_pre_city
=
esmm_pre_data
.
select
(
"ucity_id"
).
distinct
().
collect
().
map
(
s
=>
s
(
0
).
toString
)
// val esmm_pre_data = sc.sql(
// s"""
// |select device_id,y,z,stat_date,ucity_id,cid_id,clevel1_id,ccity_name
// |from esmm_pre_data
// """.stripMargin
// ).na.drop()
//
// val esmm_pre_cids = esmm_pre_data.select("cid_id").distinct().collect().map(
// s => s(0).toString
// )
// val esmm_pre_city = esmm_pre_data.select("ucity_id").distinct().collect().map(
// s => s(0).toString
// )
// val esmm_pre_device = esmm_pre_data.select("device_id").distinct().collect().map(
// s => s(0).toString
// )
//
//
//
val
max_stat_date
=
sc
.
sql
(
s
"""
|select max(stat_date) from esmm_train_data
...
...
@@ -99,17 +102,23 @@ object Data2FFM {
column_number
(
i
)
=
esmm_data
.
select
(
i
).
distinct
().
collect
().
map
(
x
=>
x
(
0
).
toString
)
}
val
esmm_join_cids
=
esmm_pre_cids
.
intersect
(
column_number
(
"cid_id"
))
val
esmm_join_city
=
esmm_pre_city
.
intersect
(
column_number
(
"ucity_id"
))
//
val esmm_join_cids = esmm_pre_cids.intersect(column_number("cid_id"))
//
val esmm_join_city = esmm_pre_city.intersect(column_number("ucity_id"))
// val esmm_join_device = esmm_pre_device.intersect(column_number("device_id"))
val
a
=
column_number
(
"device_id"
).
toList
println
(
a
)
println
(
"dict"
)
val
rdd
=
esmm_data
.
rdd
.
repartition
(
200
)
.
map
(
x
=>
(
x
(
0
).
toString
,
x
(
1
).
toString
,
x
(
2
).
toString
,
x
(
3
).
toString
,
x
(
4
).
toString
,
x
(
5
).
toString
,
x
(
6
).
toString
,
x
(
7
).
toString
))
rdd
.
persist
()
val
b
=
rdd
.
map
(
x
=>
x
.
_1
).
collect
().
toList
println
(
b
)
import
sc.implicits._
val
train
=
rdd
.
filter
(
x
=>
x
.
_4
!=
max_stat_date_str
)
.
map
(
x
=>
(
x
.
_1
,
x
.
_2
,
x
.
_3
,
column_number
(
"device_id"
).
indexOf
(
x
.
_1
),
...
...
@@ -122,9 +131,9 @@ object Data2FFM {
.
map
(
x
=>
(
x
.
_1
,
x
.
_2
+
","
+
x
.
_3
+
","
+
x
.
_4
+
","
+
x
.
_5
,
x
.
_6
,
x
.
_7
,
x
.
_8
)).
toDF
(
"number"
,
"data"
,
"device_id"
,
"city_id"
,
"cid"
)
println
(
"train"
)
train
.
show
(
6
)
val
jdbcuri
=
"jdbc:mysql://10.66.157.22:4000/jerry_test?user=root&password=3SYz54LS9#^9sBvC&rewriteBatchedStatements=true"
GmeiConfig
.
writeToJDBCTable
(
jdbcuri
,
train
,
"esmm_data2ffm_train"
,
SaveMode
.
Overwrite
)
//
val jdbcuri = "jdbc:mysql://10.66.157.22:4000/jerry_test?user=root&password=3SYz54LS9#^9sBvC&rewriteBatchedStatements=true"
//
GmeiConfig.writeToJDBCTable(jdbcuri, train, "esmm_data2ffm_train", SaveMode.Overwrite)
//
// val test = rdd.filter(x => x._4 == max_stat_date_str)
// .map(x => (x._1,x._2,x._3,column_number("device_id").indexOf(x._1),
// column_number("stat_date").indexOf(x._4), column_number("ucity_id").indexOf(x._5),
...
...
@@ -146,6 +155,7 @@ object Data2FFM {
// x(4).toString,x(5).toString,x(6).toString,
// x(7).toString)).filter(x => esmm_join_cids.indexOf(x._6) != -1)
// .filter(x => esmm_join_city.indexOf(x._5) != -1)
// .filter(x => esmm_join_device.indexOf(x._1) != -1)
// val pre = rdd_pre.map(x => (x._1,x._2,x._3,column_number("device_id").indexOf(x._1),
// column_number("stat_date").indexOf(x._4), column_number("ucity_id").indexOf(x._5),
// column_number("cid_id").indexOf(x._6), column_number("clevel1_id").indexOf(x._7),
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment