Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
f3d0a6a8
Commit
f3d0a6a8
authored
Dec 07, 2018
by
张彦钊
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
修改ffm文件
parent
323fb84a
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
26 additions
and
30 deletions
+26
-30
Data2FFM.scala
eda/feededa/src/main/scala/com/gmei/Data2FFM.scala
+26
-30
No files found.
eda/feededa/src/main/scala/com/gmei/Data2FFM.scala
View file @
f3d0a6a8
...
...
@@ -55,7 +55,7 @@ object Data2FFM {
// val yesteday_have_seq = GmeiConfig.getMinusNDate(5)
val
esmm_data
=
sc
.
sql
(
s
"""
|select device_id,y,z,stat_date,ucity_id,cid_id,
diary_service_id,clevel1_id,slevel1_id,ccity_name,scity_id
|select device_id,y,z,stat_date,ucity_id,cid_id,
clevel1_id,ccity_name
|from esmm_train_data
"""
.
stripMargin
).
na
.
drop
()
...
...
@@ -64,7 +64,7 @@ object Data2FFM {
val
esmm_pre_data
=
sc
.
sql
(
s
"""
|select device_id,y,z,stat_date,ucity_id,cid_id,
diary_service_id,clevel1_id,slevel1_id,ccity_name,scity_id
|select device_id,y,z,stat_date,ucity_id,cid_id,
clevel1_id,ccity_name
|from esmm_pre_data
"""
.
stripMargin
).
na
.
drop
()
...
...
@@ -108,32 +108,30 @@ object Data2FFM {
val
rdd
=
esmm_data
.
rdd
.
repartition
(
200
)
.
map
(
x
=>
(
x
(
0
).
toString
,
x
(
1
).
toString
,
x
(
2
).
toString
,
x
(
3
).
toString
,
x
(
4
).
toString
,
x
(
5
).
toString
,
x
(
6
).
toString
,
x
(
7
).
toString
,
x
(
8
).
toString
,
x
(
9
).
toString
,
x
(
10
).
toString
))
x
(
7
).
toString
))
rdd
.
persist
()
import
sc.implicits._
val
train
=
rdd
.
filter
(
x
=>
x
.
_4
!=
max_stat_date_str
)
.
map
(
x
=>
(
x
.
_1
,
x
.
_2
,
x
.
_3
,
.
map
(
x
=>
(
x
.
_1
,
x
.
_2
,
x
.
_3
,
column_number
(
"device_id"
).
indexOf
(
x
.
_1
),
column_number
(
"stat_date"
).
indexOf
(
x
.
_4
),
column_number
(
"ucity_id"
).
indexOf
(
x
.
_5
),
column_number
(
"cid_id"
).
indexOf
(
x
.
_6
),
column_number
(
"diary_service_id"
).
indexOf
(
x
.
_7
),
column_number
(
"clevel1_id"
).
indexOf
(
x
.
_8
),
column_number
(
"slevel1_id"
).
indexOf
(
x
.
_9
),
column_number
(
"ccity_name"
).
indexOf
(
x
.
_10
),
column_number
(
"scity_id"
).
indexOf
(
x
.
_11
)))
.
map
(
x
=>
((
new
util
.
Random
).
nextInt
(
2147483647
),
x
.
_2
,
x
.
_3
,
"1:%d:1.0 2:%d:1.0 3:%d:1.0 4:%d:1.0 5:%d:1.0 6:%d:1.0 7:%d:1.0 8:%d:1.0"
.
format
(
x
.
_4
,
x
.
_5
,
x
.
_6
,
x
.
_7
,
x
.
_8
,
x
.
_9
,
x
.
_10
,
x
.
_11
))).
zipWithIndex
()
.
map
(
x
=>
(
x
.
_1
.
_1
,
x
.
_2
,
x
.
_1
.
_2
,
x
.
_1
.
_3
,
x
.
_1
.
_4
))
.
map
(
x
=>
(
x
.
_1
,
x
.
_2
+
","
+
x
.
_3
+
","
+
x
.
_4
+
","
+
x
.
_5
)).
toDF
(
"number"
,
"data"
)
column_number
(
"cid_id"
).
indexOf
(
x
.
_6
),
column_number
(
"clevel1_id"
).
indexOf
(
x
.
_7
),
column_number
(
"ccity_name"
).
indexOf
(
x
.
_8
),
x
.
_6
,
x
.
_7
))
.
map
(
x
=>
((
new
util
.
Random
).
nextInt
(
2147483647
),
x
.
_2
,
x
.
_3
,
"1:%d:1.0 2:%d:1.0 3:%d:1.0 4:%d:1.0 5:%d:1.0 6:%d:1.0"
.
format
(
x
.
_4
,
x
.
_5
,
x
.
_6
,
x
.
_7
,
x
.
_8
,
x
.
_9
),
x
.
_1
,
x
.
_10
,
x
.
_11
)).
zipWithIndex
()
.
map
(
x
=>
(
x
.
_1
.
_1
,
x
.
_2
,
x
.
_1
.
_2
,
x
.
_1
.
_3
,
x
.
_1
.
_4
,
x
.
_1
.
_5
,
x
.
_1
.
_6
,
x
.
_1
.
_7
))
.
map
(
x
=>
(
x
.
_1
,
x
.
_2
+
","
+
x
.
_3
+
","
+
x
.
_4
+
","
+
x
.
_5
,
x
.
_6
,
x
.
_7
,
x
.
_8
)).
toDF
(
"number"
,
"data"
,
"device_id"
,
"city_id"
,
"cid"
)
val
jdbcuri
=
"jdbc:mysql://10.66.157.22:4000/jerry_test?user=root&password=3SYz54LS9#^9sBvC&rewriteBatchedStatements=true"
GmeiConfig
.
writeToJDBCTable
(
jdbcuri
,
train
,
"esmm_data2ffm_train"
,
SaveMode
.
Overwrite
)
val
test
=
rdd
.
filter
(
x
=>
x
.
_4
==
max_stat_date_str
)
.
map
(
x
=>
(
x
.
_1
,
x
.
_2
,
x
.
_3
,
column_number
(
"stat_date"
).
indexOf
(
x
.
_4
),
column_number
(
"ucity_id"
).
indexOf
(
x
.
_5
),
column_number
(
"cid_id"
).
indexOf
(
x
.
_6
),
column_number
(
"diary_service_id"
).
indexOf
(
x
.
_7
),
column_number
(
"clevel1_id"
).
indexOf
(
x
.
_8
),
column_number
(
"slevel1_id"
).
indexOf
(
x
.
_9
),
column_number
(
"ccity_name"
).
indexOf
(
x
.
_10
),
column_number
(
"scity_id"
).
indexOf
(
x
.
_11
)))
.
map
(
x
=>
((
new
util
.
Random
).
nextInt
(
2147483647
),
x
.
_2
,
x
.
_3
,
"1:%d:1.0 2:%d:1.0 3:%d:1.0 4:%d:1.0 5:%d:1.0 6:%d:1.0 7:%d:1.0 8:%d:1.0"
.
format
(
x
.
_4
,
x
.
_5
,
x
.
_6
,
x
.
_7
,
x
.
_8
,
x
.
_9
,
x
.
_10
,
x
.
_11
))).
zipWithIndex
()
.
map
(
x
=>
(
x
.
_1
.
_1
,
x
.
_2
,
x
.
_1
.
_2
,
x
.
_1
.
_3
,
x
.
_1
.
_4
))
.
map
(
x
=>
(
x
.
_1
,
x
.
_2
+
","
+
x
.
_3
+
","
+
x
.
_4
+
","
+
x
.
_5
)).
toDF
(
"number"
,
"data"
)
.
map
(
x
=>
(
x
.
_1
,
x
.
_2
,
x
.
_3
,
column_number
(
"device_id"
).
indexOf
(
x
.
_1
),
column_number
(
"stat_date"
).
indexOf
(
x
.
_4
),
column_number
(
"ucity_id"
).
indexOf
(
x
.
_5
),
column_number
(
"cid_id"
).
indexOf
(
x
.
_6
),
column_number
(
"clevel1_id"
).
indexOf
(
x
.
_7
),
column_number
(
"ccity_name"
).
indexOf
(
x
.
_8
),
x
.
_6
,
x
.
_7
))
.
map
(
x
=>
((
new
util
.
Random
).
nextInt
(
2147483647
),
x
.
_2
,
x
.
_3
,
"1:%d:1.0 2:%d:1.0 3:%d:1.0 4:%d:1.0 5:%d:1.0 6:%d:1.0"
.
format
(
x
.
_4
,
x
.
_5
,
x
.
_6
,
x
.
_7
,
x
.
_8
,
x
.
_9
),
x
.
_1
,
x
.
_10
,
x
.
_11
)).
zipWithIndex
()
.
map
(
x
=>
(
x
.
_1
.
_1
,
x
.
_2
,
x
.
_1
.
_2
,
x
.
_1
.
_3
,
x
.
_1
.
_4
,
x
.
_1
.
_5
,
x
.
_1
.
_6
,
x
.
_1
.
_7
))
.
map
(
x
=>
(
x
.
_1
,
x
.
_2
+
","
+
x
.
_3
+
","
+
x
.
_4
+
","
+
x
.
_5
,
x
.
_6
,
x
.
_7
,
x
.
_8
)).
toDF
(
"number"
,
"data"
,
"device_id"
,
"city_id"
,
"cid"
)
GmeiConfig
.
writeToJDBCTable
(
jdbcuri
,
test
,
"esmm_data2ffm_cv"
,
SaveMode
.
Overwrite
)
...
...
@@ -142,18 +140,16 @@ object Data2FFM {
val
rdd_pre
=
esmm_pre_data
.
rdd
.
repartition
(
200
)
.
map
(
x
=>
(
x
(
0
).
toString
,
x
(
1
).
toString
,
x
(
2
).
toString
,
x
(
3
).
toString
,
x
(
4
).
toString
,
x
(
5
).
toString
,
x
(
6
).
toString
,
x
(
7
).
toString
,
x
(
8
).
toString
,
x
(
9
).
toString
,
x
(
10
).
toString
)).
filter
(
x
=>
esmm_join_cids
.
indexOf
(
x
.
_6
)
!=
-
1
)
x
(
7
).
toString
)).
filter
(
x
=>
esmm_join_cids
.
indexOf
(
x
.
_6
)
!=
-
1
)
.
filter
(
x
=>
esmm_join_city
.
indexOf
(
x
.
_5
)
!=
-
1
)
rdd_pre
.
persist
()
val
pre
=
rdd_pre
.
map
(
x
=>
(
x
.
_1
,
x
.
_2
,
x
.
_3
,
val
pre
=
rdd_pre
.
map
(
x
=>
(
x
.
_1
,
x
.
_2
,
x
.
_3
,
column_number
(
"device_id"
).
indexOf
(
x
.
_1
),
column_number
(
"stat_date"
).
indexOf
(
x
.
_4
),
column_number
(
"ucity_id"
).
indexOf
(
x
.
_5
),
column_number
(
"cid_id"
).
indexOf
(
x
.
_6
),
column_number
(
"diary_service_id"
).
indexOf
(
x
.
_7
),
column_number
(
"clevel1_id"
).
indexOf
(
x
.
_8
),
column_number
(
"slevel1_id"
).
indexOf
(
x
.
_9
),
column_number
(
"ccity_name"
).
indexOf
(
x
.
_10
),
column_number
(
"scity_id"
).
indexOf
(
x
.
_11
)))
.
map
(
x
=>
((
new
util
.
Random
).
nextInt
(
2147483647
),
x
.
_2
,
x
.
_3
,
"1:%d:1.0 2:%d:1.0 3:%d:1.0 4:%d:1.0 5:%d:1.0 6:%d:1.0 7:%d:1.0 8:%d:1.0"
.
format
(
x
.
_4
,
x
.
_5
,
x
.
_6
,
x
.
_7
,
x
.
_8
,
x
.
_9
,
x
.
_10
,
x
.
_11
))).
zipWithIndex
()
.
map
(
x
=>
(
x
.
_1
.
_1
,
x
.
_2
,
x
.
_1
.
_2
,
x
.
_1
.
_3
,
x
.
_1
.
_4
))
.
map
(
x
=>
(
x
.
_1
,
x
.
_2
+
","
+
x
.
_3
+
","
+
x
.
_4
+
","
+
x
.
_5
)).
toDF
(
"number"
,
"data"
)
column_number
(
"cid_id"
).
indexOf
(
x
.
_6
),
column_number
(
"clevel1_id"
).
indexOf
(
x
.
_7
),
column_number
(
"ccity_name"
).
indexOf
(
x
.
_8
),
x
.
_6
,
x
.
_7
))
.
map
(
x
=>
((
new
util
.
Random
).
nextInt
(
2147483647
),
x
.
_2
,
x
.
_3
,
"1:%d:1.0 2:%d:1.0 3:%d:1.0 4:%d:1.0 5:%d:1.0 6:%d:1.0"
.
format
(
x
.
_4
,
x
.
_5
,
x
.
_6
,
x
.
_7
,
x
.
_8
,
x
.
_9
),
x
.
_1
,
x
.
_10
,
x
.
_11
)).
zipWithIndex
()
.
map
(
x
=>
(
x
.
_1
.
_1
,
x
.
_2
,
x
.
_1
.
_2
,
x
.
_1
.
_3
,
x
.
_1
.
_4
,
x
.
_1
.
_5
,
x
.
_1
.
_6
,
x
.
_1
.
_7
))
.
map
(
x
=>
(
x
.
_1
,
x
.
_2
+
","
+
x
.
_3
+
","
+
x
.
_4
+
","
+
x
.
_5
,
x
.
_6
,
x
.
_7
,
x
.
_8
)).
toDF
(
"number"
,
"data"
,
"device_id"
,
"city_id"
,
"cid"
)
GmeiConfig
.
writeToJDBCTable
(
jdbcuri
,
pre
,
"esmm_data2ffm_infer"
,
SaveMode
.
Overwrite
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment