Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
48797936
Commit
48797936
authored
Oct 06, 2018
by
高雅喆
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
rm log
parent
ac1f4f66
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
2 additions
and
19 deletions
+2
-19
Main.scala
eda/node2vec/src/main/scala/com/gmei/Main.scala
+2
-19
No files found.
eda/node2vec/src/main/scala/com/gmei/Main.scala
View file @
48797936
...
...
@@ -16,8 +16,8 @@ import com.soundcloud.lsh.Lsh
object
Main
{
//
Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
//
Logger.getLogger("org.apache.eclipse.jetty.server").setLevel(Level.OFF)
Logger
.
getLogger
(
"org.apache.spark"
).
setLevel
(
Level
.
WARN
)
Logger
.
getLogger
(
"org.apache.eclipse.jetty.server"
).
setLevel
(
Level
.
OFF
)
case
class
Params
(
iter
:
Int
=
10
,
...
...
@@ -121,23 +121,17 @@ object Main {
val
id2Node
=
Node2vec
.
node2id
.
map
{
case
(
strNode
,
index
)
=>
(
index
,
strNode
)
}
println
(
"get id2node"
)
println
(
id2Node
.
first
())
val
node2vec_2
=
node2vector
.
join
(
id2Node
)
.
map
{
case
(
nodeId
,
(
vector
,
name
))
=>
(
name
,
vector
)
}
.
repartition
(
200
)
println
(
"get node2vec"
)
println
(
node2vec_2
.
first
())
//2. compute similar cid and then take top k
val
storageLevel
=
StorageLevel
.
MEMORY_AND_DISK
val
indexed
=
node2vec_2
.
zipWithIndex
.
persist
(
storageLevel
)
println
(
"get indexed"
)
println
(
indexed
.
first
())
// create indexed row matrix where every row represents one word
val
rows
=
indexed
.
map
{
...
...
@@ -145,16 +139,11 @@ object Main {
IndexedRow
(
index
,
Vectors
.
dense
(
features
))
}
println
(
"get index2vec"
)
println
(
rows
.
first
())
// store index for later re-mapping (index to word)
val
index
=
indexed
.
map
{
case
((
word
,
features
),
index
)
=>
(
index
,
word
)
}.
persist
(
storageLevel
)
println
(
"get index2node"
)
println
(
index
.
first
())
// create an input matrix from all rows and run lsh on it
val
matrix
=
new
IndexedRowMatrix
(
rows
)
...
...
@@ -172,20 +161,16 @@ object Main {
// remap both ids back to words
val
remapFirst
=
similarityMatrix
.
entries
.
keyBy
(
_
.
i
).
join
(
index
).
values
println
(
"get remapFirst"
)
println
(
remapFirst
.
first
())
val
remapSecond
=
remapFirst
.
keyBy
{
case
(
entry
,
word1
)
=>
entry
.
j
}.
join
(
index
).
values
.
map
{
case
((
entry
,
word1
),
word2
)
=>
(
word1
,
word2
,
entry
.
value
)
}
remapSecond
.
take
(
20
).
foreach
(
println
)
val
score_result
=
remapSecond
.
toDF
(
"cid1"
,
"cid2"
,
"score"
)
GmeiConfig
.
writeToJDBCTable
(
score_result
,
table
=
"nd_cid_pairs_cosine_distince"
,
SaveMode
.
Overwrite
)
println
(
"done1"
)
// group by neighbours to get a list of similar words and then take top k
val
result
=
remapSecond
.
groupBy
(
_
.
_1
).
map
{
...
...
@@ -198,7 +183,6 @@ object Main {
val
similar_result
=
result
.
toDF
(
"cid"
,
"similarity_cid"
)
GmeiConfig
.
writeToJDBCTable
(
similar_result
,
table
=
"nd_cid_similarity_matrix"
,
SaveMode
.
Overwrite
)
println
(
"done2"
)
...
...
@@ -229,7 +213,6 @@ object Main {
device_queue
.
take
(
20
).
foreach
(
println
)
GmeiConfig
.
writeToJDBCTable
(
device_queue
,
table
=
"nd_device_cid_similarity_matrix"
,
SaveMode
.
Overwrite
)
println
(
"done3"
)
sc
.
stop
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment