Commit 48797936 authored by 高雅喆

rm log

parent ac1f4f66
......@@ -16,8 +16,8 @@ import com.soundcloud.lsh.Lsh
object Main {
// Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
// Logger.getLogger("org.apache.eclipse.jetty.server").setLevel(Level.OFF)
Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
Logger.getLogger("org.apache.eclipse.jetty.server").setLevel(Level.OFF)
case class Params(iter: Int = 10,
......@@ -121,23 +121,17 @@ object Main {
val id2Node = Node2vec.node2id.map{ case (strNode, index) =>
(index, strNode)
}
println("get id2node")
println(id2Node.first())
val node2vec_2 = node2vector.join(id2Node)
.map { case (nodeId, (vector, name)) => (name,vector) }
.repartition(200)
println("get node2vec")
println(node2vec_2.first())
//2. compute similar cid and then take top k
val storageLevel = StorageLevel.MEMORY_AND_DISK
val indexed = node2vec_2.zipWithIndex.persist(storageLevel)
println("get indexed")
println(indexed.first())
// create indexed row matrix where every row represents one word
val rows = indexed.map {
......@@ -145,16 +139,11 @@ object Main {
IndexedRow(index, Vectors.dense(features))
}
println("get index2vec")
println(rows.first())
// store index for later re-mapping (index to word)
val index = indexed.map {
case ((word, features), index) =>
(index, word)
}.persist(storageLevel)
println("get index2node")
println(index.first())
// create an input matrix from all rows and run lsh on it
val matrix = new IndexedRowMatrix(rows)
......@@ -172,20 +161,16 @@ object Main {
// remap both ids back to words
val remapFirst = similarityMatrix.entries.keyBy(_.i).join(index).values
println("get remapFirst")
println(remapFirst.first())
val remapSecond = remapFirst.keyBy { case (entry, word1) => entry.j }.join(index).values.map {
case ((entry, word1), word2) =>
(word1, word2, entry.value)
}
remapSecond.take(20).foreach(println)
val score_result = remapSecond.toDF("cid1","cid2","score")
GmeiConfig.writeToJDBCTable(score_result, table="nd_cid_pairs_cosine_distince", SaveMode.Overwrite)
println("done1")
// group by neighbours to get a list of similar words and then take top k
val result = remapSecond.groupBy(_._1).map {
......@@ -198,7 +183,6 @@ object Main {
val similar_result = result.toDF("cid","similarity_cid")
GmeiConfig.writeToJDBCTable(similar_result, table="nd_cid_similarity_matrix", SaveMode.Overwrite)
println("done2")
......@@ -229,7 +213,6 @@ object Main {
device_queue.take(20).foreach(println)
GmeiConfig.writeToJDBCTable(device_queue, table="nd_device_cid_similarity_matrix", SaveMode.Overwrite)
println("done3")
sc.stop()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment