Commit d8fd40f8 authored by 高雅喆

change similar queue length

parent a145739a
...@@ -149,7 +149,7 @@ object Main { ...@@ -149,7 +149,7 @@ object Main {
val matrix = new IndexedRowMatrix(rows) val matrix = new IndexedRowMatrix(rows)
val lsh = new Lsh( val lsh = new Lsh(
minCosineSimilarity = 0.5, minCosineSimilarity = 0.5,
dimensions = 128, dimensions = 20,
numNeighbours = 200, numNeighbours = 200,
numPermutations = 10, numPermutations = 10,
partitions = 200, partitions = 200,
...@@ -173,12 +173,12 @@ object Main { ...@@ -173,12 +173,12 @@ object Main {
// group by neighbours to get a list of similar words and then take top k // group by neighbours to get a list of similar words and then take top k
val result = remapSecond.groupBy(_._1).map { val result = remapSecond.filter(_._1.startsWith("diary")).groupBy(_._1).map {
case (word1, similarWords) => case (word1, similarWords) =>
// sort by score desc. and take top 10 entries // sort by score desc. and take top 20 entries
val similar = similarWords.toSeq.sortBy(-1 * _._3).filter(_._2.startsWith("diary")).take(50).map(_._2).mkString(",") val similar = Try(similarWords.toSeq.sortBy(-1 * _._3).filter(_._2.startsWith("diary")).take(20).map(_._2).mkString(",")).getOrElse(null)
(word1,s"$similar") (word1,s"$similar")
} }.filter(_._2.split(",").length > 9)
result.take(20).foreach(println) result.take(20).foreach(println)
val similar_result = result.toDF("cid","similarity_cid") val similar_result = result.toDF("cid","similarity_cid")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment