Commit d8fd40f8 authored by 高雅喆

Change similar queue length (keep the top 20 similar entries instead of the top 50)

parent a145739a
......@@ -149,7 +149,7 @@ object Main {
val matrix = new IndexedRowMatrix(rows)
val lsh = new Lsh(
minCosineSimilarity = 0.5,
dimensions = 128,
dimensions = 20,
numNeighbours = 200,
numPermutations = 10,
partitions = 200,
......@@ -173,12 +173,12 @@ object Main {
// group by neighbours to get a list of similar words and then take top k
val result = remapSecond.groupBy(_._1).map {
val result = remapSecond.filter(_._1.startsWith("diary")).groupBy(_._1).map {
case (word1, similarWords) =>
// sort by score desc. and take top 10 entries
val similar = similarWords.toSeq.sortBy(-1 * _._3).filter(_._2.startsWith("diary")).take(50).map(_._2).mkString(",")
// sort by score desc. and take top 20 entries
val similar = Try(similarWords.toSeq.sortBy(-1 * _._3).filter(_._2.startsWith("diary")).take(20).map(_._2).mkString(",")).getOrElse(null)
(word1,s"$similar")
}
}.filter(_._2.split(",").length > 9)
result.take(20).foreach(println)
val similar_result = result.toDF("cid","similarity_cid")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment