Commit 1423c987 authored by 高雅喆

add log

parent fbc5c610
...@@ -127,13 +127,16 @@ object Main { ...@@ -127,13 +127,16 @@ object Main {
.map { case (nodeId, (vector, name)) => (name,vector) } .map { case (nodeId, (vector, name)) => (name,vector) }
.repartition(200) .repartition(200)
println("get node2vec")
println(node2vec_2.first())
//2. compute similar cid and then take top k //2. compute similar cid and then take top k
val storageLevel = StorageLevel.MEMORY_AND_DISK val storageLevel = StorageLevel.MEMORY_AND_DISK
val indexed = node2vec_2.zipWithIndex.persist(storageLevel) val indexed = node2vec_2.zipWithIndex.persist(storageLevel)
println("get indexed")
println(indexed.first())
// create indexed row matrix where every row represents one word // create indexed row matrix where every row represents one word
val rows = indexed.map { val rows = indexed.map {
...@@ -141,11 +144,16 @@ object Main { ...@@ -141,11 +144,16 @@ object Main {
IndexedRow(index, Vectors.dense(features)) IndexedRow(index, Vectors.dense(features))
} }
println("get index2vec")
println(rows.first())
// store index for later re-mapping (index to word) // store index for later re-mapping (index to word)
val index = indexed.map { val index = indexed.map {
case ((word, features), index) => case ((word, features), index) =>
(index, word) (index, word)
}.persist(storageLevel) }.persist(storageLevel)
println("get index2node")
println(index.first())
// create an input matrix from all rows and run lsh on it // create an input matrix from all rows and run lsh on it
val matrix = new IndexedRowMatrix(rows) val matrix = new IndexedRowMatrix(rows)
...@@ -197,7 +205,7 @@ object Main { ...@@ -197,7 +205,7 @@ object Main {
|select a.device_id device_id,b.similarity_cid similarity_cid from |select a.device_id device_id,b.similarity_cid similarity_cid from
|(select device_id,first(cid) as cid from data_feed_click |(select device_id,first(cid) as cid from data_feed_click
|where cid_type='diary' |where cid_type='diary'
|and stat_date > '2018-09-25' |and stat_date > '${date8}'
|group by device_id) a left join |group by device_id) a left join
|nd_cid_similarity_matrix b |nd_cid_similarity_matrix b
|on a.cid = b.cid |on a.cid = b.cid
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment