Commit 6f906fb2 authored by 高雅喆's avatar 高雅喆

drop duplicate

parent b058016d
...@@ -577,11 +577,26 @@ object GetDevicePortrait { ...@@ -577,11 +577,26 @@ object GetDevicePortrait {
|(select device_id,max(level1_count) as max_count from tag_count group by device_id) b |(select device_id,max(level1_count) as max_count from tag_count group by device_id) b
|on a.level1_count = b.max_count and a.device_id = b.device_id |on a.level1_count = b.max_count and a.device_id = b.device_id
""".stripMargin """.stripMargin
).rdd.map(x => (x(0).toString,x(1).toString,x(2).toString,x(3).toString)) )
max_count_tag.foreachPartition(GmeiConfig.updateDeviceFeat) // .rdd.map(x => (x(0).toString,x(1).toString,x(2).toString,x(3).toString))
// max_count_tag.foreachPartition(GmeiConfig.updateDeviceFeat)
max_count_tag.take(10).foreach(println) //
println(max_count_tag.count()) // max_count_tag.take(10).foreach(println)
// println(max_count_tag.count())
// Drop duplicates: the join on (device_id, level1_count = max_count) above returns
// several rows for a device whose top count is shared by more than one tag, so
// collapse the result to a single row per device_id.
val max_count_tag_rdd = max_count_tag.rdd.groupBy(_.getAs[String]("device_id")).map {
case (device_id, rows) =>
// Groups produced by groupBy are never empty, so head is safe; every field is
// read from the same first row of the group.
// NOTE(review): getAs[String] assumes these columns are string-typed -- confirm
// against the SELECT list, which is not visible here.
val first = rows.head
val stat_date = first.getAs[String]("stat_date")
val max_level1_id = first.getAs[String]("max_level1_id")
val max_level1_count = first.getAs[String]("max_level1_count")
(device_id, stat_date, max_level1_id, max_level1_count)
}.filter(_._1 != null) // skip the group whose device_id key is null
max_count_tag_rdd.foreachPartition(GmeiConfig.updateDeviceFeat)
max_count_tag_rdd.take(10).foreach(println)
println(max_count_tag_rdd.count())
sc.stop() sc.stop()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment