Commit 35857086 authored by Pengfei Xue

fix

parent bc002afe
...@@ -4,12 +4,32 @@ import org.apache.spark.sql.{Row, SparkSession} ...@@ -4,12 +4,32 @@ import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.stat.{MultivariateStatisticalSummary, Statistics} import org.apache.spark.mllib.stat.{MultivariateStatisticalSummary, Statistics}
// [41BB2B9E-C72C-4124-9650-FDBAEEEE05F8,page_view,ios,7.9.3,bind_phone,1,0,1,1,1,0,0,1,1,1]
// [B6DA0665-621A-40F1-8823-0B72FAEF3A48,page_view,ios,7.9.3,message_home,1,1,1,1,1,0,1,1,1,1]
// df.printSchema
/*
|-- cl_id: string (nullable = true)
|-- action: string (nullable = true)
|-- cl_type: string (nullable = true)
|-- app_version: string (nullable = true)
|-- page_name: string (nullable = true)
|-- extra: integer (nullable = false)
|-- referrer: integer (nullable = false)
|-- is_push: integer (nullable = false)
|-- in: integer (nullable = false)
|-- out: integer (nullable = false)
|-- referrer_id: integer (nullable = false)
|-- referrer_tab_name: integer (nullable = false)
|-- bz_id: integer (nullable = false)
|-- fake: integer (nullable = false)
|-- pv: integer (nullable = false)
case class Record( case class Record(
cl_id: String, action: String, app_version: String, page_name: String, cl_id: String, action: String, cl_type: String, app_version: String, page_name: String,
extra: Int, referrer: Int, is_push: Int, in: Int, out: Int, extra: Int, referrer: Int, is_push: Int, in: Int, out: Int,
referrer_id: Int, referrer_tab_name: Int, bz_id: Int, fake: Int, referrer_id: Int, referrer_tab_name: Int, bz_id: Int, fake: Int, pv: Int
pv: Int
) )
*/
object pvCheker { object pvCheker {
...@@ -51,7 +71,14 @@ object pvCheker { ...@@ -51,7 +71,14 @@ object pvCheker {
import sc.sqlContext.implicits._ import sc.sqlContext.implicits._
val y = df.rdd.map { val y = df.rdd.map {
case r: Record => Seq(r.extra, r.referrer, r.is_push, r.in, r.out, r.referrer_id, r.referrer_tab_name, r.bz_id) case Row(
cl_id: String, action: String, cl_type: String, app_version: String, page_name: String,
extra: Int, referrer: Int, is_push: Int, in: Int, out: Int,
referrer_id: Int, referrer_tab_name: Int, bz_id: Int, fake: Int, pv: Int
) => Seq(
extra, referrer, is_push, in,
out, referrer_id, referrer_tab_name, bz_id
).map(_ * 1.0)
} }
val z = y map {i => Vectors.dense(i.toArray[Double])} val z = y map {i => Vectors.dense(i.toArray[Double])}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment