Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
D
data-dqmonitor
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
薛鹏飞
data-dqmonitor
Commits
35857086
Commit
35857086
authored
Jun 20, 2019
by
Pengfei Xue
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix
parent
bc002afe
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
31 additions
and
4 deletions
+31
-4
pvCheker.scala
src/main/scala/com/gmei/data/dq/pvCheker.scala
+31
-4
No files found.
src/main/scala/com/gmei/data/dq/pvCheker.scala
View file @
35857086
...
@@ -4,12 +4,32 @@ import org.apache.spark.sql.{Row, SparkSession}
...
@@ -4,12 +4,32 @@ import org.apache.spark.sql.{Row, SparkSession}
import
org.apache.spark.mllib.linalg.Vectors
import
org.apache.spark.mllib.linalg.Vectors
import
org.apache.spark.mllib.stat.
{
MultivariateStatisticalSummary
,
Statistics
}
import
org.apache.spark.mllib.stat.
{
MultivariateStatisticalSummary
,
Statistics
}
// [41BB2B9E-C72C-4124-9650-FDBAEEEE05F8,page_view,ios,7.9.3,bind_phone,1,0,1,1,1,0,0,1,1,1]
// [B6DA0665-621A-40F1-8823-0B72FAEF3A48,page_view,ios,7.9.3,message_home,1,1,1,1,1,0,1,1,1,1]
// df.printSchema
/*
|-- cl_id: string (nullable = true)
|-- action: string (nullable = true)
|-- cl_type: string (nullable = true)
|-- app_version: string (nullable = true)
|-- page_name: string (nullable = true)
|-- extra: integer (nullable = false)
|-- referrer: integer (nullable = false)
|-- is_push: integer (nullable = false)
|-- in: integer (nullable = false)
|-- out: integer (nullable = false)
|-- referrer_id: integer (nullable = false)
|-- referrer_tab_name: integer (nullable = false)
|-- bz_id: integer (nullable = false)
|-- fake: integer (nullable = false)
|-- pv: integer (nullable = false)
case class Record(
case class Record(
cl_id
:
String
,
action
:
String
,
app_version
:
String
,
page_name
:
String
,
cl_id: String, action: String,
cl_type: String,
app_version: String, page_name: String,
extra: Int, referrer: Int, is_push: Int, in: Int, out: Int,
extra: Int, referrer: Int, is_push: Int, in: Int, out: Int,
referrer_id
:
Int
,
referrer_tab_name
:
Int
,
bz_id
:
Int
,
fake
:
Int
,
referrer_id: Int, referrer_tab_name: Int, bz_id: Int, fake: Int, pv: Int
pv
:
Int
)
)
*/
object
pvCheker
{
object
pvCheker
{
...
@@ -51,7 +71,14 @@ object pvCheker {
...
@@ -51,7 +71,14 @@ object pvCheker {
import
sc.sqlContext.implicits._
import
sc.sqlContext.implicits._
val
y
=
df
.
rdd
.
map
{
val
y
=
df
.
rdd
.
map
{
case
r
:
Record
=>
Seq
(
r
.
extra
,
r
.
referrer
,
r
.
is_push
,
r
.
in
,
r
.
out
,
r
.
referrer_id
,
r
.
referrer_tab_name
,
r
.
bz_id
)
case
Row
(
cl_id
:
String
,
action
:
String
,
cl_type
:
String
,
app_version
:
String
,
page_name
:
String
,
extra
:
Int
,
referrer
:
Int
,
is_push
:
Int
,
in
:
Int
,
out
:
Int
,
referrer_id
:
Int
,
referrer_tab_name
:
Int
,
bz_id
:
Int
,
fake
:
Int
,
pv
:
Int
)
=>
Seq
(
extra
,
referrer
,
is_push
,
in
,
out
,
referrer_id
,
referrer_tab_name
,
bz_id
).
map
(
_
*
1.0
)
}
}
val
z
=
y
map
{
i
=>
Vectors
.
dense
(
i
.
toArray
[
Double
])}
val
z
=
y
map
{
i
=>
Vectors
.
dense
(
i
.
toArray
[
Double
])}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment