data-dqmonitor

Commit 5d0d30e2 authored Jun 28, 2019 by Pengfei Xue
check pv write to tidb
parent 496d3108
Showing 4 changed files with 82 additions and 57 deletions (+82 -57)
src/main/scala/com/gmei/data/dq/Main.scala          +10 -4
src/main/scala/com/gmei/data/dq/Utils.scala         +1  -1
src/main/scala/com/gmei/data/dq/actionCheck.scala   +0  -4
src/main/scala/com/gmei/data/dq/pvCheker.scala      +71 -48
src/main/scala/com/gmei/data/dq/Main.scala

@@ -21,9 +21,11 @@ object Main {
     var partition_date = ""
     var yesterday = ""
+    var cmd = ""
-    if (args.length == 1 && Utils.is_date_string(args(0))) {
+    if (args.length == 2 && Utils.is_date_string(args(0))) {
       partition_date = args(0)
+      cmd = args(1)
     } else {
       throw new IllegalArgumentException("have no partition date!")
@@ -31,9 +33,13 @@ object Main {
     // check pv
     // pvReferCheker.check(spark, partition_date)
-    actionCheck.check(spark, partition_date)
-    pvChecker.check(spark, partition_date)
+    if (cmd == "actionCheck") {
+      actionCheck.check(spark, partition_date)
+    } else if (cmd == "pvCheck") {
+      pvChecker.check(spark, partition_date)
+    } else {
+      println("unknow cmd. supported actionCheck|pvCheck ")
+    }
     spark.stop()
   }
 }
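With this change Main takes two arguments, a partition date and a command name, and runs only the selected checker. The sketch below is not part of the commit; it is a hedged illustration of the same dispatch written as a Scala match expression, reusing the project's own actionCheck, pvChecker and Utils.is_date_string. The dispatch helper name, the example date and the jar name in the launch line are hypothetical.

import org.apache.spark.sql.SparkSession

// Illustrative sketch only: equivalent to the committed if/else chain.
// actionCheck, pvChecker and Utils.is_date_string are the project objects shown in this diff.
def dispatch(spark: SparkSession, args: Array[String]): Unit = {
  if (args.length != 2 || !Utils.is_date_string(args(0)))
    throw new IllegalArgumentException("usage: <partition_date yyyyMMdd> <actionCheck|pvCheck>")
  val partitionDate = args(0)
  args(1) match {
    case "actionCheck" => actionCheck.check(spark, partitionDate)
    case "pvCheck"     => pvChecker.check(spark, partitionDate)
    case other         => println(s"unknown cmd '$other'. supported: actionCheck|pvCheck")
  }
}

// Example launch (jar name is a placeholder):
//   spark-submit --class com.gmei.data.dq.Main data-dqmonitor.jar 20190628 pvCheck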
src/main/scala/com/gmei/data/dq/Utils.scala

@@ -28,7 +28,7 @@ object Utils {
   def getDateByStringAndFormatStr(date: String, formatString: String = "yyyyMMdd") = {
     val dateFormat = new SimpleDateFormat(formatString)
-    dateFormat.parse(partition_date)
+    dateFormat.parse(date)
   }

   def getYesterday(date: Date) = {
...
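The one-line fix above makes getDateByStringAndFormatStr parse its own date argument instead of an unrelated partition_date variable. A minimal self-contained sketch of the corrected helper, with the default "yyyyMMdd" format from the signature:

import java.text.SimpleDateFormat
import java.util.Date

// Corrected behaviour: parse the string that was passed in, using the
// caller-supplied format (default "yyyyMMdd").
def getDateByStringAndFormatStr(date: String, formatString: String = "yyyyMMdd"): Date = {
  val dateFormat = new SimpleDateFormat(formatString)
  dateFormat.parse(date)
}

// e.g. getDateByStringAndFormatStr("20190628") returns a java.util.Date for 2019-06-28.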
src/main/scala/com/gmei/data/dq/actionCheck.scala

package com.gmei.data.dq

import java.text.SimpleDateFormat
import java.util.Date
import javax.rmi.CORBA.Util
import org.apache.spark.sql.SaveMode
import org.apache.spark.sql.SparkSession
...
src/main/scala/com/gmei/data/dq/pvCheker.scala

 package com.gmei.data.dq

-import org.apache.spark.sql.{Row, SparkSession}
+import org.apache.spark.sql.{Row, SaveMode, SparkSession}

-object pvReferCheker {
+object pvChecker {

+  private[this] def getSql(date: String): String = {
+    s"""
+      |select
+      |  z.partition_date as partition_date,
+      |  z.cl_type as cl_type,
+      |  z.name as name,
+      |  z.active_type as active_type,
+      |  sum(z.c) as duration
+      |from
+      |(
+      |  select
+      |    x.device_id as cl_id,
+      |    y.partition_date as partition_date,
+      |    y.cl_type as cl_type,
+      |    x.active_type as active_type,
+      |    y.name as name,
+      |    y.c as c
+      |  from
+      |  (
+      |    select
+      |      device_id,
+      |      partition_date,
+      |      case when active_type = '1' or active_type = '2' or active_type = '3' then 'new' else 'old' end as active_type
+      |    from online.ml_device_day_active_status
+      |    where partition_date = '${date}'
+      |  ) x left join (
+      |    select
+      |      cl_id,
+      |      partition_date,
+      |      cl_type,
+      |      params['page_name'] as name,
+      |      (params['out'] - params['in']) as c
+      |    from
+      |      online.bl_hdfs_maidian_updates
+      |    where
+      |      partition_date = '${date}' and action = 'page_view'
+      |  ) y on x.device_id = y.cl_id
+      |  where y.cl_id is not null
+      |) z
+      |group by z.partition_date, z.cl_type, z.name, z.active_type
+    """.stripMargin
+  }

   def check(sc: SparkSession, partition_date: String) = {
     import sc.implicits._

-    val x = sc.sql(
-      s"""
-        |select
-        |  z.partition_date,
-        |  z.cl_type,
-        |  z.name,
-        |  z.active_type,
-        |  z.referrer,
-        |  sum(z.c)
-        |from
-        |(
-        |  select
-        |    x.device_id as cl_id,
-        |    y.partition_date as partition_date,
-        |    y.cl_type as cl_type,
-        |    x.active_type as active_type,
-        |    y.name as name,
-        |    y.referrer as referrer,
-        |    y.c as c
-        |  from
-        |  (
-        |    select
-        |      device_id,
-        |      partition_date,
-        |      case when active_type = '1' or active_type = '2' or active_type = '3' then 'new' else 'old' end as active_type
-        |    from online.ml_device_day_active_status
-        |    where partition_date = '${partition_date}'
-        |  ) x left join (
-        |    select
-        |      cl_id,
-        |      partition_date,
-        |      cl_type,
-        |      params['page_name'] as name,
-        |      params['referrer'] as referrer,
-        |      (params['out'] - params['in']) as c
-        |    from
-        |      online.bl_hdfs_maidian_updates
-        |    where
-        |      partition_date = '${partition_date}' and action = 'page_view'
-        |  ) y on x.device_id = y.cl_id
-        |  where y.cl_id is not null
-        |) z
-        |group by z.partition_date, z.cl_type, z.name, z.active_type, z.referrer
-      """.stripMargin)
+    val x = sc.sql(getSql(partition_date))
     x.createTempView("x")

+    val yesterday = Utils.getYesterDayStrByTodayStr(partition_date)
+    val y = sc.sql(getSql(yesterday))
+    y.createTempView("y")

+    val z = sc.sql(
+      """
+        | select
+        |   x.partition_date as date,
+        |   y.partition_date as yesterday,
+        |   x.name,
+        |   x.cl_type,
+        |   x.active_type,
+        |   x.duration as todayCount,
+        |   y.duration as yesterdayCount,
+        |   case when y.duration = 0 then 1.0 else (x.duration - y.duration) * 1.0 / y.duration end as chainRate
+        | from x left join y on x.cl_type = y.cl_type and x.name = y.name and x.active_type = y.active_type
+      """.stripMargin)

+    val tidb = Utils.getTidbConnectionInfo
+    //df is a dataframe contains the data which you want to write.
+    z.write.mode(SaveMode.Append).jdbc(tidb._1, "pv_check", tidb._2)
+    // TODO: delete date before 14 days ago
   }
 }
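The rewritten pvChecker builds today's and yesterday's per-page view durations with getSql, left-joins them, computes chainRate as (today - yesterday) / yesterday (falling back to 1.0 when yesterday's duration is 0), and appends the result to the pv_check table in TiDB over JDBC. Below is a minimal, hedged sketch of that final write, assuming, as the tuple accessors tidb._1 and tidb._2 suggest, that Utils.getTidbConnectionInfo returns a (jdbcUrl, java.util.Properties) pair; the URL, credentials and driver here are placeholders, not values from the project.

import java.util.Properties
import org.apache.spark.sql.{DataFrame, SaveMode}

// Sketch of the JDBC append used in check(). All connection values are
// placeholders; in the project they would come from Utils.getTidbConnectionInfo.
def writePvCheck(z: DataFrame): Unit = {
  val url = "jdbc:mysql://tidb-host:4000/dq"     // placeholder endpoint; TiDB speaks the MySQL protocol
  val props = new Properties()
  props.put("user", "dq_user")                   // placeholder credentials
  props.put("password", "******")
  props.put("driver", "com.mysql.jdbc.Driver")   // assumed MySQL-protocol JDBC driver

  z.write
    .mode(SaveMode.Append)                       // append per run; rows accumulate by partition_date
    .jdbc(url, "pv_check", props)                // same target table as in the commit
}

SaveMode.Append adds new rows on every run rather than replacing the table, which is consistent with the TODO in the commit about deleting data older than 14 days.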