Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
84a07c97
Commit
84a07c97
authored
Nov 08, 2018
by
王志伟
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
new Demand
parent
109a6a45
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
31 additions
and
35 deletions
+31
-35
testt.scala
eda/feededa/src/main/scala/com/gmei/testt.scala
+31
-35
No files found.
eda/feededa/src/main/scala/com/gmei/testt.scala
View file @
84a07c97
package
com.gmei
import
java.io.Serializable
import
java.text.SimpleDateFormat
import
java.util.Calendar
import
com.gmei.WeafareStat.
{
defaultParams
,
parser
}
import
org.apache.spark.sql.
{
SaveMode
,
TiContext
}
...
...
@@ -10,30 +8,30 @@ import org.apache.log4j.{Level, Logger}
import
scopt.OptionParser
import
com.gmei.lib.AbstractParams
import
scala.util.parsing.json.JSON
object
testt
{
Logger
.
getLogger
(
"org.apache.spark"
).
setLevel
(
Level
.
WARN
)
Logger
.
getLogger
(
"org.apache.eclipse.jetty.server"
).
setLevel
(
Level
.
OFF
)
case
class
Params
(
env
:
String
=
"dev"
)
extends
AbstractParams
[
Params
]
with
Serializable
case
class
Params
(
env
:
String
=
"dev"
,
date
:
String
=
"2018-08-01"
)
extends
AbstractParams
[
Params
]
with
Serializable
val
defaultParams
=
Params
()
val
parser
=
new
OptionParser
[
Params
](
"Feed_EDA"
)
{
head
(
"
test
t"
)
head
(
"
WeafareSta
t"
)
opt
[
String
](
"env"
)
.
text
(
s
"the databases environment you used"
)
.
action
((
x
,
c
)
=>
c
.
copy
(
env
=
x
))
//
opt[String] ("date")
//
.text(s"the date you used")
//
.action((x,c) => c.copy(date = x))
opt
[
String
]
(
"date"
)
.
text
(
s
"the date you used"
)
.
action
((
x
,
c
)
=>
c
.
copy
(
date
=
x
))
note
(
"""
|For example, the following command runs this app on a tidb dataset:
|
| spark-submit --class com.gmei.
test
t ./target/scala-2.11/feededa-assembly-0.1.jar \
| spark-submit --class com.gmei.
WeafareSta
t ./target/scala-2.11/feededa-assembly-0.1.jar \
"""
.
stripMargin
+
s
"| --env ${defaultParams.env}"
)
...
...
@@ -52,45 +50,43 @@ object testt {
ti
.
tidbMapTable
(
dbName
=
"jerry_test"
,
tableName
=
"bl_device_list"
)
ti
.
tidbMapTable
(
dbName
=
"jerry_prod"
,
tableName
=
"data_feed_exposure"
)
val
strategies
=
Seq
(
"3$"
,
"4$"
,
"5$"
,
"6$"
,
"7$"
,
"8$"
,
"c$"
,
"d$"
,
"e$"
,
"A$"
,
"B$"
,
"C$"
,
"D$"
)
for
(
strategy
<-
strategies
){
println
(
strategy
)
for
(
i
<-
1
to
10
){
val
cal
=
Calendar
.
getInstance
cal
.
add
(
Calendar
.
DATE
,
-
i
)
//设置时间格式
val
time
=
cal
.
getTime
//将时间格式套用在获取的时间戳上
val
newtime
:
String
=
new
SimpleDateFormat
(
"yyyyMMdd"
).
format
(
time
)
println
(
newtime
)
val
get_data_dura
=
sc
.
sql
(
import
sc.implicits._
// val stat_date = GmeiConfig.getMinusNDate(1)
println
(
param
.
date
)
val
partition_date
=
param
.
date
.
replace
(
"-"
,
""
)
val
diary_id
=
sc
.
sql
(
s
"""
|select partition_date,params["duration"]
|from online.tl_hdfs_maidian_view
|where action="on_app_session_over"
|and cl_id regexp'${strategy}'
|and partition_date = ${newtime}
|select distinct(cid_id) as diary_id
|from data_feed_click
"""
.
stripMargin
)
get_data_dura
.
printSchema
get_data_dura
.
show
(
5
)
val
tst
=
get_data_dura
.
filter
(
"params[duration] > 0"
)
val
tst2
=
tst
.
filter
(
"params[duration] > 0"
)
diary_id
.
show
()
diary_id
.
createOrReplaceTempView
(
"diary_id"
)
val
clk_count
=
sc
.
sql
(
s
"""
|select sum(params["out"]-params["in"]) as dur_time count(cl_id) as num_clk dur_time/num_clk as avg_dur
|from online.tl_hdfs_maidian_view tl inner join diary_id
|on tl.params["business_id"] = diary_id.diary_id
|where action="page_view"
|and partition_date >='20180801' and partition_date <'20181107'
|group by tl.params["business_id"] order by avg_dur desc
"""
.
stripMargin
)
// println(rows(1)) 这样会报错
}
val
result
=
clk_count
result
.
show
()
}
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment