Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
b86483fe
Commit
b86483fe
authored
Nov 01, 2018
by
张彦钊
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'master' of git.wanmeizhensuo.com:ML/ffm-baseline
增加曝光量统计指标
parents
dc617a5d
2af16a4a
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
76 additions
and
40 deletions
+76
-40
run.sh
eda/feededa/run.sh
+12
-11
testt.scala
eda/feededa/src/main/scala/com/gmei/testt.scala
+64
-29
No files found.
eda/feededa/run.sh
View file @
b86483fe
#!/usr/bin/env bash
# Run the daily strategy click/impression statistics jobs for every day in
# [startdate, enddate).  Two passes are made per day: a tuned 20-core run of
# com.gmei.jerry.strategy_clk_imp_oldUser (logged to ctr1.log) and a 10-core
# run of com.gmei.strategy_clk_imp_oldUser (logged to ctr2.log).
#
# Usage: run.sh <startdate> <enddate>
#   Dates are anything `date -d` accepts; they are normalized to %Y-%m-%d.

if [[ $# -ne 2 ]]; then
    echo "Usage: $0 <startdate> <enddate>"
    exit 1
fi

startdate=$(date -d "$1" +%Y-%m-%d)
enddate=$(date -d "$2" +%Y-%m-%d)

# ISO yyyy-mm-dd dates compare correctly with the lexicographic [[ < ]] test.
# A local loop variable keeps startdate intact, so the second pass does not
# need to recompute it from $1/$2 as the original script did.
day=$startdate
while [[ $day < $enddate ]]; do
    /opt/spark/bin/spark-submit \
        --master spark://10.31.242.83:7077 \
        --total-executor-cores 20 \
        --executor-memory 3g \
        --executor-cores 2 \
        --driver-memory 8g \
        --conf spark.default.parallelism=200 \
        --conf spark.storage.memoryFraction=0.5 \
        --conf spark.shuffle.memoryFraction=0.3 \
        --class com.gmei.jerry.strategy_clk_imp_oldUser \
        /srv/apps/ffm-baseline/eda/feededa/target/scala-2.11/feededa-assembly-0.1.jar \
        --env prod --date "$day" >> ctr1.log
    day=$(date -d "+1 day $day" +%Y-%m-%d)
done

# Second pass.  NOTE(review): the original invocation had a stray space in the
# class name ("com.gmei. strategy_clk_imp_oldUser"), which spark-submit would
# reject; assumed to be a typo for com.gmei.strategy_clk_imp_oldUser — confirm
# against the jar's package layout.
day=$startdate
while [[ $day < $enddate ]]; do
    /opt/spark/bin/spark-submit \
        --master spark://10.31.242.83:7077 \
        --total-executor-cores 10 \
        --executor-memory 3g \
        --executor-cores 2 \
        --driver-memory 8g \
        --class com.gmei.strategy_clk_imp_oldUser \
        /srv/apps/ffm-baseline/eda/feededa/target/scala-2.11/feededa-assembly-0.1.jar \
        --env prod --date "$day" >> ctr2.log
    day=$(date -d "+1 day $day" +%Y-%m-%d)
done
eda/feededa/src/main/scala/com/gmei/testt.scala
View file @
b86483fe
package
com.gmei
import
java.text.SimpleDateFormat
import
java.util.
{
Calendar
,
Date
}
import
java.text.SimpleDateFormat
import
java.util.Calendar
import
scala.collection.mutable.ArrayBuffer
import
java.io.Serializable
import
com.gmei.WeafareStat.
{
defaultParams
,
parser
}
import
org.apache.spark.sql.
{
SaveMode
,
TiContext
}
import
org.apache.log4j.
{
Level
,
Logger
}
import
scopt.OptionParser
import
com.gmei.lib.AbstractParams
object testt {

  /** Command-line options for this job; only the database environment for now. */
  case class Params(env: String = "dev")
    extends AbstractParams[Params] with Serializable

  val defaultParams = Params()

  /** scopt parser for [[Params]]; currently only `--env` is accepted. */
  val parser = new OptionParser[Params]("Feed_EDA") {
    head("WeafareStat")
    opt[String]("env")
      .text(s"the databases environment you used")
      .action((x, c) => c.copy(env = x))
    //opt[String] ("date")
    //  .text(s"the date you used")
    //  .action((x,c) => c.copy(date = x))
    note(
      """
        |For example, the following command runs this app on a tidb dataset:
        |
        | spark-submit --class com.gmei.testt ./target/scala-2.11/feededa-assembly-0.1.jar \
      """.stripMargin + s"| --env ${defaultParams.env}")
  }

  /**
   * Returns every date from 2017-12-01 through 2017-12-10 (inclusive),
   * formatted as yyyy-MM-dd.
   *
   * NOTE(review): the endpoints are hard-coded; presumably they should come
   * from job parameters — confirm with the job owner before parameterizing.
   */
  def get_date(): ArrayBuffer[String] = {
    val startTime = "2017-12-01"
    val endTime = "2017-12-10"
    val dateFormat = new SimpleDateFormat("yyyy-MM-dd")
    val dateField = Calendar.DAY_OF_MONTH
    var beginDate = dateFormat.parse(startTime)
    val endDate = dateFormat.parse(endTime)
    val calendar = Calendar.getInstance()
    calendar.setTime(beginDate)
    val dateArray: ArrayBuffer[String] = ArrayBuffer()
    while (beginDate.compareTo(endDate) <= 0) {
      dateArray += dateFormat.format(beginDate)
      calendar.add(dateField, 1)   // step one day forward
      beginDate = calendar.getTime
    }
    dateArray
  }

  /**
   * Entry point: prints the fixed date range, quiets Spark/Jetty logging,
   * then — per CLI environment — maps the TiDB tables and reports per-day
   * session-duration totals for each device-id strategy suffix.
   *
   * NOTE(review): the original file contained two `def main` definitions, the
   * second nested inside `get_date`'s while loop; they are merged here into a
   * single entry point with the same externally visible behavior.
   */
  def main(args: Array[String]): Unit = {
    val dateArray2 = get_date()
    println(dateArray2(0))
    for (elem <- dateArray2) {
      println(elem)
    }

    Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
    Logger.getLogger("org.apache.eclipse.jetty.server").setLevel(Level.OFF)

    parser.parse(args, defaultParams).map { param =>
      GmeiConfig.setup(param.env)
      val spark_env = GmeiConfig.getSparkSession()
      val sc = spark_env._2

      // Expose the TiDB tables this job reads to Spark SQL.
      val ti = new TiContext(sc)
      ti.tidbMapTable(dbName = "jerry_prod", tableName = "diary_video")
      ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_click")
      ti.tidbMapTable(dbName = "jerry_prod", tableName = "blacklist")
      ti.tidbMapTable(dbName = "jerry_test", tableName = "bl_device_list")
      ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_exposure")

      // Device-id suffixes identifying each bucketing strategy ("$" anchors
      // the regexp at the end of cl_id).
      val strategies = Seq("3$", "4$", "5$", "6$", "7$", "8$", "a$", "b$",
        "c$", "d$", "e$", "A$", "B$", "C$", "D$")
      for (strategy <- strategies) {
        println(strategy)
        // Per-day total session duration and distinct device count for this
        // strategy.  The original query read "where where action=..." — a SQL
        // syntax error; the duplicated keyword is removed here.
        val get_data_dura = sc.sql(
          s"""
             |select partition_date, sum(params['duration']) as total_dur,count(distinct(cl_id)) as num
             |from online.tl_hdfs_maidian_view
             |where action="on_app_session_over"
             |and cl_id regexp '${strategy}'
             |and params['duration']>=0 and params['duration']<86400
             |and partition_date >='20180801'
             |group by partition_date order by partition_date desc
          """.stripMargin)
        get_data_dura.show()
      }
    }
  }
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment