ML / ffm-baseline · Commits

Commit 5edec3f4 authored Nov 01, 2018 by 王志伟
Parent: 6bcae79b

    chang run.sh

Showing 2 changed files with 153 additions and 40 deletions (+153 / -40):

    eda/feededa/run.sh                                   +12  -11
    eda/feededa/src/main/scala/com/gmei/testt.scala      +141 -29

eda/feededa/run.sh  (view file @ 5edec3f4)
#!/bin/bash
# Run the daily click/impression jobs for every date in [<startdate>, <enddate>).

if [[ $# -ne 2 ]]; then
    echo 'Usage:' $0 ' <startdate> <enddate>'
    exit 1
fi

startdate=`date -d "$1" +%Y-%m-%d`
enddate=`date -d "$2" +%Y-%m-%d`

# First pass: com.gmei.jerry.strategy_clk_imp_oldUser with 20 cores, logging to ctr1.log.
while [[ $startdate < $enddate ]]
do
    /opt/spark/bin/spark-submit \
        --master spark://10.31.242.83:7077 \
        --total-executor-cores 20 \
        --executor-memory 3g \
        --executor-cores 2 \
        --driver-memory 8g \
        --conf spark.default.parallelism=200 \
        --conf spark.storage.memoryFraction=0.5 \
        --conf spark.shuffle.memoryFraction=0.3 \
        --class com.gmei.jerry.strategy_clk_imp_oldUser \
        /srv/apps/ffm-baseline/eda/feededa/target/scala-2.11/feededa-assembly-0.1.jar \
        --env prod --date $startdate >> ctr1.log
    startdate=`date -d "+1 day $startdate" +%Y-%m-%d`
done

# Second pass: same date range, com.gmei.strategy_clk_imp_oldUser with 10 cores, logging to ctr2.log.
startdate=`date -d "$1" +%Y-%m-%d`
enddate=`date -d "$2" +%Y-%m-%d`

while [[ $startdate < $enddate ]]
do
    /opt/spark/bin/spark-submit \
        --master spark://10.31.242.83:7077 \
        --total-executor-cores 10 \
        --executor-memory 3g \
        --executor-cores 2 \
        --driver-memory 8g \
        --class com.gmei.strategy_clk_imp_oldUser \
        /srv/apps/ffm-baseline/eda/feededa/target/scala-2.11/feededa-assembly-0.1.jar \
        --env prod --date $startdate >> ctr2.log
    startdate=`date -d "+1 day $startdate" +%Y-%m-%d`
done
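For reference, a minimal invocation of the script above (the dates are illustrative; the script assumes GNU date and the assembly jar built at the hard-coded path):

    # Example: compute the daily stats for 2018-10-01 through 2018-10-07
    # (the loop uses a strict "<", so the end date itself is not processed)
    bash eda/feededa/run.sh 2018-10-01 2018-10-08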
eda/feededa/src/main/scala/com/gmei/testt.scala  (view file @ 5edec3f4)
package com.gmei

import java.text.SimpleDateFormat
import java.util.{Calendar, Date}
import java.io.Serializable
import scala.collection.mutable.ArrayBuffer

import org.apache.log4j.{Level, Logger}
import org.apache.spark.sql.{SaveMode, TiContext}
import scopt.OptionParser

import com.gmei.lib.AbstractParams

object testt {

  Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
  Logger.getLogger("org.apache.eclipse.jetty.server").setLevel(Level.OFF)

  case class Params(env: String = "dev",
                    date: String = "2018-08-01")
    extends AbstractParams[Params] with Serializable

  val defaultParams = Params()

  val parser = new OptionParser[Params]("Feed_EDA") {
    head("WeafareStat")
    opt[String]("env")
      .text(s"the databases environment you used")
      .action((x, c) => c.copy(env = x))
    opt[String]("date")
      .text(s"the date you used")
      .action((x, c) => c.copy(date = x))
    note(
      """
        |For example, the following command runs this app on a tidb dataset:
        |
        | spark-submit --class com.gmei.WeafareStat ./target/scala-2.11/feededa-assembly-0.1.jar \
      """.stripMargin + s"| --env ${defaultParams.env}"
    )
  }

  // Returns every date from startTime to endTime (inclusive) as "yyyy-MM-dd" strings.
  def get_date(): ArrayBuffer[String] = {
    val startTime = "2017-12-01"
    val endTime = "2017-12-10"
    val dateFormat = new SimpleDateFormat("yyyy-MM-dd")
    val dateFiled = Calendar.DAY_OF_MONTH
    var beginDate = dateFormat.parse(startTime)
    val endDate = dateFormat.parse(endTime)
    val calendar = Calendar.getInstance()
    calendar.setTime(beginDate)
    val dateArray: ArrayBuffer[String] = ArrayBuffer()
    while (beginDate.compareTo(endDate) <= 0) {
      dateArray += dateFormat.format(beginDate)
      calendar.add(dateFiled, 1)
      beginDate = calendar.getTime
    }
    //println(dateArray)
    dateArray
  }

  def main(args: Array[String]): Unit = {
    // Smoke-test the date helper.
    val dateArray2 = get_date()
    println(dateArray2(0))
    for (elem <- dateArray2) {
      println(elem)
    }

    parser.parse(args, defaultParams).map { param =>
      GmeiConfig.setup(param.env)
      val spark_env = GmeiConfig.getSparkSession()
      val sc = spark_env._2

      // Expose the TiDB tables used below through TiSpark.
      val ti = new TiContext(sc)
      ti.tidbMapTable(dbName = "jerry_prod", tableName = "diary_video")
      ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_click")
      ti.tidbMapTable(dbName = "jerry_prod", tableName = "blacklist")
      ti.tidbMapTable(dbName = "jerry_test", tableName = "bl_device_list")
      ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_exposure")

      import sc.implicits._

      // val stat_date = GmeiConfig.getMinusNDate(1)
      println(param.date)
      val partition_date = param.date.replace("-", "")

      // Devices whose active_type is '4' on the partition date (old users).
      val decive_id_oldUser = sc.sql(
        s"""
           |select distinct(device_id) as decive_id
           |from online.ml_device_day_active_status
           |where active_type = '4'
           |and partition_date ='${partition_date}'
         """.stripMargin
      )
      decive_id_oldUser.show()
      decive_id_oldUser.createOrReplaceTempView("device_id_old")

      // Devices whose active_type is not '4' on the partition date (new users).
      val decive_id_newUser = sc.sql(
        s"""
           |select distinct(device_id) as decive_id
           |from online.ml_device_day_active_status
           |where active_type != '4'
           |and partition_date ='${partition_date}'
         """.stripMargin
      )
      decive_id_newUser.show()
      decive_id_newUser.createOrReplaceTempView("device_id_newUser")

      // device_id suffix patterns used to split devices into strategy buckets.
      val strategies = Seq("[1|2]$", "[3|4]$", "[5|6]$", "[7|8]$")
      for (strategy <- strategies) {
        // Diary clicks by old users in this bucket, excluding blacklisted devices.
        val clk_count_oldUser = sc.sql(
          s"""
             |select '${param.date}' as stat_date, count(cid_id) as get_clk_count_old
             |from data_feed_click jd inner join device_id_old
             |on jd.device_id = device_id_old.decive_id
             |where (jd.cid_type = 'diary' or jd.cid_type = 'diary_video')
             |and jd.device_id regexp'${strategy}'
             |and jd.device_id not in (select device_id from bl_device_list)
             |and jd.device_id not in (select device_id from blacklist)
             |and jd.stat_date ='${param.date}'
           """.stripMargin
        )

        // Diary exposures by old users in this bucket.
        val imp_count_oldUser = sc.sql(
          s"""
             |select '${param.date}' as stat_date, count(cid_id) as get_imp_count_old
             |from data_feed_exposure je inner join device_id_old
             |on je.device_id = device_id_old.decive_id
             |where je.cid_type = 'diary'
             |and je.device_id regexp'${strategy}'
             |and je.device_id not in (select device_id from bl_device_list)
             |and je.device_id not in (select device_id from blacklist)
             |and je.stat_date ='${param.date}'
           """.stripMargin
        )

        // Diary clicks by new users in this bucket.
        val clk_count_newUser = sc.sql(
          s"""
             |select '${param.date}' as stat_date, count(cid_id) as get_clk_count_newUser
             |from data_feed_click jd inner join device_id_newUser
             |on jd.device_id = device_id_newUser.decive_id
             |where (jd.cid_type = 'diary' or jd.cid_type = 'diary_video')
             |and jd.device_id regexp'${strategy}'
             |and jd.device_id not in (select device_id from bl_device_list)
             |and jd.device_id not in (select device_id from blacklist)
             |and jd.stat_date ='${param.date}'
           """.stripMargin
        )

        // Diary exposures by new users in this bucket.
        val imp_count_newUser = sc.sql(
          s"""
             |select '${param.date}' as stat_date, count(cid_id) as get_imp_count_newUser
             |from data_feed_exposure je inner join device_id_newUser
             |on je.device_id = device_id_newUser.decive_id
             |where je.cid_type = 'diary'
             |and je.device_id regexp'${strategy}'
             |and je.device_id not in (select device_id from bl_device_list)
             |and je.device_id not in (select device_id from blacklist)
             |and je.stat_date ='${param.date}'
           """.stripMargin
        )
        imp_count_newUser.show()

        // One row per stat_date with the four counts side by side.
        val result = clk_count_oldUser
          .join(imp_count_oldUser, "stat_date")
          .join(clk_count_newUser, "stat_date")
          .join(imp_count_newUser, "stat_date")
        result.show()
      }
    }
  }
}
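The testt object defines its own main and command-line parser, so by analogy with run.sh above it would presumably be submitted the same way; a sketch, reusing the master URL, resource settings, and jar path from run.sh (the date is illustrative):

    /opt/spark/bin/spark-submit \
        --master spark://10.31.242.83:7077 \
        --total-executor-cores 10 \
        --executor-memory 3g \
        --executor-cores 2 \
        --driver-memory 8g \
        --class com.gmei.testt \
        /srv/apps/ffm-baseline/eda/feededa/target/scala-2.11/feededa-assembly-0.1.jar \
        --env prod --date 2018-10-01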