Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
de06e9e5
Commit
de06e9e5
authored
Dec 06, 2018
by
高雅喆
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
bug fix
parent
a5c140f6
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
16 additions
and
7 deletions
+16
-7
EsmmData.scala
eda/feededa/src/main/scala/com/gmei/EsmmData.scala
+15
-6
GmeiConfig.scala
eda/feededa/src/main/scala/com/gmei/GmeiConfig.scala
+1
-1
No files found.
eda/feededa/src/main/scala/com/gmei/EsmmData.scala
View file @
de06e9e5
...
...
@@ -266,7 +266,7 @@ object EsmmPredData {
|select device_id,city_id,native_queue as merge_queue from ffm_diary_queue
|union
|select device_id,city_id,search_queue as merge_queue from search_queue) as tmp1
|where tmp1.device_id in (select distinct device_id from esmm_train_data
limit 20000
)
|where tmp1.device_id in (select distinct device_id from esmm_train_data)
"""
.
stripMargin
)
raw_data
.
show
()
...
...
@@ -284,11 +284,19 @@ object EsmmPredData {
val
raw_data2
=
sc
.
sql
(
s
"""
|select device_id,city_id
as ucity_id,explode(split(merge_queue, ',')) as cid_id from raw_data1
|select device_id,city_id
,merge_queue from raw_data1 limit 10000
"""
.
stripMargin
)
raw_data2
.
createOrReplaceTempView
(
"raw_data"
)
print
(
raw_data2
.
count
())
raw_data1
.
createOrReplaceTempView
(
"raw_data2"
)
println
(
raw_data2
.
count
())
val
raw_data3
=
sc
.
sql
(
s
"""
|select device_id,city_id as ucity_id,explode(split(merge_queue, ',')) as cid_id from raw_data2
"""
.
stripMargin
)
raw_data3
.
createOrReplaceTempView
(
"raw_data"
)
print
(
raw_data3
.
count
())
...
...
@@ -310,7 +318,8 @@ object EsmmPredData {
|where b.partition_date = '${yesteday}'
"""
.
stripMargin
)
sid_data
.
show
()
// sid_data.show()
println
(
sid_data
.
count
())
val
sid_data_label
=
sid_data
.
withColumn
(
"y"
,
lit
(
0
)).
withColumn
(
"z"
,
lit
(
0
))
sid_data_label
.
createOrReplaceTempView
(
"union_data"
)
...
...
@@ -372,7 +381,7 @@ object EsmmPredData {
"""
.
stripMargin
)
// union_data_scity_id.createOrReplaceTempView("union_data_scity_id")
union_data_scity_id
.
show
(
)
println
(
union_data_scity_id
.
count
()
)
GmeiConfig
.
writeToJDBCTable
(
"jdbc:mysql://10.66.157.22:4000/jerry_test?user=root&password=3SYz54LS9#^9sBvC&rewriteBatchedStatements=true"
,
union_data_scity_id
,
table
=
"esmm_pre_data"
,
SaveMode
.
Overwrite
)
...
...
eda/feededa/src/main/scala/com/gmei/GmeiConfig.scala
View file @
de06e9e5
...
...
@@ -33,7 +33,7 @@ object GmeiConfig extends Serializable {
def
getSparkSession
()
:
(
SparkContext
,
SparkSession
)
=
{
val
sparkConf
=
new
SparkConf
sparkConf
.
set
(
"spark.sql.crossJoin.enabled"
,
"true"
)
sparkConf
.
set
(
"spark.debug.maxToStringFields"
,
"1
0
0"
)
sparkConf
.
set
(
"spark.debug.maxToStringFields"
,
"1
3
0"
)
sparkConf
.
set
(
"spark.sql.broadcastTimeout"
,
"6000"
)
if
(!
sparkConf
.
contains
(
"spark.master"
))
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment