ML / ffm-baseline · Commits

Commit 288911ea, authored Apr 23, 2019 by 王志伟
Merge branch 'master' of http://git.wanmeizhensuo.com/ML/ffm-baseline

Parents: 6e9fda44, bc68a743
Showing 4 changed files with 31 additions and 12 deletions (+31 -12):

- eda/feededa/src/main/scala/com/gmei/EsmmData.scala (+4 -4)
- eda/feededa/src/main/scala/com/gmei/GmeiConfig.scala (+3 -1)
- eda/feededa/src/main/scala/com/gmei/esmm_feature.scala (+2 -2)
- tensnsorflow/multi.py (+22 -5)
eda/feededa/src/main/scala/com/gmei/EsmmData.scala
@@ -215,7 +215,7 @@ object EsmmData {
       """.stripMargin
     )
-    GmeiConfig.writeToJDBCTable("jdbc:mysql://152.136.44.138:4000/jerry_test?user=root&password=3SYz54LS9#^9sBvC&rewriteBatchedStatements=true", union_data_scity_id2, table = "esmm_train_data", SaveMode.Append)
+    GmeiConfig.writeToJDBCTable("jdbc:mysql://172.16.40.158:4000/jerry_test?user=root&password=3SYz54LS9#^9sBvC&rewriteBatchedStatements=true", union_data_scity_id2, table = "esmm_train_data", SaveMode.Append)
   } else {
     println("jerry_test.esmm_train_data already have param.date data")
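For readers outside this repo: the body of GmeiConfig.writeToJDBCTable is not part of this diff, so the sketch below is hypothetical, assuming the helper simply wraps Spark's DataFrameWriter.jdbc; parameter names mirror the call sites above.

// Hypothetical sketch of writeToJDBCTable; the real implementation lives in
// GmeiConfig.scala and is not shown in this commit.
import java.util.Properties
import org.apache.spark.sql.{DataFrame, SaveMode}

def writeToJDBCTable(jdbcUrl: String, df: DataFrame, table: String, saveMode: SaveMode): Unit = {
  // user/password are already carried in the URL's query string at the call
  // sites, so only the JDBC driver needs to be set explicitly here.
  val prop = new Properties()
  prop.put("driver", "com.mysql.jdbc.Driver")
  df.write.mode(saveMode).jdbc(jdbcUrl, table, prop)
}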
@@ -549,7 +549,7 @@ object EsmmPredData {
     val jdbcDF = sc.read.format("jdbc")
       .option("driver", "com.mysql.jdbc.Driver")
-      .option("url", "jdbc:mysql://rdsfewzdmf0jfjp9un8xj.mysql.rds.aliyuncs.com:3306/zhengxing")
+      .option("url", "jdbc:mysql://172.16.30.143:3306/zhengxing")
       .option("dbtable", "api_punishment")
       .option("user", "work")
       .option("password", "BJQaT9VzDcuPBqkd")
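Despite its name, `sc` here must be a SparkSession rather than a SparkContext, since SparkContext has no `.read`. For reference, a sketch (not part of the commit) of the same read expressed with the `jdbc()` shorthand and a Properties object:

// Equivalent of the option chain above, using DataFrameReader.jdbc.
import java.util.Properties

val prop = new Properties()
prop.put("driver", "com.mysql.jdbc.Driver")
prop.put("user", "work")
prop.put("password", "BJQaT9VzDcuPBqkd")
val jdbcDF = sc.read.jdbc("jdbc:mysql://172.16.30.143:3306/zhengxing", "api_punishment", prop)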
@@ -595,7 +595,7 @@ object EsmmPredData {
     // union_data_scity_id.createOrReplaceTempView("union_data_scity_id")
     // println(union_data_scity_id2.count())
-    GmeiConfig.writeToJDBCTable("jdbc:mysql://152.136.44.138:4000/jerry_test?user=root&password=3SYz54LS9#^9sBvC&rewriteBatchedStatements=true", union_data_scity_id2, table = "esmm_pre_data", SaveMode.Overwrite)
+    GmeiConfig.writeToJDBCTable("jdbc:mysql://172.16.40.158:4000/jerry_test?user=root&password=3SYz54LS9#^9sBvC&rewriteBatchedStatements=true", union_data_scity_id2, table = "esmm_pre_data", SaveMode.Overwrite)
@@ -669,7 +669,7 @@ object GetDiaryPortrait {
        |select diary_id,level1_ids,level2_ids,level3_ids,split(level2_ids,",")[0] as level2 from t
       """.stripMargin
     )
-    val jdbc = "jdbc:mysql://152.136.44.138:4000/jerry_test?user=root&password=3SYz54LS9#^9sBvC&rewriteBatchedStatements=true"
+    val jdbc = "jdbc:mysql://172.16.40.158:4000/jerry_test?user=root&password=3SYz54LS9#^9sBvC&rewriteBatchedStatements=true"
     GmeiConfig.writeToJDBCTable(jdbc, result, "diary_feat", SaveMode.Overwrite)
eda/feededa/src/main/scala/com/gmei/GmeiConfig.scala
@@ -32,7 +32,9 @@ object GmeiConfig extends Serializable {
   }

   def getSparkSession(): (SparkContext, SparkSession) = {
-    val sparkConf = new SparkConf
+    val sparkConf = new SparkConf().set("spark.tispark.plan.allow_index_read", "false")
+      .set("spark.hive.mapred.supports.subdirectories", "true")
+      .set("spark.hadoop.mapreduce.input.fileinputformat.input.dir.recursive", "true")
     sparkConf.set("spark.sql.crossJoin.enabled", "true")
     sparkConf.set("spark.debug.maxToStringFields", "130")
     sparkConf.set("spark.sql.broadcastTimeout", "6000")
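To show why getSparkSession() returns a (SparkContext, SparkSession) pair, a hypothetical call site (not part of this commit; the query just reuses a table name from this diff):

// Sketch: consuming the pair returned by getSparkSession().
val (sc, spark) = GmeiConfig.getSparkSession()
val latest = spark.sql("select max(stat_date) from jerry_test.esmm_train_data")
latest.show()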
eda/feededa/src/main/scala/com/gmei/esmm_feature.scala
@@ -78,7 +78,7 @@ object esmm_feature {
     val new_user = rdd.filter(x => old.indexOf(x._1) == -1)
       .toDF("device_id", "os", "app_list", "update_date")
     if (new_user.take(1).nonEmpty) {
-      val tecent_jdbc = "jdbc:mysql://152.136.44.138:4000/jerry_test?user=root&password=3SYz54LS9#^9sBvC&rewriteBatchedStatements=true"
+      val tecent_jdbc = "jdbc:mysql://172.16.40.158:4000/jerry_test?user=root&password=3SYz54LS9#^9sBvC&rewriteBatchedStatements=true"
       GmeiConfig.writeToJDBCTable(tecent_jdbc, new_user, "device_app_list", SaveMode.Append)
     } else {
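`old.indexOf(x._1) == -1` scans the driver-side `old` collection linearly for every record. A sketch of the same "keep only unseen device_ids" filter with a broadcast Set, which makes each lookup O(1); names come from the hunk above, and it assumes `old` is a local Seq of ids (as `indexOf` suggests) with `spark` in scope:

// Same filter as above, but with a broadcast Set instead of indexOf.
val oldIds = spark.sparkContext.broadcast(old.toSet)
val new_user = rdd.filter(x => !oldIds.value.contains(x._1))
  .toDF("device_id", "os", "app_list", "update_date")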
@@ -125,7 +125,7 @@ object esmm_feature {
     val df_new = rdd.filter(x => old.indexOf(x._1) == -1)
       .toDF("device_id", "device_type", "manufacturer", "city_id", "channel", "date")
     if (df_new.take(1).nonEmpty) {
-      val tecent_jdbc = "jdbc:mysql://152.136.44.138:4000/jerry_test?user=root&password=3SYz54LS9#^9sBvC&rewriteBatchedStatements=true"
+      val tecent_jdbc = "jdbc:mysql://172.16.40.158:4000/jerry_test?user=root&password=3SYz54LS9#^9sBvC&rewriteBatchedStatements=true"
       GmeiConfig.writeToJDBCTable(tecent_jdbc, df_new, "user_feature", SaveMode.Append)
     } else {
       println("no need to insert into user feature")
tensnsorflow/multi.py
@@ -3,16 +3,33 @@ from pyspark.sql import HiveContext
 from pyspark.context import SparkContext
 from pyspark.conf import SparkConf
 import pytispark.pytispark as pti
-from pyspark.sql import SQLContext
+# from pyspark.sql import SQLContext
 from pyspark.sql import SparkSession
 import datetime

 def test():
-    spark = SparkSession.builder.enableHiveSupport().getOrCreate()
-    ti = pti.TiContext(spark)
-    ti.tidbMapDatabase("jerry_test")
-    df = spark.sql("select max(stat_date) from esmm_train_data")
+    sparkConf = SparkConf().set("spark.hive.mapred.supports.subdirectories", "true") \
+        .set("spark.hadoop.mapreduce.input.fileinputformat.input.dir.recursive", "true") \
+        .set("spark.tispark.plan.allow_index_double_read", "false") \
+        .set("spark.tispark.plan.allow_index_read", "true") \
+        .set("spark.sql.extensions", "org.apache.spark.sql.TiExtensions") \
+        .set("spark.tispark.pd.addresses", "172.16.40.158:2379")
+    spark = SparkSession.builder.config(conf=sparkConf).enableHiveSupport().getOrCreate()
+    # spark.sql("use online")
+    # spark.sql("ADD JAR hdfs:///user/hive/share/lib/udf/brickhouse-0.7.1-SNAPSHOT.jar")
+    # spark.sql("ADD JAR hdfs:///user/hive/share/lib/udf/hive-udf-1.0-SNAPSHOT.jar")
+    # spark.sql("CREATE TEMPORARY FUNCTION json_map AS 'brickhouse.udf.json.JsonMapUDF'")
+    # spark.sql("CREATE TEMPORARY FUNCTION is_json AS 'com.gmei.hive.common.udf.UDFJsonFormatCheck'")
+    spark.sparkContext.setLogLevel("WARN")
+    # ti = pti.TiContext(spark)
+    # ti.tidbMapDatabase("jerry_test")
+    df = spark.sql("select max(stat_date) from jerry_test.esmm_train_data")
+    df.show()
+    t = df.rdd.map(lambda x: str(x[0])).collect()
+    print(t.count())  # bug: t is a Python list, and list.count() requires an argument; len(t) was likely intended
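This hunk replaces the explicit pti.TiContext / tidbMapDatabase mapping with the extension-based TiSpark setup (spark.sql.extensions = org.apache.spark.sql.TiExtensions plus spark.tispark.pd.addresses), after which TiDB tables are addressable directly as jerry_test.<table>. For symmetry with the Scala side of this repo, a minimal sketch of the same setup in Scala, assuming the TiSpark jar is on the classpath:

// Sketch: the extension-based TiSpark session setup, mirroring the Python change.
// With TiExtensions registered, no explicit TiContext mapping is needed.
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

val conf = new SparkConf()
  .set("spark.sql.extensions", "org.apache.spark.sql.TiExtensions")
  .set("spark.tispark.pd.addresses", "172.16.40.158:2379")
val spark = SparkSession.builder.config(conf).enableHiveSupport().getOrCreate()
spark.sql("select max(stat_date) from jerry_test.esmm_train_data").show()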