Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
3e6ab5c5
Commit
3e6ab5c5
authored
May 29, 2019
by
王志伟
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'master' of
http://git.wanmeizhensuo.com/ML/ffm-baseline
parents
7a4644e0
ec4eb794
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
50 additions
and
42 deletions
+50
-42
feature_engineering.py
eda/esmm/Model_pipline/feature_engineering.py
+8
-10
submit.sh
eda/esmm/Model_pipline/submit.sh
+18
-3
train.py
eda/esmm/Model_pipline/train.py
+5
-21
multi.py
tensnsorflow/multi.py
+19
-8
No files found.
eda/esmm/Model_pipline/feature_engineering.py
View file @
3e6ab5c5
...
...
@@ -204,7 +204,7 @@ def feature_engineer():
value_map
[
x
[
17
]],
value_map
[
x
[
18
]],
value_map
[
x
[
19
]],
value_map
[
x
[
20
]],
value_map
[
x
[
21
]],
value_map
[
x
[
22
]],
value_map
[
x
[
23
]],
value_map
[
x
[
24
]],
value_map
[
x
[
25
]],
value_map
[
x
[
26
]]]))
rdd
.
persist
(
storageLevel
=
StorageLevel
.
MEMORY_
AND_DISK
)
rdd
.
persist
(
storageLevel
=
StorageLevel
.
MEMORY_
ONLY_SER
)
# TODO 上线后把下面train fliter 删除,因为最近一天的数据也要作为训练集
...
...
@@ -215,11 +215,14 @@ def feature_engineer():
spark
.
createDataFrame
(
train
)
.
toDF
(
"y"
,
"z"
,
"app_list"
,
"level2_list"
,
"level3_list"
,
"tag1_list"
,
"tag2_list"
,
"tag3_list"
,
"tag4_list"
,
"tag5_list"
,
"tag6_list"
,
"tag7_list"
,
"ids"
)
\
.
write
.
format
(
"tfrecords"
)
.
save
(
path
=
path
+
"tr/"
,
mode
=
"overwrite"
)
.
repartition
(
1
)
.
write
.
format
(
"tfrecords"
)
.
save
(
path
=
path
+
"tr/"
,
mode
=
"overwrite"
)
h
=
time
.
time
()
print
(
"train tfrecord done"
)
print
((
h
-
f
)
/
60
)
print
(
"样本总量:"
)
print
(
rdd
.
count
())
test
=
rdd
.
filter
(
lambda
x
:
x
[
0
]
==
validate_date
)
.
map
(
lambda
x
:
(
x
[
1
],
x
[
2
],
x
[
3
],
x
[
4
],
x
[
5
],
x
[
6
],
x
[
7
],
x
[
8
],
x
[
9
],
x
[
10
],
x
[
11
],
x
[
12
],
x
[
13
]))
...
...
@@ -227,7 +230,7 @@ def feature_engineer():
spark
.
createDataFrame
(
test
)
.
toDF
(
"y"
,
"z"
,
"app_list"
,
"level2_list"
,
"level3_list"
,
"tag1_list"
,
"tag2_list"
,
"tag3_list"
,
"tag4_list"
,
"tag5_list"
,
"tag6_list"
,
"tag7_list"
,
"ids"
)
\
.
write
.
format
(
"tfrecords"
)
.
save
(
path
=
path
+
"va/"
,
mode
=
"overwrite"
)
.
repartition
(
1
)
.
write
.
format
(
"tfrecords"
)
.
save
(
path
=
path
+
"va/"
,
mode
=
"overwrite"
)
print
(
"va tfrecord done"
)
...
...
@@ -263,6 +266,7 @@ def get_predict(date,value_map,app_list_map,leve2_map,leve3_map):
"treatment_method"
,
"price_min"
,
"price_max"
,
"treatment_time"
,
"maintain_time"
,
"recover_time"
]
df
=
spark
.
sql
(
sql
)
df
=
df
.
drop_duplicates
([
"ucity_id"
,
"device_id"
,
"cid_id"
])
df
=
df
.
na
.
fill
(
dict
(
zip
(
features
,
features
)))
f
=
time
.
time
()
rdd
=
df
.
select
(
"label"
,
"y"
,
"z"
,
"ucity_id"
,
"device_id"
,
"cid_id"
,
"app_list"
,
"level2_ids"
,
"level3_ids"
,
...
...
@@ -286,16 +290,12 @@ def get_predict(date,value_map,app_list_map,leve2_map,leve3_map):
value_map
.
get
(
x
[
29
],
15
)
]))
rdd
.
persist
(
storageLevel
=
StorageLevel
.
MEMORY_
AND_DISK
)
rdd
.
persist
(
storageLevel
=
StorageLevel
.
MEMORY_
ONLY_SER
)
native_pre
=
spark
.
createDataFrame
(
rdd
.
filter
(
lambda
x
:
x
[
0
]
==
0
)
.
map
(
lambda
x
:(
x
[
3
],
x
[
4
],
x
[
5
])))
\
.
toDF
(
"city"
,
"uid"
,
"cid_id"
)
print
(
"native csv"
)
native_pre
.
toPandas
()
.
to_csv
(
local_path
+
"native.csv"
,
header
=
True
)
# TODO 写成csv文件改成下面这样
# native_pre.coalesce(1).write.format('com.databricks.spark.csv').save(path+"native/",header = 'true')
# 预测的tfrecord必须写成一个文件,这样可以摆保证顺序
spark
.
createDataFrame
(
rdd
.
filter
(
lambda
x
:
x
[
0
]
==
0
)
.
map
(
lambda
x
:
(
x
[
1
],
x
[
2
],
x
[
6
],
x
[
7
],
x
[
8
],
x
[
9
],
x
[
10
],
x
[
11
],
x
[
12
],
x
[
13
],
x
[
14
],
x
[
15
],
x
[
16
])))
\
.
toDF
(
"y"
,
"z"
,
"app_list"
,
"level2_list"
,
"level3_list"
,
"tag1_list"
,
"tag2_list"
,
"tag3_list"
,
"tag4_list"
,
...
...
@@ -309,8 +309,6 @@ def get_predict(date,value_map,app_list_map,leve2_map,leve3_map):
.
toDF
(
"city"
,
"uid"
,
"cid_id"
)
print
(
"nearby csv"
)
native_pre
.
toPandas
()
.
to_csv
(
local_path
+
"nearby.csv"
,
header
=
True
)
# TODO 写成csv文件改成下面这样
# nearby_pre.coalesce(1).write.format('com.databricks.spark.csv').save(path+"nearby/",header = 'true')
spark
.
createDataFrame
(
rdd
.
filter
(
lambda
x
:
x
[
0
]
==
1
)
.
map
(
...
...
eda/esmm/Model_pipline/submit.sh
View file @
3e6ab5c5
...
...
@@ -4,10 +4,26 @@ PYTHON_PATH=/srv/envs/esmm/bin/python
MODEL_PATH
=
/srv/apps/ffm-baseline_git/eda/esmm/Model_pipline
LOCAL_PATH
=
/home/gmuser/esmm
HDFS_PATH
=
hdfs://172.16.32.4:8020/strategy/esmm
export
CLASSPATH
=
"/opt/hadoop/etc/hadoop:/opt/hadoop/share/hadoop/common/lib/api-asn1-api-1.0.0-M20.jar:/opt/hadoop/share/hadoop/common/lib/hadoop-annotations-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/common/lib/activation-1.1.jar:/opt/hadoop/share/hadoop/common/lib/commons-codec-1.4.jar:/opt/hadoop/share/hadoop/common/lib/jasper-runtime-5.5.23.jar:/opt/hadoop/share/hadoop/common/lib/jsch-0.1.42.jar:/opt/hadoop/share/hadoop/common/lib/hadoop-auth-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/common/lib/jsp-api-2.1.jar:/opt/hadoop/share/hadoop/common/lib/asm-3.2.jar:/opt/hadoop/share/hadoop/common/lib/commons-lang-2.6.jar:/opt/hadoop/share/hadoop/common/lib/commons-beanutils-1.9.2.jar:/opt/hadoop/share/hadoop/common/lib/zookeeper-3.4.5-cdh5.16.1.jar:/opt/hadoop/share/hadoop/common/lib/api-util-1.0.0-M20.jar:/opt/hadoop/share/hadoop/common/lib/jetty-util-6.1.26.cloudera.4.jar:/opt/hadoop/share/hadoop/common/lib/snappy-java-1.0.4.1.jar:/opt/hadoop/share/hadoop/common/lib/guava-11.0.2.jar:/opt/hadoop/share/hadoop/common/lib/apacheds-kerberos-codec-2.0.0-M15.jar:/opt/hadoop/share/hadoop/common/lib/commons-cli-1.2.jar:/opt/hadoop/share/hadoop/common/lib/servlet-api-2.5.jar:/opt/hadoop/share/hadoop/common/lib/commons-collections-3.2.2.jar:/opt/hadoop/share/hadoop/common/lib/jersey-server-1.9.jar:/opt/hadoop/share/hadoop/common/lib/commons-digester-1.8.jar:/opt/hadoop/share/hadoop/common/lib/jasper-compiler-5.5.23.jar:/opt/hadoop/share/hadoop/common/lib/java-xmlbuilder-0.4.jar:/opt/hadoop/share/hadoop/common/lib/curator-client-2.7.1.jar:/opt/hadoop/share/hadoop/common/lib/commons-logging-1.1.3.jar:/opt/hadoop/share/hadoop/common/lib/jackson-jaxrs-1.8.10.jar:/opt/hadoop/share/hadoop/common/lib/jaxb-impl-2.2.3-1.jar:/opt/hadoop/share/hadoop/common/lib/slf4j-log4j12-1.7.5.jar:/opt/hadoop/share/hadoop/common/lib/gson-2.2.4.jar:/opt/hadoop/share/hadoop/common/lib/commons-configuration-1.6.jar:/opt/hadoop/share/hadoop/common/lib/commons-httpclient-3.1.jar:/opt/hadoop/share/hadoop/common/lib/hamcrest-core-1.3.jar:/opt/hadoop/share/hadoop/common/lib/httpclient-4.2.5.jar:/opt/hadoop/share/hadoop/common/lib/jets3t-0.9.0.jar:/opt/hadoop/share/hadoop/common/lib/xmlenc-0.52.jar:/opt/hadoop/share/hadoop/common/lib/logredactor-1.0.3.jar:/opt/hadoop/share/hadoop/common/lib/slf4j-api-1.7.5.jar:/opt/hadoop/share/hadoop/common/lib/htrace-core4-4.0.1-incubating.jar:/opt/hadoop/share/hadoop/common/lib/curator-recipes-2.7.1.jar:/opt/hadoop/share/hadoop/common/lib/apacheds-i18n-2.0.0-M15.jar:/opt/hadoop/share/hadoop/common/lib/jsr305-3.0.0.jar:/opt/hadoop/share/hadoop/common/lib/log4j-1.2.17.jar:/opt/hadoop/share/hadoop/common/lib/xz-1.0.jar:/opt/hadoop/share/hadoop/common/lib/junit-4.11.jar:/opt/hadoop/share/hadoop/common/lib/jaxb-api-2.2.2.jar:/opt/hadoop/share/hadoop/common/lib/commons-beanutils-core-1.8.0.jar:/opt/hadoop/share/hadoop/common/lib/commons-compress-1.4.1.jar:/opt/hadoop/share/hadoop/common/lib/commons-net-3.1.jar:/opt/hadoop/share/hadoop/common/lib/jersey-json-1.9.jar:/opt/hadoop/share/hadoop/common/lib/stax-api-1.0-2.jar:/opt/hadoop/share/hadoop/common/lib/commons-el-1.0.jar:/opt/hadoop/share/hadoop/common/lib/mockito-all-1.8.5.jar:/opt/hadoop/share/hadoop/common/lib/jetty-6.1.26.cloudera.4.jar:/opt/hadoop/share/hadoop/common/lib/jettison-1.1.jar:/opt/hadoop/share/hadoop/common/lib/protobuf-java-2.5.0.jar:/opt/hadoop/share/hadoop/common/lib/avro-1.7.6-cdh5.16.1.jar:/opt/hadoop/share/hadoop/common/lib/httpcore-4.2.5.jar:/opt/hadoop/share/hadoop/common/lib/commons-io-2.4.jar:/opt/hadoop/share/hadoop/common/lib/netty-3.10.5.Final.jar:/opt/hadoop/share/hadoop/common/lib/paranamer-2.3.jar:/opt/hadoop/share/hadoop/common/lib/curator-framework-2.7.1.jar:/opt/hadoop/share/hadoop/common/lib/jackson-xc-1.8.10.jar:/opt/hadoop/share/hadoop/common/lib/commons-math3-3.1.1.jar:/opt/hadoop/share/hadoop/common/lib/jersey-core-1.9.jar:/opt/hadoop/share/hadoop/common/hadoop-common-2.6.0-cdh5.16.1-tests.jar:/opt/hadoop/share/hadoop/common/hadoop-nfs-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/common/hadoop-common-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/hdfs:/opt/hadoop/share/hadoop/hdfs/lib/commons-codec-1.4.jar:/opt/hadoop/share/hadoop/hdfs/lib/jasper-runtime-5.5.23.jar:/opt/hadoop/share/hadoop/hdfs/lib/jsp-api-2.1.jar:/opt/hadoop/share/hadoop/hdfs/lib/asm-3.2.jar:/opt/hadoop/share/hadoop/hdfs/lib/commons-lang-2.6.jar:/opt/hadoop/share/hadoop/hdfs/lib/jetty-util-6.1.26.cloudera.4.jar:/opt/hadoop/share/hadoop/hdfs/lib/guava-11.0.2.jar:/opt/hadoop/share/hadoop/hdfs/lib/xml-apis-1.3.04.jar:/opt/hadoop/share/hadoop/hdfs/lib/commons-cli-1.2.jar:/opt/hadoop/share/hadoop/hdfs/lib/servlet-api-2.5.jar:/opt/hadoop/share/hadoop/hdfs/lib/commons-daemon-1.0.13.jar:/opt/hadoop/share/hadoop/hdfs/lib/jersey-server-1.9.jar:/opt/hadoop/share/hadoop/hdfs/lib/jackson-core-asl-1.8.10.jar:/opt/hadoop/share/hadoop/hdfs/lib/commons-logging-1.1.3.jar:/opt/hadoop/share/hadoop/hdfs/lib/jackson-mapper-asl-1.8.10-cloudera.1.jar:/opt/hadoop/share/hadoop/hdfs/lib/xmlenc-0.52.jar:/opt/hadoop/share/hadoop/hdfs/lib/htrace-core4-4.0.1-incubating.jar:/opt/hadoop/share/hadoop/hdfs/lib/jsr305-3.0.0.jar:/opt/hadoop/share/hadoop/hdfs/lib/log4j-1.2.17.jar:/opt/hadoop/share/hadoop/hdfs/lib/xercesImpl-2.9.1.jar:/opt/hadoop/share/hadoop/hdfs/lib/commons-el-1.0.jar:/opt/hadoop/share/hadoop/hdfs/lib/jetty-6.1.26.cloudera.4.jar:/opt/hadoop/share/hadoop/hdfs/lib/protobuf-java-2.5.0.jar:/opt/hadoop/share/hadoop/hdfs/lib/commons-io-2.4.jar:/opt/hadoop/share/hadoop/hdfs/lib/leveldbjni-all-1.8.jar:/opt/hadoop/share/hadoop/hdfs/lib/netty-3.10.5.Final.jar:/opt/hadoop/share/hadoop/hdfs/lib/jersey-core-1.9.jar:/opt/hadoop/share/hadoop/hdfs/hadoop-hdfs-nfs-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/hdfs/hadoop-hdfs-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/hdfs/hadoop-hdfs-2.6.0-cdh5.16.1-tests.jar:/opt/hadoop/share/hadoop/yarn/lib/activation-1.1.jar:/opt/hadoop/share/hadoop/yarn/lib/commons-codec-1.4.jar:/opt/hadoop/share/hadoop/yarn/lib/aopalliance-1.0.jar:/opt/hadoop/share/hadoop/yarn/lib/asm-3.2.jar:/opt/hadoop/share/hadoop/yarn/lib/commons-lang-2.6.jar:/opt/hadoop/share/hadoop/yarn/lib/zookeeper-3.4.5-cdh5.16.1.jar:/opt/hadoop/share/hadoop/yarn/lib/guice-3.0.jar:/opt/hadoop/share/hadoop/yarn/lib/jetty-util-6.1.26.cloudera.4.jar:/opt/hadoop/share/hadoop/yarn/lib/guava-11.0.2.jar:/opt/hadoop/share/hadoop/yarn/lib/commons-cli-1.2.jar:/opt/hadoop/share/hadoop/yarn/lib/servlet-api-2.5.jar:/opt/hadoop/share/hadoop/yarn/lib/commons-collections-3.2.2.jar:/opt/hadoop/share/hadoop/yarn/lib/jersey-server-1.9.jar:/opt/hadoop/share/hadoop/yarn/lib/jackson-core-asl-1.8.10.jar:/opt/hadoop/share/hadoop/yarn/lib/commons-logging-1.1.3.jar:/opt/hadoop/share/hadoop/yarn/lib/jackson-jaxrs-1.8.10.jar:/opt/hadoop/share/hadoop/yarn/lib/jaxb-impl-2.2.3-1.jar:/opt/hadoop/share/hadoop/yarn/lib/jline-2.11.jar:/opt/hadoop/share/hadoop/yarn/lib/jackson-mapper-asl-1.8.10-cloudera.1.jar:/opt/hadoop/share/hadoop/yarn/lib/jersey-guice-1.9.jar:/opt/hadoop/share/hadoop/yarn/lib/jsr305-3.0.0.jar:/opt/hadoop/share/hadoop/yarn/lib/log4j-1.2.17.jar:/opt/hadoop/share/hadoop/yarn/lib/xz-1.0.jar:/opt/hadoop/share/hadoop/yarn/lib/javax.inject-1.jar:/opt/hadoop/share/hadoop/yarn/lib/jaxb-api-2.2.2.jar:/opt/hadoop/share/hadoop/yarn/lib/commons-compress-1.4.1.jar:/opt/hadoop/share/hadoop/yarn/lib/jersey-json-1.9.jar:/opt/hadoop/share/hadoop/yarn/lib/stax-api-1.0-2.jar:/opt/hadoop/share/hadoop/yarn/lib/jetty-6.1.26.cloudera.4.jar:/opt/hadoop/share/hadoop/yarn/lib/jettison-1.1.jar:/opt/hadoop/share/hadoop/yarn/lib/protobuf-java-2.5.0.jar:/opt/hadoop/share/hadoop/yarn/lib/guice-servlet-3.0.jar:/opt/hadoop/share/hadoop/yarn/lib/commons-io-2.4.jar:/opt/hadoop/share/hadoop/yarn/lib/leveldbjni-all-1.8.jar:/opt/hadoop/share/hadoop/yarn/lib/jersey-client-1.9.jar:/opt/hadoop/share/hadoop/yarn/lib/jackson-xc-1.8.10.jar:/opt/hadoop/share/hadoop/yarn/lib/jersey-core-1.9.jar:/opt/hadoop/share/hadoop/yarn/hadoop-yarn-client-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/yarn/hadoop-yarn-server-web-proxy-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/yarn/hadoop-yarn-server-common-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/yarn/hadoop-yarn-server-tests-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/yarn/hadoop-yarn-server-nodemanager-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/yarn/hadoop-yarn-api-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/yarn/hadoop-yarn-registry-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/yarn/hadoop-yarn-server-applicationhistoryservice-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/yarn/hadoop-yarn-common-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/yarn/hadoop-yarn-applications-distributedshell-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/yarn/hadoop-yarn-server-resourcemanager-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/yarn/hadoop-yarn-applications-unmanaged-am-launcher-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/mapreduce/lib/hadoop-annotations-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/mapreduce/lib/aopalliance-1.0.jar:/opt/hadoop/share/hadoop/mapreduce/lib/asm-3.2.jar:/opt/hadoop/share/hadoop/mapreduce/lib/guice-3.0.jar:/opt/hadoop/share/hadoop/mapreduce/lib/snappy-java-1.0.4.1.jar:/opt/hadoop/share/hadoop/mapreduce/lib/jersey-server-1.9.jar:/opt/hadoop/share/hadoop/mapreduce/lib/jackson-core-asl-1.8.10.jar:/opt/hadoop/share/hadoop/mapreduce/lib/jackson-mapper-asl-1.8.10-cloudera.1.jar:/opt/hadoop/share/hadoop/mapreduce/lib/hamcrest-core-1.3.jar:/opt/hadoop/share/hadoop/mapreduce/lib/jersey-guice-1.9.jar:/opt/hadoop/share/hadoop/mapreduce/lib/log4j-1.2.17.jar:/opt/hadoop/share/hadoop/mapreduce/lib/xz-1.0.jar:/opt/hadoop/share/hadoop/mapreduce/lib/junit-4.11.jar:/opt/hadoop/share/hadoop/mapreduce/lib/javax.inject-1.jar:/opt/hadoop/share/hadoop/mapreduce/lib/commons-compress-1.4.1.jar:/opt/hadoop/share/hadoop/mapreduce/lib/protobuf-java-2.5.0.jar:/opt/hadoop/share/hadoop/mapreduce/lib/avro-1.7.6-cdh5.16.1.jar:/opt/hadoop/share/hadoop/mapreduce/lib/guice-servlet-3.0.jar:/opt/hadoop/share/hadoop/mapreduce/lib/commons-io-2.4.jar:/opt/hadoop/share/hadoop/mapreduce/lib/leveldbjni-all-1.8.jar:/opt/hadoop/share/hadoop/mapreduce/lib/netty-3.10.5.Final.jar:/opt/hadoop/share/hadoop/mapreduce/lib/paranamer-2.3.jar:/opt/hadoop/share/hadoop/mapreduce/lib/jersey-core-1.9.jar:/opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-client-hs-plugins-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-client-app-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-client-shuffle-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-client-common-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-client-nativetask-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-client-jobclient-2.6.0-cdh5.16.1-tests.jar:/opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-client-hs-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-client-core-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-client-jobclient-2.6.0-cdh5.16.1.jar"
echo
$CLASSPATH
export
LD_LIBRARY_PATH
=
"/usr/lib/jvm/java-8-openjdk-amd64/jre/lib/amd64/server:/opt/hadoop/lib/native"
echo
$LD_LIBRARY_PATH
export
PATH
=
$PATH
:/usr/local/hadoop/bin/
echo
"rm model file"
rm
-r
${
LOCAL_PATH
}
/model_ckpt/DeepCvrMTL/20
*
b
=
`
date
+%Y%m%d
`
echo
"train..."
CLASSPATH
=
"
$(
hadoop classpath
--glob
)
"
${
PYTHON_PATH
}
${
MODEL_PATH
}
/train.py
--ctr_task_wgt
=
0.5
--learning_rate
=
0.0001
--deep_layers
=
512,256,128,64,32
--dropout
=
0.3,0.3,0.3,0.3,0.3
--optimizer
=
Adam
--num_epochs
=
1
--embedding_size
=
16
--batch_size
=
10000
--field_size
=
15
--feature_size
=
600000
--l2_reg
=
0.005
--log_steps
=
100
--num_threads
=
36
--model_dir
=
${
LOCAL_PATH
}
/model_ckpt/DeepCvrMTL/
--local_dir
=
${
LOCAL_PATH
}
--task_type
=
train
\ No newline at end of file
${
PYTHON_PATH
}
${
MODEL_PATH
}
/train.py
--ctr_task_wgt
=
0.5
--learning_rate
=
0.0001
--deep_layers
=
512,256,128,64,32
--dropout
=
0.3,0.3,0.3,0.3,0.3
--optimizer
=
Adam
--num_epochs
=
1
--embedding_size
=
16
--batch_size
=
10000
--field_size
=
15
--feature_size
=
600000
--l2_reg
=
0.005
--log_steps
=
100
--num_threads
=
36
--model_dir
=
${
LOCAL_PATH
}
/model_ckpt/DeepCvrMTL/
--local_dir
=
${
LOCAL_PATH
}
--hdfs_dir
=
${
HDFS_PATH
}
/native
--task_type
=
train
>
"/home/gmuser/esmm/log/
$b_train
.log"
echo
"infer native..."
${
PYTHON_PATH
}
${
MODEL_PATH
}
/train.py
--ctr_task_wgt
=
0.5
--learning_rate
=
0.0001
--deep_layers
=
512,256,128,64,32
--dropout
=
0.3,0.3,0.3,0.3,0.3
--optimizer
=
Adam
--num_epochs
=
1
--embedding_size
=
16
--batch_size
=
8000
--field_size
=
15
--feature_size
=
600000
--l2_reg
=
0.005
--log_steps
=
100
--num_threads
=
36
--model_dir
=
${
LOCAL_PATH
}
/model_ckpt/DeepCvrMTL/
--local_dir
=
${
LOCAL_PATH
}
/native
--hdfs_dir
=
${
HDFS_PATH
}
/native
--task_type
=
infer
>
"/home/gmuser/esmm/log/
$b_native
.log"
echo
"infer nearby..."
${
PYTHON_PATH
}
${
MODEL_PATH
}
/train.py
--ctr_task_wgt
=
0.5
--learning_rate
=
0.0001
--deep_layers
=
512,256,128,64,32
--dropout
=
0.3,0.3,0.3,0.3,0.3
--optimizer
=
Adam
--num_epochs
=
1
--embedding_size
=
16
--batch_size
=
8000
--field_size
=
15
--feature_size
=
600000
--l2_reg
=
0.005
--log_steps
=
100
--num_threads
=
36
--model_dir
=
${
LOCAL_PATH
}
/model_ckpt/DeepCvrMTL/
--local_dir
=
${
LOCAL_PATH
}
/nearby
--hdfs_dir
=
${
HDFS_PATH
}
/nearby
--task_type
=
infer
>
"/home/gmuser/esmm/log/
$b_nearby
.log"
echo
"sort and 2sql"
${
PYTHON_PATH
}
${
MODEL_PATH
}
/to_database.py
>
"/home/gmuser/esmm/log/
$b_insert
.log"
eda/esmm/Model_pipline/train.py
View file @
3e6ab5c5
...
...
@@ -300,8 +300,8 @@ def main(_):
FLAGS
.
model_dir
=
FLAGS
.
model_dir
+
FLAGS
.
dt_dir
#FLAGS.data_dir = FLAGS.data_dir + FLAGS.dt_dir
tr_files
=
get_filename
(
"tr"
)
va_files
=
get_filename
(
"va"
)
tr_files
=
[
"hdfs://172.16.32.4:8020/strategy/esmm/tr/part-r-00000"
]
va_files
=
[
"hdfs://172.16.32.4:8020/strategy/esmm/va/part-r-00000"
]
te_files
=
[
"
%
s/part-r-00000"
%
FLAGS
.
hdfs_dir
]
if
FLAGS
.
clear_existing_model
:
...
...
@@ -347,30 +347,14 @@ def main(_):
elif
FLAGS
.
task_type
==
'export'
:
print
(
"Not Implemented, Do It Yourself!"
)
def
get_filename
(
dir_in
):
pre_add
=
"hdfs://172.16.32.4:8020/strategy/esmm/"
x
=
[]
for
i
in
range
(
0
,
200
):
if
i
<
10
:
t
=
pre_add
+
dir_in
+
"/part-r-0000"
+
str
(
i
)
x
.
append
(
t
)
elif
10
<=
i
<
100
:
t
=
pre_add
+
dir_in
+
"/part-r-000"
+
str
(
i
)
x
.
append
(
t
)
elif
100
<=
i
<
200
:
t
=
pre_add
+
dir_in
+
"/part-r-00"
+
str
(
i
)
x
.
append
(
t
)
return
x
if
__name__
==
"__main__"
:
b
=
time
.
time
()
classpath
=
"$CLASSPATH:
%
JAVA_HOME
%
/lib/dt.jar:
%
JAVA_HOME
%
/lib/tools.jar:$(/opt/hadoop/etc/hadoop:/opt/hadoop/share/hadoop/common/lib/api-asn1-api-1.0.0-M20.jar:/opt/hadoop/share/hadoop/common/lib/hadoop-annotations-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/common/lib/activation-1.1.jar:/opt/hadoop/share/hadoop/common/lib/commons-codec-1.4.jar:/opt/hadoop/share/hadoop/common/lib/jasper-runtime-5.5.23.jar:/opt/hadoop/share/hadoop/common/lib/jsch-0.1.42.jar:/opt/hadoop/share/hadoop/common/lib/hadoop-auth-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/common/lib/jsp-api-2.1.jar:/opt/hadoop/share/hadoop/common/lib/asm-3.2.jar:/opt/hadoop/share/hadoop/common/lib/commons-lang-2.6.jar:/opt/hadoop/share/hadoop/common/lib/commons-beanutils-1.9.2.jar:/opt/hadoop/share/hadoop/common/lib/zookeeper-3.4.5-cdh5.16.1.jar:/opt/hadoop/share/hadoop/common/lib/api-util-1.0.0-M20.jar:/opt/hadoop/share/hadoop/common/lib/jetty-util-6.1.26.cloudera.4.jar:/opt/hadoop/share/hadoop/common/lib/snappy-java-1.0.4.1.jar:/opt/hadoop/share/hadoop/common/lib/guava-11.0.2.jar:/opt/hadoop/share/hadoop/common/lib/apacheds-kerberos-codec-2.0.0-M15.jar:/opt/hadoop/share/hadoop/common/lib/commons-cli-1.2.jar:/opt/hadoop/share/hadoop/common/lib/servlet-api-2.5.jar:/opt/hadoop/share/hadoop/common/lib/commons-collections-3.2.2.jar:/opt/hadoop/share/hadoop/common/lib/jersey-server-1.9.jar:/opt/hadoop/share/hadoop/common/lib/commons-digester-1.8.jar:/opt/hadoop/share/hadoop/common/lib/jasper-compiler-5.5.23.jar:/opt/hadoop/share/hadoop/common/lib/java-xmlbuilder-0.4.jar:/opt/hadoop/share/hadoop/common/lib/curator-client-2.7.1.jar:/opt/hadoop/share/hadoop/common/lib/commons-logging-1.1.3.jar:/opt/hadoop/share/hadoop/common/lib/jackson-jaxrs-1.8.10.jar:/opt/hadoop/share/hadoop/common/lib/jaxb-impl-2.2.3-1.jar:/opt/hadoop/share/hadoop/common/lib/slf4j-log4j12-1.7.5.jar:/opt/hadoop/share/hadoop/common/lib/gson-2.2.4.jar:/opt/hadoop/share/hadoop/common/lib/commons-configuration-1.6.jar:/opt/hadoop/share/hadoop/common/lib/commons-httpclient-3.1.jar:/opt/hadoop/share/hadoop/common/lib/hamcrest-core-1.3.jar:/opt/hadoop/share/hadoop/common/lib/httpclient-4.2.5.jar:/opt/hadoop/share/hadoop/common/lib/jets3t-0.9.0.jar:/opt/hadoop/share/hadoop/common/lib/xmlenc-0.52.jar:/opt/hadoop/share/hadoop/common/lib/logredactor-1.0.3.jar:/opt/hadoop/share/hadoop/common/lib/slf4j-api-1.7.5.jar:/opt/hadoop/share/hadoop/common/lib/htrace-core4-4.0.1-incubating.jar:/opt/hadoop/share/hadoop/common/lib/curator-recipes-2.7.1.jar:/opt/hadoop/share/hadoop/common/lib/apacheds-i18n-2.0.0-M15.jar:/opt/hadoop/share/hadoop/common/lib/jsr305-3.0.0.jar:/opt/hadoop/share/hadoop/common/lib/log4j-1.2.17.jar:/opt/hadoop/share/hadoop/common/lib/xz-1.0.jar:/opt/hadoop/share/hadoop/common/lib/junit-4.11.jar:/opt/hadoop/share/hadoop/common/lib/jaxb-api-2.2.2.jar:/opt/hadoop/share/hadoop/common/lib/commons-beanutils-core-1.8.0.jar:/opt/hadoop/share/hadoop/common/lib/commons-compress-1.4.1.jar:/opt/hadoop/share/hadoop/common/lib/commons-net-3.1.jar:/opt/hadoop/share/hadoop/common/lib/jersey-json-1.9.jar:/opt/hadoop/share/hadoop/common/lib/stax-api-1.0-2.jar:/opt/hadoop/share/hadoop/common/lib/commons-el-1.0.jar:/opt/hadoop/share/hadoop/common/lib/mockito-all-1.8.5.jar:/opt/hadoop/share/hadoop/common/lib/jetty-6.1.26.cloudera.4.jar:/opt/hadoop/share/hadoop/common/lib/jettison-1.1.jar:/opt/hadoop/share/hadoop/common/lib/protobuf-java-2.5.0.jar:/opt/hadoop/share/hadoop/common/lib/avro-1.7.6-cdh5.16.1.jar:/opt/hadoop/share/hadoop/common/lib/httpcore-4.2.5.jar:/opt/hadoop/share/hadoop/common/lib/commons-io-2.4.jar:/opt/hadoop/share/hadoop/common/lib/netty-3.10.5.Final.jar:/opt/hadoop/share/hadoop/common/lib/paranamer-2.3.jar:/opt/hadoop/share/hadoop/common/lib/curator-framework-2.7.1.jar:/opt/hadoop/share/hadoop/common/lib/jackson-xc-1.8.10.jar:/opt/hadoop/share/hadoop/common/lib/commons-math3-3.1.1.jar:/opt/hadoop/share/hadoop/common/lib/jersey-core-1.9.jar:/opt/hadoop/share/hadoop/common/hadoop-common-2.6.0-cdh5.16.1-tests.jar:/opt/hadoop/share/hadoop/common/hadoop-nfs-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/common/hadoop-common-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/hdfs:/opt/hadoop/share/hadoop/hdfs/lib/commons-codec-1.4.jar:/opt/hadoop/share/hadoop/hdfs/lib/jasper-runtime-5.5.23.jar:/opt/hadoop/share/hadoop/hdfs/lib/jsp-api-2.1.jar:/opt/hadoop/share/hadoop/hdfs/lib/asm-3.2.jar:/opt/hadoop/share/hadoop/hdfs/lib/commons-lang-2.6.jar:/opt/hadoop/share/hadoop/hdfs/lib/jetty-util-6.1.26.cloudera.4.jar:/opt/hadoop/share/hadoop/hdfs/lib/guava-11.0.2.jar:/opt/hadoop/share/hadoop/hdfs/lib/xml-apis-1.3.04.jar:/opt/hadoop/share/hadoop/hdfs/lib/commons-cli-1.2.jar:/opt/hadoop/share/hadoop/hdfs/lib/servlet-api-2.5.jar:/opt/hadoop/share/hadoop/hdfs/lib/commons-daemon-1.0.13.jar:/opt/hadoop/share/hadoop/hdfs/lib/jersey-server-1.9.jar:/opt/hadoop/share/hadoop/hdfs/lib/jackson-core-asl-1.8.10.jar:/opt/hadoop/share/hadoop/hdfs/lib/commons-logging-1.1.3.jar:/opt/hadoop/share/hadoop/hdfs/lib/jackson-mapper-asl-1.8.10-cloudera.1.jar:/opt/hadoop/share/hadoop/hdfs/lib/xmlenc-0.52.jar:/opt/hadoop/share/hadoop/hdfs/lib/htrace-core4-4.0.1-incubating.jar:/opt/hadoop/share/hadoop/hdfs/lib/jsr305-3.0.0.jar:/opt/hadoop/share/hadoop/hdfs/lib/log4j-1.2.17.jar:/opt/hadoop/share/hadoop/hdfs/lib/xercesImpl-2.9.1.jar:/opt/hadoop/share/hadoop/hdfs/lib/commons-el-1.0.jar:/opt/hadoop/share/hadoop/hdfs/lib/jetty-6.1.26.cloudera.4.jar:/opt/hadoop/share/hadoop/hdfs/lib/protobuf-java-2.5.0.jar:/opt/hadoop/share/hadoop/hdfs/lib/commons-io-2.4.jar:/opt/hadoop/share/hadoop/hdfs/lib/leveldbjni-all-1.8.jar:/opt/hadoop/share/hadoop/hdfs/lib/netty-3.10.5.Final.jar:/opt/hadoop/share/hadoop/hdfs/lib/jersey-core-1.9.jar:/opt/hadoop/share/hadoop/hdfs/hadoop-hdfs-nfs-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/hdfs/hadoop-hdfs-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/hdfs/hadoop-hdfs-2.6.0-cdh5.16.1-tests.jar:/opt/hadoop/share/hadoop/yarn/lib/activation-1.1.jar:/opt/hadoop/share/hadoop/yarn/lib/commons-codec-1.4.jar:/opt/hadoop/share/hadoop/yarn/lib/aopalliance-1.0.jar:/opt/hadoop/share/hadoop/yarn/lib/asm-3.2.jar:/opt/hadoop/share/hadoop/yarn/lib/commons-lang-2.6.jar:/opt/hadoop/share/hadoop/yarn/lib/zookeeper-3.4.5-cdh5.16.1.jar:/opt/hadoop/share/hadoop/yarn/lib/guice-3.0.jar:/opt/hadoop/share/hadoop/yarn/lib/jetty-util-6.1.26.cloudera.4.jar:/opt/hadoop/share/hadoop/yarn/lib/guava-11.0.2.jar:/opt/hadoop/share/hadoop/yarn/lib/commons-cli-1.2.jar:/opt/hadoop/share/hadoop/yarn/lib/servlet-api-2.5.jar:/opt/hadoop/share/hadoop/yarn/lib/commons-collections-3.2.2.jar:/opt/hadoop/share/hadoop/yarn/lib/jersey-server-1.9.jar:/opt/hadoop/share/hadoop/yarn/lib/jackson-core-asl-1.8.10.jar:/opt/hadoop/share/hadoop/yarn/lib/commons-logging-1.1.3.jar:/opt/hadoop/share/hadoop/yarn/lib/jackson-jaxrs-1.8.10.jar:/opt/hadoop/share/hadoop/yarn/lib/jaxb-impl-2.2.3-1.jar:/opt/hadoop/share/hadoop/yarn/lib/jline-2.11.jar:/opt/hadoop/share/hadoop/yarn/lib/jackson-mapper-asl-1.8.10-cloudera.1.jar:/opt/hadoop/share/hadoop/yarn/lib/jersey-guice-1.9.jar:/opt/hadoop/share/hadoop/yarn/lib/jsr305-3.0.0.jar:/opt/hadoop/share/hadoop/yarn/lib/log4j-1.2.17.jar:/opt/hadoop/share/hadoop/yarn/lib/xz-1.0.jar:/opt/hadoop/share/hadoop/yarn/lib/javax.inject-1.jar:/opt/hadoop/share/hadoop/yarn/lib/jaxb-api-2.2.2.jar:/opt/hadoop/share/hadoop/yarn/lib/commons-compress-1.4.1.jar:/opt/hadoop/share/hadoop/yarn/lib/jersey-json-1.9.jar:/opt/hadoop/share/hadoop/yarn/lib/stax-api-1.0-2.jar:/opt/hadoop/share/hadoop/yarn/lib/jetty-6.1.26.cloudera.4.jar:/opt/hadoop/share/hadoop/yarn/lib/jettison-1.1.jar:/opt/hadoop/share/hadoop/yarn/lib/protobuf-java-2.5.0.jar:/opt/hadoop/share/hadoop/yarn/lib/guice-servlet-3.0.jar:/opt/hadoop/share/hadoop/yarn/lib/commons-io-2.4.jar:/opt/hadoop/share/hadoop/yarn/lib/leveldbjni-all-1.8.jar:/opt/hadoop/share/hadoop/yarn/lib/jersey-client-1.9.jar:/opt/hadoop/share/hadoop/yarn/lib/jackson-xc-1.8.10.jar:/opt/hadoop/share/hadoop/yarn/lib/jersey-core-1.9.jar:/opt/hadoop/share/hadoop/yarn/hadoop-yarn-client-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/yarn/hadoop-yarn-server-web-proxy-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/yarn/hadoop-yarn-server-common-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/yarn/hadoop-yarn-server-tests-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/yarn/hadoop-yarn-server-nodemanager-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/yarn/hadoop-yarn-api-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/yarn/hadoop-yarn-registry-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/yarn/hadoop-yarn-server-applicationhistoryservice-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/yarn/hadoop-yarn-common-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/yarn/hadoop-yarn-applications-distributedshell-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/yarn/hadoop-yarn-server-resourcemanager-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/yarn/hadoop-yarn-applications-unmanaged-am-launcher-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/mapreduce/lib/hadoop-annotations-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/mapreduce/lib/aopalliance-1.0.jar:/opt/hadoop/share/hadoop/mapreduce/lib/asm-3.2.jar:/opt/hadoop/share/hadoop/mapreduce/lib/guice-3.0.jar:/opt/hadoop/share/hadoop/mapreduce/lib/snappy-java-1.0.4.1.jar:/opt/hadoop/share/hadoop/mapreduce/lib/jersey-server-1.9.jar:/opt/hadoop/share/hadoop/mapreduce/lib/jackson-core-asl-1.8.10.jar:/opt/hadoop/share/hadoop/mapreduce/lib/jackson-mapper-asl-1.8.10-cloudera.1.jar:/opt/hadoop/share/hadoop/mapreduce/lib/hamcrest-core-1.3.jar:/opt/hadoop/share/hadoop/mapreduce/lib/jersey-guice-1.9.jar:/opt/hadoop/share/hadoop/mapreduce/lib/log4j-1.2.17.jar:/opt/hadoop/share/hadoop/mapreduce/lib/xz-1.0.jar:/opt/hadoop/share/hadoop/mapreduce/lib/junit-4.11.jar:/opt/hadoop/share/hadoop/mapreduce/lib/javax.inject-1.jar:/opt/hadoop/share/hadoop/mapreduce/lib/commons-compress-1.4.1.jar:/opt/hadoop/share/hadoop/mapreduce/lib/protobuf-java-2.5.0.jar:/opt/hadoop/share/hadoop/mapreduce/lib/avro-1.7.6-cdh5.16.1.jar:/opt/hadoop/share/hadoop/mapreduce/lib/guice-servlet-3.0.jar:/opt/hadoop/share/hadoop/mapreduce/lib/commons-io-2.4.jar:/opt/hadoop/share/hadoop/mapreduce/lib/leveldbjni-all-1.8.jar:/opt/hadoop/share/hadoop/mapreduce/lib/netty-3.10.5.Final.jar:/opt/hadoop/share/hadoop/mapreduce/lib/paranamer-2.3.jar:/opt/hadoop/share/hadoop/mapreduce/lib/jersey-core-1.9.jar:/opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-client-hs-plugins-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-client-app-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-client-shuffle-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-client-common-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-client-nativetask-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-client-jobclient-2.6.0-cdh5.16.1-tests.jar:/opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-client-hs-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-client-core-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-client-jobclient-2.6.0-cdh5.16.1.jar)"
# classpath = "$CLASSPATH:%JAVA_HOME%/lib/dt.jar:%JAVA_HOME%/lib/tools.jar:$(/opt/hadoop/etc/hadoop:/opt/hadoop/share/hadoop/common/lib/api-asn1-api-1.0.0-M20.jar:/opt/hadoop/share/hadoop/common/lib/hadoop-annotations-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/common/lib/activation-1.1.jar:/opt/hadoop/share/hadoop/common/lib/commons-codec-1.4.jar:/opt/hadoop/share/hadoop/common/lib/jasper-runtime-5.5.23.jar:/opt/hadoop/share/hadoop/common/lib/jsch-0.1.42.jar:/opt/hadoop/share/hadoop/common/lib/hadoop-auth-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/common/lib/jsp-api-2.1.jar:/opt/hadoop/share/hadoop/common/lib/asm-3.2.jar:/opt/hadoop/share/hadoop/common/lib/commons-lang-2.6.jar:/opt/hadoop/share/hadoop/common/lib/commons-beanutils-1.9.2.jar:/opt/hadoop/share/hadoop/common/lib/zookeeper-3.4.5-cdh5.16.1.jar:/opt/hadoop/share/hadoop/common/lib/api-util-1.0.0-M20.jar:/opt/hadoop/share/hadoop/common/lib/jetty-util-6.1.26.cloudera.4.jar:/opt/hadoop/share/hadoop/common/lib/snappy-java-1.0.4.1.jar:/opt/hadoop/share/hadoop/common/lib/guava-11.0.2.jar:/opt/hadoop/share/hadoop/common/lib/apacheds-kerberos-codec-2.0.0-M15.jar:/opt/hadoop/share/hadoop/common/lib/commons-cli-1.2.jar:/opt/hadoop/share/hadoop/common/lib/servlet-api-2.5.jar:/opt/hadoop/share/hadoop/common/lib/commons-collections-3.2.2.jar:/opt/hadoop/share/hadoop/common/lib/jersey-server-1.9.jar:/opt/hadoop/share/hadoop/common/lib/commons-digester-1.8.jar:/opt/hadoop/share/hadoop/common/lib/jasper-compiler-5.5.23.jar:/opt/hadoop/share/hadoop/common/lib/java-xmlbuilder-0.4.jar:/opt/hadoop/share/hadoop/common/lib/curator-client-2.7.1.jar:/opt/hadoop/share/hadoop/common/lib/commons-logging-1.1.3.jar:/opt/hadoop/share/hadoop/common/lib/jackson-jaxrs-1.8.10.jar:/opt/hadoop/share/hadoop/common/lib/jaxb-impl-2.2.3-1.jar:/opt/hadoop/share/hadoop/common/lib/slf4j-log4j12-1.7.5.jar:/opt/hadoop/share/hadoop/common/lib/gson-2.2.4.jar:/opt/hadoop/share/hadoop/common/lib/commons-configuration-1.6.jar:/opt/hadoop/share/hadoop/common/lib/commons-httpclient-3.1.jar:/opt/hadoop/share/hadoop/common/lib/hamcrest-core-1.3.jar:/opt/hadoop/share/hadoop/common/lib/httpclient-4.2.5.jar:/opt/hadoop/share/hadoop/common/lib/jets3t-0.9.0.jar:/opt/hadoop/share/hadoop/common/lib/xmlenc-0.52.jar:/opt/hadoop/share/hadoop/common/lib/logredactor-1.0.3.jar:/opt/hadoop/share/hadoop/common/lib/slf4j-api-1.7.5.jar:/opt/hadoop/share/hadoop/common/lib/htrace-core4-4.0.1-incubating.jar:/opt/hadoop/share/hadoop/common/lib/curator-recipes-2.7.1.jar:/opt/hadoop/share/hadoop/common/lib/apacheds-i18n-2.0.0-M15.jar:/opt/hadoop/share/hadoop/common/lib/jsr305-3.0.0.jar:/opt/hadoop/share/hadoop/common/lib/log4j-1.2.17.jar:/opt/hadoop/share/hadoop/common/lib/xz-1.0.jar:/opt/hadoop/share/hadoop/common/lib/junit-4.11.jar:/opt/hadoop/share/hadoop/common/lib/jaxb-api-2.2.2.jar:/opt/hadoop/share/hadoop/common/lib/commons-beanutils-core-1.8.0.jar:/opt/hadoop/share/hadoop/common/lib/commons-compress-1.4.1.jar:/opt/hadoop/share/hadoop/common/lib/commons-net-3.1.jar:/opt/hadoop/share/hadoop/common/lib/jersey-json-1.9.jar:/opt/hadoop/share/hadoop/common/lib/stax-api-1.0-2.jar:/opt/hadoop/share/hadoop/common/lib/commons-el-1.0.jar:/opt/hadoop/share/hadoop/common/lib/mockito-all-1.8.5.jar:/opt/hadoop/share/hadoop/common/lib/jetty-6.1.26.cloudera.4.jar:/opt/hadoop/share/hadoop/common/lib/jettison-1.1.jar:/opt/hadoop/share/hadoop/common/lib/protobuf-java-2.5.0.jar:/opt/hadoop/share/hadoop/common/lib/avro-1.7.6-cdh5.16.1.jar:/opt/hadoop/share/hadoop/common/lib/httpcore-4.2.5.jar:/opt/hadoop/share/hadoop/common/lib/commons-io-2.4.jar:/opt/hadoop/share/hadoop/common/lib/netty-3.10.5.Final.jar:/opt/hadoop/share/hadoop/common/lib/paranamer-2.3.jar:/opt/hadoop/share/hadoop/common/lib/curator-framework-2.7.1.jar:/opt/hadoop/share/hadoop/common/lib/jackson-xc-1.8.10.jar:/opt/hadoop/share/hadoop/common/lib/commons-math3-3.1.1.jar:/opt/hadoop/share/hadoop/common/lib/jersey-core-1.9.jar:/opt/hadoop/share/hadoop/common/hadoop-common-2.6.0-cdh5.16.1-tests.jar:/opt/hadoop/share/hadoop/common/hadoop-nfs-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/common/hadoop-common-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/hdfs:/opt/hadoop/share/hadoop/hdfs/lib/commons-codec-1.4.jar:/opt/hadoop/share/hadoop/hdfs/lib/jasper-runtime-5.5.23.jar:/opt/hadoop/share/hadoop/hdfs/lib/jsp-api-2.1.jar:/opt/hadoop/share/hadoop/hdfs/lib/asm-3.2.jar:/opt/hadoop/share/hadoop/hdfs/lib/commons-lang-2.6.jar:/opt/hadoop/share/hadoop/hdfs/lib/jetty-util-6.1.26.cloudera.4.jar:/opt/hadoop/share/hadoop/hdfs/lib/guava-11.0.2.jar:/opt/hadoop/share/hadoop/hdfs/lib/xml-apis-1.3.04.jar:/opt/hadoop/share/hadoop/hdfs/lib/commons-cli-1.2.jar:/opt/hadoop/share/hadoop/hdfs/lib/servlet-api-2.5.jar:/opt/hadoop/share/hadoop/hdfs/lib/commons-daemon-1.0.13.jar:/opt/hadoop/share/hadoop/hdfs/lib/jersey-server-1.9.jar:/opt/hadoop/share/hadoop/hdfs/lib/jackson-core-asl-1.8.10.jar:/opt/hadoop/share/hadoop/hdfs/lib/commons-logging-1.1.3.jar:/opt/hadoop/share/hadoop/hdfs/lib/jackson-mapper-asl-1.8.10-cloudera.1.jar:/opt/hadoop/share/hadoop/hdfs/lib/xmlenc-0.52.jar:/opt/hadoop/share/hadoop/hdfs/lib/htrace-core4-4.0.1-incubating.jar:/opt/hadoop/share/hadoop/hdfs/lib/jsr305-3.0.0.jar:/opt/hadoop/share/hadoop/hdfs/lib/log4j-1.2.17.jar:/opt/hadoop/share/hadoop/hdfs/lib/xercesImpl-2.9.1.jar:/opt/hadoop/share/hadoop/hdfs/lib/commons-el-1.0.jar:/opt/hadoop/share/hadoop/hdfs/lib/jetty-6.1.26.cloudera.4.jar:/opt/hadoop/share/hadoop/hdfs/lib/protobuf-java-2.5.0.jar:/opt/hadoop/share/hadoop/hdfs/lib/commons-io-2.4.jar:/opt/hadoop/share/hadoop/hdfs/lib/leveldbjni-all-1.8.jar:/opt/hadoop/share/hadoop/hdfs/lib/netty-3.10.5.Final.jar:/opt/hadoop/share/hadoop/hdfs/lib/jersey-core-1.9.jar:/opt/hadoop/share/hadoop/hdfs/hadoop-hdfs-nfs-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/hdfs/hadoop-hdfs-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/hdfs/hadoop-hdfs-2.6.0-cdh5.16.1-tests.jar:/opt/hadoop/share/hadoop/yarn/lib/activation-1.1.jar:/opt/hadoop/share/hadoop/yarn/lib/commons-codec-1.4.jar:/opt/hadoop/share/hadoop/yarn/lib/aopalliance-1.0.jar:/opt/hadoop/share/hadoop/yarn/lib/asm-3.2.jar:/opt/hadoop/share/hadoop/yarn/lib/commons-lang-2.6.jar:/opt/hadoop/share/hadoop/yarn/lib/zookeeper-3.4.5-cdh5.16.1.jar:/opt/hadoop/share/hadoop/yarn/lib/guice-3.0.jar:/opt/hadoop/share/hadoop/yarn/lib/jetty-util-6.1.26.cloudera.4.jar:/opt/hadoop/share/hadoop/yarn/lib/guava-11.0.2.jar:/opt/hadoop/share/hadoop/yarn/lib/commons-cli-1.2.jar:/opt/hadoop/share/hadoop/yarn/lib/servlet-api-2.5.jar:/opt/hadoop/share/hadoop/yarn/lib/commons-collections-3.2.2.jar:/opt/hadoop/share/hadoop/yarn/lib/jersey-server-1.9.jar:/opt/hadoop/share/hadoop/yarn/lib/jackson-core-asl-1.8.10.jar:/opt/hadoop/share/hadoop/yarn/lib/commons-logging-1.1.3.jar:/opt/hadoop/share/hadoop/yarn/lib/jackson-jaxrs-1.8.10.jar:/opt/hadoop/share/hadoop/yarn/lib/jaxb-impl-2.2.3-1.jar:/opt/hadoop/share/hadoop/yarn/lib/jline-2.11.jar:/opt/hadoop/share/hadoop/yarn/lib/jackson-mapper-asl-1.8.10-cloudera.1.jar:/opt/hadoop/share/hadoop/yarn/lib/jersey-guice-1.9.jar:/opt/hadoop/share/hadoop/yarn/lib/jsr305-3.0.0.jar:/opt/hadoop/share/hadoop/yarn/lib/log4j-1.2.17.jar:/opt/hadoop/share/hadoop/yarn/lib/xz-1.0.jar:/opt/hadoop/share/hadoop/yarn/lib/javax.inject-1.jar:/opt/hadoop/share/hadoop/yarn/lib/jaxb-api-2.2.2.jar:/opt/hadoop/share/hadoop/yarn/lib/commons-compress-1.4.1.jar:/opt/hadoop/share/hadoop/yarn/lib/jersey-json-1.9.jar:/opt/hadoop/share/hadoop/yarn/lib/stax-api-1.0-2.jar:/opt/hadoop/share/hadoop/yarn/lib/jetty-6.1.26.cloudera.4.jar:/opt/hadoop/share/hadoop/yarn/lib/jettison-1.1.jar:/opt/hadoop/share/hadoop/yarn/lib/protobuf-java-2.5.0.jar:/opt/hadoop/share/hadoop/yarn/lib/guice-servlet-3.0.jar:/opt/hadoop/share/hadoop/yarn/lib/commons-io-2.4.jar:/opt/hadoop/share/hadoop/yarn/lib/leveldbjni-all-1.8.jar:/opt/hadoop/share/hadoop/yarn/lib/jersey-client-1.9.jar:/opt/hadoop/share/hadoop/yarn/lib/jackson-xc-1.8.10.jar:/opt/hadoop/share/hadoop/yarn/lib/jersey-core-1.9.jar:/opt/hadoop/share/hadoop/yarn/hadoop-yarn-client-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/yarn/hadoop-yarn-server-web-proxy-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/yarn/hadoop-yarn-server-common-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/yarn/hadoop-yarn-server-tests-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/yarn/hadoop-yarn-server-nodemanager-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/yarn/hadoop-yarn-api-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/yarn/hadoop-yarn-registry-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/yarn/hadoop-yarn-server-applicationhistoryservice-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/yarn/hadoop-yarn-common-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/yarn/hadoop-yarn-applications-distributedshell-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/yarn/hadoop-yarn-server-resourcemanager-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/yarn/hadoop-yarn-applications-unmanaged-am-launcher-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/mapreduce/lib/hadoop-annotations-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/mapreduce/lib/aopalliance-1.0.jar:/opt/hadoop/share/hadoop/mapreduce/lib/asm-3.2.jar:/opt/hadoop/share/hadoop/mapreduce/lib/guice-3.0.jar:/opt/hadoop/share/hadoop/mapreduce/lib/snappy-java-1.0.4.1.jar:/opt/hadoop/share/hadoop/mapreduce/lib/jersey-server-1.9.jar:/opt/hadoop/share/hadoop/mapreduce/lib/jackson-core-asl-1.8.10.jar:/opt/hadoop/share/hadoop/mapreduce/lib/jackson-mapper-asl-1.8.10-cloudera.1.jar:/opt/hadoop/share/hadoop/mapreduce/lib/hamcrest-core-1.3.jar:/opt/hadoop/share/hadoop/mapreduce/lib/jersey-guice-1.9.jar:/opt/hadoop/share/hadoop/mapreduce/lib/log4j-1.2.17.jar:/opt/hadoop/share/hadoop/mapreduce/lib/xz-1.0.jar:/opt/hadoop/share/hadoop/mapreduce/lib/junit-4.11.jar:/opt/hadoop/share/hadoop/mapreduce/lib/javax.inject-1.jar:/opt/hadoop/share/hadoop/mapreduce/lib/commons-compress-1.4.1.jar:/opt/hadoop/share/hadoop/mapreduce/lib/protobuf-java-2.5.0.jar:/opt/hadoop/share/hadoop/mapreduce/lib/avro-1.7.6-cdh5.16.1.jar:/opt/hadoop/share/hadoop/mapreduce/lib/guice-servlet-3.0.jar:/opt/hadoop/share/hadoop/mapreduce/lib/commons-io-2.4.jar:/opt/hadoop/share/hadoop/mapreduce/lib/leveldbjni-all-1.8.jar:/opt/hadoop/share/hadoop/mapreduce/lib/netty-3.10.5.Final.jar:/opt/hadoop/share/hadoop/mapreduce/lib/paranamer-2.3.jar:/opt/hadoop/share/hadoop/mapreduce/lib/jersey-core-1.9.jar:/opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-client-hs-plugins-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-client-app-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-client-shuffle-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-client-common-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-client-nativetask-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-client-jobclient-2.6.0-cdh5.16.1-tests.jar:/opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-client-hs-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-client-core-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-client-jobclient-2.6.0-cdh5.16.1.jar)"
# a = "export CLASSPATH='$(hadoop classpath --glob)'"
# os.system(a)
# print("环境")
# print(os.popen('hadoop classpath --glob').read())
# os.system('export MY_DATA="$LD_LIBRARY_PATH:/opt/hadoop/lib/native"')
# os.system('export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/hadoop/lib/native"')
path
=
"hdfs://172.16.32.4:8020/strategy/esmm/"
tf
.
logging
.
set_verbosity
(
tf
.
logging
.
INFO
)
tf
.
app
.
run
()
...
...
tensnsorflow/multi.py
View file @
3e6ab5c5
...
...
@@ -7,6 +7,7 @@ from pyspark.sql import SparkSession
import
datetime
import
pandas
as
pd
import
subprocess
import
tensorflow
as
tf
def
app_list_func
(
x
,
l
):
...
...
@@ -142,7 +143,22 @@ def get_filename(dir_in):
x
.
append
(
t
)
return
x
def
get_hdfs
(
dir_in
):
pre_path
=
"hdfs://172.16.32.4:8020"
args
=
"hdfs dfs -ls "
+
dir_in
+
" | awk '{print $8}'"
proc
=
subprocess
.
Popen
(
args
,
stdout
=
subprocess
.
PIPE
,
stderr
=
subprocess
.
PIPE
,
shell
=
True
)
s_output
,
s_err
=
proc
.
communicate
()
all_dart_dirs
=
s_output
.
split
()
a
=
[]
for
i
in
all_dart_dirs
:
b
=
str
(
i
)
.
split
(
"/"
)[
4
]
if
b
[:
4
]
==
"part"
:
tmp
=
pre_path
+
str
(
i
)[
2
:
-
1
]
a
.
append
(
tmp
)
return
a
if
__name__
==
'__main__'
:
print
(
"hello"
)
# sparkConf = SparkConf().set("spark.hive.mapred.supports.subdirectories", "true") \
# .set("spark.hadoop.mapreduce.input.fileinputformat.input.dir.recursive", "true") \
# .set("spark.tispark.plan.allow_index_double_read", "false") \
...
...
@@ -161,22 +177,17 @@ if __name__ == '__main__':
#
# validate_date, value_map, app_list_map = feature()
# get_predict(validate_date, value_map, app_list_map)
# [path + "tr/part-r-00000"]
#
#
# spark = SparkSession.builder.getOrCreate()
#
# b = [("a", 1), ("a", 1), ("b", 3), ("a", 2)]
# rdd = spark.sparkContext.parallelize(b)
# df = spark.createDataFrame(rdd).toDF("id", "n")
# df.show()
# df.createOrReplaceTempView("df")
# t = spark.sql("select id from df").map()
import
glob
import
random
tr_files
=
glob
.
glob
(
"/home/gmuser/test/*"
)
random
.
shuffle
(
tr_files
)
print
(
"tr_files:"
,
tr_files
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment