Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
915416d1
Commit
915416d1
authored
5 years ago
by
Your Name
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
change train.py
parent
1a8494e6
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
14 additions
and
31 deletions
+14
-31
train.py
eda/esmm/Model_pipline/train.py
+14
-31
No files found.
eda/esmm/Model_pipline/train.py
View file @
915416d1
...
@@ -13,7 +13,6 @@ import tensorflow as tf
...
@@ -13,7 +13,6 @@ import tensorflow as tf
import
subprocess
import
subprocess
import
time
import
time
import
glob
import
glob
import
pandas
as
pd
import
random
import
random
#################### CMD Arguments ####################
#################### CMD Arguments ####################
...
@@ -66,10 +65,7 @@ def input_fn(filenames, batch_size=32, num_epochs=1, perform_shuffle=False):
...
@@ -66,10 +65,7 @@ def input_fn(filenames, batch_size=32, num_epochs=1, perform_shuffle=False):
"tag6_list"
:
tf
.
VarLenFeature
(
tf
.
int64
),
"tag6_list"
:
tf
.
VarLenFeature
(
tf
.
int64
),
"tag7_list"
:
tf
.
VarLenFeature
(
tf
.
int64
),
"tag7_list"
:
tf
.
VarLenFeature
(
tf
.
int64
),
"search_tag2_list"
:
tf
.
VarLenFeature
(
tf
.
int64
),
"search_tag2_list"
:
tf
.
VarLenFeature
(
tf
.
int64
),
"search_tag3_list"
:
tf
.
VarLenFeature
(
tf
.
int64
),
"search_tag3_list"
:
tf
.
VarLenFeature
(
tf
.
int64
)
"uid"
:
tf
.
VarLenFeature
(
tf
.
string
),
"city"
:
tf
.
VarLenFeature
(
tf
.
string
),
"cid_id"
:
tf
.
VarLenFeature
(
tf
.
string
)
}
}
parsed
=
tf
.
parse_single_example
(
record
,
features
)
parsed
=
tf
.
parse_single_example
(
record
,
features
)
y
=
parsed
.
pop
(
'y'
)
y
=
parsed
.
pop
(
'y'
)
...
@@ -139,9 +135,6 @@ def model_fn(features, labels, mode, params):
...
@@ -139,9 +135,6 @@ def model_fn(features, labels, mode, params):
tag7_list
=
features
[
'tag7_list'
]
tag7_list
=
features
[
'tag7_list'
]
search_tag2_list
=
features
[
'search_tag2_list'
]
search_tag2_list
=
features
[
'search_tag2_list'
]
search_tag3_list
=
features
[
'search_tag3_list'
]
search_tag3_list
=
features
[
'search_tag3_list'
]
uid
=
features
[
'uid'
]
city
=
features
[
'city'
]
cid_id
=
features
[
'cid_id'
]
if
FLAGS
.
task_type
!=
"infer"
:
if
FLAGS
.
task_type
!=
"infer"
:
y
=
labels
[
'y'
]
y
=
labels
[
'y'
]
...
@@ -168,10 +161,6 @@ def model_fn(features, labels, mode, params):
...
@@ -168,10 +161,6 @@ def model_fn(features, labels, mode, params):
x_concat
=
tf
.
concat
([
tf
.
reshape
(
embedding_id
,
shape
=
[
-
1
,
common_dims
]),
app_id
,
level2
,
level3
,
tag1
,
x_concat
=
tf
.
concat
([
tf
.
reshape
(
embedding_id
,
shape
=
[
-
1
,
common_dims
]),
app_id
,
level2
,
level3
,
tag1
,
tag2
,
tag3
,
tag4
,
tag5
,
tag6
,
tag7
,
search_tag2
,
search_tag3
],
axis
=
1
)
tag2
,
tag3
,
tag4
,
tag5
,
tag6
,
tag7
,
search_tag2
,
search_tag3
],
axis
=
1
)
uid
=
tf
.
sparse
.
to_dense
(
uid
,
default_value
=
""
)
city
=
tf
.
sparse
.
to_dense
(
city
,
default_value
=
""
)
cid_id
=
tf
.
sparse
.
to_dense
(
cid_id
,
default_value
=
""
)
with
tf
.
name_scope
(
"CVR_Task"
):
with
tf
.
name_scope
(
"CVR_Task"
):
if
mode
==
tf
.
estimator
.
ModeKeys
.
TRAIN
:
if
mode
==
tf
.
estimator
.
ModeKeys
.
TRAIN
:
train_phase
=
True
train_phase
=
True
...
@@ -216,7 +205,7 @@ def model_fn(features, labels, mode, params):
...
@@ -216,7 +205,7 @@ def model_fn(features, labels, mode, params):
pcvr
=
tf
.
sigmoid
(
y_cvr
)
pcvr
=
tf
.
sigmoid
(
y_cvr
)
pctcvr
=
pctr
*
pcvr
pctcvr
=
pctr
*
pcvr
predictions
=
{
"pc
tcvr"
:
pctcvr
,
"uid"
:
uid
,
"city"
:
city
,
"cid_id"
:
cid_id
}
predictions
=
{
"pc
vr"
:
pcvr
,
"pctr"
:
pctr
,
"pctcvr"
:
pctcvr
}
export_outputs
=
{
tf
.
saved_model
.
signature_constants
.
DEFAULT_SERVING_SIGNATURE_DEF_KEY
:
tf
.
estimator
.
export
.
PredictOutput
(
predictions
)}
export_outputs
=
{
tf
.
saved_model
.
signature_constants
.
DEFAULT_SERVING_SIGNATURE_DEF_KEY
:
tf
.
estimator
.
export
.
PredictOutput
(
predictions
)}
# Provide an estimator spec for `ModeKeys.PREDICT`
# Provide an estimator spec for `ModeKeys.PREDICT`
if
mode
==
tf
.
estimator
.
ModeKeys
.
PREDICT
:
if
mode
==
tf
.
estimator
.
ModeKeys
.
PREDICT
:
...
@@ -237,11 +226,11 @@ def model_fn(features, labels, mode, params):
...
@@ -237,11 +226,11 @@ def model_fn(features, labels, mode, params):
# Provide an estimator spec for `ModeKeys.EVAL`
# Provide an estimator spec for `ModeKeys.EVAL`
eval_metric_ops
=
{
eval_metric_ops
=
{
#
"CTR_AUC": tf.metrics.auc(y, pctr),
"CTR_AUC"
:
tf
.
metrics
.
auc
(
y
,
pctr
),
#"CTR_F1": tf.contrib.metrics.f1_score(y,pctr),
#"CTR_F1": tf.contrib.metrics.f1_score(y,pctr),
#"CTR_Precision": tf.metrics.precision(y,pctr),
#"CTR_Precision": tf.metrics.precision(y,pctr),
#"CTR_Recall": tf.metrics.recall(y,pctr),
#"CTR_Recall": tf.metrics.recall(y,pctr),
#
"CVR_AUC": tf.metrics.auc(z, pcvr),
"CVR_AUC"
:
tf
.
metrics
.
auc
(
z
,
pcvr
),
"CTCVR_AUC"
:
tf
.
metrics
.
auc
(
z
,
pctcvr
)
"CTCVR_AUC"
:
tf
.
metrics
.
auc
(
z
,
pctcvr
)
}
}
if
mode
==
tf
.
estimator
.
ModeKeys
.
EVAL
:
if
mode
==
tf
.
estimator
.
ModeKeys
.
EVAL
:
...
@@ -324,7 +313,7 @@ def set_dist_env():
...
@@ -324,7 +313,7 @@ def set_dist_env():
print
(
json
.
dumps
(
tf_config
))
print
(
json
.
dumps
(
tf_config
))
os
.
environ
[
'TF_CONFIG'
]
=
json
.
dumps
(
tf_config
)
os
.
environ
[
'TF_CONFIG'
]
=
json
.
dumps
(
tf_config
)
def
main
(
te_files
):
def
main
(
_
):
#------check Arguments------
#------check Arguments------
if
FLAGS
.
dt_dir
==
""
:
if
FLAGS
.
dt_dir
==
""
:
FLAGS
.
dt_dir
=
(
date
.
today
()
+
timedelta
(
-
1
))
.
strftime
(
'
%
Y
%
m
%
d'
)
FLAGS
.
dt_dir
=
(
date
.
today
()
+
timedelta
(
-
1
))
.
strftime
(
'
%
Y
%
m
%
d'
)
...
@@ -333,7 +322,7 @@ def main(te_files):
...
@@ -333,7 +322,7 @@ def main(te_files):
tr_files
=
[
"hdfs://172.16.32.4:8020/strategy/esmm/tr/part-r-00000"
]
tr_files
=
[
"hdfs://172.16.32.4:8020/strategy/esmm/tr/part-r-00000"
]
va_files
=
[
"hdfs://172.16.32.4:8020/strategy/esmm/va/part-r-00000"
]
va_files
=
[
"hdfs://172.16.32.4:8020/strategy/esmm/va/part-r-00000"
]
#
te_files = ["%s/part-r-00000" % FLAGS.hdfs_dir]
te_files
=
[
"
%
s/part-r-00000"
%
FLAGS
.
hdfs_dir
]
if
FLAGS
.
clear_existing_model
:
if
FLAGS
.
clear_existing_model
:
try
:
try
:
...
@@ -371,11 +360,10 @@ def main(te_files):
...
@@ -371,11 +360,10 @@ def main(te_files):
for
key
,
value
in
sorted
(
result
.
items
()):
for
key
,
value
in
sorted
(
result
.
items
()):
print
(
'
%
s:
%
s'
%
(
key
,
value
))
print
(
'
%
s:
%
s'
%
(
key
,
value
))
elif
FLAGS
.
task_type
==
'infer'
:
elif
FLAGS
.
task_type
==
'infer'
:
preds
=
Estimator
.
predict
(
input_fn
=
lambda
:
input_fn
(
te_files
,
num_epochs
=
1
,
batch_size
=
FLAGS
.
batch_size
),
predict_keys
=
[
"pctcvr"
,
"uid"
,
"city"
,
"cid_id"
])
preds
=
Estimator
.
predict
(
input_fn
=
lambda
:
input_fn
(
te_files
,
num_epochs
=
1
,
batch_size
=
FLAGS
.
batch_size
),
predict_keys
=
[
"pctcvr"
,
"pctr"
,
"pcvr"
])
result
=
[]
with
open
(
FLAGS
.
local_dir
+
"/pred.txt"
,
"w"
)
as
fo
:
for
prob
in
preds
:
for
prob
in
preds
:
result
.
append
([
str
(
prob
[
"uid"
][
0
]),
str
(
prob
[
"city"
][
0
]),
str
(
prob
[
"cid_id"
][
0
]),
str
(
prob
[
'pctcvr'
])])
fo
.
write
(
"
%
f
\t
%
f
\t
%
f
\n
"
%
(
prob
[
'pctr'
],
prob
[
'pcvr'
],
prob
[
'pctcvr'
]))
return
result
elif
FLAGS
.
task_type
==
'export'
:
elif
FLAGS
.
task_type
==
'export'
:
print
(
"Not Implemented, Do It Yourself!"
)
print
(
"Not Implemented, Do It Yourself!"
)
...
@@ -383,13 +371,7 @@ def main(te_files):
...
@@ -383,13 +371,7 @@ def main(te_files):
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
b
=
time
.
time
()
b
=
time
.
time
()
path
=
"hdfs://172.16.32.4:8020/strategy/esmm/"
path
=
"hdfs://172.16.32.4:8020/strategy/esmm/"
# tf.logging.set_verbosity(tf.logging.INFO)
tf
.
logging
.
set_verbosity
(
tf
.
logging
.
INFO
)
te_files
=
[
"hdfs://172.16.32.4:8020/strategy/esmm/test_nearby/part-r-00000"
]
tf
.
app
.
run
()
print
(
"hello up"
)
result
=
main
(
te_files
)
df
=
pd
.
DataFrame
(
result
,
columns
=
[
"uid"
,
"city"
,
"cid_id"
,
"pctcvr"
])
df
.
head
(
10
)
print
(
"hello down"
)
print
(
"耗时(分钟):"
)
print
(
"耗时(分钟):"
)
print
((
time
.
time
()
-
b
)
/
60
)
print
((
time
.
time
()
-
b
)
/
60
)
\ No newline at end of file
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment