Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
7d05b362
Commit
7d05b362
authored
Jun 24, 2019
by
Your Name
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
test
parent
fd53cce3
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
18 additions
and
18 deletions
+18
-18
train.py
eda/esmm/Model_pipline/train.py
+18
-18
No files found.
eda/esmm/Model_pipline/train.py
View file @
7d05b362
...
...
@@ -10,12 +10,11 @@ import os
import
json
from
datetime
import
date
,
timedelta
import
tensorflow
as
tf
from
tensorflow.python.client
import
timeline
import
subprocess
import
time
import
glob
import
random
import
pandas
as
pd
import
random
#################### CMD Arguments ####################
FLAGS
=
tf
.
app
.
flags
.
FLAGS
...
...
@@ -48,7 +47,7 @@ tf.app.flags.DEFINE_string("servable_model_dir", '', "export servable model for
tf
.
app
.
flags
.
DEFINE_string
(
"task_type"
,
'train'
,
"task type {train, infer, eval, export}"
)
tf
.
app
.
flags
.
DEFINE_boolean
(
"clear_existing_model"
,
False
,
"clear existing model or not"
)
#40362692,0,0,216:9342395:1.0 301:9351665:1.0 205:7702673:1.0 206:8317829:1.0 207:8967741:1.0 508:9356012:2.30259 210:9059239:1.0 210:9042796:1.0 210:9076972:1.0 210:9103884:1.0 210:9063064:1.0 127_14:3529789:2.3979 127_14:3806412:2.70805
def
input_fn
(
filenames
,
batch_size
=
32
,
num_epochs
=
1
,
perform_shuffle
=
False
):
print
(
'Parsing'
,
filenames
)
def
_parse_fn
(
record
):
...
...
@@ -66,7 +65,8 @@ def input_fn(filenames, batch_size=32, num_epochs=1, perform_shuffle=False):
"tag5_list"
:
tf
.
VarLenFeature
(
tf
.
int64
),
"tag6_list"
:
tf
.
VarLenFeature
(
tf
.
int64
),
"tag7_list"
:
tf
.
VarLenFeature
(
tf
.
int64
),
"number"
:
tf
.
VarLenFeature
(
tf
.
int64
),
"search_tag2_list"
:
tf
.
VarLenFeature
(
tf
.
int64
),
"search_tag3_list"
:
tf
.
VarLenFeature
(
tf
.
int64
),
"uid"
:
tf
.
VarLenFeature
(
tf
.
string
),
"city"
:
tf
.
VarLenFeature
(
tf
.
string
),
"cid_id"
:
tf
.
VarLenFeature
(
tf
.
string
)
...
...
@@ -108,6 +108,7 @@ def input_fn(filenames, batch_size=32, num_epochs=1, perform_shuffle=False):
#print(batch_features,batch_labels)
return
batch_features
,
batch_labels
def
model_fn
(
features
,
labels
,
mode
,
params
):
"""Bulid Model function f(x) for Estimator."""
#------hyperparameters----
...
...
@@ -136,7 +137,8 @@ def model_fn(features, labels, mode, params):
tag5_list
=
features
[
'tag5_list'
]
tag6_list
=
features
[
'tag6_list'
]
tag7_list
=
features
[
'tag7_list'
]
number
=
features
[
'number'
]
search_tag2_list
=
features
[
'search_tag2_list'
]
search_tag3_list
=
features
[
'search_tag3_list'
]
uid
=
features
[
'uid'
]
city
=
features
[
'city'
]
cid_id
=
features
[
'cid_id'
]
...
...
@@ -158,12 +160,14 @@ def model_fn(features, labels, mode, params):
tag5
=
tf
.
nn
.
embedding_lookup_sparse
(
Feat_Emb
,
sp_ids
=
tag5_list
,
sp_weights
=
None
,
combiner
=
"sum"
)
tag6
=
tf
.
nn
.
embedding_lookup_sparse
(
Feat_Emb
,
sp_ids
=
tag6_list
,
sp_weights
=
None
,
combiner
=
"sum"
)
tag7
=
tf
.
nn
.
embedding_lookup_sparse
(
Feat_Emb
,
sp_ids
=
tag7_list
,
sp_weights
=
None
,
combiner
=
"sum"
)
search_tag2
=
tf
.
nn
.
embedding_lookup_sparse
(
Feat_Emb
,
sp_ids
=
search_tag2_list
,
sp_weights
=
None
,
combiner
=
"sum"
)
search_tag3
=
tf
.
nn
.
embedding_lookup_sparse
(
Feat_Emb
,
sp_ids
=
search_tag3_list
,
sp_weights
=
None
,
combiner
=
"sum"
)
# x_concat = tf.reshape(embedding_id,shape=[-1, common_dims]) # None * (F * K)
x_concat
=
tf
.
concat
([
tf
.
reshape
(
embedding_id
,
shape
=
[
-
1
,
common_dims
]),
app_id
,
level2
,
level3
,
tag1
,
tag2
,
tag3
,
tag4
,
tag5
,
tag6
,
tag7
],
axis
=
1
)
tag2
,
tag3
,
tag4
,
tag5
,
tag6
,
tag7
,
search_tag2
,
search_tag3
],
axis
=
1
)
sample_id
=
tf
.
sparse
.
to_dense
(
number
)
uid
=
tf
.
sparse
.
to_dense
(
uid
,
default_value
=
""
)
city
=
tf
.
sparse
.
to_dense
(
city
,
default_value
=
""
)
cid_id
=
tf
.
sparse
.
to_dense
(
cid_id
,
default_value
=
""
)
...
...
@@ -212,8 +216,7 @@ def model_fn(features, labels, mode, params):
pcvr
=
tf
.
sigmoid
(
y_cvr
)
pctcvr
=
pctr
*
pcvr
predictions
=
{
"pctcvr"
:
pctcvr
,
"sample_id"
:
sample_id
,
"uid"
:
uid
,
"city"
:
city
,
"cid_id"
:
cid_id
}
predictions
=
{
"pctcvr"
:
pctcvr
,
"uid"
:
uid
,
"city"
:
city
,
"cid_id"
:
cid_id
}
export_outputs
=
{
tf
.
saved_model
.
signature_constants
.
DEFAULT_SERVING_SIGNATURE_DEF_KEY
:
tf
.
estimator
.
export
.
PredictOutput
(
predictions
)}
# Provide an estimator spec for `ModeKeys.PREDICT`
if
mode
==
tf
.
estimator
.
ModeKeys
.
PREDICT
:
...
...
@@ -328,7 +331,7 @@ def main(te_files):
FLAGS
.
model_dir
=
FLAGS
.
model_dir
+
FLAGS
.
dt_dir
#FLAGS.data_dir = FLAGS.data_dir + FLAGS.dt_dir
tr_files
=
[
"hdfs://172.16.32.4:8020/strategy/esmm/t
est_t
r/part-r-00000"
]
tr_files
=
[
"hdfs://172.16.32.4:8020/strategy/esmm/tr/part-r-00000"
]
va_files
=
[
"hdfs://172.16.32.4:8020/strategy/esmm/va/part-r-00000"
]
# te_files = ["%s/part-r-00000" % FLAGS.hdfs_dir]
...
...
@@ -355,7 +358,6 @@ def main(te_files):
}
config
=
tf
.
estimator
.
RunConfig
()
.
replace
(
session_config
=
tf
.
ConfigProto
(
device_count
=
{
'GPU'
:
0
,
'CPU'
:
FLAGS
.
num_threads
}),
log_step_count_steps
=
FLAGS
.
log_steps
,
save_summary_steps
=
FLAGS
.
log_steps
)
Estimator
=
tf
.
estimator
.
Estimator
(
model_fn
=
model_fn
,
model_dir
=
FLAGS
.
model_dir
,
params
=
model_params
,
config
=
config
)
if
FLAGS
.
task_type
==
'train'
:
...
...
@@ -369,25 +371,24 @@ def main(te_files):
for
key
,
value
in
sorted
(
result
.
items
()):
print
(
'
%
s:
%
s'
%
(
key
,
value
))
elif
FLAGS
.
task_type
==
'infer'
:
preds
=
Estimator
.
predict
(
input_fn
=
lambda
:
input_fn
(
te_files
,
num_epochs
=
1
,
batch_size
=
FLAGS
.
batch_size
),
predict_keys
=
[
"pctcvr"
,
"
sample_id"
,
"
uid"
,
"city"
,
"cid_id"
])
preds
=
Estimator
.
predict
(
input_fn
=
lambda
:
input_fn
(
te_files
,
num_epochs
=
1
,
batch_size
=
FLAGS
.
batch_size
),
predict_keys
=
[
"pctcvr"
,
"uid"
,
"city"
,
"cid_id"
])
result
=
[]
for
prob
in
preds
:
result
.
append
([
str
(
prob
[
"sample_id"
][
0
]),
str
(
prob
[
"uid"
][
0
]),
str
(
prob
[
"city"
][
0
]),
str
(
prob
[
"cid_id"
][
0
]),
str
(
prob
[
'pctcvr'
])])
return
result
result
.
append
([
str
(
prob
[
"uid"
][
0
]),
str
(
prob
[
"city"
][
0
]),
str
(
prob
[
"cid_id"
][
0
]),
str
(
prob
[
'pctcvr'
])])
elif
FLAGS
.
task_type
==
'export'
:
print
(
"Not Implemented, Do It Yourself!"
)
if
__name__
==
"__main__"
:
b
=
time
.
time
()
path
=
"hdfs://172.16.32.4:8020/strategy/esmm/"
tf
.
logging
.
set_verbosity
(
tf
.
logging
.
INFO
)
te_files
=
[
"hdfs://172.16.32.4:8020/strategy/esmm/test_n
ative
/part-r-00000"
]
te_files
=
[
"hdfs://172.16.32.4:8020/strategy/esmm/test_n
earby
/part-r-00000"
]
print
(
"hello up"
)
result
=
main
(
te_files
)
df
=
pd
.
DataFrame
(
result
,
columns
=
[
"sample_id"
,
"uid"
,
"city"
,
"cid_id"
,
"pctcvr"
])
df
.
head
(
10
)
print
(
"hello down"
)
print
(
"耗时(分钟):"
)
print
((
time
.
time
()
-
b
)
/
60
)
\ No newline at end of file
print
((
time
.
time
()
-
b
)
/
60
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment