Commit 915416d1 authored by Your Name

change train.py

parent 1a8494e6
@@ -13,7 +13,6 @@ import tensorflow as tf
 import subprocess
 import time
 import glob
-import pandas as pd
 import random
 #################### CMD Arguments ####################
@@ -66,10 +65,7 @@ def input_fn(filenames, batch_size=32, num_epochs=1, perform_shuffle=False):
             "tag6_list": tf.VarLenFeature(tf.int64),
             "tag7_list": tf.VarLenFeature(tf.int64),
             "search_tag2_list": tf.VarLenFeature(tf.int64),
-            "search_tag3_list": tf.VarLenFeature(tf.int64),
-            "uid": tf.VarLenFeature(tf.string),
-            "city": tf.VarLenFeature(tf.string),
-            "cid_id": tf.VarLenFeature(tf.string)
+            "search_tag3_list": tf.VarLenFeature(tf.int64)
         }
         parsed = tf.parse_single_example(record, features)
         y = parsed.pop('y')
@@ -139,9 +135,6 @@ def model_fn(features, labels, mode, params):
     tag7_list = features['tag7_list']
     search_tag2_list = features['search_tag2_list']
     search_tag3_list = features['search_tag3_list']
-    uid = features['uid']
-    city = features['city']
-    cid_id = features['cid_id']
     if FLAGS.task_type != "infer":
         y = labels['y']
@@ -168,10 +161,6 @@ def model_fn(features, labels, mode, params):
     x_concat = tf.concat([tf.reshape(embedding_id, shape=[-1, common_dims]), app_id, level2, level3, tag1,
                           tag2, tag3, tag4, tag5, tag6, tag7,search_tag2,search_tag3], axis=1)
-    uid = tf.sparse.to_dense(uid,default_value="")
-    city = tf.sparse.to_dense(city,default_value="")
-    cid_id = tf.sparse.to_dense(cid_id,default_value="")
     with tf.name_scope("CVR_Task"):
         if mode == tf.estimator.ModeKeys.TRAIN:
             train_phase = True
@@ -216,7 +205,7 @@ def model_fn(features, labels, mode, params):
     pcvr = tf.sigmoid(y_cvr)
     pctcvr = pctr*pcvr
-    predictions={"pctcvr": pctcvr, "uid":uid, "city":city, "cid_id":cid_id}
+    predictions={"pcvr": pcvr, "pctr": pctr, "pctcvr": pctcvr}
     export_outputs = {tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: tf.estimator.export.PredictOutput(predictions)}
     # Provide an estimator spec for `ModeKeys.PREDICT`
     if mode == tf.estimator.ModeKeys.PREDICT:
@@ -237,11 +226,11 @@ def model_fn(features, labels, mode, params):
     # Provide an estimator spec for `ModeKeys.EVAL`
     eval_metric_ops = {
-        # "CTR_AUC": tf.metrics.auc(y, pctr),
+        "CTR_AUC": tf.metrics.auc(y, pctr),
         #"CTR_F1": tf.contrib.metrics.f1_score(y,pctr),
         #"CTR_Precision": tf.metrics.precision(y,pctr),
         #"CTR_Recall": tf.metrics.recall(y,pctr),
-        # "CVR_AUC": tf.metrics.auc(z, pcvr),
+        "CVR_AUC": tf.metrics.auc(z, pcvr),
         "CTCVR_AUC": tf.metrics.auc(z, pctcvr)
     }
     if mode == tf.estimator.ModeKeys.EVAL:
@@ -324,7 +313,7 @@ def set_dist_env():
     print(json.dumps(tf_config))
     os.environ['TF_CONFIG'] = json.dumps(tf_config)
-def main(te_files):
+def main(_):
     #------check Arguments------
     if FLAGS.dt_dir == "":
         FLAGS.dt_dir = (date.today() + timedelta(-1)).strftime('%Y%m%d')
@@ -333,7 +322,7 @@ def main(te_files):
     tr_files = ["hdfs://172.16.32.4:8020/strategy/esmm/tr/part-r-00000"]
     va_files = ["hdfs://172.16.32.4:8020/strategy/esmm/va/part-r-00000"]
-    # te_files = ["%s/part-r-00000" % FLAGS.hdfs_dir]
+    te_files = ["%s/part-r-00000" % FLAGS.hdfs_dir]
     if FLAGS.clear_existing_model:
         try:
@@ -371,11 +360,10 @@ def main(te_files):
         for key,value in sorted(result.items()):
             print('%s: %s' % (key,value))
     elif FLAGS.task_type == 'infer':
-        preds = Estimator.predict(input_fn=lambda: input_fn(te_files, num_epochs=1, batch_size=FLAGS.batch_size), predict_keys=["pctcvr","uid","city","cid_id"])
-        result = []
-        for prob in preds:
-            result.append([str(prob["uid"][0]), str(prob["city"][0]), str(prob["cid_id"][0]), str(prob['pctcvr'])])
-        return result
+        preds = Estimator.predict(input_fn=lambda: input_fn(te_files, num_epochs=1, batch_size=FLAGS.batch_size), predict_keys=["pctcvr","pctr","pcvr"])
+        with open(FLAGS.local_dir + "/pred.txt", "w") as fo:
+            for prob in preds:
+                fo.write("%f\t%f\t%f\n" % (prob['pctr'], prob['pcvr'], prob['pctcvr']))
     elif FLAGS.task_type == 'export':
         print("Not Implemented, Do It Yourself!")
@@ -383,13 +371,7 @@
 if __name__ == "__main__":
     b = time.time()
     path = "hdfs://172.16.32.4:8020/strategy/esmm/"
-    # tf.logging.set_verbosity(tf.logging.INFO)
-    te_files = ["hdfs://172.16.32.4:8020/strategy/esmm/test_nearby/part-r-00000"]
-    print("hello up")
-    result = main(te_files)
-    df = pd.DataFrame(result,columns=["uid","city","cid_id","pctcvr"])
-    df.head(10)
-    print("hello down")
+    tf.logging.set_verbosity(tf.logging.INFO)
+    tf.app.run()
     print("耗时(分钟):")
     print((time.time()-b)/60)
\ No newline at end of file