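Change train.py: drop the uid/city/cid_id pass-through features, output pctr/pcvr/pctcvr predictions to pred.txt, and launch via tf.app.run()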

915416d1 · Your Name · 1a8494e6 · 915416d1
Commit 915416d1 authored Jun 25, 2019 by Your Name
Hide whitespace changes
Inline Side-by-side

Showing with 14 additions and 31 deletions

train.py eda/esmm/Model_pipline/train.py +14 -31

No files found.
--- a/eda/esmm/Model_pipline/train.py
+++ b/eda/esmm/Model_pipline/train.py
@@ -13,7 +13,6 @@ import tensorflow as tf
 import subprocess
 import time
 import glob
-import pandas as pd
 import random
 #################### CMD Arguments ####################
@@ -66,10 +65,7 @@ def input_fn(filenames, batch_size=32, num_epochs=1, perform_shuffle=False):
            "tag6_list": tf.VarLenFeature(tf.int64),
            "tag7_list": tf.VarLenFeature(tf.int64),
            "search_tag2_list": tf.VarLenFeature(tf.int64),
-            "search_tag3_list": tf.VarLenFeature(tf.int64),
+            "search_tag3_list": tf.VarLenFeature(tf.int64)
-            "uid": tf.VarLenFeature(tf.string),
-            "city": tf.VarLenFeature(tf.string),
-            "cid_id": tf.VarLenFeature(tf.string)
        }
        parsed = tf.parse_single_example(record, features)
        y = parsed.pop('y')
@@ -139,9 +135,6 @@ def model_fn(features, labels, mode, params):
    tag7_list = features['tag7_list']
    search_tag2_list = features['search_tag2_list']
    search_tag3_list = features['search_tag3_list']
-    uid = features['uid']
-    city = features['city']
-    cid_id = features['cid_id']
    if FLAGS.task_type != "infer":
        y = labels['y']
@@ -168,10 +161,6 @@ def model_fn(features, labels, mode, params):
        x_concat = tf.concat([tf.reshape(embedding_id, shape=[-1, common_dims]), app_id, level2, level3, tag1,
                              tag2, tag3, tag4, tag5, tag6, tag7,search_tag2,search_tag3], axis=1)
-        uid = tf.sparse.to_dense(uid,default_value="")
-        city = tf.sparse.to_dense(city,default_value="")
-        cid_id = tf.sparse.to_dense(cid_id,default_value="")
    with tf.name_scope("CVR_Task"):
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_phase = True
@@ -216,7 +205,7 @@ def model_fn(features, labels, mode, params):
        pcvr = tf.sigmoid(y_cvr)
        pctcvr = pctr*pcvr
-    predictions={"pctcvr": pctcvr, "uid":uid, "city":city, "cid_id":cid_id}
+    predictions={"pcvr": pcvr, "pctr": pctr, "pctcvr": pctcvr}
    export_outputs = {tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: tf.estimator.export.PredictOutput(predictions)}
    # Provide an estimator spec for `ModeKeys.PREDICT`
    if mode == tf.estimator.ModeKeys.PREDICT:
@@ -237,11 +226,11 @@ def model_fn(features, labels, mode, params):
        # Provide an estimator spec for `ModeKeys.EVAL`
        eval_metric_ops = {
-            # "CTR_AUC": tf.metrics.auc(y, pctr),
+            "CTR_AUC": tf.metrics.auc(y, pctr),
            #"CTR_F1": tf.contrib.metrics.f1_score(y,pctr),
            #"CTR_Precision": tf.metrics.precision(y,pctr),
            #"CTR_Recall": tf.metrics.recall(y,pctr),
-            # "CVR_AUC": tf.metrics.auc(z, pcvr),
+            "CVR_AUC": tf.metrics.auc(z, pcvr),
            "CTCVR_AUC": tf.metrics.auc(z, pctcvr)
        }
        if mode == tf.estimator.ModeKeys.EVAL:
@@ -324,7 +313,7 @@ def set_dist_env():
        print(json.dumps(tf_config))
        os.environ['TF_CONFIG'] = json.dumps(tf_config)
-def main(te_files):
+def main(_):
    #------check Arguments------
    if FLAGS.dt_dir == "":
        FLAGS.dt_dir = (date.today() + timedelta(-1)).strftime('%Y%m%d')
@@ -333,7 +322,7 @@ def main(te_files):
    tr_files = ["hdfs://172.16.32.4:8020/strategy/esmm/tr/part-r-00000"]
    va_files = ["hdfs://172.16.32.4:8020/strategy/esmm/va/part-r-00000"]
-    # te_files = ["%s/part-r-00000" % FLAGS.hdfs_dir]
+    te_files = ["%s/part-r-00000" % FLAGS.hdfs_dir]
    if FLAGS.clear_existing_model:
        try:
@@ -371,11 +360,10 @@ def main(te_files):
        for key,value in sorted(result.items()):
            print('%s: %s' % (key,value))
    elif FLAGS.task_type == 'infer':
-        preds = Estimator.predict(input_fn=lambda: input_fn(te_files, num_epochs=1, batch_size=FLAGS.batch_size), predict_keys=["pctcvr","uid","city","cid_id"])
+        preds = Estimator.predict(input_fn=lambda: input_fn(te_files, num_epochs=1, batch_size=FLAGS.batch_size), predict_keys=["pctcvr","pctr","pcvr"])
-        result = []
+        with open(FLAGS.local_dir + "/pred.txt", "w") as fo:
-        for prob in preds:
+            for prob in preds:
-            result.append([str(prob["uid"][0]), str(prob["city"][0]), str(prob["cid_id"][0]), str(prob['pctcvr'])])
+                fo.write("%f\t%f\t%f\n" % (prob['pctr'], prob['pcvr'], prob['pctcvr']))
-        return result
    elif FLAGS.task_type == 'export':
        print("Not Implemented, Do It Yourself!")
@@ -383,13 +371,7 @@ def main(te_files):
 if __name__ == "__main__":
    b = time.time()
    path = "hdfs://172.16.32.4:8020/strategy/esmm/"
-    # tf.logging.set_verbosity(tf.logging.INFO)
+    tf.logging.set_verbosity(tf.logging.INFO)
-    te_files = ["hdfs://172.16.32.4:8020/strategy/esmm/test_nearby/part-r-00000"]
+    tf.app.run()
-    print("hello up")
-    result = main(te_files)
-    df = pd.DataFrame(result,columns=["uid","city","cid_id","pctcvr"])
-    df.head(10)
-    print("hello down")
    print("耗时(分钟)：")
    print((time.time()-b)/60)
\ No newline at end of file