test

7d05b362 · Your Name · fd53cce3 · 7d05b362
Commit 7d05b362 authored Jun 24, 2019 by Your Name
Show whitespace changes
Inline Side-by-side

Showing with 17 additions and 16 deletions

train.py eda/esmm/Model_pipline/train.py +17 -16

No files found.
--- a/eda/esmm/Model_pipline/train.py
+++ b/eda/esmm/Model_pipline/train.py
@@ -10,12 +10,11 @@ import os
 import json
 from datetime import date, timedelta
 import tensorflow as tf
-from tensorflow.python.client import timeline
 import subprocess
 import time
 import glob
-import random
 import pandas as pd
+import random
 #################### CMD Arguments ####################
 FLAGS = tf.app.flags.FLAGS
@@ -48,7 +47,7 @@ tf.app.flags.DEFINE_string("servable_model_dir", '', "export servable model for
 tf.app.flags.DEFINE_string("task_type", 'train', "task type {train, infer, eval, export}")
 tf.app.flags.DEFINE_boolean("clear_existing_model", False, "clear existing model or not")
-#40362692,0,0,216:9342395:1.0 301:9351665:1.0 205:7702673:1.0 206:8317829:1.0 207:8967741:1.0 508:9356012:2.30259 210:9059239:1.0 210:9042796:1.0 210:9076972:1.0 210:9103884:1.0 210:9063064:1.0 127_14:3529789:2.3979 127_14:3806412:2.70805
 def input_fn(filenames, batch_size=32, num_epochs=1, perform_shuffle=False):
    print('Parsing', filenames)
    def _parse_fn(record):
@@ -66,7 +65,8 @@ def input_fn(filenames, batch_size=32, num_epochs=1, perform_shuffle=False):
            "tag5_list": tf.VarLenFeature(tf.int64),
            "tag6_list": tf.VarLenFeature(tf.int64),
            "tag7_list": tf.VarLenFeature(tf.int64),
-            "number": tf.VarLenFeature(tf.int64),
+            "search_tag2_list": tf.VarLenFeature(tf.int64),
+            "search_tag3_list": tf.VarLenFeature(tf.int64),
            "uid": tf.VarLenFeature(tf.string),
            "city": tf.VarLenFeature(tf.string),
            "cid_id": tf.VarLenFeature(tf.string)
@@ -108,6 +108,7 @@ def input_fn(filenames, batch_size=32, num_epochs=1, perform_shuffle=False):
    #print(batch_features,batch_labels)
    return batch_features, batch_labels
 def model_fn(features, labels, mode, params):
    """Bulid Model function f(x) for Estimator."""
    #------hyperparameters----
@@ -136,7 +137,8 @@ def model_fn(features, labels, mode, params):
    tag5_list = features['tag5_list']
    tag6_list = features['tag6_list']
    tag7_list = features['tag7_list']
-    number = features['number']
+    search_tag2_list = features['search_tag2_list']
+    search_tag3_list = features['search_tag3_list']
    uid = features['uid']
    city = features['city']
    cid_id = features['cid_id']
@@ -158,12 +160,14 @@ def model_fn(features, labels, mode, params):
        tag5 = tf.nn.embedding_lookup_sparse(Feat_Emb, sp_ids=tag5_list, sp_weights=None, combiner="sum")
        tag6 = tf.nn.embedding_lookup_sparse(Feat_Emb, sp_ids=tag6_list, sp_weights=None, combiner="sum")
        tag7 = tf.nn.embedding_lookup_sparse(Feat_Emb, sp_ids=tag7_list, sp_weights=None, combiner="sum")
+        search_tag2 = tf.nn.embedding_lookup_sparse(Feat_Emb, sp_ids=search_tag2_list, sp_weights=None, combiner="sum")
+        search_tag3 = tf.nn.embedding_lookup_sparse(Feat_Emb, sp_ids=search_tag3_list, sp_weights=None, combiner="sum")
        # x_concat = tf.reshape(embedding_id,shape=[-1, common_dims])  # None * (F * K)
        x_concat = tf.concat([tf.reshape(embedding_id, shape=[-1, common_dims]), app_id, level2, level3, tag1,
-                              tag2, tag3, tag4, tag5, tag6, tag7], axis=1)
+                              tag2, tag3, tag4, tag5, tag6, tag7,search_tag2,search_tag3], axis=1)
-        sample_id = tf.sparse.to_dense(number)
        uid = tf.sparse.to_dense(uid,default_value="")
        city = tf.sparse.to_dense(city,default_value="")
        cid_id = tf.sparse.to_dense(cid_id,default_value="")
@@ -212,8 +216,7 @@ def model_fn(features, labels, mode, params):
        pcvr = tf.sigmoid(y_cvr)
        pctcvr = pctr*pcvr
+    predictions={"pctcvr": pctcvr, "uid":uid, "city":city, "cid_id":cid_id}
-    predictions={"pctcvr": pctcvr, "sample_id": sample_id, "uid":uid, "city":city, "cid_id":cid_id}
    export_outputs = {tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: tf.estimator.export.PredictOutput(predictions)}
    # Provide an estimator spec for `ModeKeys.PREDICT`
    if mode == tf.estimator.ModeKeys.PREDICT:
@@ -328,7 +331,7 @@ def main(te_files):
    FLAGS.model_dir = FLAGS.model_dir + FLAGS.dt_dir
    #FLAGS.data_dir  = FLAGS.data_dir + FLAGS.dt_dir
-    tr_files = ["hdfs://172.16.32.4:8020/strategy/esmm/test_tr/part-r-00000"]
+    tr_files = ["hdfs://172.16.32.4:8020/strategy/esmm/tr/part-r-00000"]
    va_files = ["hdfs://172.16.32.4:8020/strategy/esmm/va/part-r-00000"]
    # te_files = ["%s/part-r-00000" % FLAGS.hdfs_dir]
@@ -355,7 +358,6 @@ def main(te_files):
    }
    config = tf.estimator.RunConfig().replace(session_config = tf.ConfigProto(device_count={'GPU':0, 'CPU':FLAGS.num_threads}),
            log_step_count_steps=FLAGS.log_steps, save_summary_steps=FLAGS.log_steps)
    Estimator = tf.estimator.Estimator(model_fn=model_fn, model_dir=FLAGS.model_dir, params=model_params, config=config)
    if FLAGS.task_type == 'train':
@@ -369,21 +371,19 @@ def main(te_files):
        for key,value in sorted(result.items()):
            print('%s: %s' % (key,value))
    elif FLAGS.task_type == 'infer':
-        preds = Estimator.predict(input_fn=lambda: input_fn(te_files, num_epochs=1, batch_size=FLAGS.batch_size), predict_keys=["pctcvr","sample_id","uid","city","cid_id"])
+        preds = Estimator.predict(input_fn=lambda: input_fn(te_files, num_epochs=1, batch_size=FLAGS.batch_size), predict_keys=["pctcvr","uid","city","cid_id"])
        result = []
        for prob in preds:
-            result.append([str(prob["sample_id"][0]),str(prob["uid"][0]),str(prob["city"][0]),str(prob["cid_id"][0]),str(prob['pctcvr'])])
+            result.append([str(prob["uid"][0]), str(prob["city"][0]), str(prob["cid_id"][0]), str(prob['pctcvr'])])
-        return result
    elif FLAGS.task_type == 'export':
        print("Not Implemented, Do It Yourself!")
 if __name__ == "__main__":
    b = time.time()
    path = "hdfs://172.16.32.4:8020/strategy/esmm/"
    tf.logging.set_verbosity(tf.logging.INFO)
-    te_files = ["hdfs://172.16.32.4:8020/strategy/esmm/test_native/part-r-00000"]
+    te_files = ["hdfs://172.16.32.4:8020/strategy/esmm/test_nearby/part-r-00000"]
    print("hello up")
    result = main(te_files)
    df = pd.DataFrame(result,columns=["sample_id","uid","city","cid_id","pctcvr"])
@@ -391,3 +391,4 @@ if __name__ == "__main__":
    print("hello down")
    print("耗时(分钟)：")
    print((time.time()-b)/60)