ML / ffm-baseline · Commits

Commit 4fa0a50f, authored Jun 24, 2019 by Your Name
Parent: ced9f2c4

Commit message: test

Showing 1 changed file with 47 additions and 36 deletions.

eda/esmm/Model_pipline/dist_sort.py → eda/esmm/Model_pipline/predict.py (+47 −36)
import tensorflow as tf
import pymysql
from pyspark.conf import SparkConf
import pytispark.pytispark as pti
from pyspark.sql import SparkSession
import datetime
import pandas as pd
from datetime import date, timedelta
import time
from pyspark import StorageLevel
from pyspark.sql import Row
import os
import sys
from sqlalchemy import create_engine
...
@@ -30,7 +25,8 @@ def input_fn(filenames, batch_size=32, num_epochs=1, perform_shuffle=False):
        "tag5_list": tf.VarLenFeature(tf.int64),
        "tag6_list": tf.VarLenFeature(tf.int64),
        "tag7_list": tf.VarLenFeature(tf.int64),
-       "number": tf.VarLenFeature(tf.int64),
+       "search_tag2_list": tf.VarLenFeature(tf.int64),
+       "search_tag3_list": tf.VarLenFeature(tf.int64),
        "uid": tf.VarLenFeature(tf.string),
        "city": tf.VarLenFeature(tf.string),
        "cid_id": tf.VarLenFeature(tf.string)
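A note on the parsing spec above (an illustration, not part of the commit): tf.VarLenFeature yields tf.SparseTensor values when the serialized tf.train.Examples are parsed, which is why the model_fn below needs tf.sparse.to_dense and tf.nn.embedding_lookup_sparse. A minimal, self-contained TF 1.x sketch, with toy feature values that are purely made up:

import tensorflow as tf

example = tf.train.Example(features=tf.train.Features(feature={
    "tag5_list": tf.train.Feature(int64_list=tf.train.Int64List(value=[3, 7])),
    "uid": tf.train.Feature(bytes_list=tf.train.BytesList(value=[b"u123"])),
}))
spec = {
    "tag5_list": tf.VarLenFeature(tf.int64),
    "uid": tf.VarLenFeature(tf.string),
}
parsed = tf.parse_example([example.SerializeToString()], features=spec)
with tf.Session() as sess:
    # Each parsed value is a SparseTensorValue (indices, values, dense_shape),
    # so variable-length rows never need padding at parse time.
    print(sess.run(parsed["tag5_list"]))
    print(sess.run(parsed["uid"]))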
...
@@ -88,7 +84,8 @@ def model_fn(features, labels, mode, params):
    tag5_list = features['tag5_list']
    tag6_list = features['tag6_list']
    tag7_list = features['tag7_list']
-   number = features['number']
+   search_tag2_list = features['search_tag2_list']
+   search_tag3_list = features['search_tag3_list']
    uid = features['uid']
    city = features['city']
    cid_id = features['cid_id']
...
@@ -107,12 +104,14 @@ def model_fn(features, labels, mode, params):
    tag5 = tf.nn.embedding_lookup_sparse(Feat_Emb, sp_ids=tag5_list, sp_weights=None, combiner="sum")
    tag6 = tf.nn.embedding_lookup_sparse(Feat_Emb, sp_ids=tag6_list, sp_weights=None, combiner="sum")
    tag7 = tf.nn.embedding_lookup_sparse(Feat_Emb, sp_ids=tag7_list, sp_weights=None, combiner="sum")
+   search_tag2 = tf.nn.embedding_lookup_sparse(Feat_Emb, sp_ids=search_tag2_list, sp_weights=None, combiner="sum")
+   search_tag3 = tf.nn.embedding_lookup_sparse(Feat_Emb, sp_ids=search_tag3_list, sp_weights=None, combiner="sum")
    # x_concat = tf.reshape(embedding_id,shape=[-1, common_dims]) # None * (F * K)
-   x_concat = tf.concat([tf.reshape(embedding_id, shape=[-1, common_dims]), app_id, level2, level3, tag1, tag2, tag3, tag4, tag5, tag6, tag7], axis=1)
+   x_concat = tf.concat([tf.reshape(embedding_id, shape=[-1, common_dims]), app_id, level2, level3, tag1, tag2, tag3, tag4, tag5, tag6, tag7, search_tag2, search_tag3], axis=1)
-   sample_id = tf.sparse.to_dense(number)
    uid = tf.sparse.to_dense(uid, default_value="")
    city = tf.sparse.to_dense(city, default_value="")
    cid_id = tf.sparse.to_dense(cid_id, default_value="")
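Why combiner="sum" matters here (an illustration, not repo code): each row of sp_ids is a variable-length bag of tag ids, and embedding_lookup_sparse sums the looked-up embedding rows per bag, producing one fixed-width vector per example regardless of how many tags it has. A toy TF 1.x sketch:

import tensorflow as tf

emb = tf.constant([[0., 1.], [10., 11.], [20., 21.]])  # 3 ids, embedding dim 2
# Two "bags": example 0 holds ids {1, 2}, example 1 holds id {0}.
sp_ids = tf.SparseTensor(indices=[[0, 0], [0, 1], [1, 0]],
                         values=tf.constant([1, 2, 0], dtype=tf.int64),
                         dense_shape=[2, 2])
out = tf.nn.embedding_lookup_sparse(emb, sp_ids=sp_ids, sp_weights=None, combiner="sum")
with tf.Session() as sess:
    print(sess.run(out))  # [[30. 32.]  = emb[1] + emb[2]
                          #  [ 0.  1.]] = emb[0]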
...
@@ -149,7 +148,7 @@ def model_fn(features, labels, mode, params):
    pcvr = tf.sigmoid(y_cvr)
    pctcvr = pctr * pcvr
-   predictions = {"pcvr": pcvr, "pctr": pctr, "pctcvr": pctcvr, "sample_id": sample_id, "uid": uid, "city": city, "cid_id": cid_id}
+   predictions = {"pctcvr": pctcvr, "uid": uid, "city": city, "cid_id": cid_id}
    export_outputs = {tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: tf.estimator.export.PredictOutput(predictions)}
    # Provide an estimator spec for `ModeKeys.PREDICT`
    if mode == tf.estimator.ModeKeys.PREDICT:
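The diff collapses the body of the PREDICT branch. For orientation only, the standard TF 1.x pattern such a branch returns is sketched below; this is a generic illustration, not necessarily the exact code in this file:

import tensorflow as tf

def predict_spec(mode, predictions, export_outputs):
    # In PREDICT mode an EstimatorSpec only needs `predictions`;
    # `export_outputs` additionally defines the SavedModel serving signature.
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode,
                                          predictions=predictions,
                                          export_outputs=export_outputs)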
...
@@ -176,25 +175,11 @@ def main(te_file):
        log_step_count_steps=100, save_summary_steps=100)
    Estimator = tf.estimator.Estimator(model_fn=model_fn, model_dir=model_dir, params=model_params, config=config)
-   preds = Estimator.predict(input_fn=lambda: input_fn(te_file, num_epochs=1, batch_size=10000), predict_keys=["pctcvr", "pctr", "pcvr", "sample_id", "uid", "city", "cid_id"])
-   # with open("/home/gmuser/esmm/nearby/pred.txt", "w") as fo:
-   #     for prob in preds:
-   #         fo.write("%f\t%f\t%f\n" % (prob['pctr'], prob['pcvr'], prob['pctcvr']))
+   preds = Estimator.predict(input_fn=lambda: input_fn(te_file, num_epochs=1, batch_size=10000), predict_keys=["pctcvr", "uid", "city", "cid_id"])
-   # ctcvr = []
-   str_result = ""
+   result = []
    for prob in preds:
-       # ctcvr.append((prob["sample_id"][0],prob['pctcvr']))
-       str_result = str_result + str(prob["sample_id"][0]) + ":" + str(prob["uid"][0]) + ":" + str(prob["city"][0]) + ":" + str(prob["cid_id"][0]) + ":" + str(prob['pctcvr']) + ";"
-       # str_result = list(prob.keys())
-   # return str_result
-   return str_result[:-1]
-   # indices = []
-   # for prob in preds:
-   #     indices.append([prob['pctr'], prob['pcvr'], prob['pctcvr']])
-   # return indices
+       result.append([str(prob["uid"][0]), str(prob["city"][0]), str(prob["cid_id"][0]), str(prob['pctcvr'])])

+def trans(x):
+   return str(x)[2:-1] if str(x)[0] == 'b' else x
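Why the new trans helper exists (an inference from context, not stated in the commit): the string features come back from Estimator.predict as numpy bytes, and str() on a bytes value keeps the b'...' repr wrapper, so trans strips it. A quick demonstration:

def trans(x):  # same helper as above
    return str(x)[2:-1] if str(x)[0] == 'b' else x

print(trans(b"beijing"))  # 'beijing' -- the b'...' wrapper is stripped
print(trans("shanghai"))  # 'shanghai' -- non-bytes values pass through
print(trans("beijing"))   # 'ijin' -- caveat: a plain str starting with 'b' is mangled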
...
@@ -208,22 +193,49 @@ def set_join(lst):
if __name__ == "__main__":
-   if sys.argv[1] == "nearby":
+   if sys.argv[1] == "native":
        b = time.time()
        print("infer native task")
        path = "hdfs://172.16.32.4:8020/strategy/esmm/"
        # df = spark.read.format("tfrecords").load(path+"test_native/part-r-00000")
        # df.show()
        te_files = ["hdfs://172.16.32.4:8020/strategy/esmm/test_native/part-r-00000"]
        print("dist predict native")
        print("耗时(秒):")  # "elapsed time (seconds):"
        print((time.time() - b))
    if sys.argv[1] == "nearby":
        print("infer nearby task")
        b = time.time()
        path = "hdfs://172.16.32.4:8020/strategy/esmm/"
        # df = spark.read.format("tfrecords").load(path+"test_nearby/part-r-00000")
        # df.show()
        te_files = ["hdfs://172.16.32.4:8020/strategy/esmm/test_nearby/part-r-00000"]
-       print("-" * 100)
-       indices = main(te_files)
-       print(indices[0])
+       result = main(te_files)
+       df = pd.DataFrame(result, columns=["uid", "city", "cid_id", "pctcvr"])
+       df.head(10)
+       host = '172.16.40.158'
+       port = 4000
+       user = 'root'
+       password = '3SYz54LS9#^9sBvC'
+       db = 'jerry_test'
+       charset = 'utf8'
+       print("耗时(min):")  # "elapsed time (minutes):"
+       print((time.time() - b) / 60)
        print("耗时(秒):")  # "elapsed time (seconds):"
        print((time.time() - b))
    else:
        print("hello")
\ No newline at end of file
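The new __main__ block defines MySQL/TiDB connection settings (host, port, user, password, db, charset), and the file imports create_engine and pymysql, but the actual write of df falls outside the shown hunks. Below is a sketch of the conventional pandas-to-MySQL path under that assumption; the table name "esmm_pred" and the masked password are placeholders:

import pandas as pd
from sqlalchemy import create_engine

df = pd.DataFrame([["u1", "beijing", "c9", "0.031"]],
                  columns=["uid", "city", "cid_id", "pctcvr"])
# mysql+pymysql:// routes SQLAlchemy through the pymysql driver imported above.
engine = create_engine("mysql+pymysql://root:***@172.16.40.158:4000/jerry_test?charset=utf8")
df.to_sql("esmm_pred", con=engine, if_exists="append", index=False)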