Commit 7d05b362 authored by Your Name

test

parent fd53cce3
...@@ -10,12 +10,11 @@ import os ...@@ -10,12 +10,11 @@ import os
import json import json
from datetime import date, timedelta from datetime import date, timedelta
import tensorflow as tf import tensorflow as tf
from tensorflow.python.client import timeline
import subprocess import subprocess
import time import time
import glob import glob
import random
import pandas as pd import pandas as pd
import random
#################### CMD Arguments #################### #################### CMD Arguments ####################
FLAGS = tf.app.flags.FLAGS FLAGS = tf.app.flags.FLAGS
...@@ -48,7 +47,7 @@ tf.app.flags.DEFINE_string("servable_model_dir", '', "export servable model for ...@@ -48,7 +47,7 @@ tf.app.flags.DEFINE_string("servable_model_dir", '', "export servable model for
tf.app.flags.DEFINE_string("task_type", 'train', "task type {train, infer, eval, export}") tf.app.flags.DEFINE_string("task_type", 'train', "task type {train, infer, eval, export}")
tf.app.flags.DEFINE_boolean("clear_existing_model", False, "clear existing model or not") tf.app.flags.DEFINE_boolean("clear_existing_model", False, "clear existing model or not")
#40362692,0,0,216:9342395:1.0 301:9351665:1.0 205:7702673:1.0 206:8317829:1.0 207:8967741:1.0 508:9356012:2.30259 210:9059239:1.0 210:9042796:1.0 210:9076972:1.0 210:9103884:1.0 210:9063064:1.0 127_14:3529789:2.3979 127_14:3806412:2.70805
def input_fn(filenames, batch_size=32, num_epochs=1, perform_shuffle=False): def input_fn(filenames, batch_size=32, num_epochs=1, perform_shuffle=False):
print('Parsing', filenames) print('Parsing', filenames)
def _parse_fn(record): def _parse_fn(record):
...@@ -66,7 +65,8 @@ def input_fn(filenames, batch_size=32, num_epochs=1, perform_shuffle=False): ...@@ -66,7 +65,8 @@ def input_fn(filenames, batch_size=32, num_epochs=1, perform_shuffle=False):
"tag5_list": tf.VarLenFeature(tf.int64), "tag5_list": tf.VarLenFeature(tf.int64),
"tag6_list": tf.VarLenFeature(tf.int64), "tag6_list": tf.VarLenFeature(tf.int64),
"tag7_list": tf.VarLenFeature(tf.int64), "tag7_list": tf.VarLenFeature(tf.int64),
"number": tf.VarLenFeature(tf.int64), "search_tag2_list": tf.VarLenFeature(tf.int64),
"search_tag3_list": tf.VarLenFeature(tf.int64),
"uid": tf.VarLenFeature(tf.string), "uid": tf.VarLenFeature(tf.string),
"city": tf.VarLenFeature(tf.string), "city": tf.VarLenFeature(tf.string),
"cid_id": tf.VarLenFeature(tf.string) "cid_id": tf.VarLenFeature(tf.string)
...@@ -108,6 +108,7 @@ def input_fn(filenames, batch_size=32, num_epochs=1, perform_shuffle=False): ...@@ -108,6 +108,7 @@ def input_fn(filenames, batch_size=32, num_epochs=1, perform_shuffle=False):
#print(batch_features,batch_labels) #print(batch_features,batch_labels)
return batch_features, batch_labels return batch_features, batch_labels
def model_fn(features, labels, mode, params): def model_fn(features, labels, mode, params):
"""Bulid Model function f(x) for Estimator.""" """Bulid Model function f(x) for Estimator."""
#------hyperparameters---- #------hyperparameters----
...@@ -136,7 +137,8 @@ def model_fn(features, labels, mode, params): ...@@ -136,7 +137,8 @@ def model_fn(features, labels, mode, params):
tag5_list = features['tag5_list'] tag5_list = features['tag5_list']
tag6_list = features['tag6_list'] tag6_list = features['tag6_list']
tag7_list = features['tag7_list'] tag7_list = features['tag7_list']
number = features['number'] search_tag2_list = features['search_tag2_list']
search_tag3_list = features['search_tag3_list']
uid = features['uid'] uid = features['uid']
city = features['city'] city = features['city']
cid_id = features['cid_id'] cid_id = features['cid_id']
...@@ -158,12 +160,14 @@ def model_fn(features, labels, mode, params): ...@@ -158,12 +160,14 @@ def model_fn(features, labels, mode, params):
tag5 = tf.nn.embedding_lookup_sparse(Feat_Emb, sp_ids=tag5_list, sp_weights=None, combiner="sum") tag5 = tf.nn.embedding_lookup_sparse(Feat_Emb, sp_ids=tag5_list, sp_weights=None, combiner="sum")
tag6 = tf.nn.embedding_lookup_sparse(Feat_Emb, sp_ids=tag6_list, sp_weights=None, combiner="sum") tag6 = tf.nn.embedding_lookup_sparse(Feat_Emb, sp_ids=tag6_list, sp_weights=None, combiner="sum")
tag7 = tf.nn.embedding_lookup_sparse(Feat_Emb, sp_ids=tag7_list, sp_weights=None, combiner="sum") tag7 = tf.nn.embedding_lookup_sparse(Feat_Emb, sp_ids=tag7_list, sp_weights=None, combiner="sum")
search_tag2 = tf.nn.embedding_lookup_sparse(Feat_Emb, sp_ids=search_tag2_list, sp_weights=None, combiner="sum")
search_tag3 = tf.nn.embedding_lookup_sparse(Feat_Emb, sp_ids=search_tag3_list, sp_weights=None, combiner="sum")
# x_concat = tf.reshape(embedding_id,shape=[-1, common_dims]) # None * (F * K) # x_concat = tf.reshape(embedding_id,shape=[-1, common_dims]) # None * (F * K)
x_concat = tf.concat([tf.reshape(embedding_id, shape=[-1, common_dims]), app_id, level2, level3, tag1, x_concat = tf.concat([tf.reshape(embedding_id, shape=[-1, common_dims]), app_id, level2, level3, tag1,
tag2, tag3, tag4, tag5, tag6, tag7], axis=1) tag2, tag3, tag4, tag5, tag6, tag7,search_tag2,search_tag3], axis=1)
sample_id = tf.sparse.to_dense(number)
uid = tf.sparse.to_dense(uid,default_value="") uid = tf.sparse.to_dense(uid,default_value="")
city = tf.sparse.to_dense(city,default_value="") city = tf.sparse.to_dense(city,default_value="")
cid_id = tf.sparse.to_dense(cid_id,default_value="") cid_id = tf.sparse.to_dense(cid_id,default_value="")
...@@ -212,8 +216,7 @@ def model_fn(features, labels, mode, params): ...@@ -212,8 +216,7 @@ def model_fn(features, labels, mode, params):
pcvr = tf.sigmoid(y_cvr) pcvr = tf.sigmoid(y_cvr)
pctcvr = pctr*pcvr pctcvr = pctr*pcvr
predictions={"pctcvr": pctcvr, "uid":uid, "city":city, "cid_id":cid_id}
predictions={"pctcvr": pctcvr, "sample_id": sample_id, "uid":uid, "city":city, "cid_id":cid_id}
export_outputs = {tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: tf.estimator.export.PredictOutput(predictions)} export_outputs = {tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: tf.estimator.export.PredictOutput(predictions)}
# Provide an estimator spec for `ModeKeys.PREDICT` # Provide an estimator spec for `ModeKeys.PREDICT`
if mode == tf.estimator.ModeKeys.PREDICT: if mode == tf.estimator.ModeKeys.PREDICT:
...@@ -328,7 +331,7 @@ def main(te_files): ...@@ -328,7 +331,7 @@ def main(te_files):
FLAGS.model_dir = FLAGS.model_dir + FLAGS.dt_dir FLAGS.model_dir = FLAGS.model_dir + FLAGS.dt_dir
#FLAGS.data_dir = FLAGS.data_dir + FLAGS.dt_dir #FLAGS.data_dir = FLAGS.data_dir + FLAGS.dt_dir
tr_files = ["hdfs://172.16.32.4:8020/strategy/esmm/test_tr/part-r-00000"] tr_files = ["hdfs://172.16.32.4:8020/strategy/esmm/tr/part-r-00000"]
va_files = ["hdfs://172.16.32.4:8020/strategy/esmm/va/part-r-00000"] va_files = ["hdfs://172.16.32.4:8020/strategy/esmm/va/part-r-00000"]
# te_files = ["%s/part-r-00000" % FLAGS.hdfs_dir] # te_files = ["%s/part-r-00000" % FLAGS.hdfs_dir]
...@@ -355,7 +358,6 @@ def main(te_files): ...@@ -355,7 +358,6 @@ def main(te_files):
} }
config = tf.estimator.RunConfig().replace(session_config = tf.ConfigProto(device_count={'GPU':0, 'CPU':FLAGS.num_threads}), config = tf.estimator.RunConfig().replace(session_config = tf.ConfigProto(device_count={'GPU':0, 'CPU':FLAGS.num_threads}),
log_step_count_steps=FLAGS.log_steps, save_summary_steps=FLAGS.log_steps) log_step_count_steps=FLAGS.log_steps, save_summary_steps=FLAGS.log_steps)
Estimator = tf.estimator.Estimator(model_fn=model_fn, model_dir=FLAGS.model_dir, params=model_params, config=config) Estimator = tf.estimator.Estimator(model_fn=model_fn, model_dir=FLAGS.model_dir, params=model_params, config=config)
if FLAGS.task_type == 'train': if FLAGS.task_type == 'train':
...@@ -369,21 +371,19 @@ def main(te_files): ...@@ -369,21 +371,19 @@ def main(te_files):
for key,value in sorted(result.items()): for key,value in sorted(result.items()):
print('%s: %s' % (key,value)) print('%s: %s' % (key,value))
elif FLAGS.task_type == 'infer': elif FLAGS.task_type == 'infer':
preds = Estimator.predict(input_fn=lambda: input_fn(te_files, num_epochs=1, batch_size=FLAGS.batch_size), predict_keys=["pctcvr","sample_id","uid","city","cid_id"]) preds = Estimator.predict(input_fn=lambda: input_fn(te_files, num_epochs=1, batch_size=FLAGS.batch_size), predict_keys=["pctcvr","uid","city","cid_id"])
result = [] result = []
for prob in preds: for prob in preds:
result.append([str(prob["sample_id"][0]),str(prob["uid"][0]),str(prob["city"][0]),str(prob["cid_id"][0]),str(prob['pctcvr'])]) result.append([str(prob["uid"][0]), str(prob["city"][0]), str(prob["cid_id"][0]), str(prob['pctcvr'])])
return result
elif FLAGS.task_type == 'export': elif FLAGS.task_type == 'export':
print("Not Implemented, Do It Yourself!") print("Not Implemented, Do It Yourself!")
if __name__ == "__main__": if __name__ == "__main__":
b = time.time() b = time.time()
path = "hdfs://172.16.32.4:8020/strategy/esmm/" path = "hdfs://172.16.32.4:8020/strategy/esmm/"
tf.logging.set_verbosity(tf.logging.INFO) tf.logging.set_verbosity(tf.logging.INFO)
te_files = ["hdfs://172.16.32.4:8020/strategy/esmm/test_native/part-r-00000"] te_files = ["hdfs://172.16.32.4:8020/strategy/esmm/test_nearby/part-r-00000"]
print("hello up") print("hello up")
result = main(te_files) result = main(te_files)
df = pd.DataFrame(result,columns=["sample_id","uid","city","cid_id","pctcvr"]) df = pd.DataFrame(result,columns=["sample_id","uid","city","cid_id","pctcvr"])
...@@ -391,3 +391,4 @@ if __name__ == "__main__": ...@@ -391,3 +391,4 @@ if __name__ == "__main__":
print("hello down") print("hello down")
print("耗时(分钟):") print("耗时(分钟):")
print((time.time()-b)/60) print((time.time()-b)/60)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment