Commit 04b52880 authored by 王志伟
parents 7491370b 9b554ea0
@@ -8,10 +8,7 @@
 import shutil
 import os
 import json
-import glob
 from datetime import date, timedelta
-import random
 import tensorflow as tf
 #################### CMD Arguments ####################
@@ -37,7 +34,8 @@ tf.app.flags.DEFINE_string("deep_layers", '256,128,64', "deep layers")
 tf.app.flags.DEFINE_string("dropout", '0.5,0.5,0.5', "dropout rate")
 tf.app.flags.DEFINE_boolean("batch_norm", False, "perform batch normaization (True or False)")
 tf.app.flags.DEFINE_float("batch_norm_decay", 0.9, "decay for the moving average(recommend trying decay=0.9)")
-tf.app.flags.DEFINE_string("data_dir", '', "data dir")
+tf.app.flags.DEFINE_string("hdfs_dir", '', "hdfs dir")
+tf.app.flags.DEFINE_string("local_dir", '', "local dir")
 tf.app.flags.DEFINE_string("dt_dir", '', "data dt partition")
 tf.app.flags.DEFINE_string("model_dir", '', "model check point dir")
 tf.app.flags.DEFINE_string("servable_model_dir", '', "export servable model for TensorFlow Serving")
@@ -49,19 +47,10 @@ def input_fn(filenames, batch_size=32, num_epochs=1, perform_shuffle=False):
     print('Parsing', filenames)
     def _parse_fn(record):
         features = {
-            "y": tf.FixedLenFeature([], tf.float32),
-            "z": tf.FixedLenFeature([], tf.float32),
-            "ids": tf.FixedLenFeature([FLAGS.field_size], tf.int64),
-            "app_list": tf.VarLenFeature(tf.int64),
-            "level2_list": tf.VarLenFeature(tf.int64),
-            "level3_list": tf.VarLenFeature(tf.int64),
-            "tag1_list": tf.VarLenFeature(tf.int64),
-            "tag2_list": tf.VarLenFeature(tf.int64),
-            "tag3_list": tf.VarLenFeature(tf.int64),
-            "tag4_list": tf.VarLenFeature(tf.int64),
-            "tag5_list": tf.VarLenFeature(tf.int64),
-            "tag6_list": tf.VarLenFeature(tf.int64),
-            "tag7_list": tf.VarLenFeature(tf.int64)
+            "y": tf.VarLenFeature(tf.int64),
+            "z": tf.VarLenFeature(tf.int64),
+            "ids": tf.VarLenFeature(tf.int64),
+            "level2_ids": tf.VarLenFeature(tf.int64)
         }
         parsed = tf.parse_single_example(record, features)
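For orientation (not part of the commit): a minimal, self-contained sketch of how the new all-VarLenFeature schema parses a record in TF 1.x. The feature names mirror the committed `_parse_fn`; how `y`/`z` are densified afterwards is an assumption, since the rest of `_parse_fn` is truncated in this hunk.

```python
import tensorflow as tf

# Schema as committed: every field is a VarLenFeature, so each one parses
# to a tf.SparseTensor and no fixed FLAGS.field_size is required anymore.
features = {
    "y": tf.VarLenFeature(tf.int64),
    "z": tf.VarLenFeature(tf.int64),
    "ids": tf.VarLenFeature(tf.int64),
    "level2_ids": tf.VarLenFeature(tf.int64)
}

def parse_sketch(record):
    parsed = tf.parse_single_example(record, features)
    # Assumption: labels are densified for the loss, while "ids" and
    # "level2_ids" stay sparse for tf.nn.embedding_lookup_sparse later.
    y = tf.sparse_tensor_to_dense(parsed.pop("y"))
    z = tf.sparse_tensor_to_dense(parsed.pop("z"))
    return parsed, {"y": y, "z": z}
```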
@@ -108,15 +97,8 @@ def model_fn(features, labels, mode, params):
     feat_ids = features['ids']
     app_list = features['app_list']
-    level2_list = features['level2_list']
-    level3_list = features['level3_list']
-    tag1_list = features['tag1_list']
-    tag2_list = features['tag2_list']
-    tag3_list = features['tag3_list']
-    tag4_list = features['tag4_list']
-    tag5_list = features['tag5_list']
-    tag6_list = features['tag6_list']
-    tag7_list = features['tag7_list']
+    level2_list = features['level2_ids']
     if FLAGS.task_type != "infer":
         y = labels['y']
@@ -127,18 +109,10 @@ def model_fn(features, labels, mode, params):
         embedding_id = tf.nn.embedding_lookup(Feat_Emb,feat_ids)
         app_id = tf.nn.embedding_lookup_sparse(Feat_Emb, sp_ids=app_list, sp_weights=None, combiner="sum")
         level2 = tf.nn.embedding_lookup_sparse(Feat_Emb, sp_ids=level2_list, sp_weights=None, combiner="sum")
-        level3 = tf.nn.embedding_lookup_sparse(Feat_Emb, sp_ids=level3_list, sp_weights=None, combiner="sum")
-        tag1 = tf.nn.embedding_lookup_sparse(Feat_Emb, sp_ids=tag1_list, sp_weights=None, combiner="sum")
-        tag2 = tf.nn.embedding_lookup_sparse(Feat_Emb, sp_ids=tag2_list, sp_weights=None, combiner="sum")
-        tag3 = tf.nn.embedding_lookup_sparse(Feat_Emb, sp_ids=tag3_list, sp_weights=None, combiner="sum")
-        tag4 = tf.nn.embedding_lookup_sparse(Feat_Emb, sp_ids=tag4_list, sp_weights=None, combiner="sum")
-        tag5 = tf.nn.embedding_lookup_sparse(Feat_Emb, sp_ids=tag5_list, sp_weights=None, combiner="sum")
-        tag6 = tf.nn.embedding_lookup_sparse(Feat_Emb, sp_ids=tag6_list, sp_weights=None, combiner="sum")
-        tag7 = tf.nn.embedding_lookup_sparse(Feat_Emb, sp_ids=tag7_list, sp_weights=None, combiner="sum")
         # x_concat = tf.reshape(embedding_id,shape=[-1, common_dims]) # None * (F * K)
-        x_concat = tf.concat([tf.reshape(embedding_id,shape=[-1,common_dims]),app_id,level2,level3,tag1,
-                              tag2,tag3,tag4,tag5,tag6,tag7], axis=1)
+        x_concat = tf.concat([tf.reshape(embedding_id,shape=[-1,common_dims]),app_id,level2], axis=1)
     with tf.name_scope("CVR_Task"):
         if mode == tf.estimator.ModeKeys.TRAIN:
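Useful context for the concat above, though not part of the diff: `tf.nn.embedding_lookup_sparse` with `combiner="sum"` pools a variable-length id list into one fixed-width row per example, which is what makes `app_id` and `level2` concatenable with the reshaped `embedding_id`. A toy TF 1.x sketch (table size and ids are made up):

```python
import tensorflow as tf

emb = tf.get_variable("emb_demo", shape=[100, 8])  # toy table: 100 ids, dim 8
# Batch of two examples: example 0 holds ids [3, 7], example 1 holds [42].
sp_ids = tf.SparseTensor(indices=[[0, 0], [0, 1], [1, 0]],
                         values=tf.constant([3, 7, 42], dtype=tf.int64),
                         dense_shape=[2, 2])
# Sum-pooling collapses each example's rows: pooled has shape [2, 8],
# row 0 = emb[3] + emb[7], row 1 = emb[42].
pooled = tf.nn.embedding_lookup_sparse(emb, sp_ids=sp_ids,
                                       sp_weights=None, combiner="sum")
```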
@@ -301,7 +275,8 @@ def main(_):
     print('task_type ', FLAGS.task_type)
     print('model_dir ', FLAGS.model_dir)
-    print('data_dir ', FLAGS.data_dir)
+    print('hdfs_dir ', FLAGS.hdfs_dir)
+    print('local_dir ', FLAGS.local_dir)
     print('dt_dir ', FLAGS.dt_dir)
     print('num_epochs ', FLAGS.num_epochs)
     print('feature_size ', FLAGS.feature_size)
@@ -320,6 +295,7 @@ def main(_):
     path = "hdfs:///strategy/esmm/"
     tr_files = [path+"tr/part-r-00000"]
     va_files = [path+"va/part-r-00000"]
+    te_files = ["%s/part-r-00000" % FLAGS.hdfs_dir]
     # tr_files = glob.glob("%s/tr/*tfrecord" % FLAGS.data_dir)
     # random.shuffle(tr_files)
@@ -366,9 +342,9 @@ def main(_):
         print('%s: %s' % (key,value))
     elif FLAGS.task_type == 'infer':
         preds = Estimator.predict(input_fn=lambda: input_fn(te_files, num_epochs=1, batch_size=FLAGS.batch_size), predict_keys=["pctcvr","pctr","pcvr"])
-        with open(FLAGS.data_dir+"/pred.txt", "w") as fo:
+        with open(FLAGS.local_dir+"/pred.txt", "w") as fo:
             print("-"*100)
-            with open(FLAGS.data_dir + "/pred.txt", "w") as fo:
+            with open(FLAGS.local_dir + "/pred.txt", "w") as fo:
                 for prob in preds:
                     fo.write("%f\t%f\t%f\n" % (prob['pctr'], prob['pcvr'], prob['pctcvr']))
     elif FLAGS.task_type == 'export':
...
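The infer branch above writes one tab-separated line per example to pred.txt under the new `local_dir` (note the file is opened twice with mode "w"; the inner handle is the one actually written to). A hypothetical reader for that three-column format, not part of the commit:

```python
def read_preds(path):
    # Columns follow the fo.write above: pctr, pcvr, pctcvr.
    with open(path) as f:
        for line in f:
            pctr, pcvr, pctcvr = (float(x) for x in line.split("\t"))
            yield pctr, pcvr, pctcvr

# e.g. for pctr, pcvr, pctcvr in read_preds(FLAGS.local_dir + "/pred.txt"): ...
```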