Commit 52b14ca5 authored by 张彦钊

change test file

parent 36f93bf4
......@@ -188,8 +188,7 @@ def feature_engineer():
df = df.drop_duplicates(["ucity_id", "level2_ids", "ccity_name", "device_type", "manufacturer",
"channel", "top", "time", "stat_date", "app_list", "hospital_id", "level3_ids",
"tag1", "tag2", "tag3", "tag4", "tag5", "tag6", "tag7"])
print("样本总量:")
print(df.count())
df = df.na.fill(dict(zip(features, features)))
rdd = df.select("stat_date", "y", "z", "app_list", "level2_ids", "level3_ids",
......@@ -221,6 +220,9 @@ def feature_engineer():
print("train tfrecord done")
print((h - f) / 60)
print("样本总量:")
print(rdd.count())
test = rdd.filter(lambda x: x[0] == validate_date).map(
lambda x: (x[1], x[2], x[3], x[4], x[5], x[6], x[7], x[8], x[9],
x[10], x[11], x[12], x[13]))
......
This diff is collapsed.
......@@ -300,9 +300,10 @@ def main(_):
FLAGS.model_dir = FLAGS.model_dir + FLAGS.dt_dir
#FLAGS.data_dir = FLAGS.data_dir + FLAGS.dt_dir
tr_files = get_filename("tr")
va_files = get_filename("va")
te_files = ["%s/part-r-00000" % FLAGS.hdfs_dir]
tr_files = get_filename("/strategy/esmm/tr")
va_files = get_filename("/strategy/esmm/va")
te_files = get_filename(FLAGS.hdfs_dir)
# te_files = ["%s/part-r-00000" % FLAGS.hdfs_dir]
if FLAGS.clear_existing_model:
try:
......@@ -367,8 +368,6 @@ if __name__ == "__main__":
# a = "export CLASSPATH='$(hadoop classpath --glob)'"
# os.system(a)
# print("环境")
# print(os.popen('hadoop classpath --glob').read())
# os.system('export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/hadoop/lib/native"')
path = "hdfs://172.16.32.4:8020/strategy/esmm/"
tf.logging.set_verbosity(tf.logging.INFO)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment