change test file

52b14ca5 · 张彦钊 · 36f93bf4 · 52b14ca5 · 52b14ca5 · 52b14ca5
Commit 52b14ca5 authored May 29, 2019 by 张彦钊
Showing 3 changed files with 8 additions and 7 deletions

feature_engineering.py eda/esmm/Model_pipline/feature_engineering.py +4 -2

submit.sh eda/esmm/Model_pipline/submit.sh +0 -0

train.py eda/esmm/Model_pipline/train.py +4 -5

No files found.
--- a/eda/esmm/Model_pipline/feature_engineering.py
+++ b/eda/esmm/Model_pipline/feature_engineering.py
@@ -188,8 +188,7 @@ def feature_engineer():
    df = df.drop_duplicates(["ucity_id", "level2_ids", "ccity_name", "device_type", "manufacturer",
                             "channel", "top", "time", "stat_date", "app_list", "hospital_id", "level3_ids",
                             "tag1", "tag2", "tag3", "tag4", "tag5", "tag6", "tag7"])
-    print("样本总量：")
-    print(df.count())
+
    df = df.na.fill(dict(zip(features, features)))

    rdd = df.select("stat_date", "y", "z", "app_list", "level2_ids", "level3_ids",
@@ -221,6 +220,9 @@ def feature_engineer():
    print("train tfrecord done")
    print((h - f) / 60)

+    print("样本总量：")
+    print(rdd.count())
+
    test = rdd.filter(lambda x: x[0] == validate_date).map(
        lambda x: (x[1], x[2], x[3], x[4], x[5], x[6], x[7], x[8], x[9],
                   x[10], x[11], x[12], x[13]))

--- a/eda/esmm/Model_pipline/submit.sh
+++ b/eda/esmm/Model_pipline/submit.sh
--- a/eda/esmm/Model_pipline/train.py
+++ b/eda/esmm/Model_pipline/train.py
@@ -300,9 +300,10 @@ def main(_):
    FLAGS.model_dir = FLAGS.model_dir + FLAGS.dt_dir
    #FLAGS.data_dir  = FLAGS.data_dir + FLAGS.dt_dir

-    tr_files = get_filename("tr")
-    va_files = get_filename("va")
-    te_files = ["%s/part-r-00000" % FLAGS.hdfs_dir]
+    tr_files = get_filename("/strategy/esmm/tr")
+    va_files = get_filename("/strategy/esmm/va")
+    te_files = get_filename(FLAGS.hdfs_dir)
+    # te_files = ["%s/part-r-00000" % FLAGS.hdfs_dir]

    if FLAGS.clear_existing_model:
        try:
@@ -367,8 +368,6 @@ if __name__ == "__main__":

    # a = "export CLASSPATH='$(hadoop classpath --glob)'"
    # os.system(a)
-    # print("环境")
-    # print(os.popen('hadoop classpath --glob').read())
    # os.system('export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/hadoop/lib/native"')
    path = "hdfs://172.16.32.4:8020/strategy/esmm/"
    tf.logging.set_verbosity(tf.logging.INFO)