Commit ea23b1d6 authored by 张彦钊

change test flow

parent f8359d52
@@ -215,7 +215,7 @@ def feature_engineer():
spark.createDataFrame(train).toDF("y", "z", "app_list", "level2_list", "level3_list",
"tag1_list", "tag2_list", "tag3_list", "tag4_list",
"tag5_list", "tag6_list", "tag7_list", "ids") \
-.write.format("tfrecords").save(path=path + "tr/", mode="overwrite")
+.repartition(1).write.format("tfrecords").save(path=path + "tr/", mode="overwrite")
h = time.time()
print("train tfrecord done")
print((h - f) / 60)
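The change in this hunk is the .repartition(1) added in front of the write. A minimal sketch of the effect, assuming a toy DataFrame and the spark-tensorflow-connector "tfrecords" source; the paths and columns below are illustrative, not from the repo:

from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("repartition_sketch").getOrCreate()

# Toy frame standing in for the real feature DataFrame.
df = spark.createDataFrame([(1, 0.5), (0, 0.2), (1, 0.9)], ["y", "z"])

# Without repartition, the writer emits one part file per partition of df.
df.write.format("tfrecords").save(path="/tmp/tr_many_parts/", mode="overwrite")

# repartition(1) shuffles all rows into a single partition, so exactly one
# part file is written -- simpler to hand to a TensorFlow input pipeline, but
# every row now funnels through a single task, which can be slow at scale.
df.repartition(1).write.format("tfrecords").save(path="/tmp/tr_one_part/", mode="overwrite")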
@@ -230,7 +230,7 @@ def feature_engineer():
spark.createDataFrame(test).toDF("y", "z", "app_list", "level2_list", "level3_list",
"tag1_list", "tag2_list", "tag3_list", "tag4_list",
"tag5_list", "tag6_list", "tag7_list", "ids") \
-.write.format("tfrecords").save(path=path + "va/", mode="overwrite")
+.repartition(1).write.format("tfrecords").save(path=path + "va/", mode="overwrite")
print("va tfrecord done")
@@ -310,7 +310,6 @@ def get_predict(date,value_map,app_list_map,leve2_map,leve3_map):
print("nearby csv")
native_pre.toPandas().to_csv(local_path + "nearby.csv", header=True)
spark.createDataFrame(rdd.filter(lambda x: x[0] == 1)
.map(
lambda x: (x[1], x[2], x[6], x[7], x[8], x[9], x[10], x[11], x[12], x[13], x[14], x[15], x[16]))) \
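For context, the pattern in get_predict keeps all prediction rows in one RDD and tells them apart by a flag in x[0], then builds a DataFrame per flag and dumps it to a local CSV via pandas. A hedged toy sketch of that pattern (the flag meaning, column names, and paths here are assumptions, not taken from the repo):

# Toy RDD: x[0] is the flag (assumed 0 = native, 1 = nearby).
rows = spark.sparkContext.parallelize([
    (0, "u1", "c1"),
    (1, "u2", "c2"),
])

native_pre = spark.createDataFrame(rows.filter(lambda x: x[0] == 0)
                                       .map(lambda x: (x[1], x[2]))).toDF("uid", "cid")
nearby_pre = spark.createDataFrame(rows.filter(lambda x: x[0] == 1)
                                       .map(lambda x: (x[1], x[2]))).toDF("uid", "cid")

# toPandas() collects the whole DataFrame to the driver, so this style of
# local CSV export is only safe when the result fits in driver memory.
nearby_pre.toPandas().to_csv("/tmp/nearby.csv", header=True)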
@@ -159,7 +159,8 @@ def get_hdfs(dir_in):
if __name__ == '__main__':
preds = [3,4]
with open("/home/gmuser/hello.txt", "w") as fo:
with open("/home/gmuser/hel"
"lo.txt", "w") as fo:
for prob in preds:
fo.write(str(prob))
# sparkConf = SparkConf().set("spark.hive.mapred.supports.subdirectories", "true") \
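The only change in this file splits the path literal across two lines. Python joins adjacent string literals at compile time, so the split form opens exactly the same file; a minimal standalone sketch (the path is copied from the diff and may need adjusting to run locally):

single = "/home/gmuser/hello.txt"
split = ("/home/gmuser/hel"
         "lo.txt")
assert single == split  # adjacent literals are concatenated into one string

preds = [3, 4]
with open(split, "w") as fo:
    for prob in preds:
        fo.write(str(prob))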