Commit 36f93bf4 authored by 张彦钊

change test file

parent 6d2da688
...
@@ -143,19 +143,23 @@ def get_filename(dir_in):
         x.append(t)
     return x
 
-def parse_fn(record):
-    features = {
-        "y": tf.FixedLenFeature([], tf.float32),
-        "z": tf.FixedLenFeature([], tf.float32)
-    }
-    parsed = tf.parse_single_example(record, features)
-    y = parsed.pop('y')
-    z = parsed.pop('z')
-    return {"y": y, "z": z}
+def get_hdfs(dir_in):
+    pre_path = "hdfs://172.16.32.4:8020"
+    args = "hdfs dfs -ls " + dir_in + " | awk '{print $8}'"
+    proc = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
+    s_output, s_err = proc.communicate()
+    all_dart_dirs = s_output.split()
+    a = []
+    for i in all_dart_dirs:
+        b = str(i).split("/")[4]
+        if b[:4] == "part":
+            tmp = pre_path + str(i)[2:-1]
+            a.append(tmp)
+    return a
 
 if __name__ == '__main__':
+    a = get_hdfs("/strategy/esmm/tr")
+    print(a)
     # sparkConf = SparkConf().set("spark.hive.mapred.supports.subdirectories", "true") \
     #     .set("spark.hadoop.mapreduce.input.fileinputformat.input.dir.recursive", "true") \
     #     .set("spark.tispark.plan.allow_index_double_read", "false") \
...
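
Note on the new get_hdfs: it shells out to `hdfs dfs -ls`, and because subprocess.Popen returns bytes in Python 3, it strips the `b'...'` wrapper by slicing the repr with `str(i)[2:-1]`. Below is a minimal sketch of an equivalent helper that decodes the output explicitly instead; the namenode address and example path are the ones in the diff, while the name `list_hdfs_parts` is hypothetical and not part of this commit.

    import subprocess

    def list_hdfs_parts(dir_in, pre_path="hdfs://172.16.32.4:8020"):
        # List the part-* files under an HDFS directory, mirroring the
        # committed get_hdfs but decoding bytes instead of slicing repr().
        args = "hdfs dfs -ls " + dir_in + " | awk '{print $8}'"
        proc = subprocess.Popen(args, stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE, shell=True)
        s_output, _s_err = proc.communicate()
        paths = []
        for raw in s_output.split():
            path = raw.decode("utf-8")    # bytes -> str, replaces str(i)[2:-1]
            name = path.split("/")[-1]    # final component, e.g. "part-00000"
            if name.startswith("part"):   # same filter as b[:4] == "part"
                paths.append(pre_path + path)
        return paths

    # Usage, mirroring the __main__ block in the diff:
    # print(list_hdfs_parts("/strategy/esmm/tr"))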