Commit 36f93bf4 authored by 张彦钊

change test file

parent 6d2da688
...
@@ -143,19 +143,23 @@ def get_filename(dir_in):
         x.append(t)
     return x
 
-def parse_fn(record):
-    features = {
-        "y": tf.FixedLenFeature([], tf.float32),
-        "z": tf.FixedLenFeature([], tf.float32)
-    }
-    parsed = tf.parse_single_example(record, features)
-    y = parsed.pop('y')
-    z = parsed.pop('z')
-    return {"y": y, "z": z}
+def get_hdfs(dir_in):
+    pre_path = "hdfs://172.16.32.4:8020"
+    args = "hdfs dfs -ls " + dir_in + " | awk '{print $8}'"
+    proc = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
+    s_output, s_err = proc.communicate()
+    all_dart_dirs = s_output.split()
+    a = []
+    for i in all_dart_dirs:
+        b = str(i).split("/")[4]
+        if b[:4] == "part":
+            tmp = pre_path + str(i)[2:-1]
+            a.append(tmp)
+    return a
 
 if __name__ == '__main__':
+    a = get_hdfs("/strategy/esmm/tr")
+    print(a)
     # sparkConf = SparkConf().set("spark.hive.mapred.supports.subdirectories", "true") \
     #     .set("spark.hadoop.mapreduce.input.fileinputformat.input.dir.recursive", "true") \
     #     .set("spark.tispark.plan.allow_index_double_read", "false") \
...
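
Note on the new get_hdfs: it shells out to `hdfs dfs -ls`, and because subprocess.Popen returns bytes in Python 3, it strips the `b'...'` wrapper by slicing the repr with `str(i)[2:-1]`. Below is a minimal sketch of an equivalent helper that decodes the output explicitly instead; the namenode address and example path are the ones in the diff, while the name `list_hdfs_parts` is hypothetical and not part of this commit.

    import subprocess

    def list_hdfs_parts(dir_in, pre_path="hdfs://172.16.32.4:8020"):
        # List the part-* files under an HDFS directory, mirroring the
        # committed get_hdfs but decoding bytes instead of slicing repr().
        args = "hdfs dfs -ls " + dir_in + " | awk '{print $8}'"
        proc = subprocess.Popen(args, stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE, shell=True)
        s_output, _s_err = proc.communicate()
        paths = []
        for raw in s_output.split():
            path = raw.decode("utf-8")    # bytes -> str, replaces str(i)[2:-1]
            name = path.split("/")[-1]    # final component, e.g. "part-00000"
            if name.startswith("part"):   # same filter as b[:4] == "part"
                paths.append(pre_path + path)
        return paths

    # Usage, mirroring the __main__ block in the diff:
    # print(list_hdfs_parts("/strategy/esmm/tr"))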