Commit dc7eba46 authored by 张彦钊's avatar 张彦钊

change test file

parent a9639056
......@@ -128,18 +128,19 @@ def con_sql(db,sql):
return df
def get_filename(dir_in):
    """Return the 200 expected HDFS part-file paths under *dir_in*.

    Builds the paths
    ``hdfs://172.16.32.4:8020/strategy/esmm/<dir_in>/part-r-00000`` ...
    ``.../part-r-00199`` purely by string formatting, without querying HDFS.

    Args:
        dir_in: sub-directory name under ``/strategy/esmm/``
            (e.g. ``"tr"`` or ``"va"``).

    Returns:
        list[str]: the 200 fully-qualified part-file URIs, in order.
    """
    pre_add = "hdfs://172.16.32.4:8020/strategy/esmm/"
    # BUG FIX: the original branched on `10 <= x < 100` / `100 <= x < 200`,
    # comparing the result *list* x instead of the loop counter i, which
    # raises TypeError in Python 3 at i == 10.  Zero-padding the counter
    # with {:05d} reproduces the intended "0000i" / "000i" / "00i" suffixes
    # and removes the need for the three branches altogether.
    return [pre_add + dir_in + "/part-r-{:05d}".format(i) for i in range(200)]
if __name__ == '__main__':
# sparkConf = SparkConf().set("spark.hive.mapred.supports.subdirectories", "true") \
......@@ -170,9 +171,9 @@ if __name__ == '__main__':
# df.show()
# df.createOrReplaceTempView("df")
# t = spark.sql("select id from df").map()
tr_files = get_filename("/strategy/esmm/tr")
tr_files = get_filename("tr")
print(tr_files)
va_files = get_filename("/strategy/esmm/va")
va_files = get_filename("va")
print("test")
print(va_files)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment