Commit dc7eba46 authored by 张彦钊's avatar 张彦钊

change test file

parent a9639056
...@@ -128,18 +128,19 @@ def con_sql(db,sql): ...@@ -128,18 +128,19 @@ def con_sql(db,sql):
return df return df
def get_filename(dir_in):
    """Build the list of HDFS part-file paths for an esmm data set.

    Fixes a bug in the previous version, which wrote ``10 <= x < 100``
    (comparing the result *list* ``x`` to an int) instead of
    ``10 <= i < 100`` — a TypeError in Python 3 as soon as i reached 10.

    Args:
        dir_in: sub-directory name under /strategy/esmm/ (e.g. "tr", "va").

    Returns:
        List of 200 fully-qualified HDFS paths,
        .../part-r-00000 through .../part-r-00199.
    """
    pre_add = "hdfs://172.16.32.4:8020/strategy/esmm/"
    # Part files use a fixed 5-digit, zero-padded numeric suffix; zfill
    # replaces the three hand-written padding branches of the original.
    return [pre_add + dir_in + "/part-r-" + str(i).zfill(5) for i in range(200)]
if __name__ == '__main__': if __name__ == '__main__':
# sparkConf = SparkConf().set("spark.hive.mapred.supports.subdirectories", "true") \ # sparkConf = SparkConf().set("spark.hive.mapred.supports.subdirectories", "true") \
...@@ -170,9 +171,9 @@ if __name__ == '__main__': ...@@ -170,9 +171,9 @@ if __name__ == '__main__':
# df.show() # df.show()
# df.createOrReplaceTempView("df") # df.createOrReplaceTempView("df")
# t = spark.sql("select id from df").map() # t = spark.sql("select id from df").map()
tr_files = get_filename("/strategy/esmm/tr") tr_files = get_filename("tr")
print(tr_files) print(tr_files)
va_files = get_filename("/strategy/esmm/va") va_files = get_filename("va")
print("test") print("test")
print(va_files) print(va_files)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment