Commit 88c19ef1 authored by 张彦钊's avatar 张彦钊

change test file

parent ea33c974
...@@ -147,13 +147,13 @@ if __name__ == '__main__': ...@@ -147,13 +147,13 @@ if __name__ == '__main__':
# validate_date, value_map, app_list_map = feature() # validate_date, value_map, app_list_map = feature()
# get_predict(validate_date, value_map, app_list_map) # get_predict(validate_date, value_map, app_list_map)
spark = SparkSession.builder.getOrCreate() from hdfs import *
b = [("a", 1), ("a", 1), ("b", 3), ("a", 2)] client = Client("hdfs://172.16.32.4:8020")
rdd = spark.sparkContext.parallelize(b) path = "/strategy/esmm/tr"
df = spark.createDataFrame(rdd).toDF("id","n") for root, dir, files in client.walk(path):
df.show() for file in files:
t = df.select("id").rdd.map(lambda x:x[0]).collect() if file[:4] == "part":
print(t) print(file)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment