Commit cef1c8a8 authored by Your Name

test dist predict

parent f55f775b
...@@ -203,22 +203,22 @@ if __name__ == "__main__": ...@@ -203,22 +203,22 @@ if __name__ == "__main__":
df = spark.read.format("tfrecords").load(path+"test_native/part-r-00000") df = spark.read.format("tfrecords").load(path+"test_native/part-r-00000")
df.show() df.show()
te_files = [] # te_files = []
for i in range(0,10): # for i in range(0,10):
te_files.append([path + "test_native/part-r-0000" + str(i)]) # te_files.append([path + "test_native/part-r-0000" + str(i)])
for i in range(10,100): # for i in range(10,100):
te_files.append([path + "test_native/part-r-000" + str(i)]) # te_files.append([path + "test_native/part-r-000" + str(i)])
#
rdd_te_files = spark.sparkContext.parallelize(te_files) # rdd_te_files = spark.sparkContext.parallelize(te_files)
print("-"*100) # print("-"*100)
print(rdd_te_files.collect()) # print(rdd_te_files.collect())
print("-" * 100) # print("-" * 100)
indices = rdd_te_files.repartition(100).map(lambda x: main(x)) # indices = rdd_te_files.repartition(100).map(lambda x: main(x))
print(indices.take(1)) # print(indices.take(1))
#
te_result_dataframe = spark.createDataFrame(indices.flatMap(lambda x: x.split(";")).map(lambda l: Row(sample_id=l.split(":")[0],ctcvr=l.split(":")[1]))) # te_result_dataframe = spark.createDataFrame(indices.flatMap(lambda x: x.split(";")).map(lambda l: Row(sample_id=l.split(":")[0],ctcvr=l.split(":")[1])))
te_result_dataframe.show() # te_result_dataframe.show()
te_result_dataframe.toPandas().to_csv("/home/gmuser/esmm/native/pred.txt", header=True) # te_result_dataframe.toPandas().to_csv("/home/gmuser/esmm/native/pred.txt", header=True)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment