Commit 043e9155 authored by Your Name

dist predict success

parent c0e265ca
...@@ -228,27 +228,26 @@ if __name__ == "__main__": ...@@ -228,27 +228,26 @@ if __name__ == "__main__":
# df = spark.read.format("tfrecords").load(path+"test_native/part-r-00000") # df = spark.read.format("tfrecords").load(path+"test_native/part-r-00000")
# df.show() # df.show()
# te_files = [] te_files = []
# for i in range(0,10): for i in range(0,10):
# te_files.append([path + "test_native/part-r-0000" + str(i)]) te_files.append([path + "test_native/part-r-0000" + str(i)])
# for i in range(10,100): for i in range(10,100):
# te_files.append([path + "test_native/part-r-000" + str(i)]) te_files.append([path + "test_native/part-r-000" + str(i)])
te_files = ["hdfs://172.16.32.4:8020/strategy/esmm/test_native/part-r-00000"] # te_files = ["hdfs://172.16.32.4:8020/strategy/esmm/test_native/part-r-00000"]
main(te_files)
rdd_te_files = spark.sparkContext.parallelize(te_files)
# rdd_te_files = spark.sparkContext.parallelize(te_files) print("-" * 100)
# print("-" * 100) indices = rdd_te_files.repartition(100).map(lambda x: main(x))
# indices = rdd_te_files.repartition(100).map(lambda x: main(x)) # print(indices.take(1))
# # print(indices.take(1)) print("dist predict native")
# print("dist predict native")
te_result_dataframe = spark.createDataFrame(indices.flatMap(lambda x: x.split(";")).map(
# te_result_dataframe = spark.createDataFrame(indices.flatMap(lambda x: x.split(";")).map( lambda l: Row(sample_id=l.split(":")[0],uid=l.split(":")[1],city=l.split(":")[2],cid_id=l.split(":")[3],ctcvr=l.split(":")[4])))
# lambda l: Row(sample_id=l.split(":")[0],uid=l.split(":")[1],city=l.split(":")[2],cid_id=l.split(":")[3],ctcvr=l.split(":")[4])))
# # te_result_dataframe.show()
# # te_result_dataframe.show()
# te_result_dataframe.repartition(50).write.format("parquet").save(path=path+"native_result/",mode="overwrite")
# te_result_dataframe.repartition(50).write.format("parquet").save(path=path+"native_result/",mode="overwrite")
print("耗时(秒):") print("耗时(秒):")
print((time.time()-b)) print((time.time()-b))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment