Commit c0e265ca authored by Your Name

test

parent 9076bcda
...@@ -228,26 +228,27 @@ if __name__ == "__main__": ...@@ -228,26 +228,27 @@ if __name__ == "__main__":
# df = spark.read.format("tfrecords").load(path+"test_native/part-r-00000") # df = spark.read.format("tfrecords").load(path+"test_native/part-r-00000")
# df.show() # df.show()
te_files = [] # te_files = []
for i in range(0,10): # for i in range(0,10):
te_files.append([path + "test_native/part-r-0000" + str(i)]) # te_files.append([path + "test_native/part-r-0000" + str(i)])
for i in range(10,100): # for i in range(10,100):
te_files.append([path + "test_native/part-r-000" + str(i)]) # te_files.append([path + "test_native/part-r-000" + str(i)])
# te_files = ["hdfs://172.16.32.4:8020/strategy/esmm/test_native/part-r-00000"] te_files = ["hdfs://172.16.32.4:8020/strategy/esmm/test_native/part-r-00000"]
main(te_files)
rdd_te_files = spark.sparkContext.parallelize(te_files)
print("-" * 100) # rdd_te_files = spark.sparkContext.parallelize(te_files)
indices = rdd_te_files.repartition(100).map(lambda x: main(x)) # print("-" * 100)
# print(indices.take(1)) # indices = rdd_te_files.repartition(100).map(lambda x: main(x))
print("dist predict native") # # print(indices.take(1))
# print("dist predict native")
te_result_dataframe = spark.createDataFrame(indices.flatMap(lambda x: x.split(";")).map(
lambda l: Row(sample_id=l.split(":")[0],uid=l.split(":")[1],city=l.split(":")[2],cid_id=l.split(":")[3],ctcvr=l.split(":")[4]))) # te_result_dataframe = spark.createDataFrame(indices.flatMap(lambda x: x.split(";")).map(
# lambda l: Row(sample_id=l.split(":")[0],uid=l.split(":")[1],city=l.split(":")[2],cid_id=l.split(":")[3],ctcvr=l.split(":")[4])))
# te_result_dataframe.show() #
# # te_result_dataframe.show()
te_result_dataframe.repartition(50).write.format("parquet").save(path=path+"native_result/",mode="overwrite") #
# te_result_dataframe.repartition(50).write.format("parquet").save(path=path+"native_result/",mode="overwrite")
print("耗时(秒):") print("耗时(秒):")
print((time.time()-b)) print((time.time()-b))
......
...@@ -383,7 +383,7 @@ if __name__ == "__main__": ...@@ -383,7 +383,7 @@ if __name__ == "__main__":
b = time.time() b = time.time()
path = "hdfs://172.16.32.4:8020/strategy/esmm/" path = "hdfs://172.16.32.4:8020/strategy/esmm/"
tf.logging.set_verbosity(tf.logging.INFO) tf.logging.set_verbosity(tf.logging.INFO)
te_files = ["hdfs://172.16.32.4:8020/strategy/esmm/test_nearby/part-r-00000"] te_files = ["hdfs://172.16.32.4:8020/strategy/esmm/test_native/part-r-00000"]
print("hello up") print("hello up")
result = main(te_files) result = main(te_files)
df = pd.DataFrame(result,columns=["sample_id","uid","city","cid_id","pctcvr"]) df = pd.DataFrame(result,columns=["sample_id","uid","city","cid_id","pctcvr"])
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment