Commit c0e265ca authored by Your Name

test

parent 9076bcda
......@@ -228,26 +228,27 @@ if __name__ == "__main__":
# df = spark.read.format("tfrecords").load(path+"test_native/part-r-00000")
# df.show()
te_files = []
for i in range(0,10):
te_files.append([path + "test_native/part-r-0000" + str(i)])
for i in range(10,100):
te_files.append([path + "test_native/part-r-000" + str(i)])
# te_files = ["hdfs://172.16.32.4:8020/strategy/esmm/test_native/part-r-00000"]
rdd_te_files = spark.sparkContext.parallelize(te_files)
print("-" * 100)
indices = rdd_te_files.repartition(100).map(lambda x: main(x))
# print(indices.take(1))
print("dist predict native")
te_result_dataframe = spark.createDataFrame(indices.flatMap(lambda x: x.split(";")).map(
lambda l: Row(sample_id=l.split(":")[0],uid=l.split(":")[1],city=l.split(":")[2],cid_id=l.split(":")[3],ctcvr=l.split(":")[4])))
# te_result_dataframe.show()
te_result_dataframe.repartition(50).write.format("parquet").save(path=path+"native_result/",mode="overwrite")
# te_files = []
# for i in range(0,10):
# te_files.append([path + "test_native/part-r-0000" + str(i)])
# for i in range(10,100):
# te_files.append([path + "test_native/part-r-000" + str(i)])
te_files = ["hdfs://172.16.32.4:8020/strategy/esmm/test_native/part-r-00000"]
main(te_files)
# rdd_te_files = spark.sparkContext.parallelize(te_files)
# print("-" * 100)
# indices = rdd_te_files.repartition(100).map(lambda x: main(x))
# # print(indices.take(1))
# print("dist predict native")
# te_result_dataframe = spark.createDataFrame(indices.flatMap(lambda x: x.split(";")).map(
# lambda l: Row(sample_id=l.split(":")[0],uid=l.split(":")[1],city=l.split(":")[2],cid_id=l.split(":")[3],ctcvr=l.split(":")[4])))
#
# # te_result_dataframe.show()
#
# te_result_dataframe.repartition(50).write.format("parquet").save(path=path+"native_result/",mode="overwrite")
print("耗时(秒):")
print((time.time()-b))
......
......@@ -383,7 +383,7 @@ if __name__ == "__main__":
b = time.time()
path = "hdfs://172.16.32.4:8020/strategy/esmm/"
tf.logging.set_verbosity(tf.logging.INFO)
te_files = ["hdfs://172.16.32.4:8020/strategy/esmm/test_nearby/part-r-00000"]
te_files = ["hdfs://172.16.32.4:8020/strategy/esmm/test_native/part-r-00000"]
print("hello up")
result = main(te_files)
df = pd.DataFrame(result,columns=["sample_id","uid","city","cid_id","pctcvr"])
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment