Commit 64190400 authored by 宋柯

模型调试

parent 01a55b5e
...@@ -1021,9 +1021,10 @@ if __name__ == '__main__': ...@@ -1021,9 +1021,10 @@ if __name__ == '__main__':
train_samples.selectExpr("explode(split({multi_categoty_field},','))".format(multi_categoty_field = multi_categoty_field)).where(F.col(multi_categoty_field) != '-1').distinct().write.mode("overwrite").options(header="false").csv(output_file) train_samples.selectExpr("explode(split({multi_categoty_field},','))".format(multi_categoty_field = multi_categoty_field)).where(F.col(multi_categoty_field) != '-1').distinct().write.mode("overwrite").options(header="false").csv(output_file)
output_file = "file:///home/gmuser/train_samples" output_file = "file:///home/gmuser/train_samples"
train_samples.write.mode("overwrite").options(header="false").csv(output_file) train_samples.write.mode("overwrite").options(header="false", sep='|').csv(output_file)
output_file = "file:///home/gmuser/test_samples" output_file = "file:///home/gmuser/test_samples"
test_samples.write.mode("overwrite").options(header="false").csv(output_file) test_samples.write.mode("overwrite").options(header="false", sep='|').csv(output_file)
print("训练数据写入 耗时s:{}".format(time.time() - write_time_start)) print("训练数据写入 耗时s:{}".format(time.time() - write_time_start))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment