Commit 6f2d8504 authored by 宋柯

模型调试

parent 548becfd
...@@ -1024,12 +1024,14 @@ if __name__ == '__main__': ...@@ -1024,12 +1024,14 @@ if __name__ == '__main__':
vocab_redis_keys = [] vocab_redis_keys = []
for categoty_field in categoty_fields: for categoty_field in categoty_fields:
output_file = "file:///home/gmuser/" + categoty_field + "_vocab" output_file = "file:///home/gmuser/" + categoty_field + "_vocab"
output_file = "/strategy/" + categoty_field + "_vocab"
# train_samples.select(categoty_field).where(F.col(categoty_field) != '-1').where(F.col(categoty_field) != '').distinct().write.mode("overwrite").options(header="false").csv(output_file) # train_samples.select(categoty_field).where(F.col(categoty_field) != '-1').where(F.col(categoty_field) != '').distinct().write.mode("overwrite").options(header="false").csv(output_file)
categoty_field_rows = train_samples.select(categoty_field).where(F.col(categoty_field) != '-1').where(F.col(categoty_field) != '').distinct().collect() categoty_field_rows = train_samples.select(categoty_field).where(F.col(categoty_field) != '-1').where(F.col(categoty_field) != '').distinct().collect()
vocab_redis_keys.append("strategy:" + categoty_field + ":vocab") vocab_redis_keys.append("strategy:" + categoty_field + ":vocab")
saveVocab(vocab_redis_keys[-1], list(map(lambda row: row[categoty_field], categoty_field_rows))) saveVocab(vocab_redis_keys[-1], list(map(lambda row: row[categoty_field], categoty_field_rows)))
for multi_categoty_field in multi_categoty_fields: for multi_categoty_field in multi_categoty_fields:
output_file = "file:///home/gmuser/" + multi_categoty_field + "_vocab" output_file = "file:///home/gmuser/" + multi_categoty_field + "_vocab"
output_file = "/strategy/" + multi_categoty_field + "_vocab"
# train_samples.selectExpr("explode(split({multi_categoty_field},','))".format(multi_categoty_field = multi_categoty_field)).where(F.col(multi_categoty_field) != '-1').distinct().write.mode("overwrite").options(header="false").csv(output_file) # train_samples.selectExpr("explode(split({multi_categoty_field},','))".format(multi_categoty_field = multi_categoty_field)).where(F.col(multi_categoty_field) != '-1').distinct().write.mode("overwrite").options(header="false").csv(output_file)
multi_categoty_field_rows = train_samples.selectExpr("explode(split({multi_categoty_field},',')) as {multi_categoty_field}".format(multi_categoty_field = multi_categoty_field)).where(F.col(multi_categoty_field) != '-1').where(F.col(multi_categoty_field) != '').distinct().collect() multi_categoty_field_rows = train_samples.selectExpr("explode(split({multi_categoty_field},',')) as {multi_categoty_field}".format(multi_categoty_field = multi_categoty_field)).where(F.col(multi_categoty_field) != '-1').where(F.col(multi_categoty_field) != '').distinct().collect()
vocab_redis_keys.append("strategy:" + multi_categoty_field + ":vocab") vocab_redis_keys.append("strategy:" + multi_categoty_field + ":vocab")
...@@ -1038,9 +1040,11 @@ if __name__ == '__main__': ...@@ -1038,9 +1040,11 @@ if __name__ == '__main__':
saveVocab("strategy:all:vocab", vocab_redis_keys) saveVocab("strategy:all:vocab", vocab_redis_keys)
output_file = "file:///home/gmuser/train_samples" output_file = "file:///home/gmuser/train_samples"
output_file = "/strategy/train_samples"
train_samples.write.mode("overwrite").options(header="false", sep='|').csv(output_file) train_samples.write.mode("overwrite").options(header="false", sep='|').csv(output_file)
output_file = "file:///home/gmuser/test_samples" output_file = "file:///home/gmuser/test_samples"
output_file = "/strategy/test_samples"
test_samples.write.mode("overwrite").options(header="false", sep='|').csv(output_file) test_samples.write.mode("overwrite").options(header="false", sep='|').csv(output_file)
print("训练数据写入 耗时s:{}".format(time.time() - write_time_start)) print("训练数据写入 耗时s:{}".format(time.time() - write_time_start))
......
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment