模型调试

6f2d8504 · 宋柯 · 548becfd
Commit 6f2d8504 authored Dec 17, 2021 by 宋柯
Expand all Hide whitespace changes
Inline Side-by-side

Showing 3 changed files with 4 additions and 0 deletions

featureEngSk.py spark/featureEngSk.py +4 -0

train_service_sk.py train/train_service_sk.py +0 -0

train_service_sk_tf1.15.py train/train_service_sk_tf1.15.py +0 -0

No files found.
--- a/spark/featureEngSk.py
+++ b/spark/featureEngSk.py
@@ -1024,12 +1024,14 @@ if __name__ == '__main__':
    vocab_redis_keys = []
    for categoty_field in categoty_fields:
        output_file = "file:///home/gmuser/" + categoty_field + "_vocab"
+        output_file = "/strategy/" + categoty_field + "_vocab"
        # train_samples.select(categoty_field).where(F.col(categoty_field) != '-1').where(F.col(categoty_field) != '').distinct().write.mode("overwrite").options(header="false").csv(output_file)
        categoty_field_rows = train_samples.select(categoty_field).where(F.col(categoty_field) != '-1').where(F.col(categoty_field) != '').distinct().collect()
        vocab_redis_keys.append("strategy:" + categoty_field + ":vocab")
        saveVocab(vocab_redis_keys[-1], list(map(lambda row: row[categoty_field], categoty_field_rows)))
    for multi_categoty_field in multi_categoty_fields:
        output_file = "file:///home/gmuser/" + multi_categoty_field + "_vocab"
+        output_file = "/strategy/" + multi_categoty_field + "_vocab"
        # train_samples.selectExpr("explode(split({multi_categoty_field},','))".format(multi_categoty_field = multi_categoty_field)).where(F.col(multi_categoty_field) != '-1').distinct().write.mode("overwrite").options(header="false").csv(output_file)
        multi_categoty_field_rows = train_samples.selectExpr("explode(split({multi_categoty_field},',')) as {multi_categoty_field}".format(multi_categoty_field = multi_categoty_field)).where(F.col(multi_categoty_field) != '-1').where(F.col(multi_categoty_field) != '').distinct().collect()
        vocab_redis_keys.append("strategy:" + multi_categoty_field + ":vocab")
@@ -1038,9 +1040,11 @@ if __name__ == '__main__':
    saveVocab("strategy:all:vocab", vocab_redis_keys)
    output_file = "file:///home/gmuser/train_samples"
+    output_file = "/strategy/train_samples"
    train_samples.write.mode("overwrite").options(header="false", sep='|').csv(output_file)
    output_file = "file:///home/gmuser/test_samples"
+    output_file = "/strategy/test_samples"
    test_samples.write.mode("overwrite").options(header="false", sep='|').csv(output_file)
    print("训练数据写入 耗时s:{}".format(time.time() - write_time_start))

--- a/train/train_service_sk.py
+++ b/train/train_service_sk.py
--- a/train/train_service_sk_tf1.15.py
+++ b/train/train_service_sk_tf1.15.py