美购精排模型

d09fd710 · 郭羽 · 04316d6f · d09fd710 · d09fd710
Commit d09fd710 authored May 26, 2021 by 郭羽
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 4 additions and 2 deletions

train.py mlp/train.py +3 -1

featureEng.py spark/featureEng.py +1 -1

No files found.
--- a/mlp/train.py
+++ b/mlp/train.py
@@ -2,7 +2,10 @@ import tensorflow as tf
 import json
 import pandas as pd
 import time
+import sys
+import os
+sys.path.append(os.path.dirname(os.path.abspath(os.path.dirname(__file__))))
 import utils.connUtils as connUtils
 ITEM_NUMBER_COLUMNS = ["smart_rank2"]
@@ -37,7 +40,6 @@ def getDataVocabFromRedis(version):
 # 数据类型转换
 def csvTypeConvert(df,data_vocab):
-    df = df.fillna("-1")
    # 离散na值填充
    for k, v in data_vocab.items():
        df[k] = df[k].fillna("-1")

--- a/spark/featureEng.py
+++ b/spark/featureEng.py
@@ -186,7 +186,7 @@ def splitAndSaveTrainingTestSamplesByTimeStamp(samples,splitTimestamp, file_path
    # splitTimestamp = quantile[0]
    train = samples.where(F.col("timestampLong") <= splitTimestamp).drop("timestampLong")
    test = samples.where(F.col("timestampLong") > splitTimestamp).drop("timestampLong")
-    print("split train size:{},test size:".format(str(train.count()),str(test.count())))
+    print("split train size:{},test size:{}".format(str(train.count()),str(test.count())))
    trainingSavePath = file_path + '_train'
    testSavePath = file_path + '_test'
    train.write.option("header", "true").option("delimiter", "|").mode('overwrite').csv(trainingSavePath)