美购精排模型

e8059b5b · 郭羽 · e151a48f · e8059b5b
Commit e8059b5b authored Jun 04, 2021 by 郭羽
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 2 additions and 2 deletions

featureEng.py spark/featureEng.py +2 -2

No files found.
--- a/spark/featureEng.py
+++ b/spark/featureEng.py
@@ -242,6 +242,7 @@ def getDataVocab(samples):
        tagSet.add("-1") #空值默认
        multiVocab[c] = list(tagSet)
        samples = samples.drop(c)
+        print(c, len(dataVocab[c]))
    for c in samples.columns:
        # 判断是否以Bucket结尾 和 类别特征
@@ -253,14 +254,13 @@ def getDataVocab(samples):
                    vocabSet.add(str(d[c]))
            vocabSet.add("-1")# 空值的默认
            dataVocab[c] = list(vocabSet)
+            print(c, len(dataVocab[c]))
        else:
            # 判断是否多值离散列
            for cc, v in multiVocab.items():
                if c.count(cc) > 0:
                    dataVocab[c] = v
-        print(c,len(dataVocab[c]))
    return dataVocab
 def dataVocabToRedis(dataVocab):