Commit 4ad672a6 authored by 郭羽

美购精排模型

parent e8059b5b
......@@ -242,9 +242,9 @@ def getDataVocab(samples):
tagSet.add("-1") #空值默认
multiVocab[c] = list(tagSet)
samples = samples.drop(c)
print(c, len(dataVocab[c]))
for c in samples.columns:
print("col",c)
# 判断是否以Bucket结尾 和 类别特征
if c.endswith("Bucket") or c.endswith("userRatedHistory") or c in ITEM_CATE_COLUMNS+["userid","itemid"]:
datas = samples.select(c).distinct().collect()
......@@ -254,7 +254,6 @@ def getDataVocab(samples):
vocabSet.add(str(d[c]))
vocabSet.add("-1")# 空值的默认
dataVocab[c] = list(vocabSet)
print(c, len(dataVocab[c]))
else:
# 判断是否多值离散列
for cc, v in multiVocab.items():
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment