Commit 5f911d88 authored by 宋柯

模型调试

parent 6a2815e9
...@@ -174,6 +174,7 @@ def addItemFeatures(itemDF,dataVocab,multi_col_vocab): ...@@ -174,6 +174,7 @@ def addItemFeatures(itemDF,dataVocab,multi_col_vocab):
itemDF = itemDF.drop(columns=onehot_col) itemDF = itemDF.drop(columns=onehot_col)
for c in multi_col: for c in multi_col:
#TODO 这里多标签的应该拆开
multi_col_vocab[c] = list(set(itemDF[c].tolist())) multi_col_vocab[c] = list(set(itemDF[c].tolist()))
for i in range(1, 6): for i in range(1, 6):
...@@ -884,15 +885,13 @@ if __name__ == '__main__': ...@@ -884,15 +885,13 @@ if __name__ == '__main__':
print("dataVocab:") print("dataVocab:")
for k, v in dataVocab.items(): for k, v in dataVocab.items():
print(k, len(v), v) print(k, len(v))
itemDF_spark = spark.createDataFrame(itemDF) itemDF_spark = spark.createDataFrame(itemDF)
itemDF_spark.printSchema() itemDF_spark.printSchema()
itemDF_spark.show(10, truncate=False) itemDF_spark.show(10, truncate=False)
sys.exit(1)
# item统计特征处理 # item统计特征处理
itemStaticDF = addItemStaticFeatures(ratingSamplesWithLabel,itemDF_spark,dataVocab) itemStaticDF = addItemStaticFeatures(ratingSamplesWithLabel,itemDF_spark,dataVocab)
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment