Commit e41bc4ed authored by 郭羽

update feature

parent 50ce4106
......@@ -166,7 +166,7 @@ def addUserFeatures(samples,dataVocab,multiVocab):
.withColumn("userPositiveHistory", arrayReverseUdf(F.col("userPositiveHistory")))
for i in range(1,11):
samples = samples.withColumn("userRatedHistory"+str(i), F.when(F.col("userPositiveHistory")[i-1].isNotNull(),F.col("userPositiveHistory")[i-1]).otherwise("-1"))
# dataVocab["userRatedHistory"+str(i)] = dataVocab["itemid"]
dataVocab["userRatedHistory"+str(i)] = dataVocab["itemid"]
samples = samples.drop("userPositiveHistory")
# user历史点击分值统计
......@@ -216,7 +216,7 @@ def addUserFeatures(samples,dataVocab,multiVocab):
def addSampleLabel(ratingSamples):
    """Attach a binary ``label`` column derived from the ``rating`` column.

    Rows whose ``rating`` is at least 5 are labelled 1; every other row is
    labelled 0. This listing previously also contained a ``rating >= 8``
    assignment to the same column, which was immediately overwritten by the
    ``>= 5`` assignment; only the effective threshold is kept here.

    Args:
        ratingSamples: object exposing ``withColumn``, ``show`` and
            ``printSchema`` and containing a ``rating`` column
            (presumably a Spark DataFrame -- confirm against the caller).

    Returns:
        The result of ``withColumn``, i.e. the samples with ``label`` added.
    """
    ratingSamples = ratingSamples.withColumn('label', when(F.col('rating') >= 5, 1).otherwise(0))
    # Debug output: preview the first five rows untruncated, then the schema.
    ratingSamples.show(5, truncate=False)
    ratingSamples.printSchema()
    return ratingSamples
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment