Commit e43aecc7 authored by 郭羽

service model 优化

parent 55c12178
......@@ -202,14 +202,14 @@ def addUserFeatures(samples,dataVocab,multiVocab):
dataVocab["user_os"] = ["ios","android"]
extractTagsUdf = F.udf(extractTags, ArrayType(StringType()))
# arrayReverseUdf = F.udf(arrayReverse, ArrayType(StringType()))
arrayReverseUdf = F.udf(arrayReverse, ArrayType(StringType()))
print("user历史数据处理...")
# user历史记录
samples = samples.withColumn('userPositiveHistory',F.collect_list(when(F.col('label') == 1, F.col('item_id')).otherwise(F.lit(None))).over(sql.Window.partitionBy("userid").orderBy(F.col("timestamp")).rowsBetween(-100, -1)))
samples.select("userPositiveHistory").show(10,truncate=False)
samples.show(10,truncate=False)
samples = samples.withColumn("userPositiveHistory", arrayReverse(F.col("userPositiveHistory")))
samples.select("userPositiveHistory").show(10,truncate=False)
samples = samples.withColumn("userPositiveHistory", arrayReverseUdf(F.col("userPositiveHistory")))
samples.show(10,truncate=False)
for i in range(1,11):
samples = samples.withColumn("userRatedHistory"+str(i), F.when(F.col("userPositiveHistory")[i-1].isNotNull(),F.col("userPositiveHistory")[i-1]).otherwise("-1"))
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment