Commit 42f95d9f authored by 郭羽

美购精排模型

parent 70bb8fab
...@@ -133,12 +133,12 @@ def addUserFeatures(samples): ...@@ -133,12 +133,12 @@ def addUserFeatures(samples):
extractTagsUdf = F.udf(extractTags, ArrayType(StringType())) extractTagsUdf = F.udf(extractTags, ArrayType(StringType()))
samples = samples.withColumnRenamed("cl_id","userid") samples = samples.withColumnRenamed("cl_id","userid")
# user历史记录 # user历史记录
# samples = samples\ samples = samples\
# .withColumn('userPositiveHistory',F.collect_list(when(F.col('label') == 1, F.col('itemid')).otherwise(F.lit(None))).over(sql.Window.partitionBy("userid").orderBy(F.col("timestamp")).rowsBetween(-100, -1))) \ .withColumn('userPositiveHistory',F.collect_list(when(F.col('label') == 1, F.col('itemid')).otherwise(F.lit(None))).over(sql.Window.partitionBy("userid").orderBy(F.col("timestamp")).rowsBetween(-100, -1))) \
# .withColumn("userPositiveHistory", F.reverse(F.col("userPositiveHistory"))) .withColumn("userPositiveHistory", F.reverse(F.col("userPositiveHistory")))
# for i in range(1,11): for i in range(1,11):
# samples = samples.withColumn("userRatedHistory"+str(i), F.when(F.col("userPositiveHistory")[i-1].isNotNull(),F.col("userPositiveHistory")[i-1]).otherwise("-1")) samples = samples.withColumn("userRatedHistory"+str(i), F.when(F.col("userPositiveHistory")[i-1].isNotNull(),F.col("userPositiveHistory")[i-1]).otherwise("-1"))
# samples = samples.drop("userPositiveHistory") samples = samples.drop("userPositiveHistory")
# user历史点击分值统计 # user历史点击分值统计
samples = samples\ samples = samples\
...@@ -237,7 +237,7 @@ def getDataVocab(samples): ...@@ -237,7 +237,7 @@ def getDataVocab(samples):
for c in samples.columns: for c in samples.columns:
# 判断是否以Bucket结尾 和 类别特征 # 判断是否以Bucket结尾 和 类别特征
if c.endswith("Bucket") or c in ITEM_CATE_COLUMNS+["userid","itemid"]: if c.endswith("Bucket") or c.endswith("userRatedHistory") or c in ITEM_CATE_COLUMNS+["userid","itemid"]:
datas = samples.select(c).distinct().collect() datas = samples.select(c).distinct().collect()
vocabSet = set() vocabSet = set()
for d in datas: for d in datas:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment