Commit e151a48f authored by 郭羽

美购精排模型

parent b79cf1c6
......@@ -68,6 +68,8 @@ TRAIN_FILE_PATH = "service_feature_" + VERSION
def addItemFeatures(samples,itemDF):
prefix = "item_"
itemDF = itemDF.withColumnRenamed("id", "itemid")
# 数据过滤:无医生
itemDF = itemDF.filter(col("doctor_id") != "-1")
# null处理
for c in ITEM_NUMBER_COLUMNS:
......@@ -88,8 +90,6 @@ def addItemFeatures(samples,itemDF):
itemDF = itemDF.withColumn(new_c, F.when(F.col(new_c).isNull(), "-1").otherwise(F.col(new_c)))
samples = samples.join(itemDF, on=['itemid'], how='inner')
# 数据过滤:无医生
samples = samples.filter(col("doctor_id") != "-1")
# 统计特征处理
staticFeatures = samples.groupBy('itemid').agg(F.count(F.lit(1)).alias('itemRatingCount'),
F.avg(F.col('rating')).alias('itemRatingAvg'),
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment