Commit e151a48f authored by 郭羽

美购精排模型

parent b79cf1c6
...@@ -68,6 +68,8 @@ TRAIN_FILE_PATH = "service_feature_" + VERSION ...@@ -68,6 +68,8 @@ TRAIN_FILE_PATH = "service_feature_" + VERSION
def addItemFeatures(samples,itemDF): def addItemFeatures(samples,itemDF):
prefix = "item_" prefix = "item_"
itemDF = itemDF.withColumnRenamed("id", "itemid") itemDF = itemDF.withColumnRenamed("id", "itemid")
# 数据过滤:无医生
itemDF = itemDF.filter(col("doctor_id") != "-1")
# null处理 # null处理
for c in ITEM_NUMBER_COLUMNS: for c in ITEM_NUMBER_COLUMNS:
...@@ -88,8 +90,6 @@ def addItemFeatures(samples,itemDF): ...@@ -88,8 +90,6 @@ def addItemFeatures(samples,itemDF):
itemDF = itemDF.withColumn(new_c, F.when(F.col(new_c).isNull(), "-1").otherwise(F.col(new_c))) itemDF = itemDF.withColumn(new_c, F.when(F.col(new_c).isNull(), "-1").otherwise(F.col(new_c)))
samples = samples.join(itemDF, on=['itemid'], how='inner') samples = samples.join(itemDF, on=['itemid'], how='inner')
# 数据过滤:无医生
samples = samples.filter(col("doctor_id") != "-1")
# 统计特征处理 # 统计特征处理
staticFeatures = samples.groupBy('itemid').agg(F.count(F.lit(1)).alias('itemRatingCount'), staticFeatures = samples.groupBy('itemid').agg(F.count(F.lit(1)).alias('itemRatingCount'),
F.avg(F.col('rating')).alias('itemRatingAvg'), F.avg(F.col('rating')).alias('itemRatingAvg'),
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment