Commit 793d205a authored by 郭羽

美购精排模型

parent 5caf1641
......@@ -87,7 +87,7 @@ def getTrainColumns(train_columns,data_vocab):
columns.append(col)
dataColumns.append(feature)
elif feature in ITEM_NUMBER_COLUMNS or feature.endswith("RatingAvg") or feature.endswith("RatingStddev"):
elif feature in ITEM_NUMBER_COLUMNS:
col = tf.feature_column.numeric_column(feature)
columns.append(col)
dataColumns.append(feature)
......
......@@ -85,8 +85,8 @@ def addItemFeatures(samples,itemDF):
# pipelineStage.append(MinMaxScaler(inputCol=c, outputCol=c+"Scale"))
# bucketing
for c in ["case_count", "ordered_user_ids_count","itemRatingCount","lowest_price"]:
pipelineStage.append(QuantileDiscretizer(numBuckets=10, inputCol=c, outputCol=c + "Bucket"))
for c in ["case_count", "ordered_user_ids_count","itemRatingCount","lowest_price","itemRatingStddev","itemRatingAvg"]:
pipelineStage.append(QuantileDiscretizer(numBuckets=20, inputCol=c, outputCol=c + "Bucket"))
featurePipeline = Pipeline(stages=pipelineStage)
samples = featurePipeline.fit(samples).transform(samples)
......@@ -149,7 +149,7 @@ def addUserFeatures(samples):
# pipelineStage.append(MinMaxScaler(inputCol=c, outputCol=c + "Scale"))
# bucketing
for c in ["userRatingCount"]:
for c in ["userRatingCount","userRatingAvg","userRatingStddev"]:
pipelineStage.append(QuantileDiscretizer(numBuckets=10, inputCol=c, outputCol=c + "Bucket"))
featurePipeline = Pipeline(stages=pipelineStage)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment