Commit 25b34121 authored by 郭羽

service model 优化

parent 5e5fa6d6
...@@ -129,17 +129,17 @@ def addStaticsFeatures(samples,dataVocab): ...@@ -129,17 +129,17 @@ def addStaticsFeatures(samples,dataVocab):
bucket_suffix = "_Bucket" bucket_suffix = "_Bucket"
for col in ["userRatingCount","userRatingAvg","userClickCount","userExpCount","itemRatingCount","itemRatingAvg","itemClickCount","itemExpCount"]: for col in ["userRatingCount","userRatingAvg","userClickCount","userExpCount","itemRatingCount","itemRatingAvg","itemClickCount","itemExpCount"]:
new_col = col + bucket_suffix new_col = col + bucket_suffix
samples = samples.withColumn(new_col, numberToBucketUdf(F.col(col)).cast("int").cast("string")).drop(col) samples = samples.withColumn(new_col, F.when(F.col(col).isNull(),"0").otherwise(numberToBucketUdf(F.col(col)).cast("int").cast("string"))).drop(col)
dataVocab[new_col] = bucket_vocab dataVocab[new_col] = bucket_vocab
# 方差处理 # 方差处理
number_suffix = "_number" number_suffix = "_number"
for col in ["userRatingStddev","itemRatingStddev"]: for col in ["userRatingStddev","itemRatingStddev"]:
new_col = col + number_suffix new_col = col + number_suffix
samples = samples.withColumn(new_col,1/(F.col(col)+1)).drop(col) samples = samples.withColumn(new_col,F.when(F.col(col).isNull(),0).otherwise(1/(F.col(col)+1))).drop(col)
for col in ["userCtr", "itemCtr"]: for col in ["userCtr", "itemCtr"]:
new_col = col + number_suffix new_col = col + number_suffix
samples = samples.withColumnRenamed(col, new_col) samples = samples.withColumn(col, F.when(F.col(col).isNull(), 0).otherwise(F.col(col))).withColumnRenamed(col, new_col)
samples.printSchema() samples.printSchema()
samples.show(10, truncate=False) samples.show(10, truncate=False)
...@@ -814,7 +814,7 @@ if __name__ == '__main__': ...@@ -814,7 +814,7 @@ if __name__ == '__main__':
# model columns # model columns
print("model columns to redis...") print("model columns to redis...")
model_columns = user_columns + item_columns model_columns = user_columns + item_columns
featureColumnsToRedis(model_columns) # featureColumnsToRedis(model_columns)
print("数据字典save...") print("数据字典save...")
print("dataVocab:", str(dataVocab.keys())) print("dataVocab:", str(dataVocab.keys()))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment