Commit dbc3d28f authored by 郭羽

service model 优化

parent f370c36e
......@@ -78,8 +78,8 @@ def priceToBucket(num):
return str(res)
numberToBucketUdf = F.udf(numberToBucket, FloatType())
priceToBucketUdf = F.udf(priceToBucket, FloatType())
numberToBucketUdf = F.udf(numberToBucket, StringType())
priceToBucketUdf = F.udf(priceToBucket, StringType())
def addStaticsFeatures(samples,dataVocab):
print("user统计特征处理...")
......@@ -108,7 +108,7 @@ def addStaticsFeatures(samples,dataVocab):
bucket_suffix = "_Bucket"
for col in ["userRatingCount","userRatingAvg","userClickCount","userExpCount","itemRatingCount","itemRatingAvg","itemClickCount","itemExpCount"]:
new_col = col + bucket_suffix
samples = samples.withColumn(new_col, numberToBucketUdf(F.col(col)).cast("int").cast("string"))\
samples = samples.withColumn(new_col, numberToBucketUdf(F.col(col)))\
.drop(col)\
.withColumn(new_col,F.when(F.col(new_col).isNull(),"0").otherwise(F.col(new_col)))
dataVocab[new_col] = bucket_vocab
......@@ -334,7 +334,7 @@ def itemFeaturesToRedis(samples,itemDF,columns,redisKey):
resDatas.show(10,truncate=False)
resDatas = resDatas.select(*columns).distinct()
print("item size:",len(resDatas))
print("item size:",resDatas.count())
resDatas.repartition(8).foreachPartition(toRedis)
"""
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment