Commit b46f2484 authored by 郭羽

service model 优化

parent e43aecc7
......@@ -82,6 +82,8 @@ def parseTags(tags,i):
def numberToBucket(num):
res = 0
if not num:
return res
if num >= 1000:
res = 1000//10
else:
......@@ -90,6 +92,8 @@ def numberToBucket(num):
def priceToBucket(num):
res = 0
if not num:
return res
if num >= 100000:
res = 100000//1000
else:
......@@ -125,7 +129,7 @@ def addStaticsFeatures(samples,dataVocab):
bucket_suffix = "_Bucket"
for col in ["userRatingCount","userRatingAvg","userClickCount","userExpCount","itemRatingCount","itemRatingAvg","itemClickCount","itemExpCount"]:
new_col = col + bucket_suffix
samples = samples.withColumn(new_col, numberToBucketUdf(F.col(col))).drop(col)
samples = samples.withColumn(new_col, numberToBucketUdf(F.col(col)).cast("int").cast("string")).drop(col)
dataVocab[new_col] = bucket_vocab
# 方差处理
......@@ -138,6 +142,7 @@ def addStaticsFeatures(samples,dataVocab):
samples = samples.withColumnRenamed(col, new_col)
samples.printSchema()
samples.show(10, truncate=False)
return samples
def addItemFeatures(itemDF,dataVocab,multi_col_vocab):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment