Commit b46f2484 authored by 郭羽

service model 优化

parent e43aecc7
...@@ -82,6 +82,8 @@ def parseTags(tags,i): ...@@ -82,6 +82,8 @@ def parseTags(tags,i):
def numberToBucket(num): def numberToBucket(num):
res = 0 res = 0
if not num:
return res
if num >= 1000: if num >= 1000:
res = 1000//10 res = 1000//10
else: else:
...@@ -90,6 +92,8 @@ def numberToBucket(num): ...@@ -90,6 +92,8 @@ def numberToBucket(num):
def priceToBucket(num): def priceToBucket(num):
res = 0 res = 0
if not num:
return res
if num >= 100000: if num >= 100000:
res = 100000//1000 res = 100000//1000
else: else:
...@@ -125,7 +129,7 @@ def addStaticsFeatures(samples,dataVocab): ...@@ -125,7 +129,7 @@ def addStaticsFeatures(samples,dataVocab):
bucket_suffix = "_Bucket" bucket_suffix = "_Bucket"
for col in ["userRatingCount","userRatingAvg","userClickCount","userExpCount","itemRatingCount","itemRatingAvg","itemClickCount","itemExpCount"]: for col in ["userRatingCount","userRatingAvg","userClickCount","userExpCount","itemRatingCount","itemRatingAvg","itemClickCount","itemExpCount"]:
new_col = col + bucket_suffix new_col = col + bucket_suffix
samples = samples.withColumn(new_col, numberToBucketUdf(F.col(col))).drop(col) samples = samples.withColumn(new_col, numberToBucketUdf(F.col(col)).cast("int").cast("string")).drop(col)
dataVocab[new_col] = bucket_vocab dataVocab[new_col] = bucket_vocab
# 方差处理 # 方差处理
...@@ -138,6 +142,7 @@ def addStaticsFeatures(samples,dataVocab): ...@@ -138,6 +142,7 @@ def addStaticsFeatures(samples,dataVocab):
samples = samples.withColumnRenamed(col, new_col) samples = samples.withColumnRenamed(col, new_col)
samples.printSchema() samples.printSchema()
samples.show(10, truncate=False)
return samples return samples
def addItemFeatures(itemDF,dataVocab,multi_col_vocab): def addItemFeatures(itemDF,dataVocab,multi_col_vocab):
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment