Commit 6f7df6fc authored by 郭羽's avatar 郭羽

ctr 平滑

parent 10426904
......@@ -83,6 +83,7 @@ numberToBucketUdf = F.udf(numberToBucket, StringType())
priceToBucketUdf = F.udf(priceToBucket, StringType())
def addItemStaticFeatures(samples,itemDF,dataVocab):
ctrUdf = F.udf(wilson_ctr, ArrayType(float()))
# item不设置over窗口,原因:item可能一直存在,统计数据按照最新即可
print("item统计特征处理...")
staticFeatures = samples.groupBy('item_id').agg(F.count(F.lit(1)).alias('itemRatingCount'),
......@@ -93,7 +94,7 @@ def addItemStaticFeatures(samples,itemDF,dataVocab):
).fillna(0) \
.withColumn('itemRatingStddev', F.format_number(F.col('itemRatingStddev'), NUMBER_PRECISION).cast("float")) \
.withColumn('itemRatingAvg', F.format_number(F.col('itemRatingAvg'), NUMBER_PRECISION).cast("float")) \
.withColumn('itemCtr',F.format_number(F.col("itemClickCount") / (F.col("itemExpCount") + 1), NUMBER_PRECISION).cast("float"))
.withColumn('itemCtr',F.format_number(ctrUdf(F.col("itemClickCount"),(F.col("itemExpCount"))), NUMBER_PRECISION).cast("float"))
staticFeatures.show(20, truncate=False)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment