Commit 0cb723f3 authored by 郭羽

service model 优化

parent d142d185
......@@ -82,6 +82,7 @@ numberToBucketUdf = F.udf(numberToBucket, StringType())
priceToBucketUdf = F.udf(priceToBucket, StringType())
def addItemStaticFeatures(samples,itemDF,dataVocab):
# item不设置over窗口,原因:item可能一直存在,统计数据按照最新即可
print("item统计特征处理...")
staticFeatures = samples.groupBy('item_id').agg(F.count(F.lit(1)).alias('itemRatingCount'),
F.avg(F.col('rating')).alias('itemRatingAvg'),
......@@ -118,6 +119,7 @@ def addItemStaticFeatures(samples,itemDF,dataVocab):
print("item size:", staticFeatures.count())
staticFeatures.show(5, truncate=False)
return staticFeatures
def addUserStaticsFeatures(samples,dataVocab):
......@@ -880,7 +882,7 @@ if __name__ == '__main__':
itemDF_spark.printSchema()
itemDF_spark.show(10, truncate=False)
#user统计特征处理
# item统计特征处理
itemStaticDF = addItemStaticFeatures(ratingSamplesWithLabel,itemDF_spark,dataVocab)
# 统计数据处理
......@@ -897,7 +899,7 @@ if __name__ == '__main__':
user_columns = [c for c in samplesWithUserFeatures.columns if c.startswith("user")]
print("collect feature for user:{}".format(str(user_columns)))
# item columns
item_columns = addItemStaticFeatures.columns
item_columns = itemStaticDF.columns
print("collect feature for item:{}".format(str(item_columns)))
# model columns
print("model columns to redis...")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment