Commit 0cb723f3 authored by 郭羽

service model 优化

parent d142d185
No related merge requests found
...@@ -82,6 +82,7 @@ numberToBucketUdf = F.udf(numberToBucket, StringType()) ...@@ -82,6 +82,7 @@ numberToBucketUdf = F.udf(numberToBucket, StringType())
priceToBucketUdf = F.udf(priceToBucket, StringType()) priceToBucketUdf = F.udf(priceToBucket, StringType())
def addItemStaticFeatures(samples,itemDF,dataVocab): def addItemStaticFeatures(samples,itemDF,dataVocab):
# item不设置over窗口,原因:item可能一直存在,统计数据按照最新即可
print("item统计特征处理...") print("item统计特征处理...")
staticFeatures = samples.groupBy('item_id').agg(F.count(F.lit(1)).alias('itemRatingCount'), staticFeatures = samples.groupBy('item_id').agg(F.count(F.lit(1)).alias('itemRatingCount'),
F.avg(F.col('rating')).alias('itemRatingAvg'), F.avg(F.col('rating')).alias('itemRatingAvg'),
...@@ -118,6 +119,7 @@ def addItemStaticFeatures(samples,itemDF,dataVocab): ...@@ -118,6 +119,7 @@ def addItemStaticFeatures(samples,itemDF,dataVocab):
print("item size:", staticFeatures.count()) print("item size:", staticFeatures.count())
staticFeatures.show(5, truncate=False)
return staticFeatures return staticFeatures
def addUserStaticsFeatures(samples,dataVocab): def addUserStaticsFeatures(samples,dataVocab):
...@@ -880,7 +882,7 @@ if __name__ == '__main__': ...@@ -880,7 +882,7 @@ if __name__ == '__main__':
itemDF_spark.printSchema() itemDF_spark.printSchema()
itemDF_spark.show(10, truncate=False) itemDF_spark.show(10, truncate=False)
#user统计特征处理 # item统计特征处理
itemStaticDF = addItemStaticFeatures(ratingSamplesWithLabel,itemDF_spark,dataVocab) itemStaticDF = addItemStaticFeatures(ratingSamplesWithLabel,itemDF_spark,dataVocab)
# 统计数据处理 # 统计数据处理
...@@ -897,7 +899,7 @@ if __name__ == '__main__': ...@@ -897,7 +899,7 @@ if __name__ == '__main__':
user_columns = [c for c in samplesWithUserFeatures.columns if c.startswith("user")] user_columns = [c for c in samplesWithUserFeatures.columns if c.startswith("user")]
print("collect feature for user:{}".format(str(user_columns))) print("collect feature for user:{}".format(str(user_columns)))
# item columns # item columns
item_columns = addItemStaticFeatures.columns item_columns = itemStaticDF.columns
print("collect feature for item:{}".format(str(item_columns))) print("collect feature for item:{}".format(str(item_columns)))
# model columns # model columns
print("model columns to redis...") print("model columns to redis...")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment