Commit c8b33cee authored by 郭羽

service model 优化

parent 76fdb06e
......@@ -176,7 +176,6 @@ def addItemFeatures(itemDF,dataVocab,multi_col_vocab):
new_col = ITEM_PREFIX + col + number_suffix
itemDF = itemDF.withColumnRenamed(col, new_col)
itemDF.show(10, truncate=False)
return itemDF
def extractTags(genres_list):
......@@ -774,11 +773,16 @@ if __name__ == '__main__':
print("处理item特征, 耗时s:{}".format(timestmp2 - timestmp1))
print("multiVocab:")
print(multiVocab.keys())
itemDF_spark = spark.createDataFrame(itemDF)
itemDF_spark.printSchema()
itemDF_spark.show(10, truncate=False)
# 统计数据处理
ratingSamplesWithLabel = addStaticsFeatures(ratingSamplesWithLabel,dataVocab)
samples = ratingSamplesWithLabel.join(itemDF, on=['item_id'], how='inner')
samples = ratingSamplesWithLabel.join(itemDF_spark, on=['item_id'], how='inner')
print("处理user特征...")
samplesWithUserFeatures = addUserFeatures(samples,dataVocab,multiVocab)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment