Commit c8b33cee authored by 郭羽

service model 优化

parent 76fdb06e
...@@ -176,7 +176,6 @@ def addItemFeatures(itemDF,dataVocab,multi_col_vocab): ...@@ -176,7 +176,6 @@ def addItemFeatures(itemDF,dataVocab,multi_col_vocab):
new_col = ITEM_PREFIX + col + number_suffix new_col = ITEM_PREFIX + col + number_suffix
itemDF = itemDF.withColumnRenamed(col, new_col) itemDF = itemDF.withColumnRenamed(col, new_col)
itemDF.show(10, truncate=False)
return itemDF return itemDF
def extractTags(genres_list): def extractTags(genres_list):
...@@ -774,11 +773,16 @@ if __name__ == '__main__': ...@@ -774,11 +773,16 @@ if __name__ == '__main__':
print("处理item特征, 耗时s:{}".format(timestmp2 - timestmp1)) print("处理item特征, 耗时s:{}".format(timestmp2 - timestmp1))
print("multiVocab:") print("multiVocab:")
print(multiVocab.keys()) print(multiVocab.keys())
itemDF_spark = spark.createDataFrame(itemDF)
itemDF_spark.printSchema()
itemDF_spark.show(10, truncate=False)
# 统计数据处理 # 统计数据处理
ratingSamplesWithLabel = addStaticsFeatures(ratingSamplesWithLabel,dataVocab) ratingSamplesWithLabel = addStaticsFeatures(ratingSamplesWithLabel,dataVocab)
samples = ratingSamplesWithLabel.join(itemDF, on=['item_id'], how='inner')
samples = ratingSamplesWithLabel.join(itemDF_spark, on=['item_id'], how='inner')
print("处理user特征...") print("处理user特征...")
samplesWithUserFeatures = addUserFeatures(samples,dataVocab,multiVocab) samplesWithUserFeatures = addUserFeatures(samples,dataVocab,multiVocab)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment