service model 优化

c8b33cee · 郭羽 · 76fdb06e · c8b33cee
Commit c8b33cee authored Jul 30, 2021 by 郭羽
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 6 additions and 2 deletions

featureEng2.py spark/featureEng2.py +6 -2

No files found.
--- a/spark/featureEng2.py
+++ b/spark/featureEng2.py
@@ -176,7 +176,6 @@ def addItemFeatures(itemDF,dataVocab,multi_col_vocab):
        new_col = ITEM_PREFIX + col + number_suffix
        itemDF = itemDF.withColumnRenamed(col, new_col)
-    itemDF.show(10, truncate=False)
    return itemDF
 def extractTags(genres_list):
@@ -774,11 +773,16 @@ if __name__ == '__main__':
    print("处理item特征, 耗时s:{}".format(timestmp2 - timestmp1))
    print("multiVocab:")
    print(multiVocab.keys())
+    itemDF_spark = spark.createDataFrame(itemDF)
+    itemDF_spark.printSchema()
+    itemDF_spark.show(10, truncate=False)
    # 统计数据处理
    ratingSamplesWithLabel = addStaticsFeatures(ratingSamplesWithLabel,dataVocab)
-    samples = ratingSamplesWithLabel.join(itemDF, on=['item_id'], how='inner')
+    samples = ratingSamplesWithLabel.join(itemDF_spark, on=['item_id'], how='inner')
    print("处理user特征...")
    samplesWithUserFeatures = addUserFeatures(samples,dataVocab,multiVocab)