service model 优化

5e5fa6d6 · 郭羽 · b46f2484 · 5e5fa6d6
Commit 5e5fa6d6 authored Jul 30, 2021 by 郭羽
Show whitespace changes
Inline Side-by-side

Showing 1 changed file with 4 additions and 5 deletions

featureEng2.py spark/featureEng2.py +4 -5

No files found.
--- a/spark/featureEng2.py
+++ b/spark/featureEng2.py
@@ -83,7 +83,7 @@ def parseTags(tags,i):
 def numberToBucket(num):
    res = 0
    if not num:
-        return res
+        return str(res)
    if num >= 1000:
        res = 1000//10
    else:
@@ -93,7 +93,7 @@ def numberToBucket(num):
 def priceToBucket(num):
    res = 0
    if not num:
-        return res
+        return str(res)
    if num >= 100000:
        res = 100000//1000
    else:
@@ -627,7 +627,7 @@ def init_es_query():
        },
        "query": {
            "bool": {
-                "must": [],
+                "must": [{"term": {"is_online": True}}],
                "must_not": [],
                "should": []
            }
@@ -790,6 +790,7 @@ if __name__ == '__main__':
    for k, v in dataVocab.items():
        print(k, len(v))
    itemDF_spark = spark.createDataFrame(itemDF)
    itemDF_spark.printSchema()
    itemDF_spark.show(10, truncate=False)
@@ -797,8 +798,6 @@ if __name__ == '__main__':
    # 统计数据处理
    ratingSamplesWithLabel = addStaticsFeatures(ratingSamplesWithLabel,dataVocab)
    samples = ratingSamplesWithLabel.join(itemDF_spark, on=['item_id'], how='inner')
    print("处理user特征...")