Commit 5e5fa6d6 authored by 郭羽

service model 优化

parent b46f2484
...@@ -83,7 +83,7 @@ def parseTags(tags,i): ...@@ -83,7 +83,7 @@ def parseTags(tags,i):
def numberToBucket(num): def numberToBucket(num):
res = 0 res = 0
if not num: if not num:
return res return str(res)
if num >= 1000: if num >= 1000:
res = 1000//10 res = 1000//10
else: else:
...@@ -93,7 +93,7 @@ def numberToBucket(num): ...@@ -93,7 +93,7 @@ def numberToBucket(num):
def priceToBucket(num): def priceToBucket(num):
res = 0 res = 0
if not num: if not num:
return res return str(res)
if num >= 100000: if num >= 100000:
res = 100000//1000 res = 100000//1000
else: else:
...@@ -627,7 +627,7 @@ def init_es_query(): ...@@ -627,7 +627,7 @@ def init_es_query():
}, },
"query": { "query": {
"bool": { "bool": {
"must": [], "must": [{"term": {"is_online": True}}],
"must_not": [], "must_not": [],
"should": [] "should": []
} }
...@@ -790,6 +790,7 @@ if __name__ == '__main__': ...@@ -790,6 +790,7 @@ if __name__ == '__main__':
for k, v in dataVocab.items(): for k, v in dataVocab.items():
print(k, len(v)) print(k, len(v))
itemDF_spark = spark.createDataFrame(itemDF) itemDF_spark = spark.createDataFrame(itemDF)
itemDF_spark.printSchema() itemDF_spark.printSchema()
itemDF_spark.show(10, truncate=False) itemDF_spark.show(10, truncate=False)
...@@ -797,8 +798,6 @@ if __name__ == '__main__': ...@@ -797,8 +798,6 @@ if __name__ == '__main__':
# 统计数据处理 # 统计数据处理
ratingSamplesWithLabel = addStaticsFeatures(ratingSamplesWithLabel,dataVocab) ratingSamplesWithLabel = addStaticsFeatures(ratingSamplesWithLabel,dataVocab)
samples = ratingSamplesWithLabel.join(itemDF_spark, on=['item_id'], how='inner') samples = ratingSamplesWithLabel.join(itemDF_spark, on=['item_id'], how='inner')
print("处理user特征...") print("处理user特征...")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment