Commit 5e5fa6d6 authored by 郭羽

service model 优化

parent b46f2484
......@@ -83,7 +83,7 @@ def parseTags(tags,i):
def numberToBucket(num):
res = 0
if not num:
-return res
+return str(res)
if num >= 1000:
res = 1000//10
else:
......@@ -93,7 +93,7 @@ def numberToBucket(num):
def priceToBucket(num):
res = 0
if not num:
-return res
+return str(res)
if num >= 100000:
res = 100000//1000
else:
......@@ -627,7 +627,7 @@ def init_es_query():
},
"query": {
"bool": {
-"must": [],
+"must": [{"term": {"is_online": True}}],
"must_not": [],
"should": []
}
......@@ -790,6 +790,7 @@ if __name__ == '__main__':
for k, v in dataVocab.items():
print(k, len(v))
itemDF_spark = spark.createDataFrame(itemDF)
itemDF_spark.printSchema()
itemDF_spark.show(10, truncate=False)
......@@ -797,8 +798,6 @@ if __name__ == '__main__':
# 统计数据处理
ratingSamplesWithLabel = addStaticsFeatures(ratingSamplesWithLabel,dataVocab)
samples = ratingSamplesWithLabel.join(itemDF_spark, on=['item_id'], how='inner')
print("处理user特征...")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment