Commit 2cf954ad authored by 赵威

try all

parent 8e14a43a
......@@ -10,6 +10,24 @@ def merge_lists(*lol):
return [i for i in chain(*a) if i is not None]
def get_result(hits):
    """Bucket hit ids by their leading second-demand and merge the buckets.

    Every element of ``hits`` must expose a ``"_source"`` mapping. The
    optional ``"second_demands"`` entry of that mapping is read; hits where
    it is missing or empty are ignored. For the remaining hits the
    ``"id"`` is appended to the bucket keyed by ``second_demands[0]``.

    The buckets are ordered from largest to smallest and the resulting list
    of buckets is handed to ``merge_lists``; whatever that call returns is
    returned unchanged.
    """
    buckets_by_demand = {}
    for hit in hits:
        source = hit["_source"]
        demands = source.get("second_demands", [])
        if not demands:
            # Nothing to group this hit under.
            continue
        hit_id = source["id"]
        buckets_by_demand.setdefault(demands[0], []).append(hit_id)

    # sorted() is stable, so buckets of equal size keep first-seen order.
    ordered_buckets = sorted(buckets_by_demand.values(), key=len, reverse=True)

    # NOTE(review): merge_lists is declared as merge_lists(*lol) but receives
    # the list of buckets as ONE positional argument here - confirm this is
    # intended rather than merge_lists(*ordered_buckets).
    return merge_lists(ordered_buckets)
keywords = [
"瘦脸", "双眼皮", "补水", "美白嫩肤", "瘦小腿", "除皱", "隆鼻", "牙齿矫正", "脱毛", "祛斑", "控油", "面部祛脂", "隆胸", "垫鼻基底", "缩鼻翼", "生头发", "开眼角", "瘦肩", "祛痘",
"缩毛孔", "轮廓改善", "除眼底细纹", "眼部修复", "垫下巴", "颧骨内推", "除法令纹", "缩咬肌", "瘦臀部", "丰眼窝", "瘦手臂", "丰唇", "瘦腰腹", "瘦大腿", "填充泪沟", "洁面", "除面部细纹",
......@@ -132,23 +150,141 @@ def get_es_diary(keywords, city_tag_id=-1, version=False):
q["sort"] = sort_list
q["_source"] = {"includes": ["id", "second_demands"]}
es_res = es_query("diary", q, offset=0, size=5000)
diary_dict = {}
for diary_info in es_res["hits"]["hits"]:
second_demands = diary_info["_source"].get("second_demands", [])
if second_demands and len(second_demands) > 0:
id = diary_info["_source"]["id"]
sd = second_demands[0]
if sd not in diary_dict:
diary_dict[sd] = [id]
else:
diary_dict[sd].append(id)
res_pair = sorted(diary_dict.items(), key=lambda x: len(x[1]), reverse=True)
res_list = []
for _, v in res_pair:
res_list.append(v)
return merge_lists(res_list)
return get_result(es_res["hits"]["hits"])
def get_es_tractate(keywords, version=False):
    """Query the "tractate" index for ``keywords`` and return grouped ids.

    The request keeps only documents with ``is_online`` true and a
    ``content_level`` in ``[6, 5, 4, 3.5, 3]``, and requires at least one of
    the demand / solution / position / tag fields to match ``keywords``.
    Only ``id`` and ``second_demands`` are requested in ``_source``.

    Args:
        keywords: Value used for every ``terms`` clause of the ``should`` list.
        version: When truthy the second sort key is
            ``good_click_tractate_score``; otherwise ``tractate_score``.

    Returns:
        The result of ``get_result`` applied to the hits returned by
        ``es_query("tractate", ..., offset=0, size=5000)``.
    """
    matched_fields = (
        "first_demands",
        "second_demands",
        "first_solutions",
        "second_solutions",
        "positions",
        "second_positions",
        "tags_v3",
    )
    # The two sort variants differ only in the score field.
    score_field = "good_click_tractate_score" if version else "tractate_score"

    body = {
        "query": {
            "bool": {
                "filter": [
                    {"term": {"is_online": True}},
                    {"terms": {"content_level": [6, 5, 4, 3.5, 3]}},
                ],
                "should": [{"terms": {field: keywords}} for field in matched_fields],
                "minimum_should_match": 1,
            }
        },
        "_source": {"includes": ["id", "second_demands"]},
        "sort": [
            {"is_video": {"order": "asc"}},
            {score_field: {"order": "desc"}},
            {"good_click": {"order": "desc"}},
        ],
    }

    response = es_query("tractate", body, offset=0, size=5000)
    return get_result(response["hits"]["hits"])
def get_es_answer(keywords, version=False):
    """Query the "answer" index for ``keywords`` and return grouped ids.

    The request keeps only documents with ``content_length`` >= 30,
    ``is_online`` true and a ``content_level`` in
    ``["6", "5", "4", "3.5", "3"]``, and requires at least one of the
    demand / solution / position / tag fields to match ``keywords``.
    Only ``id`` and ``second_demands`` are requested in ``_source``.

    Args:
        keywords: Value used for every ``terms`` clause of the ``should`` list.
        version: When truthy the second sort key is ``smart_rank_v3``;
            otherwise ``smart_rank_v2``.

    Returns:
        The result of ``get_result`` applied to the hits returned by
        ``es_query("answer", ..., offset=0, size=5000)``.
    """
    matched_fields = (
        "first_demands",
        "second_demands",
        "first_solutions",
        "second_solutions",
        "positions",
        "second_positions",
        "tags_v3",
    )
    # The two sort variants differ only in the rank field.
    rank_field = "smart_rank_v3" if version else "smart_rank_v2"

    body = {
        "query": {
            "bool": {
                "filter": [
                    {"range": {"content_length": {"gte": 30}}},
                    {"term": {"is_online": True}},
                    {"terms": {"content_level": ["6", "5", "4", "3.5", "3"]}},
                ],
                "should": [{"terms": {field: keywords}} for field in matched_fields],
                "minimum_should_match": 1,
            }
        },
        "_source": {"includes": ["id", "second_demands"]},
        "sort": [
            {"has_picture": {"order": "desc"}},
            {rank_field: {"order": "desc"}},
            {"good_click": {"order": "desc"}},
        ],
    }

    response = es_query("answer", body, offset=0, size=5000)
    return get_result(response["hits"]["hits"])
if __name__ == "__main__":
    # Manual smoke run: print the full diary result first, then the first
    # ten entries returned by each of the three index queries.
    ids = get_es_diary(keywords)
    print(ids)
    for fetch in (get_es_diary, get_es_tractate, get_es_answer):
        print(fetch(keywords)[:10])
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment