Commit c37ba3aa authored by 赵威

Merge branch 'offic' into 'master'

Offic

See merge request !44
parents 35cd9025 4aa782a8
import json
from itertools import chain, zip_longest

import redis

from es_tool import es_query


def merge_lists(*lol):
    # Interleave the given lists round-robin, dropping the None padding
    # that zip_longest adds for the shorter lists.
    a = zip_longest(*lol)
    return [i for i in chain(*a) if i is not None]
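# For example, merge_lists([1, 2, 3], [4, 5], [6]) -> [1, 4, 6, 2, 5, 3]:
# one element from each list per round, skipping lists that are exhausted.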


def get_result(hits):
    # Group hit ids by the first second_demand tag of each document.
    res_dict = {}
    for info in hits:
        second_demands = info["_source"].get("second_demands", [])
        if second_demands:
            doc_id = info["_source"]["id"]
            sd = second_demands[0]
            if sd not in res_dict:
                res_dict[sd] = [doc_id]
            else:
                res_dict[sd].append(doc_id)
    # Largest groups first, then interleave so every demand shows up early.
    res_pair = sorted(res_dict.items(), key=lambda x: len(x[1]), reverse=True)
    res_list = []
    for _, v in res_pair:
        res_list.append(v)
    return merge_lists(*res_list)
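# Worked example: if three hits have first second_demands "祛斑", "祛斑", "补水"
# and ids 1, 2, 3, get_result groups them as {"祛斑": [1, 2], "补水": [3]} and
# returns the interleaved list [1, 3, 2].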


keywords = [
    "瘦脸", "双眼皮", "补水", "美白嫩肤", "瘦小腿", "除皱", "隆鼻", "牙齿矫正", "脱毛", "祛斑", "控油", "面部祛脂", "隆胸", "垫鼻基底", "缩鼻翼", "生头发", "开眼角", "瘦肩", "祛痘",
    "缩毛孔", "轮廓改善", "除眼底细纹", "眼部修复", "垫下巴", "颧骨内推", "除法令纹", "缩咬肌", "瘦臀部", "丰眼窝", "瘦手臂", "丰唇", "瘦腰腹", "瘦大腿", "填充泪沟", "洁面", "除面部细纹",
    "上眼睑提升", "大眼睛", "缩短眼距离", "除印第安纹", "瘦全身", "下巴改善", "祛双下巴", "祛痘印", "眼部护理", "缩窄下巴", "缩短下巴", "填充苹果肌", "祛痘坑", "填充卧蚕"
]


def get_es_diary(keywords, city_tag_id=-1, version=False):
    q = {}
    # Script sort first: boost diaries matching the user's city tag.
    sort_list = [{
        "_script": {
            "lang": "groovy",
            "script_file": "sort_diary-recommend",
            "type": "number",
            "params": {
                "user_city_tag_id": city_tag_id,
            },
            "order": "desc",
            "_cache": True,
        }
    }]
    if version:
        sort_list += [{
            "has_video_cover": {"order": "asc"}
        }, {
            "offline_score_v1": {"order": "desc"}
        }, {
            "good_click": {"order": "desc"}
        }, {
            "last_update_time": {"order": "desc"}
        }]
    else:
        sort_list += [{
            "has_video_cover": {"order": "asc"}
        }, {
            "offline_score": {"order": "desc"}
        }, {
            "good_click": {"order": "desc"}
        }, {
            "last_update_time": {"order": "desc"}
        }]
    # Match the keywords against every demand / solution / position / tag field.
    should_list = [{
        "terms": {"first_demands": keywords}
    }, {
        "terms": {"second_demands": keywords}
    }, {
        "terms": {"first_solutions": keywords}
    }, {
        "terms": {"second_solutions": keywords}
    }, {
        "terms": {"positions": keywords}
    }, {
        "terms": {"second_positions": keywords}
    }, {
        "terms": {"tags_v3": keywords}
    }]
    q["query"] = {
        "bool": {
            "filter": [{
                "term": {"is_online": True}
            }, {
                "term": {"has_cover": True}
            }, {
                "term": {"is_sink": False}
            }, {
                "term": {"has_after_cover": True}
            }, {
                "term": {"has_before_cover": True}
            }, {
                "terms": {"content_level": [6, 5, 4, 3.5, 3]}
            }],
            "should": should_list,
            "minimum_should_match": 1
        }
    }
    q["sort"] = sort_list
    q["_source"] = {"includes": ["id", "second_demands"]}
    es_res = es_query("diary", q, offset=0, size=5000)
    return get_result(es_res["hits"]["hits"])


def get_es_tractate(keywords, version=False):
    q = {}
    should_list = [{
        "terms": {"first_demands": keywords}
    }, {
        "terms": {"second_demands": keywords}
    }, {
        "terms": {"first_solutions": keywords}
    }, {
        "terms": {"second_solutions": keywords}
    }, {
        "terms": {"positions": keywords}
    }, {
        "terms": {"second_positions": keywords}
    }, {
        "terms": {"tags_v3": keywords}
    }]
    q["query"] = {
        "bool": {
            "filter": [{
                "term": {"is_online": True}
            }, {
                "terms": {"content_level": [6, 5, 4, 3.5, 3]}
            }],
            "should": should_list,
            "minimum_should_match": 1
        }
    }
    q["_source"] = {"includes": ["id", "second_demands"]}
    # Non-video posts first, then by the (versioned) tractate score and clicks.
    if version:
        q["sort"] = [{
            "is_video": {"order": "asc"}
        }, {
            "good_click_tractate_score": {"order": "desc"}
        }, {
            "good_click": {"order": "desc"}
        }]
    else:
        q["sort"] = [{
            "is_video": {"order": "asc"}
        }, {
            "tractate_score": {"order": "desc"}
        }, {
            "good_click": {"order": "desc"}
        }]
    es_res = es_query("tractate", q, offset=0, size=5000)
    return get_result(es_res["hits"]["hits"])


def get_es_answer(keywords, version=False):
    q = {}
    # Prefer answers with pictures, then rank by smart_rank and click score.
    if version:
        sort_list = [{
            "has_picture": {"order": "desc"}
        }, {
            "smart_rank_v3": {"order": "desc"}
        }, {
            "good_click": {"order": "desc"}
        }]
    else:
        sort_list = [{
            "has_picture": {"order": "desc"}
        }, {
            "smart_rank_v2": {"order": "desc"}
        }, {
            "good_click": {"order": "desc"}
        }]
    should_list = [{
        "terms": {"first_demands": keywords}
    }, {
        "terms": {"second_demands": keywords}
    }, {
        "terms": {"first_solutions": keywords}
    }, {
        "terms": {"second_solutions": keywords}
    }, {
        "terms": {"positions": keywords}
    }, {
        "terms": {"second_positions": keywords}
    }, {
        "terms": {"tags_v3": keywords}
    }]
    q["query"] = {
        "bool": {
            "filter": [{
                "range": {"content_length": {"gte": 30}}
            }, {
                "term": {"is_online": True}
            }, {
                "terms": {"content_level": ["6", "5", "4", "3.5", "3"]}
            }],
            "should": should_list,
            "minimum_should_match": 1
        }
    }
    q["_source"] = {"includes": ["id", "second_demands"]}
    q["sort"] = sort_list
    es_res = es_query("answer", q, offset=0, size=5000)
    return get_result(es_res["hits"]["hits"])


if __name__ == "__main__":
    # print("diary: " + str(get_es_diary(keywords)[:10]) + "\n")
    # print("tractate: " + str(get_es_tractate(keywords)[:10]) + "\n")
    # print("answer: " + str(get_es_answer(keywords)[:10]) + "\n")
    diary_list = get_es_diary(keywords)
    tractate_list = get_es_tractate(keywords)
    answer_list = get_es_answer(keywords)

    # Push the cold-start candidate ids into Redis lists, one list per content type.
    redis_client = redis.StrictRedis.from_url("redis://:ReDis!GmTx*0aN9@172.16.40.173:6379")
    redis_client.rpush("doris:tag_v3:coldstart:diary", *diary_list)
    redis_client.rpush("doris:tag_v3:coldstart:tractate", *tractate_list)
    redis_client.rpush("doris:tag_v3:coldstart:answer", *answer_list)

    print("diary: " + str(len(diary_list)))
    print("tractate: " + str(len(tractate_list)))
    print("answer: " + str(len(answer_list)))