Commit 6c96b6f8 authored by 段英荣

modify search

parent 4d9341b6
...@@ -163,17 +163,7 @@ class TopicUtils(object): ...@@ -163,17 +163,7 @@ class TopicUtils(object):
q["query"] = dict() q["query"] = dict()
functions_list = [ functions_list = list()
{
"gauss": {
"create_time": {
"scale": "1d",
"decay": 0.99
}
},
"weight": 60
}
]
query_function_score = { query_function_score = {
"query": { "query": {
...@@ -204,7 +194,7 @@ class TopicUtils(object): ...@@ -204,7 +194,7 @@ class TopicUtils(object):
] ]
} }
}, },
"score_mode": "max", "score_mode": "sum",
"boost_mode": "sum", "boost_mode": "sum",
"functions": functions_list "functions": functions_list
} }
...@@ -228,26 +218,11 @@ class TopicUtils(object): ...@@ -228,26 +218,11 @@ class TopicUtils(object):
query_function_score["query"]["bool"]["must_not"] = [ query_function_score["query"]["bool"]["must_not"] = [
{"terms": {"user_id": linucb_user_id_list}} {"terms": {"user_id": linucb_user_id_list}}
] ]
if query is not None: # 搜索帖子 q["_source"] = {
multi_match = { "includes": ["id", "highlight", "description"]
'query': query, }
'type': 'best_fields',
'operator': 'or',
'fields': ["content", "tag_name_list"],
}
query_function_score["boost_mode"] = "replace"
tag_ids = get_same_tagset_ids(tag_id) if query is None:
query_function_score["query"]["bool"]["should"] = [
{'multi_match': multi_match},
{"terms": {"tag_list": tag_ids}},
{"term": {"user_nick_name_pre": query.lower()}}
]
query_function_score["query"]["bool"]["minimum_should_match"] = 1
query_function_score["query"]["bool"]["filter"].append(
{"range": {"content_level": {"gte": 3, "lte": 6}}}
)
else:
if "must_not" in query_function_score["query"]["bool"]: if "must_not" in query_function_score["query"]["bool"]:
query_function_score["query"]["bool"]["must_not"] += [ query_function_score["query"]["bool"]["must_not"] += [
{"term": {"is_operation_home_recommend": True}} {"term": {"is_operation_home_recommend": True}}
...@@ -264,19 +239,26 @@ class TopicUtils(object): ...@@ -264,19 +239,26 @@ class TopicUtils(object):
query_function_score["query"]["bool"]["filter"].append( query_function_score["query"]["bool"]["filter"].append(
{"term": {"content_level": 6}} {"term": {"content_level": 6}}
) )
q["_source"] = {
"includes": ["id", "highlight", "description"]
}
if query is None: # if user_id and user_id > 0:
if user_id and user_id > 0: # redis_key_prefix = "physical:user_similar:participant_user_id:"
redis_key_prefix = "physical:user_similar:participant_user_id:" # similar_redis_key = redis_key_prefix + str(user_id)
similar_redis_key = redis_key_prefix + str(user_id) # redis_user_similar_data = redis_client.get(similar_redis_key)
redis_user_similar_data = redis_client.get(similar_redis_key) # user_similar_list = json.loads(redis_user_similar_data) if redis_user_similar_data else []
user_similar_list = json.loads(redis_user_similar_data) if redis_user_similar_data else [] # if len(user_similar_list) > 0:
if len(user_similar_list) > 0: # functions_list.extend(user_similar_list)
functions_list.extend(user_similar_list)
functions_list.append(
{
"gauss": {
"create_time": {
"scale": "1d",
"decay": 0.99
}
},
"weight": 60
}
)
if len(attention_user_id_list) > 0: if len(attention_user_id_list) > 0:
functions_list.append( functions_list.append(
{ {
...@@ -334,87 +316,81 @@ class TopicUtils(object): ...@@ -334,87 +316,81 @@ class TopicUtils(object):
multi_match = { multi_match = {
'query': query, 'query': query,
'type': 'best_fields', 'type': 'best_fields',
'operator': 'and', 'operator': 'or',
'fields': ["content", "tag_name_list"], 'fields': ["content", "tag_name_list"],
"analyzer":"gm_default_index"
}
query_function_score["boost_mode"] = "replace"
same_tagset_ids = get_same_tagset_ids(tag_id)
query_function_score["query"]["bool"]["should"] = [
{'multi_match': multi_match},
{"terms": {"tag_list": same_tagset_ids}},
{"term": {"user_nick_name_pre": query.lower()}}
]
query_function_score["query"]["bool"]["minimum_should_match"] = 1
query_function_score["query"]["bool"]["filter"].append(
{"range": {"content_level": {"gte": 3, "lte": 6}}}
)
collection_redis_key_name="physical:official_tag_name_set"
collect_tag_name_set=set()
body = {
'text': query,
'analyzer': "gm_default_search"
}
analyze_res = ESPerform.get_analyze_results(es_cli=ESPerform.get_cli(), sub_index_name="topic",query_body=body)
for item in analyze_res["tokens"]:
token_word = item["token"]
is_member = redis_client.sismember(collection_redis_key_name, token_word)
if is_member:
collect_tag_name_set.add(token_word)
query_fields = ["content", "tag_name_list"]
multi_match = {
'query': query,
'type': 'best_fields',
'operator': 'and',
'fields': query_fields,
} }
functions_list += [ functions_list += [
{ {
"weight": 400, "weight":10,
"filter": { "filter":{
"constant_score": { "term":{
"filter": { "language_type":1
"term": {"user_nick_name_pre": query.lower()}
}
}
}
},
{
"weight": 400,
"filter": {
"constant_score": {
"filter": {
"bool": {
"must": {
"term": {"content_level": 6},
},
"minimum_should_match": 1,
"should": [
{'match_phrase': {"content": query}},
{'match_phrase': {"tag_name_list": query}},
# {'multi_match': multi_match},
{"term": {"tag_list": tag_id}},
{"term": {"user_nick_name_pre": query.lower()}}
]
}
}
}
}
},
{
"weight": 400,
"filter": {
"constant_score": {
"filter": {
"bool": {
"must": {
"term": {"content_level": 5},
},
"minimum_should_match": 1,
"should": [
{'match_phrase': {"content": query}},
{'match_phrase': {"tag_name_list": query}},
# {'multi_match': multi_match},
{"term": {"tag_list": tag_id}},
{"term": {"user_nick_name_pre": query.lower()}}
]
}
}
} }
} }
}, },
{ {
"weight": 400, "weight": 1000,
"filter": { "filter": {
"constant_score": { "bool": {
"filter": { "minimum_should_match": 1,
"bool": { "should": [
"must": { {'match_phrase': {"content": query}},
"term": {"content_level": 4}, {'match_phrase': {"tag_name_list": query}},
}, # {'multi_match': multi_match},
"minimum_should_match": 1, {"terms": {"tag_list": same_tagset_ids}},
"should": [ {"term": {"user_nick_name_pre": query.lower()}}
{'match_phrase': {"content": query}}, ]
{'match_phrase': {"tag_name_list": query}},
# {'multi_match': multi_match},
{"term": {"tag_list": tag_id}},
{"term": {"user_nick_name_pre": query.lower()}}
]
}
}
} }
} }
} }
] ]
for query_item in collect_tag_name_set:
for field_item in query_fields:
term_dict = {
"filter": {
"term": {
field_item: query_item
}
},
"weight": 5
}
functions_list.append(term_dict)
query_function_score["functions"] = functions_list query_function_score["functions"] = functions_list
q["query"]["function_score"] = query_function_score q["query"]["function_score"] = query_function_score
q["sort"] = [ q["sort"] = [
......
...@@ -29,6 +29,12 @@ from search.utils.common import * ...@@ -29,6 +29,12 @@ from search.utils.common import *
from linucb.views.collect_data import CollectData from linucb.views.collect_data import CollectData
from injection.data_sync.tasks import sync_user_similar_score from injection.data_sync.tasks import sync_user_similar_score
from trans2es.models.tag import Tag
from libs.cache import redis_client
from trans2es.models.tag import TopicTag
class Job(object): class Job(object):
__es = None __es = None
...@@ -189,6 +195,20 @@ class Command(BaseCommand): ...@@ -189,6 +195,20 @@ class Command(BaseCommand):
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
def sync_tag_collecction_name_set(self):
    """Add the names of online collection tags to a Redis set.

    Reads every ``TopicTag`` row with ``is_online=True`` and
    ``is_collection=True``, looks up the matching ``Tag`` (online, not
    deleted, not a category) and adds its non-empty name to the Redis set
    ``physical:official_tag_name_set``.

    Names are only ever added here; this method never removes entries from
    the set. Any error is logged and suppressed, so the sync is
    best-effort: names added before a failure stay in Redis.
    """
    collection_redis_key_name = "physical:official_tag_name_set"
    try:
        tag_id_list = TopicTag.objects.filter(
            is_online=True, is_collection=True
        ).values_list("tag_id", flat=True)
        for tag_id in tag_id_list:
            # first() yields the name or None, replacing the manual
            # "non-empty queryset and truthy first element" check.
            tag_name = Tag.objects.filter(
                id=tag_id, is_online=True, is_deleted=False, is_category=False
            ).values_list("name", flat=True).first()
            if tag_name:
                redis_client.sadd(collection_redis_key_name, tag_name)
    except Exception:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt / SystemExit are not swallowed here.
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
def handle(self, *args, **options): def handle(self, *args, **options):
try: try:
type_name_list = get_type_info_map().keys() type_name_list = get_type_info_map().keys()
...@@ -211,5 +231,9 @@ class Command(BaseCommand): ...@@ -211,5 +231,9 @@ class Command(BaseCommand):
if len(options["sync_type"]) and options["sync_type"]=="similar": if len(options["sync_type"]) and options["sync_type"]=="similar":
sync_user_similar_score() sync_user_similar_score()
if len(options["sync_type"]) and options["sync_type"]=="tagname":
self.sync_tag_collecction_name_set()
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment