Commit 6c96b6f8 authored by 段英荣

modify search

parent 4d9341b6
......@@ -163,17 +163,7 @@ class TopicUtils(object):
q["query"] = dict()
functions_list = [
{
"gauss": {
"create_time": {
"scale": "1d",
"decay": 0.99
}
},
"weight": 60
}
]
functions_list = list()
query_function_score = {
"query": {
......@@ -204,7 +194,7 @@ class TopicUtils(object):
]
}
},
"score_mode": "max",
"score_mode": "sum",
"boost_mode": "sum",
"functions": functions_list
}
......@@ -228,26 +218,11 @@ class TopicUtils(object):
query_function_score["query"]["bool"]["must_not"] = [
{"terms": {"user_id": linucb_user_id_list}}
]
if query is not None: # 搜索帖子
multi_match = {
'query': query,
'type': 'best_fields',
'operator': 'or',
'fields': ["content", "tag_name_list"],
}
query_function_score["boost_mode"] = "replace"
q["_source"] = {
"includes": ["id", "highlight", "description"]
}
tag_ids = get_same_tagset_ids(tag_id)
query_function_score["query"]["bool"]["should"] = [
{'multi_match': multi_match},
{"terms": {"tag_list": tag_ids}},
{"term": {"user_nick_name_pre": query.lower()}}
]
query_function_score["query"]["bool"]["minimum_should_match"] = 1
query_function_score["query"]["bool"]["filter"].append(
{"range": {"content_level": {"gte": 3, "lte": 6}}}
)
else:
if query is None:
if "must_not" in query_function_score["query"]["bool"]:
query_function_score["query"]["bool"]["must_not"] += [
{"term": {"is_operation_home_recommend": True}}
......@@ -264,19 +239,26 @@ class TopicUtils(object):
query_function_score["query"]["bool"]["filter"].append(
{"term": {"content_level": 6}}
)
q["_source"] = {
"includes": ["id", "highlight", "description"]
}
if query is None:
if user_id and user_id > 0:
redis_key_prefix = "physical:user_similar:participant_user_id:"
similar_redis_key = redis_key_prefix + str(user_id)
redis_user_similar_data = redis_client.get(similar_redis_key)
user_similar_list = json.loads(redis_user_similar_data) if redis_user_similar_data else []
if len(user_similar_list) > 0:
functions_list.extend(user_similar_list)
# if user_id and user_id > 0:
# redis_key_prefix = "physical:user_similar:participant_user_id:"
# similar_redis_key = redis_key_prefix + str(user_id)
# redis_user_similar_data = redis_client.get(similar_redis_key)
# user_similar_list = json.loads(redis_user_similar_data) if redis_user_similar_data else []
# if len(user_similar_list) > 0:
# functions_list.extend(user_similar_list)
functions_list.append(
{
"gauss": {
"create_time": {
"scale": "1d",
"decay": 0.99
}
},
"weight": 60
}
)
if len(attention_user_id_list) > 0:
functions_list.append(
{
......@@ -334,87 +316,81 @@ class TopicUtils(object):
multi_match = {
'query': query,
'type': 'best_fields',
'operator': 'and',
'operator': 'or',
'fields': ["content", "tag_name_list"],
"analyzer":"gm_default_index"
}
query_function_score["boost_mode"] = "replace"
same_tagset_ids = get_same_tagset_ids(tag_id)
query_function_score["query"]["bool"]["should"] = [
{'multi_match': multi_match},
{"terms": {"tag_list": same_tagset_ids}},
{"term": {"user_nick_name_pre": query.lower()}}
]
query_function_score["query"]["bool"]["minimum_should_match"] = 1
query_function_score["query"]["bool"]["filter"].append(
{"range": {"content_level": {"gte": 3, "lte": 6}}}
)
collection_redis_key_name="physical:official_tag_name_set"
collect_tag_name_set=set()
body = {
'text': query,
'analyzer': "gm_default_search"
}
analyze_res = ESPerform.get_analyze_results(es_cli=ESPerform.get_cli(), sub_index_name="topic",query_body=body)
for item in analyze_res["tokens"]:
token_word = item["token"]
is_member = redis_client.sismember(collection_redis_key_name, token_word)
if is_member:
collect_tag_name_set.add(token_word)
query_fields = ["content", "tag_name_list"]
multi_match = {
'query': query,
'type': 'best_fields',
'operator': 'and',
'fields': query_fields,
}
functions_list += [
{
"weight": 400,
"filter": {
"constant_score": {
"filter": {
"term": {"user_nick_name_pre": query.lower()}
}
}
}
},
{
"weight": 400,
"filter": {
"constant_score": {
"filter": {
"bool": {
"must": {
"term": {"content_level": 6},
},
"minimum_should_match": 1,
"should": [
{'match_phrase': {"content": query}},
{'match_phrase': {"tag_name_list": query}},
# {'multi_match': multi_match},
{"term": {"tag_list": tag_id}},
{"term": {"user_nick_name_pre": query.lower()}}
]
}
}
}
}
},
{
"weight": 400,
"filter": {
"constant_score": {
"filter": {
"bool": {
"must": {
"term": {"content_level": 5},
},
"minimum_should_match": 1,
"should": [
{'match_phrase': {"content": query}},
{'match_phrase': {"tag_name_list": query}},
# {'multi_match': multi_match},
{"term": {"tag_list": tag_id}},
{"term": {"user_nick_name_pre": query.lower()}}
]
}
}
"weight":10,
"filter":{
"term":{
"language_type":1
}
}
},
{
"weight": 400,
"weight": 1000,
"filter": {
"constant_score": {
"filter": {
"bool": {
"must": {
"term": {"content_level": 4},
},
"minimum_should_match": 1,
"should": [
{'match_phrase': {"content": query}},
{'match_phrase': {"tag_name_list": query}},
# {'multi_match': multi_match},
{"term": {"tag_list": tag_id}},
{"term": {"user_nick_name_pre": query.lower()}}
]
}
}
"bool": {
"minimum_should_match": 1,
"should": [
{'match_phrase': {"content": query}},
{'match_phrase': {"tag_name_list": query}},
# {'multi_match': multi_match},
{"terms": {"tag_list": same_tagset_ids}},
{"term": {"user_nick_name_pre": query.lower()}}
]
}
}
}
]
for query_item in collect_tag_name_set:
for field_item in query_fields:
term_dict = {
"filter": {
"term": {
field_item: query_item
}
},
"weight": 5
}
functions_list.append(term_dict)
query_function_score["functions"] = functions_list
q["query"]["function_score"] = query_function_score
q["sort"] = [
......
......@@ -29,6 +29,12 @@ from search.utils.common import *
from linucb.views.collect_data import CollectData
from injection.data_sync.tasks import sync_user_similar_score
from trans2es.models.tag import Tag
from libs.cache import redis_client
from trans2es.models.tag import TopicTag
class Job(object):
__es = None
......@@ -189,6 +195,20 @@ class Command(BaseCommand):
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
def sync_tag_collecction_name_set(self):
    """Add the names of online collection tags to a Redis set.

    Reads every ``TopicTag`` row with ``is_online=True`` and
    ``is_collection=True``, resolves the matching ``Tag`` rows that are
    online, not deleted and not categories, and adds each non-empty tag
    name to the Redis set ``physical:official_tag_name_set``.

    Names are only ever added here; nothing in this method removes a name
    from the set once its tag stops matching the filters.

    Any ``Exception`` raised while querying or writing is logged with its
    traceback and suppressed, so the method never propagates an error to
    its caller. ``KeyboardInterrupt`` / ``SystemExit`` are deliberately not
    caught so the command can still be interrupted.
    """
    try:
        collection_redis_key_name = "physical:official_tag_name_set"

        # Materialize the ids so the lookup below is a plain IN list
        # rather than a nested queryset.
        tag_id_list = list(
            TopicTag.objects.filter(is_online=True, is_collection=True).values_list("tag_id", flat=True)
        )
        if not tag_id_list:
            return

        # One query for all tags instead of one query per tag id.
        tag_name_list = Tag.objects.filter(
            id__in=tag_id_list,
            is_online=True,
            is_deleted=False,
            is_category=False,
        ).values_list("name", flat=True)

        # Skip empty / NULL names, exactly as the per-row check did.
        valid_name_list = [tag_name for tag_name in tag_name_list if tag_name]

        # SADD rejects an empty value list, so only call it when there is
        # something to add; all names go out in a single round-trip.
        if valid_name_list:
            redis_client.sadd(collection_redis_key_name, *valid_name_list)
    except Exception:
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
def handle(self, *args, **options):
try:
type_name_list = get_type_info_map().keys()
......@@ -211,5 +231,9 @@ class Command(BaseCommand):
if len(options["sync_type"]) and options["sync_type"]=="similar":
sync_user_similar_score()
if len(options["sync_type"]) and options["sync_type"]=="tagname":
self.sync_tag_collecction_name_set()
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment