Commit 5b2ee65d authored by lixiaofang

add

parent 7dc38967
...@@ -12,7 +12,6 @@ from .common import TopicDocumentField ...@@ -12,7 +12,6 @@ from .common import TopicDocumentField
from search.utils.common import * from search.utils.common import *
class TopicUtils(object): class TopicUtils(object):
@classmethod @classmethod
...@@ -27,22 +26,22 @@ class TopicUtils(object): ...@@ -27,22 +26,22 @@ class TopicUtils(object):
try: try:
q = dict() q = dict()
q["query"] = { q["query"] = {
"term":{ "term": {
"user_id": user_id "user_id": user_id
} }
} }
q["_source"] = ["tag_list","attention_user_id_list", "pick_user_id_list", "same_pictorial_user_id_list"] q["_source"] = ["tag_list", "attention_user_id_list", "pick_user_id_list", "same_pictorial_user_id_list"]
result_dict = ESPerform.get_search_results(ESPerform.get_cli(), "user", q, offset, size) result_dict = ESPerform.get_search_results(ESPerform.get_cli(), "user", q, offset, size)
return result_dict return result_dict
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {"total_count":0,"hits":[]} return {"total_count": 0, "hits": []}
@classmethod @classmethod
def analyze_related_user_id_list(cls,related_user_id_list): def analyze_related_user_id_list(cls, related_user_id_list):
""" """
:remark:获取指定用户关联的 用户列表 :remark:获取指定用户关联的 用户列表
:param related_user_id_list: :param related_user_id_list:
...@@ -61,13 +60,13 @@ class TopicUtils(object): ...@@ -61,13 +60,13 @@ class TopicUtils(object):
elif item["country_id"] == 2: elif item["country_id"] == 2:
korea_user_id_list.append(item["user_id"]) korea_user_id_list.append(item["user_id"])
return (chinese_user_id_list,japan_user_id_list,korea_user_id_list) return (chinese_user_id_list, japan_user_id_list, korea_user_id_list)
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return ([],[],[]) return ([], [], [])
@classmethod @classmethod
def refresh_redis_hash_data(cls, redis_cli,redis_key,redis_data_dict): def refresh_redis_hash_data(cls, redis_cli, redis_key, redis_data_dict):
try: try:
redis_cli.hmset(redis_key, redis_data_dict) redis_cli.hmset(redis_key, redis_data_dict)
return True return True
...@@ -76,13 +75,13 @@ class TopicUtils(object): ...@@ -76,13 +75,13 @@ class TopicUtils(object):
return False return False
@classmethod @classmethod
def ___get_should_term_list(cls,ori_list,field_name="tag_list"): def ___get_should_term_list(cls, ori_list, field_name="tag_list"):
try: try:
should_term_list = list() should_term_list = list()
for term_id in ori_list: for term_id in ori_list:
term_dict = { term_dict = {
"term":{ "term": {
field_name:{"value":term_id} field_name: {"value": term_id}
} }
} }
should_term_list.append(term_dict) should_term_list.append(term_dict)
...@@ -92,15 +91,15 @@ class TopicUtils(object): ...@@ -92,15 +91,15 @@ class TopicUtils(object):
return [] return []
@classmethod @classmethod
def get_topic_tag_info(cls, offset, size, topic_id_list,user_id): def get_topic_tag_info(cls, offset, size, topic_id_list, user_id):
try: try:
q = { q = {
"query":{ "query": {
"terms":{ "terms": {
"id": topic_id_list "id": topic_id_list
} }
}, },
"_source":{ "_source": {
"includes": ["id", "pictorial_id", "offline_score", "user_id", "edit_tag_list"] "includes": ["id", "pictorial_id", "offline_score", "user_id", "edit_tag_list"]
} }
} }
...@@ -120,8 +119,9 @@ class TopicUtils(object): ...@@ -120,8 +119,9 @@ class TopicUtils(object):
return {} return {}
@classmethod @classmethod
def get_recommend_topic_ids(cls,user_id,tag_id,offset,size,single_size,query=None,query_type=TopicPageType.FIND_PAGE, def get_recommend_topic_ids(cls, user_id, tag_id, offset, size, single_size, query=None,
filter_topic_id_list=[],test_score=False,must_topic_id_list=[],recommend_tag_list=[], query_type=TopicPageType.FIND_PAGE,
filter_topic_id_list=[], test_score=False, must_topic_id_list=[], recommend_tag_list=[],
user_similar_score_list=[]): user_similar_score_list=[]):
""" """
:需增加打散逻辑 :需增加打散逻辑
...@@ -158,7 +158,7 @@ class TopicUtils(object): ...@@ -158,7 +158,7 @@ class TopicUtils(object):
user_tag_list = result_dict["hits"][0]["_source"]["tag_list"] user_tag_list = result_dict["hits"][0]["_source"]["tag_list"]
logging.info("get same_pictorial_id_list :%s"%same_pictorial_id_list) logging.info("get same_pictorial_id_list :%s" % same_pictorial_id_list)
q = dict() q = dict()
q["query"] = dict() q["query"] = dict()
...@@ -182,9 +182,9 @@ class TopicUtils(object): ...@@ -182,9 +182,9 @@ class TopicUtils(object):
} }
] ]
if len(user_similar_score_list)>0: if len(user_similar_score_list) > 0:
for item in user_similar_score_list[:100]: for item in user_similar_score_list[:100]:
score_item = 3 * 10*item[1] score_item = 3 * 10 * item[1]
functions_list.append( functions_list.append(
{ {
"filter": {"bool": { "filter": {"bool": {
...@@ -193,19 +193,19 @@ class TopicUtils(object): ...@@ -193,19 +193,19 @@ class TopicUtils(object):
} }
) )
if len(attention_user_id_list)>0: if len(attention_user_id_list) > 0:
functions_list.append( functions_list.append(
{ {
"filter": {"bool": { "filter": {"bool": {
"should": {"terms":{"user_id":attention_user_id_list}}}}, "should": {"terms": {"user_id": attention_user_id_list}}}},
"weight": 3, "weight": 3,
} }
) )
if len(pick_user_id_list)>0: if len(pick_user_id_list) > 0:
functions_list.append( functions_list.append(
{ {
"filter": {"bool": { "filter": {"bool": {
"should": {"terms":{"user_id":pick_user_id_list}}}}, "should": {"terms": {"user_id": pick_user_id_list}}}},
"weight": 2 "weight": 2
} }
) )
...@@ -217,61 +217,60 @@ class TopicUtils(object): ...@@ -217,61 +217,60 @@ class TopicUtils(object):
# "weight": 1 # "weight": 1
# } # }
# ) # )
if len(same_pictorial_id_list)>0: if len(same_pictorial_id_list) > 0:
functions_list.append( functions_list.append(
{ {
"filter": {"bool": { "filter": {"bool": {
"should": {"terms":{"user_id":same_pictorial_id_list}}}}, "should": {"terms": {"user_id": same_pictorial_id_list}}}},
"weight": 1 "weight": 1
} }
) )
# query_tag_term_list = cls.___get_should_term_list(user_tag_list) # query_tag_term_list = cls.___get_should_term_list(user_tag_list)
if len(user_tag_list)>0: if len(user_tag_list) > 0:
functions_list.append( functions_list.append(
{ {
"filter":{"bool":{ "filter": {"bool": {
"should":{"terms":{"tag_list":user_tag_list}}}}, "should": {"terms": {"tag_list": user_tag_list}}}},
"weight": 1 "weight": 1
} }
) )
if len(recommend_tag_list)>0: if len(recommend_tag_list) > 0:
functions_list.append( functions_list.append(
{ {
"filter":{"bool":{ "filter": {"bool": {
"should":{"terms":{"edit_tag_list":recommend_tag_list}}}}, "should": {"terms": {"edit_tag_list": recommend_tag_list}}}},
"weight": 3 "weight": 3
} }
) )
low_content_level = 4 if query_type == TopicPageType.FIND_PAGE else 3
low_content_level = 4 if query_type==TopicPageType.FIND_PAGE else 3
query_function_score = { query_function_score = {
"query": { "query": {
"bool": { "bool": {
"filter": [ "filter": [
{"range": {"content_level": {"gte": low_content_level, "lte": 5}}}, {"range": {"content_level": {"gte": low_content_level, "lte": 5}}},
{"term": {"has_image":True}}, {"term": {"has_image": True}},
{"term": {"is_online": True}}, {"term": {"is_online": True}},
{"term": {"is_deleted": False}} {"term": {"is_deleted": False}}
], ],
"should": [ "should": [
{ {
"bool":{ "bool": {
"must":[ "must": [
{"term":{"has_image":True}}, {"term": {"has_image": True}},
{"term": {"has_video": False}} {"term": {"has_video": False}}
] ]
} }
},{ }, {
"bool":{ "bool": {
"must":{ "must": {
"term":{"has_video":True} "term": {"has_video": True}
} }
} }
} }
], ],
"minimum_should_match":1 "minimum_should_match": 1
} }
}, },
"score_mode": "sum", "score_mode": "sum",
...@@ -280,23 +279,23 @@ class TopicUtils(object): ...@@ -280,23 +279,23 @@ class TopicUtils(object):
} }
if len(must_topic_id_list) > 0: if len(must_topic_id_list) > 0:
query_function_score["query"]["bool"]["must"] = { query_function_score["query"]["bool"]["must"] = {
"terms":{ "terms": {
"id": must_topic_id_list "id": must_topic_id_list
} }
} }
if len(filter_topic_id_list)>0: if len(filter_topic_id_list) > 0:
query_function_score["query"]["bool"]["must_not"] = { query_function_score["query"]["bool"]["must_not"] = {
"terms":{ "terms": {
"id": filter_topic_id_list "id": filter_topic_id_list
} }
} }
if query is not None:#搜索帖子 if query is not None: # 搜索帖子
multi_fields = { multi_fields = {
'description': 200, 'description': 200,
'content': 300, 'content': 300,
'name': 400, 'name': 400,
'tag_name_list':300, 'tag_name_list': 300,
} }
query_fields = ['^'.join((k, str(v))) for (k, v) in multi_fields.items()] query_fields = ['^'.join((k, str(v))) for (k, v) in multi_fields.items()]
multi_match = { multi_match = {
...@@ -308,7 +307,7 @@ class TopicUtils(object): ...@@ -308,7 +307,7 @@ class TopicUtils(object):
query_function_score["query"]["bool"]["should"] = [ query_function_score["query"]["bool"]["should"] = [
{'multi_match': multi_match}, {'multi_match': multi_match},
{"term":{"tag_list":tag_id}} {"term": {"tag_list": tag_id}}
] ]
query_function_score["query"]["bool"]["minimum_should_match"] = 1 query_function_score["query"]["bool"]["minimum_should_match"] = 1
...@@ -317,19 +316,19 @@ class TopicUtils(object): ...@@ -317,19 +316,19 @@ class TopicUtils(object):
"field": "user_id" "field": "user_id"
} }
q["_source"] = { q["_source"] = {
"includes":["id","pictorial_id","offline_score","user_id","edit_tag_list"] "includes": ["id", "pictorial_id", "offline_score", "user_id", "edit_tag_list"]
} }
q["sort"] = [ q["sort"] = [
{ {
"_script":{ "_script": {
"type":"number", "type": "number",
"script":{ "script": {
"lang": "expression", "lang": "expression",
"source": "_score+doc['offline_score']" "source": "_score+doc['offline_score']"
# "lang":"painless", # "lang":"painless",
# "source":"_score+params._source.offline_score" # "source":"_score+params._source.offline_score"
}, },
"order":"desc" "order": "desc"
} }
}, },
"_score" "_score"
...@@ -375,7 +374,8 @@ class TopicUtils(object): ...@@ -375,7 +374,8 @@ class TopicUtils(object):
return list() return list()
@classmethod @classmethod
def get_topic_detail_recommend_list(cls,user_id,topic_id,topic_tag_list,topic_pictorial_id,topic_user_id,filter_topic_user_id,have_read_topic_list,offset,size,es_cli_obj=None): def get_topic_detail_recommend_list(cls, user_id, topic_id, topic_tag_list, topic_pictorial_id, topic_user_id,
filter_topic_user_id, have_read_topic_list, offset, size, es_cli_obj=None):
""" """
:remark 帖子详情页推荐列表,缺少按时间衰减 :remark 帖子详情页推荐列表,缺少按时间衰减
:param user_id: :param user_id:
...@@ -400,15 +400,15 @@ class TopicUtils(object): ...@@ -400,15 +400,15 @@ class TopicUtils(object):
"weight": 1000 "weight": 1000
}, },
{ {
"linear": { "linear": {
"create_time": { "create_time": {
"scale": "1d", "scale": "1d",
"decay": 0.5 "decay": 0.5
} }
} }
} }
] ]
if isinstance(topic_pictorial_id,int) and topic_pictorial_id > 0: if isinstance(topic_pictorial_id, int) and topic_pictorial_id > 0:
functions_list.append( functions_list.append(
{ {
"filter": {"term": { "filter": {"term": {
...@@ -419,16 +419,16 @@ class TopicUtils(object): ...@@ -419,16 +419,16 @@ class TopicUtils(object):
have_read_topic_list.append(topic_id) have_read_topic_list.append(topic_id)
query_function_score = { query_function_score = {
"query":{ "query": {
"bool":{ "bool": {
"must": [ "must": [
{"range": {"content_level": {"gte": 3, "lte": 5}}}, {"range": {"content_level": {"gte": 3, "lte": 5}}},
{"term": {"is_online": True}}, {"term": {"is_online": True}},
{"term": {"is_deleted": False}} {"term": {"is_deleted": False}}
], ],
"must_not":{ "must_not": {
"terms":{ "terms": {
"id":have_read_topic_list "id": have_read_topic_list
} }
} }
} }
...@@ -440,15 +440,15 @@ class TopicUtils(object): ...@@ -440,15 +440,15 @@ class TopicUtils(object):
if filter_topic_user_id: if filter_topic_user_id:
query_function_score["query"]["bool"]["must"].append({"term": {"user_id": topic_user_id}}) query_function_score["query"]["bool"]["must"].append({"term": {"user_id": topic_user_id}})
if len(topic_tag_list)>0: if len(topic_tag_list) > 0:
query_function_score["query"]["bool"]["should"]={ query_function_score["query"]["bool"]["should"] = {
"terms":{ "terms": {
"tag_list":topic_tag_list "tag_list": topic_tag_list
} }
} }
q["query"]["function_score"] = query_function_score q["query"]["function_score"] = query_function_score
q["_source"] = { q["_source"] = {
"includes":["id","pictorial_id","user_id","_score"] "includes": ["id", "pictorial_id", "user_id", "_score"]
} }
result_dict = ESPerform.get_search_results(es_cli_obj, sub_index_name="topic", query_body=q, result_dict = ESPerform.get_search_results(es_cli_obj, sub_index_name="topic", query_body=q,
...@@ -460,7 +460,7 @@ class TopicUtils(object): ...@@ -460,7 +460,7 @@ class TopicUtils(object):
return [] return []
@classmethod @classmethod
def get_topic_tag_id_list(cls,topic_id,es_cli_obj=None): def get_topic_tag_id_list(cls, topic_id, es_cli_obj=None):
""" """
:remark 获取帖子标签列表 :remark 获取帖子标签列表
:param topic_id: :param topic_id:
...@@ -472,18 +472,18 @@ class TopicUtils(object): ...@@ -472,18 +472,18 @@ class TopicUtils(object):
q = dict() q = dict()
q["query"] = { q["query"] = {
"term":{ "term": {
"id": topic_id "id": topic_id
} }
} }
q["_source"] = { q["_source"] = {
"includes":[TopicDocumentField.TAG_LIST] "includes": [TopicDocumentField.TAG_LIST]
} }
result_dict = ESPerform.get_search_results(es_cli_obj,sub_index_name="topic",query_body=q,size=1) result_dict = ESPerform.get_search_results(es_cli_obj, sub_index_name="topic", query_body=q, size=1)
tag_id_list = [] tag_id_list = []
if len(result_dict["hits"])>0: if len(result_dict["hits"]) > 0:
tag_id_list = result_dict["hits"][0]["_source"][TopicDocumentField.TAG_LIST] tag_id_list = result_dict["hits"][0]["_source"][TopicDocumentField.TAG_LIST]
return tag_id_list return tag_id_list
...@@ -491,9 +491,8 @@ class TopicUtils(object): ...@@ -491,9 +491,8 @@ class TopicUtils(object):
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return list() return list()
@classmethod @classmethod
def get_tag_aggregation_topic_id_list(cls,user_id,tag_id,offset,size): def get_tag_aggregation_topic_id_list(cls, user_id, tag_id, offset, size):
try: try:
attention_user_id_list = list() attention_user_id_list = list()
pick_user_id_list = list() pick_user_id_list = list()
...@@ -519,34 +518,34 @@ class TopicUtils(object): ...@@ -519,34 +518,34 @@ class TopicUtils(object):
} }
] ]
if len(attention_user_id_list)>0: if len(attention_user_id_list) > 0:
functions_list.append( functions_list.append(
{ {
"filter": {"bool": { "filter": {"bool": {
"should": {"terms":{"user_id":attention_user_id_list}}}}, "should": {"terms": {"user_id": attention_user_id_list}}}},
"weight": 3, "weight": 3,
} }
) )
if len(pick_user_id_list)>0: if len(pick_user_id_list) > 0:
functions_list.append( functions_list.append(
{ {
"filter": {"bool": { "filter": {"bool": {
"should": {"terms":{"user_id":pick_user_id_list}}}}, "should": {"terms": {"user_id": pick_user_id_list}}}},
"weight": 2 "weight": 2
} }
) )
query_function_score = { query_function_score = {
"query":{ "query": {
"bool":{ "bool": {
"must": [ "must": [
#{"range": {"content_level": {"gte": 3, "lte": 5}}}, # {"range": {"content_level": {"gte": 3, "lte": 5}}},
{"term": {"is_online": True}}, {"term": {"is_online": True}},
{"term": {"is_deleted": False}}, {"term": {"is_deleted": False}},
{"term": {"tag_list":tag_id}} {"term": {"tag_list": tag_id}}
], ],
"must_not":[ "must_not": [
{"terms": {"content_level": [1,2]}} {"terms": {"content_level": [1, 2]}}
] ]
} }
}, },
...@@ -557,22 +556,22 @@ class TopicUtils(object): ...@@ -557,22 +556,22 @@ class TopicUtils(object):
q = dict() q = dict()
q["query"] = { q["query"] = {
"function_score":query_function_score "function_score": query_function_score
} }
q["_source"] = { q["_source"] = {
"includes":["id","pictorial_id","user_id","_score","offline_score","manual_score"] "includes": ["id", "pictorial_id", "user_id", "_score", "offline_score", "manual_score"]
} }
q["sort"] = [ q["sort"] = [
{ {
"_script":{ "_script": {
"type":"number", "type": "number",
"script":{ "script": {
"lang": "expression", "lang": "expression",
"source": "_score+doc['offline_score']+doc['manual_score']" "source": "_score+doc['offline_score']+doc['manual_score']"
# "lang":"painless", # "lang":"painless",
# "source":"_score+params._source.offline_score+params._source.manual_score" # "source":"_score+params._source.offline_score+params._source.manual_score"
}, },
"order":"desc" "order": "desc"
} }
} }
] ]
...@@ -666,11 +665,11 @@ class TopicUtils(object): ...@@ -666,11 +665,11 @@ class TopicUtils(object):
if isinstance(sorts_by, int): if isinstance(sorts_by, int):
if sorts_by == TOPIC_SEARCH_SORT.VOTE_NUM: if sorts_by == TOPIC_SEARCH_SORT.VOTE_NUM:
sort_rule.append({ sort_rule.append({
"vote_num":{ "vote_num": {
"order":"desc" "order": "desc"
}, },
"update_time":{ "update_time": {
"order":"desc" "order": "desc"
}, },
}) })
...@@ -678,26 +677,26 @@ class TopicUtils(object): ...@@ -678,26 +677,26 @@ class TopicUtils(object):
for sort_by in sorts_by: for sort_by in sorts_by:
if sort_by == TOPIC_SEARCH_SORT.ID_AEC: if sort_by == TOPIC_SEARCH_SORT.ID_AEC:
sort_rule.append({ sort_rule.append({
"id":{ "id": {
"order":"asc" "order": "asc"
}, },
}) })
elif sort_by == TOPIC_SEARCH_SORT.ID_DESC: elif sort_by == TOPIC_SEARCH_SORT.ID_DESC:
sort_rule.append({ sort_rule.append({
"id":{ "id": {
"order":"desc" "order": "desc"
}, },
}) })
elif sort_by == TOPIC_SEARCH_SORT.SCORE_AEC: elif sort_by == TOPIC_SEARCH_SORT.SCORE_AEC:
sort_rule.append({ sort_rule.append({
"sort_score":{ "sort_score": {
"order":"asc" "order": "asc"
}, },
}) })
elif sort_by == TOPIC_SEARCH_SORT.SCORE_DESC: elif sort_by == TOPIC_SEARCH_SORT.SCORE_DESC:
sort_rule.append({ sort_rule.append({
"sort_score":{ "sort_score": {
"order":"desc" "order": "desc"
}, },
}) })
......
...@@ -84,14 +84,10 @@ class Topic(models.Model): ...@@ -84,14 +84,10 @@ class Topic(models.Model):
def get_pictorial_id(self): def get_pictorial_id(self):
try: try:
logging.info("get user id :%s" % self.id)
pictorial_id_list =[] pictorial_id_list =[]
pictorial_id = PictorialTopic.objects.filter(topic_id=self.id).values_list("pictorial_id",flat=True) pictorial_id = PictorialTopic.objects.filter(topic_id=self.id).values_list("pictorial_id",flat=True)
for i in pictorial_id: for i in pictorial_id:
pictorial_id_list.append(i) pictorial_id_list.append(i)
if len(pictorial_id_list) > 0:
logging.info("get user attention pictorial id :%s" % pictorial_id_list)
return pictorial_id_list return pictorial_id_list
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment