Commit b62c423e authored by lixiaofang

add contrast_similar.py

parent 664863f4
# -*- coding: UTF-8 -*-
# !/usr/bin/env python
from __future__ import unicode_literals, absolute_import, print_function
from gm_rpcd.all import bind
import logging
import traceback
import json
from search.utils.topic import TopicUtils
from libs.es import ESPerform
from libs.cache import redis_client
from search.utils.common import *
from gm_rpcd.all import bind
import functools
import logging
......@@ -12,71 +24,78 @@ GROUP_SIZE = 10
@bind('doris/search/query_contrast_similar')
def query_contrast_similar(query, offset=0, size=10):
def contrast_similar(user_id, offset=0, size=10):
try:
assert (query is not None and len(query) > 0)
# multi_fields = {
# 'user_id': 100,
# }
# query_fields = ['^'.join((k, str(v))) for (k, v) in multi_fields.items()]
#
# multi_match = {
# 'query': query,
# 'type': 'cross_fields',
# 'operator': 'and',
# 'fields': query_fields,
# }
if isinstance(user_id, int):
contrast_topic_id_list = get_all_topic_id_list(user_id, offset, size)
return {"contrast_user_id_list": contrast_topic_id_list}
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {"contrast_user_id_list": []}
def get_all_topic_id_list(user_id, offset, size):
try:
if user_id:
redis_key = "physical:user_similar:participant_user_id:" + str(user_id)
redis_field_val_list = redis_client.get(redis_key)
have_read_topic_id_list = json.loads(redis_field_val_list) if redis_field_val_list else []
read_topic_id_list = list()
if len(have_read_topic_id_list) > 0:
for i in have_read_topic_id_list:
read_topic_id_list.append(i.get("contrast_user_id"))
q = {
"query": {
"function_score": {
"query": {
"bool": {
"must": [
{"term": {
"is_online": True}
},
{"term": {
"is_deleted": False}
},
{"term": {
"id": query}
"must": [{
"term": {
"is_online": True
}
}, {
"term": {
"is_deleted": False
}
}],
"should": {
"match": {
"participant_user_id": read_topic_id_list
}
],
"minimum_should_match": 1
},
"minimum_should_match": 1
}
}
}
}
},
"sort": [{"similarity": "desc"}],
"size": size
}
logging.info('test query es :%s' % str(q).encode('utf-8'))
logging.info('contrast_similar,as q:%s' % str(q).encode('utf-8'))
result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="contrast_similar", query_body=q,
result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="topic",
query_body=q,
offset=offset, size=size)
contrast_topic_ids = []
logging.info(('res contrast_similar :%s' % str(result_dict)))
logging.info("q:%s" % q)
if result_dict['hits']["total"] > 0:
res_hit = result_dict["hits"]["hits"]
res_hit = result_dict['hits']["hits"]
for item in res_hit:
topic_id = item['_source']['id']
contrast_topic_ids.append(topic_id)
if '_source' in item:
wiki_name = item['_source']['contrast_user_id_list']
logging.info('contrast_similar_topic_id:%s' % contrast_topic_ids)
logging.info('contrast_user_id_list res:%s' % wiki_name)
return {"contrast_user_id_list": contrast_topic_ids}
return {"wiki_hot_recommends": wiki_name}
except:
logging.error("catch exception, query_sku:%s" % traceback.format_exc())
return {'wiki_hot_recommends': []}
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {"contrast_user_id_list": []}
......@@ -12,7 +12,7 @@ from libs.cache import redis_client
from search.utils.common import *
def get_discover_page_topic_ids(user_id,device_id,size,query_type=TopicPageType.FIND_PAGE):
def get_discover_page_topic_ids(user_id, device_id, size, query_type=TopicPageType.FIND_PAGE):
try:
if user_id == -1:
redis_key = "physical:discover_page" + ":user_id:" + str(user_id) + ":device_id:" + device_id
......@@ -20,12 +20,13 @@ def get_discover_page_topic_ids(user_id,device_id,size,query_type=TopicPageType.
redis_key = "physical:discover_page" + ":user_id:" + str(user_id)
redis_field_list = [b'have_read_topic_id']
redis_field_val_list = redis_client.hmget(redis_key,redis_field_list)
redis_field_val_list = redis_client.hmget(redis_key, redis_field_list)
have_read_topic_id_list = json.loads(redis_field_val_list[0]) if redis_field_val_list[0] else []
recommend_topic_ids = []
topic_id_list = TopicUtils.get_recommend_topic_ids(user_id=user_id, offset=0, size=size,query_type=query_type,filter_topic_id_list=have_read_topic_id_list)
topic_id_list = TopicUtils.get_recommend_topic_ids(user_id=user_id, offset=0, size=size, query_type=query_type,
filter_topic_id_list=have_read_topic_id_list)
for item in topic_id_list:
recommend_topic_ids.append(item["id"])
......@@ -34,49 +35,53 @@ def get_discover_page_topic_ids(user_id,device_id,size,query_type=TopicPageType.
redis_dict = {
"have_read_topic_id": json.dumps(have_read_topic_id_list)
}
redis_client.hmset(redis_key,redis_dict)
redis_client.hmset(redis_key, redis_dict)
return recommend_topic_ids
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return []
def get_home_recommend_topic_ids(user_id,device_id,offset,size,query=None,query_type=TopicPageType.HOME_RECOMMEND):
def get_home_recommend_topic_ids(user_id, device_id, offset, size, query=None, query_type=TopicPageType.HOME_RECOMMEND):
try:
if query is None:
if user_id == -1:
redis_key = "physical:home_recommend" + ":user_id:" + str(user_id) + ":device_id:" + device_id + ":query_type:" + str(query_type)
redis_key = "physical:home_recommend" + ":user_id:" + str(
user_id) + ":device_id:" + device_id + ":query_type:" + str(query_type)
else:
redis_key = "physical:home_recommend" + ":user_id:" + str(user_id) + ":query_type:" + str(query_type)
else:
if user_id == -1:
redis_key = "physical:home_query" + ":user_id:" + str(user_id) + ":device_id:" + device_id + ":query:" + str(query) + ":query_type:" + str(query_type)
redis_key = "physical:home_query" + ":user_id:" + str(
user_id) + ":device_id:" + device_id + ":query:" + str(query) + ":query_type:" + str(query_type)
else:
redis_key = "physical:home_query" + ":user_id:" + str(user_id) + ":query:" + str(query) + ":query_type:" + str(query_type)
redis_key = "physical:home_query" + ":user_id:" + str(user_id) + ":query:" + str(
query) + ":query_type:" + str(query_type)
redis_field_list = [b'last_offset_num', b'unread_topic_id']
for page_id in range(0,offset,size):
for page_id in range(0, offset, size):
redis_field_list.append(str(page_id))
redis_field_val_list = redis_client.hmget(redis_key,redis_field_list)
redis_field_val_list = redis_client.hmget(redis_key, redis_field_list)
last_offset_num = int(redis_field_val_list[0]) if redis_field_val_list[0] else -1
recommend_topic_ids = []
topic_id_list = TopicUtils.get_recommend_topic_ids(user_id, offset, size*size,query,query_type=query_type)
topic_id_list = TopicUtils.get_recommend_topic_ids(user_id, offset, size * size, query, query_type=query_type)
have_read_group_id_set = set()
unread_topic_id_list = list()
have_read_topic_id_set = set()
ori_unread_topic_id_list = list()
if redis_field_val_list[1] and offset>0:
if (user_id>0 and offset==last_offset_num) or user_id==-1:
if redis_field_val_list[1] and offset > 0:
if (user_id > 0 and offset == last_offset_num) or user_id == -1:
ori_unread_topic_id_list = json.loads(redis_field_val_list[1])
topic_id_list = ori_unread_topic_id_list + topic_id_list
for have_read_item in redis_field_val_list[2:]:
if have_read_item:
have_read_topic_id_set=have_read_topic_id_set.union(json.loads(have_read_item))
have_read_topic_id_set = have_read_topic_id_set.union(json.loads(have_read_item))
# Number of group topics placed on the current page
cur_page_group_num = 0
......@@ -88,12 +93,12 @@ def get_home_recommend_topic_ids(user_id,device_id,offset,size,query=None,query_
unread_topic_id_list.append(item)
else:
if item["id"] not in have_read_topic_id_set:
if isinstance(item["group_id"],int) and item["group_id"]>0 and cur_page_group_num<(size*0.9):
if isinstance(item["group_id"], int) and item["group_id"] > 0 and cur_page_group_num < (size * 0.9):
have_read_group_id_set.add(item["group_id"])
have_read_topic_id_set.add(item["id"])
cur_page_group_num += 1
recommend_topic_ids.append(item["id"])
elif item["group_id"]<=0 and cur_page_user_num<(size*0.1):
elif item["group_id"] <= 0 and cur_page_user_num < (size * 0.1):
cur_page_user_num += 1
recommend_topic_ids.append(item["id"])
else:
......@@ -102,31 +107,31 @@ def get_home_recommend_topic_ids(user_id,device_id,offset,size,query=None,query_
if len(recommend_topic_ids) >= size:
break
if len(recommend_topic_ids) < size and len(unread_topic_id_list)>0:
if len(recommend_topic_ids) < size and len(unread_topic_id_list) > 0:
recommend_len = len(recommend_topic_ids)
offi_unread_topic_id = [item["id"] for item in unread_topic_id_list[:(size-recommend_len)]]
offi_unread_topic_id = [item["id"] for item in unread_topic_id_list[:(size - recommend_len)]]
recommend_topic_ids = recommend_topic_ids + offi_unread_topic_id
unread_topic_id_list = unread_topic_id_list[(size-recommend_len):]
unread_topic_id_list = unread_topic_id_list[(size - recommend_len):]
if user_id==-1:
if user_id == -1:
unread_topic_id_list = ori_unread_topic_id_list + unread_topic_id_list
redis_dict = {
"unread_topic_id":json.dumps(unread_topic_id_list),
"last_offset_num":offset+size,
"unread_topic_id": json.dumps(unread_topic_id_list),
"last_offset_num": offset + size,
offset: json.dumps(recommend_topic_ids)
}
redis_client.hmset(redis_key,redis_dict)
redis_client.hmset(redis_key, redis_dict)
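# Each session key is meant to be kept for 15 minutes.
# NOTE(review): the expire call below passes 15 * 60 * 60 seconds, i.e. 15 hours — confirm which is intended.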
redis_client.expire(redis_key,15*60*60)
redis_client.expire(redis_key, 15 * 60 * 60)
return recommend_topic_ids
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return []
@bind("physical/search/home_recommend")
def home_recommend(device_id="",user_id=-1,offset=0,size=10,query_type=TopicPageType.HOME_RECOMMEND):
def home_recommend(device_id="", user_id=-1, offset=0, size=10, query_type=TopicPageType.HOME_RECOMMEND):
"""
:remark: Home-page recommendation; currently only diaries are recommended
:param session_id:
......@@ -137,20 +142,20 @@ def home_recommend(device_id="",user_id=-1,offset=0,size=10,query_type=TopicPage
"""
try:
if not user_id:
user_id=-1
if not isinstance(device_id,str):
user_id = -1
if not isinstance(device_id, str):
device_id = ""
recommend_topic_ids = get_home_recommend_topic_ids(user_id,device_id,offset,size,query_type=query_type)
recommend_topic_ids = get_home_recommend_topic_ids(user_id, device_id, offset, size, query_type=query_type)
return {"recommend_topic_ids":recommend_topic_ids}
return {"recommend_topic_ids": recommend_topic_ids}
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {"recommend_topic_ids": []}
@bind("physical/search/discover_page")
def discover_page(device_id="",user_id=-1,size=10):
def discover_page(device_id="", user_id=-1, size=10):
"""
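:remark: Discover-page recommendation; currently only diaries are recommended (the original remark said "home-page recommendation", apparently copied from home_recommend)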
:param session_id:
......@@ -161,20 +166,20 @@ def discover_page(device_id="",user_id=-1,size=10):
"""
try:
if not user_id:
user_id=-1
if not isinstance(device_id,str):
user_id = -1
if not isinstance(device_id, str):
device_id = ""
recommend_topic_ids = get_discover_page_topic_ids(user_id,device_id,size,query_type=TopicPageType.FIND_PAGE)
recommend_topic_ids = get_discover_page_topic_ids(user_id, device_id, size, query_type=TopicPageType.FIND_PAGE)
return {"recommend_topic_ids":recommend_topic_ids}
return {"recommend_topic_ids": recommend_topic_ids}
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {"recommend_topic_ids": []}
@bind("physical/search/home_query")
def home_query(device_id="",user_id=-1,query="",offset=0,size=10):
def home_query(device_id="", user_id=-1, query="", offset=0, size=10):
"""
:remark: Home-page search; currently only diaries are recommended
:param session_id:
......@@ -186,19 +191,20 @@ def home_query(device_id="",user_id=-1,query="",offset=0,size=10):
"""
try:
if not user_id:
user_id=-1
if not isinstance(device_id,str):
user_id = -1
if not isinstance(device_id, str):
device_id = ""
recommend_topic_ids = get_home_recommend_topic_ids(user_id,device_id,offset,size,query)
return {"recommend_topic_ids":recommend_topic_ids}
recommend_topic_ids = get_home_recommend_topic_ids(user_id, device_id, offset, size, query)
return {"recommend_topic_ids": recommend_topic_ids}
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {"recommend_topic_ids": []}
@bind("physical/search/topic_detail_page_recommend")
def topic_detail_page_recommend(user_id=-1,topic_id=-1,topic_group_id=-1,topic_user_id=-1,filter_topic_user_id=False,offset=0,size=10):
def topic_detail_page_recommend(user_id=-1, topic_id=-1, topic_group_id=-1, topic_user_id=-1,
filter_topic_user_id=False, offset=0, size=10):
"""
:remark: Recommendation strategy for the topic detail page; the first-card strategy is still missing
:param user_id:
......@@ -208,15 +214,16 @@ def topic_detail_page_recommend(user_id=-1,topic_id=-1,topic_group_id=-1,topic_u
:return:
"""
try:
if not isinstance(user_id,int):
if not isinstance(user_id, int):
user_id = -1
# Fetch the list of tag ids attached to the topic
topic_tag_list = TopicUtils.get_topic_tag_id_list(topic_id)
result_list = TopicUtils.get_topic_detail_recommend_list(user_id,topic_id,topic_tag_list,topic_group_id,topic_user_id,filter_topic_user_id,offset,size)
result_list = TopicUtils.get_topic_detail_recommend_list(user_id, topic_id, topic_tag_list, topic_group_id,
topic_user_id, filter_topic_user_id, offset, size)
recommend_topic_ids_list = list()
if len(result_list)>0:
if len(result_list) > 0:
recommend_topic_ids_list = [item["_source"]["id"] for item in result_list]
return {"recommend_topic_ids": recommend_topic_ids_list}
......@@ -226,7 +233,7 @@ def topic_detail_page_recommend(user_id=-1,topic_id=-1,topic_group_id=-1,topic_u
@bind("physical/search/query_topic_by_tag_aggregation")
def query_topic_by_tag_aggregation(user_id,tag_id, offset, size):
def query_topic_by_tag_aggregation(user_id, tag_id, offset, size):
"""
:remark: Recall topics aggregated by tag
:param tag_id:
......@@ -240,14 +247,12 @@ def query_topic_by_tag_aggregation(user_id,tag_id, offset, size):
if not tag_id:
tag_id = -1
result_list = TopicUtils.get_tag_aggregation_topic_id_list(user_id,tag_id,offset,size)
result_list = TopicUtils.get_tag_aggregation_topic_id_list(user_id, tag_id, offset, size)
recommend_topic_ids_list = list()
if len(result_list)>0:
if len(result_list) > 0:
recommend_topic_ids_list = [item["_source"]["id"] for item in result_list]
return {"recommend_topic_id": recommend_topic_ids_list}
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {"recommend_topic_id": []}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment