Commit b62c423e authored by lixiaofang

add contrast_similar.py

parent 664863f4
# -*- coding: UTF-8 -*- # -*- coding: UTF-8 -*-
# !/usr/bin/env python
from __future__ import unicode_literals, absolute_import, print_function
from gm_rpcd.all import bind
import logging
import traceback
import json
from search.utils.topic import TopicUtils
from libs.es import ESPerform
from libs.cache import redis_client
from search.utils.common import *
from gm_rpcd.all import bind from gm_rpcd.all import bind
import functools import functools
import logging import logging
...@@ -12,71 +24,78 @@ GROUP_SIZE = 10 ...@@ -12,71 +24,78 @@ GROUP_SIZE = 10
@bind('doris/search/query_contrast_similar') @bind('doris/search/query_contrast_similar')
def query_contrast_similar(query, offset=0, size=10): def contrast_similar(user_id, offset=0, size=10):
try: try:
assert (query is not None and len(query) > 0) if isinstance(user_id, int):
contrast_topic_id_list = get_all_topic_id_list(user_id, offset, size)
# multi_fields = { return {"contrast_user_id_list": contrast_topic_id_list}
# 'user_id': 100,
# } except:
# query_fields = ['^'.join((k, str(v))) for (k, v) in multi_fields.items()]
# logging.error("catch exception,err_msg:%s" % traceback.format_exc())
# multi_match = { return {"contrast_user_id_list": []}
# 'query': query,
# 'type': 'cross_fields',
# 'operator': 'and', def get_all_topic_id_list(user_id, offset, size):
# 'fields': query_fields, try:
# } if user_id:
redis_key = "physical:user_similar:participant_user_id:" + str(user_id)
redis_field_val_list = redis_client.get(redis_key)
have_read_topic_id_list = json.loads(redis_field_val_list) if redis_field_val_list else []
read_topic_id_list = list()
if len(have_read_topic_id_list) > 0:
for i in have_read_topic_id_list:
read_topic_id_list.append(i.get("contrast_user_id"))
q = { q = {
"query": { "query": {
"function_score": { "function_score": {
"query": { "query": {
"bool": { "bool": {
"must": [ "must": [{
{"term": { "term": {
"is_online": True} "is_online": True
}, }
{"term": { }, {
"is_deleted": False} "term": {
}, "is_deleted": False
{"term": { }
"id": query} }],
"should": {
"match": {
"participant_user_id": read_topic_id_list
} }
],
"minimum_should_match": 1
}, },
"minimum_should_match": 1
}
}
} }
} }
},
"sort": [{"similarity": "desc"}],
"size": size
} }
logging.info('test query es :%s' % str(q).encode('utf-8')) logging.info('contrast_similar,as q:%s' % str(q).encode('utf-8'))
result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="contrast_similar", query_body=q, result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="topic",
query_body=q,
offset=offset, size=size) offset=offset, size=size)
contrast_topic_ids = []
logging.info(('res contrast_similar :%s' % str(result_dict))) if result_dict['hits']["total"] > 0:
logging.info("q:%s" % q)
res_hit = result_dict["hits"]["hits"] res_hit = result_dict['hits']["hits"]
for item in res_hit: for item in res_hit:
topic_id = item['_source']['id']
contrast_topic_ids.append(topic_id)
if '_source' in item: logging.info('contrast_similar_topic_id:%s' % contrast_topic_ids)
wiki_name = item['_source']['contrast_user_id_list']
logging.info('contrast_user_id_list res:%s' % wiki_name) return {"contrast_user_id_list": contrast_topic_ids}
return {"wiki_hot_recommends": wiki_name}
except: except:
logging.error("catch exception, query_sku:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {"contrast_user_id_list": []}
return {'wiki_hot_recommends': []}
...@@ -12,7 +12,7 @@ from libs.cache import redis_client ...@@ -12,7 +12,7 @@ from libs.cache import redis_client
from search.utils.common import * from search.utils.common import *
def get_discover_page_topic_ids(user_id,device_id,size,query_type=TopicPageType.FIND_PAGE): def get_discover_page_topic_ids(user_id, device_id, size, query_type=TopicPageType.FIND_PAGE):
try: try:
if user_id == -1: if user_id == -1:
redis_key = "physical:discover_page" + ":user_id:" + str(user_id) + ":device_id:" + device_id redis_key = "physical:discover_page" + ":user_id:" + str(user_id) + ":device_id:" + device_id
...@@ -20,12 +20,13 @@ def get_discover_page_topic_ids(user_id,device_id,size,query_type=TopicPageType. ...@@ -20,12 +20,13 @@ def get_discover_page_topic_ids(user_id,device_id,size,query_type=TopicPageType.
redis_key = "physical:discover_page" + ":user_id:" + str(user_id) redis_key = "physical:discover_page" + ":user_id:" + str(user_id)
redis_field_list = [b'have_read_topic_id'] redis_field_list = [b'have_read_topic_id']
redis_field_val_list = redis_client.hmget(redis_key,redis_field_list) redis_field_val_list = redis_client.hmget(redis_key, redis_field_list)
have_read_topic_id_list = json.loads(redis_field_val_list[0]) if redis_field_val_list[0] else [] have_read_topic_id_list = json.loads(redis_field_val_list[0]) if redis_field_val_list[0] else []
recommend_topic_ids = [] recommend_topic_ids = []
topic_id_list = TopicUtils.get_recommend_topic_ids(user_id=user_id, offset=0, size=size,query_type=query_type,filter_topic_id_list=have_read_topic_id_list) topic_id_list = TopicUtils.get_recommend_topic_ids(user_id=user_id, offset=0, size=size, query_type=query_type,
filter_topic_id_list=have_read_topic_id_list)
for item in topic_id_list: for item in topic_id_list:
recommend_topic_ids.append(item["id"]) recommend_topic_ids.append(item["id"])
...@@ -34,49 +35,53 @@ def get_discover_page_topic_ids(user_id,device_id,size,query_type=TopicPageType. ...@@ -34,49 +35,53 @@ def get_discover_page_topic_ids(user_id,device_id,size,query_type=TopicPageType.
redis_dict = { redis_dict = {
"have_read_topic_id": json.dumps(have_read_topic_id_list) "have_read_topic_id": json.dumps(have_read_topic_id_list)
} }
redis_client.hmset(redis_key,redis_dict) redis_client.hmset(redis_key, redis_dict)
return recommend_topic_ids return recommend_topic_ids
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return [] return []
def get_home_recommend_topic_ids(user_id,device_id,offset,size,query=None,query_type=TopicPageType.HOME_RECOMMEND):
def get_home_recommend_topic_ids(user_id, device_id, offset, size, query=None, query_type=TopicPageType.HOME_RECOMMEND):
try: try:
if query is None: if query is None:
if user_id == -1: if user_id == -1:
redis_key = "physical:home_recommend" + ":user_id:" + str(user_id) + ":device_id:" + device_id + ":query_type:" + str(query_type) redis_key = "physical:home_recommend" + ":user_id:" + str(
user_id) + ":device_id:" + device_id + ":query_type:" + str(query_type)
else: else:
redis_key = "physical:home_recommend" + ":user_id:" + str(user_id) + ":query_type:" + str(query_type) redis_key = "physical:home_recommend" + ":user_id:" + str(user_id) + ":query_type:" + str(query_type)
else: else:
if user_id == -1: if user_id == -1:
redis_key = "physical:home_query" + ":user_id:" + str(user_id) + ":device_id:" + device_id + ":query:" + str(query) + ":query_type:" + str(query_type) redis_key = "physical:home_query" + ":user_id:" + str(
user_id) + ":device_id:" + device_id + ":query:" + str(query) + ":query_type:" + str(query_type)
else: else:
redis_key = "physical:home_query" + ":user_id:" + str(user_id) + ":query:" + str(query) + ":query_type:" + str(query_type) redis_key = "physical:home_query" + ":user_id:" + str(user_id) + ":query:" + str(
query) + ":query_type:" + str(query_type)
redis_field_list = [b'last_offset_num', b'unread_topic_id'] redis_field_list = [b'last_offset_num', b'unread_topic_id']
for page_id in range(0,offset,size): for page_id in range(0, offset, size):
redis_field_list.append(str(page_id)) redis_field_list.append(str(page_id))
redis_field_val_list = redis_client.hmget(redis_key,redis_field_list) redis_field_val_list = redis_client.hmget(redis_key, redis_field_list)
last_offset_num = int(redis_field_val_list[0]) if redis_field_val_list[0] else -1 last_offset_num = int(redis_field_val_list[0]) if redis_field_val_list[0] else -1
recommend_topic_ids = [] recommend_topic_ids = []
topic_id_list = TopicUtils.get_recommend_topic_ids(user_id, offset, size*size,query,query_type=query_type) topic_id_list = TopicUtils.get_recommend_topic_ids(user_id, offset, size * size, query, query_type=query_type)
have_read_group_id_set = set() have_read_group_id_set = set()
unread_topic_id_list = list() unread_topic_id_list = list()
have_read_topic_id_set = set() have_read_topic_id_set = set()
ori_unread_topic_id_list = list() ori_unread_topic_id_list = list()
if redis_field_val_list[1] and offset>0: if redis_field_val_list[1] and offset > 0:
if (user_id>0 and offset==last_offset_num) or user_id==-1: if (user_id > 0 and offset == last_offset_num) or user_id == -1:
ori_unread_topic_id_list = json.loads(redis_field_val_list[1]) ori_unread_topic_id_list = json.loads(redis_field_val_list[1])
topic_id_list = ori_unread_topic_id_list + topic_id_list topic_id_list = ori_unread_topic_id_list + topic_id_list
for have_read_item in redis_field_val_list[2:]: for have_read_item in redis_field_val_list[2:]:
if have_read_item: if have_read_item:
have_read_topic_id_set=have_read_topic_id_set.union(json.loads(have_read_item)) have_read_topic_id_set = have_read_topic_id_set.union(json.loads(have_read_item))
# 当前页小组数量 # 当前页小组数量
cur_page_group_num = 0 cur_page_group_num = 0
...@@ -88,12 +93,12 @@ def get_home_recommend_topic_ids(user_id,device_id,offset,size,query=None,query_ ...@@ -88,12 +93,12 @@ def get_home_recommend_topic_ids(user_id,device_id,offset,size,query=None,query_
unread_topic_id_list.append(item) unread_topic_id_list.append(item)
else: else:
if item["id"] not in have_read_topic_id_set: if item["id"] not in have_read_topic_id_set:
if isinstance(item["group_id"],int) and item["group_id"]>0 and cur_page_group_num<(size*0.9): if isinstance(item["group_id"], int) and item["group_id"] > 0 and cur_page_group_num < (size * 0.9):
have_read_group_id_set.add(item["group_id"]) have_read_group_id_set.add(item["group_id"])
have_read_topic_id_set.add(item["id"]) have_read_topic_id_set.add(item["id"])
cur_page_group_num += 1 cur_page_group_num += 1
recommend_topic_ids.append(item["id"]) recommend_topic_ids.append(item["id"])
elif item["group_id"]<=0 and cur_page_user_num<(size*0.1): elif item["group_id"] <= 0 and cur_page_user_num < (size * 0.1):
cur_page_user_num += 1 cur_page_user_num += 1
recommend_topic_ids.append(item["id"]) recommend_topic_ids.append(item["id"])
else: else:
...@@ -102,31 +107,31 @@ def get_home_recommend_topic_ids(user_id,device_id,offset,size,query=None,query_ ...@@ -102,31 +107,31 @@ def get_home_recommend_topic_ids(user_id,device_id,offset,size,query=None,query_
if len(recommend_topic_ids) >= size: if len(recommend_topic_ids) >= size:
break break
if len(recommend_topic_ids) < size and len(unread_topic_id_list)>0: if len(recommend_topic_ids) < size and len(unread_topic_id_list) > 0:
recommend_len = len(recommend_topic_ids) recommend_len = len(recommend_topic_ids)
offi_unread_topic_id = [item["id"] for item in unread_topic_id_list[:(size-recommend_len)]] offi_unread_topic_id = [item["id"] for item in unread_topic_id_list[:(size - recommend_len)]]
recommend_topic_ids = recommend_topic_ids + offi_unread_topic_id recommend_topic_ids = recommend_topic_ids + offi_unread_topic_id
unread_topic_id_list = unread_topic_id_list[(size-recommend_len):] unread_topic_id_list = unread_topic_id_list[(size - recommend_len):]
if user_id==-1: if user_id == -1:
unread_topic_id_list = ori_unread_topic_id_list + unread_topic_id_list unread_topic_id_list = ori_unread_topic_id_list + unread_topic_id_list
redis_dict = { redis_dict = {
"unread_topic_id":json.dumps(unread_topic_id_list), "unread_topic_id": json.dumps(unread_topic_id_list),
"last_offset_num":offset+size, "last_offset_num": offset + size,
offset: json.dumps(recommend_topic_ids) offset: json.dumps(recommend_topic_ids)
} }
redis_client.hmset(redis_key,redis_dict) redis_client.hmset(redis_key, redis_dict)
# 每个session key保存15分钟 # 每个session key保存15分钟
redis_client.expire(redis_key,15*60*60) redis_client.expire(redis_key, 15 * 60 * 60)
return recommend_topic_ids return recommend_topic_ids
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return [] return []
@bind("physical/search/home_recommend")
def home_recommend(device_id="", user_id=-1, offset=0, size=10, query_type=TopicPageType.HOME_RECOMMEND):
    """RPC entry point for the home-page recommendation feed.

    The original remark states that only diaries are recommended for now.

    :param device_id: device identifier; any value that is not a ``str`` is
        replaced with ``""`` before being passed on.
    :param user_id: user identifier; any falsy value (``None``, ``0``, ``""``)
        is replaced with ``-1`` before being passed on.
    :param offset: forwarded unchanged to ``get_home_recommend_topic_ids``.
    :param size: forwarded unchanged to ``get_home_recommend_topic_ids``.
    :param query_type: forwarded as the ``query_type`` keyword argument.
    :return: ``{"recommend_topic_ids": ids}`` where ``ids`` is whatever
        ``get_home_recommend_topic_ids`` returned, or
        ``{"recommend_topic_ids": []}`` if an exception was raised.
    """
    try:
        if not user_id:
            user_id = -1
        if not isinstance(device_id, str):
            device_id = ""

        recommend_topic_ids = get_home_recommend_topic_ids(
            user_id, device_id, offset, size, query_type=query_type)
        return {"recommend_topic_ids": recommend_topic_ids}
    except Exception:
        # Best-effort endpoint: log the traceback and answer with an empty
        # page. `except Exception` (not a bare `except`) lets SystemExit and
        # KeyboardInterrupt propagate so the worker can still be shut down.
        logging.error("catch exception,err_msg:%s", traceback.format_exc())
        return {"recommend_topic_ids": []}
@bind("physical/search/discover_page")
def discover_page(device_id="", user_id=-1, size=10):
    """RPC entry point for the discover page feed.

    :param device_id: device identifier; any value that is not a ``str`` is
        replaced with ``""`` before being passed on.
    :param user_id: user identifier; any falsy value (``None``, ``0``, ``""``)
        is replaced with ``-1`` before being passed on.
    :param size: forwarded unchanged to ``get_discover_page_topic_ids``.
    :return: ``{"recommend_topic_ids": ids}`` where ``ids`` is whatever
        ``get_discover_page_topic_ids`` returned (always queried with
        ``TopicPageType.FIND_PAGE``), or ``{"recommend_topic_ids": []}`` if an
        exception was raised.
    """
    try:
        if not user_id:
            user_id = -1
        if not isinstance(device_id, str):
            device_id = ""

        recommend_topic_ids = get_discover_page_topic_ids(
            user_id, device_id, size, query_type=TopicPageType.FIND_PAGE)
        return {"recommend_topic_ids": recommend_topic_ids}
    except Exception:
        # Best-effort endpoint: log the traceback and answer with an empty
        # page. `except Exception` (not a bare `except`) lets SystemExit and
        # KeyboardInterrupt propagate so the worker can still be shut down.
        logging.error("catch exception,err_msg:%s", traceback.format_exc())
        return {"recommend_topic_ids": []}
@bind("physical/search/home_query")
def home_query(device_id="", user_id=-1, query="", offset=0, size=10):
    """RPC entry point for the home-page search feed.

    The original remark states that only diaries are recommended for now.

    :param device_id: device identifier; any value that is not a ``str`` is
        replaced with ``""`` before being passed on.
    :param user_id: user identifier; any falsy value (``None``, ``0``, ``""``)
        is replaced with ``-1`` before being passed on.
    :param query: search text, forwarded unchanged as the fifth positional
        argument of ``get_home_recommend_topic_ids``.
    :param offset: forwarded unchanged to ``get_home_recommend_topic_ids``.
    :param size: forwarded unchanged to ``get_home_recommend_topic_ids``.
    :return: ``{"recommend_topic_ids": ids}`` where ``ids`` is whatever
        ``get_home_recommend_topic_ids`` returned, or
        ``{"recommend_topic_ids": []}`` if an exception was raised.
    """
    try:
        if not user_id:
            user_id = -1
        if not isinstance(device_id, str):
            device_id = ""

        recommend_topic_ids = get_home_recommend_topic_ids(
            user_id, device_id, offset, size, query)
        return {"recommend_topic_ids": recommend_topic_ids}
    except Exception:
        # Best-effort endpoint: log the traceback and answer with an empty
        # page. `except Exception` (not a bare `except`) lets SystemExit and
        # KeyboardInterrupt propagate so the worker can still be shut down.
        logging.error("catch exception,err_msg:%s", traceback.format_exc())
        return {"recommend_topic_ids": []}
@bind("physical/search/topic_detail_page_recommend") @bind("physical/search/topic_detail_page_recommend")
def topic_detail_page_recommend(user_id=-1,topic_id=-1,topic_group_id=-1,topic_user_id=-1,filter_topic_user_id=False,offset=0,size=10): def topic_detail_page_recommend(user_id=-1, topic_id=-1, topic_group_id=-1, topic_user_id=-1,
filter_topic_user_id=False, offset=0, size=10):
""" """
:remark:帖子详情页推荐策略,缺少第一个卡片策略 :remark:帖子详情页推荐策略,缺少第一个卡片策略
:param user_id: :param user_id:
...@@ -208,15 +214,16 @@ def topic_detail_page_recommend(user_id=-1,topic_id=-1,topic_group_id=-1,topic_u ...@@ -208,15 +214,16 @@ def topic_detail_page_recommend(user_id=-1,topic_id=-1,topic_group_id=-1,topic_u
:return: :return:
""" """
try: try:
if not isinstance(user_id,int): if not isinstance(user_id, int):
user_id = -1 user_id = -1
# 获取帖子标签列表 # 获取帖子标签列表
topic_tag_list = TopicUtils.get_topic_tag_id_list(topic_id) topic_tag_list = TopicUtils.get_topic_tag_id_list(topic_id)
result_list = TopicUtils.get_topic_detail_recommend_list(user_id,topic_id,topic_tag_list,topic_group_id,topic_user_id,filter_topic_user_id,offset,size) result_list = TopicUtils.get_topic_detail_recommend_list(user_id, topic_id, topic_tag_list, topic_group_id,
topic_user_id, filter_topic_user_id, offset, size)
recommend_topic_ids_list = list() recommend_topic_ids_list = list()
if len(result_list)>0: if len(result_list) > 0:
recommend_topic_ids_list = [item["_source"]["id"] for item in result_list] recommend_topic_ids_list = [item["_source"]["id"] for item in result_list]
return {"recommend_topic_ids": recommend_topic_ids_list} return {"recommend_topic_ids": recommend_topic_ids_list}
...@@ -226,7 +233,7 @@ def topic_detail_page_recommend(user_id=-1,topic_id=-1,topic_group_id=-1,topic_u ...@@ -226,7 +233,7 @@ def topic_detail_page_recommend(user_id=-1,topic_id=-1,topic_group_id=-1,topic_u
@bind("physical/search/query_topic_by_tag_aggregation") @bind("physical/search/query_topic_by_tag_aggregation")
def query_topic_by_tag_aggregation(user_id,tag_id, offset, size): def query_topic_by_tag_aggregation(user_id, tag_id, offset, size):
""" """
:remark 按标签聚合召回帖子 :remark 按标签聚合召回帖子
:param tag_id: :param tag_id:
...@@ -240,14 +247,12 @@ def query_topic_by_tag_aggregation(user_id,tag_id, offset, size): ...@@ -240,14 +247,12 @@ def query_topic_by_tag_aggregation(user_id,tag_id, offset, size):
if not tag_id: if not tag_id:
tag_id = -1 tag_id = -1
result_list = TopicUtils.get_tag_aggregation_topic_id_list(user_id,tag_id,offset,size) result_list = TopicUtils.get_tag_aggregation_topic_id_list(user_id, tag_id, offset, size)
recommend_topic_ids_list = list() recommend_topic_ids_list = list()
if len(result_list)>0: if len(result_list) > 0:
recommend_topic_ids_list = [item["_source"]["id"] for item in result_list] recommend_topic_ids_list = [item["_source"]["id"] for item in result_list]
return {"recommend_topic_id": recommend_topic_ids_list} return {"recommend_topic_id": recommend_topic_ids_list}
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {"recommend_topic_id": []} return {"recommend_topic_id": []}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment