Commit 2dadb187 authored by 段英荣

Merge branch 'set_tag_list' into 'master'

Set tag list

See merge request alpha/physical!455
parents 71d8e15d 909a9fef
......@@ -17,6 +17,8 @@ import libs.tools as Tools
from trans2es.models.pictorial import CommunityPictorialHomeFeed
from libs.error import logging_exception
import os
from search.views.tag import get_same_tagset_ids
class KafkaManager(object):
consumser_obj = None
......@@ -91,7 +93,8 @@ class CollectData(object):
recommend_lin_pictorial_id_list = list()
if click_topic_tag_list and len(click_topic_tag_list)>0:
recommend_topic_id_list_click,recommend_topic_id_list_click_dict = ESPerform.get_tag_topic_list_dict(click_topic_tag_list,
click_topic_tag_list_same_tagset_ids = get_same_tagset_ids(click_topic_tag_list)
recommend_topic_id_list_click,recommend_topic_id_list_click_dict = ESPerform.get_tag_topic_list_dict(click_topic_tag_list_same_tagset_ids,
have_read_topic_id_list,size=2)
if len(recommend_topic_id_list_click) > 0:
recommend_topic_id_list.extend(recommend_topic_id_list_click)
......@@ -120,12 +123,14 @@ class CollectData(object):
# if len(recommend_topic_id_list)==0 and cursor==0 and len(redis_topic_list)>0:
# have_read_topic_id_list.extend(redis_topic_list[:2])
if len(new_user_click_tag_list)>0:
tag_topic_id_list,tag_topic_dict = ESPerform.get_tag_topic_list_dict(new_user_click_tag_list, have_read_topic_id_list)
recommend_lin_pictorial_id_list = ESPerform.get_tag_pictorial_id_list(new_user_click_tag_list,
new_user_click_tag_list_same_tagset_ids = get_same_tagset_ids(new_user_click_tag_list)
tag_topic_id_list,tag_topic_dict = ESPerform.get_tag_topic_list_dict(new_user_click_tag_list_same_tagset_ids, have_read_topic_id_list)
recommend_lin_pictorial_id_list = ESPerform.get_tag_pictorial_id_list(new_user_click_tag_list_same_tagset_ids,
have_read_lin_pictorial_id_list)
else:
tag_topic_id_list,tag_topic_dict = ESPerform.get_tag_topic_list_dict(tag_id_list,have_read_topic_id_list)
recommend_lin_pictorial_id_list = ESPerform.get_tag_pictorial_id_list(tag_id_list,
tag_id_list_same_tagset_ids = get_same_tagset_ids(tag_id_list)
tag_topic_id_list,tag_topic_dict = ESPerform.get_tag_topic_list_dict(tag_id_list_same_tagset_ids,have_read_topic_id_list)
recommend_lin_pictorial_id_list = ESPerform.get_tag_pictorial_id_list(tag_id_list_same_tagset_ids,
have_read_lin_pictorial_id_list)
if len(recommend_topic_id_list)>0 or len(tag_topic_id_list)>0 or len(new_user_click_tag_list) > 0:
......
......@@ -4,16 +4,14 @@
import logging
import traceback
import json
from alpha_types.venus import TOPIC_SEARCH_SORT
from search.views.tag import get_same_tagset_ids
from libs.es import ESPerform
from .common import TopicDocumentField
from search.utils.common import *
from trans2es.models.pictorial import PictorialTopics
from libs.cache import redis_client
class TopicUtils(object):
@classmethod
......@@ -239,9 +237,10 @@ class TopicUtils(object):
}
query_function_score["boost_mode"] = "replace"
tag_ids = get_same_tagset_ids(tag_id)
query_function_score["query"]["bool"]["should"] = [
{'multi_match': multi_match},
{"term": {"tag_list": tag_id}},
{"terms": {"tag_list": tag_ids}},
{"term": {"user_nick_name_pre": query.lower()}}
]
query_function_score["query"]["bool"]["minimum_should_match"] = 1
......
......@@ -17,6 +17,7 @@ from trans2es.models.topic import CommunityCategoryTagRelation
from trans2es.models.pictorial import PictorialTopics
from trans2es.models.pictorial import CommunityPictorialActivity
from alpha_types.venus import PICTORIAL_ACTIVITY_SORT
from search.views.tag import get_same_tagset_ids
@bind("physical/search/query_pictorial")
......@@ -148,6 +149,10 @@ def pictorial_topic(topic_id=-1, offset=0, size=10):
for item in result_dict["hits"]:
pict_pictorial_ids_list = item["_source"]["pictorial_id"]
topic_tag_list = item["_source"]["tag_list"]
##拿到这些标签的同义词
topic_tag_list = get_same_tagset_ids(topic_tag_list)
##先拿到当前帖子对应的标签的分类ID
tag_type_ids = CommunityCategoryTagRelation.objects.filter(tag_id__in=topic_tag_list,
is_online=True, is_deleted=False).values_list(
......
......@@ -6,13 +6,14 @@ from gm_rpcd.all import bind
import logging
import traceback
import json
from search.utils.topic import TopicUtils
from libs.es import ESPerform
from libs.cache import redis_client
from search.utils.common import *
from trans2es.models.tag import TopicTag,AccountUserTag,CommunityTagFollow,Tag
from trans2es.models.tag import TopicTag, AccountUserTag, CommunityTagFollow, Tag
import time
from trans2es.models.tag import CommunityTagSetRelation
from django.conf import settings
from libs.error import logging_exception
def get_highlight(fields=[]):
......@@ -25,17 +26,17 @@ def get_highlight(fields=[]):
@bind("physical/search/query_tag")
def query_tag(query,offset,size):
def query_tag(query, offset, size):
try:
if query:
query = query.lower()
pre_q = {
"query":{
"query": {
"bool": {
"must":[
{"term":{"name_pre": query}},
{"term":{"is_online": True}}
"must": [
{"term": {"name_pre": query}},
{"term": {"is_online": True}}
]
}
},
......@@ -46,21 +47,23 @@ def query_tag(query,offset,size):
ret_list = list()
result_dict = ESPerform.get_search_results(ESPerform.get_cli(),sub_index_name="tag",query_body=pre_q,offset=0,size=1)
if len(result_dict["hits"])>0:
result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="tag", query_body=pre_q,
offset=0, size=1)
if len(result_dict["hits"]) > 0:
hitLight = u'<ems>%s</ems>' % query
result_dict["hits"][0]["_source"]["highlight"] = result_dict["hits"][0]["_source"]["name"].replace(query, hitLight)
result_dict["hits"][0]["_source"]["highlight"] = result_dict["hits"][0]["_source"]["name"].replace(query,
hitLight)
ret_list.append(result_dict["hits"][0]["_source"])
size -= 1
q = {
"suggest":{
"tips-suggest":{
"prefix":query,
"completion":{
"field":"suggest",
"size":size,
"contexts":{
"suggest": {
"tips-suggest": {
"prefix": query,
"completion": {
"field": "suggest",
"size": size,
"contexts": {
"is_online": [True],
"is_deleted": [False]
}
......@@ -75,14 +78,14 @@ def query_tag(query,offset,size):
}
}
result_dict = ESPerform.get_search_results(ESPerform.get_cli(),sub_index_name="tag",query_body=q,offset=offset,size=size,is_suggest_request=True)
result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="tag", query_body=q,
offset=offset, size=size, is_suggest_request=True)
for tips_item in result_dict["suggest"]["tips-suggest"]:
for hit_item in tips_item["options"]:
#if len(hit_item["contexts"])==2:
if hit_item["_source"]["is_deleted"]==False and hit_item["_source"]["is_online"]==True:
# if len(hit_item["contexts"])==2:
if hit_item["_source"]["is_deleted"] == False and hit_item["_source"]["is_online"] == True:
hitLight = u'<ems>%s</ems>' % query
hit_item["_source"]["highlight"] = hit_item["_source"]["name"].replace(query,hitLight)
hit_item["_source"]["highlight"] = hit_item["_source"]["name"].replace(query, hitLight)
ret_list.append(hit_item["_source"])
return {"tag_list": ret_list}
......@@ -92,20 +95,20 @@ def query_tag(query,offset,size):
@bind("physical/search/query_by_tag_type")
def query_by_tag_type(tag_type_id,offset,size):
def query_by_tag_type(tag_type_id, offset, size):
try:
q = {
"query":{
"bool":{
"must":[
{"term":{"is_online":True}},
"query": {
"bool": {
"must": [
{"term": {"is_online": True}},
{"term": {"is_deleted": False}}
],
"filter":{"term":{"tag_type": tag_type_id}}
"filter": {"term": {"tag_type": tag_type_id}}
}
},
"sort":[
{"near_new_topic_num":{"order":"desc"}}
"sort": [
{"near_new_topic_num": {"order": "desc"}}
],
"_source": {
"includes": ["id", "name"]
......@@ -113,7 +116,8 @@ def query_by_tag_type(tag_type_id,offset,size):
}
ret_list = list()
result_dict = ESPerform.get_search_results(ESPerform.get_cli(),sub_index_name="tag",query_body=q,offset=offset,size=size)
result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="tag", query_body=q,
offset=offset, size=size)
for hit_item in result_dict["hits"]:
ret_list.append(hit_item["_source"])
......@@ -122,6 +126,7 @@ def query_by_tag_type(tag_type_id,offset,size):
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {"tag_list": []}
@bind("physical/search/choice_pictorial_push_tag")
def choice_pictorial_push_tag(device_id, user_id):
try:
......@@ -152,7 +157,7 @@ def choice_push_tag(device_id, user_id):
redis_push_tag_dict = json.loads(redis_push_tag_data) if redis_push_tag_data else {}
now_sec = int(time.time())
valid_time = 8*7*24*60*60
valid_time = 8 * 7 * 24 * 60 * 60
ori_key_list = list(redis_push_tag_dict.keys())
for tag_id in ori_key_list:
......@@ -161,8 +166,11 @@ def choice_push_tag(device_id, user_id):
redis_push_tag_list = list(redis_push_tag_dict.keys())
redis_push_tag_list = [int(item) for item in redis_push_tag_list]
account_user_tag_list = list(AccountUserTag.objects.filter(user=user_id,is_deleted=False).values_list("tag_id",flat=True))
community_tag_follow_list = list(CommunityTagFollow.objects.filter(user_id=user_id,is_online=True,is_deleted=False).values_list("tag_id",flat=True))
account_user_tag_list = list(
AccountUserTag.objects.filter(user=user_id, is_deleted=False).values_list("tag_id", flat=True))
community_tag_follow_list = list(
CommunityTagFollow.objects.filter(user_id=user_id, is_online=True, is_deleted=False).values_list("tag_id",
flat=True))
linucb_recommend_redis_prefix = "physical:linucb:tag_recommend:device_id:"
tag_recommend_redis_key = linucb_recommend_redis_prefix + str(device_id)
......@@ -173,15 +181,16 @@ def choice_push_tag(device_id, user_id):
account_user_tag_list.extend(linucb_recommend_tag_list)
unread_tag_list = list(set(account_user_tag_list) - set(redis_push_tag_list))
unread_tag_list = list(Tag.objects.filter(id__in=unread_tag_list, is_online=True, is_deleted=False).values_list("id",flat=True))
unread_tag_list = list(
Tag.objects.filter(id__in=unread_tag_list, is_online=True, is_deleted=False).values_list("id", flat=True))
ret_tag_set = set()
if len(unread_tag_list)>0:
if len(unread_tag_list) > 0:
for tag_id in unread_tag_list:
valid_tag_topic_num = TopicTag.objects.filter(tag_id=tag_id,is_online=True).count()
if valid_tag_topic_num>100:
valid_tag_topic_num = TopicTag.objects.filter(tag_id=tag_id, is_online=True).count()
if valid_tag_topic_num > 100:
ret_tag_set.add(tag_id)
redis_push_tag_dict[tag_id] = now_sec
if len(ret_tag_set)>=1:
if len(ret_tag_set) >= 1:
break
redis_client.set(redis_push_tag_key, json.dumps(redis_push_tag_dict))
......@@ -191,6 +200,7 @@ def choice_push_tag(device_id, user_id):
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {"tag_list": []}
@bind("physical/search/identity_tag_name")
def identity_tag_name(topic_content):
try:
......@@ -203,7 +213,8 @@ def identity_tag_name(topic_content):
}
cli_info = settings.TAG_ES_INFO_LIST
res = ESPerform.get_analyze_results(es_cli=ESPerform.get_cli(cli_info=cli_info), sub_index_name="tag", query_body=body)
res = ESPerform.get_analyze_results(es_cli=ESPerform.get_cli(cli_info=cli_info), sub_index_name="tag",
query_body=body)
logging.info("duan add,res:%s" % str(res).encode("utf-8"))
for item in res["tokens"]:
......@@ -217,3 +228,32 @@ def identity_tag_name(topic_content):
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {"tag_name_list": []}
def get_same_tagset_ids(tag_list):
    """
    Return the IDs of all tags that share a tag set with the given tag(s).

    The lookup is two-step: first collect the ``tag_set_id`` of every
    non-deleted ``CommunityTagSetRelation`` row whose ``tag_id`` is one of the
    inputs, then collect the ``tag_id`` of every non-deleted row belonging to
    any of those sets.

    NOTE: the result is built only from relation rows, so an input tag that has
    no non-deleted relation row does not appear in the returned list.

    :param tag_list: a single integer tag ID, or an iterable of tag IDs.
    :return: list of tag IDs (may contain duplicates when a tag belongs to
             several sets); an empty list when nothing matches or when the
             lookup fails.
    """
    try:
        # Callers pass either one tag ID or a collection of them; normalise to
        # a list. (The previous code appended the builtin ``list`` type here
        # instead of the argument, so single-ID lookups never matched.)
        if isinstance(tag_list, int):
            all_tag = [tag_list]
        else:
            all_tag = tag_list

        tag_set_list_id = list(
            CommunityTagSetRelation.objects.filter(
                tag_id__in=all_tag, is_deleted=False
            ).values_list("tag_set_id", flat=True))
        logging.info("get tag_set_list_id:%s" % tag_set_list_id)

        tag_ids = list(
            CommunityTagSetRelation.objects.filter(
                tag_set_id__in=tag_set_list_id, is_deleted=False
            ).values_list("tag_id", flat=True))

        return tag_ids
    except Exception:
        logging_exception()
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        # Callers feed the result straight into ``terms`` / ``__in`` filters,
        # so the failure value must be a list of IDs, not a response dict.
        return []
......@@ -16,6 +16,7 @@ from django.conf import settings
from libs.tools import get_have_read_lin_pictorial_id_list
import datetime
from trans2es.models.tag import Tag
from search.views.tag import get_same_tagset_ids
def get_discover_page_topic_ids(user_id, device_id, size, query_type=TopicPageType.FIND_PAGE):
......@@ -32,7 +33,8 @@ def get_discover_page_topic_ids(user_id, device_id, size, query_type=TopicPageTy
if have_read_topic_id_list == None:
have_read_topic_id_list = list()
recommend_topic_ids,ret_data_list = TopicUtils.get_recommend_topic_ids(user_id=user_id, tag_id=0, offset=0, size=size,
recommend_topic_ids, ret_data_list = TopicUtils.get_recommend_topic_ids(user_id=user_id, tag_id=0, offset=0,
size=size,
single_size=size,
query_type=query_type,
filter_topic_id_list=have_read_topic_id_list,
......@@ -165,11 +167,13 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
rank_topic_id_list = list()
ret_data_list = list()
if size > 0:
rank_topic_id_list,ret_data_list = TopicUtils.get_recommend_topic_ids(user_id=user_id, tag_id=tag_id, offset=0, size=size,
rank_topic_id_list, ret_data_list = TopicUtils.get_recommend_topic_ids(user_id=user_id, tag_id=tag_id,
offset=0, size=size,
single_size=size, query=query,
query_type=query_type,
filter_topic_id_list=have_read_topic_id_list,
index_type=index_type, routing=topic_star_routing,
index_type=index_type,
routing=topic_star_routing,
attention_tag_list=attention_tag_list,
linucb_user_id_list=recommend_topic_user_list,
disable_collpase=disable_collpase)
......@@ -202,7 +206,7 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
today = datetime.datetime.strptime(str(datetime.date.today()), "%Y-%m-%d")
tomorrow = today + datetime.timedelta(days=1)
nowTime = datetime.datetime.now()
expire_time = (tomorrow - nowTime).seconds + 3*60*60
expire_time = (tomorrow - nowTime).seconds + 3 * 60 * 60
redis_client.expire(redis_key, expire_time)
ret_list = rank_topic_id_list if query is None else ret_data_list
......@@ -217,10 +221,12 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
else:
return [], []
def get_home_recommend_pictorial_ids(user_id=-1,device_id="",size=4):
def get_home_recommend_pictorial_ids(user_id=-1, device_id="", size=4):
try:
pictorial_recommend_redis_key = "physical:linucb:pictorial_recommend:device_id:" + str(device_id)
have_read_lin_pictorial_id_list = get_have_read_lin_pictorial_id_list(device_id,user_id,TopicPageType.HOME_RECOMMEND)
have_read_lin_pictorial_id_list = get_have_read_lin_pictorial_id_list(device_id, user_id,
TopicPageType.HOME_RECOMMEND)
pictorial_recommend_dict = redis_client.hgetall(pictorial_recommend_redis_key)
pictorial_recommend_list = list()
if b"data" in pictorial_recommend_dict:
......@@ -238,7 +244,7 @@ def get_home_recommend_pictorial_ids(user_id=-1,device_id="",size=4):
redis_key = "physical:home_pictorial_recommend" + ":device_id:" + str(device_id) + ":query_type:" + str(
TopicPageType.HOME_RECOMMEND)
redis_field_list = 'have_read_pictorial_list'
redis_client.hset(redis_key, redis_field_list,have_read_lin_pictorial_id_list)
redis_client.hset(redis_key, redis_field_list, have_read_lin_pictorial_id_list)
today = datetime.datetime.strptime(str(datetime.date.today()), "%Y-%m-%d")
tomorrow = today + datetime.timedelta(days=1)
nowTime = datetime.datetime.now()
......@@ -313,7 +319,7 @@ def home_recommend(device_id="", user_id=-1, offset=0, size=10, query_type=Topic
usefulrecall=usefulrecall,
useful_tag_list=useful_tag_list)
return {"linucb_topic_ids": recommend_topic_ids, "rank_topic_ids": rank_topic_ids,
"useful_topic_ids": useful_topic_ids,"linucb_pictorial_ids":recommend_pictorial_ids}
"useful_topic_ids": useful_topic_ids, "linucb_pictorial_ids": recommend_pictorial_ids}
else:
recommend_pictorial_ids = get_home_recommend_pictorial_ids(user_id, device_id, 4)
size = size - len(recommend_pictorial_ids)
......@@ -322,13 +328,14 @@ def home_recommend(device_id="", user_id=-1, offset=0, size=10, query_type=Topic
query_type=query_type,
promote_topic_list=promote_topic_list)
return {"linucb_topic_ids": recommend_topic_ids, "rank_topic_ids": rank_topic_ids,"linucb_pictorial_ids":recommend_pictorial_ids}
return {"linucb_topic_ids": recommend_topic_ids, "rank_topic_ids": rank_topic_ids,
"linucb_pictorial_ids": recommend_pictorial_ids}
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
if usefulrecall != -1:
return {"linucb_topic_ids": [], "rank_topic_ids": [], "useful_topic_ids": [],"linucb_pictorial_ids":[]}
return {"linucb_topic_ids": [], "rank_topic_ids": [], "useful_topic_ids": [], "linucb_pictorial_ids": []}
else:
return {"linucb_topic_ids": [], "rank_topic_ids": [],"linucb_pictorial_ids":[]}
return {"linucb_topic_ids": [], "rank_topic_ids": [], "linucb_pictorial_ids": []}
@bind("physical/search/discover_page")
......@@ -434,6 +441,7 @@ def topic_detail_page_recommend(device_id="", user_id=-1, topic_id=-1, topic_pic
normal_tag_result = list()
category_tag_reuslt = list()
result = list()
topic_tag_list = get_same_tagset_ids(topic_tag_list)
tag_query_results = Tag.objects.using(settings.SLAVE_DB_NAME).filter(
id__in=topic_tag_list, is_online=True, is_deleted=False).values_list("id", "is_category")
......@@ -589,9 +597,11 @@ def query_topic_by_user_similarity(topic_similarity_score_dict, offset=0, size=1
try:
must_topic_id_list = list(topic_similarity_score_dict.keys())
topic_id_list,ret_data_list = TopicUtils.get_recommend_topic_ids(tag_id=0, user_id=-1, offset=offset, size=size,
topic_id_list, ret_data_list = TopicUtils.get_recommend_topic_ids(tag_id=0, user_id=-1, offset=offset,
size=size,
single_size=size,
must_topic_id_list=must_topic_id_list, index_type="topic",
must_topic_id_list=must_topic_id_list,
index_type="topic",
routing="4,5,6")
return {"recommend_topic_ids": topic_id_list}
......
......@@ -107,4 +107,15 @@ class SettingsConfig(models.Model):
key = models.IntegerField(verbose_name=u"值")
val = models.IntegerField(verbose_name="标签内容")
class CommunityTagSetRelation(models.Model):
    """Django model mapped to the ``community_tag_set_relation`` table.

    Each row pairs a ``tag_id`` with a ``tag_set_id`` and carries an
    ``is_deleted`` flag.
    """

    class Meta:
        # NOTE(review): this verbose_name reads "search hot words", which does
        # not describe a tag/tag-set relation — looks copy-pasted; confirm.
        verbose_name = "搜索热词"
        db_table = "community_tag_set_relation"

    # Primary key, declared explicitly as a plain integer column.
    id = models.IntegerField(primary_key=True, verbose_name=u"主键ID")
    # ID of the tag on this side of the relation.
    tag_id = models.IntegerField(verbose_name=u"标签ID")
    # ID of the tag set the tag belongs to.
    # NOTE(review): verbose_name is identical to tag_id's ("tag ID") — confirm
    # whether it was meant to say "tag set ID".
    tag_set_id = models.IntegerField(verbose_name=u"标签ID")
    # Deletion flag (verbose_name: "whether deleted").
    is_deleted = models.BooleanField(verbose_name=u"是否删除")
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment