tag.py 10.8 KB
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import unicode_literals, absolute_import, print_function
from gm_rpcd.all import bind
import logging
import traceback
import json
from libs.es import ESPerform
from libs.cache import redis_client
from search.utils.common import *
from trans2es.models.tag import TopicTag, AccountUserTag, CommunityTagFollow, Tag
import time
from libs.tools import is_connection_usable
from trans2es.models.tag import CommunityTagSetRelation
from django.conf import settings
from libs.error import logging_exception
from django.db import connection
from trans2es.models.account_reg_extra import AccountRegExtra


def get_highlight(fields=None):
    """
    Build a highlight settings dict for the given field names.

    :param fields: iterable of field names to highlight; ``None`` (the
        default) or an empty iterable yields an empty ``fields`` mapping.
    :return: dict with a ``fields`` mapping (every name mapped to an empty
        options dict) and ``<ems>`` / ``</ems>`` as the pre/post tags.
    """
    # ``None`` replaces the former mutable ``[]`` default argument; callers
    # that omit the argument get exactly the same result as before.
    return {
        'fields': {name: {} for name in (fields or ())},
        'pre_tags': ['<ems>'],
        'post_tags': ['</ems>'],
    }


def _wrap_query_highlight(name, query):
    """Return ``name`` with every occurrence of ``query`` wrapped in ``<ems>`` tags."""
    if not query:
        # str.replace with an empty needle would insert the tags between
        # every character, and a None needle raises TypeError.
        return name
    return name.replace(query, u'<ems>%s</ems>' % query)


@bind("physical/search/query_tag")
def query_tag(query, offset, size):
    """
    Suggest tags for the text ``query``.

    The query is lower-cased, then two lookups run against the ``tag``
    sub-index: a term query on ``name_pre`` for at most one hit (placed first
    in the result), followed by a completion-suggest request on the
    ``suggest`` field for the remaining slots.

    :param query: text to look up.
    :param offset: offset forwarded to the suggest request (the ``name_pre``
        lookup always uses offset 0).
    :param size: maximum number of tags wanted; reduced by one for the
        suggest request when the ``name_pre`` lookup yields a hit.
    :return: ``{"tag_list": [...]}`` where every item is the hit's ``_source``
        plus a ``highlight`` key; the list is empty if any exception occurs.
    """
    try:
        if query:
            query = query.lower()

        pre_q = {
            "query": {
                "bool": {
                    "must": [
                        {"term": {"name_pre": query}},
                        {"term": {"is_online": True}},
                        # Keep this lookup consistent with the suggest
                        # filtering below: deleted tags must not be returned.
                        {"term": {"is_deleted": False}}
                    ]
                }
            },
            "_source": {
                "include": ["id", "name", "is_deleted", "is_online"]
            }
        }

        ret_list = []
        first_hit_id = None

        result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="tag", query_body=pre_q,
                                                   offset=0, size=1)
        if result_dict["hits"]:
            first_source = result_dict["hits"][0]["_source"]
            first_source["highlight"] = _wrap_query_highlight(first_source["name"], query)
            ret_list.append(first_source)
            first_hit_id = first_source.get("id")
            size -= 1

        if size <= 0:
            # Nothing left to fetch. Previously a suggest request with a
            # non-positive size was still issued; if the backend rejects it
            # (TODO confirm), the hit found above was lost in the except branch.
            return {"tag_list": ret_list}

        q = {
            "suggest": {
                "tips-suggest": {
                    "prefix": query,
                    "completion": {
                        "field": "suggest",
                        "size": size,
                        "contexts": {
                            "is_online": [True],
                            "is_deleted": [False]
                        }
                    }
                }
            },
            "sort": [
                {"near_new_topic_num": {"order": "desc"}}
            ],
            "_source": {
                "include": ["id", "name", "is_deleted", "is_online"]
            }
        }

        result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="tag", query_body=q,
                                                   offset=offset, size=size, is_suggest_request=True)
        for tips_item in result_dict["suggest"]["tips-suggest"]:
            for hit_item in tips_item["options"]:
                source = hit_item["_source"]
                # Explicit comparisons on purpose: a missing/None flag must not pass.
                if not (source["is_deleted"] == False and source["is_online"] == True):
                    continue
                if first_hit_id is not None and source.get("id") == first_hit_id:
                    # Already returned by the name_pre lookup; avoid a duplicate entry.
                    continue
                source["highlight"] = _wrap_query_highlight(source["name"], query)
                ret_list.append(source)

        return {"tag_list": ret_list}
    except Exception:
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return {"tag_list": []}


@bind("physical/search/query_by_tag_type")
def query_by_tag_type(tag_type_id, offset, size):
    """
    List online, non-deleted tags of one tag type.

    :param tag_type_id: value matched against the ``tag_type`` field.
    :param offset: offset forwarded to the search request.
    :param size: page size forwarded to the search request.
    :return: ``{"tag_list": [...]}`` holding the ``_source`` of every hit
        (``id`` and ``name``), sorted by ``near_new_topic_num`` descending;
        the list is empty if any exception occurs.
    """
    try:
        must_clauses = [
            {"term": {"is_online": True}},
            {"term": {"is_deleted": False}}
        ]
        type_filter = {"term": {"tag_type": tag_type_id}}
        search_body = {
            "query": {"bool": {"must": must_clauses, "filter": type_filter}},
            "sort": [{"near_new_topic_num": {"order": "desc"}}],
            "_source": {"includes": ["id", "name"]}
        }

        es_cli = ESPerform.get_cli()
        search_result = ESPerform.get_search_results(es_cli, sub_index_name="tag", query_body=search_body,
                                                     offset=offset, size=size)
        return {"tag_list": [hit["_source"] for hit in search_result["hits"]]}
    except:
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return {"tag_list": []}


@bind("physical/search/choice_pictorial_push_tag")
def choice_pictorial_push_tag(device_id, user_id):
    """
    Return the recommended tags cached in Redis for a device.

    :param device_id: device whose cached recommendation list is read.
    :param user_id: not used by this function.
    :return: ``{"pictorial_tag_list": [...]}`` with at most the first 50
        cached entries; the list is empty when nothing is cached or any
        exception occurs.
    """
    try:
        cache_key = "physical:linucb:tag_recommend:device_id:" + str(device_id)
        cached_payload = redis_client.get(cache_key)
        if cached_payload:
            recommended_tags = json.loads(cached_payload)
        else:
            recommended_tags = []

        return {"pictorial_tag_list": recommended_tags[:50]}
    except:
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return {"pictorial_tag_list": []}


@bind("physical/search/lintag_by_user_id")
def get_lintags_by_user_id(user_id):
    """
    Return the first three cached recommended tags for a user's device.

    The device is the first online, non-deleted ``AccountRegExtra`` row found
    for the user; its recommendation list is read from Redis.

    :param user_id: user whose device is looked up.
    :return: ``{"lin_tag_list": [...]}`` with at most three entries; the list
        is empty when the user has no such device, nothing is cached, or any
        exception occurs.
    """
    try:
        # Only the first matching device is used, so fetch a single row
        # instead of evaluating the whole queryset.
        device_ids = list(
            AccountRegExtra.objects.filter(user_id=user_id, is_online=True, is_deleted=False)
            .values_list("device_id", flat=True)[:1])
        if not device_ids:
            return {"lin_tag_list": []}

        redis_key = "physical:linucb:tag_recommend:device_id:" + str(device_ids[0])
        tag_data = redis_client.get(redis_key)
        if tag_data is None:
            return {"lin_tag_list": []}

        # Accept both raw bytes and an already-decoded str payload; the former
        # str(tag_data, encoding="utf-8") raised TypeError on str input.
        if isinstance(tag_data, (bytes, bytearray)):
            tag_data = tag_data.decode("utf-8")
        lintags = json.loads(tag_data)
        return {"lin_tag_list": lintags[:3]}
    except Exception:
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return {"lin_tag_list": []}


@bind("physical/search/choice_push_tag")
def choice_push_tag(device_id, user_id):
    """
    Pick a tag to push to the user.

    Candidates are the user's own tags, the tags the user follows and the
    recommendation list cached for the device, minus tags recorded in the
    user's push history during the last eight weeks. The first online,
    non-deleted candidate with more than 100 online topics is chosen and
    recorded in the push history, which is then written back to Redis.

    :param device_id: device whose cached recommendation list is read.
    :param user_id: user whose tags and push history are read.
    :return: ``{"tag_list": [...]}`` holding at most one tag id; the list is
        empty if nothing qualifies or any exception occurs.
    """
    try:
        push_history_key = "physical:push_tag:user_id:" + str(user_id)
        raw_history = redis_client.get(push_history_key)
        push_history = json.loads(raw_history) if raw_history else {}

        now_sec = int(time.time())
        valid_time = 8 * 7 * 24 * 60 * 60  # eight weeks, in seconds

        # Drop history entries that are at least eight weeks old.
        expired_ids = [pushed_id for pushed_id in push_history.keys()
                       if now_sec - push_history[pushed_id] >= valid_time]
        for pushed_id in expired_ids:
            push_history.pop(pushed_id)

        # JSON object keys are strings; the model ids they are compared with are not.
        already_pushed = [int(pushed_id) for pushed_id in push_history.keys()]

        candidate_ids = list(
            AccountUserTag.objects.filter(user=user_id, is_deleted=False).values_list("tag_id", flat=True))
        followed_ids = list(
            CommunityTagFollow.objects.filter(user_id=user_id, is_online=True, is_deleted=False).values_list(
                "tag_id", flat=True))

        recommend_key = "physical:linucb:tag_recommend:device_id:" + str(device_id)
        raw_recommend = redis_client.get(recommend_key)
        recommended_ids = json.loads(raw_recommend) if raw_recommend else []

        candidate_ids.extend(followed_ids)
        candidate_ids.extend(recommended_ids)
        unread_ids = list(set(candidate_ids) - set(already_pushed))

        pushable_ids = list(
            Tag.objects.filter(id__in=unread_ids, is_online=True, is_deleted=False).values_list("id", flat=True))

        chosen = set()
        for candidate_id in pushable_ids:
            topic_total = TopicTag.objects.filter(tag_id=candidate_id, is_online=True).count()
            if topic_total > 100:
                chosen.add(candidate_id)
                push_history[candidate_id] = now_sec
                # One tag per call is enough.
                break

        redis_client.set(push_history_key, json.dumps(push_history))

        return {"tag_list": list(chosen)}
    except:
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return {"tag_list": []}


@bind("physical/search/identity_tag_name")
def identity_tag_name(topic_content):
    """
    Find the tag names that occur in a piece of topic text.

    The text is run through the ``gm_default_index`` analyzer and every
    resulting token that is a member of the Redis set
    ``physical:tag_name_set`` is reported.

    :param topic_content: text to analyze.
    :return: ``{"tag_name_list": [...]}`` with the distinct matching tokens
        (order is unspecified); the list is empty if any exception occurs.
    """
    try:
        redis_key_name = "physical:tag_name_set"

        body = {
            'text': topic_content,
            'analyzer': "gm_default_index"
        }

        cli_info = settings.TAG_ES_INFO_LIST
        res = ESPerform.get_analyze_results(es_cli=ESPerform.get_cli(cli_info=cli_info), sub_index_name="tag",
                                            query_body=body)

        # Log the text itself: the former str(res).encode("utf-8") produced a
        # b'...' repr with escaped non-ASCII tokens on Python 3.
        logging.info("duan add,res:%s", res)

        # De-duplicate first so every distinct token costs one Redis lookup.
        distinct_tokens = {item["token"] for item in res["tokens"]}
        tag_names = [token for token in distinct_tokens
                     if redis_client.sismember(redis_key_name, token)]

        return {"tag_name_list": tag_names}
    except Exception:
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return {"tag_name_list": []}


def get_same_tagset_ids(tag_list):
    """
    Get the ids of all tags that share a tag set with the given tag(s).

    :param tag_list: a single tag id (int) or an iterable of tag ids; the
        argument itself is never modified.
    :return: list with the given ids plus every tag id belonging to the same
        tag sets, de-duplicated. If the lookup fails, the given ids are
        returned as a list without the related ids.
    """
    if isinstance(tag_list, int):
        all_tag = [tag_list]
    elif tag_list:
        # Copy so the caller's list is not mutated by extend() below; this
        # also lets tuples, sets and querysets be passed in.
        all_tag = list(tag_list)
    else:
        all_tag = []
    try:
        if not is_connection_usable():
            connection.close()
            logging.error("unable mysql connection and close")
        relations = CommunityTagSetRelation.objects.using(settings.SLAVE1_DB_NAME)
        tag_set_list_id = list(
            relations.filter(tag_id__in=all_tag, is_deleted=False).values_list("tag_set_id", flat=True))
        tag_ids = list(
            relations.filter(tag_set_id__in=tag_set_list_id, is_deleted=False).values_list("tag_id", flat=True))
        all_tag.extend(tag_ids)
        logging.info("get_same_tagset_ids:%s" % str(all_tag))
        return list(set(all_tag))
    except Exception:
        logging_exception()
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return all_tag