topic.py 7 KB
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import logging
import traceback
from libs.es import ESPerform


class TopicUtils(object):

    @classmethod
    def get_related_user_info(cls, user_id, offset=0, size=10):
        """
        Look up a user's record in the "user" index, restricted to the
        relation fields (tags, followed users, picked users, same-group users).

        :param user_id: value placed under "user_id" in the query body.
        :param offset: paging offset forwarded to ESPerform.get_search_results.
        :param size: page size forwarded to ESPerform.get_search_results.
        :return: whatever ESPerform.get_search_results returns, or None when
                 any exception is raised (the traceback is logged).
        """
        try:
            q = {
                # NOTE(review): the query object carries the field directly, with
                # no clause name such as "term"/"match" -- confirm ESPerform (or
                # the cluster) accepts this shape.
                "query": {
                    "user_id": user_id
                },
                # Source filtering: this key was previously misspelled "_souce",
                # so the field restriction was never sent under the right name.
                "_source": [
                    "tag_list",
                    "attention_user_id_list",
                    "pick_user_id_list",
                    "same_group_user_id_list",
                ],
            }

            return ESPerform.get_search_results(ESPerform.get_cli(), "user", q, offset, size)
        except Exception:
            # Best-effort lookup: callers get None instead of an exception.
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return None


    @classmethod
    def analyze_related_user_id_list(cls,related_user_id_list):
        """
        :remark:获取指定用户关联的 用户列表
        :param related_user_id_list:
        :return:
        """
        try:
            chinese_user_id_list = list()
            japan_user_id_list = list()
            korea_user_id_list = list()

            for item in related_user_id_list:
                if item["country_id"] == 0:
                    chinese_user_id_list.append(item["user_id"])
                elif item["country_id"] == 1:
                    japan_user_id_list.append(item["user_id"])
                elif item["country_id"] == 2:
                    korea_user_id_list.append(item["user_id"])

            return (chinese_user_id_list,japan_user_id_list,korea_user_id_list)
        except:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return ([],[],[])

    @classmethod
    def refresh_redis_hash_data(cls, redis_cli,redis_key,redis_data_dict):
        try:
            redis_cli.hmset(redis_key, redis_data_dict)
            return True
        except:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return False

    @classmethod
    def get_recommend_topic_ids(cls,user_id,offset,size,is_first_time,group_topic_ids_index,not_group_topic_ids_index,query=None):
        """
        :需增加打散逻辑
        :remark:获取首页推荐帖子列表
        :param user_id:
        :param offset:
        :param size:
        :param is_first_time:
        :param group_topic_ids_index:
        :param not_group_topic_ids_index:
        :return:
        """
        try:
            #重写offset,size
            size = 1000
            if is_first_time:
                offset=0
            else:
                if group_topic_ids_index>0:
                    offset = 1000 * group_topic_ids_index
                else:
                    offset = 1000 * not_group_topic_ids_index

            result_dict = TopicUtils.get_related_user_info(user_id, 0, 1)
            if len(result_dict["hits"]) == 0:
                logging.error("not find user_id:%d in es!" % int(user_id))
                return {"recommend_topic_ids": []}

            attention_user_info_list = result_dict["hits"][0]["attention_user_id_list"]
            (attention_chinese_user_id_list, attention_japan_user_id_list,
             attention_korea_user_id_list) = TopicUtils.analyze_related_user_id_list(
                related_user_id_list=attention_user_info_list)

            pick_user_info_list = result_dict["hits"][0]["pick_user_id_list"]
            (pick_chinese_user_id_list, pick_japan_user_id_list,
             pick_korea_user_id_list) = TopicUtils.analyze_related_user_id_list(pick_user_info_list)

            same_group_user_info_list = result_dict["hits"][0]["same_group_user_id_list"]
            (same_group_chinese_user_id_list, same_group_japan_user_id_list,
             same_group_korea_user_id_list) = TopicUtils.analyze_related_user_id_list(same_group_user_info_list)

            user_tag_list = result_dict["hits"][0]["tag_list"]

            q = dict()
            q["filter"] = {
                "range": {"content_level": {"gte": 3, "lte": 5}}
            }
            if not is_first_time:
                if group_topic_ids_index:
                    q["filter"]["range"]["group_id"] = {
                        "gt": 0
                    }
                else:
                    q["filter"]["range"]["group_id"] = {
                        "lt": 0
                    }
            functions_list = [
                {
                    "filter": {"match": {
                        "user_id": attention_chinese_user_id_list + attention_japan_user_id_list + attention_korea_user_id_list}},
                    "weight": 5,
                },
                {
                    "filter": {"match": {
                        "user_id": pick_chinese_user_id_list + pick_japan_user_id_list + pick_korea_user_id_list}},
                    "weight": 3
                },
                {
                    "filter": {"match": {
                        "user_id": same_group_chinese_user_id_list + same_group_japan_user_id_list + same_group_korea_user_id_list}},
                    "weight": 2
                }
            ]

            query_function_score = {
                "query": {
                    "match": {
                        "tag_list": user_tag_list
                    }
                },
                "score_mode": "sum",
                "boost_mode": "sum",
                "functions": functions_list
            }

            q["query"] = {
                "function_score": query_function_score
            }

            if query is not None:#搜索帖子
                multi_fields = {
                    'description': 2,
                    'name': 4,
                }
                query_fields = ['^'.join((k, str(v))) for (k, v) in multi_fields.items()]
                multi_match = {
                    'query': query,
                    'type': 'cross_fields',
                    'operator': 'and',
                    'fields': query_fields,
                }
                q["query"]["bool"] = {
                    "should": [
                        {'multi_match': multi_match}
                    ],
                    "minimum_should_match": 1
                }
            q["_source"] = {
                "include":["id","group_id"]
            }

            logging.info("duan add,es query:%s" % str(q).encode("utf-8"))
            result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="topic", query_body=q,
                                                       offset=offset, size=size)
            group_topic_ids = list()
            not_group_topic_ids = list()
            for item in result_dict["hits"]:
                if item["group_id"] > 0:
                    group_topic_ids.append(item["id"])
                else:
                    not_group_topic_ids.append(item["id"])

            return (group_topic_ids,not_group_topic_ids)
        except:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return ([],[])