group.py 13.8 KB
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from gm_rpcd.all import bind
import logging
import traceback
import json
from libs.cache import redis_client
from libs.es import ESPerform
from search.utils.group import GroupUtils
from search.utils.common import GroupSortTypes
from libs.es import ESPerform
from trans2es.models.pictorial import PictorialTopics


@bind("physical/search/query_pictorial")
def query_pictorial(query="", offset=0, size=10):
    """
    Search pictorials (groups) by keyword and return the ids of the hits.

    :remark: pictorial search; a dedicated ranking strategy is still missing.
    :param query: search keyword, forwarded to ``GroupUtils.get_group_query_result``
    :param offset: paging offset, forwarded to the helper
    :param size: page size, forwarded to the helper
    :return: ``{"group_ids": [...]}``; the list is empty when there are no
        hits or when an error occurred (errors are logged, never raised)
    """
    try:
        result_dict = GroupUtils.get_group_query_result(query, offset, size)
        # An empty "hits" list naturally produces an empty id list.
        return {"group_ids": [hit["_source"]["id"] for hit in result_dict["hits"]]}
    except Exception:
        # Best-effort endpoint: log and degrade to an empty result.
        # ``Exception`` (not a bare except) lets SystemExit/KeyboardInterrupt
        # propagate so the worker can still be shut down.
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return {"group_ids": []}


@bind("physical/search/pictorial_sort")
def pictorial_sort(user_id=-1, sort_type=GroupSortTypes.HOT_RECOMMEND, offset=0, size=10):
    """
    Return a page of recommended pictorial ids for the given sort type.

    :remark: pictorial ranking; still missing the factor
        "number of users who commented during the previous day * x".
    :param user_id: id of the requesting user; any non-int value is treated as -1
    :param sort_type: one of ``GroupSortTypes``; ``HOT_RECOMMEND`` and
        ``ATTENTION_RECOMMEND`` are handled, anything else yields an empty list
    :param offset: paging offset
    :param size: page size
    :return: ``{"pictorial_recommend_ids": [...]}``; the list is empty when
        nothing is found, the sort type is unsupported, or an error occurred
        (errors are logged, never raised)
    """
    try:
        if not isinstance(user_id, int):
            user_id = -1

        # Obtain the ES client object.
        es_cli_obj = ESPerform.get_cli()

        if sort_type == GroupSortTypes.HOT_RECOMMEND:
            # Tags the user follows; only looked up for a positive user id.
            attention_tag_list = []
            if user_id > 0:
                redis_tag_data = redis_client.hget("physical:linucb:register_user_tag_info", user_id)
                attention_tag_list = json.loads(redis_tag_data) if redis_tag_data else []

            pictorial_ids_list = GroupUtils.get_hot_pictorial_recommend_result_list(
                offset, size, es_cli_obj, attention_tag_list)
            return {"pictorial_recommend_ids": pictorial_ids_list}

        if sort_type == GroupSortTypes.ATTENTION_RECOMMEND:
            # NOTE(review): offset=0/size=1 are fixed here on purpose in the
            # original code; paging is applied to the merged list below.
            attention_pictorial_list = GroupUtils.get_user_attention_pictorial_list(
                user_id, offset=0, size=1, es_cli_obj=es_cli_obj)
            if not attention_pictorial_list:
                return {"pictorial_recommend_ids": []}

            attention_pictorial_id_list = [item["pictorial_id"] for item in attention_pictorial_list]
            pictorial_recommend_ids_list = GroupUtils.get_pictorial_ids_by_aggs(
                attention_pictorial_id_list, es_cli_obj)

            # Append the followed pictorials the aggregation did not return,
            # most recently updated first.
            by_update_time_desc = sorted(attention_pictorial_list,
                                         key=lambda item: item["update_time_val"], reverse=True)
            for item in by_update_time_desc:
                if item["pictorial_id"] not in pictorial_recommend_ids_list:
                    pictorial_recommend_ids_list.append(item["pictorial_id"])

            return {"pictorial_recommend_ids": pictorial_recommend_ids_list[offset:(offset + size)]}

        # Unsupported sort type: previously this fell through and returned
        # None; return the same shape as every other path instead.
        logging.warning("unsupported sort_type:%s" % sort_type)
        return {"pictorial_recommend_ids": []}

    except Exception:
        # Best-effort endpoint: log and degrade to an empty result, while
        # letting SystemExit/KeyboardInterrupt propagate.
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return {"pictorial_recommend_ids": []}


@bind("physical/search/pictorial_topic")
def pictorial_topic(topic_id=-1, offset=0, size=10):
    """
    Return pictorials related to a topic ("selected into pictorials").

    Steps:
      1. Look up the online, non-history topic ``topic_id`` in the "topic"
         index and read its ``pictorial_id`` and ``tag_list`` fields.
      2. If the topic was found, fetch the online, non-deleted pictorials
         whose id is in that ``pictorial_id`` value, newest ``update_time`` first.
      3. If fewer than 10 pictorials were collected, top the list up with
         online, non-deleted pictorials whose ``tag_id`` matches the topic's
         tags (newest ``update_time`` first), skip duplicates and cap the
         result at 10.

    :param topic_id: id of the topic to look up
    :param offset: paging offset passed to every ES search issued here
    :param size: page size passed to every ES search issued here
    :return: ``{"pictorial_ids_list": [...]}``; the list is empty when
        nothing is found or when an error occurred (errors are logged,
        never raised)
    """
    try:
        # Obtain the ES client object.
        es_cli_obj = ESPerform.get_cli()

        # The result is topped up to, and capped at, this many pictorials.
        # NOTE(review): this is independent of ``size`` - confirm whether the
        # cap should follow the requested page size.
        fill_target = 10

        q = {}
        # Find the pictorials the topic belongs to.
        q["query"] = {
            "bool": {
                "must": [
                    {"term": {"id": topic_id}},
                    {"term": {"is_online": True}},
                ],
                "must_not": [
                    {"term": {"is_history": True}},
                ],
            }
        }
        q["_source"] = {
            "includes": ["id", "pictorial_id", "tag_list"]
        }
        # NOTE(review): ``offset`` is also applied to this by-id lookup, so a
        # non-zero offset presumably skips the topic itself - confirm.
        result_dict = ESPerform.get_search_results(es_cli_obj, "topic", q, offset, size)

        pict_pictorial_ids_list = []
        topic_tag_list = []
        pictorial_id_list = []

        topic_hits = result_dict["hits"]
        if len(topic_hits) > 0:
            # Only the values of the last hit are used (as before).
            topic_source = topic_hits[-1]["_source"]
            pict_pictorial_ids_list = topic_source["pictorial_id"]
            topic_tag_list = topic_source["tag_list"]

            q["query"] = {
                "bool": {
                    "must": [
                        {"terms": {"id": pict_pictorial_ids_list}},
                        {"term": {"is_online": True}},
                        {"term": {"is_deleted": False}},
                    ]
                }
            }
            q["_source"] = {
                "includes": ["id", "update_time"]
            }
            q["sort"] = {
                'update_time': {
                    'order': 'desc'
                }
            }
            result_dict = ESPerform.get_search_results(es_cli_obj, "pictorial", q, offset, size)
            pictorial_id_list.extend(hit["_source"]["id"] for hit in result_dict["hits"])

        if len(pictorial_id_list) >= fill_target:
            return {"pictorial_ids_list": pictorial_id_list}

        # Not enough pictorials yet: top up with pictorials sharing the
        # topic's tags.
        q["query"] = {
            "bool": {
                "must": [
                    {"terms": {"tag_id": topic_tag_list}},
                    {"term": {"is_online": True}},
                    {"term": {"is_deleted": False}},
                    {"range": {"topic_id_list": {"gte": 0}}},
                ]
            }
        }
        q["_source"] = {
            "includes": ["id", "tag_id"]
        }
        q["sort"] = {
            'update_time': {
                'order': 'desc'
            }
        }
        # NOTE(review): these body-level paging keys coexist with the
        # offset/size arguments below; which one wins depends on
        # ``ESPerform.get_search_results`` - confirm.
        q["from"] = 0
        q["size"] = 10

        result_dict = ESPerform.get_search_results(es_cli_obj, "pictorial", q, offset, size)

        for hit in result_dict["hits"]:
            candidate_id = hit["_source"]["id"]
            if candidate_id not in pictorial_id_list:
                pictorial_id_list.append(candidate_id)

        return {"pictorial_ids_list": pictorial_id_list[:fill_target]}

    except Exception:
        # Best-effort endpoint: log and degrade to an empty result, while
        # letting SystemExit/KeyboardInterrupt propagate.
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return {"pictorial_ids_list": []}


@bind("physical/search/pictorial_topic_sort")
def pictorial_topic_sort(pictorial_id=-1, offset=0, size=10):
    """
    Return the topic ids of a pictorial ordered by popularity.

    Online, non-deleted topics of ``pictorial_id`` are sorted by the nested
    ``related_billboard.total_vote_cnt`` of that pictorial in descending
    order (topics without a value last), then by ``create_time`` descending.

    :remark: the original note describes the tie-break as "vote update time,
        oldest first"; the query actually issued here breaks ties on
        ``create_time`` descending.
    :param pictorial_id: id of the pictorial; any non-int value is treated as -1
    :param offset: paging offset
    :param size: page size
    :return: ``{"pict_pictorial_ids_list": [...]}`` holding topic ids in rank
        order; the list is empty when nothing is found or when an error
        occurred (errors are logged, never raised)
    """
    try:
        if not isinstance(pictorial_id, int):
            # Fall back to a sentinel id. (This used to assign an unrelated
            # ``user_id`` variable, leaving the invalid value in the query.)
            pictorial_id = -1

        q = {
            "query": {
                "bool": {
                    "must": [
                        {"term": {"is_online": True}},
                        {"term": {"is_deleted": False}},
                        {"term": {"pictorial_id": pictorial_id}}
                    ]
                }
            },
            "sort": [
                {"related_billboard.total_vote_cnt": {
                    "order": "desc",
                    "nested_path": "related_billboard",
                    "missing": "_last",
                    # Only count the votes recorded for this pictorial.
                    "nested_filter": {
                        "term": {
                            "related_billboard.pictorial_id": pictorial_id
                        }
                    }
                }},
                {"create_time": {"order": "desc"}}
            ]
        }

        logging.info("get qqqqqq:%s" % q)

        # Obtain the ES client object.
        es_cli_obj = ESPerform.get_cli()
        result_dict = ESPerform.get_search_results(es_cli_obj, "topic", q, offset, size)

        pict_pictorial_ids_list = [hit["_source"]["id"] for hit in result_dict["hits"]]
        return {"pict_pictorial_ids_list": pict_pictorial_ids_list}

    except Exception:
        # Best-effort endpoint: log and degrade to an empty result, while
        # letting SystemExit/KeyboardInterrupt propagate.
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return {"pict_pictorial_ids_list": []}


@bind("physical/search/search_physical")
def search_physical(query="", offset=0, size=10):
    """
    Search pictorials from the search page and return the ids of the hits.

    Design notes carried over from the original author (the logic itself
    lives in ``GroupUtils.get_search_pictorial_topic`` - verify there):
      1. query recognition: fuzzy matching is allowed
      2. recall: pictorials without a cover, without topics, offline ones
         and default pictorials are excluded
      3. rank: unchanged from the previous implementation

    :param query: search keyword, forwarded to the helper
    :param offset: paging offset, forwarded to the helper
    :param size: page size, forwarded to the helper
    :return: ``{"search_pictorial_ids": [...]}``; the list is empty when
        there are no hits or when an error occurred (errors are logged,
        never raised)
    """
    try:
        result_dicts = GroupUtils.get_search_pictorial_topic(query, offset, size)
        # An empty "hits" list naturally produces an empty id list.
        return {"search_pictorial_ids": [hit["_source"]["id"] for hit in result_dicts["hits"]]}

    except Exception:
        # Best-effort endpoint: log and degrade to an empty result, while
        # letting SystemExit/KeyboardInterrupt propagate.
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return {"search_pictorial_ids": []}


@bind("physical/search/pictorialid_topic")
def pictorial_topic_sort_peoplehot(pictorial_id=-1, user_id=-1, offset=0, size=10):
    """
    Return a user's topics in a pictorial together with their popularity rank.

    The overall ranking is taken from the first 100 results of
    ``pictorial_topic_sort`` for the pictorial. Each of the user's topics
    (queried with the same ordering, paged by ``offset``/``size``) gets:
      * its 1-based position in that overall ranking when it appears there;
      * otherwise ``100 + i`` where ``i`` is its 0-based position among the
        user's own topics returned by this call.

    :param pictorial_id: id of the pictorial; any non-int value is treated as -1
    :param user_id: id of the user whose topics are listed
    :param offset: paging offset for the user's topics
    :param size: page size for the user's topics
    :return: ``{"pictorial_user_topicid_list": [{"topic_id": ..., "topic_sort": ...}, ...]}``.
        On error the list is empty and the legacy key
        ``"pict_pictorial_ids_list"`` is also present (errors are logged,
        never raised)
    """
    # Size of the overall ranking window, also used as the base rank for
    # topics that fall outside of it.
    ranking_window = 100

    try:
        if not isinstance(pictorial_id, int):
            pictorial_id = -1

        results = pictorial_topic_sort(pictorial_id=pictorial_id, size=ranking_window)
        pict_pictorial_ids_list = results["pict_pictorial_ids_list"]

        logging.info("get pict_pictorial_ids_list res:%s" % pict_pictorial_ids_list)

        q = {
            "query": {
                "bool": {
                    "must": [
                        {"term": {"is_online": True}},
                        {"term": {"is_deleted": False}},
                        {"term": {"pictorial_id": pictorial_id}},
                        {"term": {"user_id": user_id}}
                    ]
                }
            },
            "sort": [
                {"related_billboard.total_vote_cnt": {
                    "order": "desc",
                    "nested_path": "related_billboard",
                    "missing": "_last",
                    # Only count the votes recorded for this pictorial.
                    "nested_filter": {
                        "term": {
                            "related_billboard.pictorial_id": pictorial_id
                        }
                    }
                }},
                {"create_time": {"order": "desc"}}
            ]
        }

        # Obtain the ES client object.
        es_cli_obj = ESPerform.get_cli()
        result_dict = ESPerform.get_search_results(es_cli_obj, "topic", q, offset, size)

        user_pict_pictorial_ids_list = [hit["_source"]["id"] for hit in result_dict["hits"]]

        logging.info("get user_pict_pictorial_ids_list res:%s" % user_pict_pictorial_ids_list)

        # First-occurrence position maps replace repeated ``list.index`` scans
        # while yielding exactly the same positions.
        overall_position = {}
        for position, ranked_topic_id in enumerate(pict_pictorial_ids_list):
            overall_position.setdefault(ranked_topic_id, position)

        own_position = {}
        for position, own_topic_id in enumerate(user_pict_pictorial_ids_list):
            own_position.setdefault(own_topic_id, position)

        pictorial_user_topicid_list = []
        for user_id_topic in user_pict_pictorial_ids_list:
            if user_id_topic in overall_position:
                topic_sort = overall_position[user_id_topic] + 1
            else:
                # NOTE(review): the in-window ranks are 1-based while this
                # fallback starts at exactly 100, so it can coincide with the
                # last in-window rank - confirm this is intended.
                topic_sort = ranking_window + own_position[user_id_topic]
            pictorial_user_topicid_list.append({"topic_id": user_id_topic, "topic_sort": topic_sort})

        return {"pictorial_user_topicid_list": pictorial_user_topicid_list}

    except Exception:
        # Best-effort endpoint: log and degrade to an empty result, while
        # letting SystemExit/KeyboardInterrupt propagate.
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        # Return the same key as the success path so callers do not hit a
        # missing key; the previously returned key is kept for compatibility.
        return {"pictorial_user_topicid_list": [], "pict_pictorial_ids_list": []}