
import re
from html.parser import HTMLParser

from lxml import html


def _get_rich_text(rich_text):
    """
    Turn entity-escaped rich text back into literal HTML markup.

    Every "&amp;" is first collapsed to "&", so double-escaped input such as
    "&amp;lt;p&amp;gt;" is decoded all the way down to "<p>". Each newline is
    rewritten as a "<br>" tag before the remaining entities are decoded.

    :param rich_text: text whose HTML tags are stored as character entities
    :return: the same text with entities decoded into real characters/tags
    """
    # HTMLParser.unescape() was deprecated in Python 3.4 and removed in 3.9;
    # html.unescape() is its documented replacement. It is imported here under
    # an alias because this module binds the name ``html`` to ``lxml.html``.
    from html import unescape as _unescape_entities

    return _unescape_entities(
        rich_text.replace("&amp;", "&").replace("\n", "<br>")
    )


def gm_decode_html(rich_text, reserve_spaces=False):
    """
    Reduce rich text (HTML stored as character entities) to plain text.

    :param rich_text: text containing entity-escaped HTML tags
    :param reserve_spaces: when true, keep spaces but collapse every run of
        spaces into a single one; when false, remove every space
    :return: the text content only, with newlines, carriage returns and tabs
        removed; "" for empty input or input that holds no parseable content
    """
    if not rich_text:
        return ""

    # Local import: only the exception class is needed, and lxml is already a
    # dependency of this module.
    from lxml.etree import ParserError

    markup = _get_rich_text(rich_text)
    try:
        element_obj = html.fromstring(markup)  # parse so tags can be dropped
    except ParserError:
        # lxml.html.fromstring raises ParserError ("Document is empty") when
        # the markup holds nothing parseable, e.g. whitespace-only input.
        # Such input has no text content, so report it as empty.
        return ""

    # method="text" serialises the text nodes only, discarding all tags.
    plain = html.tostring(element_obj, encoding="unicode", method="text")
    # Drop line breaks and tabs in a single pass.
    plain = plain.translate(str.maketrans("", "", "\n\r\t"))

    if reserve_spaces:
        return re.sub(r" +", " ", plain)
    return plain.replace(" ", "")