Commit 0cc47263 authored by litaolemo's avatar litaolemo

update

parent b5127f27
......@@ -15,7 +15,7 @@ import requests
import os
import copy
import re
import HTMLParser
# import HTMLParser
import pymysql
from crawler.crawler_sys.utils.output_results import retry_get_url
from lxml import html
......@@ -167,18 +167,18 @@ WHITE_TAGS = {
}
def _get_rich_text(rich_text):
"""
富文本标签转成标签
:param rich_text:
:return:
"""
try:
h = HTMLParser.HTMLParser()
rich_text = h.unescape(rich_text.decode("utf-8").replace("&amp;", "&").replace("\n", "<br>")) # 富文本标签转成标签对象
return rich_text
except:
return rich_text
# def _get_rich_text(rich_text):
# """
# 富文本标签转成标签
# :param rich_text:
# :return:
# """
# try:
# h = HTMLParser.HTMLParser()
# rich_text = h.unescape(rich_text.decode("utf-8").replace("&amp;", "&").replace("\n", "<br>")) # 富文本标签转成标签对象
# return rich_text
# except:
# return rich_text
def gm_convert_html_tags(rich_text, all_tags=False, remove_tags=None):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment