Commit 0cc47263 authored by litaolemo

update

parent b5127f27
...@@ -15,7 +15,7 @@ import requests ...@@ -15,7 +15,7 @@ import requests
import os import os
import copy import copy
import re import re
import HTMLParser # import HTMLParser
import pymysql import pymysql
from crawler.crawler_sys.utils.output_results import retry_get_url from crawler.crawler_sys.utils.output_results import retry_get_url
from lxml import html from lxml import html
...@@ -167,18 +167,18 @@ WHITE_TAGS = { ...@@ -167,18 +167,18 @@ WHITE_TAGS = {
} }
def _get_rich_text(rich_text): # def _get_rich_text(rich_text):
""" # """
富文本标签转成标签 # 富文本标签转成标签
:param rich_text: # :param rich_text:
:return: # :return:
""" # """
try: # try:
h = HTMLParser.HTMLParser() # h = HTMLParser.HTMLParser()
rich_text = h.unescape(rich_text.decode("utf-8").replace("&amp;", "&").replace("\n", "<br>")) # 富文本标签转成标签对象 # rich_text = h.unescape(rich_text.decode("utf-8").replace("&amp;", "&").replace("\n", "<br>")) # 富文本标签转成标签对象
return rich_text # return rich_text
except: # except:
return rich_text # return rich_text
def gm_convert_html_tags(rich_text, all_tags=False, remove_tags=None): def gm_convert_html_tags(rich_text, all_tags=False, remove_tags=None):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment