diff --git a/pelican/readers.py b/pelican/readers.py index 431e6937..954ea412 100644 --- a/pelican/readers.py +++ b/pelican/readers.py @@ -21,10 +21,7 @@ try: from html import escape except ImportError: from cgi import escape -try: - from html.parser import HTMLParser -except ImportError: - from HTMLParser import HTMLParser +from six.moves.html_parser import HTMLParser from pelican import signals from pelican.contents import Page, Category, Tag, Author @@ -43,7 +40,6 @@ METADATA_PROCESSORS = { logger = logging.getLogger(__name__) - class BaseReader(object): """Base class to read files. @@ -231,7 +227,11 @@ class HTMLReader(BaseReader): class _HTMLParser(HTMLParser): def __init__(self, settings, filename): - HTMLParser.__init__(self) + try: + # Python 3.4+ + HTMLParser.__init__(self, convert_charrefs=False) + except TypeError: + HTMLParser.__init__(self) self.body = '' self.metadata = {} self.settings = settings diff --git a/pelican/tools/pelican_import.py b/pelican/tools/pelican_import.py index 7c8662c9..a50aa216 100755 --- a/pelican/tools/pelican_import.py +++ b/pelican/tools/pelican_import.py @@ -4,11 +4,10 @@ from __future__ import unicode_literals, print_function import argparse try: - # py3k import - from html.parser import HTMLParser + from html import unescape # py3.4+ except ImportError: - # py2 import - from HTMLParser import HTMLParser # NOQA + from six.moves.html_parser import HTMLParser + unescape = HTMLParser().unescape import os import re import subprocess @@ -129,7 +128,7 @@ def wp2fields(xml, wp_custpost=False): try: # Use HTMLParser due to issues with BeautifulSoup 3 - title = HTMLParser().unescape(item.title.contents[0]) + title = unescape(item.title.contents[0]) except IndexError: title = 'No title [%s]' % item.find('post_name').string logger.warning('Post "%s" is lacking a proper title' % title)