diff --git a/pelican/readers.py b/pelican/readers.py index e977b349..274f077e 100644 --- a/pelican/readers.py +++ b/pelican/readers.py @@ -9,6 +9,7 @@ import docutils import docutils.core import docutils.io from docutils.writers.html4css1 import HTMLTranslator +import six # import the directives to have pygments support from pelican import rstdirectives # NOQA @@ -20,10 +21,7 @@ try: from html import escape except ImportError: from cgi import escape -try: - from html.parser import HTMLParser -except ImportError: - from HTMLParser import HTMLParser +from six.moves.html_parser import HTMLParser from pelican import signals from pelican.contents import Page, Category, Tag, Author @@ -42,7 +40,6 @@ METADATA_PROCESSORS = { logger = logging.getLogger(__name__) - class BaseReader(object): """Base class to read files. @@ -121,6 +118,19 @@ class RstReader(BaseReader): enabled = bool(docutils) file_extensions = ['rst'] + class FileInput(docutils.io.FileInput): + """Patch docutils.io.FileInput to remove "U" mode in py3. + + Universal newlines is enabled by default and "U" mode is deprecated + in py3. + + """ + + def __init__(self, *args, **kwargs): + if six.PY3: + kwargs['mode'] = kwargs.get('mode', 'r').replace('U', '') + docutils.io.FileInput.__init__(self, *args, **kwargs) + def __init__(self, *args, **kwargs): super(RstReader, self).__init__(*args, **kwargs) @@ -152,12 +162,14 @@ class RstReader(BaseReader): extra_params = {'initial_header_level': '2', 'syntax_highlight': 'short', 'input_encoding': 'utf-8', - 'exit_status_level': 2} + 'exit_status_level': 2, + 'embed_stylesheet': False} user_params = self.settings.get('DOCUTILS_SETTINGS') if user_params: extra_params.update(user_params) pub = docutils.core.Publisher( + source_class=self.FileInput, destination_class=docutils.io.StringOutput) pub.set_components('standalone', 'restructuredtext', 'html') pub.writer.translator_class = PelicanHTMLTranslator @@ -230,7 +242,11 @@ class HTMLReader(BaseReader): class _HTMLParser(HTMLParser): def __init__(self, settings, filename): - HTMLParser.__init__(self) + try: + # Python 3.4+ + HTMLParser.__init__(self, convert_charrefs=False) + except TypeError: + HTMLParser.__init__(self) self.body = '' self.metadata = {} self.settings = settings diff --git a/pelican/tools/pelican_import.py b/pelican/tools/pelican_import.py index b6078201..064f3147 100755 --- a/pelican/tools/pelican_import.py +++ b/pelican/tools/pelican_import.py @@ -4,11 +4,10 @@ from __future__ import unicode_literals, print_function import argparse try: - # py3k import - from html.parser import HTMLParser + from html import unescape # py3.4+ except ImportError: - # py2 import - from HTMLParser import HTMLParser # NOQA + from six.moves.html_parser import HTMLParser + unescape = HTMLParser().unescape import os import re import subprocess @@ -129,7 +128,7 @@ def wp2fields(xml, wp_custpost=False): try: # Use HTMLParser due to issues with BeautifulSoup 3 - title = HTMLParser().unescape(item.title.contents[0]) + title = unescape(item.title.contents[0]) except IndexError: title = 'No title [%s]' % item.find('post_name').string logger.warning('Post "%s" is lacking a proper title' % title)