From 2d8116b84cd8e1ef1414ed36d94b3a0b5aefc009 Mon Sep 17 00:00:00 2001 From: Deniz Turgut Date: Wed, 11 Jun 2014 15:58:06 -0400 Subject: [PATCH] Fix HTMLParser related deprecation warnings in Py3.4 --- pelican/readers.py | 12 ++++++------ pelican/tools/pelican_import.py | 9 ++++----- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/pelican/readers.py b/pelican/readers.py index e977b349..76394fb9 100644 --- a/pelican/readers.py +++ b/pelican/readers.py @@ -20,10 +20,7 @@ try: from html import escape except ImportError: from cgi import escape -try: - from html.parser import HTMLParser -except ImportError: - from HTMLParser import HTMLParser +from six.moves.html_parser import HTMLParser from pelican import signals from pelican.contents import Page, Category, Tag, Author @@ -42,7 +39,6 @@ METADATA_PROCESSORS = { logger = logging.getLogger(__name__) - class BaseReader(object): """Base class to read files. @@ -230,7 +226,11 @@ class HTMLReader(BaseReader): class _HTMLParser(HTMLParser): def __init__(self, settings, filename): - HTMLParser.__init__(self) + try: + # Python 3.4+ + HTMLParser.__init__(self, convert_charrefs=False) + except TypeError: + HTMLParser.__init__(self) self.body = '' self.metadata = {} self.settings = settings diff --git a/pelican/tools/pelican_import.py b/pelican/tools/pelican_import.py index b6078201..064f3147 100755 --- a/pelican/tools/pelican_import.py +++ b/pelican/tools/pelican_import.py @@ -4,11 +4,10 @@ from __future__ import unicode_literals, print_function import argparse try: - # py3k import - from html.parser import HTMLParser + from html import unescape # py3.4+ except ImportError: - # py2 import - from HTMLParser import HTMLParser # NOQA + from six.moves.html_parser import HTMLParser + unescape = HTMLParser().unescape import os import re import subprocess @@ -129,7 +128,7 @@ def wp2fields(xml, wp_custpost=False): try: # Use HTMLParser due to issues with BeautifulSoup 3 - title = HTMLParser().unescape(item.title.contents[0]) + title = unescape(item.title.contents[0]) except IndexError: title = 'No title [%s]' % item.find('post_name').string logger.warning('Post "%s" is lacking a proper title' % title)