diff --git a/pelican/__init__.py b/pelican/__init__.py index 456d0691..83160684 100644 --- a/pelican/__init__.py +++ b/pelican/__init__.py @@ -1,10 +1,6 @@ # -*- coding: utf-8 -*- import argparse -try: - import collections.abc as collections -except ImportError: - import collections import logging import multiprocessing import os @@ -12,6 +8,7 @@ import pprint import sys import time import traceback +from collections.abc import Iterable # pelican.log has to be the first pelican module to be loaded # because logging.setLoggerClass has to be called before logging.getLogger @@ -184,7 +181,7 @@ class Pelican(object): for pair in signals.get_generators.send(self): (funct, value) = pair - if not isinstance(value, collections.Iterable): + if not isinstance(value, Iterable): value = (value, ) for v in value: diff --git a/pelican/generators.py b/pelican/generators.py index 27c895e4..8782238b 100644 --- a/pelican/generators.py +++ b/pelican/generators.py @@ -5,7 +5,6 @@ import errno import fnmatch import logging import os -from codecs import open from collections import defaultdict from functools import partial from itertools import chain, groupby diff --git a/pelican/readers.py b/pelican/readers.py index bb4d6d81..c650913f 100644 --- a/pelican/readers.py +++ b/pelican/readers.py @@ -5,6 +5,7 @@ import logging import os import re from collections import OrderedDict +from html import escape from html.parser import HTMLParser from io import StringIO @@ -18,7 +19,7 @@ from pelican import rstdirectives # NOQA from pelican import signals from pelican.cache import FileStampDataCacher from pelican.contents import Author, Category, Page, Tag -from pelican.utils import escape_html, get_date, pelican_open, posixize_path +from pelican.utils import get_date, pelican_open, posixize_path try: from markdown import Markdown @@ -411,7 +412,7 @@ class HTMLReader(BaseReader): self._in_body = False self._in_top_level = True elif self._in_body: - self._data_buffer += '</{}>'.format(escape_html(tag)) 
+ self._data_buffer += '</{}>'.format(escape(tag)) def handle_startendtag(self, tag, attrs): if tag == 'meta' and self._in_head: @@ -432,16 +433,16 @@ class HTMLReader(BaseReader): self._data_buffer += '&#{};'.format(data) def build_tag(self, tag, attrs, close_tag): - result = '<{}'.format(escape_html(tag)) + result = '<{}'.format(escape(tag)) for k, v in attrs: - result += ' ' + escape_html(k) + result += ' ' + escape(k) if v is not None: # If the attribute value contains a double quote, surround # with single quotes, otherwise use double quotes. if '"' in v: - result += "='{}'".format(escape_html(v, quote=False)) + result += "='{}'".format(escape(v, quote=False)) else: - result += '="{}"'.format(escape_html(v, quote=False)) + result += '="{}"'.format(escape(v, quote=False)) if close_tag: return result + ' />' return result + '>' diff --git a/pelican/settings.py b/pelican/settings.py index 11bad8c4..a4033001 100644 --- a/pelican/settings.py +++ b/pelican/settings.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- import copy +import importlib.util import inspect import locale import logging @@ -12,19 +13,11 @@ from posixpath import join as posix_join from pelican.log import LimitFilter -try: - # spec_from_file_location is the recommended way in Python 3.5+ - import importlib.util - - def load_source(name, path): - spec = importlib.util.spec_from_file_location(name, path) - mod = importlib.util.module_from_spec(spec) - spec.loader.exec_module(mod) - return mod -except ImportError: - # but it does not exist in Python 2.7, so fall back to imp - import imp - load_source = imp.load_source +def load_source(name, path): + spec = importlib.util.spec_from_file_location(name, path) + mod = importlib.util.module_from_spec(spec) + spec.loader.exec_module(mod) + return mod logger = logging.getLogger(__name__) diff --git a/pelican/tests/test_generators.py b/pelican/tests/test_generators.py index 5b03b1d1..3ab341a3 100644 --- a/pelican/tests/test_generators.py +++ 
b/pelican/tests/test_generators.py @@ -2,7 +2,6 @@ import locale import os -from codecs import open from shutil import copy, rmtree from tempfile import mkdtemp diff --git a/pelican/tests/test_importer.py b/pelican/tests/test_importer.py index 942b95fe..1a6ce404 100644 --- a/pelican/tests/test_importer.py +++ b/pelican/tests/test_importer.py @@ -3,7 +3,6 @@ import locale import os import re -from codecs import open from pelican.settings import DEFAULT_CONFIG from pelican.tests.support import (mute, skipIfNoExecutable, temporary_folder, diff --git a/pelican/tests/test_pelican.py b/pelican/tests/test_pelican.py index 0d495ac7..5625d617 100644 --- a/pelican/tests/test_pelican.py +++ b/pelican/tests/test_pelican.py @@ -1,15 +1,11 @@ # -*- coding: utf-8 -*- -try: - import collections.abc as collections -except ImportError: - import collections - import locale import logging import os import subprocess import sys +from collections.abc import Sequence from shutil import rmtree from tempfile import mkdtemp @@ -94,7 +90,7 @@ class TestPelican(LoggedTestCase): generator_classes[-1] is StaticGenerator, "StaticGenerator must be the last generator, but it isn't!") self.assertIsInstance( - generator_classes, collections.Sequence, + generator_classes, Sequence, "get_generator_classes() must return a Sequence to preserve order") def test_basic_generation_works(self): diff --git a/pelican/tools/pelican_import.py b/pelican/tools/pelican_import.py index 9b2edf4c..21122eb1 100755 --- a/pelican/tools/pelican_import.py +++ b/pelican/tools/pelican_import.py @@ -8,8 +8,8 @@ import re import subprocess import sys import time -from codecs import open from collections import defaultdict +from html import unescape from urllib.error import URLError from urllib.parse import quote, urlparse, urlsplit, urlunsplit from urllib.request import urlretrieve @@ -19,11 +19,6 @@ from pelican.log import init from pelican.settings import read_settings from pelican.utils import SafeDatetime, slugify -try: - 
from html import unescape # py3.5+ -except ImportError: - from html.parser import HTMLParser - unescape = HTMLParser().unescape logger = logging.getLogger(__name__) diff --git a/pelican/tools/pelican_quickstart.py b/pelican/tools/pelican_quickstart.py index 5ff3dc33..a7801866 100755 --- a/pelican/tools/pelican_quickstart.py +++ b/pelican/tools/pelican_quickstart.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import argparse -import codecs import locale import os @@ -309,8 +308,8 @@ needed by Pelican. print('Error: {0}'.format(e)) try: - with codecs.open(os.path.join(CONF['basedir'], 'pelicanconf.py'), - 'w', 'utf-8') as fd: + with open(os.path.join(CONF['basedir'], 'pelicanconf.py'), + 'w', encoding='utf-8') as fd: conf_python = dict() for key, value in CONF.items(): conf_python[key] = repr(value) @@ -322,8 +321,8 @@ needed by Pelican. print('Error: {0}'.format(e)) try: - with codecs.open(os.path.join(CONF['basedir'], 'publishconf.py'), - 'w', 'utf-8') as fd: + with open(os.path.join(CONF['basedir'], 'publishconf.py'), + 'w', encoding='utf-8') as fd: _template = _jinja_env.get_template('publishconf.py.jinja2') fd.write(_template.render(**CONF)) fd.close() @@ -332,16 +331,16 @@ needed by Pelican. 
if automation: try: - with codecs.open(os.path.join(CONF['basedir'], 'tasks.py'), - 'w', 'utf-8') as fd: + with open(os.path.join(CONF['basedir'], 'tasks.py'), + 'w', encoding='utf-8') as fd: _template = _jinja_env.get_template('tasks.py.jinja2') fd.write(_template.render(**CONF)) fd.close() except OSError as e: print('Error: {0}'.format(e)) try: - with codecs.open(os.path.join(CONF['basedir'], 'Makefile'), - 'w', 'utf-8') as fd: + with open(os.path.join(CONF['basedir'], 'Makefile'), + 'w', encoding='utf-8') as fd: py_v = 'python3' _template = _jinja_env.get_template('Makefile.jinja2') fd.write(_template.render(py_v=py_v, **CONF)) diff --git a/pelican/urlwrappers.py b/pelican/urlwrappers.py index edfb11b4..6b512938 100644 --- a/pelican/urlwrappers.py +++ b/pelican/urlwrappers.py @@ -62,7 +62,7 @@ class URLWrapper(object): def _normalize_key(self, key): subs = self.settings.get('SLUG_REGEX_SUBSTITUTIONS', []) - return str(slugify(key, regex_subs=subs)) + return slugify(key, regex_subs=subs) def __eq__(self, other): if isinstance(other, self.__class__): diff --git a/pelican/utils.py b/pelican/utils.py index d031503d..6491f02e 100644 --- a/pelican/utils.py +++ b/pelican/utils.py @@ -1,8 +1,6 @@ # -*- coding: utf-8 -*- -import codecs import datetime -import errno import fnmatch import locale import logging @@ -12,10 +10,7 @@ import shutil import sys import traceback import urllib -try: - from collections.abc import Hashable -except ImportError: - from collections import Hashable +from collections.abc import Hashable from contextlib import contextmanager from functools import partial from html import entities @@ -29,10 +24,6 @@ from jinja2 import Markup import pytz -try: - from html import escape -except ImportError: - from cgi import escape logger = logging.getLogger(__name__) @@ -51,17 +42,11 @@ def sanitised_join(base_directory, *parts): def strftime(date, date_format): ''' - Replacement for built-in strftime - - This is necessary because of the way Py2 handles date format strings. 
- Specifically, Py2 strftime takes a bytestring. In the case of text output - (e.g. %b, %a, etc), the output is encoded with an encoding defined by - locale.LC_TIME. Things get messy if the formatting string has chars that - are not valid in LC_TIME defined encoding. + Enhanced replacement for built-in strftime with zero stripping This works by 'grabbing' possible format strings (those starting with %), - formatting them with the date, (if necessary) decoding the output and - replacing formatted output back. + formatting them with the date, stripping any leading zeros if - prefix is + used and replacing formatted output back. ''' def strip_zeros(x): return x.lstrip('0') or '0' @@ -74,10 +59,6 @@ def strftime(date, date_format): # replace candidates with placeholders for later % formatting template = re.sub(format_options, '%s', date_format) - # we need to convert formatted dates back to unicode in Py2 - # LC_TIME determines the encoding for built-in strftime outputs - lang_code, enc = locale.getlocale(locale.LC_TIME) - formatted_candidates = [] for candidate in candidates: # test for valid C89 directives only @@ -232,15 +213,12 @@ def get_date(string): @contextmanager -def pelican_open(filename, mode='rb', strip_crs=(sys.platform == 'win32')): +def pelican_open(filename, mode='r', strip_crs=(sys.platform == 'win32')): """Open a file and return its content""" - with codecs.open(filename, mode, encoding='utf-8') as infile: + # utf-8-sig will clear any BOM if present + with open(filename, mode, encoding='utf-8-sig') as infile: content = infile.read() - if content[:1] == codecs.BOM_UTF8.decode('utf8'): - content = content[1:] - if strip_crs: - content = content.replace('\r\n', '\n') yield content @@ -610,14 +588,6 @@ def truncate_html_words(s, num, end_text='…'): return out -def escape_html(text, quote=True): - """Escape '&', '<' and '>' to HTML-safe sequences. - - In Python 2 this uses cgi.escape and in Python 3 this uses html.escape. 
We - wrap here to ensure the quote argument has an identical default.""" - return escape(text, quote=quote) - - def process_translations(content_list, translation_id=None): """ Finds translations and returns them. @@ -833,11 +803,7 @@ def set_date_tzinfo(d, tz_name=None): def mkdir_p(path): - try: - os.makedirs(path) - except OSError as e: - if e.errno != errno.EEXIST or not os.path.isdir(path): - raise + os.makedirs(path, exist_ok=True) def split_all(path):