Port pelican to python 3.

Stays compatible with 2.x series, thanks to an unified codebase.
This commit is contained in:
Dirk Makowski 2013-01-11 02:57:43 +01:00 committed by Alexis Métaireau
commit 71995d5e1b
43 changed files with 495 additions and 287 deletions

View file

@ -1,10 +1,14 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals, print_function
import six
import os
import re
import pytz
import shutil
import logging
import errno
import locale
from collections import defaultdict, Hashable
from functools import partial
@ -17,6 +21,77 @@ from operator import attrgetter
logger = logging.getLogger(__name__)
def strftime(date, date_format):
"""
Replacement for the builtin strftime().
This :func:`strftime()` is compatible to Python 2 and 3. In both cases,
input and output is always unicode.
Still, Python 3's :func:`strftime()` seems to somehow "normalize" unicode
chars in the format string. So if e.g. your format string contains 'ø' or
'ä', the result will be 'o' and 'a'.
See here for an `extensive testcase <https://github.com/dmdm/test_strftime>`_.
:param date: Any object that sports a :meth:`strftime()` method.
:param date_format: Format string, can always be unicode.
:returns: Unicode string with formatted date.
"""
# As tehkonst confirmed, above mentioned testcase runs correctly on
# Python 2 and 3 on Windows as well. Thanks.
if six.PY3:
# It could be so easy... *sigh*
return date.strftime(date_format)
# TODO Perhaps we should refactor again, so that the
# xmlcharrefreplace-regex-dance is always done, regardless
# of the Python version.
else:
# We must ensure that the format string is an encoded byte
# string, ASCII only WTF!!!
# But with "xmlcharrefreplace" our formatted date will produce
# *yuck* like this:
# "Øl trinken beim Besäufnis"
# --> "&#216;l trinken beim Bes&#228;ufnis"
date_format = date_format.encode('ascii',
errors="xmlcharrefreplace")
result = date.strftime(date_format)
# strftime() returns an encoded byte string
# which we must decode into unicode.
lang_code, enc = locale.getlocale(locale.LC_ALL)
if enc:
result = result.decode(enc)
else:
result = unicode(result)
# Convert XML character references back to unicode characters.
if "&#" in result:
result = re.sub(r'&#(?P<num>\d+);'
, lambda m: unichr(int(m.group('num')))
, result
)
return result
#----------------------------------------------------------------------------
# Stolen from Django: django.utils.encoding
#
def python_2_unicode_compatible(klass):
"""
A decorator that defines __unicode__ and __str__ methods under Python 2.
Under Python 3 it does nothing.
To support Python 2 and 3 with a single code base, define a __str__ method
returning text and apply this decorator to the class.
"""
if not six.PY3:
klass.__unicode__ = klass.__str__
klass.__str__ = lambda self: self.__unicode__().encode('utf-8')
return klass
#----------------------------------------------------------------------------
class NoFilesError(Exception):
pass
@ -78,14 +153,24 @@ def slugify(value):
Took from django sources.
"""
# TODO Maybe steal again from current Django 1.5dev
value = Markup(value).striptags()
if type(value) == unicode:
import unicodedata
from unidecode import unidecode
value = unicode(unidecode(value))
value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore')
value = unicode(re.sub('[^\w\s-]', '', value).strip().lower())
return re.sub('[-\s]+', '-', value)
# value must be unicode per se
import unicodedata
from unidecode import unidecode
# unidecode returns str in Py2 and 3, so in Py2 we have to make
# it unicode again
value = unidecode(value)
if isinstance(value, six.binary_type):
value = value.decode('ascii')
# still unicode
value = unicodedata.normalize('NFKD', value)
value = re.sub('[^\w\s-]', '', value).strip().lower()
value = re.sub('[-\s]+', '-', value)
# we want only ASCII chars
value = value.encode('ascii', 'ignore')
# but Pelican should generally use only unicode
return value.decode('ascii')
def copy(path, source, destination, destination_path=None, overwrite=False):
@ -137,7 +222,7 @@ def clean_output_dir(path):
if not os.path.isdir(path):
try:
os.remove(path)
except Exception, e:
except Exception as e:
logger.error("Unable to delete file %s; %e" % path, e)
return
@ -148,13 +233,13 @@ def clean_output_dir(path):
try:
shutil.rmtree(file)
logger.debug("Deleted directory %s" % file)
except Exception, e:
except Exception as e:
logger.error("Unable to delete directory %s; %e" % file, e)
elif os.path.isfile(file) or os.path.islink(file):
try:
os.remove(file)
logger.debug("Deleted file/link %s" % file)
except Exception, e:
except Exception as e:
logger.error("Unable to delete file %s; %e" % file, e)
else:
logger.error("Unable to delete %s, file type unknown" % file)
@ -180,7 +265,7 @@ def truncate_html_words(s, num, end_text='...'):
"""
length = int(num)
if length <= 0:
return u''
return ''
html4_singlets = ('br', 'col', 'link', 'base', 'img', 'param', 'area',
'hr', 'input')
@ -254,10 +339,10 @@ def process_translations(content_list):
for slug, items in grouped_by_slugs:
items = list(items)
# find items with default language
default_lang_items = filter(attrgetter('in_default_lang'), items)
default_lang_items = list(filter(attrgetter('in_default_lang'), items))
len_ = len(default_lang_items)
if len_ > 1:
logger.warning(u'there are %s variants of "%s"' % (len_, slug))
logger.warning('there are %s variants of "%s"' % (len_, slug))
for x in default_lang_items:
logger.warning(' %s' % x.filename)
elif len_ == 0:
@ -269,12 +354,9 @@ def process_translations(content_list):
+ 'content'
logger.warning(msg)
index.extend(default_lang_items)
translations.extend(filter(
lambda x: x not in default_lang_items,
items
))
translations.extend([x for x in items if x not in default_lang_items])
for a in items:
a.translations = filter(lambda x: x != a, items)
a.translations = [x for x in items if x != a]
return index, translations
@ -333,6 +415,6 @@ def set_date_tzinfo(d, tz_name=None):
def mkdir_p(path):
try:
os.makedirs(path)
except OSError, e:
except OSError as e:
if e.errno != errno.EEXIST:
raise