Add support for unicode slugs

This commit is contained in:
Martin Paljak 2013-12-28 20:16:50 +00:00
commit 80d7156427

View file

@ -227,7 +227,7 @@ def pelican_open(filename):
yield content
def slugify(value, substitutions=(), use_unicode=True):
    """
    Normalizes string, converts to lowercase, removes non-alpha characters,
    and converts spaces to hyphens.

    :param value: text to slugify; markup tags are stripped first via
        ``Markup(value).striptags()``.
    :param substitutions: iterable of ``(src, dst)`` string pairs. Both
        members are lowercased and every occurrence of ``src`` is replaced
        by ``dst`` before the remaining punctuation is removed.
    :param use_unicode: when true (the default) non-ASCII word characters
        are kept and the text is NFKC-normalized. When false the text is
        transliterated with ``unidecode``, NFKD-normalized, and anything
        still outside ASCII is dropped.
    :return: the slug as a unicode string.
    """
    # TODO Maybe steal again from current Django 1.5dev
    import unicodedata

    value = Markup(value).striptags()
    # value must be unicode per se
    if not use_unicode:
        from unidecode import unidecode
        # unidecode returns str in Py2 and 3, so in Py2 we have to make
        # it unicode again
        value = unidecode(value)
        if isinstance(value, six.binary_type):
            value = value.decode('ascii')
    # still unicode
    form = 'NFKC' if use_unicode else 'NFKD'
    value = unicodedata.normalize(form, value).lower()
    for src, dst in substitutions:
        value = value.replace(src.lower(), dst.lower())
    # re.UNICODE is the default for str patterns on Python 3, but on
    # Python 2 it is required for \w and \s to match non-ASCII characters;
    # without it the unicode mode would drop every non-ASCII letter.
    value = re.sub(r'[^\w\s-]', '', value, flags=re.UNICODE).strip()
    value = re.sub(r'[-\s]+', '-', value, flags=re.UNICODE)
    if use_unicode:
        return value
    # we want only ASCII chars, but Pelican should generally use only
    # unicode, so encode to drop the rest and decode straight back
    return value.encode('ascii', 'ignore').decode('ascii')
def copy(path, source, destination, destination_path=None):