From 80d7156427814042d6beb55714858d8c8b5b1796 Mon Sep 17 00:00:00 2001 From: Martin Paljak Date: Sat, 28 Dec 2013 20:16:50 +0000 Subject: [PATCH] Add support for unicode slugs --- pelican/utils.py | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/pelican/utils.py b/pelican/utils.py index 4b25ec7f..ebff1e5f 100644 --- a/pelican/utils.py +++ b/pelican/utils.py @@ -227,7 +227,7 @@ def pelican_open(filename): yield content -def slugify(value, substitutions=()): +def slugify(value, substitutions=(), use_unicode=True): """ Normalizes string, converts to lowercase, removes non-alpha characters, and converts spaces to hyphens. @@ -236,24 +236,30 @@ def slugify(value, substitutions=()): """ # TODO Maybe steal again from current Django 1.5dev value = Markup(value).striptags() + # value must be unicode per se - import unicodedata - from unidecode import unidecode - # unidecode returns str in Py2 and 3, so in Py2 we have to make - # it unicode again - value = unidecode(value) - if isinstance(value, six.binary_type): - value = value.decode('ascii') + if not use_unicode: + from unidecode import unidecode + # unidecode returns str in Py2 and 3, so in Py2 we have to make + # it unicode again + value = unidecode(value) + if isinstance(value, six.binary_type): + value = value.decode('ascii') + # still unicode - value = unicodedata.normalize('NFKD', value).lower() + import unicodedata + value = unicodedata.normalize('NFKC' if use_unicode else 'NFKD', value).lower() for src, dst in substitutions: value = value.replace(src.lower(), dst.lower()) value = re.sub('[^\w\s-]', '', value).strip() value = re.sub('[-\s]+', '-', value) - # we want only ASCII chars - value = value.encode('ascii', 'ignore') - # but Pelican should generally use only unicode - return value.decode('ascii') + if use_unicode: + return value + else: + # we want only ASCII chars + value = value.encode('ascii', 'ignore') + # but Pelican should generally use only unicode + return value.decode('ascii') def copy(path, source, destination, destination_path=None):