Add support for unicode slugs

2025-10-15 20:28:56 +02:00 · 2013-12-28 20:16:50 +00:00 · 2013-12-28 20:16:50 +00:00 · 80d7156427
commit 80d7156427
parent 0b3dc9db21
1 changed files with 19 additions and 13 deletions
--- a/pelican/utils.py
+++ b/pelican/utils.py
@@ -227,7 +227,7 @@ def pelican_open(filename):
    yield content


-def slugify(value, substitutions=()):
+def slugify(value, substitutions=(), use_unicode=True):
    """
    Normalizes string, converts to lowercase, removes non-alpha characters,
    and converts spaces to hyphens.
@@ -236,24 +236,30 @@ def slugify(value, substitutions=()):
    """
    # TODO Maybe steal again from current Django 1.5dev
    value = Markup(value).striptags()
+
    # value must be unicode per se
-    import unicodedata
-    from unidecode import unidecode
-    # unidecode returns str in Py2 and 3, so in Py2 we have to make
-    # it unicode again
-    value = unidecode(value)
-    if isinstance(value, six.binary_type):
-        value = value.decode('ascii')
+    if not use_unicode:
+        from unidecode import unidecode
+        # unidecode returns the native str type on both Py2 and Py3; on
+        # Py2 that is a byte string, so decode it back to unicode below
+        value = unidecode(value)
+        if isinstance(value, six.binary_type):
+            value = value.decode('ascii')
+
    # still unicode
-    value = unicodedata.normalize('NFKD', value).lower()
+    import unicodedata
+    value = unicodedata.normalize('NFKC' if use_unicode else 'NFKD', value).lower()
    for src, dst in substitutions:
        value = value.replace(src.lower(), dst.lower())
    value = re.sub('[^\w\s-]', '', value).strip()
    value = re.sub('[-\s]+', '-', value)
-    # we want only ASCII chars
-    value = value.encode('ascii', 'ignore')
-    # but Pelican should generally use only unicode
-    return value.decode('ascii')
+    if use_unicode:
+        return value
+    else:
+        # drop any characters that are still not ASCII
+        value = value.encode('ascii', 'ignore')
+        # decode the ASCII bytes back to unicode text before returning
+        return value.decode('ascii')


 def copy(path, source, destination, destination_path=None):