More granular control of tags and categories slugs. Fixes #1873

- add TAG_SUBSTITUTIONS and CATEGORY_SUBSTITUTIONS settings - make slugify keep non-alphanumeric characters if configured
2025-10-15 20:28:56 +02:00 · 2016-03-14 00:16:58 +02:00 · 2016-03-14 00:16:58 +02:00 · 648165b839
commit 648165b839
parent 70665ea0fa
8 changed files with 123 additions and 8 deletions
--- a/pelican/utils.py
+++ b/pelican/utils.py
@ -270,10 +270,34 @@ def slugify(value, substitutions=()):
        value = value.decode('ascii')
    # still unicode
    value = unicodedata.normalize('NFKD', value).lower()
-    for src, dst in substitutions:
+
+    # backward compatible: convert 2-tuples to 3-tuples
+    new_subs = []
+    for tpl in substitutions:
+        try:
+            src, dst, skip = tpl
+        except ValueError:
+            src, dst = tpl
+            skip = False
+        new_subs.append((src, dst, skip))
+    substitutions = tuple(new_subs)
+
+    # by default will replace non-alphanum characters
+    replace = True
+    for src, dst, skip in substitutions:
+        orig_value = value
        value = value.replace(src.lower(), dst.lower())
-    value = re.sub('[^\w\s-]', '', value).strip()
-    value = re.sub('[-\s]+', '-', value)
+        # if replacement was made then skip non-alphanum
+        # replacement if instructed to do so
+        if value != orig_value:
+            replace = replace and not skip
+
+    if replace:
+        value = re.sub('[^\w\s-]', '', value).strip()
+        value = re.sub('[-\s]+', '-', value)
+    else:
+        value = value.strip()
+
    # we want only ASCII chars
    value = value.encode('ascii', 'ignore')
    # but Pelican should generally use only unicode