Added new setting ALLOW_NON_ASCII_IN_SLUG to keep non-ASCII characters in generated slugs

2025-10-15 20:28:56 +02:00 · 2013-11-26 01:11:26 +08:00 · 2013-11-26 01:11:26 +08:00 · 6a8da0b1b3
commit 6a8da0b1b3
parent 8005f675a7
5 changed files with 35 additions and 22 deletions
--- a/pelican/contents.py
+++ b/pelican/contents.py
@ -92,7 +92,8 @@ class Content(object):
        # create the slug if not existing, from the title
        if not hasattr(self, 'slug') and hasattr(self, 'title'):
            self.slug = slugify(self.title,
-                                settings.get('SLUG_SUBSTITUTIONS', ()))
+                                settings.get('SLUG_SUBSTITUTIONS', ()),
+                                settings['ALLOW_NON_ASCII_IN_SLUG'])

        self.source_path = source_path

@ -157,11 +158,13 @@ class Content(object):
            'date': getattr(self, 'date', datetime.now()),
            'author': slugify(
                getattr(self, 'author', ''),
-                slug_substitutions
+                slug_substitutions,
+                self.settings['ALLOW_NON_ASCII_IN_SLUG']
            ),
            'category': slugify(
                getattr(self, 'category', default_category),
-                slug_substitutions
+                slug_substitutions,
+                self.settings['ALLOW_NON_ASCII_IN_SLUG']
            )
        })
        return metadata
--- a/pelican/settings.py
+++ b/pelican/settings.py
@ -111,6 +111,7 @@ DEFAULT_CONFIG = {
    'TEMPLATE_PAGES': {},
    'IGNORE_FILES': ['.#*'],
    'SLUG_SUBSTITUTIONS': (),
+    'ALLOW_NON_ASCII_IN_SLUG': False,
    'INTRASITE_LINK_REGEX': '[{|](?P<what>.*?)[|}]',
    }

--- a/pelican/tests/test_utils.py
+++ b/pelican/tests/test_utils.py
@ -93,6 +93,8 @@ class TestUtils(LoggedTestCase):

        for value, expected in samples:
            self.assertEqual(utils.slugify(value), expected)
+            # nothing will be changed if allow_non_ascii is True.
+            self.assertEqual(utils.slugify(value, allow_non_ascii=True), value)

    def test_slugify_substitute(self):

--- a/pelican/urlwrappers.py
+++ b/pelican/urlwrappers.py
@ -17,7 +17,9 @@ class URLWrapper(object):
        # but are here for clarity
        self.settings = settings
        self._name = name
-        self.slug = slugify(name, self.settings.get('SLUG_SUBSTITUTIONS', ()))
+        self.slug = slugify(name,
+                self.settings.get('SLUG_SUBSTITUTIONS', ()),
+                self.settings['ALLOW_NON_ASCII_IN_SLUG'])
        self.name = name

    @property
@ -27,7 +29,9 @@ class URLWrapper(object):
    @name.setter
    def name(self, name):
        self._name = name
-        self.slug = slugify(name, self.settings.get('SLUG_SUBSTITUTIONS', ()))
+        self.slug = slugify(name,
+                self.settings.get('SLUG_SUBSTITUTIONS', ()),
+                self.settings['ALLOW_NON_ASCII_IN_SLUG'])

    def as_dict(self):
        d = self.__dict__
@ -42,7 +46,7 @@ class URLWrapper(object):

    def _normalize_key(self, key):
        subs = self.settings.get('SLUG_SUBSTITUTIONS', ())
-        return six.text_type(slugify(key, subs))
+        return six.text_type(slugify(key, subs, self.settings['ALLOW_NON_ASCII_IN_SLUG']))

    def __eq__(self, other):
        return self._key() == self._normalize_key(other)
--- a/pelican/utils.py
+++ b/pelican/utils.py
@ -227,7 +227,7 @@ def pelican_open(filename):
    yield content


-def slugify(value, substitutions=()):
+def slugify(value, substitutions=(), allow_non_ascii=False):
    """
    Normalizes string, converts to lowercase, removes non-alpha characters,
    and converts spaces to hyphens.
@ -236,24 +236,27 @@ def slugify(value, substitutions=()):
    """
    # TODO Maybe steal again from current Django 1.5dev
    value = Markup(value).striptags()
-    # value must be unicode per se
-    import unicodedata
-    from unidecode import unidecode
-    # unidecode returns str in Py2 and 3, so in Py2 we have to make
-    # it unicode again
-    value = unidecode(value)
-    if isinstance(value, six.binary_type):
-        value = value.decode('ascii')
-    # still unicode
-    value = unicodedata.normalize('NFKD', value).lower()
+
    for src, dst in substitutions:
        value = value.replace(src.lower(), dst.lower())
-    value = re.sub('[^\w\s-]', '', value).strip()
-    value = re.sub('[-\s]+', '-', value)
-    # we want only ASCII chars
-    value = value.encode('ascii', 'ignore')
+
+    if not allow_non_ascii:
+        # value must be unicode per se
+        from unidecode import unidecode
+        # unidecode returns str in Py2 and 3, so in Py2 we have to make
+        # it unicode again
+        value = unidecode(value)
+        if isinstance(value, six.binary_type):
+            value = value.decode('ascii')
+        # still unicode
+        import unicodedata
+        value = unicodedata.normalize('NFKD', value).lower()
+        value = re.sub('[^\w\s-]', '', value).strip()
+        value = re.sub('[-\s]+', '-', value)
+        # we want only ASCII chars
+        value = value.encode('ascii', 'ignore')
    # but Pelican should generally use only unicode
-    return value.decode('ascii')
+    return unicode(value)


 def copy(path, source, destination, destination_path=None):