mirror of
https://github.com/getpelican/pelican.git
synced 2025-10-15 20:28:56 +02:00
Allow text substitutions when generating slugs
The `slugify()` function used by Pelican is generally very good at producing something both readable and URL-safe. However, there are a few specific cases where it causes conflicts. One that I've run into is using the strings `C++` and `C` as tags, both of which are transformed into the slug `c`. This commit adds an optional `SLUG_SUBSTITUTIONS` setting, which is a list of 2-tuples of substitutions to be carried out case-insensitively just before non-alphanumeric characters are stripped out. This allows a string like `C++` to be transformed to `CPP` (yielding the slug `cpp`) or similar. This can also improve the readability of slugs.
This commit is contained in:
parent
7ec4d5faa2
commit
39518e15ef
6 changed files with 28 additions and 8 deletions
|
|
@ -258,6 +258,10 @@ Setting name (default value) What does it do?
|
||||||
posts.
|
posts.
|
||||||
`DAY_ARCHIVE_SAVE_AS` (False) The location to save per-day archives of your
|
`DAY_ARCHIVE_SAVE_AS` (False) The location to save per-day archives of your
|
||||||
posts.
|
posts.
|
||||||
|
`SLUG_SUBSTITUTIONS` (``()``) Substitutions to make prior to stripping out
|
||||||
|
non-alphanumerics when generating slugs. Specified
|
||||||
|
as a list of 2-tuples of ``(from, to)`` which are
|
||||||
|
applied in order.
|
||||||
==================================================== =====================================================
|
==================================================== =====================================================
|
||||||
|
|
||||||
.. note::
|
.. note::
|
||||||
|
|
|
||||||
|
|
@ -86,7 +86,8 @@ class Content(object):
|
||||||
|
|
||||||
# create the slug if not existing, from the title
|
# create the slug if not existing, from the title
|
||||||
if not hasattr(self, 'slug') and hasattr(self, 'title'):
|
if not hasattr(self, 'slug') and hasattr(self, 'title'):
|
||||||
self.slug = slugify(self.title)
|
self.slug = slugify(self.title,
|
||||||
|
settings.get('SLUG_SUBSTITUTIONS', ()))
|
||||||
|
|
||||||
self.source_path = source_path
|
self.source_path = source_path
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -105,6 +105,7 @@ DEFAULT_CONFIG = {
|
||||||
'PLUGINS': [],
|
'PLUGINS': [],
|
||||||
'TEMPLATE_PAGES': {},
|
'TEMPLATE_PAGES': {},
|
||||||
'IGNORE_FILES': ['.#*'],
|
'IGNORE_FILES': ['.#*'],
|
||||||
|
'SLUG_SUBSTITUTIONS': (),
|
||||||
}
|
}
|
||||||
|
|
||||||
def read_settings(path=None, override=None):
|
def read_settings(path=None, override=None):
|
||||||
|
|
|
||||||
|
|
@ -94,6 +94,17 @@ class TestUtils(LoggedTestCase):
|
||||||
for value, expected in samples:
|
for value, expected in samples:
|
||||||
self.assertEqual(utils.slugify(value), expected)
|
self.assertEqual(utils.slugify(value), expected)
|
||||||
|
|
||||||
|
def test_slugify_substitute(self):
|
||||||
|
|
||||||
|
samples = (('C++ is based on C', 'cpp-is-based-on-c'),
|
||||||
|
('C+++ test C+ test', 'cpp-test-c-test'),
|
||||||
|
('c++, c#, C#, C++', 'cpp-c-sharp-c-sharp-cpp'),
|
||||||
|
('c++-streams', 'cpp-streams'),)
|
||||||
|
|
||||||
|
subs = (('C++', 'CPP'), ('C#', 'C-SHARP'))
|
||||||
|
for value, expected in samples:
|
||||||
|
self.assertEqual(utils.slugify(value, subs), expected)
|
||||||
|
|
||||||
def test_get_relative_path(self):
|
def test_get_relative_path(self):
|
||||||
|
|
||||||
samples = ((os.path.join('test', 'test.html'), os.pardir),
|
samples = ((os.path.join('test', 'test.html'), os.pardir),
|
||||||
|
|
|
||||||
|
|
@ -15,10 +15,10 @@ class URLWrapper(object):
|
||||||
def __init__(self, name, settings):
|
def __init__(self, name, settings):
|
||||||
# next 2 lines are redundant with the setter of the name property
|
# next 2 lines are redundant with the setter of the name property
|
||||||
# but are here for clarity
|
# but are here for clarity
|
||||||
self._name = name
|
|
||||||
self.slug = slugify(name)
|
|
||||||
self.name = name
|
|
||||||
self.settings = settings
|
self.settings = settings
|
||||||
|
self._name = name
|
||||||
|
self.slug = slugify(name, self.settings.get('SLUG_SUBSTITUTIONS', ()))
|
||||||
|
self.name = name
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def name(self):
|
def name(self):
|
||||||
|
|
@ -27,7 +27,7 @@ class URLWrapper(object):
|
||||||
@name.setter
|
@name.setter
|
||||||
def name(self, name):
|
def name(self, name):
|
||||||
self._name = name
|
self._name = name
|
||||||
self.slug = slugify(name)
|
self.slug = slugify(name, self.settings.get('SLUG_SUBSTITUTIONS', ()))
|
||||||
|
|
||||||
def as_dict(self):
|
def as_dict(self):
|
||||||
d = self.__dict__
|
d = self.__dict__
|
||||||
|
|
@ -41,7 +41,8 @@ class URLWrapper(object):
|
||||||
return self.slug
|
return self.slug
|
||||||
|
|
||||||
def _normalize_key(self, key):
|
def _normalize_key(self, key):
|
||||||
return six.text_type(slugify(key))
|
subs = self.settings.get('SLUG_SUBSTITUTIONS', ())
|
||||||
|
return six.text_type(slugify(key, subs))
|
||||||
|
|
||||||
def __eq__(self, other):
|
def __eq__(self, other):
|
||||||
return self._key() == self._normalize_key(other)
|
return self._key() == self._normalize_key(other)
|
||||||
|
|
|
||||||
|
|
@ -231,7 +231,7 @@ class pelican_open(object):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
def slugify(value):
|
def slugify(value, substitutions=()):
|
||||||
"""
|
"""
|
||||||
Normalizes string, converts to lowercase, removes non-alpha characters,
|
Normalizes string, converts to lowercase, removes non-alpha characters,
|
||||||
and converts spaces to hyphens.
|
and converts spaces to hyphens.
|
||||||
|
|
@ -249,8 +249,10 @@ def slugify(value):
|
||||||
if isinstance(value, six.binary_type):
|
if isinstance(value, six.binary_type):
|
||||||
value = value.decode('ascii')
|
value = value.decode('ascii')
|
||||||
# still unicode
|
# still unicode
|
||||||
value = unicodedata.normalize('NFKD', value)
|
value = unicodedata.normalize('NFKD', value).lower()
|
||||||
value = re.sub('[^\w\s-]', '', value).strip().lower()
|
for src, dst in substitutions:
|
||||||
|
value = value.replace(src.lower(), dst.lower())
|
||||||
|
value = re.sub('[^\w\s-]', '', value).strip()
|
||||||
value = re.sub('[-\s]+', '-', value)
|
value = re.sub('[-\s]+', '-', value)
|
||||||
# we want only ASCII chars
|
# we want only ASCII chars
|
||||||
value = value.encode('ascii', 'ignore')
|
value = value.encode('ascii', 'ignore')
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue