1
0
Fork 0
forked from github/pelican

Allow text substitutions when generating slugs

The `slugify()` function used by Pelican is in general very good at
coming up with something both readable and URL-safe. However, there are
a few specific cases where it causes conflicts. One that I've run into
is using the strings `C++` and `C` as tags, both of which transform to
the slug `c`. This commit adds an optional `SLUG_SUBSTITUTIONS` setting
which is a list of 2-tuples of substitutions to be carried out
case-insensitively just prior to stripping out non-alphanumeric
characters. This allows cases like `C++` to be transformed to `CPP` or
similar. This can also improve the readability of slugs.
This commit is contained in:
Andy Pearce 2013-06-14 15:54:06 +01:00
commit 39518e15ef
6 changed files with 28 additions and 8 deletions

View file

@ -258,6 +258,10 @@ Setting name (default value) What does it do?
posts.
`DAY_ARCHIVE_SAVE_AS` (False) The location to save per-day archives of your
posts.
`SLUG_SUBSTITUTIONS` (``()``) Substitutions to make prior to stripping out
non-alphanumerics when generating slugs. Specified
as a list of 2-tuples of ``(from, to)`` which are
applied in order.
==================================================== =====================================================
.. note::

View file

@ -86,7 +86,8 @@ class Content(object):
# create the slug if not existing, from the title
if not hasattr(self, 'slug') and hasattr(self, 'title'):
self.slug = slugify(self.title)
self.slug = slugify(self.title,
settings.get('SLUG_SUBSTITUTIONS', ()))
self.source_path = source_path

View file

@ -105,6 +105,7 @@ DEFAULT_CONFIG = {
'PLUGINS': [],
'TEMPLATE_PAGES': {},
'IGNORE_FILES': ['.#*'],
'SLUG_SUBSTITUTIONS': (),
}
def read_settings(path=None, override=None):

View file

@ -94,6 +94,17 @@ class TestUtils(LoggedTestCase):
for value, expected in samples:
self.assertEqual(utils.slugify(value), expected)
def test_slugify_substitute(self):
samples = (('C++ is based on C', 'cpp-is-based-on-c'),
('C+++ test C+ test', 'cpp-test-c-test'),
('c++, c#, C#, C++', 'cpp-c-sharp-c-sharp-cpp'),
('c++-streams', 'cpp-streams'),)
subs = (('C++', 'CPP'), ('C#', 'C-SHARP'))
for value, expected in samples:
self.assertEqual(utils.slugify(value, subs), expected)
def test_get_relative_path(self):
samples = ((os.path.join('test', 'test.html'), os.pardir),

View file

@ -15,10 +15,10 @@ class URLWrapper(object):
def __init__(self, name, settings):
# next 2 lines are redundant with the setter of the name property
# but are here for clarity
self._name = name
self.slug = slugify(name)
self.name = name
self.settings = settings
self._name = name
self.slug = slugify(name, self.settings.get('SLUG_SUBSTITUTIONS', ()))
self.name = name
@property
def name(self):
@ -27,7 +27,7 @@ class URLWrapper(object):
@name.setter
def name(self, name):
self._name = name
self.slug = slugify(name)
self.slug = slugify(name, self.settings.get('SLUG_SUBSTITUTIONS', ()))
def as_dict(self):
d = self.__dict__
@ -41,7 +41,8 @@ class URLWrapper(object):
return self.slug
def _normalize_key(self, key):
return six.text_type(slugify(key))
subs = self.settings.get('SLUG_SUBSTITUTIONS', ())
return six.text_type(slugify(key, subs))
def __eq__(self, other):
return self._key() == self._normalize_key(other)

View file

@ -231,7 +231,7 @@ class pelican_open(object):
pass
def slugify(value):
def slugify(value, substitutions=()):
"""
Normalizes string, converts to lowercase, removes non-alpha characters,
and converts spaces to hyphens.
@ -249,8 +249,10 @@ def slugify(value):
if isinstance(value, six.binary_type):
value = value.decode('ascii')
# still unicode
value = unicodedata.normalize('NFKD', value)
value = re.sub('[^\w\s-]', '', value).strip().lower()
value = unicodedata.normalize('NFKD', value).lower()
for src, dst in substitutions:
value = value.replace(src.lower(), dst.lower())
value = re.sub('[^\w\s-]', '', value).strip()
value = re.sub('[-\s]+', '-', value)
# we want only ASCII chars
value = value.encode('ascii', 'ignore')