diff --git a/docs/settings.rst b/docs/settings.rst index 78a0ddf7..61ccc2b2 100644 --- a/docs/settings.rst +++ b/docs/settings.rst @@ -258,6 +258,10 @@ Setting name (default value) What does it do? posts. `DAY_ARCHIVE_SAVE_AS` (False) The location to save per-day archives of your posts. +`SLUG_SUBSTITUTIONS` (``()``) Substitutions to make prior to stripping out + non-alphanumerics when generating slugs. Specified + as a list of 2-tuples of ``(from, to)`` which are + applied in order. ==================================================== ===================================================== .. note:: diff --git a/pelican/contents.py b/pelican/contents.py index 1b604f19..d56335dd 100644 --- a/pelican/contents.py +++ b/pelican/contents.py @@ -86,7 +86,8 @@ class Content(object): # create the slug if not existing, from the title if not hasattr(self, 'slug') and hasattr(self, 'title'): - self.slug = slugify(self.title) + self.slug = slugify(self.title, + settings.get('SLUG_SUBSTITUTIONS', ())) self.source_path = source_path diff --git a/pelican/settings.py b/pelican/settings.py index 1c9b48c3..01203504 100644 --- a/pelican/settings.py +++ b/pelican/settings.py @@ -105,6 +105,7 @@ DEFAULT_CONFIG = { 'PLUGINS': [], 'TEMPLATE_PAGES': {}, 'IGNORE_FILES': ['.#*'], + 'SLUG_SUBSTITUTIONS': (), } def read_settings(path=None, override=None): diff --git a/pelican/tests/test_utils.py b/pelican/tests/test_utils.py index ab35d991..0e65003a 100644 --- a/pelican/tests/test_utils.py +++ b/pelican/tests/test_utils.py @@ -94,6 +94,17 @@ class TestUtils(LoggedTestCase): for value, expected in samples: self.assertEqual(utils.slugify(value), expected) + def test_slugify_substitute(self): + + samples = (('C++ is based on C', 'cpp-is-based-on-c'), + ('C+++ test C+ test', 'cpp-test-c-test'), + ('c++, c#, C#, C++', 'cpp-c-sharp-c-sharp-cpp'), + ('c++-streams', 'cpp-streams'),) + + subs = (('C++', 'CPP'), ('C#', 'C-SHARP')) + for value, expected in samples: + self.assertEqual(utils.slugify(value, subs), expected) + def test_get_relative_path(self): samples = ((os.path.join('test', 'test.html'), os.pardir), diff --git a/pelican/urlwrappers.py b/pelican/urlwrappers.py index b0df61ad..acb8e07d 100644 --- a/pelican/urlwrappers.py +++ b/pelican/urlwrappers.py @@ -15,10 +15,10 @@ class URLWrapper(object): def __init__(self, name, settings): # next 2 lines are redundant with the setter of the name property # but are here for clarity - self._name = name - self.slug = slugify(name) - self.name = name self.settings = settings + self._name = name + self.slug = slugify(name, self.settings.get('SLUG_SUBSTITUTIONS', ())) + self.name = name @property def name(self): @@ -27,7 +27,7 @@ class URLWrapper(object): @name.setter def name(self, name): self._name = name - self.slug = slugify(name) + self.slug = slugify(name, self.settings.get('SLUG_SUBSTITUTIONS', ())) def as_dict(self): d = self.__dict__ @@ -41,7 +41,8 @@ class URLWrapper(object): return self.slug def _normalize_key(self, key): - return six.text_type(slugify(key)) + subs = self.settings.get('SLUG_SUBSTITUTIONS', ()) + return six.text_type(slugify(key, subs)) def __eq__(self, other): return self._key() == self._normalize_key(other) diff --git a/pelican/utils.py b/pelican/utils.py index 2c70ae8c..b1524036 100644 --- a/pelican/utils.py +++ b/pelican/utils.py @@ -231,7 +231,7 @@ class pelican_open(object): pass -def slugify(value): +def slugify(value, substitutions=()): """ Normalizes string, converts to lowercase, removes non-alpha characters, and converts spaces to hyphens. @@ -249,8 +249,10 @@ def slugify(value): if isinstance(value, six.binary_type): value = value.decode('ascii') # still unicode - value = unicodedata.normalize('NFKD', value) - value = re.sub('[^\w\s-]', '', value).strip().lower() + value = unicodedata.normalize('NFKD', value).lower() + for src, dst in substitutions: + value = value.replace(src.lower(), dst.lower()) + value = re.sub('[^\w\s-]', '', value).strip() value = re.sub('[-\s]+', '-', value) # we want only ASCII chars value = value.encode('ascii', 'ignore')