More granular control of tags and categories slugs. Fixes #1873

- add TAG_SUBSTITUTIONS and CATEGORY_SUBSTITUTIONS settings
- make slugify keep non-alphanumeric characters if configured
This commit is contained in:
Mr. Senko 2016-03-14 00:16:58 +02:00
commit 648165b839
8 changed files with 123 additions and 8 deletions

View file

@ -4,7 +4,14 @@ Release history
Next release Next release
============ ============
- Nothing yet * ``SLUG_SUBSTITUTIONS`` now accepts 3-tuple elements, allowing to keep
non-alphanum characters. Existing 2-tuple configurations will continue to work
without change in behavior. The new 3rd parameter has side effects when there
are multiple substitutions defined. Please see the docs.
* Tag and category slugs can be controlled with greater precision using the
``TAG_SUBSTITUTIONS`` and ``CATEGORY_SUBSTITUTIONS`` settings. These also
allow for keeping non-alphanum characters for backward compatibility with
existing URLs.
3.6.3 (2015-08-14) 3.6.3 (2015-08-14)
================== ==================

View file

@ -306,8 +306,12 @@ Setting name (followed by default value, if any) What does it do?
``DAY_ARCHIVE_SAVE_AS = ''`` The location to save per-day archives of your posts. ``DAY_ARCHIVE_SAVE_AS = ''`` The location to save per-day archives of your posts.
``SLUG_SUBSTITUTIONS = ()`` Substitutions to make prior to stripping out ``SLUG_SUBSTITUTIONS = ()`` Substitutions to make prior to stripping out
non-alphanumerics when generating slugs. Specified non-alphanumerics when generating slugs. Specified
as a list of 2-tuples of ``(from, to)`` which are as a list of 3-tuples of ``(from, to, skip)`` which are
applied in order. applied in order. ``skip`` is a boolean indicating whether
or not to skip replacement of non-alphanumeric characters.
Useful for backward compatibility with existing URLs.
``CATEGORY_SUBSTITUTIONS = ()`` Added to ``SLUG_SUBSTITUTIONS`` for categories.
``TAG_SUBSTITUTIONS = ()`` Added to ``SLUG_SUBSTITUTIONS`` for tags.
====================================================== ============================================================== ====================================================== ==============================================================
.. note:: .. note::
@ -317,6 +321,20 @@ Setting name (followed by default value, if any) What does it do?
set the corresponding ``*_SAVE_AS`` setting to ``''`` to prevent the set the corresponding ``*_SAVE_AS`` setting to ``''`` to prevent the
relevant page from being generated. relevant page from being generated.
.. note::
Substitutions are applied in order with the side effect that keeping
non-alphanum characters applies to the whole string when a replacement
is made. For example if you have the following setting
``SLUG_SUBSTITUTIONS = (('C++', 'cpp'), ('keep dot', 'keep.dot', True))``
the string ``Keep Dot`` will be converted to ``keep.dot``, however
``C++ will keep dot`` will be converted to ``cpp will keep.dot`` instead
of ``cpp-will-keep.dot``!
If you want to keep non-alphanum characters only for tags or categories
but not other slugs then configure ``TAG_SUBSTITUTIONS`` and
``CATEGORY_SUBSTITUTIONS`` respectively!
Pelican can optionally create per-year, per-month, and per-day archives of your Pelican can optionally create per-year, per-month, and per-day archives of your
posts. These secondary archives are disabled by default but are automatically posts. These secondary archives are disabled by default but are automatically
enabled if you supply format strings for their respective ``_SAVE_AS`` settings. enabled if you supply format strings for their respective ``_SAVE_AS`` settings.

View file

@ -172,6 +172,7 @@ class Content(object):
'lang': getattr(self, 'lang', 'en'), 'lang': getattr(self, 'lang', 'en'),
'date': getattr(self, 'date', SafeDatetime.now()), 'date': getattr(self, 'date', SafeDatetime.now()),
'author': self.author.slug if hasattr(self, 'author') else '', 'author': self.author.slug if hasattr(self, 'author') else '',
'tag': self.tag.slug if hasattr(self, 'tag') else '',
'category': self.category.slug if hasattr(self, 'category') else '' 'category': self.category.slug if hasattr(self, 'category') else ''
}) })
return metadata return metadata

View file

@ -11,7 +11,7 @@ from jinja2.utils import generate_lorem_ipsum
import six import six
from pelican.contents import Article, Author, Category, Page, Static from pelican.contents import Article, Author, Category, Page, Static, Tag
from pelican.settings import DEFAULT_CONFIG from pelican.settings import DEFAULT_CONFIG
from pelican.signals import content_object_init from pelican.signals import content_object_init
from pelican.tests.support import LoggedTestCase, get_settings, unittest from pelican.tests.support import LoggedTestCase, get_settings, unittest
@ -457,6 +457,29 @@ class TestArticle(TestPage):
self.assertEqual( self.assertEqual(
article.save_as, 'obrien/csharp-stuff/fnord/index.html') article.save_as, 'obrien/csharp-stuff/fnord/index.html')
def test_slugify_category_with_dots(self):
    """A category substitution flagged ``True`` keeps its dot in the URL."""
    settings = get_settings()
    settings['CATEGORY_SUBSTITUTIONS'] = [('Fedora QA', 'fedora.qa', True)]
    settings['ARTICLE_URL'] = '{category}/{slug}/'

    kwargs = self._copy_page_kwargs()
    kwargs['settings'] = settings
    metadata = kwargs['metadata']
    metadata['category'] = Category('Fedora QA', settings)
    metadata['title'] = 'This Week in Fedora QA'

    article = Article(**kwargs)
    # The category part keeps the dot; the title slug is still dashed.
    self.assertEqual(article.url, 'fedora.qa/this-week-in-fedora-qa/')
def test_slugify_tags_with_dots(self):
    """A tag substitution flagged ``True`` keeps its dot in the URL."""
    settings = get_settings()
    settings['TAG_SUBSTITUTIONS'] = [('Fedora QA', 'fedora.qa', True)]
    settings['ARTICLE_URL'] = '{tag}/{slug}/'

    kwargs = self._copy_page_kwargs()
    kwargs['settings'] = settings
    metadata = kwargs['metadata']
    metadata['tag'] = Tag('Fedora QA', settings)
    metadata['title'] = 'This Week in Fedora QA'

    article = Article(**kwargs)
    # The tag part keeps the dot; the title slug is still dashed.
    self.assertEqual(article.url, 'fedora.qa/this-week-in-fedora-qa/')
class TestStatic(LoggedTestCase): class TestStatic(LoggedTestCase):

View file

@ -56,3 +56,18 @@ class TestURLWrapper(unittest.TestCase):
cat_ascii = Category('指導書', settings={}) cat_ascii = Category('指導書', settings={})
self.assertEqual(cat_ascii, u'zhi-dao-shu') self.assertEqual(cat_ascii, u'zhi-dao-shu')
def test_slugify_with_substitutions_and_dots(self):
    """Tag and category substitutions flagged ``True`` keep their dots."""
    tag_settings = {'TAG_SUBSTITUTIONS': [('Tag Dot', 'tag.dot', True)]}
    cat_settings = {
        'CATEGORY_SUBSTITUTIONS': (('Category Dot', 'cat.dot', True),),
    }

    tag = Tag('Tag Dot', settings=tag_settings)
    cat = Category('Category Dot', settings=cat_settings)

    self.assertEqual(tag.slug, 'tag.dot')
    self.assertEqual(cat.slug, 'cat.dot')

View file

@ -131,6 +131,18 @@ class TestUtils(LoggedTestCase):
for value, expected in samples: for value, expected in samples:
self.assertEqual(utils.slugify(value, subs), expected) self.assertEqual(utils.slugify(value, subs), expected)
def test_slugify_substitute_and_keeping_non_alphanum(self):
    """Check slugify when one substitution keeps non-alphanum characters."""
    # The 3-tuple rule asks to keep non-alphanum characters; the 2-tuple
    # rule uses the default behaviour.
    subs = (
        ('Fedora QA', 'fedora.qa', True),
        ('c++', 'cpp'),
    )
    expected_by_input = (
        ('Fedora QA', 'fedora.qa'),
        ('C++ is used by Fedora QA', 'cpp is used by fedora.qa'),
        ('C++ is based on C', 'cpp-is-based-on-c'),
        ('C+++ test C+ test', 'cpp-test-c-test'),
    )
    for raw, slug in expected_by_input:
        self.assertEqual(utils.slugify(raw, subs), slug)
def test_get_relative_path(self): def test_get_relative_path(self):
samples = ((os.path.join('test', 'test.html'), os.pardir), samples = ((os.path.join('test', 'test.html'), os.pardir),

View file

@ -112,13 +112,28 @@ class URLWrapper(object):
class Category(URLWrapper): class Category(URLWrapper):
pass @property
def slug(self):
    """Return the category slug, computing and caching it on first access.

    The slug is derived from ``self.name`` using the ``SLUG_SUBSTITUTIONS``
    setting followed by the ``CATEGORY_SUBSTITUTIONS`` setting.
    """
    if self._slug is None:
        # Build a fresh tuple instead of using ``+=`` on the value returned
        # by ``settings.get``: if SLUG_SUBSTITUTIONS is configured as a
        # list, ``+=`` would extend that list in place and leak the
        # category rules into the shared settings on every new Category.
        substitutions = (
            tuple(self.settings.get('SLUG_SUBSTITUTIONS', ())) +
            tuple(self.settings.get('CATEGORY_SUBSTITUTIONS', ())))
        self._slug = slugify(self.name, substitutions)
    return self._slug
class Tag(URLWrapper): class Tag(URLWrapper):
def __init__(self, name, *args, **kwargs): def __init__(self, name, *args, **kwargs):
super(Tag, self).__init__(name.strip(), *args, **kwargs) super(Tag, self).__init__(name.strip(), *args, **kwargs)
@property
def slug(self):
    """Return the tag slug, computing and caching it on first access.

    The slug is derived from ``self.name`` using the ``SLUG_SUBSTITUTIONS``
    setting followed by the ``TAG_SUBSTITUTIONS`` setting.
    """
    if self._slug is None:
        # Build a fresh tuple instead of using ``+=`` on the value returned
        # by ``settings.get``: if SLUG_SUBSTITUTIONS is configured as a
        # list, ``+=`` would extend that list in place and leak the tag
        # rules into the shared settings on every new Tag.
        substitutions = (
            tuple(self.settings.get('SLUG_SUBSTITUTIONS', ())) +
            tuple(self.settings.get('TAG_SUBSTITUTIONS', ())))
        self._slug = slugify(self.name, substitutions)
    return self._slug
class Author(URLWrapper): class Author(URLWrapper):
pass pass

View file

@ -270,10 +270,34 @@ def slugify(value, substitutions=()):
value = value.decode('ascii') value = value.decode('ascii')
# still unicode # still unicode
value = unicodedata.normalize('NFKD', value).lower() value = unicodedata.normalize('NFKD', value).lower()
for src, dst in substitutions:
# backward-compatible conversion of 2-tuples to 3-tuples
new_subs = []
for tpl in substitutions:
try:
src, dst, skip = tpl
except ValueError:
src, dst = tpl
skip = False
new_subs.append((src, dst, skip))
substitutions = tuple(new_subs)
# by default will replace non-alphanum characters
replace = True
for src, dst, skip in substitutions:
orig_value = value
value = value.replace(src.lower(), dst.lower()) value = value.replace(src.lower(), dst.lower())
value = re.sub('[^\w\s-]', '', value).strip() # if replacement was made then skip non-alphanum
value = re.sub('[-\s]+', '-', value) # replacement if instructed to do so
if value != orig_value:
replace = replace and not skip
if replace:
value = re.sub('[^\w\s-]', '', value).strip()
value = re.sub('[-\s]+', '-', value)
else:
value = value.strip()
# we want only ASCII chars # we want only ASCII chars
value = value.encode('ascii', 'ignore') value = value.encode('ascii', 'ignore')
# but Pelican should generally use only unicode # but Pelican should generally use only unicode