diff --git a/docs/settings.rst b/docs/settings.rst index 48344076..60e539d3 100644 --- a/docs/settings.rst +++ b/docs/settings.rst @@ -320,12 +320,6 @@ Basic settings A list of default Pygments settings for your reStructuredText code blocks. See :ref:`internal_pygments_options` for a list of supported options. -.. data:: SLUGIFY_SOURCE = 'title' - - Specifies where you want the slug to be automatically generated from. Can be - set to ``title`` to use the 'Title:' metadata tag or ``basename`` to use the - article's file name when creating the slug. - .. data:: CACHE_CONTENT = False If ``True``, saves content in caches. See @@ -621,6 +615,19 @@ corresponding ``*_URL`` setting as string, while others hard-code them: ``'archives.html'``, ``'authors.html'``, ``'categories.html'``, ``'tags.html'``. + +.. data:: SLUGIFY_SOURCE = 'title' + + Specifies where you want the slug to be automatically generated from. Can be + set to ``title`` to use the 'Title:' metadata tag or ``basename`` to use the + article's file name when creating the slug. + +.. data:: SLUGIFY_USE_UNICODE = False + + Allow unicode characters in slugs. Set to ``True`` to keep unicode characters + in auto-generated slugs. Otherwise, unicode characters will be replaced + with ASCII equivalents. + .. 
data:: SLUG_REGEX_SUBSTITUTIONS = [ (r'[^\\w\\s-]', ''), # remove non-alphabetical/whitespace/'-' chars (r'(?u)\\A\\s*', ''), # strip leading whitespace diff --git a/pelican/contents.py b/pelican/contents.py index 40d9c28e..b49e1f2e 100644 --- a/pelican/contents.py +++ b/pelican/contents.py @@ -92,16 +92,17 @@ class Content(object): if not hasattr(self, 'slug'): if (settings['SLUGIFY_SOURCE'] == 'title' and hasattr(self, 'title')): - self.slug = slugify( - self.title, - regex_subs=settings.get('SLUG_REGEX_SUBSTITUTIONS', [])) + value = self.title elif (settings['SLUGIFY_SOURCE'] == 'basename' and source_path is not None): - basename = os.path.basename( - os.path.splitext(source_path)[0]) + value = os.path.basename(os.path.splitext(source_path)[0]) + else: + value = None + if value is not None: self.slug = slugify( - basename, - regex_subs=settings.get('SLUG_REGEX_SUBSTITUTIONS', [])) + value, + regex_subs=settings.get('SLUG_REGEX_SUBSTITUTIONS', []), + use_unicode=settings['SLUGIFY_USE_UNICODE']) self.source_path = source_path self.relative_source_path = self.get_relative_source_path() diff --git a/pelican/settings.py b/pelican/settings.py index 77aea059..0e0397c9 100644 --- a/pelican/settings.py +++ b/pelican/settings.py @@ -155,6 +155,7 @@ DEFAULT_CONFIG = { ], 'INTRASITE_LINK_REGEX': '[{|](?P<what>.*?)[|}]', 'SLUGIFY_SOURCE': 'title', + 'SLUGIFY_USE_UNICODE': False, 'CACHE_CONTENT': False, 'CONTENT_CACHING_LAYER': 'reader', 'CACHE_PATH': 'cache', diff --git a/pelican/tests/test_contents.py b/pelican/tests/test_contents.py index 08d4eb73..2a377e75 100644 --- a/pelican/tests/test_contents.py +++ b/pelican/tests/test_contents.py @@ -135,6 +135,17 @@ class TestPage(LoggedTestCase): page = Page(**page_kwargs) self.assertEqual(page.slug, 'foo') + # test slug from unicode title + # slug doesn't use unicode + settings['SLUGIFY_SOURCE'] = "title" + page_kwargs['metadata']['title'] = '指導書' + page = Page(**page_kwargs) + self.assertEqual(page.slug, 'zhi-dao-shu') + # slug uses 
unicode + settings['SLUGIFY_USE_UNICODE'] = True + page = Page(**page_kwargs) + self.assertEqual(page.slug, '指導書') + def test_defaultlang(self): # If no lang is given, default to the default one. page = Page(**self.page_kwargs) diff --git a/pelican/urlwrappers.py b/pelican/urlwrappers.py index cc276b3f..86f9ef32 100644 --- a/pelican/urlwrappers.py +++ b/pelican/urlwrappers.py @@ -34,15 +34,14 @@ class URLWrapper(object): if self._slug is None: class_key = '{}_REGEX_SUBSTITUTIONS'.format( self.__class__.__name__.upper()) - if class_key in self.settings: - self._slug = slugify( - self.name, - regex_subs=self.settings[class_key]) - else: - self._slug = slugify( - self.name, - regex_subs=self.settings.get( - 'SLUG_REGEX_SUBSTITUTIONS', [])) + regex_subs = self.settings.get( + class_key, + self.settings.get('SLUG_REGEX_SUBSTITUTIONS', [])) + self._slug = slugify( + self.name, + regex_subs=regex_subs, + use_unicode=self.settings.get('SLUGIFY_USE_UNICODE', False) + ) return self._slug @slug.setter @@ -61,8 +60,13 @@ class URLWrapper(object): return hash(self.slug) def _normalize_key(self, key): - subs = self.settings.get('SLUG_REGEX_SUBSTITUTIONS', []) - return slugify(key, regex_subs=subs) + class_key = '{}_REGEX_SUBSTITUTIONS'.format( + self.__class__.__name__.upper()) + regex_subs = self.settings.get( + class_key, + self.settings.get('SLUG_REGEX_SUBSTITUTIONS', [])) + use_unicode = self.settings.get('SLUGIFY_USE_UNICODE', False) + return slugify(key, regex_subs=regex_subs, use_unicode=use_unicode) def __eq__(self, other): if isinstance(other, self.__class__):