diff --git a/pelican/contents.py b/pelican/contents.py index 6fb55782..6a049948 100644 --- a/pelican/contents.py +++ b/pelican/contents.py @@ -92,7 +92,8 @@ class Content(object): # create the slug if not existing, from the title if not hasattr(self, 'slug') and hasattr(self, 'title'): self.slug = slugify(self.title, - settings.get('SLUG_SUBSTITUTIONS', ())) + settings.get('SLUG_SUBSTITUTIONS', ()), + settings['ALLOW_NON_ASCII_IN_SLUG']) self.source_path = source_path @@ -157,11 +158,13 @@ class Content(object): 'date': getattr(self, 'date', datetime.now()), 'author': slugify( getattr(self, 'author', ''), - slug_substitutions + slug_substitutions, + self.settings['ALLOW_NON_ASCII_IN_SLUG'] ), 'category': slugify( getattr(self, 'category', default_category), - slug_substitutions + slug_substitutions, + self.settings['ALLOW_NON_ASCII_IN_SLUG'] ) }) return metadata diff --git a/pelican/settings.py b/pelican/settings.py index 99828935..5c0d261a 100644 --- a/pelican/settings.py +++ b/pelican/settings.py @@ -111,6 +111,7 @@ DEFAULT_CONFIG = { 'TEMPLATE_PAGES': {}, 'IGNORE_FILES': ['.#*'], 'SLUG_SUBSTITUTIONS': (), + 'ALLOW_NON_ASCII_IN_SLUG': False, 'INTRASITE_LINK_REGEX': '[{|](?P<what>.*?)[|}]', } diff --git a/pelican/tests/test_utils.py b/pelican/tests/test_utils.py index 0642926e..925142e4 100644 --- a/pelican/tests/test_utils.py +++ b/pelican/tests/test_utils.py @@ -93,6 +93,8 @@ class TestUtils(LoggedTestCase): for value, expected in samples: self.assertEqual(utils.slugify(value), expected) + # nothing will be changed if allow_non_ascii is True. 
+ self.assertEqual(utils.slugify(value, allow_non_ascii=True), value) def test_slugify_substitute(self): diff --git a/pelican/urlwrappers.py b/pelican/urlwrappers.py index acb8e07d..bcbcf7c4 100644 --- a/pelican/urlwrappers.py +++ b/pelican/urlwrappers.py @@ -17,7 +17,9 @@ class URLWrapper(object): # but are here for clarity self.settings = settings self._name = name - self.slug = slugify(name, self.settings.get('SLUG_SUBSTITUTIONS', ())) + self.slug = slugify(name, + self.settings.get('SLUG_SUBSTITUTIONS', ()), + self.settings['ALLOW_NON_ASCII_IN_SLUG']) self.name = name @property @@ -27,7 +29,9 @@ class URLWrapper(object): @name.setter def name(self, name): self._name = name - self.slug = slugify(name, self.settings.get('SLUG_SUBSTITUTIONS', ())) + self.slug = slugify(name, + self.settings.get('SLUG_SUBSTITUTIONS', ()), + self.settings['ALLOW_NON_ASCII_IN_SLUG']) def as_dict(self): d = self.__dict__ @@ -42,7 +46,7 @@ class URLWrapper(object): def _normalize_key(self, key): subs = self.settings.get('SLUG_SUBSTITUTIONS', ()) - return six.text_type(slugify(key, subs)) + return six.text_type(slugify(key, subs, self.settings['ALLOW_NON_ASCII_IN_SLUG'])) def __eq__(self, other): return self._key() == self._normalize_key(other) diff --git a/pelican/utils.py b/pelican/utils.py index 4b25ec7f..db8740fc 100644 --- a/pelican/utils.py +++ b/pelican/utils.py @@ -227,7 +227,7 @@ def pelican_open(filename): yield content -def slugify(value, substitutions=()): +def slugify(value, substitutions=(), allow_non_ascii=False): """ Normalizes string, converts to lowercase, removes non-alpha characters, and converts spaces to hyphens. 
@@ -236,24 +236,27 @@ def slugify(value, substitutions=()):
     """
     # TODO Maybe steal again from current Django 1.5dev
     value = Markup(value).striptags()
-    # value must be unicode per se
-    import unicodedata
-    from unidecode import unidecode
-    # unidecode returns str in Py2 and 3, so in Py2 we have to make
-    # it unicode again
-    value = unidecode(value)
-    if isinstance(value, six.binary_type):
-        value = value.decode('ascii')
-    # still unicode
-    value = unicodedata.normalize('NFKD', value).lower()
+    if not allow_non_ascii:
+        # value must be unicode per se
+        import unicodedata
+        from unidecode import unidecode
+        # unidecode returns str in Py2 and 3, so in Py2 we have to make
+        # it unicode again
+        value = unidecode(value)
+        if isinstance(value, six.binary_type):
+            value = value.decode('ascii')
+        # still unicode
+        value = unicodedata.normalize('NFKD', value).lower()
+    # keep substitutions after lowercasing: they match on src.lower()
     for src, dst in substitutions:
         value = value.replace(src.lower(), dst.lower())
-    value = re.sub('[^\w\s-]', '', value).strip()
-    value = re.sub('[-\s]+', '-', value)
-    # we want only ASCII chars
-    value = value.encode('ascii', 'ignore')
+    if not allow_non_ascii:
+        value = re.sub('[^\w\s-]', '', value).strip()
+        value = re.sub('[-\s]+', '-', value)
+        # we want only ASCII chars; decode at once so value stays text
+        value = value.encode('ascii', 'ignore').decode('ascii')
     # but Pelican should generally use only unicode
-    return value.decode('ascii')
+    return six.text_type(value)
 
 
 def copy(path, source, destination, destination_path=None):