Added new settings option ALLOW_NON_ASCII_IN_SLUG

This commit is contained in:
Kommit 2013-11-26 01:11:26 +08:00
commit 6a8da0b1b3
5 changed files with 35 additions and 22 deletions

View file

@@ -92,7 +92,8 @@ class Content(object):
# create the slug if not existing, from the title
if not hasattr(self, 'slug') and hasattr(self, 'title'):
self.slug = slugify(self.title,
settings.get('SLUG_SUBSTITUTIONS', ()))
settings.get('SLUG_SUBSTITUTIONS', ()),
settings['ALLOW_NON_ASCII_IN_SLUG'])
self.source_path = source_path
@@ -157,11 +158,13 @@ class Content(object):
'date': getattr(self, 'date', datetime.now()),
'author': slugify(
getattr(self, 'author', ''),
slug_substitutions
slug_substitutions,
self.settings['ALLOW_NON_ASCII_IN_SLUG']
),
'category': slugify(
getattr(self, 'category', default_category),
slug_substitutions
slug_substitutions,
self.settings['ALLOW_NON_ASCII_IN_SLUG']
)
})
return metadata

View file

@@ -111,6 +111,7 @@ DEFAULT_CONFIG = {
'TEMPLATE_PAGES': {},
'IGNORE_FILES': ['.#*'],
'SLUG_SUBSTITUTIONS': (),
'ALLOW_NON_ASCII_IN_SLUG': False,
'INTRASITE_LINK_REGEX': '[{|](?P<what>.*?)[|}]',
}

View file

@@ -93,6 +93,8 @@ class TestUtils(LoggedTestCase):
for value, expected in samples:
self.assertEqual(utils.slugify(value), expected)
# with allow_non_ascii=True and no substitutions, these samples come back unchanged.
self.assertEqual(utils.slugify(value, allow_non_ascii=True), value)
def test_slugify_substitute(self):

View file

@@ -17,7 +17,9 @@ class URLWrapper(object):
# but are here for clarity
self.settings = settings
self._name = name
self.slug = slugify(name, self.settings.get('SLUG_SUBSTITUTIONS', ()))
self.slug = slugify(name,
self.settings.get('SLUG_SUBSTITUTIONS', ()),
self.settings['ALLOW_NON_ASCII_IN_SLUG'])
self.name = name
@property
@@ -27,7 +29,9 @@ class URLWrapper(object):
@name.setter
def name(self, name):
self._name = name
self.slug = slugify(name, self.settings.get('SLUG_SUBSTITUTIONS', ()))
self.slug = slugify(name,
self.settings.get('SLUG_SUBSTITUTIONS', ()),
self.settings['ALLOW_NON_ASCII_IN_SLUG'])
def as_dict(self):
d = self.__dict__
@@ -42,7 +46,7 @@ class URLWrapper(object):
def _normalize_key(self, key):
subs = self.settings.get('SLUG_SUBSTITUTIONS', ())
return six.text_type(slugify(key, subs))
return six.text_type(slugify(key, subs, self.settings['ALLOW_NON_ASCII_IN_SLUG']))
def __eq__(self, other):
return self._key() == self._normalize_key(other)

View file

@@ -227,7 +227,7 @@ def pelican_open(filename):
yield content
def slugify(value, substitutions=()):
def slugify(value, substitutions=(), allow_non_ascii=False):
"""
Normalizes string, converts to lowercase, removes non-alpha characters,
and converts spaces to hyphens.
@@ -236,24 +236,27 @@ def slugify(value, substitutions=()):
"""
# TODO Maybe steal again from current Django 1.5dev
value = Markup(value).striptags()
# value must be unicode per se
import unicodedata
from unidecode import unidecode
# unidecode returns str in Py2 and 3, so in Py2 we have to make
# it unicode again
value = unidecode(value)
if isinstance(value, six.binary_type):
value = value.decode('ascii')
# still unicode
value = unicodedata.normalize('NFKD', value).lower()
for src, dst in substitutions:
value = value.replace(src.lower(), dst.lower())
value = re.sub('[^\w\s-]', '', value).strip()
value = re.sub('[-\s]+', '-', value)
# we want only ASCII chars
value = value.encode('ascii', 'ignore')
if not allow_non_ascii:
# value must be unicode per se
from unidecode import unidecode
# unidecode returns str in Py2 and 3, so in Py2 we have to make
# it unicode again
value = unidecode(value)
if isinstance(value, six.binary_type):
value = value.decode('ascii')
# still unicode
import unicodedata
value = unicodedata.normalize('NFKD', value).lower()
value = re.sub('[^\w\s-]', '', value).strip()
value = re.sub('[-\s]+', '-', value)
# we want only ASCII chars
value = value.encode('ascii', 'ignore')
# but Pelican should generally use only unicode
return value.decode('ascii')
return unicode(value)
def copy(path, source, destination, destination_path=None):