mirror of
https://github.com/getpelican/pelican.git
synced 2025-10-15 20:28:56 +02:00
Add support for unicode slugs
This commit is contained in:
parent
0b3dc9db21
commit
80d7156427
1 changed file with 19 additions and 13 deletions
|
|
@@ -227,7 +227,7 @@ def pelican_open(filename):
|
|||
yield content
|
||||
|
||||
|
||||
def slugify(value, substitutions=()):
|
||||
def slugify(value, substitutions=(), use_unicode=True):
|
||||
"""
|
||||
Normalizes string, converts to lowercase, removes non-alpha characters,
|
||||
and converts spaces to hyphens.
|
||||
|
|
@@ -236,24 +236,30 @@ def slugify(value, substitutions=()):
|
|||
"""
|
||||
# TODO Maybe steal again from current Django 1.5dev
|
||||
value = Markup(value).striptags()
|
||||
|
||||
# value must be unicode per se
|
||||
import unicodedata
|
||||
from unidecode import unidecode
|
||||
# unidecode returns str in Py2 and 3, so in Py2 we have to make
|
||||
# it unicode again
|
||||
value = unidecode(value)
|
||||
if isinstance(value, six.binary_type):
|
||||
value = value.decode('ascii')
|
||||
if not use_unicode:
|
||||
from unidecode import unidecode
|
||||
# unidecode returns str in Py2 and 3, so in Py2 we have to make
|
||||
# it unicode again
|
||||
value = unidecode(value)
|
||||
if isinstance(value, six.binary_type):
|
||||
value = value.decode('ascii')
|
||||
|
||||
# still unicode
|
||||
value = unicodedata.normalize('NFKD', value).lower()
|
||||
import unicodedata
|
||||
value = unicodedata.normalize('NFKC' if use_unicode else 'NFKD', value).lower()
|
||||
for src, dst in substitutions:
|
||||
value = value.replace(src.lower(), dst.lower())
|
||||
value = re.sub('[^\w\s-]', '', value).strip()
|
||||
value = re.sub('[-\s]+', '-', value)
|
||||
# we want only ASCII chars
|
||||
value = value.encode('ascii', 'ignore')
|
||||
# but Pelican should generally use only unicode
|
||||
return value.decode('ascii')
|
||||
if use_unicode:
|
||||
return value
|
||||
else:
|
||||
# we want only ASCII chars
|
||||
value = value.encode('ascii', 'ignore')
|
||||
# but Pelican should generally use only unicode
|
||||
return value.decode('ascii')
|
||||
|
||||
|
||||
def copy(path, source, destination, destination_path=None):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue