1
0
Fork 0
forked from github/pelican

Merge pull request #2326 from oulenz/slug_substitutions

Control slug substitutions from settings with regex
This commit is contained in:
Justin Mayer 2018-10-31 20:08:01 +01:00 committed by GitHub
commit 461f535d04
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 409 additions and 235 deletions

View file

@ -6,6 +6,8 @@ Next release
* New signal: ``feed_generated`` * New signal: ``feed_generated``
* Replace Fabric by Invoke and ``fabfile.py`` template by ``tasks.py``. * Replace Fabric by Invoke and ``fabfile.py`` template by ``tasks.py``.
* Replace ``SLUG_SUBSTITUTIONS`` (and friends) with ``SLUG_REGEX_SUBSTITUTIONS``
for more fine-grained control
3.7.1 (2017-01-10) 3.7.1 (2017-01-10)
================== ==================

View file

@ -519,27 +519,6 @@ respectively.
The URL to use for per-day archives of your posts. Used only if you have the The URL to use for per-day archives of your posts. Used only if you have the
``{url}`` placeholder in ``PAGINATION_PATTERNS``. ``{url}`` placeholder in ``PAGINATION_PATTERNS``.
.. data:: SLUG_SUBSTITUTIONS = ()
Substitutions to make prior to stripping out non-alphanumerics when
generating slugs. Specified as a list of 3-tuples of ``(from, to, skip)``
which are applied in order. ``skip`` is a boolean indicating whether or not
to skip replacement of non-alphanumeric characters. Useful for backward
compatibility with existing URLs.
.. data:: AUTHOR_SUBSTITUTIONS = ()
Substitutions for authors. ``SLUG_SUBSTITUTIONS`` is not taken into account
here!
.. data:: CATEGORY_SUBSTITUTIONS = ()
Added to ``SLUG_SUBSTITUTIONS`` for categories.
.. data:: TAG_SUBSTITUTIONS = ()
Added to ``SLUG_SUBSTITUTIONS`` for tags.
.. note:: .. note::
If you do not want one or more of the default pages to be created (e.g., If you do not want one or more of the default pages to be created (e.g.,
@ -547,24 +526,6 @@ respectively.
set the corresponding ``*_SAVE_AS`` setting to ``''`` to prevent the set the corresponding ``*_SAVE_AS`` setting to ``''`` to prevent the
relevant page from being generated. relevant page from being generated.
.. note::
Substitutions are applied in order with the side effect that keeping
non-alphanum characters applies to the whole string when a replacement
is made.
For example if you have the following setting::
SLUG_SUBSTITUTIONS = (('C++', 'cpp'), ('keep dot', 'keep.dot', True))
the string ``Keep Dot`` will be converted to ``keep.dot``, however
``C++ will keep dot`` will be converted to ``cpp will keep.dot`` instead
of ``cpp-will-keep.dot``!
If you want to keep non-alphanum characters only for tags or categories
but not other slugs then configure ``TAG_SUBSTITUTIONS`` and
``CATEGORY_SUBSTITUTIONS`` respectively!
Pelican can optionally create per-year, per-month, and per-day archives of your Pelican can optionally create per-year, per-month, and per-day archives of your
posts. These secondary archives are disabled by default but are automatically posts. These secondary archives are disabled by default but are automatically
enabled if you supply format strings for their respective ``_SAVE_AS`` settings. enabled if you supply format strings for their respective ``_SAVE_AS`` settings.
@ -626,6 +587,33 @@ URLs for direct template pages are theme-dependent. Some themes use
corresponding ``*_URL`` setting as string, while others hard-code them: corresponding ``*_URL`` setting as string, while others hard-code them:
``'archives.html'``, ``'authors.html'``, ``'categories.html'``, ``'tags.html'``. ``'archives.html'``, ``'authors.html'``, ``'categories.html'``, ``'tags.html'``.
.. data:: SLUG_REGEX_SUBSTITUTIONS = [
(r'[^\w\s-]', ''), # remove non-alphanumeric/whitespace/'-' chars
(r'(?u)\A\s*', ''), # strip leading whitespace
(r'(?u)\s*\Z', ''), # strip trailing whitespace
(r'[-\s]+', '-'), # reduce multiple whitespace or '-' to single '-'
]
Regex substitutions to make when generating slugs of articles and pages.
Specified as a list of pairs of ``(from, to)`` which are applied in order,
ignoring case. The default substitutions have the effect of removing
non-alphanumeric characters and converting internal whitespace to dashes.
Apart from these substitutions, slugs are always converted to lowercase
ASCII characters, and leading and trailing whitespace is stripped. Customizing
these substitutions is useful for backward compatibility with existing URLs.
.. data:: AUTHOR_REGEX_SUBSTITUTIONS = SLUG_REGEX_SUBSTITUTIONS
Regex substitutions for author slugs. Defaults to ``SLUG_REGEX_SUBSTITUTIONS``.
.. data:: CATEGORY_REGEX_SUBSTITUTIONS = SLUG_REGEX_SUBSTITUTIONS
Regex substitutions for category slugs. Defaults to ``SLUG_REGEX_SUBSTITUTIONS``.
.. data:: TAG_REGEX_SUBSTITUTIONS = SLUG_REGEX_SUBSTITUTIONS
Regex substitutions for tag slugs. Defaults to ``SLUG_REGEX_SUBSTITUTIONS``.
Time and Date Time and Date
============= =============

View file

@ -98,14 +98,16 @@ class Content(object):
if not hasattr(self, 'slug'): if not hasattr(self, 'slug'):
if (settings['SLUGIFY_SOURCE'] == 'title' and if (settings['SLUGIFY_SOURCE'] == 'title' and
hasattr(self, 'title')): hasattr(self, 'title')):
self.slug = slugify(self.title, self.slug = slugify(
settings.get('SLUG_SUBSTITUTIONS', ())) self.title,
regex_subs=settings.get('SLUG_REGEX_SUBSTITUTIONS', []))
elif (settings['SLUGIFY_SOURCE'] == 'basename' and elif (settings['SLUGIFY_SOURCE'] == 'basename' and
source_path is not None): source_path is not None):
basename = os.path.basename( basename = os.path.basename(
os.path.splitext(source_path)[0]) os.path.splitext(source_path)[0])
self.slug = slugify( self.slug = slugify(
basename, settings.get('SLUG_SUBSTITUTIONS', ())) basename,
regex_subs=settings.get('SLUG_REGEX_SUBSTITUTIONS', []))
self.source_path = source_path self.source_path = source_path

View file

@ -6,6 +6,7 @@ import inspect
import locale import locale
import logging import logging
import os import os
import re
from os.path import isabs from os.path import isabs
from posixpath import join as posix_join from posixpath import join as posix_join
@ -145,7 +146,12 @@ DEFAULT_CONFIG = {
'TEMPLATE_PAGES': {}, 'TEMPLATE_PAGES': {},
'TEMPLATE_EXTENSIONS': ['.html'], 'TEMPLATE_EXTENSIONS': ['.html'],
'IGNORE_FILES': ['.#*'], 'IGNORE_FILES': ['.#*'],
'SLUG_SUBSTITUTIONS': (), 'SLUG_REGEX_SUBSTITUTIONS': [
(r'[^\w\s-]', ''), # remove non-alphabetical/whitespace/'-' chars
(r'(?u)\A\s*', ''), # strip leading whitespace
(r'(?u)\s*\Z', ''), # strip trailing whitespace
(r'[-\s]+', '-'), # reduce multiple whitespace or '-' to single '-'
],
'INTRASITE_LINK_REGEX': '[{|](?P<what>.*?)[|}]', 'INTRASITE_LINK_REGEX': '[{|](?P<what>.*?)[|}]',
'SLUGIFY_SOURCE': 'title', 'SLUGIFY_SOURCE': 'title',
'CACHE_CONTENT': False, 'CACHE_CONTENT': False,
@ -164,79 +170,62 @@ PYGMENTS_RST_OPTIONS = None
def read_settings(path=None, override=None): def read_settings(path=None, override=None):
settings = override or {}
if path: if path:
local_settings = get_settings_from_file(path) settings = dict(get_settings_from_file(path), **settings)
# Make the paths relative to the settings file
if settings:
settings = handle_deprecated_settings(settings)
if path:
# Make relative paths absolute
def getabs(maybe_relative, base_path=path):
if isabs(maybe_relative):
return maybe_relative
return os.path.abspath(os.path.normpath(os.path.join(
os.path.dirname(base_path), maybe_relative)))
for p in ['PATH', 'OUTPUT_PATH', 'THEME', 'CACHE_PATH']: for p in ['PATH', 'OUTPUT_PATH', 'THEME', 'CACHE_PATH']:
if p in local_settings and local_settings[p] is not None \ if settings.get(p) is not None:
and not isabs(local_settings[p]): absp = getabs(settings[p])
absp = os.path.abspath(os.path.normpath(os.path.join( # THEME may be a name rather than a path
os.path.dirname(path), local_settings[p])))
if p != 'THEME' or os.path.exists(absp): if p != 'THEME' or os.path.exists(absp):
local_settings[p] = absp settings[p] = absp
if 'PLUGIN_PATH' in local_settings: if settings.get('PLUGIN_PATHS') is not None:
logger.warning('PLUGIN_PATH setting has been replaced by ' settings['PLUGIN_PATHS'] = [getabs(pluginpath)
'PLUGIN_PATHS, moving it to the new setting name.') for pluginpath
local_settings['PLUGIN_PATHS'] = local_settings['PLUGIN_PATH'] in settings['PLUGIN_PATHS']]
del local_settings['PLUGIN_PATH']
if 'JINJA_EXTENSIONS' in local_settings:
logger.warning('JINJA_EXTENSIONS setting has been deprecated, '
'moving it to JINJA_ENVIRONMENT setting.')
local_settings['JINJA_ENVIRONMENT']['extensions'] = \
local_settings['JINJA_EXTENSIONS']
del local_settings['JINJA_EXTENSIONS']
if isinstance(local_settings['PLUGIN_PATHS'], six.string_types):
logger.warning("Defining PLUGIN_PATHS setting as string "
"has been deprecated (should be a list)")
local_settings['PLUGIN_PATHS'] = [local_settings['PLUGIN_PATHS']]
elif local_settings['PLUGIN_PATHS'] is not None:
def getabs(path, pluginpath):
if isabs(pluginpath):
return pluginpath
else:
path_dirname = os.path.dirname(path)
path_joined = os.path.join(path_dirname, pluginpath)
path_normed = os.path.normpath(path_joined)
path_absolute = os.path.abspath(path_normed)
return path_absolute
pluginpath_list = [getabs(path, pluginpath) settings = dict(copy.deepcopy(DEFAULT_CONFIG), **settings)
for pluginpath settings = configure_settings(settings)
in local_settings['PLUGIN_PATHS']]
local_settings['PLUGIN_PATHS'] = pluginpath_list
else:
local_settings = copy.deepcopy(DEFAULT_CONFIG)
if override:
local_settings.update(override)
parsed_settings = configure_settings(local_settings)
# This is because there doesn't seem to be a way to pass extra # This is because there doesn't seem to be a way to pass extra
# parameters to docutils directive handlers, so we have to have a # parameters to docutils directive handlers, so we have to have a
# variable here that we'll import from within Pygments.run (see # variable here that we'll import from within Pygments.run (see
# rstdirectives.py) to see what the user defaults were. # rstdirectives.py) to see what the user defaults were.
global PYGMENTS_RST_OPTIONS global PYGMENTS_RST_OPTIONS
PYGMENTS_RST_OPTIONS = parsed_settings.get('PYGMENTS_RST_OPTIONS', None) PYGMENTS_RST_OPTIONS = settings.get('PYGMENTS_RST_OPTIONS', None)
return parsed_settings return settings
def get_settings_from_module(module=None, default_settings=DEFAULT_CONFIG): def get_settings_from_module(module=None):
"""Loads settings from a module, returns a dictionary.""" """Loads settings from a module, returns a dictionary."""
context = copy.deepcopy(default_settings) context = {}
if module is not None: if module is not None:
context.update( context.update(
(k, v) for k, v in inspect.getmembers(module) if k.isupper()) (k, v) for k, v in inspect.getmembers(module) if k.isupper())
return context return context
def get_settings_from_file(path, default_settings=DEFAULT_CONFIG): def get_settings_from_file(path):
"""Loads settings from a file path, returning a dict.""" """Loads settings from a file path, returning a dict."""
name, ext = os.path.splitext(os.path.basename(path)) name, ext = os.path.splitext(os.path.basename(path))
module = load_source(name, path) module = load_source(name, path)
return get_settings_from_module(module, default_settings=default_settings) return get_settings_from_module(module)
def get_jinja_environment(settings): def get_jinja_environment(settings):
@ -253,6 +242,149 @@ def get_jinja_environment(settings):
return settings return settings
def handle_deprecated_settings(settings):
    """Convert deprecated settings to their replacements, with warnings.

    The ``settings`` dict is modified in place and also returned. It may
    contain only the user-supplied values (defaults not merged in yet), so
    no key other than the deprecated one being handled is assumed to exist.

    Raises ``Exception`` if both a deprecated setting and its replacement
    are specified (``EXTRA_TEMPLATES_PATHS`` together with a non-empty
    ``THEME_TEMPLATES_OVERRIDES``, or any ``*_SUBSTITUTIONS`` together with
    any ``*_REGEX_SUBSTITUTIONS``).
    """

    # PLUGIN_PATH -> PLUGIN_PATHS
    if 'PLUGIN_PATH' in settings:
        logger.warning('PLUGIN_PATH setting has been replaced by '
                       'PLUGIN_PATHS, moving it to the new setting name.')
        settings['PLUGIN_PATHS'] = settings['PLUGIN_PATH']
        del settings['PLUGIN_PATH']

    # PLUGIN_PATHS: str -> [str]
    if isinstance(settings.get('PLUGIN_PATHS'), six.string_types):
        logger.warning("Defining PLUGIN_PATHS setting as string "
                       "has been deprecated (should be a list)")
        settings['PLUGIN_PATHS'] = [settings['PLUGIN_PATHS']]

    # JINJA_EXTENSIONS -> JINJA_ENVIRONMENT > extensions
    if 'JINJA_EXTENSIONS' in settings:
        logger.warning('JINJA_EXTENSIONS setting has been deprecated, '
                       'moving it to JINJA_ENVIRONMENT setting.')
        # JINJA_ENVIRONMENT may be absent when only user settings were
        # passed in; seed it from a copy of the default (if any) so the
        # shared default dict is never mutated.
        jinja_environment = settings.setdefault(
            'JINJA_ENVIRONMENT',
            dict(DEFAULT_CONFIG.get('JINJA_ENVIRONMENT', {})))
        jinja_environment['extensions'] = settings['JINJA_EXTENSIONS']
        del settings['JINJA_EXTENSIONS']

    # {ARTICLE,PAGE}_DIR -> {ARTICLE,PAGE}_PATHS
    for key in ['ARTICLE', 'PAGE']:
        old_key = key + '_DIR'
        new_key = key + '_PATHS'
        if old_key in settings:
            logger.warning(
                'Deprecated setting %s, moving it to %s list',
                old_key, new_key)
            settings[new_key] = [settings[old_key]]   # also make a list
            del settings[old_key]

    # EXTRA_TEMPLATES_PATHS -> THEME_TEMPLATES_OVERRIDES
    if 'EXTRA_TEMPLATES_PATHS' in settings:
        logger.warning('EXTRA_TEMPLATES_PATHS is deprecated use '
                       'THEME_TEMPLATES_OVERRIDES instead.')
        if settings.get('THEME_TEMPLATES_OVERRIDES'):
            raise Exception(
                'Setting both EXTRA_TEMPLATES_PATHS and '
                'THEME_TEMPLATES_OVERRIDES is not permitted. Please move to '
                'only setting THEME_TEMPLATES_OVERRIDES.')
        settings['THEME_TEMPLATES_OVERRIDES'] = \
            settings['EXTRA_TEMPLATES_PATHS']
        del settings['EXTRA_TEMPLATES_PATHS']

    # MD_EXTENSIONS -> MARKDOWN
    if 'MD_EXTENSIONS' in settings:
        logger.warning('MD_EXTENSIONS is deprecated use MARKDOWN '
                       'instead. Falling back to the default.')
        settings['MARKDOWN'] = DEFAULT_CONFIG['MARKDOWN']

    # LESS_GENERATOR -> Webassets plugin
    # FILES_TO_COPY -> STATIC_PATHS, EXTRA_PATH_METADATA
    # These have no automatic conversion; only a warning is issued.
    for old, new, doc in [
            ('LESS_GENERATOR', 'the Webassets plugin', None),
            ('FILES_TO_COPY', 'STATIC_PATHS and EXTRA_PATH_METADATA',
                'https://github.com/getpelican/pelican/'
                'blob/master/docs/settings.rst#path-metadata'),
    ]:
        if old in settings:
            message = 'The {} setting has been removed in favor of {}'.format(
                old, new)
            if doc:
                message += ', see {} for details'.format(doc)
            logger.warning(message)

    # PAGINATED_DIRECT_TEMPLATES -> PAGINATED_TEMPLATES
    if 'PAGINATED_DIRECT_TEMPLATES' in settings:
        message = 'The {} setting has been removed in favor of {}'.format(
            'PAGINATED_DIRECT_TEMPLATES', 'PAGINATED_TEMPLATES')
        logger.warning(message)

        # PAGINATED_TEMPLATES may be absent when only user settings were
        # passed in; start from a copy of the default (if any).
        paginated_templates = settings.setdefault(
            'PAGINATED_TEMPLATES',
            dict(DEFAULT_CONFIG.get('PAGINATED_TEMPLATES', {})))
        for t in settings['PAGINATED_DIRECT_TEMPLATES']:
            if t not in paginated_templates:
                paginated_templates[t] = None
        del settings['PAGINATED_DIRECT_TEMPLATES']

    # {SLUG,CATEGORY,TAG,AUTHOR}_SUBSTITUTIONS ->
    # {SLUG,CATEGORY,TAG,AUTHOR}_REGEX_SUBSTITUTIONS
    url_settings_url = \
        'http://docs.getpelican.com/en/latest/settings.html#url-settings'
    flavours = {'SLUG', 'CATEGORY', 'TAG', 'AUTHOR'}
    old_values = {f: settings[f + '_SUBSTITUTIONS']
                  for f in flavours if f + '_SUBSTITUTIONS' in settings}
    new_values = {f: settings[f + '_REGEX_SUBSTITUTIONS']
                  for f in flavours if f + '_REGEX_SUBSTITUTIONS' in settings}
    if old_values and new_values:
        raise Exception(
            'Setting both {new_key} and {old_key} (or variants thereof) is '
            'not permitted. Please move to only setting {new_key}.'
            .format(old_key='SLUG_SUBSTITUTIONS',
                    new_key='SLUG_REGEX_SUBSTITUTIONS'))
    if old_values:
        message = ('{} and variants thereof are deprecated and will be '
                   'removed in the future. Please use {} and variants thereof '
                   'instead. Check {}.'
                   .format('SLUG_SUBSTITUTIONS', 'SLUG_REGEX_SUBSTITUTIONS',
                           url_settings_url))
        logger.warning(message)
        if old_values.get('SLUG'):
            # Category and tag substitutions used to be applied on top of
            # the generic slug substitutions. Normalise both operands to
            # lists so a tuple-valued setting can be combined with a
            # list-valued one.
            for f in ('CATEGORY', 'TAG'):
                if old_values.get(f):
                    old_values[f] = (list(old_values['SLUG']) +
                                     list(old_values[f]))
            # Author slugs never took SLUG_SUBSTITUTIONS into account, so
            # pin them explicitly instead of letting them follow the
            # converted slug setting.
            old_values['AUTHOR'] = old_values.get('AUTHOR', [])
        for f in flavours:
            if old_values.get(f) is not None:
                regex_subs = []
                # by default will replace non-alphanum characters
                replace = True
                for tpl in old_values[f]:
                    # Old entries are (from, to) or (from, to, skip).
                    try:
                        src, dst, skip = tpl
                        if skip:
                            replace = False
                    except ValueError:
                        src, dst = tpl
                    # Old substitutions were literal text: escape the
                    # pattern and protect backslashes in the replacement.
                    regex_subs.append(
                        (re.escape(src), dst.replace('\\', r'\\')))

                if replace:
                    regex_subs += [
                        (r'[^\w\s-]', ''),
                        (r'(?u)\A\s*', ''),
                        (r'(?u)\s*\Z', ''),
                        (r'[-\s]+', '-'),
                    ]
                else:
                    # A "skip" flag keeps non-alphanumerics for the whole
                    # string; only strip surrounding whitespace.
                    regex_subs += [
                        (r'(?u)\A\s*', ''),
                        (r'(?u)\s*\Z', ''),
                    ]
                settings[f + '_REGEX_SUBSTITUTIONS'] = regex_subs
            settings.pop(f + '_SUBSTITUTIONS', None)

    return settings
def configure_settings(settings): def configure_settings(settings):
"""Provide optimizations, error checking, and warnings for the given """Provide optimizations, error checking, and warnings for the given
settings. settings.
@ -377,31 +509,6 @@ def configure_settings(settings):
key=lambda r: r[0], key=lambda r: r[0],
) )
# move {ARTICLE,PAGE}_DIR -> {ARTICLE,PAGE}_PATHS
for key in ['ARTICLE', 'PAGE']:
old_key = key + '_DIR'
new_key = key + '_PATHS'
if old_key in settings:
logger.warning(
'Deprecated setting %s, moving it to %s list',
old_key, new_key)
settings[new_key] = [settings[old_key]] # also make a list
del settings[old_key]
# Deprecated warning of EXTRA_TEMPLATES_PATHS
if 'EXTRA_TEMPLATES_PATHS' in settings:
logger.warning('EXTRA_TEMPLATES_PATHS is deprecated use '
'THEME_TEMPLATES_OVERRIDES instead.')
if ('THEME_TEMPLATES_OVERRIDES' in settings and
settings['THEME_TEMPLATES_OVERRIDES']):
raise Exception(
'Setting both EXTRA_TEMPLATES_PATHS and '
'THEME_TEMPLATES_OVERRIDES is not permitted. Please move to '
'only setting THEME_TEMPLATES_OVERRIDES.')
settings['THEME_TEMPLATES_OVERRIDES'] = \
settings['EXTRA_TEMPLATES_PATHS']
del settings['EXTRA_TEMPLATES_PATHS']
# Save people from accidentally setting a string rather than a list # Save people from accidentally setting a string rather than a list
path_keys = ( path_keys = (
'ARTICLE_EXCLUDES', 'ARTICLE_EXCLUDES',
@ -425,12 +532,6 @@ def configure_settings(settings):
PATH_KEY) PATH_KEY)
settings[PATH_KEY] = DEFAULT_CONFIG[PATH_KEY] settings[PATH_KEY] = DEFAULT_CONFIG[PATH_KEY]
# Deprecated warning of MD_EXTENSIONS
if 'MD_EXTENSIONS' in settings:
logger.warning('MD_EXTENSIONS is deprecated use MARKDOWN '
'instead. Falling back to the default.')
settings['MARKDOWN'] = DEFAULT_CONFIG['MARKDOWN']
# Add {PAGE,ARTICLE}_PATHS to {ARTICLE,PAGE}_EXCLUDES # Add {PAGE,ARTICLE}_PATHS to {ARTICLE,PAGE}_EXCLUDES
mutually_exclusive = ('ARTICLE', 'PAGE') mutually_exclusive = ('ARTICLE', 'PAGE')
for type_1, type_2 in [mutually_exclusive, mutually_exclusive[::-1]]: for type_1, type_2 in [mutually_exclusive, mutually_exclusive[::-1]]:
@ -443,27 +544,4 @@ def configure_settings(settings):
except KeyError: except KeyError:
continue # setting not specified, nothing to do continue # setting not specified, nothing to do
for old, new, doc in [
('LESS_GENERATOR', 'the Webassets plugin', None),
('FILES_TO_COPY', 'STATIC_PATHS and EXTRA_PATH_METADATA',
'https://github.com/getpelican/pelican/'
'blob/master/docs/settings.rst#path-metadata'),
]:
if old in settings:
message = 'The {} setting has been removed in favor of {}'.format(
old, new)
if doc:
message += ', see {} for details'.format(doc)
logger.warning(message)
if 'PAGINATED_DIRECT_TEMPLATES' in settings:
message = 'The {} setting has been removed in favor of {}'.format(
'PAGINATED_DIRECT_TEMPLATES', 'PAGINATED_TEMPLATES')
logger.warning(message)
for t in settings['PAGINATED_DIRECT_TEMPLATES']:
if t not in settings['PAGINATED_TEMPLATES']:
settings['PAGINATED_TEMPLATES'][t] = None
del settings['PAGINATED_DIRECT_TEMPLATES']
return settings return settings

View file

@ -497,7 +497,13 @@ class TestArticle(TestPage):
def test_slugify_category_author(self): def test_slugify_category_author(self):
settings = get_settings() settings = get_settings()
settings['SLUG_SUBSTITUTIONS'] = [('C#', 'csharp')] settings['SLUG_REGEX_SUBSTITUTIONS'] = [
(r'C#', 'csharp'),
(r'[^\w\s-]', ''),
(r'(?u)\A\s*', ''),
(r'(?u)\s*\Z', ''),
(r'[-\s]+', '-'),
]
settings['ARTICLE_URL'] = '{author}/{category}/{slug}/' settings['ARTICLE_URL'] = '{author}/{category}/{slug}/'
settings['ARTICLE_SAVE_AS'] = '{author}/{category}/{slug}/index.html' settings['ARTICLE_SAVE_AS'] = '{author}/{category}/{slug}/index.html'
article_kwargs = self._copy_page_kwargs() article_kwargs = self._copy_page_kwargs()
@ -513,9 +519,13 @@ class TestArticle(TestPage):
def test_slugify_with_author_substitutions(self): def test_slugify_with_author_substitutions(self):
settings = get_settings() settings = get_settings()
settings['AUTHOR_SUBSTITUTIONS'] = [ settings['AUTHOR_REGEX_SUBSTITUTIONS'] = [
('Alexander Todorov', 'atodorov', False), ('Alexander Todorov', 'atodorov'),
('Krasimir Tsonev', 'krasimir', False), ('Krasimir Tsonev', 'krasimir'),
(r'[^\w\s-]', ''),
(r'(?u)\A\s*', ''),
(r'(?u)\s*\Z', ''),
(r'[-\s]+', '-'),
] ]
settings['ARTICLE_URL'] = 'blog/{author}/{slug}/' settings['ARTICLE_URL'] = 'blog/{author}/{slug}/'
settings['ARTICLE_SAVE_AS'] = 'blog/{author}/{slug}/index.html' settings['ARTICLE_SAVE_AS'] = 'blog/{author}/{slug}/index.html'
@ -530,7 +540,9 @@ class TestArticle(TestPage):
def test_slugify_category_with_dots(self): def test_slugify_category_with_dots(self):
settings = get_settings() settings = get_settings()
settings['CATEGORY_SUBSTITUTIONS'] = [('Fedora QA', 'fedora.qa', True)] settings['CATEGORY_REGEX_SUBSTITUTIONS'] = [
('Fedora QA', 'fedora.qa'),
]
settings['ARTICLE_URL'] = '{category}/{slug}/' settings['ARTICLE_URL'] = '{category}/{slug}/'
article_kwargs = self._copy_page_kwargs() article_kwargs = self._copy_page_kwargs()
article_kwargs['metadata']['category'] = Category('Fedora QA', article_kwargs['metadata']['category'] = Category('Fedora QA',
@ -542,7 +554,9 @@ class TestArticle(TestPage):
def test_slugify_tags_with_dots(self): def test_slugify_tags_with_dots(self):
settings = get_settings() settings = get_settings()
settings['TAG_SUBSTITUTIONS'] = [('Fedora QA', 'fedora.qa', True)] settings['TAG_REGEX_SUBSTITUTIONS'] = [
('Fedora QA', 'fedora.qa'),
]
settings['ARTICLE_URL'] = '{tag}/{slug}/' settings['ARTICLE_URL'] = '{tag}/{slug}/'
article_kwargs = self._copy_page_kwargs() article_kwargs = self._copy_page_kwargs()
article_kwargs['metadata']['tag'] = Tag('Fedora QA', settings) article_kwargs['metadata']['tag'] = Tag('Fedora QA', settings)

View file

@ -6,6 +6,7 @@ import os
import re import re
from codecs import open from codecs import open
from pelican.settings import DEFAULT_CONFIG
from pelican.tests.support import (mute, skipIfNoExecutable, temporary_folder, from pelican.tests.support import (mute, skipIfNoExecutable, temporary_folder,
unittest) unittest)
from pelican.tools.pelican_import import (blogger2fields, build_header, from pelican.tools.pelican_import import (blogger2fields, build_header,
@ -133,10 +134,11 @@ class TestWordpressXmlImporter(unittest.TestCase):
with temporary_folder() as temp: with temporary_folder() as temp:
fnames = list(silent_f2p(test_posts, 'markdown', fnames = list(silent_f2p(test_posts, 'markdown',
temp, dircat=True)) temp, dircat=True))
subs = DEFAULT_CONFIG['SLUG_REGEX_SUBSTITUTIONS']
index = 0 index = 0
for post in test_posts: for post in test_posts:
name = post[2] name = post[2]
category = slugify(post[5][0]) category = slugify(post[5][0], regex_subs=subs)
name += '.md' name += '.md'
filename = os.path.join(category, name) filename = os.path.join(category, name)
out_name = fnames[index] out_name = fnames[index]
@ -208,11 +210,12 @@ class TestWordpressXmlImporter(unittest.TestCase):
with temporary_folder() as temp: with temporary_folder() as temp:
fnames = list(silent_f2p(test_posts, 'markdown', temp, fnames = list(silent_f2p(test_posts, 'markdown', temp,
wp_custpost=True, dircat=True)) wp_custpost=True, dircat=True))
subs = DEFAULT_CONFIG['SLUG_REGEX_SUBSTITUTIONS']
index = 0 index = 0
for post in test_posts: for post in test_posts:
name = post[2] name = post[2]
kind = post[8] kind = post[8]
category = slugify(post[5][0]) category = slugify(post[5][0], regex_subs=subs)
name += '.md' name += '.md'
filename = os.path.join(kind, category, name) filename = os.path.join(kind, category, name)
out_name = fnames[index] out_name = fnames[index]

View file

@ -9,7 +9,8 @@ from sys import platform
from pelican.settings import (DEFAULT_CONFIG, DEFAULT_THEME, from pelican.settings import (DEFAULT_CONFIG, DEFAULT_THEME,
configure_settings, read_settings) configure_settings, handle_deprecated_settings,
read_settings)
from pelican.tests.support import unittest from pelican.tests.support import unittest
@ -128,7 +129,7 @@ class TestSettingsConfiguration(unittest.TestCase):
settings['ARTICLE_DIR'] = 'foo' settings['ARTICLE_DIR'] = 'foo'
settings['PAGE_DIR'] = 'bar' settings['PAGE_DIR'] = 'bar'
configure_settings(settings) settings = handle_deprecated_settings(settings)
self.assertEqual(settings['ARTICLE_PATHS'], ['foo']) self.assertEqual(settings['ARTICLE_PATHS'], ['foo'])
self.assertEqual(settings['PAGE_PATHS'], ['bar']) self.assertEqual(settings['PAGE_PATHS'], ['bar'])
@ -171,7 +172,7 @@ class TestSettingsConfiguration(unittest.TestCase):
settings = self.settings settings = self.settings
settings['EXTRA_TEMPLATES_PATHS'] = ['/foo/bar', '/ha'] settings['EXTRA_TEMPLATES_PATHS'] = ['/foo/bar', '/ha']
configure_settings(settings) settings = handle_deprecated_settings(settings)
self.assertEqual(settings['THEME_TEMPLATES_OVERRIDES'], self.assertEqual(settings['THEME_TEMPLATES_OVERRIDES'],
['/foo/bar', '/ha']) ['/foo/bar', '/ha'])
@ -181,7 +182,7 @@ class TestSettingsConfiguration(unittest.TestCase):
settings = self.settings settings = self.settings
settings['PAGINATED_DIRECT_TEMPLATES'] = ['index', 'archives'] settings['PAGINATED_DIRECT_TEMPLATES'] = ['index', 'archives']
settings['PAGINATED_TEMPLATES'] = {'index': 10, 'category': None} settings['PAGINATED_TEMPLATES'] = {'index': 10, 'category': None}
settings = configure_settings(settings) settings = handle_deprecated_settings(settings)
self.assertEqual(settings['PAGINATED_TEMPLATES'], self.assertEqual(settings['PAGINATED_TEMPLATES'],
{'index': 10, 'category': None, 'archives': None}) {'index': 10, 'category': None, 'archives': None})
self.assertNotIn('PAGINATED_DIRECT_TEMPLATES', settings) self.assertNotIn('PAGINATED_DIRECT_TEMPLATES', settings)
@ -191,4 +192,82 @@ class TestSettingsConfiguration(unittest.TestCase):
settings['EXTRA_TEMPLATES_PATHS'] = ['/ha'] settings['EXTRA_TEMPLATES_PATHS'] = ['/ha']
settings['THEME_TEMPLATES_OVERRIDES'] = ['/foo/bar'] settings['THEME_TEMPLATES_OVERRIDES'] = ['/foo/bar']
self.assertRaises(Exception, configure_settings, settings) self.assertRaises(Exception, handle_deprecated_settings, settings)
def test_slug_and_slug_regex_substitutions_exception(self):
    """A deprecated ``*_SUBSTITUTIONS`` setting combined with a new
    ``*_REGEX_SUBSTITUTIONS`` setting must be rejected."""
    conflicting = {
        'SLUG_REGEX_SUBSTITUTIONS': [('C++', 'cpp')],
        'TAG_SUBSTITUTIONS': [('C#', 'csharp')],
    }
    self.assertRaises(Exception, handle_deprecated_settings, conflicting)
def test_deprecated_slug_substitutions(self):
    """Legacy ``*_SUBSTITUTIONS`` settings are translated into the
    matching ``*_REGEX_SUBSTITUTIONS`` settings."""
    default_slug_regex_subs = self.settings['SLUG_REGEX_SUBSTITUTIONS']
    cpp_regex = [(r'C\+\+', 'cpp')]
    csharp_regex = [(r'C\#', 'csharp')]
    author_regex = [(r'Alexander\ Todorov', 'atodorov')]

    # Nothing deprecated configured: no new-style key may be created.
    converted = handle_deprecated_settings({})
    for regex_key in ('SLUG_REGEX_SUBSTITUTIONS',
                      'TAG_REGEX_SUBSTITUTIONS',
                      'CATEGORY_REGEX_SUBSTITUTIONS',
                      'AUTHOR_REGEX_SUBSTITUTIONS'):
        self.assertNotIn(regex_key, converted)

    # Only SLUG_SUBSTITUTIONS: the slug and author variants are produced,
    # the category and tag variants are left unset.
    converted = handle_deprecated_settings({
        'SLUG_SUBSTITUTIONS': [('C++', 'cpp')],
    })
    self.assertEqual(converted.get('SLUG_REGEX_SUBSTITUTIONS'),
                     cpp_regex + default_slug_regex_subs)
    self.assertNotIn('TAG_REGEX_SUBSTITUTIONS', converted)
    self.assertNotIn('CATEGORY_REGEX_SUBSTITUTIONS', converted)
    self.assertEqual(converted.get('AUTHOR_REGEX_SUBSTITUTIONS'),
                     default_slug_regex_subs)

    # Only the category/tag/author variants: each is converted on its own
    # and no slug variant is produced.
    converted = handle_deprecated_settings({
        'TAG_SUBSTITUTIONS': [('C#', 'csharp')],
        'CATEGORY_SUBSTITUTIONS': [('C#', 'csharp')],
        'AUTHOR_SUBSTITUTIONS': [('Alexander Todorov', 'atodorov')],
    })
    self.assertNotIn('SLUG_REGEX_SUBSTITUTIONS', converted)
    self.assertEqual(converted['TAG_REGEX_SUBSTITUTIONS'],
                     csharp_regex + default_slug_regex_subs)
    self.assertEqual(converted['CATEGORY_REGEX_SUBSTITUTIONS'],
                     csharp_regex + default_slug_regex_subs)
    self.assertEqual(converted['AUTHOR_REGEX_SUBSTITUTIONS'],
                     author_regex + default_slug_regex_subs)

    # All four variants: slug substitutions are prepended to the tag and
    # category ones, but not to the author ones.
    converted = handle_deprecated_settings({
        'SLUG_SUBSTITUTIONS': [('C++', 'cpp')],
        'TAG_SUBSTITUTIONS': [('C#', 'csharp')],
        'CATEGORY_SUBSTITUTIONS': [('C#', 'csharp')],
        'AUTHOR_SUBSTITUTIONS': [('Alexander Todorov', 'atodorov')],
    })
    self.assertEqual(converted['TAG_REGEX_SUBSTITUTIONS'],
                     cpp_regex + csharp_regex + default_slug_regex_subs)
    self.assertEqual(converted['CATEGORY_REGEX_SUBSTITUTIONS'],
                     cpp_regex + csharp_regex + default_slug_regex_subs)
    self.assertEqual(converted['AUTHOR_REGEX_SUBSTITUTIONS'],
                     author_regex + default_slug_regex_subs)

    # A true 'skip' flag keeps non-alphanumerics (only whitespace is
    # stripped); a false one behaves like the two-element form.
    converted = handle_deprecated_settings({
        'SLUG_SUBSTITUTIONS': [('C++', 'cpp', True)],
        'AUTHOR_SUBSTITUTIONS': [('Alexander Todorov', 'atodorov',
                                  False)],
    })
    strip_only = [(r'(?u)\A\s*', ''), (r'(?u)\s*\Z', '')]
    self.assertEqual(converted.get('SLUG_REGEX_SUBSTITUTIONS'),
                     cpp_regex + strip_only)
    self.assertEqual(converted['AUTHOR_REGEX_SUBSTITUTIONS'],
                     author_regex + default_slug_regex_subs)

View file

@ -55,30 +55,29 @@ class TestURLWrapper(unittest.TestCase):
self.assertEqual(author, author_equal) self.assertEqual(author, author_equal)
cat_ascii = Category('指導書', settings={}) cat_ascii = Category('指導書', settings={})
self.assertEqual(cat_ascii, u'zhi-dao-shu') self.assertEqual(cat_ascii, u'zhi dao shu')
def test_slugify_with_substitutions_and_dots(self): def test_slugify_with_substitutions_and_dots(self):
tag = Tag('Tag Dot', tag = Tag('Tag Dot', settings={'TAG_REGEX_SUBSTITUTIONS': [
settings={ ('Tag Dot', 'tag.dot'),
'TAG_SUBSTITUTIONS': [('Tag Dot', 'tag.dot', True)] ]})
})
cat = Category('Category Dot', cat = Category('Category Dot',
settings={ settings={'CATEGORY_REGEX_SUBSTITUTIONS': [
'CATEGORY_SUBSTITUTIONS': (('Category Dot', ('Category Dot', 'cat.dot'),
'cat.dot', ]})
True),)
})
self.assertEqual(tag.slug, 'tag.dot') self.assertEqual(tag.slug, 'tag.dot')
self.assertEqual(cat.slug, 'cat.dot') self.assertEqual(cat.slug, 'cat.dot')
def test_author_slug_substitutions(self): def test_author_slug_substitutions(self):
settings = { settings = {'AUTHOR_REGEX_SUBSTITUTIONS': [
'AUTHOR_SUBSTITUTIONS': [ ('Alexander Todorov', 'atodorov'),
('Alexander Todorov', 'atodorov', False), ('Krasimir Tsonev', 'krasimir'),
('Krasimir Tsonev', 'krasimir', False), (r'[^\w\s-]', ''),
] (r'(?u)\A\s*', ''),
} (r'(?u)\s*\Z', ''),
(r'[-\s]+', '-'),
]}
author1 = Author('Mr. Senko', settings=settings) author1 = Author('Mr. Senko', settings=settings)
author2 = Author('Alexander Todorov', settings=settings) author2 = Author('Alexander Todorov', settings=settings)

View file

@ -119,8 +119,11 @@ class TestUtils(LoggedTestCase):
('大飯原発4号機、18日夜起動へ', ('大飯原発4号機、18日夜起動へ',
'da-fan-yuan-fa-4hao-ji-18ri-ye-qi-dong-he'),) 'da-fan-yuan-fa-4hao-ji-18ri-ye-qi-dong-he'),)
settings = read_settings()
subs = settings['SLUG_REGEX_SUBSTITUTIONS']
for value, expected in samples: for value, expected in samples:
self.assertEqual(utils.slugify(value), expected) self.assertEqual(utils.slugify(value, regex_subs=subs), expected)
def test_slugify_substitute(self): def test_slugify_substitute(self):
@ -129,21 +132,27 @@ class TestUtils(LoggedTestCase):
('c++, c#, C#, C++', 'cpp-c-sharp-c-sharp-cpp'), ('c++, c#, C#, C++', 'cpp-c-sharp-c-sharp-cpp'),
('c++-streams', 'cpp-streams'),) ('c++-streams', 'cpp-streams'),)
subs = (('C++', 'CPP'), ('C#', 'C-SHARP')) settings = read_settings()
subs = [
(r'C\+\+', 'CPP'),
(r'C#', 'C-SHARP'),
] + settings['SLUG_REGEX_SUBSTITUTIONS']
for value, expected in samples: for value, expected in samples:
self.assertEqual(utils.slugify(value, subs), expected) self.assertEqual(utils.slugify(value, regex_subs=subs), expected)
def test_slugify_substitute_and_keeping_non_alphanum(self): def test_slugify_substitute_and_keeping_non_alphanum(self):
samples = (('Fedora QA', 'fedora.qa'), samples = (('Fedora QA', 'fedora.qa'),
('C++ is used by Fedora QA', 'cpp is used by fedora.qa'), ('C++ is used by Fedora QA', 'cpp is used by fedora.qa'),
('C++ is based on C', 'cpp-is-based-on-c'), ('C++ is based on C', 'cpp is based on c'),
('C+++ test C+ test', 'cpp-test-c-test'),) ('C+++ test C+ test', 'cpp+ test c+ test'),)
subs = (('Fedora QA', 'fedora.qa', True), subs = [
('c++', 'cpp'),) (r'Fedora QA', 'fedora.qa'),
(r'c\+\+', 'cpp'),
]
for value, expected in samples: for value, expected in samples:
self.assertEqual(utils.slugify(value, subs), expected) self.assertEqual(utils.slugify(value, regex_subs=subs), expected)
def test_get_relative_path(self): def test_get_relative_path(self):

View file

@ -17,6 +17,7 @@ from six.moves.urllib.request import urlretrieve
# because logging.setLoggerClass has to be called before logging.getLogger # because logging.setLoggerClass has to be called before logging.getLogger
from pelican.log import init from pelican.log import init
from pelican.settings import read_settings
from pelican.utils import SafeDatetime, slugify from pelican.utils import SafeDatetime, slugify
try: try:
@ -291,6 +292,8 @@ def dc2fields(file):
print("%i posts read." % len(posts)) print("%i posts read." % len(posts))
settings = read_settings()
subs = settings['SLUG_REGEX_SUBSTITUTIONS']
for post in posts: for post in posts:
fields = post.split('","') fields = post.split('","')
@ -383,8 +386,9 @@ def dc2fields(file):
kind = 'article' # TODO: Recognise pages kind = 'article' # TODO: Recognise pages
status = 'published' # TODO: Find a way for draft posts status = 'published' # TODO: Find a way for draft posts
yield (post_title, content, slugify(post_title), post_creadt, author, yield (post_title, content, slugify(post_title, regex_subs=subs),
categories, tags, status, kind, post_format) post_creadt, author, categories, tags, status, kind,
post_format)
def posterous2fields(api_token, email, password): def posterous2fields(api_token, email, password):
@ -418,6 +422,8 @@ def posterous2fields(api_token, email, password):
page = 1 page = 1
posts = get_posterous_posts(api_token, email, password, page) posts = get_posterous_posts(api_token, email, password, page)
settings = read_settings()
subs = settings['SLUG_REGEX_SUBSTITUTIONS']
while len(posts) > 0: while len(posts) > 0:
posts = get_posterous_posts(api_token, email, password, page) posts = get_posterous_posts(api_token, email, password, page)
page += 1 page += 1
@ -425,7 +431,7 @@ def posterous2fields(api_token, email, password):
for post in posts: for post in posts:
slug = post.get('slug') slug = post.get('slug')
if not slug: if not slug:
slug = slugify(post.get('title')) slug = slugify(post.get('title'), regex_subs=subs)
tags = [tag.get('name') for tag in post.get('tags')] tags = [tag.get('name') for tag in post.get('tags')]
raw_date = post.get('display_date') raw_date = post.get('display_date')
date_object = SafeDatetime.strptime( date_object = SafeDatetime.strptime(
@ -469,13 +475,15 @@ def tumblr2fields(api_key, blogname):
offset = 0 offset = 0
posts = get_tumblr_posts(api_key, blogname, offset) posts = get_tumblr_posts(api_key, blogname, offset)
settings = read_settings()
subs = settings['SLUG_REGEX_SUBSTITUTIONS']
while len(posts) > 0: while len(posts) > 0:
for post in posts: for post in posts:
title = \ title = \
post.get('title') or \ post.get('title') or \
post.get('source_title') or \ post.get('source_title') or \
post.get('type').capitalize() post.get('type').capitalize()
slug = post.get('slug') or slugify(title) slug = post.get('slug') or slugify(title, regex_subs=subs)
tags = post.get('tags') tags = post.get('tags')
timestamp = post.get('timestamp') timestamp = post.get('timestamp')
date = SafeDatetime.fromtimestamp(int(timestamp)).strftime( date = SafeDatetime.fromtimestamp(int(timestamp)).strftime(
@ -552,6 +560,8 @@ def feed2fields(file):
"""Read a feed and yield pelican fields""" """Read a feed and yield pelican fields"""
import feedparser import feedparser
d = feedparser.parse(file) d = feedparser.parse(file)
settings = read_settings()
subs = settings['SLUG_REGEX_SUBSTITUTIONS']
for entry in d.entries: for entry in d.entries:
date = (entry.updated_parsed.strftime('%Y-%m-%d %H:%M') date = (entry.updated_parsed.strftime('%Y-%m-%d %H:%M')
if hasattr(entry, 'updated_parsed') else None) if hasattr(entry, 'updated_parsed') else None)
@ -559,7 +569,7 @@ def feed2fields(file):
tags = ([e['term'] for e in entry.tags] tags = ([e['term'] for e in entry.tags]
if hasattr(entry, 'tags') else None) if hasattr(entry, 'tags') else None)
slug = slugify(entry.title) slug = slugify(entry.title, regex_subs=subs)
kind = 'article' kind = 'article'
yield (entry.title, entry.description, slug, date, yield (entry.title, entry.description, slug, date,
author, [], tags, None, kind, 'html') author, [], tags, None, kind, 'html')
@ -621,7 +631,7 @@ def get_ext(out_markup, in_markup='html'):
def get_out_filename(output_path, filename, ext, kind, def get_out_filename(output_path, filename, ext, kind,
dirpage, dircat, categories, wp_custpost): dirpage, dircat, categories, wp_custpost, slug_subs):
filename = os.path.basename(filename) filename = os.path.basename(filename)
# Enforce filename restrictions for various filesystems at once; see # Enforce filename restrictions for various filesystems at once; see
@ -647,12 +657,12 @@ def get_out_filename(output_path, filename, ext, kind,
# create subdirectories with category names # create subdirectories with category names
elif kind != 'article': elif kind != 'article':
if wp_custpost: if wp_custpost:
typename = slugify(kind) typename = slugify(kind, regex_subs=slug_subs)
else: else:
typename = '' typename = ''
kind = 'article' kind = 'article'
if dircat and (len(categories) > 0): if dircat and (len(categories) > 0):
catname = slugify(categories[0]) catname = slugify(categories[0], regex_subs=slug_subs)
else: else:
catname = '' catname = ''
out_filename = os.path.join(output_path, typename, out_filename = os.path.join(output_path, typename,
@ -661,7 +671,7 @@ def get_out_filename(output_path, filename, ext, kind,
os.makedirs(os.path.join(output_path, typename, catname)) os.makedirs(os.path.join(output_path, typename, catname))
# option to put files in directories with categories names # option to put files in directories with categories names
elif dircat and (len(categories) > 0): elif dircat and (len(categories) > 0):
catname = slugify(categories[0]) catname = slugify(categories[0], regex_subs=slug_subs)
out_filename = os.path.join(output_path, catname, filename + ext) out_filename = os.path.join(output_path, catname, filename + ext)
if not os.path.isdir(os.path.join(output_path, catname)): if not os.path.isdir(os.path.join(output_path, catname)):
os.mkdir(os.path.join(output_path, catname)) os.mkdir(os.path.join(output_path, catname))
@ -768,6 +778,9 @@ def fields2pelican(
'requested import action.') 'requested import action.')
exit(error) exit(error)
settings = read_settings()
slug_subs = settings['SLUG_REGEX_SUBSTITUTIONS']
for (title, content, filename, date, author, categories, tags, status, for (title, content, filename, date, author, categories, tags, status,
kind, in_markup) in fields: kind, in_markup) in fields:
if filter_author and filter_author != author: if filter_author and filter_author != author:
@ -796,7 +809,7 @@ def fields2pelican(
out_filename = get_out_filename( out_filename = get_out_filename(
output_path, filename, ext, kind, dirpage, dircat, output_path, filename, ext, kind, dirpage, dircat,
categories, wp_custpost) categories, wp_custpost, slug_subs)
print(out_filename) print(out_filename)
if in_markup in ('html', 'wp-html'): if in_markup in ('html', 'wp-html'):

View file

@ -36,8 +36,9 @@ class URLWrapper(object):
@property @property
def slug(self): def slug(self):
if self._slug is None: if self._slug is None:
self._slug = slugify(self.name, self._slug = slugify(
self.settings.get('SLUG_SUBSTITUTIONS', ())) self.name,
regex_subs=self.settings.get('SLUG_REGEX_SUBSTITUTIONS', []))
return self._slug return self._slug
@slug.setter @slug.setter
@ -56,8 +57,8 @@ class URLWrapper(object):
return hash(self.slug) return hash(self.slug)
def _normalize_key(self, key): def _normalize_key(self, key):
subs = self.settings.get('SLUG_SUBSTITUTIONS', ()) subs = self.settings.get('SLUG_REGEX_SUBSTITUTIONS', [])
return six.text_type(slugify(key, subs)) return six.text_type(slugify(key, regex_subs=subs))
def __eq__(self, other): def __eq__(self, other):
if isinstance(other, self.__class__): if isinstance(other, self.__class__):
@ -115,10 +116,11 @@ class Category(URLWrapper):
@property @property
def slug(self): def slug(self):
if self._slug is None: if self._slug is None:
substitutions = self.settings.get('SLUG_SUBSTITUTIONS', ()) if 'CATEGORY_REGEX_SUBSTITUTIONS' in self.settings:
substitutions += tuple(self.settings.get('CATEGORY_SUBSTITUTIONS', subs = self.settings['CATEGORY_REGEX_SUBSTITUTIONS']
())) else:
self._slug = slugify(self.name, substitutions) subs = self.settings.get('SLUG_REGEX_SUBSTITUTIONS', [])
self._slug = slugify(self.name, regex_subs=subs)
return self._slug return self._slug
@ -129,9 +131,11 @@ class Tag(URLWrapper):
@property @property
def slug(self): def slug(self):
if self._slug is None: if self._slug is None:
substitutions = self.settings.get('SLUG_SUBSTITUTIONS', ()) if 'TAG_REGEX_SUBSTITUTIONS' in self.settings:
substitutions += tuple(self.settings.get('TAG_SUBSTITUTIONS', ())) subs = self.settings['TAG_REGEX_SUBSTITUTIONS']
self._slug = slugify(self.name, substitutions) else:
subs = self.settings.get('SLUG_REGEX_SUBSTITUTIONS', [])
self._slug = slugify(self.name, regex_subs=subs)
return self._slug return self._slug
@ -139,6 +143,9 @@ class Author(URLWrapper):
@property @property
def slug(self): def slug(self):
if self._slug is None: if self._slug is None:
self._slug = slugify(self.name, if 'AUTHOR_REGEX_SUBSTITUTIONS' in self.settings:
self.settings.get('AUTHOR_SUBSTITUTIONS', ())) subs = self.settings['AUTHOR_REGEX_SUBSTITUTIONS']
else:
subs = self.settings.get('SLUG_REGEX_SUBSTITUTIONS', [])
self._slug = slugify(self.name, regex_subs=subs)
return self._slug return self._slug

View file

@ -263,13 +263,14 @@ def pelican_open(filename, mode='rb', strip_crs=(sys.platform == 'win32')):
yield content yield content
def slugify(value, substitutions=()): def slugify(value, regex_subs=()):
""" """
Normalizes string, converts to lowercase, removes non-alpha characters, Normalizes string, converts to lowercase, removes non-alpha characters,
and converts spaces to hyphens. and converts spaces to hyphens.
Took from Django sources. Took from Django sources.
""" """
# TODO Maybe steal again from current Django 1.5dev # TODO Maybe steal again from current Django 1.5dev
value = Markup(value).striptags() value = Markup(value).striptags()
# value must be unicode per se # value must be unicode per se
@ -281,37 +282,16 @@ def slugify(value, substitutions=()):
if isinstance(value, six.binary_type): if isinstance(value, six.binary_type):
value = value.decode('ascii') value = value.decode('ascii')
# still unicode # still unicode
value = unicodedata.normalize('NFKD', value).lower() value = unicodedata.normalize('NFKD', value)
# backward compatible covert from 2-tuples to 3-tuples for src, dst in regex_subs:
new_subs = [] value = re.sub(src, dst, value, flags=re.IGNORECASE)
for tpl in substitutions:
try:
src, dst, skip = tpl
except ValueError:
src, dst = tpl
skip = False
new_subs.append((src, dst, skip))
substitutions = tuple(new_subs)
# by default will replace non-alphanum characters # convert to lowercase
replace = True value = value.lower()
for src, dst, skip in substitutions:
orig_value = value
value = value.replace(src.lower(), dst.lower())
# if replacement was made then skip non-alphanum
# replacement if instructed to do so
if value != orig_value:
replace = replace and not skip
if replace:
value = re.sub(r'[^\w\s-]', '', value).strip()
value = re.sub(r'[-\s]+', '-', value)
else:
value = value.strip()
# we want only ASCII chars # we want only ASCII chars
value = value.encode('ascii', 'ignore') value = value.encode('ascii', 'ignore').strip()
# but Pelican should generally use only unicode # but Pelican should generally use only unicode
return value.decode('ascii') return value.decode('ascii')