Merge pull request #2326 from oulenz/slug_substitutions

Control slug substitutions from settings with regex
2018-10-31 20:08:01 +01:00 · 2018-10-31 20:08:01 +01:00 · 461f535d04
commit 461f535d04
parent 96a689eaef 5199fa51ea
12 changed files with 409 additions and 235 deletions
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@ -6,6 +6,8 @@ Next release
 * New signal: ``feed_generated``
 * Replace Fabric by Invoke and ``fabfile.py`` template by ``tasks.py``.
 * Replace ``SLUG_SUBSTITUTIONS`` (and friends) by ``SLUG_REGEX_SUBSTITUTIONS``
  for more fine-grained control
 3.7.1 (2017-01-10)
 ==================
--- a/docs/settings.rst
+++ b/docs/settings.rst
@ -519,27 +519,6 @@ respectively.
   The URL to use for per-day archives of your posts. Used only if you have the
   ``{url}`` placeholder in ``PAGINATION_PATTERNS``.
 .. data:: SLUG_SUBSTITUTIONS = ()
   Substitutions to make prior to stripping out non-alphanumerics when
   generating slugs. Specified as a list of 3-tuples of ``(from, to, skip)``
   which are applied in order. ``skip`` is a boolean indicating whether or not
   to skip replacement of non-alphanumeric characters.  Useful for backward
   compatibility with existing URLs.
 .. data:: AUTHOR_SUBSTITUTIONS = ()
   Substitutions for authors. ``SLUG_SUBSTITUTIONS`` is not taken into account
   here!
 .. data:: CATEGORY_SUBSTITUTIONS = ()
   Added to ``SLUG_SUBSTITUTIONS`` for categories.
 .. data:: TAG_SUBSTITUTIONS = ()
   Added to ``SLUG_SUBSTITUTIONS`` for tags.
 .. note::
    If you do not want one or more of the default pages to be created (e.g.,
@ -547,24 +526,6 @@ respectively.
    set the corresponding ``*_SAVE_AS`` setting to ``''`` to prevent the
    relevant page from being generated.
 .. note::
    Substitutions are applied in order with the side effect that keeping
    non-alphanum characters applies to the whole string when a replacement
    is made.
    For example if you have the following setting::
       SLUG_SUBSTITUTIONS = (('C++', 'cpp'), ('keep dot', 'keep.dot', True))
    the string ``Keep Dot`` will be converted to ``keep.dot``, however
    ``C++ will keep dot`` will be converted to ``cpp will keep.dot`` instead
    of ``cpp-will-keep.dot``!
    If you want to keep non-alphanum characters only for tags or categories
    but not other slugs then configure ``TAG_SUBSTITUTIONS`` and
    ``CATEGORY_SUBSTITUTIONS`` respectively!
 Pelican can optionally create per-year, per-month, and per-day archives of your
 posts. These secondary archives are disabled by default but are automatically
 enabled if you supply format strings for their respective ``_SAVE_AS`` settings.
@ -626,6 +587,33 @@ URLs for direct template pages are theme-dependent. Some themes use
 corresponding ``*_URL`` setting as string, while others hard-code them:
 ``'archives.html'``, ``'authors.html'``, ``'categories.html'``, ``'tags.html'``.
 .. data:: SLUG_REGEX_SUBSTITUTIONS = [
        (r'[^\w\s-]', ''),  # remove non-alphanumeric/whitespace/'-' chars
        (r'(?u)\A\s*', ''),  # strip leading whitespace
        (r'(?u)\s*\Z', ''),  # strip trailing whitespace
        (r'[-\s]+', '-'),  # reduce multiple whitespace or '-' to single '-'
    ]
   Regex substitutions to make when generating slugs of articles and pages.
   Specified as a list of pairs of ``(from, to)`` which are applied in order,
   ignoring case. The default substitutions have the effect of removing
   non-alphanumeric characters and converting internal whitespace to dashes.
   Apart from these substitutions, slugs are always converted to lowercase
   ASCII characters and leading and trailing whitespace is stripped. Custom
   substitutions are useful for backward compatibility with existing URLs.
 .. data:: AUTHOR_REGEX_SUBSTITUTIONS = SLUG_REGEX_SUBSTITUTIONS
   Regex substitutions for author slugs. Defaults to ``SLUG_REGEX_SUBSTITUTIONS``.
 .. data:: CATEGORY_REGEX_SUBSTITUTIONS = SLUG_REGEX_SUBSTITUTIONS
   Regex substitutions for category slugs. Defaults to ``SLUG_REGEX_SUBSTITUTIONS``.
 .. data:: TAG_REGEX_SUBSTITUTIONS = SLUG_REGEX_SUBSTITUTIONS
   Regex substitutions for tag slugs. Defaults to ``SLUG_REGEX_SUBSTITUTIONS``.
 Time and Date
 =============
--- a/pelican/contents.py
+++ b/pelican/contents.py
@ -98,14 +98,16 @@ class Content(object):
        if not hasattr(self, 'slug'):
            if (settings['SLUGIFY_SOURCE'] == 'title' and
                    hasattr(self, 'title')):
-                self.slug = slugify(self.title,
+                self.slug = slugify(
-                                    settings.get('SLUG_SUBSTITUTIONS', ()))
+                    self.title,
                    regex_subs=settings.get('SLUG_REGEX_SUBSTITUTIONS', []))
            elif (settings['SLUGIFY_SOURCE'] == 'basename' and
                    source_path is not None):
                basename = os.path.basename(
                    os.path.splitext(source_path)[0])
                self.slug = slugify(
-                    basename, settings.get('SLUG_SUBSTITUTIONS', ()))
+                    basename,
                    regex_subs=settings.get('SLUG_REGEX_SUBSTITUTIONS', []))
        self.source_path = source_path
--- a/pelican/settings.py
+++ b/pelican/settings.py
@ -6,6 +6,7 @@ import inspect
 import locale
 import logging
 import os
 import re
 from os.path import isabs
 from posixpath import join as posix_join
@ -145,7 +146,12 @@ DEFAULT_CONFIG = {
    'TEMPLATE_PAGES': {},
    'TEMPLATE_EXTENSIONS': ['.html'],
    'IGNORE_FILES': ['.#*'],
-    'SLUG_SUBSTITUTIONS': (),
+    'SLUG_REGEX_SUBSTITUTIONS': [
        (r'[^\w\s-]', ''),  # remove non-alphabetical/whitespace/'-' chars
        (r'(?u)\A\s*', ''),  # strip leading whitespace
        (r'(?u)\s*\Z', ''),  # strip trailing whitespace
        (r'[-\s]+', '-'),  # reduce multiple whitespace or '-' to single '-'
    ],
    'INTRASITE_LINK_REGEX': '[{|](?P<what>.*?)[|}]',
    'SLUGIFY_SOURCE': 'title',
    'CACHE_CONTENT': False,
@ -164,79 +170,62 @@ PYGMENTS_RST_OPTIONS = None
 def read_settings(path=None, override=None):
    settings = override or {}
    if path:
-        local_settings = get_settings_from_file(path)
+        settings = dict(get_settings_from_file(path), **settings)
-        # Make the paths relative to the settings file
+
    if settings:
        settings = handle_deprecated_settings(settings)
    if path:
        # Make relative paths absolute
        def getabs(maybe_relative, base_path=path):
            if isabs(maybe_relative):
                return maybe_relative
            return os.path.abspath(os.path.normpath(os.path.join(
                os.path.dirname(base_path), maybe_relative)))
        for p in ['PATH', 'OUTPUT_PATH', 'THEME', 'CACHE_PATH']:
-            if p in local_settings and local_settings[p] is not None \
+            if settings.get(p) is not None:
-                    and not isabs(local_settings[p]):
+                absp = getabs(settings[p])
-                absp = os.path.abspath(os.path.normpath(os.path.join(
+                # THEME may be a name rather than a path
                    os.path.dirname(path), local_settings[p])))
                if p != 'THEME' or os.path.exists(absp):
-                    local_settings[p] = absp
+                    settings[p] = absp
-        if 'PLUGIN_PATH' in local_settings:
+        if settings.get('PLUGIN_PATHS') is not None:
-            logger.warning('PLUGIN_PATH setting has been replaced by '
+            settings['PLUGIN_PATHS'] = [getabs(pluginpath)
-                           'PLUGIN_PATHS, moving it to the new setting name.')
+                                        for pluginpath
-            local_settings['PLUGIN_PATHS'] = local_settings['PLUGIN_PATH']
+                                        in settings['PLUGIN_PATHS']]
            del local_settings['PLUGIN_PATH']
        if 'JINJA_EXTENSIONS' in local_settings:
            logger.warning('JINJA_EXTENSIONS setting has been deprecated, '
                           'moving it to JINJA_ENVIRONMENT setting.')
            local_settings['JINJA_ENVIRONMENT']['extensions'] = \
                local_settings['JINJA_EXTENSIONS']
            del local_settings['JINJA_EXTENSIONS']
        if isinstance(local_settings['PLUGIN_PATHS'], six.string_types):
            logger.warning("Defining PLUGIN_PATHS setting as string "
                           "has been deprecated (should be a list)")
            local_settings['PLUGIN_PATHS'] = [local_settings['PLUGIN_PATHS']]
        elif local_settings['PLUGIN_PATHS'] is not None:
            def getabs(path, pluginpath):
                if isabs(pluginpath):
                    return pluginpath
                else:
                    path_dirname = os.path.dirname(path)
                    path_joined = os.path.join(path_dirname, pluginpath)
                    path_normed = os.path.normpath(path_joined)
                    path_absolute = os.path.abspath(path_normed)
                    return path_absolute
-            pluginpath_list = [getabs(path, pluginpath)
+    settings = dict(copy.deepcopy(DEFAULT_CONFIG), **settings)
-                               for pluginpath
+    settings = configure_settings(settings)
                               in local_settings['PLUGIN_PATHS']]
            local_settings['PLUGIN_PATHS'] = pluginpath_list
    else:
        local_settings = copy.deepcopy(DEFAULT_CONFIG)
    if override:
        local_settings.update(override)
    parsed_settings = configure_settings(local_settings)
    # This is because there doesn't seem to be a way to pass extra
    # parameters to docutils directive handlers, so we have to have a
    # variable here that we'll import from within Pygments.run (see
    # rstdirectives.py) to see what the user defaults were.
    global PYGMENTS_RST_OPTIONS
-    PYGMENTS_RST_OPTIONS = parsed_settings.get('PYGMENTS_RST_OPTIONS', None)
+    PYGMENTS_RST_OPTIONS = settings.get('PYGMENTS_RST_OPTIONS', None)
-    return parsed_settings
+    return settings
-def get_settings_from_module(module=None, default_settings=DEFAULT_CONFIG):
+def get_settings_from_module(module=None):
    """Loads settings from a module, returns a dictionary."""
-    context = copy.deepcopy(default_settings)
+    context = {}
    if module is not None:
        context.update(
            (k, v) for k, v in inspect.getmembers(module) if k.isupper())
    return context
-def get_settings_from_file(path, default_settings=DEFAULT_CONFIG):
+def get_settings_from_file(path):
    """Loads settings from a file path, returning a dict."""
    name, ext = os.path.splitext(os.path.basename(path))
    module = load_source(name, path)
-    return get_settings_from_module(module, default_settings=default_settings)
+    return get_settings_from_module(module)
 def get_jinja_environment(settings):
@ -253,6 +242,149 @@ def get_jinja_environment(settings):
    return settings
 def handle_deprecated_settings(settings):
    """Translate deprecated settings to their replacements, in place.

    Every deprecated key found in ``settings`` produces a warning and, where
    a replacement exists, is moved or converted to the new key. The dict is
    not required to contain default values: keys that a conversion needs to
    write into (``JINJA_ENVIRONMENT``, ``PAGINATED_TEMPLATES``) are created
    from a copy of the corresponding ``DEFAULT_CONFIG`` entry when missing.

    :param settings: dict of settings; modified in place.
    :return: the same ``settings`` dict.
    :raises Exception: if ``EXTRA_TEMPLATES_PATHS`` is set together with a
        non-empty ``THEME_TEMPLATES_OVERRIDES``, or if any
        ``*_SUBSTITUTIONS`` setting is combined with any
        ``*_REGEX_SUBSTITUTIONS`` setting.
    """

    # PLUGIN_PATH -> PLUGIN_PATHS
    if 'PLUGIN_PATH' in settings:
        logger.warning('PLUGIN_PATH setting has been replaced by '
                       'PLUGIN_PATHS, moving it to the new setting name.')
        settings['PLUGIN_PATHS'] = settings['PLUGIN_PATH']
        del settings['PLUGIN_PATH']

    # PLUGIN_PATHS: str -> [str]
    if isinstance(settings.get('PLUGIN_PATHS'), six.string_types):
        logger.warning("Defining PLUGIN_PATHS setting as string "
                       "has been deprecated (should be a list)")
        settings['PLUGIN_PATHS'] = [settings['PLUGIN_PATHS']]

    # JINJA_EXTENSIONS -> JINJA_ENVIRONMENT > extensions
    if 'JINJA_EXTENSIONS' in settings:
        logger.warning('JINJA_EXTENSIONS setting has been deprecated, '
                       'moving it to JINJA_ENVIRONMENT setting.')
        # JINJA_ENVIRONMENT may be absent when only user settings were
        # passed in. Start from a private copy of the default instead of
        # raising KeyError, and never mutate DEFAULT_CONFIG itself.
        if 'JINJA_ENVIRONMENT' not in settings:
            settings['JINJA_ENVIRONMENT'] = copy.deepcopy(
                DEFAULT_CONFIG.get('JINJA_ENVIRONMENT', {}))
        settings['JINJA_ENVIRONMENT']['extensions'] = \
            settings['JINJA_EXTENSIONS']
        del settings['JINJA_EXTENSIONS']

    # {ARTICLE,PAGE}_DIR -> {ARTICLE,PAGE}_PATHS
    for key in ['ARTICLE', 'PAGE']:
        old_key = key + '_DIR'
        new_key = key + '_PATHS'
        if old_key in settings:
            logger.warning(
                'Deprecated setting %s, moving it to %s list',
                old_key, new_key)
            settings[new_key] = [settings[old_key]]   # also make a list
            del settings[old_key]

    # EXTRA_TEMPLATES_PATHS -> THEME_TEMPLATES_OVERRIDES
    if 'EXTRA_TEMPLATES_PATHS' in settings:
        logger.warning('EXTRA_TEMPLATES_PATHS is deprecated use '
                       'THEME_TEMPLATES_OVERRIDES instead.')
        if ('THEME_TEMPLATES_OVERRIDES' in settings and
                settings['THEME_TEMPLATES_OVERRIDES']):
            raise Exception(
                'Setting both EXTRA_TEMPLATES_PATHS and '
                'THEME_TEMPLATES_OVERRIDES is not permitted. Please move to '
                'only setting THEME_TEMPLATES_OVERRIDES.')
        settings['THEME_TEMPLATES_OVERRIDES'] = \
            settings['EXTRA_TEMPLATES_PATHS']
        del settings['EXTRA_TEMPLATES_PATHS']

    # MD_EXTENSIONS -> MARKDOWN
    if 'MD_EXTENSIONS' in settings:
        logger.warning('MD_EXTENSIONS is deprecated use MARKDOWN '
                       'instead. Falling back to the default.')
        settings['MARKDOWN'] = DEFAULT_CONFIG['MARKDOWN']

    # LESS_GENERATOR -> Webassets plugin
    # FILES_TO_COPY -> STATIC_PATHS, EXTRA_PATH_METADATA
    # These settings have no automatic conversion; only a warning is issued.
    for old, new, doc in [
            ('LESS_GENERATOR', 'the Webassets plugin', None),
            ('FILES_TO_COPY', 'STATIC_PATHS and EXTRA_PATH_METADATA',
                'https://github.com/getpelican/pelican/'
                'blob/master/docs/settings.rst#path-metadata'),
    ]:
        if old in settings:
            message = 'The {} setting has been removed in favor of {}'.format(
                old, new)
            if doc:
                message += ', see {} for details'.format(doc)
            logger.warning(message)

    # PAGINATED_DIRECT_TEMPLATES -> PAGINATED_TEMPLATES
    if 'PAGINATED_DIRECT_TEMPLATES' in settings:
        message = 'The {} setting has been removed in favor of {}'.format(
            'PAGINATED_DIRECT_TEMPLATES', 'PAGINATED_TEMPLATES')
        logger.warning(message)
        # Same reasoning as for JINJA_ENVIRONMENT above: the target dict may
        # not exist yet, so seed it from a copy of the default.
        if 'PAGINATED_TEMPLATES' not in settings:
            settings['PAGINATED_TEMPLATES'] = copy.deepcopy(
                DEFAULT_CONFIG.get('PAGINATED_TEMPLATES', {}))
        for t in settings['PAGINATED_DIRECT_TEMPLATES']:
            if t not in settings['PAGINATED_TEMPLATES']:
                settings['PAGINATED_TEMPLATES'][t] = None
        del settings['PAGINATED_DIRECT_TEMPLATES']

    # {SLUG,CATEGORY,TAG,AUTHOR}_SUBSTITUTIONS ->
    # {SLUG,CATEGORY,TAG,AUTHOR}_REGEX_SUBSTITUTIONS
    url_settings_url = \
        'http://docs.getpelican.com/en/latest/settings.html#url-settings'
    flavours = {'SLUG', 'CATEGORY', 'TAG', 'AUTHOR'}
    old_values = {f: settings[f + '_SUBSTITUTIONS']
                  for f in flavours if f + '_SUBSTITUTIONS' in settings}
    new_values = {f: settings[f + '_REGEX_SUBSTITUTIONS']
                  for f in flavours if f + '_REGEX_SUBSTITUTIONS' in settings}
    if old_values and new_values:
        raise Exception(
            'Setting both {new_key} and {old_key} (or variants thereof) is '
            'not permitted. Please move to only setting {new_key}.'
            .format(old_key='SLUG_SUBSTITUTIONS',
                    new_key='SLUG_REGEX_SUBSTITUTIONS'))
    if old_values:
        message = ('{} and variants thereof are deprecated and will be '
                   'removed in the future. Please use {} and variants thereof '
                   'instead. Check {}.'
                   .format('SLUG_SUBSTITUTIONS', 'SLUG_REGEX_SUBSTITUTIONS',
                           url_settings_url))
        logger.warning(message)
        if old_values.get('SLUG'):
            # Category and tag substitutions were applied on top of the
            # generic slug substitutions, so prepend the latter.
            for f in {'CATEGORY', 'TAG'}:
                if old_values.get(f):
                    # list() on both sides: one setting may be a tuple and
                    # the other a list, which cannot be concatenated.
                    old_values[f] = (list(old_values['SLUG']) +
                                     list(old_values[f]))
            # Author slugs ignored SLUG_SUBSTITUTIONS; give them an explicit
            # (possibly empty) list so they do not pick up the converted
            # slug substitutions.
            old_values['AUTHOR'] = old_values.get('AUTHOR', [])
        for f in flavours:
            if old_values.get(f) is not None:
                regex_subs = []
                # by default will replace non-alphanum characters
                replace = True
                for tpl in old_values[f]:
                    try:
                        src, dst, skip = tpl
                        if skip:
                            # a single skip flag disables stripping for the
                            # whole list of substitutions
                            replace = False
                    except ValueError:
                        src, dst = tpl
                    # old substitutions were literal strings: escape the
                    # pattern and protect backslashes in the replacement
                    regex_subs.append(
                        (re.escape(src), dst.replace('\\', r'\\')))

                if replace:
                    regex_subs += [
                        (r'[^\w\s-]', ''),
                        (r'(?u)\A\s*', ''),
                        (r'(?u)\s*\Z', ''),
                        (r'[-\s]+', '-'),
                    ]
                else:
                    regex_subs += [
                        (r'(?u)\A\s*', ''),
                        (r'(?u)\s*\Z', ''),
                    ]
                settings[f + '_REGEX_SUBSTITUTIONS'] = regex_subs
            settings.pop(f + '_SUBSTITUTIONS', None)

    return settings
 def configure_settings(settings):
    """Provide optimizations, error checking, and warnings for the given
    settings.
@ -377,31 +509,6 @@ def configure_settings(settings):
        key=lambda r: r[0],
    )
    # move {ARTICLE,PAGE}_DIR -> {ARTICLE,PAGE}_PATHS
    for key in ['ARTICLE', 'PAGE']:
        old_key = key + '_DIR'
        new_key = key + '_PATHS'
        if old_key in settings:
            logger.warning(
                'Deprecated setting %s, moving it to %s list',
                old_key, new_key)
            settings[new_key] = [settings[old_key]]   # also make a list
            del settings[old_key]
    # Deprecated warning of EXTRA_TEMPLATES_PATHS
    if 'EXTRA_TEMPLATES_PATHS' in settings:
        logger.warning('EXTRA_TEMPLATES_PATHS is deprecated use '
                       'THEME_TEMPLATES_OVERRIDES instead.')
        if ('THEME_TEMPLATES_OVERRIDES' in settings and
                settings['THEME_TEMPLATES_OVERRIDES']):
            raise Exception(
                'Setting both EXTRA_TEMPLATES_PATHS and '
                'THEME_TEMPLATES_OVERRIDES is not permitted. Please move to '
                'only setting THEME_TEMPLATES_OVERRIDES.')
        settings['THEME_TEMPLATES_OVERRIDES'] = \
            settings['EXTRA_TEMPLATES_PATHS']
        del settings['EXTRA_TEMPLATES_PATHS']
    # Save people from accidentally setting a string rather than a list
    path_keys = (
        'ARTICLE_EXCLUDES',
@ -425,12 +532,6 @@ def configure_settings(settings):
                           PATH_KEY)
            settings[PATH_KEY] = DEFAULT_CONFIG[PATH_KEY]
    # Deprecated warning of MD_EXTENSIONS
    if 'MD_EXTENSIONS' in settings:
        logger.warning('MD_EXTENSIONS is deprecated use MARKDOWN '
                       'instead. Falling back to the default.')
        settings['MARKDOWN'] = DEFAULT_CONFIG['MARKDOWN']
    # Add {PAGE,ARTICLE}_PATHS to {ARTICLE,PAGE}_EXCLUDES
    mutually_exclusive = ('ARTICLE', 'PAGE')
    for type_1, type_2 in [mutually_exclusive, mutually_exclusive[::-1]]:
@ -443,27 +544,4 @@ def configure_settings(settings):
        except KeyError:
            continue            # setting not specified, nothing to do
    for old, new, doc in [
            ('LESS_GENERATOR', 'the Webassets plugin', None),
            ('FILES_TO_COPY', 'STATIC_PATHS and EXTRA_PATH_METADATA',
                'https://github.com/getpelican/pelican/'
                'blob/master/docs/settings.rst#path-metadata'),
    ]:
        if old in settings:
            message = 'The {} setting has been removed in favor of {}'.format(
                old, new)
            if doc:
                message += ', see {} for details'.format(doc)
            logger.warning(message)
    if 'PAGINATED_DIRECT_TEMPLATES' in settings:
        message = 'The {} setting has been removed in favor of {}'.format(
            'PAGINATED_DIRECT_TEMPLATES', 'PAGINATED_TEMPLATES')
        logger.warning(message)
        for t in settings['PAGINATED_DIRECT_TEMPLATES']:
            if t not in settings['PAGINATED_TEMPLATES']:
                settings['PAGINATED_TEMPLATES'][t] = None
        del settings['PAGINATED_DIRECT_TEMPLATES']
    return settings
--- a/pelican/tests/test_contents.py
+++ b/pelican/tests/test_contents.py
@ -497,7 +497,13 @@ class TestArticle(TestPage):
    def test_slugify_category_author(self):
        settings = get_settings()
-        settings['SLUG_SUBSTITUTIONS'] = [('C#', 'csharp')]
+        settings['SLUG_REGEX_SUBSTITUTIONS'] = [
            (r'C#', 'csharp'),
            (r'[^\w\s-]', ''),
            (r'(?u)\A\s*', ''),
            (r'(?u)\s*\Z', ''),
            (r'[-\s]+', '-'),
        ]
        settings['ARTICLE_URL'] = '{author}/{category}/{slug}/'
        settings['ARTICLE_SAVE_AS'] = '{author}/{category}/{slug}/index.html'
        article_kwargs = self._copy_page_kwargs()
@ -513,9 +519,13 @@ class TestArticle(TestPage):
    def test_slugify_with_author_substitutions(self):
        settings = get_settings()
-        settings['AUTHOR_SUBSTITUTIONS'] = [
+        settings['AUTHOR_REGEX_SUBSTITUTIONS'] = [
-                                    ('Alexander Todorov', 'atodorov', False),
+            ('Alexander Todorov', 'atodorov'),
-                                    ('Krasimir Tsonev', 'krasimir', False),
+            ('Krasimir Tsonev', 'krasimir'),
            (r'[^\w\s-]', ''),
            (r'(?u)\A\s*', ''),
            (r'(?u)\s*\Z', ''),
            (r'[-\s]+', '-'),
        ]
        settings['ARTICLE_URL'] = 'blog/{author}/{slug}/'
        settings['ARTICLE_SAVE_AS'] = 'blog/{author}/{slug}/index.html'
@ -530,7 +540,9 @@ class TestArticle(TestPage):
    def test_slugify_category_with_dots(self):
        settings = get_settings()
-        settings['CATEGORY_SUBSTITUTIONS'] = [('Fedora QA', 'fedora.qa', True)]
+        settings['CATEGORY_REGEX_SUBSTITUTIONS'] = [
            ('Fedora QA', 'fedora.qa'),
        ]
        settings['ARTICLE_URL'] = '{category}/{slug}/'
        article_kwargs = self._copy_page_kwargs()
        article_kwargs['metadata']['category'] = Category('Fedora QA',
@ -542,7 +554,9 @@ class TestArticle(TestPage):
    def test_slugify_tags_with_dots(self):
        settings = get_settings()
-        settings['TAG_SUBSTITUTIONS'] = [('Fedora QA', 'fedora.qa', True)]
+        settings['TAG_REGEX_SUBSTITUTIONS'] = [
            ('Fedora QA', 'fedora.qa'),
        ]
        settings['ARTICLE_URL'] = '{tag}/{slug}/'
        article_kwargs = self._copy_page_kwargs()
        article_kwargs['metadata']['tag'] = Tag('Fedora QA', settings)
--- a/pelican/tests/test_importer.py
+++ b/pelican/tests/test_importer.py
@ -6,6 +6,7 @@ import os
 import re
 from codecs import open
 from pelican.settings import DEFAULT_CONFIG
 from pelican.tests.support import (mute, skipIfNoExecutable, temporary_folder,
                                   unittest)
 from pelican.tools.pelican_import import (blogger2fields, build_header,
@ -133,10 +134,11 @@ class TestWordpressXmlImporter(unittest.TestCase):
        with temporary_folder() as temp:
            fnames = list(silent_f2p(test_posts, 'markdown',
                                     temp, dircat=True))
        subs = DEFAULT_CONFIG['SLUG_REGEX_SUBSTITUTIONS']
        index = 0
        for post in test_posts:
            name = post[2]
-            category = slugify(post[5][0])
+            category = slugify(post[5][0], regex_subs=subs)
            name += '.md'
            filename = os.path.join(category, name)
            out_name = fnames[index]
@ -208,11 +210,12 @@ class TestWordpressXmlImporter(unittest.TestCase):
        with temporary_folder() as temp:
            fnames = list(silent_f2p(test_posts, 'markdown', temp,
                                     wp_custpost=True, dircat=True))
        subs = DEFAULT_CONFIG['SLUG_REGEX_SUBSTITUTIONS']
        index = 0
        for post in test_posts:
            name = post[2]
            kind = post[8]
-            category = slugify(post[5][0])
+            category = slugify(post[5][0], regex_subs=subs)
            name += '.md'
            filename = os.path.join(kind, category, name)
            out_name = fnames[index]
--- a/pelican/tests/test_settings.py
+++ b/pelican/tests/test_settings.py
@ -9,7 +9,8 @@ from sys import platform
 from pelican.settings import (DEFAULT_CONFIG, DEFAULT_THEME,
-                              configure_settings, read_settings)
+                              configure_settings, handle_deprecated_settings,
                              read_settings)
 from pelican.tests.support import unittest
@ -128,7 +129,7 @@ class TestSettingsConfiguration(unittest.TestCase):
        settings['ARTICLE_DIR'] = 'foo'
        settings['PAGE_DIR'] = 'bar'
-        configure_settings(settings)
+        settings = handle_deprecated_settings(settings)
        self.assertEqual(settings['ARTICLE_PATHS'], ['foo'])
        self.assertEqual(settings['PAGE_PATHS'], ['bar'])
@ -171,7 +172,7 @@ class TestSettingsConfiguration(unittest.TestCase):
        settings = self.settings
        settings['EXTRA_TEMPLATES_PATHS'] = ['/foo/bar', '/ha']
-        configure_settings(settings)
+        settings = handle_deprecated_settings(settings)
        self.assertEqual(settings['THEME_TEMPLATES_OVERRIDES'],
                         ['/foo/bar', '/ha'])
@ -181,7 +182,7 @@ class TestSettingsConfiguration(unittest.TestCase):
        settings = self.settings
        settings['PAGINATED_DIRECT_TEMPLATES'] = ['index', 'archives']
        settings['PAGINATED_TEMPLATES'] = {'index': 10, 'category': None}
-        settings = configure_settings(settings)
+        settings = handle_deprecated_settings(settings)
        self.assertEqual(settings['PAGINATED_TEMPLATES'],
                         {'index': 10, 'category': None, 'archives': None})
        self.assertNotIn('PAGINATED_DIRECT_TEMPLATES', settings)
@ -191,4 +192,82 @@ class TestSettingsConfiguration(unittest.TestCase):
        settings['EXTRA_TEMPLATES_PATHS'] = ['/ha']
        settings['THEME_TEMPLATES_OVERRIDES'] = ['/foo/bar']
-        self.assertRaises(Exception, configure_settings, settings)
+        self.assertRaises(Exception, handle_deprecated_settings, settings)
    def test_slug_and_slug_regex_substitutions_exception(self):
        """Combining an old-style and a regex substitution setting raises."""
        conflicting = {
            'SLUG_REGEX_SUBSTITUTIONS': [('C++', 'cpp')],
            'TAG_SUBSTITUTIONS': [('C#', 'csharp')],
        }
        with self.assertRaises(Exception):
            handle_deprecated_settings(conflicting)
    def test_deprecated_slug_substitutions(self):
        """Check conversion of ``*_SUBSTITUTIONS`` into regex settings."""
        defaults = self.settings['SLUG_REGEX_SUBSTITUTIONS']
        cpp_regex = [(r'C\+\+', 'cpp')]
        csharp_regex = [(r'C\#', 'csharp')]
        todorov_regex = [(r'Alexander\ Todorov', 'atodorov')]

        # Nothing deprecated goes in, so no regex setting may come out
        converted = handle_deprecated_settings({})
        for absent in ('SLUG_REGEX_SUBSTITUTIONS',
                       'TAG_REGEX_SUBSTITUTIONS',
                       'CATEGORY_REGEX_SUBSTITUTIONS',
                       'AUTHOR_REGEX_SUBSTITUTIONS'):
            self.assertNotIn(absent, converted)

        # Only SLUG_SUBSTITUTIONS: the slug and author regex settings are
        # produced, the category and tag ones are left unset
        converted = handle_deprecated_settings(
            {'SLUG_SUBSTITUTIONS': [('C++', 'cpp')]})
        self.assertEqual(converted.get('SLUG_REGEX_SUBSTITUTIONS'),
                         cpp_regex + defaults)
        self.assertNotIn('TAG_REGEX_SUBSTITUTIONS', converted)
        self.assertNotIn('CATEGORY_REGEX_SUBSTITUTIONS', converted)
        self.assertEqual(converted.get('AUTHOR_REGEX_SUBSTITUTIONS'),
                         defaults)

        # Only the category/tag/author variants: each gets its regex
        # counterpart, the generic slug regex setting is left unset
        converted = handle_deprecated_settings({
            'TAG_SUBSTITUTIONS': [('C#', 'csharp')],
            'CATEGORY_SUBSTITUTIONS': [('C#', 'csharp')],
            'AUTHOR_SUBSTITUTIONS': [('Alexander Todorov', 'atodorov')],
        })
        self.assertNotIn('SLUG_REGEX_SUBSTITUTIONS', converted)
        self.assertEqual(converted['TAG_REGEX_SUBSTITUTIONS'],
                         csharp_regex + defaults)
        self.assertEqual(converted['CATEGORY_REGEX_SUBSTITUTIONS'],
                         csharp_regex + defaults)
        self.assertEqual(converted['AUTHOR_REGEX_SUBSTITUTIONS'],
                         todorov_regex + defaults)

        # All four variants: tag and category are prefixed with the slug
        # substitutions, author is not
        converted = handle_deprecated_settings({
            'SLUG_SUBSTITUTIONS': [('C++', 'cpp')],
            'TAG_SUBSTITUTIONS': [('C#', 'csharp')],
            'CATEGORY_SUBSTITUTIONS': [('C#', 'csharp')],
            'AUTHOR_SUBSTITUTIONS': [('Alexander Todorov', 'atodorov')],
        })
        self.assertEqual(converted['TAG_REGEX_SUBSTITUTIONS'],
                         cpp_regex + csharp_regex + defaults)
        self.assertEqual(converted['CATEGORY_REGEX_SUBSTITUTIONS'],
                         cpp_regex + csharp_regex + defaults)
        self.assertEqual(converted['AUTHOR_REGEX_SUBSTITUTIONS'],
                         todorov_regex + defaults)

        # The legacy third tuple element ('skip') controls whether the
        # non-alphanumeric stripping rules are appended
        converted = handle_deprecated_settings({
            'SLUG_SUBSTITUTIONS': [('C++', 'cpp', True)],
            'AUTHOR_SUBSTITUTIONS': [('Alexander Todorov', 'atodorov',
                                      False)],
        })
        whitespace_only = [(r'(?u)\A\s*', ''), (r'(?u)\s*\Z', '')]
        self.assertEqual(converted.get('SLUG_REGEX_SUBSTITUTIONS'),
                         cpp_regex + whitespace_only)
        self.assertEqual(converted['AUTHOR_REGEX_SUBSTITUTIONS'],
                         todorov_regex + defaults)
--- a/pelican/tests/test_urlwrappers.py
+++ b/pelican/tests/test_urlwrappers.py
@ -55,30 +55,29 @@ class TestURLWrapper(unittest.TestCase):
        self.assertEqual(author, author_equal)
        cat_ascii = Category('指導書', settings={})
-        self.assertEqual(cat_ascii, u'zhi-dao-shu')
+        self.assertEqual(cat_ascii, u'zhi dao shu')
    def test_slugify_with_substitutions_and_dots(self):
-        tag = Tag('Tag Dot',
+        tag = Tag('Tag Dot', settings={'TAG_REGEX_SUBSTITUTIONS': [
-                  settings={
+            ('Tag Dot', 'tag.dot'),
-                        'TAG_SUBSTITUTIONS': [('Tag Dot', 'tag.dot', True)]
+        ]})
                    })
        cat = Category('Category Dot',
-                       settings={
+                       settings={'CATEGORY_REGEX_SUBSTITUTIONS': [
-                        'CATEGORY_SUBSTITUTIONS': (('Category Dot',
+                           ('Category Dot', 'cat.dot'),
-                                                    'cat.dot',
+                       ]})
                                                    True),)
                        })
        self.assertEqual(tag.slug, 'tag.dot')
        self.assertEqual(cat.slug, 'cat.dot')
    def test_author_slug_substitutions(self):
-        settings = {
+        settings = {'AUTHOR_REGEX_SUBSTITUTIONS': [
-            'AUTHOR_SUBSTITUTIONS': [
+            ('Alexander Todorov', 'atodorov'),
-                                    ('Alexander Todorov', 'atodorov', False),
+            ('Krasimir Tsonev', 'krasimir'),
-                                    ('Krasimir Tsonev', 'krasimir', False),
+            (r'[^\w\s-]', ''),
-            ]
+            (r'(?u)\A\s*', ''),
-        }
+            (r'(?u)\s*\Z', ''),
            (r'[-\s]+', '-'),
        ]}
        author1 = Author('Mr. Senko', settings=settings)
        author2 = Author('Alexander Todorov', settings=settings)
--- a/pelican/tests/test_utils.py
+++ b/pelican/tests/test_utils.py
@ -119,8 +119,11 @@ class TestUtils(LoggedTestCase):
                   ('大飯原発４号機、１８日夜起動へ',
                    'da-fan-yuan-fa-4hao-ji-18ri-ye-qi-dong-he'),)
+        settings = read_settings()
+        subs = settings['SLUG_REGEX_SUBSTITUTIONS']
        for value, expected in samples:
-            self.assertEqual(utils.slugify(value), expected)
+            self.assertEqual(utils.slugify(value, regex_subs=subs), expected)
    def test_slugify_substitute(self):
@ -129,21 +132,27 @@ class TestUtils(LoggedTestCase):
                   ('c++, c#, C#, C++', 'cpp-c-sharp-c-sharp-cpp'),
                   ('c++-streams', 'cpp-streams'),)
-        subs = (('C++', 'CPP'), ('C#', 'C-SHARP'))
+        settings = read_settings()
+        subs = [
+            (r'C\+\+', 'CPP'),
+            (r'C#', 'C-SHARP'),
+        ] + settings['SLUG_REGEX_SUBSTITUTIONS']
        for value, expected in samples:
-            self.assertEqual(utils.slugify(value, subs), expected)
+            self.assertEqual(utils.slugify(value, regex_subs=subs), expected)
    def test_slugify_substitute_and_keeping_non_alphanum(self):
        samples = (('Fedora QA', 'fedora.qa'),
                   ('C++ is used by Fedora QA', 'cpp is used by fedora.qa'),
-                   ('C++ is based on C', 'cpp-is-based-on-c'),
+                   ('C++ is based on C', 'cpp is based on c'),
-                   ('C+++ test C+ test', 'cpp-test-c-test'),)
+                   ('C+++ test C+ test', 'cpp+ test c+ test'),)
-        subs = (('Fedora QA', 'fedora.qa', True),
+        subs = [
-                ('c++', 'cpp'),)
+            (r'Fedora QA', 'fedora.qa'),
+            (r'c\+\+', 'cpp'),
+        ]
        for value, expected in samples:
-            self.assertEqual(utils.slugify(value, subs), expected)
+            self.assertEqual(utils.slugify(value, regex_subs=subs), expected)
    def test_get_relative_path(self):
--- a/pelican/tools/pelican_import.py
+++ b/pelican/tools/pelican_import.py
@ -17,6 +17,7 @@ from six.moves.urllib.request import urlretrieve
 # because logging.setLoggerClass has to be called before logging.getLogger
 from pelican.log import init
+from pelican.settings import read_settings
 from pelican.utils import SafeDatetime, slugify
 try:
@ -291,6 +292,8 @@ def dc2fields(file):
    print("%i posts read." % len(posts))
+    settings = read_settings()
+    subs = settings['SLUG_REGEX_SUBSTITUTIONS']
    for post in posts:
        fields = post.split('","')
@ -383,8 +386,9 @@ def dc2fields(file):
        kind = 'article'  # TODO: Recognise pages
        status = 'published'  # TODO: Find a way for draft posts
-        yield (post_title, content, slugify(post_title), post_creadt, author,
+        yield (post_title, content, slugify(post_title, regex_subs=subs),
-               categories, tags, status, kind, post_format)
+               post_creadt, author, categories, tags, status, kind,
+               post_format)
 def posterous2fields(api_token, email, password):
@ -418,6 +422,8 @@ def posterous2fields(api_token, email, password):
    page = 1
    posts = get_posterous_posts(api_token, email, password, page)
+    settings = read_settings()
+    subs = settings['SLUG_REGEX_SUBSTITUTIONS']
    while len(posts) > 0:
        posts = get_posterous_posts(api_token, email, password, page)
        page += 1
@ -425,7 +431,7 @@ def posterous2fields(api_token, email, password):
        for post in posts:
            slug = post.get('slug')
            if not slug:
-                slug = slugify(post.get('title'))
+                slug = slugify(post.get('title'), regex_subs=subs)
            tags = [tag.get('name') for tag in post.get('tags')]
            raw_date = post.get('display_date')
            date_object = SafeDatetime.strptime(
@ -469,13 +475,15 @@ def tumblr2fields(api_key, blogname):
    offset = 0
    posts = get_tumblr_posts(api_key, blogname, offset)
+    settings = read_settings()
+    subs = settings['SLUG_REGEX_SUBSTITUTIONS']
    while len(posts) > 0:
        for post in posts:
            title = \
                post.get('title') or \
                post.get('source_title') or \
                post.get('type').capitalize()
-            slug = post.get('slug') or slugify(title)
+            slug = post.get('slug') or slugify(title, regex_subs=subs)
            tags = post.get('tags')
            timestamp = post.get('timestamp')
            date = SafeDatetime.fromtimestamp(int(timestamp)).strftime(
@ -552,6 +560,8 @@ def feed2fields(file):
    """Read a feed and yield pelican fields"""
    import feedparser
    d = feedparser.parse(file)
+    settings = read_settings()
+    subs = settings['SLUG_REGEX_SUBSTITUTIONS']
    for entry in d.entries:
        date = (entry.updated_parsed.strftime('%Y-%m-%d %H:%M')
                if hasattr(entry, 'updated_parsed') else None)
@ -559,7 +569,7 @@ def feed2fields(file):
        tags = ([e['term'] for e in entry.tags]
                if hasattr(entry, 'tags') else None)
-        slug = slugify(entry.title)
+        slug = slugify(entry.title, regex_subs=subs)
        kind = 'article'
        yield (entry.title, entry.description, slug, date,
               author, [], tags, None, kind, 'html')
@ -621,7 +631,7 @@ def get_ext(out_markup, in_markup='html'):
 def get_out_filename(output_path, filename, ext, kind,
-                     dirpage, dircat, categories, wp_custpost):
+                     dirpage, dircat, categories, wp_custpost, slug_subs):
    filename = os.path.basename(filename)
    # Enforce filename restrictions for various filesystems at once; see
@ -647,12 +657,12 @@ def get_out_filename(output_path, filename, ext, kind,
    # create subdirectories with category names
    elif kind != 'article':
        if wp_custpost:
-            typename = slugify(kind)
+            typename = slugify(kind, regex_subs=slug_subs)
        else:
            typename = ''
            kind = 'article'
        if dircat and (len(categories) > 0):
-            catname = slugify(categories[0])
+            catname = slugify(categories[0], regex_subs=slug_subs)
        else:
            catname = ''
        out_filename = os.path.join(output_path, typename,
@ -661,7 +671,7 @@ def get_out_filename(output_path, filename, ext, kind,
            os.makedirs(os.path.join(output_path, typename, catname))
    # option to put files in directories with categories names
    elif dircat and (len(categories) > 0):
-        catname = slugify(categories[0])
+        catname = slugify(categories[0], regex_subs=slug_subs)
        out_filename = os.path.join(output_path, catname, filename + ext)
        if not os.path.isdir(os.path.join(output_path, catname)):
            os.mkdir(os.path.join(output_path, catname))
@ -768,6 +778,9 @@ def fields2pelican(
                 'requested import action.')
        exit(error)
+    settings = read_settings()
+    slug_subs = settings['SLUG_REGEX_SUBSTITUTIONS']
    for (title, content, filename, date, author, categories, tags, status,
            kind, in_markup) in fields:
        if filter_author and filter_author != author:
@ -796,7 +809,7 @@ def fields2pelican(
        out_filename = get_out_filename(
            output_path, filename, ext, kind, dirpage, dircat,
-            categories, wp_custpost)
+            categories, wp_custpost, slug_subs)
        print(out_filename)
        if in_markup in ('html', 'wp-html'):
--- a/pelican/urlwrappers.py
+++ b/pelican/urlwrappers.py
@ -36,8 +36,9 @@ class URLWrapper(object):
    @property
    def slug(self):
        if self._slug is None:
-            self._slug = slugify(self.name,
+            self._slug = slugify(
-                                 self.settings.get('SLUG_SUBSTITUTIONS', ()))
+                self.name,
+                regex_subs=self.settings.get('SLUG_REGEX_SUBSTITUTIONS', []))
        return self._slug
    @slug.setter
@ -56,8 +57,8 @@ class URLWrapper(object):
        return hash(self.slug)
    def _normalize_key(self, key):
-        subs = self.settings.get('SLUG_SUBSTITUTIONS', ())
+        subs = self.settings.get('SLUG_REGEX_SUBSTITUTIONS', [])
-        return six.text_type(slugify(key, subs))
+        return six.text_type(slugify(key, regex_subs=subs))
    def __eq__(self, other):
        if isinstance(other, self.__class__):
@ -115,10 +116,11 @@ class Category(URLWrapper):
    @property
    def slug(self):
        if self._slug is None:
-            substitutions = self.settings.get('SLUG_SUBSTITUTIONS', ())
+            if 'CATEGORY_REGEX_SUBSTITUTIONS' in self.settings:
-            substitutions += tuple(self.settings.get('CATEGORY_SUBSTITUTIONS',
+                subs = self.settings['CATEGORY_REGEX_SUBSTITUTIONS']
-                                                     ()))
+            else:
-            self._slug = slugify(self.name, substitutions)
+                subs = self.settings.get('SLUG_REGEX_SUBSTITUTIONS', [])
+            self._slug = slugify(self.name, regex_subs=subs)
        return self._slug
@ -129,9 +131,11 @@ class Tag(URLWrapper):
    @property
    def slug(self):
        if self._slug is None:
-            substitutions = self.settings.get('SLUG_SUBSTITUTIONS', ())
+            if 'TAG_REGEX_SUBSTITUTIONS' in self.settings:
-            substitutions += tuple(self.settings.get('TAG_SUBSTITUTIONS', ()))
+                subs = self.settings['TAG_REGEX_SUBSTITUTIONS']
-            self._slug = slugify(self.name, substitutions)
+            else:
+                subs = self.settings.get('SLUG_REGEX_SUBSTITUTIONS', [])
+            self._slug = slugify(self.name, regex_subs=subs)
        return self._slug
@ -139,6 +143,9 @@ class Author(URLWrapper):
    @property
    def slug(self):
        if self._slug is None:
-            self._slug = slugify(self.name,
+            if 'AUTHOR_REGEX_SUBSTITUTIONS' in self.settings:
-                                 self.settings.get('AUTHOR_SUBSTITUTIONS', ()))
+                subs = self.settings['AUTHOR_REGEX_SUBSTITUTIONS']
+            else:
+                subs = self.settings.get('SLUG_REGEX_SUBSTITUTIONS', [])
+            self._slug = slugify(self.name, regex_subs=subs)
        return self._slug
--- a/pelican/utils.py
+++ b/pelican/utils.py
@ -263,13 +263,14 @@ def pelican_open(filename, mode='rb', strip_crs=(sys.platform == 'win32')):
    yield content
-def slugify(value, substitutions=()):
+def slugify(value, regex_subs=()):
    """
    Normalizes string, converts to lowercase, removes non-alpha characters,
    and converts spaces to hyphens.
    Took from Django sources.
    """
    # TODO Maybe steal again from current Django 1.5dev
    value = Markup(value).striptags()
    # value must be unicode per se
@ -281,37 +282,16 @@ def slugify(value, substitutions=()):
    if isinstance(value, six.binary_type):
        value = value.decode('ascii')
    # still unicode
-    value = unicodedata.normalize('NFKD', value).lower()
+    value = unicodedata.normalize('NFKD', value)
-    # backward compatible covert from 2-tuples to 3-tuples
+    for src, dst in regex_subs:
-    new_subs = []
+        value = re.sub(src, dst, value, flags=re.IGNORECASE)
-    for tpl in substitutions:
-        try:
-            src, dst, skip = tpl
-        except ValueError:
-            src, dst = tpl
-            skip = False
-        new_subs.append((src, dst, skip))
-    substitutions = tuple(new_subs)
-    # by default will replace non-alphanum characters
+    # convert to lowercase
-    replace = True
+    value = value.lower()
-    for src, dst, skip in substitutions:
-        orig_value = value
-        value = value.replace(src.lower(), dst.lower())
-        # if replacement was made then skip non-alphanum
-        # replacement if instructed to do so
-        if value != orig_value:
-            replace = replace and not skip
-    if replace:
-        value = re.sub(r'[^\w\s-]', '', value).strip()
-        value = re.sub(r'[-\s]+', '-', value)
-    else:
-        value = value.strip()
    # we want only ASCII chars
-    value = value.encode('ascii', 'ignore')
+    value = value.encode('ascii', 'ignore').strip()
    # but Pelican should generally use only unicode
    return value.decode('ascii')