1
0
Fork 0
forked from github/pelican

Merge pull request #2326 from oulenz/slug_substitutions

Control slug substitutions from settings with regex
This commit is contained in:
Justin Mayer 2018-10-31 20:08:01 +01:00 committed by GitHub
commit 461f535d04
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 409 additions and 235 deletions

View file

@ -6,6 +6,8 @@ Next release
* New signal: ``feed_generated``
* Replace Fabric by Invoke and ``fabfile.py`` template by ``tasks.py``.
* Replace ``SLUG_SUBSTITUTIONS`` (and friends) by ``SLUG_REGEX_SUBSTITUTIONS``
for more finegrained control
3.7.1 (2017-01-10)
==================

View file

@ -519,27 +519,6 @@ respectively.
The URL to use for per-day archives of your posts. Used only if you have the
``{url}`` placeholder in ``PAGINATION_PATTERNS``.
.. data:: SLUG_SUBSTITUTIONS = ()
Substitutions to make prior to stripping out non-alphanumerics when
generating slugs. Specified as a list of 3-tuples of ``(from, to, skip)``
which are applied in order. ``skip`` is a boolean indicating whether or not
to skip replacement of non-alphanumeric characters. Useful for backward
compatibility with existing URLs.
.. data:: AUTHOR_SUBSTITUTIONS = ()
Substitutions for authors. ``SLUG_SUBSTITUTIONS`` is not taken into account
here!
.. data:: CATEGORY_SUBSTITUTIONS = ()
Added to ``SLUG_SUBSTITUTIONS`` for categories.
.. data:: TAG_SUBSTITUTIONS = ()
Added to ``SLUG_SUBSTITUTIONS`` for tags.
.. note::
If you do not want one or more of the default pages to be created (e.g.,
@ -547,24 +526,6 @@ respectively.
set the corresponding ``*_SAVE_AS`` setting to ``''`` to prevent the
relevant page from being generated.
.. note::
Substitutions are applied in order with the side effect that keeping
non-alphanum characters applies to the whole string when a replacement
is made.
For example if you have the following setting::
SLUG_SUBSTITUTIONS = (('C++', 'cpp'), ('keep dot', 'keep.dot', True))
the string ``Keep Dot`` will be converted to ``keep.dot``, however
``C++ will keep dot`` will be converted to ``cpp will keep.dot`` instead
of ``cpp-will-keep.dot``!
If you want to keep non-alphanum characters only for tags or categories
but not other slugs then configure ``TAG_SUBSTITUTIONS`` and
``CATEGORY_SUBSTITUTIONS`` respectively!
Pelican can optionally create per-year, per-month, and per-day archives of your
posts. These secondary archives are disabled by default but are automatically
enabled if you supply format strings for their respective ``_SAVE_AS`` settings.
@ -626,6 +587,33 @@ URLs for direct template pages are theme-dependent. Some themes use
corresponding ``*_URL`` setting as string, while others hard-code them:
``'archives.html'``, ``'authors.html'``, ``'categories.html'``, ``'tags.html'``.
.. data:: SLUG_REGEX_SUBSTITUTIONS = [
(r'[^\w\s-]', ''), # remove non-alphabetical/whitespace/'-' chars
(r'(?u)\A\s*', ''), # strip leading whitespace
(r'(?u)\s*\Z', ''), # strip trailing whitespace
(r'[-\s]+', '-'), # reduce multiple whitespace or '-' to single '-'
]
Regex substitutions to make when generating slugs of articles and pages.
Specified as a list of pairs of ``(from, to)`` which are applied in order,
ignoring case. The default substitutions have the effect of removing
non-alphanumeric characters and converting internal whitespace to dashes.
Apart from these substitutions, slugs are always converted to lowercase
ascii characters and leading and trailing whitespace is stripped. Useful for
backward compatibility with existing URLs.
.. data:: AUTHOR_REGEX_SUBSTITUTIONS = SLUG_REGEX_SUBSTITUTIONS
Regex substitutions for author slugs. Defaults to ``SLUG_REGEX_SUBSTITUTIONS``.
.. data:: CATEGORY_REGEX_SUBSTITUTIONS = SLUG_REGEX_SUBSTITUTIONS
Regex substitutions for category slugs. Defaults to ``SLUG_REGEX_SUBSTITUTIONS``.
.. data:: TAG_REGEX_SUBSTITUTIONS = SLUG_REGEX_SUBSTITUTIONS
Regex substitutions for tag slugs. Defaults to ``SLUG_REGEX_SUBSTITUTIONS``.
Time and Date
=============

View file

@ -98,14 +98,16 @@ class Content(object):
if not hasattr(self, 'slug'):
if (settings['SLUGIFY_SOURCE'] == 'title' and
hasattr(self, 'title')):
self.slug = slugify(self.title,
settings.get('SLUG_SUBSTITUTIONS', ()))
self.slug = slugify(
self.title,
regex_subs=settings.get('SLUG_REGEX_SUBSTITUTIONS', []))
elif (settings['SLUGIFY_SOURCE'] == 'basename' and
source_path is not None):
basename = os.path.basename(
os.path.splitext(source_path)[0])
self.slug = slugify(
basename, settings.get('SLUG_SUBSTITUTIONS', ()))
basename,
regex_subs=settings.get('SLUG_REGEX_SUBSTITUTIONS', []))
self.source_path = source_path

View file

@ -6,6 +6,7 @@ import inspect
import locale
import logging
import os
import re
from os.path import isabs
from posixpath import join as posix_join
@ -145,7 +146,12 @@ DEFAULT_CONFIG = {
'TEMPLATE_PAGES': {},
'TEMPLATE_EXTENSIONS': ['.html'],
'IGNORE_FILES': ['.#*'],
'SLUG_SUBSTITUTIONS': (),
'SLUG_REGEX_SUBSTITUTIONS': [
(r'[^\w\s-]', ''), # remove non-alphabetical/whitespace/'-' chars
(r'(?u)\A\s*', ''), # strip leading whitespace
(r'(?u)\s*\Z', ''), # strip trailing whitespace
(r'[-\s]+', '-'), # reduce multiple whitespace or '-' to single '-'
],
'INTRASITE_LINK_REGEX': '[{|](?P<what>.*?)[|}]',
'SLUGIFY_SOURCE': 'title',
'CACHE_CONTENT': False,
@ -164,79 +170,62 @@ PYGMENTS_RST_OPTIONS = None
def read_settings(path=None, override=None):
settings = override or {}
if path:
local_settings = get_settings_from_file(path)
# Make the paths relative to the settings file
settings = dict(get_settings_from_file(path), **settings)
if settings:
settings = handle_deprecated_settings(settings)
if path:
# Make relative paths absolute
def getabs(maybe_relative, base_path=path):
if isabs(maybe_relative):
return maybe_relative
return os.path.abspath(os.path.normpath(os.path.join(
os.path.dirname(base_path), maybe_relative)))
for p in ['PATH', 'OUTPUT_PATH', 'THEME', 'CACHE_PATH']:
if p in local_settings and local_settings[p] is not None \
and not isabs(local_settings[p]):
absp = os.path.abspath(os.path.normpath(os.path.join(
os.path.dirname(path), local_settings[p])))
if settings.get(p) is not None:
absp = getabs(settings[p])
# THEME may be a name rather than a path
if p != 'THEME' or os.path.exists(absp):
local_settings[p] = absp
settings[p] = absp
if 'PLUGIN_PATH' in local_settings:
logger.warning('PLUGIN_PATH setting has been replaced by '
'PLUGIN_PATHS, moving it to the new setting name.')
local_settings['PLUGIN_PATHS'] = local_settings['PLUGIN_PATH']
del local_settings['PLUGIN_PATH']
if 'JINJA_EXTENSIONS' in local_settings:
logger.warning('JINJA_EXTENSIONS setting has been deprecated, '
'moving it to JINJA_ENVIRONMENT setting.')
local_settings['JINJA_ENVIRONMENT']['extensions'] = \
local_settings['JINJA_EXTENSIONS']
del local_settings['JINJA_EXTENSIONS']
if isinstance(local_settings['PLUGIN_PATHS'], six.string_types):
logger.warning("Defining PLUGIN_PATHS setting as string "
"has been deprecated (should be a list)")
local_settings['PLUGIN_PATHS'] = [local_settings['PLUGIN_PATHS']]
elif local_settings['PLUGIN_PATHS'] is not None:
def getabs(path, pluginpath):
if isabs(pluginpath):
return pluginpath
else:
path_dirname = os.path.dirname(path)
path_joined = os.path.join(path_dirname, pluginpath)
path_normed = os.path.normpath(path_joined)
path_absolute = os.path.abspath(path_normed)
return path_absolute
if settings.get('PLUGIN_PATHS') is not None:
settings['PLUGIN_PATHS'] = [getabs(pluginpath)
for pluginpath
in settings['PLUGIN_PATHS']]
pluginpath_list = [getabs(path, pluginpath)
for pluginpath
in local_settings['PLUGIN_PATHS']]
local_settings['PLUGIN_PATHS'] = pluginpath_list
else:
local_settings = copy.deepcopy(DEFAULT_CONFIG)
settings = dict(copy.deepcopy(DEFAULT_CONFIG), **settings)
settings = configure_settings(settings)
if override:
local_settings.update(override)
parsed_settings = configure_settings(local_settings)
# This is because there doesn't seem to be a way to pass extra
# parameters to docutils directive handlers, so we have to have a
# variable here that we'll import from within Pygments.run (see
# rstdirectives.py) to see what the user defaults were.
global PYGMENTS_RST_OPTIONS
PYGMENTS_RST_OPTIONS = parsed_settings.get('PYGMENTS_RST_OPTIONS', None)
return parsed_settings
PYGMENTS_RST_OPTIONS = settings.get('PYGMENTS_RST_OPTIONS', None)
return settings
def get_settings_from_module(module=None, default_settings=DEFAULT_CONFIG):
def get_settings_from_module(module=None):
"""Loads settings from a module, returns a dictionary."""
context = copy.deepcopy(default_settings)
context = {}
if module is not None:
context.update(
(k, v) for k, v in inspect.getmembers(module) if k.isupper())
return context
def get_settings_from_file(path, default_settings=DEFAULT_CONFIG):
def get_settings_from_file(path):
"""Loads settings from a file path, returning a dict."""
name, ext = os.path.splitext(os.path.basename(path))
module = load_source(name, path)
return get_settings_from_module(module, default_settings=default_settings)
return get_settings_from_module(module)
def get_jinja_environment(settings):
@ -253,6 +242,149 @@ def get_jinja_environment(settings):
return settings
def handle_deprecated_settings(settings):
"""Converts deprecated settings and issues warnings. Issues an exception
if both old and new setting is specified.
"""
# PLUGIN_PATH -> PLUGIN_PATHS
if 'PLUGIN_PATH' in settings:
logger.warning('PLUGIN_PATH setting has been replaced by '
'PLUGIN_PATHS, moving it to the new setting name.')
settings['PLUGIN_PATHS'] = settings['PLUGIN_PATH']
del settings['PLUGIN_PATH']
# PLUGIN_PATHS: str -> [str]
if isinstance(settings.get('PLUGIN_PATHS'), six.string_types):
logger.warning("Defining PLUGIN_PATHS setting as string "
"has been deprecated (should be a list)")
settings['PLUGIN_PATHS'] = [settings['PLUGIN_PATHS']]
# JINJA_EXTENSIONS -> JINJA_ENVIRONMENT > extensions
if 'JINJA_EXTENSIONS' in settings:
logger.warning('JINJA_EXTENSIONS setting has been deprecated, '
'moving it to JINJA_ENVIRONMENT setting.')
settings['JINJA_ENVIRONMENT']['extensions'] = \
settings['JINJA_EXTENSIONS']
del settings['JINJA_EXTENSIONS']
# {ARTICLE,PAGE}_DIR -> {ARTICLE,PAGE}_PATHS
for key in ['ARTICLE', 'PAGE']:
old_key = key + '_DIR'
new_key = key + '_PATHS'
if old_key in settings:
logger.warning(
'Deprecated setting %s, moving it to %s list',
old_key, new_key)
settings[new_key] = [settings[old_key]] # also make a list
del settings[old_key]
# EXTRA_TEMPLATES_PATHS -> THEME_TEMPLATES_OVERRIDES
if 'EXTRA_TEMPLATES_PATHS' in settings:
logger.warning('EXTRA_TEMPLATES_PATHS is deprecated use '
'THEME_TEMPLATES_OVERRIDES instead.')
if ('THEME_TEMPLATES_OVERRIDES' in settings and
settings['THEME_TEMPLATES_OVERRIDES']):
raise Exception(
'Setting both EXTRA_TEMPLATES_PATHS and '
'THEME_TEMPLATES_OVERRIDES is not permitted. Please move to '
'only setting THEME_TEMPLATES_OVERRIDES.')
settings['THEME_TEMPLATES_OVERRIDES'] = \
settings['EXTRA_TEMPLATES_PATHS']
del settings['EXTRA_TEMPLATES_PATHS']
# MD_EXTENSIONS -> MARKDOWN
if 'MD_EXTENSIONS' in settings:
logger.warning('MD_EXTENSIONS is deprecated use MARKDOWN '
'instead. Falling back to the default.')
settings['MARKDOWN'] = DEFAULT_CONFIG['MARKDOWN']
# LESS_GENERATOR -> Webassets plugin
# FILES_TO_COPY -> STATIC_PATHS, EXTRA_PATH_METADATA
for old, new, doc in [
('LESS_GENERATOR', 'the Webassets plugin', None),
('FILES_TO_COPY', 'STATIC_PATHS and EXTRA_PATH_METADATA',
'https://github.com/getpelican/pelican/'
'blob/master/docs/settings.rst#path-metadata'),
]:
if old in settings:
message = 'The {} setting has been removed in favor of {}'.format(
old, new)
if doc:
message += ', see {} for details'.format(doc)
logger.warning(message)
# PAGINATED_DIRECT_TEMPLATES -> PAGINATED_TEMPLATES
if 'PAGINATED_DIRECT_TEMPLATES' in settings:
message = 'The {} setting has been removed in favor of {}'.format(
'PAGINATED_DIRECT_TEMPLATES', 'PAGINATED_TEMPLATES')
logger.warning(message)
for t in settings['PAGINATED_DIRECT_TEMPLATES']:
if t not in settings['PAGINATED_TEMPLATES']:
settings['PAGINATED_TEMPLATES'][t] = None
del settings['PAGINATED_DIRECT_TEMPLATES']
# {SLUG,CATEGORY,TAG,AUTHOR}_SUBSTITUTIONS ->
# {SLUG,CATEGORY,TAG,AUTHOR}_REGEX_SUBSTITUTIONS
url_settings_url = \
'http://docs.getpelican.com/en/latest/settings.html#url-settings'
flavours = {'SLUG', 'CATEGORY', 'TAG', 'AUTHOR'}
old_values = {f: settings[f + '_SUBSTITUTIONS']
for f in flavours if f + '_SUBSTITUTIONS' in settings}
new_values = {f: settings[f + '_REGEX_SUBSTITUTIONS']
for f in flavours if f + '_REGEX_SUBSTITUTIONS' in settings}
if old_values and new_values:
raise Exception(
'Setting both {new_key} and {old_key} (or variants thereof) is '
'not permitted. Please move to only setting {new_key}.'
.format(old_key='SLUG_SUBSTITUTIONS',
new_key='SLUG_REGEX_SUBSTITUTIONS'))
if old_values:
message = ('{} and variants thereof are deprecated and will be '
'removed in the future. Please use {} and variants thereof '
'instead. Check {}.'
.format('SLUG_SUBSTITUTIONS', 'SLUG_REGEX_SUBSTITUTIONS',
url_settings_url))
logger.warning(message)
if old_values.get('SLUG'):
for f in {'CATEGORY', 'TAG'}:
if old_values.get(f):
old_values[f] = old_values['SLUG'] + old_values[f]
old_values['AUTHOR'] = old_values.get('AUTHOR', [])
for f in flavours:
if old_values.get(f) is not None:
regex_subs = []
# by default will replace non-alphanum characters
replace = True
for tpl in old_values[f]:
try:
src, dst, skip = tpl
if skip:
replace = False
except ValueError:
src, dst = tpl
regex_subs.append(
(re.escape(src), dst.replace('\\', r'\\')))
if replace:
regex_subs += [
(r'[^\w\s-]', ''),
(r'(?u)\A\s*', ''),
(r'(?u)\s*\Z', ''),
(r'[-\s]+', '-'),
]
else:
regex_subs += [
(r'(?u)\A\s*', ''),
(r'(?u)\s*\Z', ''),
]
settings[f + '_REGEX_SUBSTITUTIONS'] = regex_subs
settings.pop(f + '_SUBSTITUTIONS', None)
return settings
def configure_settings(settings):
"""Provide optimizations, error checking, and warnings for the given
settings.
@ -377,31 +509,6 @@ def configure_settings(settings):
key=lambda r: r[0],
)
# move {ARTICLE,PAGE}_DIR -> {ARTICLE,PAGE}_PATHS
for key in ['ARTICLE', 'PAGE']:
old_key = key + '_DIR'
new_key = key + '_PATHS'
if old_key in settings:
logger.warning(
'Deprecated setting %s, moving it to %s list',
old_key, new_key)
settings[new_key] = [settings[old_key]] # also make a list
del settings[old_key]
# Deprecated warning of EXTRA_TEMPLATES_PATHS
if 'EXTRA_TEMPLATES_PATHS' in settings:
logger.warning('EXTRA_TEMPLATES_PATHS is deprecated use '
'THEME_TEMPLATES_OVERRIDES instead.')
if ('THEME_TEMPLATES_OVERRIDES' in settings and
settings['THEME_TEMPLATES_OVERRIDES']):
raise Exception(
'Setting both EXTRA_TEMPLATES_PATHS and '
'THEME_TEMPLATES_OVERRIDES is not permitted. Please move to '
'only setting THEME_TEMPLATES_OVERRIDES.')
settings['THEME_TEMPLATES_OVERRIDES'] = \
settings['EXTRA_TEMPLATES_PATHS']
del settings['EXTRA_TEMPLATES_PATHS']
# Save people from accidentally setting a string rather than a list
path_keys = (
'ARTICLE_EXCLUDES',
@ -425,12 +532,6 @@ def configure_settings(settings):
PATH_KEY)
settings[PATH_KEY] = DEFAULT_CONFIG[PATH_KEY]
# Deprecated warning of MD_EXTENSIONS
if 'MD_EXTENSIONS' in settings:
logger.warning('MD_EXTENSIONS is deprecated use MARKDOWN '
'instead. Falling back to the default.')
settings['MARKDOWN'] = DEFAULT_CONFIG['MARKDOWN']
# Add {PAGE,ARTICLE}_PATHS to {ARTICLE,PAGE}_EXCLUDES
mutually_exclusive = ('ARTICLE', 'PAGE')
for type_1, type_2 in [mutually_exclusive, mutually_exclusive[::-1]]:
@ -443,27 +544,4 @@ def configure_settings(settings):
except KeyError:
continue # setting not specified, nothing to do
for old, new, doc in [
('LESS_GENERATOR', 'the Webassets plugin', None),
('FILES_TO_COPY', 'STATIC_PATHS and EXTRA_PATH_METADATA',
'https://github.com/getpelican/pelican/'
'blob/master/docs/settings.rst#path-metadata'),
]:
if old in settings:
message = 'The {} setting has been removed in favor of {}'.format(
old, new)
if doc:
message += ', see {} for details'.format(doc)
logger.warning(message)
if 'PAGINATED_DIRECT_TEMPLATES' in settings:
message = 'The {} setting has been removed in favor of {}'.format(
'PAGINATED_DIRECT_TEMPLATES', 'PAGINATED_TEMPLATES')
logger.warning(message)
for t in settings['PAGINATED_DIRECT_TEMPLATES']:
if t not in settings['PAGINATED_TEMPLATES']:
settings['PAGINATED_TEMPLATES'][t] = None
del settings['PAGINATED_DIRECT_TEMPLATES']
return settings

View file

@ -497,7 +497,13 @@ class TestArticle(TestPage):
def test_slugify_category_author(self):
settings = get_settings()
settings['SLUG_SUBSTITUTIONS'] = [('C#', 'csharp')]
settings['SLUG_REGEX_SUBSTITUTIONS'] = [
(r'C#', 'csharp'),
(r'[^\w\s-]', ''),
(r'(?u)\A\s*', ''),
(r'(?u)\s*\Z', ''),
(r'[-\s]+', '-'),
]
settings['ARTICLE_URL'] = '{author}/{category}/{slug}/'
settings['ARTICLE_SAVE_AS'] = '{author}/{category}/{slug}/index.html'
article_kwargs = self._copy_page_kwargs()
@ -513,9 +519,13 @@ class TestArticle(TestPage):
def test_slugify_with_author_substitutions(self):
settings = get_settings()
settings['AUTHOR_SUBSTITUTIONS'] = [
('Alexander Todorov', 'atodorov', False),
('Krasimir Tsonev', 'krasimir', False),
settings['AUTHOR_REGEX_SUBSTITUTIONS'] = [
('Alexander Todorov', 'atodorov'),
('Krasimir Tsonev', 'krasimir'),
(r'[^\w\s-]', ''),
(r'(?u)\A\s*', ''),
(r'(?u)\s*\Z', ''),
(r'[-\s]+', '-'),
]
settings['ARTICLE_URL'] = 'blog/{author}/{slug}/'
settings['ARTICLE_SAVE_AS'] = 'blog/{author}/{slug}/index.html'
@ -530,7 +540,9 @@ class TestArticle(TestPage):
def test_slugify_category_with_dots(self):
settings = get_settings()
settings['CATEGORY_SUBSTITUTIONS'] = [('Fedora QA', 'fedora.qa', True)]
settings['CATEGORY_REGEX_SUBSTITUTIONS'] = [
('Fedora QA', 'fedora.qa'),
]
settings['ARTICLE_URL'] = '{category}/{slug}/'
article_kwargs = self._copy_page_kwargs()
article_kwargs['metadata']['category'] = Category('Fedora QA',
@ -542,7 +554,9 @@ class TestArticle(TestPage):
def test_slugify_tags_with_dots(self):
settings = get_settings()
settings['TAG_SUBSTITUTIONS'] = [('Fedora QA', 'fedora.qa', True)]
settings['TAG_REGEX_SUBSTITUTIONS'] = [
('Fedora QA', 'fedora.qa'),
]
settings['ARTICLE_URL'] = '{tag}/{slug}/'
article_kwargs = self._copy_page_kwargs()
article_kwargs['metadata']['tag'] = Tag('Fedora QA', settings)

View file

@ -6,6 +6,7 @@ import os
import re
from codecs import open
from pelican.settings import DEFAULT_CONFIG
from pelican.tests.support import (mute, skipIfNoExecutable, temporary_folder,
unittest)
from pelican.tools.pelican_import import (blogger2fields, build_header,
@ -133,10 +134,11 @@ class TestWordpressXmlImporter(unittest.TestCase):
with temporary_folder() as temp:
fnames = list(silent_f2p(test_posts, 'markdown',
temp, dircat=True))
subs = DEFAULT_CONFIG['SLUG_REGEX_SUBSTITUTIONS']
index = 0
for post in test_posts:
name = post[2]
category = slugify(post[5][0])
category = slugify(post[5][0], regex_subs=subs)
name += '.md'
filename = os.path.join(category, name)
out_name = fnames[index]
@ -208,11 +210,12 @@ class TestWordpressXmlImporter(unittest.TestCase):
with temporary_folder() as temp:
fnames = list(silent_f2p(test_posts, 'markdown', temp,
wp_custpost=True, dircat=True))
subs = DEFAULT_CONFIG['SLUG_REGEX_SUBSTITUTIONS']
index = 0
for post in test_posts:
name = post[2]
kind = post[8]
category = slugify(post[5][0])
category = slugify(post[5][0], regex_subs=subs)
name += '.md'
filename = os.path.join(kind, category, name)
out_name = fnames[index]

View file

@ -9,7 +9,8 @@ from sys import platform
from pelican.settings import (DEFAULT_CONFIG, DEFAULT_THEME,
configure_settings, read_settings)
configure_settings, handle_deprecated_settings,
read_settings)
from pelican.tests.support import unittest
@ -128,7 +129,7 @@ class TestSettingsConfiguration(unittest.TestCase):
settings['ARTICLE_DIR'] = 'foo'
settings['PAGE_DIR'] = 'bar'
configure_settings(settings)
settings = handle_deprecated_settings(settings)
self.assertEqual(settings['ARTICLE_PATHS'], ['foo'])
self.assertEqual(settings['PAGE_PATHS'], ['bar'])
@ -171,7 +172,7 @@ class TestSettingsConfiguration(unittest.TestCase):
settings = self.settings
settings['EXTRA_TEMPLATES_PATHS'] = ['/foo/bar', '/ha']
configure_settings(settings)
settings = handle_deprecated_settings(settings)
self.assertEqual(settings['THEME_TEMPLATES_OVERRIDES'],
['/foo/bar', '/ha'])
@ -181,7 +182,7 @@ class TestSettingsConfiguration(unittest.TestCase):
settings = self.settings
settings['PAGINATED_DIRECT_TEMPLATES'] = ['index', 'archives']
settings['PAGINATED_TEMPLATES'] = {'index': 10, 'category': None}
settings = configure_settings(settings)
settings = handle_deprecated_settings(settings)
self.assertEqual(settings['PAGINATED_TEMPLATES'],
{'index': 10, 'category': None, 'archives': None})
self.assertNotIn('PAGINATED_DIRECT_TEMPLATES', settings)
@ -191,4 +192,82 @@ class TestSettingsConfiguration(unittest.TestCase):
settings['EXTRA_TEMPLATES_PATHS'] = ['/ha']
settings['THEME_TEMPLATES_OVERRIDES'] = ['/foo/bar']
self.assertRaises(Exception, configure_settings, settings)
self.assertRaises(Exception, handle_deprecated_settings, settings)
def test_slug_and_slug_regex_substitutions_exception(self):
settings = {}
settings['SLUG_REGEX_SUBSTITUTIONS'] = [('C++', 'cpp')]
settings['TAG_SUBSTITUTIONS'] = [('C#', 'csharp')]
self.assertRaises(Exception, handle_deprecated_settings, settings)
def test_deprecated_slug_substitutions(self):
default_slug_regex_subs = self.settings['SLUG_REGEX_SUBSTITUTIONS']
# If no deprecated setting is set, don't set new ones
settings = {}
settings = handle_deprecated_settings(settings)
self.assertNotIn('SLUG_REGEX_SUBSTITUTIONS', settings)
self.assertNotIn('TAG_REGEX_SUBSTITUTIONS', settings)
self.assertNotIn('CATEGORY_REGEX_SUBSTITUTIONS', settings)
self.assertNotIn('AUTHOR_REGEX_SUBSTITUTIONS', settings)
# If SLUG_SUBSTITUTIONS is set, set {SLUG, AUTHOR}_REGEX_SUBSTITUTIONS
# correctly, don't set {CATEGORY, TAG}_REGEX_SUBSTITUTIONS
settings = {}
settings['SLUG_SUBSTITUTIONS'] = [('C++', 'cpp')]
settings = handle_deprecated_settings(settings)
self.assertEqual(settings.get('SLUG_REGEX_SUBSTITUTIONS'),
[(r'C\+\+', 'cpp')] + default_slug_regex_subs)
self.assertNotIn('TAG_REGEX_SUBSTITUTIONS', settings)
self.assertNotIn('CATEGORY_REGEX_SUBSTITUTIONS', settings)
self.assertEqual(settings.get('AUTHOR_REGEX_SUBSTITUTIONS'),
default_slug_regex_subs)
# If {CATEGORY, TAG, AUTHOR}_SUBSTITUTIONS are set, set
# {CATEGORY, TAG, AUTHOR}_REGEX_SUBSTITUTIONS correctly, don't set
# SLUG_REGEX_SUBSTITUTIONS
settings = {}
settings['TAG_SUBSTITUTIONS'] = [('C#', 'csharp')]
settings['CATEGORY_SUBSTITUTIONS'] = [('C#', 'csharp')]
settings['AUTHOR_SUBSTITUTIONS'] = [('Alexander Todorov', 'atodorov')]
settings = handle_deprecated_settings(settings)
self.assertNotIn('SLUG_REGEX_SUBSTITUTIONS', settings)
self.assertEqual(settings['TAG_REGEX_SUBSTITUTIONS'],
[(r'C\#', 'csharp')] + default_slug_regex_subs)
self.assertEqual(settings['CATEGORY_REGEX_SUBSTITUTIONS'],
[(r'C\#', 'csharp')] + default_slug_regex_subs)
self.assertEqual(settings['AUTHOR_REGEX_SUBSTITUTIONS'],
[(r'Alexander\ Todorov', 'atodorov')] +
default_slug_regex_subs)
# If {SLUG, CATEGORY, TAG, AUTHOR}_SUBSTITUTIONS are set, set
# {SLUG, CATEGORY, TAG, AUTHOR}_REGEX_SUBSTITUTIONS correctly
settings = {}
settings['SLUG_SUBSTITUTIONS'] = [('C++', 'cpp')]
settings['TAG_SUBSTITUTIONS'] = [('C#', 'csharp')]
settings['CATEGORY_SUBSTITUTIONS'] = [('C#', 'csharp')]
settings['AUTHOR_SUBSTITUTIONS'] = [('Alexander Todorov', 'atodorov')]
settings = handle_deprecated_settings(settings)
self.assertEqual(settings['TAG_REGEX_SUBSTITUTIONS'],
[(r'C\+\+', 'cpp')] + [(r'C\#', 'csharp')] +
default_slug_regex_subs)
self.assertEqual(settings['CATEGORY_REGEX_SUBSTITUTIONS'],
[(r'C\+\+', 'cpp')] + [(r'C\#', 'csharp')] +
default_slug_regex_subs)
self.assertEqual(settings['AUTHOR_REGEX_SUBSTITUTIONS'],
[(r'Alexander\ Todorov', 'atodorov')] +
default_slug_regex_subs)
# Handle old 'skip' flags correctly
settings = {}
settings['SLUG_SUBSTITUTIONS'] = [('C++', 'cpp', True)]
settings['AUTHOR_SUBSTITUTIONS'] = [('Alexander Todorov', 'atodorov',
False)]
settings = handle_deprecated_settings(settings)
self.assertEqual(settings.get('SLUG_REGEX_SUBSTITUTIONS'),
[(r'C\+\+', 'cpp')] +
[(r'(?u)\A\s*', ''), (r'(?u)\s*\Z', '')])
self.assertEqual(settings['AUTHOR_REGEX_SUBSTITUTIONS'],
[(r'Alexander\ Todorov', 'atodorov')] +
default_slug_regex_subs)

View file

@ -55,30 +55,29 @@ class TestURLWrapper(unittest.TestCase):
self.assertEqual(author, author_equal)
cat_ascii = Category('指導書', settings={})
self.assertEqual(cat_ascii, u'zhi-dao-shu')
self.assertEqual(cat_ascii, u'zhi dao shu')
def test_slugify_with_substitutions_and_dots(self):
tag = Tag('Tag Dot',
settings={
'TAG_SUBSTITUTIONS': [('Tag Dot', 'tag.dot', True)]
})
tag = Tag('Tag Dot', settings={'TAG_REGEX_SUBSTITUTIONS': [
('Tag Dot', 'tag.dot'),
]})
cat = Category('Category Dot',
settings={
'CATEGORY_SUBSTITUTIONS': (('Category Dot',
'cat.dot',
True),)
})
settings={'CATEGORY_REGEX_SUBSTITUTIONS': [
('Category Dot', 'cat.dot'),
]})
self.assertEqual(tag.slug, 'tag.dot')
self.assertEqual(cat.slug, 'cat.dot')
def test_author_slug_substitutions(self):
settings = {
'AUTHOR_SUBSTITUTIONS': [
('Alexander Todorov', 'atodorov', False),
('Krasimir Tsonev', 'krasimir', False),
]
}
settings = {'AUTHOR_REGEX_SUBSTITUTIONS': [
('Alexander Todorov', 'atodorov'),
('Krasimir Tsonev', 'krasimir'),
(r'[^\w\s-]', ''),
(r'(?u)\A\s*', ''),
(r'(?u)\s*\Z', ''),
(r'[-\s]+', '-'),
]}
author1 = Author('Mr. Senko', settings=settings)
author2 = Author('Alexander Todorov', settings=settings)

View file

@ -119,8 +119,11 @@ class TestUtils(LoggedTestCase):
('大飯原発4号機、18日夜起動へ',
'da-fan-yuan-fa-4hao-ji-18ri-ye-qi-dong-he'),)
settings = read_settings()
subs = settings['SLUG_REGEX_SUBSTITUTIONS']
for value, expected in samples:
self.assertEqual(utils.slugify(value), expected)
self.assertEqual(utils.slugify(value, regex_subs=subs), expected)
def test_slugify_substitute(self):
@ -129,21 +132,27 @@ class TestUtils(LoggedTestCase):
('c++, c#, C#, C++', 'cpp-c-sharp-c-sharp-cpp'),
('c++-streams', 'cpp-streams'),)
subs = (('C++', 'CPP'), ('C#', 'C-SHARP'))
settings = read_settings()
subs = [
(r'C\+\+', 'CPP'),
(r'C#', 'C-SHARP'),
] + settings['SLUG_REGEX_SUBSTITUTIONS']
for value, expected in samples:
self.assertEqual(utils.slugify(value, subs), expected)
self.assertEqual(utils.slugify(value, regex_subs=subs), expected)
def test_slugify_substitute_and_keeping_non_alphanum(self):
samples = (('Fedora QA', 'fedora.qa'),
('C++ is used by Fedora QA', 'cpp is used by fedora.qa'),
('C++ is based on C', 'cpp-is-based-on-c'),
('C+++ test C+ test', 'cpp-test-c-test'),)
('C++ is based on C', 'cpp is based on c'),
('C+++ test C+ test', 'cpp+ test c+ test'),)
subs = (('Fedora QA', 'fedora.qa', True),
('c++', 'cpp'),)
subs = [
(r'Fedora QA', 'fedora.qa'),
(r'c\+\+', 'cpp'),
]
for value, expected in samples:
self.assertEqual(utils.slugify(value, subs), expected)
self.assertEqual(utils.slugify(value, regex_subs=subs), expected)
def test_get_relative_path(self):

View file

@ -17,6 +17,7 @@ from six.moves.urllib.request import urlretrieve
# because logging.setLoggerClass has to be called before logging.getLogger
from pelican.log import init
from pelican.settings import read_settings
from pelican.utils import SafeDatetime, slugify
try:
@ -291,6 +292,8 @@ def dc2fields(file):
print("%i posts read." % len(posts))
settings = read_settings()
subs = settings['SLUG_REGEX_SUBSTITUTIONS']
for post in posts:
fields = post.split('","')
@ -383,8 +386,9 @@ def dc2fields(file):
kind = 'article' # TODO: Recognise pages
status = 'published' # TODO: Find a way for draft posts
yield (post_title, content, slugify(post_title), post_creadt, author,
categories, tags, status, kind, post_format)
yield (post_title, content, slugify(post_title, regex_subs=subs),
post_creadt, author, categories, tags, status, kind,
post_format)
def posterous2fields(api_token, email, password):
@ -418,6 +422,8 @@ def posterous2fields(api_token, email, password):
page = 1
posts = get_posterous_posts(api_token, email, password, page)
settings = read_settings()
subs = settings['SLUG_REGEX_SUBSTITUTIONS']
while len(posts) > 0:
posts = get_posterous_posts(api_token, email, password, page)
page += 1
@ -425,7 +431,7 @@ def posterous2fields(api_token, email, password):
for post in posts:
slug = post.get('slug')
if not slug:
slug = slugify(post.get('title'))
slug = slugify(post.get('title'), regex_subs=subs)
tags = [tag.get('name') for tag in post.get('tags')]
raw_date = post.get('display_date')
date_object = SafeDatetime.strptime(
@ -469,13 +475,15 @@ def tumblr2fields(api_key, blogname):
offset = 0
posts = get_tumblr_posts(api_key, blogname, offset)
settings = read_settings()
subs = settings['SLUG_REGEX_SUBSTITUTIONS']
while len(posts) > 0:
for post in posts:
title = \
post.get('title') or \
post.get('source_title') or \
post.get('type').capitalize()
slug = post.get('slug') or slugify(title)
slug = post.get('slug') or slugify(title, regex_subs=subs)
tags = post.get('tags')
timestamp = post.get('timestamp')
date = SafeDatetime.fromtimestamp(int(timestamp)).strftime(
@ -552,6 +560,8 @@ def feed2fields(file):
"""Read a feed and yield pelican fields"""
import feedparser
d = feedparser.parse(file)
settings = read_settings()
subs = settings['SLUG_REGEX_SUBSTITUTIONS']
for entry in d.entries:
date = (entry.updated_parsed.strftime('%Y-%m-%d %H:%M')
if hasattr(entry, 'updated_parsed') else None)
@ -559,7 +569,7 @@ def feed2fields(file):
tags = ([e['term'] for e in entry.tags]
if hasattr(entry, 'tags') else None)
slug = slugify(entry.title)
slug = slugify(entry.title, regex_subs=subs)
kind = 'article'
yield (entry.title, entry.description, slug, date,
author, [], tags, None, kind, 'html')
@ -621,7 +631,7 @@ def get_ext(out_markup, in_markup='html'):
def get_out_filename(output_path, filename, ext, kind,
dirpage, dircat, categories, wp_custpost):
dirpage, dircat, categories, wp_custpost, slug_subs):
filename = os.path.basename(filename)
# Enforce filename restrictions for various filesystems at once; see
@ -647,12 +657,12 @@ def get_out_filename(output_path, filename, ext, kind,
# create subdirectories with category names
elif kind != 'article':
if wp_custpost:
typename = slugify(kind)
typename = slugify(kind, regex_subs=slug_subs)
else:
typename = ''
kind = 'article'
if dircat and (len(categories) > 0):
catname = slugify(categories[0])
catname = slugify(categories[0], regex_subs=slug_subs)
else:
catname = ''
out_filename = os.path.join(output_path, typename,
@ -661,7 +671,7 @@ def get_out_filename(output_path, filename, ext, kind,
os.makedirs(os.path.join(output_path, typename, catname))
# option to put files in directories with categories names
elif dircat and (len(categories) > 0):
catname = slugify(categories[0])
catname = slugify(categories[0], regex_subs=slug_subs)
out_filename = os.path.join(output_path, catname, filename + ext)
if not os.path.isdir(os.path.join(output_path, catname)):
os.mkdir(os.path.join(output_path, catname))
@ -768,6 +778,9 @@ def fields2pelican(
'requested import action.')
exit(error)
settings = read_settings()
slug_subs = settings['SLUG_REGEX_SUBSTITUTIONS']
for (title, content, filename, date, author, categories, tags, status,
kind, in_markup) in fields:
if filter_author and filter_author != author:
@ -796,7 +809,7 @@ def fields2pelican(
out_filename = get_out_filename(
output_path, filename, ext, kind, dirpage, dircat,
categories, wp_custpost)
categories, wp_custpost, slug_subs)
print(out_filename)
if in_markup in ('html', 'wp-html'):

View file

@ -36,8 +36,9 @@ class URLWrapper(object):
@property
def slug(self):
if self._slug is None:
self._slug = slugify(self.name,
self.settings.get('SLUG_SUBSTITUTIONS', ()))
self._slug = slugify(
self.name,
regex_subs=self.settings.get('SLUG_REGEX_SUBSTITUTIONS', []))
return self._slug
@slug.setter
@ -56,8 +57,8 @@ class URLWrapper(object):
return hash(self.slug)
def _normalize_key(self, key):
subs = self.settings.get('SLUG_SUBSTITUTIONS', ())
return six.text_type(slugify(key, subs))
subs = self.settings.get('SLUG_REGEX_SUBSTITUTIONS', [])
return six.text_type(slugify(key, regex_subs=subs))
def __eq__(self, other):
if isinstance(other, self.__class__):
@ -115,10 +116,11 @@ class Category(URLWrapper):
@property
def slug(self):
if self._slug is None:
substitutions = self.settings.get('SLUG_SUBSTITUTIONS', ())
substitutions += tuple(self.settings.get('CATEGORY_SUBSTITUTIONS',
()))
self._slug = slugify(self.name, substitutions)
if 'CATEGORY_REGEX_SUBSTITUTIONS' in self.settings:
subs = self.settings['CATEGORY_REGEX_SUBSTITUTIONS']
else:
subs = self.settings.get('SLUG_REGEX_SUBSTITUTIONS', [])
self._slug = slugify(self.name, regex_subs=subs)
return self._slug
@ -129,9 +131,11 @@ class Tag(URLWrapper):
@property
def slug(self):
if self._slug is None:
substitutions = self.settings.get('SLUG_SUBSTITUTIONS', ())
substitutions += tuple(self.settings.get('TAG_SUBSTITUTIONS', ()))
self._slug = slugify(self.name, substitutions)
if 'TAG_REGEX_SUBSTITUTIONS' in self.settings:
subs = self.settings['TAG_REGEX_SUBSTITUTIONS']
else:
subs = self.settings.get('SLUG_REGEX_SUBSTITUTIONS', [])
self._slug = slugify(self.name, regex_subs=subs)
return self._slug
@ -139,6 +143,9 @@ class Author(URLWrapper):
@property
def slug(self):
if self._slug is None:
self._slug = slugify(self.name,
self.settings.get('AUTHOR_SUBSTITUTIONS', ()))
if 'AUTHOR_REGEX_SUBSTITUTIONS' in self.settings:
subs = self.settings['AUTHOR_REGEX_SUBSTITUTIONS']
else:
subs = self.settings.get('SLUG_REGEX_SUBSTITUTIONS', [])
self._slug = slugify(self.name, regex_subs=subs)
return self._slug

View file

@ -263,13 +263,14 @@ def pelican_open(filename, mode='rb', strip_crs=(sys.platform == 'win32')):
yield content
def slugify(value, substitutions=()):
def slugify(value, regex_subs=()):
"""
Normalizes string, converts to lowercase, removes non-alpha characters,
and converts spaces to hyphens.
Took from Django sources.
"""
# TODO Maybe steal again from current Django 1.5dev
value = Markup(value).striptags()
# value must be unicode per se
@ -281,37 +282,16 @@ def slugify(value, substitutions=()):
if isinstance(value, six.binary_type):
value = value.decode('ascii')
# still unicode
value = unicodedata.normalize('NFKD', value).lower()
value = unicodedata.normalize('NFKD', value)
# backward compatible covert from 2-tuples to 3-tuples
new_subs = []
for tpl in substitutions:
try:
src, dst, skip = tpl
except ValueError:
src, dst = tpl
skip = False
new_subs.append((src, dst, skip))
substitutions = tuple(new_subs)
for src, dst in regex_subs:
value = re.sub(src, dst, value, flags=re.IGNORECASE)
# by default will replace non-alphanum characters
replace = True
for src, dst, skip in substitutions:
orig_value = value
value = value.replace(src.lower(), dst.lower())
# if replacement was made then skip non-alphanum
# replacement if instructed to do so
if value != orig_value:
replace = replace and not skip
if replace:
value = re.sub(r'[^\w\s-]', '', value).strip()
value = re.sub(r'[-\s]+', '-', value)
else:
value = value.strip()
# convert to lowercase
value = value.lower()
# we want only ASCII chars
value = value.encode('ascii', 'ignore')
value = value.encode('ascii', 'ignore').strip()
# but Pelican should generally use only unicode
return value.decode('ascii')