Merge pull request #2326 from oulenz/slug_substitutions

Control slug substitutions from settings with regex
This commit is contained in:
Justin Mayer 2018-10-31 20:08:01 +01:00 committed by GitHub
commit 461f535d04
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 409 additions and 235 deletions

View file

@ -98,14 +98,16 @@ class Content(object):
if not hasattr(self, 'slug'):
if (settings['SLUGIFY_SOURCE'] == 'title' and
hasattr(self, 'title')):
self.slug = slugify(self.title,
settings.get('SLUG_SUBSTITUTIONS', ()))
self.slug = slugify(
self.title,
regex_subs=settings.get('SLUG_REGEX_SUBSTITUTIONS', []))
elif (settings['SLUGIFY_SOURCE'] == 'basename' and
source_path is not None):
basename = os.path.basename(
os.path.splitext(source_path)[0])
self.slug = slugify(
basename, settings.get('SLUG_SUBSTITUTIONS', ()))
basename,
regex_subs=settings.get('SLUG_REGEX_SUBSTITUTIONS', []))
self.source_path = source_path

View file

@ -6,6 +6,7 @@ import inspect
import locale
import logging
import os
import re
from os.path import isabs
from posixpath import join as posix_join
@ -145,7 +146,12 @@ DEFAULT_CONFIG = {
'TEMPLATE_PAGES': {},
'TEMPLATE_EXTENSIONS': ['.html'],
'IGNORE_FILES': ['.#*'],
'SLUG_SUBSTITUTIONS': (),
'SLUG_REGEX_SUBSTITUTIONS': [
(r'[^\w\s-]', ''), # remove non-alphabetical/whitespace/'-' chars
(r'(?u)\A\s*', ''), # strip leading whitespace
(r'(?u)\s*\Z', ''), # strip trailing whitespace
(r'[-\s]+', '-'), # reduce multiple whitespace or '-' to single '-'
],
'INTRASITE_LINK_REGEX': '[{|](?P<what>.*?)[|}]',
'SLUGIFY_SOURCE': 'title',
'CACHE_CONTENT': False,
@ -164,79 +170,62 @@ PYGMENTS_RST_OPTIONS = None
def read_settings(path=None, override=None):
settings = override or {}
if path:
local_settings = get_settings_from_file(path)
# Make the paths relative to the settings file
settings = dict(get_settings_from_file(path), **settings)
if settings:
settings = handle_deprecated_settings(settings)
if path:
# Make relative paths absolute
def getabs(maybe_relative, base_path=path):
if isabs(maybe_relative):
return maybe_relative
return os.path.abspath(os.path.normpath(os.path.join(
os.path.dirname(base_path), maybe_relative)))
for p in ['PATH', 'OUTPUT_PATH', 'THEME', 'CACHE_PATH']:
if p in local_settings and local_settings[p] is not None \
and not isabs(local_settings[p]):
absp = os.path.abspath(os.path.normpath(os.path.join(
os.path.dirname(path), local_settings[p])))
if settings.get(p) is not None:
absp = getabs(settings[p])
# THEME may be a name rather than a path
if p != 'THEME' or os.path.exists(absp):
local_settings[p] = absp
settings[p] = absp
if 'PLUGIN_PATH' in local_settings:
logger.warning('PLUGIN_PATH setting has been replaced by '
'PLUGIN_PATHS, moving it to the new setting name.')
local_settings['PLUGIN_PATHS'] = local_settings['PLUGIN_PATH']
del local_settings['PLUGIN_PATH']
if 'JINJA_EXTENSIONS' in local_settings:
logger.warning('JINJA_EXTENSIONS setting has been deprecated, '
'moving it to JINJA_ENVIRONMENT setting.')
local_settings['JINJA_ENVIRONMENT']['extensions'] = \
local_settings['JINJA_EXTENSIONS']
del local_settings['JINJA_EXTENSIONS']
if isinstance(local_settings['PLUGIN_PATHS'], six.string_types):
logger.warning("Defining PLUGIN_PATHS setting as string "
"has been deprecated (should be a list)")
local_settings['PLUGIN_PATHS'] = [local_settings['PLUGIN_PATHS']]
elif local_settings['PLUGIN_PATHS'] is not None:
def getabs(path, pluginpath):
if isabs(pluginpath):
return pluginpath
else:
path_dirname = os.path.dirname(path)
path_joined = os.path.join(path_dirname, pluginpath)
path_normed = os.path.normpath(path_joined)
path_absolute = os.path.abspath(path_normed)
return path_absolute
if settings.get('PLUGIN_PATHS') is not None:
settings['PLUGIN_PATHS'] = [getabs(pluginpath)
for pluginpath
in settings['PLUGIN_PATHS']]
pluginpath_list = [getabs(path, pluginpath)
for pluginpath
in local_settings['PLUGIN_PATHS']]
local_settings['PLUGIN_PATHS'] = pluginpath_list
else:
local_settings = copy.deepcopy(DEFAULT_CONFIG)
settings = dict(copy.deepcopy(DEFAULT_CONFIG), **settings)
settings = configure_settings(settings)
if override:
local_settings.update(override)
parsed_settings = configure_settings(local_settings)
# This is because there doesn't seem to be a way to pass extra
# parameters to docutils directive handlers, so we have to have a
# variable here that we'll import from within Pygments.run (see
# rstdirectives.py) to see what the user defaults were.
global PYGMENTS_RST_OPTIONS
PYGMENTS_RST_OPTIONS = parsed_settings.get('PYGMENTS_RST_OPTIONS', None)
return parsed_settings
PYGMENTS_RST_OPTIONS = settings.get('PYGMENTS_RST_OPTIONS', None)
return settings
def get_settings_from_module(module=None, default_settings=DEFAULT_CONFIG):
def get_settings_from_module(module=None):
"""Loads settings from a module, returns a dictionary."""
context = copy.deepcopy(default_settings)
context = {}
if module is not None:
context.update(
(k, v) for k, v in inspect.getmembers(module) if k.isupper())
return context
def get_settings_from_file(path, default_settings=DEFAULT_CONFIG):
def get_settings_from_file(path):
"""Loads settings from a file path, returning a dict."""
name, ext = os.path.splitext(os.path.basename(path))
module = load_source(name, path)
return get_settings_from_module(module, default_settings=default_settings)
return get_settings_from_module(module)
def get_jinja_environment(settings):
@ -253,6 +242,149 @@ def get_jinja_environment(settings):
return settings
def handle_deprecated_settings(settings):
    """Convert deprecated settings to their replacements and issue warnings.

    The given dictionary is modified in place and also returned. It may
    contain only the user-specified settings (i.e. the defaults need not
    have been merged in yet), so no key is assumed to be present.

    :param settings: dictionary mapping setting names to values
    :return: the same dictionary, with deprecated keys converted
    :raises Exception: if EXTRA_TEMPLATES_PATHS is combined with a non-empty
        THEME_TEMPLATES_OVERRIDES, or if any ``*_SUBSTITUTIONS`` setting is
        combined with any ``*_REGEX_SUBSTITUTIONS`` setting
    """

    # PLUGIN_PATH -> PLUGIN_PATHS
    if 'PLUGIN_PATH' in settings:
        logger.warning('PLUGIN_PATH setting has been replaced by '
                       'PLUGIN_PATHS, moving it to the new setting name.')
        settings['PLUGIN_PATHS'] = settings['PLUGIN_PATH']
        del settings['PLUGIN_PATH']

    # PLUGIN_PATHS: str -> [str]
    if isinstance(settings.get('PLUGIN_PATHS'), six.string_types):
        logger.warning("Defining PLUGIN_PATHS setting as string "
                       "has been deprecated (should be a list)")
        settings['PLUGIN_PATHS'] = [settings['PLUGIN_PATHS']]

    # JINJA_EXTENSIONS -> JINJA_ENVIRONMENT > extensions
    if 'JINJA_EXTENSIONS' in settings:
        logger.warning('JINJA_EXTENSIONS setting has been deprecated, '
                       'moving it to JINJA_ENVIRONMENT setting.')
        # JINJA_ENVIRONMENT may be missing when only user settings are
        # passed in; start from a copy of the default (if any) so that the
        # assignment below cannot raise KeyError and DEFAULT_CONFIG itself
        # is never mutated.
        if 'JINJA_ENVIRONMENT' not in settings:
            settings['JINJA_ENVIRONMENT'] = dict(
                DEFAULT_CONFIG.get('JINJA_ENVIRONMENT', {}))
        settings['JINJA_ENVIRONMENT']['extensions'] = \
            settings['JINJA_EXTENSIONS']
        del settings['JINJA_EXTENSIONS']

    # {ARTICLE,PAGE}_DIR -> {ARTICLE,PAGE}_PATHS
    for key in ['ARTICLE', 'PAGE']:
        old_key = key + '_DIR'
        new_key = key + '_PATHS'
        if old_key in settings:
            logger.warning(
                'Deprecated setting %s, moving it to %s list',
                old_key, new_key)
            settings[new_key] = [settings[old_key]]  # also make a list
            del settings[old_key]

    # EXTRA_TEMPLATES_PATHS -> THEME_TEMPLATES_OVERRIDES
    if 'EXTRA_TEMPLATES_PATHS' in settings:
        logger.warning('EXTRA_TEMPLATES_PATHS is deprecated use '
                       'THEME_TEMPLATES_OVERRIDES instead.')
        if ('THEME_TEMPLATES_OVERRIDES' in settings and
                settings['THEME_TEMPLATES_OVERRIDES']):
            raise Exception(
                'Setting both EXTRA_TEMPLATES_PATHS and '
                'THEME_TEMPLATES_OVERRIDES is not permitted. Please move to '
                'only setting THEME_TEMPLATES_OVERRIDES.')
        settings['THEME_TEMPLATES_OVERRIDES'] = \
            settings['EXTRA_TEMPLATES_PATHS']
        del settings['EXTRA_TEMPLATES_PATHS']

    # MD_EXTENSIONS -> MARKDOWN
    if 'MD_EXTENSIONS' in settings:
        logger.warning('MD_EXTENSIONS is deprecated use MARKDOWN '
                       'instead. Falling back to the default.')
        settings['MARKDOWN'] = DEFAULT_CONFIG['MARKDOWN']

    # LESS_GENERATOR -> Webassets plugin
    # FILES_TO_COPY -> STATIC_PATHS, EXTRA_PATH_METADATA
    # These settings were removed outright: only warn, nothing is converted.
    for old, new, doc in [
            ('LESS_GENERATOR', 'the Webassets plugin', None),
            ('FILES_TO_COPY', 'STATIC_PATHS and EXTRA_PATH_METADATA',
             'https://github.com/getpelican/pelican/'
             'blob/master/docs/settings.rst#path-metadata'),
    ]:
        if old in settings:
            message = 'The {} setting has been removed in favor of {}'.format(
                old, new)
            if doc:
                message += ', see {} for details'.format(doc)
            logger.warning(message)

    # PAGINATED_DIRECT_TEMPLATES -> PAGINATED_TEMPLATES
    if 'PAGINATED_DIRECT_TEMPLATES' in settings:
        message = 'The {} setting has been removed in favor of {}'.format(
            'PAGINATED_DIRECT_TEMPLATES', 'PAGINATED_TEMPLATES')
        logger.warning(message)
        # Same reasoning as for JINJA_ENVIRONMENT above: the new-style
        # setting may not be present yet, so seed it from a copy of the
        # default (if any) instead of raising KeyError.
        if 'PAGINATED_TEMPLATES' not in settings:
            settings['PAGINATED_TEMPLATES'] = dict(
                DEFAULT_CONFIG.get('PAGINATED_TEMPLATES', {}))
        for t in settings['PAGINATED_DIRECT_TEMPLATES']:
            if t not in settings['PAGINATED_TEMPLATES']:
                settings['PAGINATED_TEMPLATES'][t] = None
        del settings['PAGINATED_DIRECT_TEMPLATES']

    # {SLUG,CATEGORY,TAG,AUTHOR}_SUBSTITUTIONS ->
    # {SLUG,CATEGORY,TAG,AUTHOR}_REGEX_SUBSTITUTIONS
    url_settings_url = \
        'http://docs.getpelican.com/en/latest/settings.html#url-settings'
    flavours = {'SLUG', 'CATEGORY', 'TAG', 'AUTHOR'}
    old_values = {f: settings[f + '_SUBSTITUTIONS']
                  for f in flavours if f + '_SUBSTITUTIONS' in settings}
    new_values = {f: settings[f + '_REGEX_SUBSTITUTIONS']
                  for f in flavours if f + '_REGEX_SUBSTITUTIONS' in settings}
    if old_values and new_values:
        raise Exception(
            'Setting both {new_key} and {old_key} (or variants thereof) is '
            'not permitted. Please move to only setting {new_key}.'
            .format(old_key='SLUG_SUBSTITUTIONS',
                    new_key='SLUG_REGEX_SUBSTITUTIONS'))
    if old_values:
        message = ('{} and variants thereof are deprecated and will be '
                   'removed in the future. Please use {} and variants thereof '
                   'instead. Check {}.'
                   .format('SLUG_SUBSTITUTIONS', 'SLUG_REGEX_SUBSTITUTIONS',
                           url_settings_url))
        logger.warning(message)
        if old_values.get('SLUG'):
            # A non-empty SLUG list is prepended to the CATEGORY and TAG
            # lists when those are set.
            for f in {'CATEGORY', 'TAG'}:
                if old_values.get(f):
                    # The legacy settings could be tuples or lists; coerce
                    # both sides so that mixing the two cannot raise
                    # TypeError on concatenation.
                    old_values[f] = (list(old_values['SLUG']) +
                                     list(old_values[f]))
            # Ensure AUTHOR gets an explicit (possibly empty) entry so the
            # loop below always writes AUTHOR_REGEX_SUBSTITUTIONS when a
            # SLUG list was given.
            old_values['AUTHOR'] = old_values.get('AUTHOR', [])
        for f in flavours:
            if old_values.get(f) is not None:
                regex_subs = []
                # by default will replace non-alphanum characters
                replace = True
                for tpl in old_values[f]:
                    try:
                        src, dst, skip = tpl
                        if skip:
                            replace = False
                    except ValueError:
                        # 2-tuple form: no skip flag given
                        src, dst = tpl
                    # Old substitutions were literal strings: escape the
                    # pattern, and double backslashes in the replacement so
                    # re.sub does not read them as escape sequences.
                    regex_subs.append(
                        (re.escape(src), dst.replace('\\', r'\\')))
                if replace:
                    regex_subs += [
                        (r'[^\w\s-]', ''),
                        (r'(?u)\A\s*', ''),
                        (r'(?u)\s*\Z', ''),
                        (r'[-\s]+', '-'),
                    ]
                else:
                    # A skip flag was set: only strip surrounding whitespace
                    regex_subs += [
                        (r'(?u)\A\s*', ''),
                        (r'(?u)\s*\Z', ''),
                    ]
                settings[f + '_REGEX_SUBSTITUTIONS'] = regex_subs
            settings.pop(f + '_SUBSTITUTIONS', None)

    return settings
def configure_settings(settings):
"""Provide optimizations, error checking, and warnings for the given
settings.
@ -377,31 +509,6 @@ def configure_settings(settings):
key=lambda r: r[0],
)
# move {ARTICLE,PAGE}_DIR -> {ARTICLE,PAGE}_PATHS
for key in ['ARTICLE', 'PAGE']:
old_key = key + '_DIR'
new_key = key + '_PATHS'
if old_key in settings:
logger.warning(
'Deprecated setting %s, moving it to %s list',
old_key, new_key)
settings[new_key] = [settings[old_key]] # also make a list
del settings[old_key]
# Deprecated warning of EXTRA_TEMPLATES_PATHS
if 'EXTRA_TEMPLATES_PATHS' in settings:
logger.warning('EXTRA_TEMPLATES_PATHS is deprecated use '
'THEME_TEMPLATES_OVERRIDES instead.')
if ('THEME_TEMPLATES_OVERRIDES' in settings and
settings['THEME_TEMPLATES_OVERRIDES']):
raise Exception(
'Setting both EXTRA_TEMPLATES_PATHS and '
'THEME_TEMPLATES_OVERRIDES is not permitted. Please move to '
'only setting THEME_TEMPLATES_OVERRIDES.')
settings['THEME_TEMPLATES_OVERRIDES'] = \
settings['EXTRA_TEMPLATES_PATHS']
del settings['EXTRA_TEMPLATES_PATHS']
# Save people from accidentally setting a string rather than a list
path_keys = (
'ARTICLE_EXCLUDES',
@ -425,12 +532,6 @@ def configure_settings(settings):
PATH_KEY)
settings[PATH_KEY] = DEFAULT_CONFIG[PATH_KEY]
# Deprecated warning of MD_EXTENSIONS
if 'MD_EXTENSIONS' in settings:
logger.warning('MD_EXTENSIONS is deprecated use MARKDOWN '
'instead. Falling back to the default.')
settings['MARKDOWN'] = DEFAULT_CONFIG['MARKDOWN']
# Add {PAGE,ARTICLE}_PATHS to {ARTICLE,PAGE}_EXCLUDES
mutually_exclusive = ('ARTICLE', 'PAGE')
for type_1, type_2 in [mutually_exclusive, mutually_exclusive[::-1]]:
@ -443,27 +544,4 @@ def configure_settings(settings):
except KeyError:
continue # setting not specified, nothing to do
for old, new, doc in [
('LESS_GENERATOR', 'the Webassets plugin', None),
('FILES_TO_COPY', 'STATIC_PATHS and EXTRA_PATH_METADATA',
'https://github.com/getpelican/pelican/'
'blob/master/docs/settings.rst#path-metadata'),
]:
if old in settings:
message = 'The {} setting has been removed in favor of {}'.format(
old, new)
if doc:
message += ', see {} for details'.format(doc)
logger.warning(message)
if 'PAGINATED_DIRECT_TEMPLATES' in settings:
message = 'The {} setting has been removed in favor of {}'.format(
'PAGINATED_DIRECT_TEMPLATES', 'PAGINATED_TEMPLATES')
logger.warning(message)
for t in settings['PAGINATED_DIRECT_TEMPLATES']:
if t not in settings['PAGINATED_TEMPLATES']:
settings['PAGINATED_TEMPLATES'][t] = None
del settings['PAGINATED_DIRECT_TEMPLATES']
return settings

View file

@ -497,7 +497,13 @@ class TestArticle(TestPage):
def test_slugify_category_author(self):
settings = get_settings()
settings['SLUG_SUBSTITUTIONS'] = [('C#', 'csharp')]
settings['SLUG_REGEX_SUBSTITUTIONS'] = [
(r'C#', 'csharp'),
(r'[^\w\s-]', ''),
(r'(?u)\A\s*', ''),
(r'(?u)\s*\Z', ''),
(r'[-\s]+', '-'),
]
settings['ARTICLE_URL'] = '{author}/{category}/{slug}/'
settings['ARTICLE_SAVE_AS'] = '{author}/{category}/{slug}/index.html'
article_kwargs = self._copy_page_kwargs()
@ -513,9 +519,13 @@ class TestArticle(TestPage):
def test_slugify_with_author_substitutions(self):
settings = get_settings()
settings['AUTHOR_SUBSTITUTIONS'] = [
('Alexander Todorov', 'atodorov', False),
('Krasimir Tsonev', 'krasimir', False),
settings['AUTHOR_REGEX_SUBSTITUTIONS'] = [
('Alexander Todorov', 'atodorov'),
('Krasimir Tsonev', 'krasimir'),
(r'[^\w\s-]', ''),
(r'(?u)\A\s*', ''),
(r'(?u)\s*\Z', ''),
(r'[-\s]+', '-'),
]
settings['ARTICLE_URL'] = 'blog/{author}/{slug}/'
settings['ARTICLE_SAVE_AS'] = 'blog/{author}/{slug}/index.html'
@ -530,7 +540,9 @@ class TestArticle(TestPage):
def test_slugify_category_with_dots(self):
settings = get_settings()
settings['CATEGORY_SUBSTITUTIONS'] = [('Fedora QA', 'fedora.qa', True)]
settings['CATEGORY_REGEX_SUBSTITUTIONS'] = [
('Fedora QA', 'fedora.qa'),
]
settings['ARTICLE_URL'] = '{category}/{slug}/'
article_kwargs = self._copy_page_kwargs()
article_kwargs['metadata']['category'] = Category('Fedora QA',
@ -542,7 +554,9 @@ class TestArticle(TestPage):
def test_slugify_tags_with_dots(self):
settings = get_settings()
settings['TAG_SUBSTITUTIONS'] = [('Fedora QA', 'fedora.qa', True)]
settings['TAG_REGEX_SUBSTITUTIONS'] = [
('Fedora QA', 'fedora.qa'),
]
settings['ARTICLE_URL'] = '{tag}/{slug}/'
article_kwargs = self._copy_page_kwargs()
article_kwargs['metadata']['tag'] = Tag('Fedora QA', settings)

View file

@ -6,6 +6,7 @@ import os
import re
from codecs import open
from pelican.settings import DEFAULT_CONFIG
from pelican.tests.support import (mute, skipIfNoExecutable, temporary_folder,
unittest)
from pelican.tools.pelican_import import (blogger2fields, build_header,
@ -133,10 +134,11 @@ class TestWordpressXmlImporter(unittest.TestCase):
with temporary_folder() as temp:
fnames = list(silent_f2p(test_posts, 'markdown',
temp, dircat=True))
subs = DEFAULT_CONFIG['SLUG_REGEX_SUBSTITUTIONS']
index = 0
for post in test_posts:
name = post[2]
category = slugify(post[5][0])
category = slugify(post[5][0], regex_subs=subs)
name += '.md'
filename = os.path.join(category, name)
out_name = fnames[index]
@ -208,11 +210,12 @@ class TestWordpressXmlImporter(unittest.TestCase):
with temporary_folder() as temp:
fnames = list(silent_f2p(test_posts, 'markdown', temp,
wp_custpost=True, dircat=True))
subs = DEFAULT_CONFIG['SLUG_REGEX_SUBSTITUTIONS']
index = 0
for post in test_posts:
name = post[2]
kind = post[8]
category = slugify(post[5][0])
category = slugify(post[5][0], regex_subs=subs)
name += '.md'
filename = os.path.join(kind, category, name)
out_name = fnames[index]

View file

@ -9,7 +9,8 @@ from sys import platform
from pelican.settings import (DEFAULT_CONFIG, DEFAULT_THEME,
configure_settings, read_settings)
configure_settings, handle_deprecated_settings,
read_settings)
from pelican.tests.support import unittest
@ -128,7 +129,7 @@ class TestSettingsConfiguration(unittest.TestCase):
settings['ARTICLE_DIR'] = 'foo'
settings['PAGE_DIR'] = 'bar'
configure_settings(settings)
settings = handle_deprecated_settings(settings)
self.assertEqual(settings['ARTICLE_PATHS'], ['foo'])
self.assertEqual(settings['PAGE_PATHS'], ['bar'])
@ -171,7 +172,7 @@ class TestSettingsConfiguration(unittest.TestCase):
settings = self.settings
settings['EXTRA_TEMPLATES_PATHS'] = ['/foo/bar', '/ha']
configure_settings(settings)
settings = handle_deprecated_settings(settings)
self.assertEqual(settings['THEME_TEMPLATES_OVERRIDES'],
['/foo/bar', '/ha'])
@ -181,7 +182,7 @@ class TestSettingsConfiguration(unittest.TestCase):
settings = self.settings
settings['PAGINATED_DIRECT_TEMPLATES'] = ['index', 'archives']
settings['PAGINATED_TEMPLATES'] = {'index': 10, 'category': None}
settings = configure_settings(settings)
settings = handle_deprecated_settings(settings)
self.assertEqual(settings['PAGINATED_TEMPLATES'],
{'index': 10, 'category': None, 'archives': None})
self.assertNotIn('PAGINATED_DIRECT_TEMPLATES', settings)
@ -191,4 +192,82 @@ class TestSettingsConfiguration(unittest.TestCase):
settings['EXTRA_TEMPLATES_PATHS'] = ['/ha']
settings['THEME_TEMPLATES_OVERRIDES'] = ['/foo/bar']
self.assertRaises(Exception, configure_settings, settings)
self.assertRaises(Exception, handle_deprecated_settings, settings)
def test_slug_and_slug_regex_substitutions_exception(self):
    """Combining a legacy substitutions key with a regex one must raise."""
    settings = {
        'SLUG_REGEX_SUBSTITUTIONS': [('C++', 'cpp')],
        'TAG_SUBSTITUTIONS': [('C#', 'csharp')],
    }
    with self.assertRaises(Exception):
        handle_deprecated_settings(settings)
def test_deprecated_slug_substitutions(self):
    """Check conversion of legacy *_SUBSTITUTIONS to *_REGEX_SUBSTITUTIONS."""
    default_slug_regex_subs = self.settings['SLUG_REGEX_SUBSTITUTIONS']

    # Expected escaped forms of the legacy literal substitutions
    cpp_regex = [(r'C\+\+', 'cpp')]
    csharp_regex = [(r'C\#', 'csharp')]
    todorov_regex = [(r'Alexander\ Todorov', 'atodorov')]

    # If no deprecated setting is set, don't set new ones
    converted = handle_deprecated_settings({})
    for absent_key in ('SLUG_REGEX_SUBSTITUTIONS',
                       'TAG_REGEX_SUBSTITUTIONS',
                       'CATEGORY_REGEX_SUBSTITUTIONS',
                       'AUTHOR_REGEX_SUBSTITUTIONS'):
        self.assertNotIn(absent_key, converted)

    # If SLUG_SUBSTITUTIONS is set, set {SLUG, AUTHOR}_REGEX_SUBSTITUTIONS
    # correctly, don't set {CATEGORY, TAG}_REGEX_SUBSTITUTIONS
    converted = handle_deprecated_settings({
        'SLUG_SUBSTITUTIONS': [('C++', 'cpp')],
    })
    self.assertEqual(converted.get('SLUG_REGEX_SUBSTITUTIONS'),
                     cpp_regex + default_slug_regex_subs)
    self.assertNotIn('TAG_REGEX_SUBSTITUTIONS', converted)
    self.assertNotIn('CATEGORY_REGEX_SUBSTITUTIONS', converted)
    self.assertEqual(converted.get('AUTHOR_REGEX_SUBSTITUTIONS'),
                     default_slug_regex_subs)

    # If {CATEGORY, TAG, AUTHOR}_SUBSTITUTIONS are set, set
    # {CATEGORY, TAG, AUTHOR}_REGEX_SUBSTITUTIONS correctly, don't set
    # SLUG_REGEX_SUBSTITUTIONS
    converted = handle_deprecated_settings({
        'TAG_SUBSTITUTIONS': [('C#', 'csharp')],
        'CATEGORY_SUBSTITUTIONS': [('C#', 'csharp')],
        'AUTHOR_SUBSTITUTIONS': [('Alexander Todorov', 'atodorov')],
    })
    self.assertNotIn('SLUG_REGEX_SUBSTITUTIONS', converted)
    self.assertEqual(converted['TAG_REGEX_SUBSTITUTIONS'],
                     csharp_regex + default_slug_regex_subs)
    self.assertEqual(converted['CATEGORY_REGEX_SUBSTITUTIONS'],
                     csharp_regex + default_slug_regex_subs)
    self.assertEqual(converted['AUTHOR_REGEX_SUBSTITUTIONS'],
                     todorov_regex + default_slug_regex_subs)

    # If {SLUG, CATEGORY, TAG, AUTHOR}_SUBSTITUTIONS are set, set
    # {SLUG, CATEGORY, TAG, AUTHOR}_REGEX_SUBSTITUTIONS correctly
    converted = handle_deprecated_settings({
        'SLUG_SUBSTITUTIONS': [('C++', 'cpp')],
        'TAG_SUBSTITUTIONS': [('C#', 'csharp')],
        'CATEGORY_SUBSTITUTIONS': [('C#', 'csharp')],
        'AUTHOR_SUBSTITUTIONS': [('Alexander Todorov', 'atodorov')],
    })
    self.assertEqual(converted['TAG_REGEX_SUBSTITUTIONS'],
                     cpp_regex + csharp_regex + default_slug_regex_subs)
    self.assertEqual(converted['CATEGORY_REGEX_SUBSTITUTIONS'],
                     cpp_regex + csharp_regex + default_slug_regex_subs)
    self.assertEqual(converted['AUTHOR_REGEX_SUBSTITUTIONS'],
                     todorov_regex + default_slug_regex_subs)

    # Handle old 'skip' flags correctly
    converted = handle_deprecated_settings({
        'SLUG_SUBSTITUTIONS': [('C++', 'cpp', True)],
        'AUTHOR_SUBSTITUTIONS': [('Alexander Todorov', 'atodorov', False)],
    })
    strip_only = [(r'(?u)\A\s*', ''), (r'(?u)\s*\Z', '')]
    self.assertEqual(converted.get('SLUG_REGEX_SUBSTITUTIONS'),
                     cpp_regex + strip_only)
    self.assertEqual(converted['AUTHOR_REGEX_SUBSTITUTIONS'],
                     todorov_regex + default_slug_regex_subs)

View file

@ -55,30 +55,29 @@ class TestURLWrapper(unittest.TestCase):
self.assertEqual(author, author_equal)
cat_ascii = Category('指導書', settings={})
self.assertEqual(cat_ascii, u'zhi-dao-shu')
self.assertEqual(cat_ascii, u'zhi dao shu')
def test_slugify_with_substitutions_and_dots(self):
tag = Tag('Tag Dot',
settings={
'TAG_SUBSTITUTIONS': [('Tag Dot', 'tag.dot', True)]
})
tag = Tag('Tag Dot', settings={'TAG_REGEX_SUBSTITUTIONS': [
('Tag Dot', 'tag.dot'),
]})
cat = Category('Category Dot',
settings={
'CATEGORY_SUBSTITUTIONS': (('Category Dot',
'cat.dot',
True),)
})
settings={'CATEGORY_REGEX_SUBSTITUTIONS': [
('Category Dot', 'cat.dot'),
]})
self.assertEqual(tag.slug, 'tag.dot')
self.assertEqual(cat.slug, 'cat.dot')
def test_author_slug_substitutions(self):
settings = {
'AUTHOR_SUBSTITUTIONS': [
('Alexander Todorov', 'atodorov', False),
('Krasimir Tsonev', 'krasimir', False),
]
}
settings = {'AUTHOR_REGEX_SUBSTITUTIONS': [
('Alexander Todorov', 'atodorov'),
('Krasimir Tsonev', 'krasimir'),
(r'[^\w\s-]', ''),
(r'(?u)\A\s*', ''),
(r'(?u)\s*\Z', ''),
(r'[-\s]+', '-'),
]}
author1 = Author('Mr. Senko', settings=settings)
author2 = Author('Alexander Todorov', settings=settings)

View file

@ -119,8 +119,11 @@ class TestUtils(LoggedTestCase):
('大飯原発4号機、18日夜起動へ',
'da-fan-yuan-fa-4hao-ji-18ri-ye-qi-dong-he'),)
settings = read_settings()
subs = settings['SLUG_REGEX_SUBSTITUTIONS']
for value, expected in samples:
self.assertEqual(utils.slugify(value), expected)
self.assertEqual(utils.slugify(value, regex_subs=subs), expected)
def test_slugify_substitute(self):
@ -129,21 +132,27 @@ class TestUtils(LoggedTestCase):
('c++, c#, C#, C++', 'cpp-c-sharp-c-sharp-cpp'),
('c++-streams', 'cpp-streams'),)
subs = (('C++', 'CPP'), ('C#', 'C-SHARP'))
settings = read_settings()
subs = [
(r'C\+\+', 'CPP'),
(r'C#', 'C-SHARP'),
] + settings['SLUG_REGEX_SUBSTITUTIONS']
for value, expected in samples:
self.assertEqual(utils.slugify(value, subs), expected)
self.assertEqual(utils.slugify(value, regex_subs=subs), expected)
def test_slugify_substitute_and_keeping_non_alphanum(self):
samples = (('Fedora QA', 'fedora.qa'),
('C++ is used by Fedora QA', 'cpp is used by fedora.qa'),
('C++ is based on C', 'cpp-is-based-on-c'),
('C+++ test C+ test', 'cpp-test-c-test'),)
('C++ is based on C', 'cpp is based on c'),
('C+++ test C+ test', 'cpp+ test c+ test'),)
subs = (('Fedora QA', 'fedora.qa', True),
('c++', 'cpp'),)
subs = [
(r'Fedora QA', 'fedora.qa'),
(r'c\+\+', 'cpp'),
]
for value, expected in samples:
self.assertEqual(utils.slugify(value, subs), expected)
self.assertEqual(utils.slugify(value, regex_subs=subs), expected)
def test_get_relative_path(self):

View file

@ -17,6 +17,7 @@ from six.moves.urllib.request import urlretrieve
# because logging.setLoggerClass has to be called before logging.getLogger
from pelican.log import init
from pelican.settings import read_settings
from pelican.utils import SafeDatetime, slugify
try:
@ -291,6 +292,8 @@ def dc2fields(file):
print("%i posts read." % len(posts))
settings = read_settings()
subs = settings['SLUG_REGEX_SUBSTITUTIONS']
for post in posts:
fields = post.split('","')
@ -383,8 +386,9 @@ def dc2fields(file):
kind = 'article' # TODO: Recognise pages
status = 'published' # TODO: Find a way for draft posts
yield (post_title, content, slugify(post_title), post_creadt, author,
categories, tags, status, kind, post_format)
yield (post_title, content, slugify(post_title, regex_subs=subs),
post_creadt, author, categories, tags, status, kind,
post_format)
def posterous2fields(api_token, email, password):
@ -418,6 +422,8 @@ def posterous2fields(api_token, email, password):
page = 1
posts = get_posterous_posts(api_token, email, password, page)
settings = read_settings()
subs = settings['SLUG_REGEX_SUBSTITUTIONS']
while len(posts) > 0:
posts = get_posterous_posts(api_token, email, password, page)
page += 1
@ -425,7 +431,7 @@ def posterous2fields(api_token, email, password):
for post in posts:
slug = post.get('slug')
if not slug:
slug = slugify(post.get('title'))
slug = slugify(post.get('title'), regex_subs=subs)
tags = [tag.get('name') for tag in post.get('tags')]
raw_date = post.get('display_date')
date_object = SafeDatetime.strptime(
@ -469,13 +475,15 @@ def tumblr2fields(api_key, blogname):
offset = 0
posts = get_tumblr_posts(api_key, blogname, offset)
settings = read_settings()
subs = settings['SLUG_REGEX_SUBSTITUTIONS']
while len(posts) > 0:
for post in posts:
title = \
post.get('title') or \
post.get('source_title') or \
post.get('type').capitalize()
slug = post.get('slug') or slugify(title)
slug = post.get('slug') or slugify(title, regex_subs=subs)
tags = post.get('tags')
timestamp = post.get('timestamp')
date = SafeDatetime.fromtimestamp(int(timestamp)).strftime(
@ -552,6 +560,8 @@ def feed2fields(file):
"""Read a feed and yield pelican fields"""
import feedparser
d = feedparser.parse(file)
settings = read_settings()
subs = settings['SLUG_REGEX_SUBSTITUTIONS']
for entry in d.entries:
date = (entry.updated_parsed.strftime('%Y-%m-%d %H:%M')
if hasattr(entry, 'updated_parsed') else None)
@ -559,7 +569,7 @@ def feed2fields(file):
tags = ([e['term'] for e in entry.tags]
if hasattr(entry, 'tags') else None)
slug = slugify(entry.title)
slug = slugify(entry.title, regex_subs=subs)
kind = 'article'
yield (entry.title, entry.description, slug, date,
author, [], tags, None, kind, 'html')
@ -621,7 +631,7 @@ def get_ext(out_markup, in_markup='html'):
def get_out_filename(output_path, filename, ext, kind,
dirpage, dircat, categories, wp_custpost):
dirpage, dircat, categories, wp_custpost, slug_subs):
filename = os.path.basename(filename)
# Enforce filename restrictions for various filesystems at once; see
@ -647,12 +657,12 @@ def get_out_filename(output_path, filename, ext, kind,
# create subdirectories with category names
elif kind != 'article':
if wp_custpost:
typename = slugify(kind)
typename = slugify(kind, regex_subs=slug_subs)
else:
typename = ''
kind = 'article'
if dircat and (len(categories) > 0):
catname = slugify(categories[0])
catname = slugify(categories[0], regex_subs=slug_subs)
else:
catname = ''
out_filename = os.path.join(output_path, typename,
@ -661,7 +671,7 @@ def get_out_filename(output_path, filename, ext, kind,
os.makedirs(os.path.join(output_path, typename, catname))
# option to put files in directories with categories names
elif dircat and (len(categories) > 0):
catname = slugify(categories[0])
catname = slugify(categories[0], regex_subs=slug_subs)
out_filename = os.path.join(output_path, catname, filename + ext)
if not os.path.isdir(os.path.join(output_path, catname)):
os.mkdir(os.path.join(output_path, catname))
@ -768,6 +778,9 @@ def fields2pelican(
'requested import action.')
exit(error)
settings = read_settings()
slug_subs = settings['SLUG_REGEX_SUBSTITUTIONS']
for (title, content, filename, date, author, categories, tags, status,
kind, in_markup) in fields:
if filter_author and filter_author != author:
@ -796,7 +809,7 @@ def fields2pelican(
out_filename = get_out_filename(
output_path, filename, ext, kind, dirpage, dircat,
categories, wp_custpost)
categories, wp_custpost, slug_subs)
print(out_filename)
if in_markup in ('html', 'wp-html'):

View file

@ -36,8 +36,9 @@ class URLWrapper(object):
@property
def slug(self):
if self._slug is None:
self._slug = slugify(self.name,
self.settings.get('SLUG_SUBSTITUTIONS', ()))
self._slug = slugify(
self.name,
regex_subs=self.settings.get('SLUG_REGEX_SUBSTITUTIONS', []))
return self._slug
@slug.setter
@ -56,8 +57,8 @@ class URLWrapper(object):
return hash(self.slug)
def _normalize_key(self, key):
subs = self.settings.get('SLUG_SUBSTITUTIONS', ())
return six.text_type(slugify(key, subs))
subs = self.settings.get('SLUG_REGEX_SUBSTITUTIONS', [])
return six.text_type(slugify(key, regex_subs=subs))
def __eq__(self, other):
if isinstance(other, self.__class__):
@ -115,10 +116,11 @@ class Category(URLWrapper):
@property
def slug(self):
if self._slug is None:
substitutions = self.settings.get('SLUG_SUBSTITUTIONS', ())
substitutions += tuple(self.settings.get('CATEGORY_SUBSTITUTIONS',
()))
self._slug = slugify(self.name, substitutions)
if 'CATEGORY_REGEX_SUBSTITUTIONS' in self.settings:
subs = self.settings['CATEGORY_REGEX_SUBSTITUTIONS']
else:
subs = self.settings.get('SLUG_REGEX_SUBSTITUTIONS', [])
self._slug = slugify(self.name, regex_subs=subs)
return self._slug
@ -129,9 +131,11 @@ class Tag(URLWrapper):
@property
def slug(self):
if self._slug is None:
substitutions = self.settings.get('SLUG_SUBSTITUTIONS', ())
substitutions += tuple(self.settings.get('TAG_SUBSTITUTIONS', ()))
self._slug = slugify(self.name, substitutions)
if 'TAG_REGEX_SUBSTITUTIONS' in self.settings:
subs = self.settings['TAG_REGEX_SUBSTITUTIONS']
else:
subs = self.settings.get('SLUG_REGEX_SUBSTITUTIONS', [])
self._slug = slugify(self.name, regex_subs=subs)
return self._slug
@ -139,6 +143,9 @@ class Author(URLWrapper):
@property
def slug(self):
if self._slug is None:
self._slug = slugify(self.name,
self.settings.get('AUTHOR_SUBSTITUTIONS', ()))
if 'AUTHOR_REGEX_SUBSTITUTIONS' in self.settings:
subs = self.settings['AUTHOR_REGEX_SUBSTITUTIONS']
else:
subs = self.settings.get('SLUG_REGEX_SUBSTITUTIONS', [])
self._slug = slugify(self.name, regex_subs=subs)
return self._slug

View file

@ -263,13 +263,14 @@ def pelican_open(filename, mode='rb', strip_crs=(sys.platform == 'win32')):
yield content
def slugify(value, substitutions=()):
def slugify(value, regex_subs=()):
"""
Normalizes string, converts to lowercase, removes non-alpha characters,
and converts spaces to hyphens.
Took from Django sources.
"""
# TODO Maybe steal again from current Django 1.5dev
value = Markup(value).striptags()
# value must be unicode per se
@ -281,37 +282,16 @@ def slugify(value, substitutions=()):
if isinstance(value, six.binary_type):
value = value.decode('ascii')
# still unicode
value = unicodedata.normalize('NFKD', value).lower()
value = unicodedata.normalize('NFKD', value)
# backward compatible covert from 2-tuples to 3-tuples
new_subs = []
for tpl in substitutions:
try:
src, dst, skip = tpl
except ValueError:
src, dst = tpl
skip = False
new_subs.append((src, dst, skip))
substitutions = tuple(new_subs)
for src, dst in regex_subs:
value = re.sub(src, dst, value, flags=re.IGNORECASE)
# by default will replace non-alphanum characters
replace = True
for src, dst, skip in substitutions:
orig_value = value
value = value.replace(src.lower(), dst.lower())
# if replacement was made then skip non-alphanum
# replacement if instructed to do so
if value != orig_value:
replace = replace and not skip
if replace:
value = re.sub(r'[^\w\s-]', '', value).strip()
value = re.sub(r'[-\s]+', '-', value)
else:
value = value.strip()
# convert to lowercase
value = value.lower()
# we want only ASCII chars
value = value.encode('ascii', 'ignore')
value = value.encode('ascii', 'ignore').strip()
# but Pelican should generally use only unicode
return value.decode('ascii')