Support semicolon-separated author/tag lists.

Idea borrowed from Docutils.  This allows one to write author lists in
"lastname, firstname" format.  The code change also means that readers
whose metadata format can natively represent lists (e.g. Docutils itself,
or MD-Yaml) no longer have to join the items back into a single string for
process_metadata's sake.
This commit is contained in:
Zack Weinberg 2015-03-21 21:54:06 -04:00
commit c918380802
6 changed files with 67 additions and 9 deletions

View file

@ -35,6 +35,12 @@ this metadata in text files via the following syntax (give your file the
:authors: Alexis Metaireau, Conan Doyle
:summary: Short version for index and feeds
Author and tag lists may be semicolon-separated instead, which allows
you to write authors and tags containing commas::
:tags: pelican, publishing tool; pelican, bird
:authors: Metaireau, Alexis; Doyle, Conan
Pelican implements an extension to reStructuredText to enable support for the
``abbr`` HTML tag. To use it, write something like this in your post::

View file

@ -27,11 +27,25 @@ from pelican import signals
from pelican.contents import Page, Category, Tag, Author
from pelican.utils import get_date, pelican_open, FileStampDataCacher, SafeDatetime, posixize_path
def ensure_metadata_list(text):
    """Canonicalize the format of a list of authors or tags.

    This works the same way as Docutils' "authors" field: if ``text`` is
    already a list, those boundaries are preserved; otherwise, it must be
    a string.  If the string contains semicolons, it is split on
    semicolons; otherwise, it is split on commas.  This allows you to
    write author lists in either "Jane Doe, John Doe" or
    "Doe, Jane; Doe, John" format.

    Regardless, all list items undergo .strip() before returning, and
    empty items are discarded.

    :param text: a string, or an iterable of strings.
    :return: a list of stripped, non-empty strings.
    """
    # ``type(u'')`` is ``unicode`` on Python 2 and ``str`` on Python 3, so
    # this accepts every text string type.  Checking only the unicode type
    # would let a native Python 2 ``str`` fall through and be iterated
    # character by character.
    if isinstance(text, (str, type(u''))):
        separator = ';' if ';' in text else ','
        text = text.split(separator)

    stripped = (item.strip() for item in text)
    return [item for item in stripped if item]


def strip_split(text, sep=','):
    """Return a list of stripped, non-empty substrings, delimited by sep."""
    items = [x.strip() for x in text.split(sep)]
    return [x for x in items if x]
# Metadata processors have no way to discard an unwanted value, so we have
@ -50,13 +64,16 @@ def _process_if_nonempty(processor, name, settings):
# Maps a metadata field name to a callable taking ``(value, settings)`` that
# converts the raw value into its processed form.  Processors that would
# otherwise produce an empty result return ``_DISCARD`` instead.
#
# Each key appears exactly once: a repeated key in a dict literal is
# silently overridden by the last occurrence, so earlier duplicates are
# dead code.
METADATA_PROCESSORS = {
    # Tag and author lists accept an existing list, or a string split on
    # semicolons (if any are present) or else on commas.
    'tags': lambda x, y: ([Tag(tag, y) for tag in ensure_metadata_list(x)]
                          or _DISCARD),
    'date': lambda x, y: get_date(x.replace('_', ' ')),
    'modified': lambda x, y: get_date(x),
    'status': lambda x, y: x.strip() or _DISCARD,
    'category': lambda x, y: _process_if_nonempty(Category, x, y),
    'author': lambda x, y: _process_if_nonempty(Author, x, y),
    'authors': lambda x, y: ([Author(author, y)
                              for author in ensure_metadata_list(x)]
                             or _DISCARD),
    'slug': lambda x, y: x.strip() or _DISCARD,
}
@ -179,7 +196,6 @@ class RstReader(BaseReader):
elif element.tagname == 'authors': # author list
name = element.tagname
value = [element.astext() for element in element.children]
value = ','.join(value) # METADATA_PROCESSORS expects a string
else: # standard fields (e.g. address)
name = element.tagname
value = element.astext()

View file

@ -0,0 +1,10 @@
This is an article with multiple authors in list format!
########################################################
:date: 2014-02-09 02:20
:modified: 2014-02-09 02:20
:authors: - Author, First
- Author, Second
The author names are in last,first form to verify that
they are not just getting split on commas.

View file

@ -0,0 +1,6 @@
This is an article with multiple authors in lastname, firstname format!
#######################################################################
:date: 2014-02-09 02:20
:modified: 2014-02-09 02:20
:authors: Author, First; Author, Second

View file

@ -162,6 +162,8 @@ class TestArticlesGenerator(unittest.TestCase):
'article'],
['This is an article with multiple authors!', 'published', 'Default', 'article'],
['This is an article with multiple authors!', 'published', 'Default', 'article'],
['This is an article with multiple authors in list format!', 'published', 'Default', 'article'],
['This is an article with multiple authors in lastname, firstname format!', 'published', 'Default', 'article'],
['This is an article without category !', 'published', 'Default',
'article'],
['This is an article without category !', 'published',
@ -348,11 +350,11 @@ class TestArticlesGenerator(unittest.TestCase):
def test_generate_authors(self):
    """Check authors generation."""
    # Author names: both "First Last" names and "Last, First" names (which
    # must not be split on their internal comma) are expected.
    authors = [author.name for author, _ in self.generator.authors]
    authors_expected = sorted(['Alexis Métaireau',
                               'Author, First', 'Author, Second',
                               'First Author', 'Second Author'])
    self.assertEqual(sorted(authors), authors_expected)
    # test for slug
    authors = [author.slug for author, _ in self.generator.authors]
    authors_expected = ['alexis-metaireau',
                        'author-first', 'author-second',
                        'first-author', 'second-author']
    self.assertEqual(sorted(authors), sorted(authors_expected))
@unittest.skipUnless(MagicMock, 'Needs Mock module')
@ -441,6 +443,7 @@ class TestArticlesGenerator(unittest.TestCase):
authors = sorted([author.name for author, _ in generator.authors])
authors_expected = sorted(['Alexis Métaireau', 'Blogger',
'Author, First', 'Author, Second',
'First Author', 'Second Author'])
self.assertEqual(authors, authors_expected)

View file

@ -324,6 +324,23 @@ class RstReaderTest(ReaderTest):
self.assertDictHasSubset(page.metadata, expected)
def test_article_with_multiple_authors_semicolon(self):
    # A semicolon-separated authors field must keep each "Last, First"
    # name whole instead of splitting it on the comma.
    metadata = self.read_file(
        path='article_with_multiple_authors_semicolon.rst').metadata
    self.assertDictHasSubset(
        metadata, {'authors': ['Author, First', 'Author, Second']})
def test_article_with_multiple_authors_list(self):
    # An authors field that the reader already delivers as a list must
    # keep its item boundaries, commas included.
    metadata = self.read_file(
        path='article_with_multiple_authors_list.rst').metadata
    self.assertDictHasSubset(
        metadata, {'authors': ['Author, First', 'Author, Second']})
@unittest.skipUnless(readers.Markdown, "markdown isn't installed")
class MdReaderTest(ReaderTest):