Support semicolon-separated author/tag lists.

Idea borrowed from Docutils. This allows one to write author lists in "lastname, firstname" format. The code change also means that readers whose metadata format can natively represent lists (e.g. Docutils itself, or MD-Yaml) don't have to merge them back together for process_metadata's sake.
2025-10-15 20:28:56 +02:00 · 2015-03-21 21:54:06 -04:00 · 2015-03-21 21:54:06 -04:00 · c918380802
commit c918380802
parent 940eb76b7f
6 changed files with 67 additions and 9 deletions
--- a/docs/content.rst
+++ b/docs/content.rst
@@ -35,6 +35,12 @@ this metadata in text files via the following syntax (give your file the
    :authors: Alexis Metaireau, Conan Doyle
    :summary: Short version for index and feeds

+Author and tag lists may be semicolon-separated instead, which allows
+you to write authors and tags containing commas::
+
+    :tags: pelican, publishing tool; pelican, bird
+    :authors: Metaireau, Alexis; Doyle, Conan
+
 Pelican implements an extension to reStructuredText to enable support for the
 ``abbr`` HTML tag. To use it, write something like this in your post::

--- a/pelican/readers.py
+++ b/pelican/readers.py
@@ -27,11 +27,25 @@ from pelican import signals
 from pelican.contents import Page, Category, Tag, Author
 from pelican.utils import get_date, pelican_open, FileStampDataCacher, SafeDatetime, posixize_path

+def ensure_metadata_list(text):
+    """Canonicalize the format of a list of authors or tags.  This works
+       the same way as Docutils' "authors" field: if it's already a list,
+       those boundaries are preserved; otherwise, it must be a string;
+       if the string contains semicolons, it is split on semicolons;
+       otherwise, it is split on commas.  This allows you to write
+       author lists in either "Jane Doe, John Doe" or "Doe, Jane; Doe, John"
+       format.

-def strip_split(text, sep=','):
-    """Return a list of stripped, non-empty substrings, delimited by sep."""
-    items = [x.strip() for x in text.split(sep)]
-    return [x for x in items if x]
+       Regardless, all list items undergo .strip() before returning, and
+       empty items are discarded.
+    """
+    if isinstance(text, six.text_type):
+        if ';' in text:
+            text = text.split(';')
+        else:
+            text = text.split(',')
+
+    return [v for v in (w.strip() for w in text) if v]


 # Metadata processors have no way to discard an unwanted value, so we have
@@ -50,13 +64,16 @@ def _process_if_nonempty(processor, name, settings):


 METADATA_PROCESSORS = {
-    'tags': lambda x, y: [Tag(tag, y) for tag in strip_split(x)] or _DISCARD,
+    'tags': lambda x, y: ([Tag(tag, y) for tag in ensure_metadata_list(x)]
+                          or _DISCARD),
    'date': lambda x, y: get_date(x.replace('_', ' ')),
    'modified': lambda x, y: get_date(x),
    'status': lambda x, y: x.strip() or _DISCARD,
    'category': lambda x, y: _process_if_nonempty(Category, x, y),
    'author': lambda x, y: _process_if_nonempty(Author, x, y),
-    'authors': lambda x, y: [Author(a, y) for a in strip_split(x)] or _DISCARD,
+    'authors': lambda x, y: ([Author(author, y)
+                              for author in ensure_metadata_list(x)]
+                             or _DISCARD),
    'slug': lambda x, y: x.strip() or _DISCARD,
 }

@@ -179,7 +196,6 @@ class RstReader(BaseReader):
                elif element.tagname == 'authors':  # author list
                    name = element.tagname
                    value = [element.astext() for element in element.children]
-                    value = ','.join(value) # METADATA_PROCESSORS expects a string
                else:  # standard fields (e.g. address)
                    name = element.tagname
                    value = element.astext()
--- a/pelican/tests/content/article_with_multiple_authors_list.rst
+++ b/pelican/tests/content/article_with_multiple_authors_list.rst
@@ -0,0 +1,10 @@
+This is an article with multiple authors in list format!
+########################################################
+
+:date: 2014-02-09 02:20
+:modified: 2014-02-09 02:20
+:authors: - Author, First
+          - Author, Second
+
+The author names are in last,first form to verify that
+they are not just getting split on commas.
--- a/pelican/tests/content/article_with_multiple_authors_semicolon.rst
+++ b/pelican/tests/content/article_with_multiple_authors_semicolon.rst
@@ -0,0 +1,6 @@
+This is an article with multiple authors in lastname, firstname format!
+#######################################################################
+
+:date: 2014-02-09 02:20
+:modified: 2014-02-09 02:20
+:authors: Author, First; Author, Second
--- a/pelican/tests/test_generators.py
+++ b/pelican/tests/test_generators.py
@@ -162,6 +162,8 @@ class TestArticlesGenerator(unittest.TestCase):
             'article'],
            ['This is an article with multiple authors!', 'published', 'Default', 'article'],
            ['This is an article with multiple authors!', 'published', 'Default', 'article'],
+            ['This is an article with multiple authors in list format!', 'published', 'Default', 'article'],
+            ['This is an article with multiple authors in lastname, firstname format!', 'published', 'Default', 'article'],
            ['This is an article without category !', 'published', 'Default',
             'article'],
            ['This is an article without category !', 'published',
@@ -348,11 +350,11 @@ class TestArticlesGenerator(unittest.TestCase):
    def test_generate_authors(self):
        """Check authors generation."""
        authors = [author.name for author, _ in self.generator.authors]
-        authors_expected = sorted(['Alexis Métaireau', 'First Author', 'Second Author'])
+        authors_expected = sorted(['Alexis Métaireau', 'Author, First', 'Author, Second', 'First Author', 'Second Author'])
        self.assertEqual(sorted(authors), authors_expected)
        # test for slug
        authors = [author.slug for author, _ in self.generator.authors]
-        authors_expected = ['alexis-metaireau', 'first-author', 'second-author']
+        authors_expected = ['alexis-metaireau', 'author-first', 'author-second', 'first-author', 'second-author']
        self.assertEqual(sorted(authors), sorted(authors_expected))

    @unittest.skipUnless(MagicMock, 'Needs Mock module')
@@ -441,6 +443,7 @@ class TestArticlesGenerator(unittest.TestCase):

        authors = sorted([author.name for author, _ in generator.authors])
        authors_expected = sorted(['Alexis Métaireau', 'Blogger',
+                                   'Author, First', 'Author, Second',
                                   'First Author', 'Second Author'])
        self.assertEqual(authors, authors_expected)

--- a/pelican/tests/test_readers.py
+++ b/pelican/tests/test_readers.py
@@ -324,6 +324,23 @@ class RstReaderTest(ReaderTest):

        self.assertDictHasSubset(page.metadata, expected)

+    def test_article_with_multiple_authors_semicolon(self):
+        page = self.read_file(
+            path='article_with_multiple_authors_semicolon.rst')
+        expected = {
+            'authors': ['Author, First', 'Author, Second']
+        }
+
+        self.assertDictHasSubset(page.metadata, expected)
+
+    def test_article_with_multiple_authors_list(self):
+        page = self.read_file(path='article_with_multiple_authors_list.rst')
+        expected = {
+            'authors': ['Author, First', 'Author, Second']
+        }
+
+        self.assertDictHasSubset(page.metadata, expected)
+
 @unittest.skipUnless(readers.Markdown, "markdown isn't installed")
 class MdReaderTest(ReaderTest):