From c9183808021e54aa9b949939ff289b07c4443b7e Mon Sep 17 00:00:00 2001
From: Zack Weinberg <zackw@panix.com>
Date: Sat, 21 Mar 2015 21:54:06 -0400
Subject: [PATCH] Support semicolon-separated author/tag lists.

Idea borrowed from Docutils.  This allows one to write author lists in
"lastname, firstname" format.  The code change also means that readers
whose metadata can natively represent lists (e.g. Docutils itself, or
MD-Yaml) no longer have to join them back into a single string for
process_metadata's sake.
---
 docs/content.rst                              |  6 ++++
 pelican/readers.py                            | 30 ++++++++++++++-----
 .../article_with_multiple_authors_list.rst    | 10 +++++++
 ...rticle_with_multiple_authors_semicolon.rst |  6 ++++
 pelican/tests/test_generators.py              |  7 +++--
 pelican/tests/test_readers.py                 | 17 +++++++++++
 6 files changed, 67 insertions(+), 9 deletions(-)
 create mode 100644 pelican/tests/content/article_with_multiple_authors_list.rst
 create mode 100644 pelican/tests/content/article_with_multiple_authors_semicolon.rst

diff --git a/docs/content.rst b/docs/content.rst
index 4b19515f..0e3310f1 100644
--- a/docs/content.rst
+++ b/docs/content.rst
@@ -35,6 +35,12 @@ this metadata in text files via the following syntax (give your file the
     :authors: Alexis Metaireau, Conan Doyle
     :summary: Short version for index and feeds
 
+Author and tag lists may be semicolon-separated instead; commas are then
+part of the item, so authors and tags may themselves contain commas::
+
+    :tags: pelican, publishing tool; pelican, bird
+    :authors: Metaireau, Alexis; Doyle, Conan
+
 Pelican implements an extension to reStructuredText to enable support for the
 ``abbr`` HTML tag. To use it, write something like this in your post::
 
diff --git a/pelican/readers.py b/pelican/readers.py
index 3656cd96..bbc72a73 100644
--- a/pelican/readers.py
+++ b/pelican/readers.py
@@ -27,11 +27,25 @@ from pelican import signals
 from pelican.contents import Page, Category, Tag, Author
 from pelican.utils import get_date, pelican_open, FileStampDataCacher, SafeDatetime, posixize_path
 
+def ensure_metadata_list(text):
+    """Canonicalize the format of a list of authors or tags.  This works
+       the same way as Docutils' "authors" field: if it's already a list,
+       those boundaries are preserved; otherwise, it must be a string;
+       if the string contains semicolons, it is split on semicolons;
+       otherwise, it is split on commas.  This allows you to write
+       author lists in either "Jane Doe, John Doe" or "Doe, Jane; Doe, John"
+       format.
 
-def strip_split(text, sep=','):
-    """Return a list of stripped, non-empty substrings, delimited by sep."""
-    items = [x.strip() for x in text.split(sep)]
-    return [x for x in items if x]
+       Regardless, all list items undergo .strip() before returning, and
+       empty items are discarded.
+    """
+    if isinstance(text, six.text_type):
+        if ';' in text:
+            text = text.split(';')
+        else:
+            text = text.split(',')
+
+    return [v for v in (w.strip() for w in text) if v]
 
 
 # Metadata processors have no way to discard an unwanted value, so we have
@@ -50,13 +64,16 @@ def _process_if_nonempty(processor, name, settings):
 
 
 METADATA_PROCESSORS = {
-    'tags': lambda x, y: [Tag(tag, y) for tag in strip_split(x)] or _DISCARD,
+    'tags': lambda x, y: ([Tag(tag, y) for tag in ensure_metadata_list(x)]
+                          or _DISCARD),
     'date': lambda x, y: get_date(x.replace('_', ' ')),
     'modified': lambda x, y: get_date(x),
     'status': lambda x, y: x.strip() or _DISCARD,
     'category': lambda x, y: _process_if_nonempty(Category, x, y),
     'author': lambda x, y: _process_if_nonempty(Author, x, y),
-    'authors': lambda x, y: [Author(a, y) for a in strip_split(x)] or _DISCARD,
+    'authors': lambda x, y: ([Author(author, y)
+                              for author in ensure_metadata_list(x)]
+                             or _DISCARD),
     'slug': lambda x, y: x.strip() or _DISCARD,
 }
 
@@ -179,7 +196,6 @@ class RstReader(BaseReader):
                 elif element.tagname == 'authors':  # author list
                     name = element.tagname
                     value = [element.astext() for element in element.children]
-                    value = ','.join(value) # METADATA_PROCESSORS expects a string
                 else:  # standard fields (e.g. address)
                     name = element.tagname
                     value = element.astext()
diff --git a/pelican/tests/content/article_with_multiple_authors_list.rst b/pelican/tests/content/article_with_multiple_authors_list.rst
new file mode 100644
index 00000000..7da5fae2
--- /dev/null
+++ b/pelican/tests/content/article_with_multiple_authors_list.rst
@@ -0,0 +1,10 @@
+This is an article with multiple authors in list format!
+########################################################
+
+:date: 2014-02-09 02:20
+:modified: 2014-02-09 02:20
+:authors: - Author, First
+          - Author, Second
+
+The author names are in last,first form to verify that
+they are not just getting split on commas.
diff --git a/pelican/tests/content/article_with_multiple_authors_semicolon.rst b/pelican/tests/content/article_with_multiple_authors_semicolon.rst
new file mode 100644
index 00000000..fa76ac4e
--- /dev/null
+++ b/pelican/tests/content/article_with_multiple_authors_semicolon.rst
@@ -0,0 +1,6 @@
+This is an article with multiple authors in lastname, firstname format!
+#######################################################################
+
+:date: 2014-02-09 02:20
+:modified: 2014-02-09 02:20
+:authors: Author, First; Author, Second
diff --git a/pelican/tests/test_generators.py b/pelican/tests/test_generators.py
index 4fb70826..90261b7a 100644
--- a/pelican/tests/test_generators.py
+++ b/pelican/tests/test_generators.py
@@ -162,6 +162,8 @@ class TestArticlesGenerator(unittest.TestCase):
              'article'],
             ['This is an article with multiple authors!', 'published', 'Default', 'article'],
             ['This is an article with multiple authors!', 'published', 'Default', 'article'],
+            ['This is an article with multiple authors in list format!', 'published', 'Default', 'article'],
+            ['This is an article with multiple authors in lastname, firstname format!', 'published', 'Default', 'article'],
             ['This is an article without category !', 'published', 'Default',
              'article'],
             ['This is an article without category !', 'published',
@@ -348,11 +350,11 @@ class TestArticlesGenerator(unittest.TestCase):
     def test_generate_authors(self):
         """Check authors generation."""
         authors = [author.name for author, _ in self.generator.authors]
-        authors_expected = sorted(['Alexis Métaireau', 'First Author', 'Second Author'])
+        authors_expected = sorted(['Alexis Métaireau', 'Author, First', 'Author, Second', 'First Author', 'Second Author'])
         self.assertEqual(sorted(authors), authors_expected)
         # test for slug
         authors = [author.slug for author, _ in self.generator.authors]
-        authors_expected = ['alexis-metaireau', 'first-author', 'second-author']
+        authors_expected = ['alexis-metaireau', 'author-first', 'author-second', 'first-author', 'second-author']
         self.assertEqual(sorted(authors), sorted(authors_expected))
 
     @unittest.skipUnless(MagicMock, 'Needs Mock module')
@@ -441,6 +443,7 @@ class TestArticlesGenerator(unittest.TestCase):
 
         authors = sorted([author.name for author, _ in generator.authors])
         authors_expected = sorted(['Alexis Métaireau', 'Blogger',
+                                   'Author, First', 'Author, Second',
                                    'First Author', 'Second Author'])
         self.assertEqual(authors, authors_expected)
 
diff --git a/pelican/tests/test_readers.py b/pelican/tests/test_readers.py
index d390fb48..18e5111e 100644
--- a/pelican/tests/test_readers.py
+++ b/pelican/tests/test_readers.py
@@ -324,6 +324,23 @@ class RstReaderTest(ReaderTest):
 
         self.assertDictHasSubset(page.metadata, expected)
 
+    def test_article_with_multiple_authors_semicolon(self):
+        page = self.read_file(
+            path='article_with_multiple_authors_semicolon.rst')
+        expected = {
+            'authors': ['Author, First', 'Author, Second']
+        }
+
+        self.assertDictHasSubset(page.metadata, expected)
+
+    def test_article_with_multiple_authors_list(self):
+        page = self.read_file(path='article_with_multiple_authors_list.rst')
+        expected = {
+            'authors': ['Author, First', 'Author, Second']
+        }
+
+        self.assertDictHasSubset(page.metadata, expected)
+
 @unittest.skipUnless(readers.Markdown, "markdown isn't installed")
 class MdReaderTest(ReaderTest):