Merge pull request #1669 from zackw/semicolon-separated-author-lists

Support semicolon-separated author/tag lists
2015-06-06 09:17:52 -07:00 · 2015-06-06 09:17:52 -07:00 · a8d41e0c57
commit a8d41e0c57
parent 69b8a2d8cc c918380802
6 changed files with 67 additions and 9 deletions
--- a/docs/content.rst
+++ b/docs/content.rst
@@ -35,6 +35,12 @@ this metadata in text files via the following syntax (give your file the
    :authors: Alexis Metaireau, Conan Doyle
    :summary: Short version for index and feeds
 Author and tag lists may be semicolon-separated instead, which allows
 you to write authors and tags containing commas::
    :tags: pelican, publishing tool; pelican, bird
    :authors: Metaireau, Alexis; Doyle, Conan
 Pelican implements an extension to reStructuredText to enable support for the
 ``abbr`` HTML tag. To use it, write something like this in your post::
--- a/pelican/readers.py
+++ b/pelican/readers.py
@@ -27,11 +27,25 @@ from pelican import signals
 from pelican.contents import Page, Category, Tag, Author
 from pelican.utils import get_date, pelican_open, FileStampDataCacher, SafeDatetime, posixize_path
 def ensure_metadata_list(text):
    """Canonicalize the format of a list of authors or tags.  This works
       the same way as Docutils' "authors" field: if it's already a list,
       those boundaries are preserved; otherwise, it must be a string;
       if the string contains semicolons, it is split on semicolons;
       otherwise, it is split on commas.  This allows you to write
       author lists in either "Jane Doe, John Doe" or "Doe, Jane; Doe, John"
       format.
-def strip_split(text, sep=','):
+       Regardless, all list items undergo .strip() before returning, and
-    """Return a list of stripped, non-empty substrings, delimited by sep."""
+       empty items are discarded.
-    items = [x.strip() for x in text.split(sep)]
+    """
-    return [x for x in items if x]
+    if isinstance(text, six.text_type):
        if ';' in text:
            text = text.split(';')
        else:
            text = text.split(',')
    return [v for v in (w.strip() for w in text) if v]
 # Metadata processors have no way to discard an unwanted value, so we have
@@ -50,13 +64,16 @@ def _process_if_nonempty(processor, name, settings):
 METADATA_PROCESSORS = {
-    'tags': lambda x, y: [Tag(tag, y) for tag in strip_split(x)] or _DISCARD,
+    'tags': lambda x, y: ([Tag(tag, y) for tag in ensure_metadata_list(x)]
                          or _DISCARD),
    'date': lambda x, y: get_date(x.replace('_', ' ')),
    'modified': lambda x, y: get_date(x),
    'status': lambda x, y: x.strip() or _DISCARD,
    'category': lambda x, y: _process_if_nonempty(Category, x, y),
    'author': lambda x, y: _process_if_nonempty(Author, x, y),
-    'authors': lambda x, y: [Author(a, y) for a in strip_split(x)] or _DISCARD,
+    'authors': lambda x, y: ([Author(author, y)
                              for author in ensure_metadata_list(x)]
                             or _DISCARD),
    'slug': lambda x, y: x.strip() or _DISCARD,
 }
@@ -179,7 +196,6 @@ class RstReader(BaseReader):
                elif element.tagname == 'authors':  # author list
                    name = element.tagname
                    value = [element.astext() for element in element.children]
-                    value = ','.join(value) # METADATA_PROCESSORS expects a string
                else:  # standard fields (e.g. address)
                    name = element.tagname
                    value = element.astext()
--- a/pelican/tests/content/article_with_multiple_authors_list.rst
+++ b/pelican/tests/content/article_with_multiple_authors_list.rst
@@ -0,0 +1,10 @@
 This is an article with multiple authors in list format!
 ########################################################
 :date: 2014-02-09 02:20
 :modified: 2014-02-09 02:20
 :authors: - Author, First
          - Author, Second
 The author names are in last,first form to verify that
 they are not just getting split on commas.
--- a/pelican/tests/content/article_with_multiple_authors_semicolon.rst
+++ b/pelican/tests/content/article_with_multiple_authors_semicolon.rst
@@ -0,0 +1,6 @@
 This is an article with multiple authors in lastname, firstname format!
 #######################################################################
 :date: 2014-02-09 02:20
 :modified: 2014-02-09 02:20
 :authors: Author, First; Author, Second
--- a/pelican/tests/test_generators.py
+++ b/pelican/tests/test_generators.py
@@ -162,6 +162,8 @@ class TestArticlesGenerator(unittest.TestCase):
             'article'],
            ['This is an article with multiple authors!', 'published', 'Default', 'article'],
            ['This is an article with multiple authors!', 'published', 'Default', 'article'],
+            ['This is an article with multiple authors in list format!', 'published', 'Default', 'article'],
+            ['This is an article with multiple authors in lastname, firstname format!', 'published', 'Default', 'article'],
            ['This is an article without category !', 'published', 'Default',
             'article'],
            ['This is an article without category !', 'published',
@@ -348,11 +350,11 @@ class TestArticlesGenerator(unittest.TestCase):
    def test_generate_authors(self):
        """Check authors generation."""
        authors = [author.name for author, _ in self.generator.authors]
-        authors_expected = sorted(['Alexis Métaireau', 'First Author', 'Second Author'])
+        authors_expected = sorted(['Alexis Métaireau', 'Author, First', 'Author, Second', 'First Author', 'Second Author'])
        self.assertEqual(sorted(authors), authors_expected)
        # test for slug
        authors = [author.slug for author, _ in self.generator.authors]
-        authors_expected = ['alexis-metaireau', 'first-author', 'second-author']
+        authors_expected = ['alexis-metaireau', 'author-first', 'author-second', 'first-author', 'second-author']
        self.assertEqual(sorted(authors), sorted(authors_expected))
    @unittest.skipUnless(MagicMock, 'Needs Mock module')
@@ -441,6 +443,7 @@ class TestArticlesGenerator(unittest.TestCase):
        authors = sorted([author.name for author, _ in generator.authors])
        authors_expected = sorted(['Alexis Métaireau', 'Blogger',
+                                   'Author, First', 'Author, Second',
                                   'First Author', 'Second Author'])
        self.assertEqual(authors, authors_expected)
--- a/pelican/tests/test_readers.py
+++ b/pelican/tests/test_readers.py
@@ -324,6 +324,23 @@ class RstReaderTest(ReaderTest):
        self.assertDictHasSubset(page.metadata, expected)
    def test_article_with_multiple_authors_semicolon(self):
        page = self.read_file(
            path='article_with_multiple_authors_semicolon.rst')
        expected = {
            'authors': ['Author, First', 'Author, Second']
        }
        self.assertDictHasSubset(page.metadata, expected)
    def test_article_with_multiple_authors_list(self):
        page = self.read_file(path='article_with_multiple_authors_list.rst')
        expected = {
            'authors': ['Author, First', 'Author, Second']
        }
        self.assertDictHasSubset(page.metadata, expected)
@unittest.skipUnless(readers.Markdown, "markdown isn't installed")
 class MdReaderTest(ReaderTest):