From 7f795ed558f7eb5adabf1c2777db9b430ce121ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jes=C3=BAs=20Fern=C3=A1ndez?= Date: Wed, 26 Aug 2015 11:23:28 +0200 Subject: [PATCH] Remove duplicate tags and authors in metadata --- pelican/readers.py | 5 ++++- .../article_with_duplicate_tags_authors.md | 15 +++++++++++++++ pelican/tests/test_generators.py | 2 ++ pelican/tests/test_readers.py | 10 ++++++++++ 4 files changed, 31 insertions(+), 1 deletion(-) create mode 100644 pelican/tests/content/article_with_duplicate_tags_authors.md diff --git a/pelican/readers.py b/pelican/readers.py index bc4515e7..2e51c4ff 100644 --- a/pelican/readers.py +++ b/pelican/readers.py @@ -4,6 +4,7 @@ from __future__ import print_function, unicode_literals import logging import os import re +from collections import OrderedDict import docutils import docutils.core @@ -72,7 +73,9 @@ def ensure_metadata_list(text): else: text = text.split(',') - return [v for v in (w.strip() for w in text) if v] + return list(OrderedDict.fromkeys( + [v for v in (w.strip() for w in text) if v] + )) def _process_if_nonempty(processor, name, settings): diff --git a/pelican/tests/content/article_with_duplicate_tags_authors.md b/pelican/tests/content/article_with_duplicate_tags_authors.md new file mode 100644 index 00000000..7ab046f9 --- /dev/null +++ b/pelican/tests/content/article_with_duplicate_tags_authors.md @@ -0,0 +1,15 @@ +Title: Test metadata duplicates +Category: test +Tags: foo, bar, foobar, foo, bar +Authors: Author, First; Author, Second; Author, First +Date: 2010-12-02 10:14 +Modified: 2010-12-02 10:20 +Summary: I have a lot to test + +Test Markdown File Header +========================= + +Used for pelican test +--------------------- + +The quick brown fox jumped over the lazy dog's back. diff --git a/pelican/tests/test_generators.py b/pelican/tests/test_generators.py index 2cfca04f..c9aa1cff 100644 --- a/pelican/tests/test_generators.py +++ b/pelican/tests/test_generators.py @@ -160,6 +160,7 @@ class TestArticlesGenerator(unittest.TestCase): ['Test markdown File', 'published', 'test', 'article'], ['Test md File', 'published', 'test', 'article'], ['Test mdown File', 'published', 'test', 'article'], + ['Test metadata duplicates', 'published', 'test', 'article'], ['Test mkd File', 'published', 'test', 'article'], ['This is a super article !', 'published', 'Yeah', 'article'], ['This is a super article !', 'published', 'Yeah', 'article'], @@ -435,6 +436,7 @@ class TestArticlesGenerator(unittest.TestCase): 'Test markdown File', 'Test md File', 'Test mdown File', + 'Test metadata duplicates', 'Test mkd File', 'This is a super article !', 'This is a super article !', diff --git a/pelican/tests/test_readers.py b/pelican/tests/test_readers.py index 71394ee4..5fabc470 100644 --- a/pelican/tests/test_readers.py +++ b/pelican/tests/test_readers.py @@ -516,6 +516,16 @@ class MdReaderTest(ReaderTest): } self.assertDictHasSubset(page.metadata, expected) + def test_duplicate_tags_or_authors_are_removed(self): + reader = readers.MarkdownReader(settings=get_settings()) + content, metadata = reader.read( + _path('article_with_duplicate_tags_authors.md')) + expected = { + 'tags': ['foo', 'bar', 'foobar'], + 'authors': ['Author, First', 'Author, Second'], + } + self.assertDictHasSubset(metadata, expected) + class HTMLReaderTest(ReaderTest): def test_article_with_comments(self):