From 7f795ed558f7eb5adabf1c2777db9b430ce121ad Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jes=C3=BAs=20Fern=C3=A1ndez?= <fernandez.cuesta@gmail.com>
Date: Wed, 26 Aug 2015 11:23:28 +0200
Subject: [PATCH] Remove duplicate tags and authors in metadata

---
 pelican/readers.py                                |  5 ++++-
 .../article_with_duplicate_tags_authors.md        | 15 +++++++++++++++
 pelican/tests/test_generators.py                  |  2 ++
 pelican/tests/test_readers.py                     | 10 ++++++++++
 4 files changed, 31 insertions(+), 1 deletion(-)
 create mode 100644 pelican/tests/content/article_with_duplicate_tags_authors.md

diff --git a/pelican/readers.py b/pelican/readers.py
index bc4515e7..2e51c4ff 100644
--- a/pelican/readers.py
+++ b/pelican/readers.py
@@ -4,6 +4,7 @@ from __future__ import print_function, unicode_literals
 import logging
 import os
 import re
+from collections import OrderedDict
 
 import docutils
 import docutils.core
@@ -72,7 +73,9 @@ def ensure_metadata_list(text):
         else:
             text = text.split(',')
 
-    return [v for v in (w.strip() for w in text) if v]
+    return list(OrderedDict.fromkeys(
+        [v for v in (w.strip() for w in text) if v]
+    ))
 
 
 def _process_if_nonempty(processor, name, settings):
diff --git a/pelican/tests/content/article_with_duplicate_tags_authors.md b/pelican/tests/content/article_with_duplicate_tags_authors.md
new file mode 100644
index 00000000..7ab046f9
--- /dev/null
+++ b/pelican/tests/content/article_with_duplicate_tags_authors.md
@@ -0,0 +1,15 @@
+Title: Test metadata duplicates
+Category: test
+Tags: foo, bar, foobar, foo, bar
+Authors: Author, First; Author, Second; Author, First
+Date: 2010-12-02 10:14
+Modified: 2010-12-02 10:20
+Summary: I have a lot to test
+
+Test Markdown File Header
+=========================
+
+Used for pelican test
+---------------------
+
+The quick brown fox jumped over the lazy dog's back.
diff --git a/pelican/tests/test_generators.py b/pelican/tests/test_generators.py
index 2cfca04f..c9aa1cff 100644
--- a/pelican/tests/test_generators.py
+++ b/pelican/tests/test_generators.py
@@ -160,6 +160,7 @@ class TestArticlesGenerator(unittest.TestCase):
             ['Test markdown File', 'published', 'test', 'article'],
             ['Test md File', 'published', 'test', 'article'],
             ['Test mdown File', 'published', 'test', 'article'],
+            ['Test metadata duplicates', 'published', 'test', 'article'],
             ['Test mkd File', 'published', 'test', 'article'],
             ['This is a super article !', 'published', 'Yeah', 'article'],
             ['This is a super article !', 'published', 'Yeah', 'article'],
@@ -435,6 +436,7 @@ class TestArticlesGenerator(unittest.TestCase):
             'Test markdown File',
             'Test md File',
             'Test mdown File',
+            'Test metadata duplicates',
             'Test mkd File',
             'This is a super article !',
             'This is a super article !',
diff --git a/pelican/tests/test_readers.py b/pelican/tests/test_readers.py
index 71394ee4..5fabc470 100644
--- a/pelican/tests/test_readers.py
+++ b/pelican/tests/test_readers.py
@@ -516,6 +516,16 @@ class MdReaderTest(ReaderTest):
         }
         self.assertDictHasSubset(page.metadata, expected)
 
+    def test_duplicate_tags_or_authors_are_removed(self):
+        # Repeated tags/authors must collapse to first occurrences, in order.
+        md_reader = readers.MarkdownReader(settings=get_settings())
+        source = _path('article_with_duplicate_tags_authors.md')
+        _, metadata = md_reader.read(source)
+        self.assertDictHasSubset(metadata, {
+            'tags': ['foo', 'bar', 'foobar'],
+            'authors': ['Author, First', 'Author, Second'],
+        })
+
 
 class HTMLReaderTest(ReaderTest):
     def test_article_with_comments(self):