Get HtmlReader to work again

Wrote unit tests and documentation, and improved the regular expression. The HtmlReader is now enabled by default and parses metadata in HTML files given as comments of the form `<!-- key: value -->`.
2025-10-15 20:28:56 +02:00 · 2012-09-02 10:09:08 +02:00 · 2012-09-02 10:09:08 +02:00 · 39db9ddcfd
commit 39db9ddcfd
parent 88555de28c
7 changed files with 72 additions and 15 deletions
--- a/tests/content/article_with_html_metadata.html
+++ b/tests/content/article_with_html_metadata.html
@@ -0,0 +1,13 @@
+<!-- title: A great html article with metadata -->
+<!-- tags: foo, bar, foobar -->
+<!-- date: 2010-12-02 10:14 -->
+<!-- category: yeah -->
+<!-- author: Alexis Métaireau -->
+<!-- summary:
+    Multi-line metadata should be supported
+    as well as <strong>inline markup</strong>.
+    -->
+<!-- custom_field: http://notmyidea.org -->
+
+<h1>This is an article in html with metadata</h1>
+<p>It features very interesting insights.</p>
--- a/tests/test_generators.py
+++ b/tests/test_generators.py
@@ -73,7 +73,8 @@ class TestArticlesGenerator(unittest.TestCase):
            [u'This is an article with category !', 'published', 'yeah', 'article'],
            [u'This is an article without category !', 'published', 'Default', 'article'],
            [u'This is an article without category !', 'published', 'TestCategory', 'article'],
-            [u'This is a super article !', 'published', 'yeah', 'article']
+            [u'This is a super article !', 'published', 'yeah', 'article'],
+            [u'A great html article with metadata', 'published', u'yeah', 'article']
        ]
        self.assertItemsEqual(articles_expected, articles)

--- a/tests/test_readers.py
+++ b/tests/test_readers.py
@@ -90,3 +90,24 @@ class MdReaderTest(unittest.TestCase):
                "<p>This is another markdown test file.  Uses the mkd extension.</p>"
        
        self.assertEqual(content, expected)
+
+
+class HtmlReaderTest(unittest.TestCase):
+
+    def test_article_with_metadata(self):
+        reader = readers.HtmlReader({})
+        content, metadata = reader.read(_filename('article_with_html_metadata.html'))
+        expected = {
+            'category': 'yeah',
+            'author': u'Alexis Métaireau',
+            'title': 'A great html article with metadata',
+            'summary': u'Multi-line metadata should be'\
+                       u' supported\nas well as <strong>inline'\
+                       u' markup</strong>.',
+            'date': datetime.datetime(2010, 12, 2, 10, 14),
+            'tags': ['foo', 'bar', 'foobar'],
+            'custom_field': 'http://notmyidea.org',
+        }
+
+        for key, value in expected.items():
+            self.assertEquals(value, metadata[key], key)