Get HtmlReader to work again

Wrote unit tests and documentation, and improved the regular expression.
The HtmlReader is now enabled by default and parses metadata in HTML
files of the form:
<!-- key:value -->
This commit is contained in:
Florian Jacob 2012-09-02 10:09:08 +02:00
commit 39db9ddcfd
7 changed files with 72 additions and 15 deletions

View file

@ -0,0 +1,13 @@
<!-- title: A great html article with metadata -->
<!-- tags: foo, bar, foobar -->
<!-- date: 2010-12-02 10:14 -->
<!-- category: yeah -->
<!-- author: Alexis Métaireau -->
<!-- summary:
Multi-line metadata should be supported
as well as <strong>inline markup</strong>.
-->
<!-- custom_field: http://notmyidea.org -->
<h1>This is an article in html with metadata</h1>
<p>It features very interesting insights.</p>

View file

@ -73,7 +73,8 @@ class TestArticlesGenerator(unittest.TestCase):
[u'This is an article with category !', 'published', 'yeah', 'article'],
[u'This is an article without category !', 'published', 'Default', 'article'],
[u'This is an article without category !', 'published', 'TestCategory', 'article'],
[u'This is a super article !', 'published', 'yeah', 'article']
[u'This is a super article !', 'published', 'yeah', 'article'],
[u'A great html article with metadata', 'published', u'yeah', 'article']
]
self.assertItemsEqual(articles_expected, articles)

View file

@ -90,3 +90,24 @@ class MdReaderTest(unittest.TestCase):
"<p>This is another markdown test file. Uses the mkd extension.</p>"
self.assertEqual(content, expected)
class HtmlReaderTest(unittest.TestCase):
    """Tests for ``readers.HtmlReader``."""

    def test_article_with_metadata(self):
        """Read the HTML fixture and check every expected metadata entry.

        The fixture ``article_with_html_metadata.html`` is read through
        ``HtmlReader.read`` and each key listed in ``expected`` must be
        present in the returned metadata with an equal value.
        """
        reader = readers.HtmlReader({})
        content, metadata = reader.read(
            _filename('article_with_html_metadata.html'))
        expected = {
            'category': 'yeah',
            'author': u'Alexis Métaireau',
            'title': 'A great html article with metadata',
            # The summary spans two lines in the fixture and contains
            # inline markup, both of which must be preserved.
            'summary': (u'Multi-line metadata should be'
                        u' supported\nas well as <strong>inline'
                        u' markup</strong>.'),
            'date': datetime.datetime(2010, 12, 2, 10, 14),
            'tags': ['foo', 'bar', 'foobar'],
            'custom_field': 'http://notmyidea.org',
        }

        for key, value in expected.items():
            # Pass the key as the failure message so a mismatch names the
            # offending metadata field. ``assertEqual`` replaces the
            # deprecated ``assertEquals`` alias.
            self.assertEqual(value, metadata[key], key)