1
0
Fork 0
forked from github/pelican

include html comments properly in reader

This commit is contained in:
dave mankoff 2012-06-14 23:16:27 -04:00
commit 0373c15e43
3 changed files with 36 additions and 9 deletions

View file

@ -190,6 +190,8 @@ class HTMLReader(Reader):
def handle_comment(self, data):
    """Process the text of one HTML comment.

    A comment whose stripped text is ``PELICAN_END_SUMMARY``, seen while
    ``self._in_body`` is truthy, stores the text buffered so far as
    ``self.metadata['summary']`` and is not added to the buffer.  Every
    other comment is appended to the buffer verbatim, wrapped again in
    ``<!--`` and ``-->``.
    """
    # Guard clause: anything that is not the in-body summary marker is
    # ordinary content and goes back into the buffer.
    if not self._in_body or data.strip() != 'PELICAN_END_SUMMARY':
        self._data_buffer += '<!--{}-->'.format(data)
        return
    self.metadata['summary'] = self._data_buffer
def handle_data(self, data):
    """Append a run of text to ``self._data_buffer``."""
    self._data_buffer = self._data_buffer + data

View file

@ -0,0 +1,7 @@
<html>
<body>
Summary comment is not included.
<!-- PELICAN_END_SUMMARY -->
<!-- But this comment is (including extra whitespace) -->
</body>
</html>

View file

@ -88,6 +88,33 @@ class MdReaderTest(unittest.TestCase):
self.assertEqual(content, expected)
class HTMLReaderTest(unittest.TestCase):
def test_article_with_comments(self):
    """Check HTMLReader's handling of comments in the fixture article.

    Reads ``article_with_comments.html`` and asserts that the ``summary``
    metadata and the returned content equal the expected strings below.
    The expected content still carries the ordinary comment but not the
    ``PELICAN_END_SUMMARY`` marker.
    """
    reader = readers.HTMLReader({})
    content, metadata = reader.read(_filename('article_with_comments.html'))
    expected = {
        'summary': '''
Summary comment is not included.
''',
    }
    for key, value in expected.items():
        # assertEqual rather than the deprecated assertEquals alias; the
        # key is passed as the failure message.
        self.assertEqual(value, metadata[key], key)
    self.assertEqual('''
Summary comment is not included.
<!-- But this comment is (including extra whitespace) -->
''', content)
def test_article_with_keywords(self):
    """Check the ``tags`` metadata read from ``article_with_keywords.html``."""
    reader = readers.HTMLReader({})
    content, metadata = reader.read(_filename('article_with_keywords.html'))
    expected = {
        'tags': ['foo', 'bar', 'foobar'],
    }
    for key, value in expected.items():
        # assertEqual rather than the deprecated assertEquals alias; the
        # key is passed as the failure message.
        self.assertEqual(value, metadata[key], key)
def test_article_with_metadata(self):
reader = readers.HTMLReader({})
@ -108,15 +135,6 @@ class HTMLReaderTest(unittest.TestCase):
for key, value in expected.items():
self.assertEquals(value, metadata[key], key)
def test_article_with_keywords(self):
    """Check the ``tags`` metadata read from ``article_with_keywords.html``."""
    reader = readers.HTMLReader({})
    content, metadata = reader.read(_filename('article_with_keywords.html'))
    expected = {
        'tags': ['foo', 'bar', 'foobar'],
    }
    for key, value in expected.items():
        # assertEqual rather than the deprecated assertEquals alias; the
        # key is passed as the failure message.
        self.assertEqual(value, metadata[key], key)
def test_article_metadata_key_lowercase(self):
"""Keys of metadata should be lowercase."""