diff --git a/docs/getting_started.rst b/docs/getting_started.rst index d09e40c7..eb503295 100644 --- a/docs/getting_started.rst +++ b/docs/getting_started.rst @@ -331,11 +331,11 @@ interprets the HTML in a very straightforward manner, reading metadata from My super title - - - - - + + + + + This is the content of my super blog post. diff --git a/pelican/readers.py b/pelican/readers.py index bd9f5914..fb2ccfc4 100644 --- a/pelican/readers.py +++ b/pelican/readers.py @@ -5,6 +5,7 @@ import datetime import logging import os import re +import logging try: import docutils import docutils.core @@ -47,6 +48,8 @@ METADATA_PROCESSORS = { 'author': Author, } +logger = logging.getLogger(__name__) + class Reader(object): enabled = True @@ -199,7 +202,7 @@ class HTMLReader(Reader): enabled = True class _HTMLParser(HTMLParser): - def __init__(self, settings): + def __init__(self, settings, filename): HTMLParser.__init__(self) self.body = '' self.metadata = {} @@ -207,6 +210,8 @@ class HTMLReader(Reader): self._data_buffer = '' + self._filename = filename + self._in_top_level = True self._in_head = False self._in_title = False @@ -275,7 +280,11 @@ class HTMLReader(Reader): def _handle_meta_tag(self, attrs): name = self._attr_value(attrs, 'name').lower() - contents = self._attr_value(attrs, 'contents', '') + contents = self._attr_value(attrs, 'content', '') + if not contents: + contents = self._attr_value(attrs, 'contents', '') + if contents: + logger.warning("Meta tag attribute 'contents' used in file %s, should be changed to 'content'", self._filename) if name == 'keywords': name = 'tags' @@ -288,7 +297,7 @@ class HTMLReader(Reader): def read(self, filename): """Parse content and metadata of HTML files""" with pelican_open(filename) as content: - parser = self._HTMLParser(self.settings) + parser = self._HTMLParser(self.settings, filename) parser.feed(content) parser.close() diff --git a/pelican/tests/content/article_with_keywords.html b/pelican/tests/content/article_with_keywords.html index c869f514..0744c754 100644 --- a/pelican/tests/content/article_with_keywords.html +++ b/pelican/tests/content/article_with_keywords.html @@ -1,6 +1,6 @@ This is a super article ! - + diff --git a/pelican/tests/content/article_with_metadata.html b/pelican/tests/content/article_with_metadata.html index b108ac8a..b501ea29 100644 --- a/pelican/tests/content/article_with_metadata.html +++ b/pelican/tests/content/article_with_metadata.html @@ -1,12 +1,12 @@ This is a super article ! - - - - - - + + + + + + Multi-line metadata should be supported diff --git a/pelican/tests/content/article_with_metadata_and_contents.html b/pelican/tests/content/article_with_metadata_and_contents.html new file mode 100644 index 00000000..b108ac8a --- /dev/null +++ b/pelican/tests/content/article_with_metadata_and_contents.html @@ -0,0 +1,15 @@ + + + This is a super article ! + + + + + + + + + Multi-line metadata should be supported + as well as inline markup. + + diff --git a/pelican/tests/content/article_with_uppercase_metadata.html b/pelican/tests/content/article_with_uppercase_metadata.html index 4fe5a9ee..b4cedf39 100644 --- a/pelican/tests/content/article_with_uppercase_metadata.html +++ b/pelican/tests/content/article_with_uppercase_metadata.html @@ -1,6 +1,6 @@ This is a super article ! - + diff --git a/pelican/tests/test_readers.py b/pelican/tests/test_readers.py index 14d42325..c67b8a1f 100644 --- a/pelican/tests/test_readers.py +++ b/pelican/tests/test_readers.py @@ -350,6 +350,21 @@ class HTMLReaderTest(ReaderTest): for key, value in expected.items(): self.assertEqual(value, page.metadata[key], key) + def test_article_with_metadata_and_contents_attrib(self): + page = self.read_file(path='article_with_metadata_and_contents.html') + expected = { + 'category': 'yeah', + 'author': 'Alexis Métaireau', + 'title': 'This is a super article !', + 'summary': 'Summary and stuff', + 'date': datetime.datetime(2010, 12, 2, 10, 14), + 'tags': ['foo', 'bar', 'foobar'], + 'custom_field': 'http://notmyidea.org', + } + for key, value in expected.items(): + self.assertEqual(value, page.metadata[key], key) + + def test_article_with_null_attributes(self): page = self.read_file(path='article_with_null_attributes.html')