diff --git a/docs/getting_started.rst b/docs/getting_started.rst
index d09e40c7..eb503295 100644
--- a/docs/getting_started.rst
+++ b/docs/getting_started.rst
@@ -331,11 +331,11 @@ interprets the HTML in a very straightforward manner, reading metadata from
My super title
-
-
-
-
-
+
+
+
+
+
This is the content of my super blog post.
diff --git a/pelican/readers.py b/pelican/readers.py
index bd9f5914..fb2ccfc4 100644
--- a/pelican/readers.py
+++ b/pelican/readers.py
@@ -47,6 +47,8 @@ METADATA_PROCESSORS = {
'author': Author,
}
+logger = logging.getLogger(__name__)
+
class Reader(object):
enabled = True
@@ -199,7 +201,7 @@ class HTMLReader(Reader):
enabled = True
class _HTMLParser(HTMLParser):
- def __init__(self, settings):
+ def __init__(self, settings, filename):
HTMLParser.__init__(self)
self.body = ''
self.metadata = {}
@@ -207,6 +209,8 @@ class HTMLReader(Reader):
self._data_buffer = ''
+ self._filename = filename
+
self._in_top_level = True
self._in_head = False
self._in_title = False
@@ -275,7 +279,11 @@ class HTMLReader(Reader):
def _handle_meta_tag(self, attrs):
name = self._attr_value(attrs, 'name').lower()
- contents = self._attr_value(attrs, 'contents', '')
+ contents = self._attr_value(attrs, 'content', '')
+ if not contents:
+ contents = self._attr_value(attrs, 'contents', '')
+ if contents:
+ logger.warning("Meta tag attribute 'contents' used in file %s, should be changed to 'content'", self._filename)
if name == 'keywords':
name = 'tags'
@@ -288,7 +296,7 @@ class HTMLReader(Reader):
def read(self, filename):
"""Parse content and metadata of HTML files"""
with pelican_open(filename) as content:
- parser = self._HTMLParser(self.settings)
+ parser = self._HTMLParser(self.settings, filename)
parser.feed(content)
parser.close()
diff --git a/pelican/tests/content/article_with_keywords.html b/pelican/tests/content/article_with_keywords.html
index c869f514..0744c754 100644
--- a/pelican/tests/content/article_with_keywords.html
+++ b/pelican/tests/content/article_with_keywords.html
@@ -1,6 +1,6 @@
This is a super article !
-
+
diff --git a/pelican/tests/content/article_with_metadata.html b/pelican/tests/content/article_with_metadata.html
index b108ac8a..b501ea29 100644
--- a/pelican/tests/content/article_with_metadata.html
+++ b/pelican/tests/content/article_with_metadata.html
@@ -1,12 +1,12 @@
This is a super article !
-
-
-
-
-
-
+
+
+
+
+
+
Multi-line metadata should be supported
diff --git a/pelican/tests/content/article_with_metadata_and_contents.html b/pelican/tests/content/article_with_metadata_and_contents.html
new file mode 100644
index 00000000..b108ac8a
--- /dev/null
+++ b/pelican/tests/content/article_with_metadata_and_contents.html
@@ -0,0 +1,15 @@
+
+
+ This is a super article !
+
+
+
+
+
+
+
+
+ Multi-line metadata should be supported
+ as well as inline markup.
+
+
diff --git a/pelican/tests/content/article_with_uppercase_metadata.html b/pelican/tests/content/article_with_uppercase_metadata.html
index 4fe5a9ee..b4cedf39 100644
--- a/pelican/tests/content/article_with_uppercase_metadata.html
+++ b/pelican/tests/content/article_with_uppercase_metadata.html
@@ -1,6 +1,6 @@
This is a super article !
-
+
diff --git a/pelican/tests/test_readers.py b/pelican/tests/test_readers.py
index 14d42325..c67b8a1f 100644
--- a/pelican/tests/test_readers.py
+++ b/pelican/tests/test_readers.py
@@ -350,6 +350,21 @@ class HTMLReaderTest(ReaderTest):
for key, value in expected.items():
self.assertEqual(value, page.metadata[key], key)
+    def test_article_with_metadata_and_contents_attrib(self):
+        """Metadata in the legacy 'contents' meta attribute is still read."""
+        page = self.read_file(path='article_with_metadata_and_contents.html')
+        expected = {
+            'category': 'yeah',
+            'author': 'Alexis Métaireau',
+            'title': 'This is a super article !',
+            'summary': 'Summary and stuff',
+            'date': datetime.datetime(2010, 12, 2, 10, 14),
+            'tags': ['foo', 'bar', 'foobar'],
+            'custom_field': 'http://notmyidea.org',
+        }
+        for key, value in expected.items():
+            self.assertEqual(value, page.metadata[key], key)
+
def test_article_with_null_attributes(self):
page = self.read_file(path='article_with_null_attributes.html')