Changed meta tag "contents" attribute to "content", to conform to HTML spec. Fixes #918

2013-06-14 12:12:19 -07:00 · 2013-06-14 12:12:19 -07:00 · 39dd4a0255
commit 39dd4a0255
parent 8f295f7a03
7 changed files with 55 additions and 16 deletions
--- a/docs/getting_started.rst
+++ b/docs/getting_started.rst
@@ -265,11 +265,11 @@ interprets the HTML in a very straightforward manner, reading metadata from
    <html>
        <head>
            <title>My super title</title>
-            <meta name="tags" contents="thats, awesome" />
-            <meta name="date" contents="2012-07-09 22:28" />
-            <meta name="category" contents="yeah" />
-            <meta name="author" contents="Alexis Métaireau" />
-            <meta name="summary" contents="Short version for index and feeds" />
+            <meta name="tags" content="thats, awesome" />
+            <meta name="date" content="2012-07-09 22:28" />
+            <meta name="category" content="yeah" />
+            <meta name="author" content="Alexis Métaireau" />
+            <meta name="summary" content="Short version for index and feeds" />
        </head>
        <body>
            This is the content of my super blog post.
--- a/pelican/readers.py
+++ b/pelican/readers.py
@@ -5,6 +5,7 @@ import datetime
 import logging
 import os
 import re
+import logging
 try:
    import docutils
    import docutils.core
@@ -47,6 +48,8 @@ METADATA_PROCESSORS = {
    'author': Author,
 }

+logger = logging.getLogger(__name__)
+

 class Reader(object):
    enabled = True
@@ -199,7 +202,7 @@ class HTMLReader(Reader):
    enabled = True

    class _HTMLParser(HTMLParser):
-        def __init__(self, settings):
+        def __init__(self, settings, filename):
            HTMLParser.__init__(self)
            self.body = ''
            self.metadata = {}
@@ -207,6 +210,8 @@ class HTMLReader(Reader):

            self._data_buffer = ''

+            self._filename = filename
+
            self._in_top_level = True
            self._in_head = False
            self._in_title = False
@@ -275,7 +280,11 @@ class HTMLReader(Reader):

        def _handle_meta_tag(self, attrs):
            name = self._attr_value(attrs, 'name').lower()
-            contents = self._attr_value(attrs, 'contents', '')
+            contents = self._attr_value(attrs, 'content', '')
+            if not contents:
+                contents = self._attr_value(attrs, 'contents', '')
+                if contents:
+                    logger.warning("Meta tag attribute 'contents' used in file %s, should be changed to 'content'", self._filename)

            if name == 'keywords':
                name = 'tags'
@@ -288,7 +297,7 @@ class HTMLReader(Reader):
    def read(self, filename):
        """Parse content and metadata of HTML files"""
        with pelican_open(filename) as content:
-            parser = self._HTMLParser(self.settings)
+            parser = self._HTMLParser(self.settings, filename)
            parser.feed(content)
            parser.close()

--- a/pelican/tests/content/article_with_keywords.html
+++ b/pelican/tests/content/article_with_keywords.html
@@ -1,6 +1,6 @@
 <html>
    <head>
        <title>This is a super article !</title>
-        <meta name="keywords" contents="foo, bar, foobar" />
+        <meta name="keywords" content="foo, bar, foobar" />
    </head>
 </html>
--- a/pelican/tests/content/article_with_metadata.html
+++ b/pelican/tests/content/article_with_metadata.html
@@ -1,12 +1,12 @@
 <html>
    <head>
        <title>This is a super article !</title>
-        <meta name="tags" contents="foo, bar, foobar" />
-        <meta name="date" contents="2010-12-02 10:14" />
-        <meta name="category" contents="yeah" />
-        <meta name="author" contents="Alexis Métaireau" />
-        <meta name="summary" contents="Summary and stuff" />
-        <meta name="custom_field" contents="http://notmyidea.org" />
+        <meta name="tags" content="foo, bar, foobar" />
+        <meta name="date" content="2010-12-02 10:14" />
+        <meta name="category" content="yeah" />
+        <meta name="author" content="Alexis Métaireau" />
+        <meta name="summary" content="Summary and stuff" />
+        <meta name="custom_field" content="http://notmyidea.org" />
    </head>
    <body>
        Multi-line metadata should be supported
--- a/pelican/tests/content/article_with_metadata_and_contents.html
+++ b/pelican/tests/content/article_with_metadata_and_contents.html
@@ -0,0 +1,15 @@
+<html>
+    <head>
+        <title>This is a super article !</title>
+        <meta name="tags" contents="foo, bar, foobar" />
+        <meta name="date" contents="2010-12-02 10:14" />
+        <meta name="category" contents="yeah" />
+        <meta name="author" contents="Alexis Métaireau" />
+        <meta name="summary" contents="Summary and stuff" />
+        <meta name="custom_field" contents="http://notmyidea.org" />
+    </head>
+    <body>
+        Multi-line metadata should be supported
+        as well as <strong>inline markup</strong>.
+    </body>
+</html>
--- a/pelican/tests/content/article_with_uppercase_metadata.html
+++ b/pelican/tests/content/article_with_uppercase_metadata.html
@@ -1,6 +1,6 @@
 <html>
    <head>
        <title>This is a super article !</title>
-        <meta name="Category" contents="Yeah" />
+        <meta name="Category" content="Yeah" />
    </head>
 </html>
--- a/pelican/tests/test_readers.py
+++ b/pelican/tests/test_readers.py
@@ -350,6 +350,21 @@ class HTMLReaderTest(ReaderTest):
        for key, value in expected.items():
            self.assertEqual(value, page.metadata[key], key)

+    def test_article_with_metadata_and_contents_attrib(self):
+        page = self.read_file(path='article_with_metadata_and_contents.html')
+        expected = {
+            'category': 'yeah',
+            'author': 'Alexis Métaireau',
+            'title': 'This is a super article !',
+            'summary': 'Summary and stuff',
+            'date': datetime.datetime(2010, 12, 2, 10, 14),
+            'tags': ['foo', 'bar', 'foobar'],
+            'custom_field': 'http://notmyidea.org',
+        }
+        for key, value in expected.items():
+            self.assertEqual(value, page.metadata[key], key)
+
+
    def test_article_with_null_attributes(self):
        page = self.read_file(path='article_with_null_attributes.html')