1
0
Fork 0
forked from github/pelican

Changed meta tag "contents" attribute to "content", to conform to HTML spec. Fixes #918

This commit is contained in:
Kyle Machulis 2013-06-14 12:12:19 -07:00
commit 39dd4a0255
7 changed files with 55 additions and 16 deletions

View file

@ -265,11 +265,11 @@ interprets the HTML in a very straightforward manner, reading metadata from
<html>
<head>
<title>My super title</title>
<meta name="tags" contents="thats, awesome" />
<meta name="date" contents="2012-07-09 22:28" />
<meta name="category" contents="yeah" />
<meta name="author" contents="Alexis Métaireau" />
<meta name="summary" contents="Short version for index and feeds" />
<meta name="tags" content="thats, awesome" />
<meta name="date" content="2012-07-09 22:28" />
<meta name="category" content="yeah" />
<meta name="author" content="Alexis Métaireau" />
<meta name="summary" content="Short version for index and feeds" />
</head>
<body>
This is the content of my super blog post.

View file

@ -5,6 +5,7 @@ import datetime
import logging
import os
import re
import logging
try:
import docutils
import docutils.core
@ -47,6 +48,8 @@ METADATA_PROCESSORS = {
'author': Author,
}
logger = logging.getLogger(__name__)
class Reader(object):
enabled = True
@ -199,7 +202,7 @@ class HTMLReader(Reader):
enabled = True
class _HTMLParser(HTMLParser):
def __init__(self, settings):
def __init__(self, settings, filename):
HTMLParser.__init__(self)
self.body = ''
self.metadata = {}
@ -207,6 +210,8 @@ class HTMLReader(Reader):
self._data_buffer = ''
self._filename = filename
self._in_top_level = True
self._in_head = False
self._in_title = False
@ -275,7 +280,11 @@ class HTMLReader(Reader):
def _handle_meta_tag(self, attrs):
name = self._attr_value(attrs, 'name').lower()
contents = self._attr_value(attrs, 'contents', '')
contents = self._attr_value(attrs, 'content', '')
if not contents:
contents = self._attr_value(attrs, 'contents', '')
if contents:
logger.warning("Meta tag attribute 'contents' used in file %s, should be changed to 'content'", self._filename)
if name == 'keywords':
name = 'tags'
@ -288,7 +297,7 @@ class HTMLReader(Reader):
def read(self, filename):
"""Parse content and metadata of HTML files"""
with pelican_open(filename) as content:
parser = self._HTMLParser(self.settings)
parser = self._HTMLParser(self.settings, filename)
parser.feed(content)
parser.close()

View file

@ -1,6 +1,6 @@
<html>
<head>
<title>This is a super article !</title>
<meta name="keywords" contents="foo, bar, foobar" />
<meta name="keywords" content="foo, bar, foobar" />
</head>
</html>

View file

@ -1,12 +1,12 @@
<html>
<head>
<title>This is a super article !</title>
<meta name="tags" contents="foo, bar, foobar" />
<meta name="date" contents="2010-12-02 10:14" />
<meta name="category" contents="yeah" />
<meta name="author" contents="Alexis Métaireau" />
<meta name="summary" contents="Summary and stuff" />
<meta name="custom_field" contents="http://notmyidea.org" />
<meta name="tags" content="foo, bar, foobar" />
<meta name="date" content="2010-12-02 10:14" />
<meta name="category" content="yeah" />
<meta name="author" content="Alexis Métaireau" />
<meta name="summary" content="Summary and stuff" />
<meta name="custom_field" content="http://notmyidea.org" />
</head>
<body>
Multi-line metadata should be supported

View file

@ -0,0 +1,15 @@
<html>
<head>
<title>This is a super article !</title>
<meta name="tags" contents="foo, bar, foobar" />
<meta name="date" contents="2010-12-02 10:14" />
<meta name="category" contents="yeah" />
<meta name="author" contents="Alexis Métaireau" />
<meta name="summary" contents="Summary and stuff" />
<meta name="custom_field" contents="http://notmyidea.org" />
</head>
<body>
Multi-line metadata should be supported
as well as <strong>inline markup</strong>.
</body>
</html>

View file

@ -1,6 +1,6 @@
<html>
<head>
<title>This is a super article !</title>
<meta name="Category" contents="Yeah" />
<meta name="Category" content="Yeah" />
</head>
</html>

View file

@ -350,6 +350,21 @@ class HTMLReaderTest(ReaderTest):
for key, value in expected.items():
self.assertEqual(value, page.metadata[key], key)
def test_article_with_metadata_and_contents_attrib(self):
    """Check the metadata read from article_with_metadata_and_contents.html.

    Every key in the expected mapping must be present in the parsed
    page's metadata with an equal value; the key is passed as the
    assertion message so a failure names the offending field.
    """
    article = self.read_file(path='article_with_metadata_and_contents.html')
    wanted = {
        'category': 'yeah',
        'author': 'Alexis Métaireau',
        'title': 'This is a super article !',
        'summary': 'Summary and stuff',
        'date': datetime.datetime(2010, 12, 2, 10, 14),
        'tags': ['foo', 'bar', 'foobar'],
        'custom_field': 'http://notmyidea.org',
    }
    for field in wanted:
        # Expected value first, parsed value second, field name as message.
        self.assertEqual(wanted[field], article.metadata[field], field)
def test_article_with_null_attributes(self):
page = self.read_file(path='article_with_null_attributes.html')