forked from github/pelican
Changed the meta tag "contents" attribute to "content" to conform to the HTML spec. Fixes #918
This commit is contained in:
parent
8f295f7a03
commit
39dd4a0255
7 changed files with 55 additions and 16 deletions
|
|
@ -265,11 +265,11 @@ interprets the HTML in a very straightforward manner, reading metadata from
|
|||
<html>
|
||||
<head>
|
||||
<title>My super title</title>
|
||||
<meta name="tags" contents="thats, awesome" />
|
||||
<meta name="date" contents="2012-07-09 22:28" />
|
||||
<meta name="category" contents="yeah" />
|
||||
<meta name="author" contents="Alexis Métaireau" />
|
||||
<meta name="summary" contents="Short version for index and feeds" />
|
||||
<meta name="tags" content="thats, awesome" />
|
||||
<meta name="date" content="2012-07-09 22:28" />
|
||||
<meta name="category" content="yeah" />
|
||||
<meta name="author" content="Alexis Métaireau" />
|
||||
<meta name="summary" content="Short version for index and feeds" />
|
||||
</head>
|
||||
<body>
|
||||
This is the content of my super blog post.
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ import datetime
|
|||
import logging
|
||||
import os
|
||||
import re
|
||||
import logging
|
||||
try:
|
||||
import docutils
|
||||
import docutils.core
|
||||
|
|
@ -47,6 +48,8 @@ METADATA_PROCESSORS = {
|
|||
'author': Author,
|
||||
}
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Reader(object):
|
||||
enabled = True
|
||||
|
|
@ -199,7 +202,7 @@ class HTMLReader(Reader):
|
|||
enabled = True
|
||||
|
||||
class _HTMLParser(HTMLParser):
|
||||
def __init__(self, settings):
|
||||
def __init__(self, settings, filename):
|
||||
HTMLParser.__init__(self)
|
||||
self.body = ''
|
||||
self.metadata = {}
|
||||
|
|
@ -207,6 +210,8 @@ class HTMLReader(Reader):
|
|||
|
||||
self._data_buffer = ''
|
||||
|
||||
self._filename = filename
|
||||
|
||||
self._in_top_level = True
|
||||
self._in_head = False
|
||||
self._in_title = False
|
||||
|
|
@ -275,7 +280,11 @@ class HTMLReader(Reader):
|
|||
|
||||
def _handle_meta_tag(self, attrs):
|
||||
name = self._attr_value(attrs, 'name').lower()
|
||||
contents = self._attr_value(attrs, 'content', '')
|
||||
if not contents:
|
||||
contents = self._attr_value(attrs, 'contents', '')
|
||||
if contents:
|
||||
logger.warning("Meta tag attribute 'contents' used in file %s, should be changed to 'content'", self._filename)
|
||||
|
||||
if name == 'keywords':
|
||||
name = 'tags'
|
||||
|
|
@ -288,7 +297,7 @@ class HTMLReader(Reader):
|
|||
def read(self, filename):
|
||||
"""Parse content and metadata of HTML files"""
|
||||
with pelican_open(filename) as content:
|
||||
parser = self._HTMLParser(self.settings)
|
||||
parser = self._HTMLParser(self.settings, filename)
|
||||
parser.feed(content)
|
||||
parser.close()
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>This is a super article !</title>
|
||||
<meta name="keywords" contents="foo, bar, foobar" />
|
||||
<meta name="keywords" content="foo, bar, foobar" />
|
||||
</head>
|
||||
</html>
|
||||
|
|
|
|||
|
|
@ -1,12 +1,12 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>This is a super article !</title>
|
||||
<meta name="tags" contents="foo, bar, foobar" />
|
||||
<meta name="date" contents="2010-12-02 10:14" />
|
||||
<meta name="category" contents="yeah" />
|
||||
<meta name="author" contents="Alexis Métaireau" />
|
||||
<meta name="summary" contents="Summary and stuff" />
|
||||
<meta name="custom_field" contents="http://notmyidea.org" />
|
||||
<meta name="tags" content="foo, bar, foobar" />
|
||||
<meta name="date" content="2010-12-02 10:14" />
|
||||
<meta name="category" content="yeah" />
|
||||
<meta name="author" content="Alexis Métaireau" />
|
||||
<meta name="summary" content="Summary and stuff" />
|
||||
<meta name="custom_field" content="http://notmyidea.org" />
|
||||
</head>
|
||||
<body>
|
||||
Multi-line metadata should be supported
|
||||
|
|
|
|||
|
|
@ -0,0 +1,15 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>This is a super article !</title>
|
||||
<meta name="tags" contents="foo, bar, foobar" />
|
||||
<meta name="date" contents="2010-12-02 10:14" />
|
||||
<meta name="category" contents="yeah" />
|
||||
<meta name="author" contents="Alexis Métaireau" />
|
||||
<meta name="summary" contents="Summary and stuff" />
|
||||
<meta name="custom_field" contents="http://notmyidea.org" />
|
||||
</head>
|
||||
<body>
|
||||
Multi-line metadata should be supported
|
||||
as well as <strong>inline markup</strong>.
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>This is a super article !</title>
|
||||
<meta name="Category" contents="Yeah" />
|
||||
<meta name="Category" content="Yeah" />
|
||||
</head>
|
||||
</html>
|
||||
|
|
|
|||
|
|
@ -350,6 +350,21 @@ class HTMLReaderTest(ReaderTest):
|
|||
for key, value in expected.items():
|
||||
self.assertEqual(value, page.metadata[key], key)
|
||||
|
||||
def test_article_with_metadata_and_contents_attrib(self):
|
||||
page = self.read_file(path='article_with_metadata_and_contents.html')
|
||||
expected = {
|
||||
'category': 'yeah',
|
||||
'author': 'Alexis Métaireau',
|
||||
'title': 'This is a super article !',
|
||||
'summary': 'Summary and stuff',
|
||||
'date': datetime.datetime(2010, 12, 2, 10, 14),
|
||||
'tags': ['foo', 'bar', 'foobar'],
|
||||
'custom_field': 'http://notmyidea.org',
|
||||
}
|
||||
for key, value in expected.items():
|
||||
self.assertEqual(value, page.metadata[key], key)
|
||||
|
||||
|
||||
def test_article_with_null_attributes(self):
|
||||
page = self.read_file(path='article_with_null_attributes.html')
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue