mirror of
https://github.com/getpelican/pelican.git
synced 2025-10-15 20:28:56 +02:00
Changed meta tag "contents" attribute to "content", to conform to HTML spec. Fixes #918
This commit is contained in:
parent
8f295f7a03
commit
39dd4a0255
7 changed files with 55 additions and 16 deletions
|
|
@ -265,11 +265,11 @@ interprets the HTML in a very straightforward manner, reading metadata from
|
||||||
<html>
|
<html>
|
||||||
<head>
|
<head>
|
||||||
<title>My super title</title>
|
<title>My super title</title>
|
||||||
<meta name="tags" contents="thats, awesome" />
|
<meta name="tags" content="thats, awesome" />
|
||||||
<meta name="date" contents="2012-07-09 22:28" />
|
<meta name="date" content="2012-07-09 22:28" />
|
||||||
<meta name="category" contents="yeah" />
|
<meta name="category" content="yeah" />
|
||||||
<meta name="author" contents="Alexis Métaireau" />
|
<meta name="author" content="Alexis Métaireau" />
|
||||||
<meta name="summary" contents="Short version for index and feeds" />
|
<meta name="summary" content="Short version for index and feeds" />
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
This is the content of my super blog post.
|
This is the content of my super blog post.
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@ import datetime
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
import logging
|
||||||
try:
|
try:
|
||||||
import docutils
|
import docutils
|
||||||
import docutils.core
|
import docutils.core
|
||||||
|
|
@ -47,6 +48,8 @@ METADATA_PROCESSORS = {
|
||||||
'author': Author,
|
'author': Author,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class Reader(object):
|
class Reader(object):
|
||||||
enabled = True
|
enabled = True
|
||||||
|
|
@ -199,7 +202,7 @@ class HTMLReader(Reader):
|
||||||
enabled = True
|
enabled = True
|
||||||
|
|
||||||
class _HTMLParser(HTMLParser):
|
class _HTMLParser(HTMLParser):
|
||||||
def __init__(self, settings):
|
def __init__(self, settings, filename):
|
||||||
HTMLParser.__init__(self)
|
HTMLParser.__init__(self)
|
||||||
self.body = ''
|
self.body = ''
|
||||||
self.metadata = {}
|
self.metadata = {}
|
||||||
|
|
@ -207,6 +210,8 @@ class HTMLReader(Reader):
|
||||||
|
|
||||||
self._data_buffer = ''
|
self._data_buffer = ''
|
||||||
|
|
||||||
|
self._filename = filename
|
||||||
|
|
||||||
self._in_top_level = True
|
self._in_top_level = True
|
||||||
self._in_head = False
|
self._in_head = False
|
||||||
self._in_title = False
|
self._in_title = False
|
||||||
|
|
@ -275,7 +280,11 @@ class HTMLReader(Reader):
|
||||||
|
|
||||||
def _handle_meta_tag(self, attrs):
|
def _handle_meta_tag(self, attrs):
|
||||||
name = self._attr_value(attrs, 'name').lower()
|
name = self._attr_value(attrs, 'name').lower()
|
||||||
contents = self._attr_value(attrs, 'contents', '')
|
contents = self._attr_value(attrs, 'content', '')
|
||||||
|
if not contents:
|
||||||
|
contents = self._attr_value(attrs, 'contents', '')
|
||||||
|
if contents:
|
||||||
|
logger.warning("Meta tag attribute 'contents' used in file %s, should be changed to 'content'", self._filename)
|
||||||
|
|
||||||
if name == 'keywords':
|
if name == 'keywords':
|
||||||
name = 'tags'
|
name = 'tags'
|
||||||
|
|
@ -288,7 +297,7 @@ class HTMLReader(Reader):
|
||||||
def read(self, filename):
|
def read(self, filename):
|
||||||
"""Parse content and metadata of HTML files"""
|
"""Parse content and metadata of HTML files"""
|
||||||
with pelican_open(filename) as content:
|
with pelican_open(filename) as content:
|
||||||
parser = self._HTMLParser(self.settings)
|
parser = self._HTMLParser(self.settings, filename)
|
||||||
parser.feed(content)
|
parser.feed(content)
|
||||||
parser.close()
|
parser.close()
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
<html>
|
<html>
|
||||||
<head>
|
<head>
|
||||||
<title>This is a super article !</title>
|
<title>This is a super article !</title>
|
||||||
<meta name="keywords" contents="foo, bar, foobar" />
|
<meta name="keywords" content="foo, bar, foobar" />
|
||||||
</head>
|
</head>
|
||||||
</html>
|
</html>
|
||||||
|
|
|
||||||
|
|
@ -1,12 +1,12 @@
|
||||||
<html>
|
<html>
|
||||||
<head>
|
<head>
|
||||||
<title>This is a super article !</title>
|
<title>This is a super article !</title>
|
||||||
<meta name="tags" contents="foo, bar, foobar" />
|
<meta name="tags" content="foo, bar, foobar" />
|
||||||
<meta name="date" contents="2010-12-02 10:14" />
|
<meta name="date" content="2010-12-02 10:14" />
|
||||||
<meta name="category" contents="yeah" />
|
<meta name="category" content="yeah" />
|
||||||
<meta name="author" contents="Alexis Métaireau" />
|
<meta name="author" content="Alexis Métaireau" />
|
||||||
<meta name="summary" contents="Summary and stuff" />
|
<meta name="summary" content="Summary and stuff" />
|
||||||
<meta name="custom_field" contents="http://notmyidea.org" />
|
<meta name="custom_field" content="http://notmyidea.org" />
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
Multi-line metadata should be supported
|
Multi-line metadata should be supported
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,15 @@
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<title>This is a super article !</title>
|
||||||
|
<meta name="tags" contents="foo, bar, foobar" />
|
||||||
|
<meta name="date" contents="2010-12-02 10:14" />
|
||||||
|
<meta name="category" contents="yeah" />
|
||||||
|
<meta name="author" contents="Alexis Métaireau" />
|
||||||
|
<meta name="summary" contents="Summary and stuff" />
|
||||||
|
<meta name="custom_field" contents="http://notmyidea.org" />
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
Multi-line metadata should be supported
|
||||||
|
as well as <strong>inline markup</strong>.
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
<html>
|
<html>
|
||||||
<head>
|
<head>
|
||||||
<title>This is a super article !</title>
|
<title>This is a super article !</title>
|
||||||
<meta name="Category" contents="Yeah" />
|
<meta name="Category" content="Yeah" />
|
||||||
</head>
|
</head>
|
||||||
</html>
|
</html>
|
||||||
|
|
|
||||||
|
|
@ -350,6 +350,21 @@ class HTMLReaderTest(ReaderTest):
|
||||||
for key, value in expected.items():
|
for key, value in expected.items():
|
||||||
self.assertEqual(value, page.metadata[key], key)
|
self.assertEqual(value, page.metadata[key], key)
|
||||||
|
|
||||||
|
def test_article_with_metadata_and_contents_attrib(self):
|
||||||
|
page = self.read_file(path='article_with_metadata_and_contents.html')
|
||||||
|
expected = {
|
||||||
|
'category': 'yeah',
|
||||||
|
'author': 'Alexis Métaireau',
|
||||||
|
'title': 'This is a super article !',
|
||||||
|
'summary': 'Summary and stuff',
|
||||||
|
'date': datetime.datetime(2010, 12, 2, 10, 14),
|
||||||
|
'tags': ['foo', 'bar', 'foobar'],
|
||||||
|
'custom_field': 'http://notmyidea.org',
|
||||||
|
}
|
||||||
|
for key, value in expected.items():
|
||||||
|
self.assertEqual(value, page.metadata[key], key)
|
||||||
|
|
||||||
|
|
||||||
def test_article_with_null_attributes(self):
|
def test_article_with_null_attributes(self):
|
||||||
page = self.read_file(path='article_with_null_attributes.html')
|
page = self.read_file(path='article_with_null_attributes.html')
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue