mirror of
https://github.com/getpelican/pelican.git
synced 2025-10-15 20:28:56 +02:00
Merge pull request #1151 from malept/html-reader-non-conforming-meta-tags
Log a warning when the HTML reader encounters a nonconformant meta tag
This commit is contained in:
commit
c8f5eb54db
4 changed files with 29 additions and 1 deletion
|
|
@ -302,7 +302,12 @@ class HTMLReader(BaseReader):
|
||||||
return result + '>'
|
return result + '>'
|
||||||
|
|
||||||
def _handle_meta_tag(self, attrs):
|
def _handle_meta_tag(self, attrs):
|
||||||
name = self._attr_value(attrs, 'name').lower()
|
name = self._attr_value(attrs, 'name')
|
||||||
|
if name is None:
|
||||||
|
attr_serialized = ', '.join(['{}="{}"'.format(k, v) for k, v in attrs])
|
||||||
|
logger.warning("Meta tag in file %s does not have a 'name' attribute, skipping. Attributes: %s", self._filename, attr_serialized)
|
||||||
|
return
|
||||||
|
name = name.lower()
|
||||||
contents = self._attr_value(attrs, 'content', '')
|
contents = self._attr_value(attrs, 'content', '')
|
||||||
if not contents:
|
if not contents:
|
||||||
contents = self._attr_value(attrs, 'contents', '')
|
contents = self._attr_value(attrs, 'contents', '')
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,12 @@
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8" />
|
||||||
|
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
|
||||||
|
<title>Article with Nonconformant HTML meta tags</title>
|
||||||
|
<meta name="summary" content="Summary and stuff" />
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
Multi-line metadata should be supported
|
||||||
|
as well as <strong>inline markup</strong>.
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
|
@ -86,6 +86,7 @@ class TestArticlesGenerator(unittest.TestCase):
|
||||||
['Test mkd File', 'published', 'test', 'article'],
|
['Test mkd File', 'published', 'test', 'article'],
|
||||||
['This is a super article !', 'published', 'Yeah', 'article'],
|
['This is a super article !', 'published', 'Yeah', 'article'],
|
||||||
['This is a super article !', 'published', 'Yeah', 'article'],
|
['This is a super article !', 'published', 'Yeah', 'article'],
|
||||||
|
['Article with Nonconformant HTML meta tags', 'published', 'Default', 'article'],
|
||||||
['This is a super article !', 'published', 'yeah', 'article'],
|
['This is a super article !', 'published', 'yeah', 'article'],
|
||||||
['This is a super article !', 'published', 'yeah', 'article'],
|
['This is a super article !', 'published', 'yeah', 'article'],
|
||||||
['This is a super article !', 'published', 'yeah', 'article'],
|
['This is a super article !', 'published', 'yeah', 'article'],
|
||||||
|
|
|
||||||
|
|
@ -384,3 +384,13 @@ class HTMLReaderTest(ReaderTest):
|
||||||
self.assertIn('category', page.metadata, 'Key should be lowercase.')
|
self.assertIn('category', page.metadata, 'Key should be lowercase.')
|
||||||
self.assertEqual('Yeah', page.metadata.get('category'),
|
self.assertEqual('Yeah', page.metadata.get('category'),
|
||||||
'Value keeps cases.')
|
'Value keeps cases.')
|
||||||
|
|
||||||
|
def test_article_with_nonconformant_meta_tags(self):
|
||||||
|
page = self.read_file(path='article_with_nonconformant_meta_tags.html')
|
||||||
|
expected = {
|
||||||
|
'summary': 'Summary and stuff',
|
||||||
|
'title': 'Article with Nonconformant HTML meta tags',
|
||||||
|
}
|
||||||
|
|
||||||
|
for key, value in expected.items():
|
||||||
|
self.assertEqual(value, page.metadata[key], key)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue