mirror of
https://github.com/getpelican/pelican.git
synced 2025-10-15 20:28:56 +02:00
Merge pull request #1151 from malept/html-reader-non-conforming-meta-tags
Log a warning when the HTML reader encounters a nonconformant meta tag
This commit is contained in:
commit
c8f5eb54db
4 changed files with 29 additions and 1 deletion
|
|
@@ -302,7 +302,12 @@ class HTMLReader(BaseReader):
|
|||
return result + '>'
|
||||
|
||||
def _handle_meta_tag(self, attrs):
|
||||
name = self._attr_value(attrs, 'name').lower()
|
||||
name = self._attr_value(attrs, 'name')
|
||||
if name is None:
|
||||
attr_serialized = ', '.join(['{}="{}"'.format(k, v) for k, v in attrs])
|
||||
logger.warning("Meta tag in file %s does not have a 'name' attribute, skipping. Attributes: %s", self._filename, attr_serialized)
|
||||
return
|
||||
name = name.lower()
|
||||
contents = self._attr_value(attrs, 'content', '')
|
||||
if not contents:
|
||||
contents = self._attr_value(attrs, 'contents', '')
|
||||
|
|
|
|||
|
|
@@ -0,0 +1,12 @@
|
|||
<html>
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
|
||||
<title>Article with Nonconformant HTML meta tags</title>
|
||||
<meta name="summary" content="Summary and stuff" />
|
||||
</head>
|
||||
<body>
|
||||
Multi-line metadata should be supported
|
||||
as well as <strong>inline markup</strong>.
|
||||
</body>
|
||||
</html>
|
||||
|
|
@@ -86,6 +86,7 @@ class TestArticlesGenerator(unittest.TestCase):
|
|||
['Test mkd File', 'published', 'test', 'article'],
|
||||
['This is a super article !', 'published', 'Yeah', 'article'],
|
||||
['This is a super article !', 'published', 'Yeah', 'article'],
|
||||
['Article with Nonconformant HTML meta tags', 'published', 'Default', 'article'],
|
||||
['This is a super article !', 'published', 'yeah', 'article'],
|
||||
['This is a super article !', 'published', 'yeah', 'article'],
|
||||
['This is a super article !', 'published', 'yeah', 'article'],
|
||||
|
|
|
|||
|
|
@@ -384,3 +384,13 @@ class HTMLReaderTest(ReaderTest):
|
|||
self.assertIn('category', page.metadata, 'Key should be lowercase.')
|
||||
self.assertEqual('Yeah', page.metadata.get('category'),
|
||||
'Value keeps cases.')
|
||||
|
||||
def test_article_with_nonconformant_meta_tags(self):
    # Read the fixture 'article_with_nonconformant_meta_tags.html' and
    # check that the 'summary' and 'title' metadata entries come back
    # with the expected values.
    parsed = self.read_file(
        path='article_with_nonconformant_meta_tags.html')
    wanted = {
        'summary': 'Summary and stuff',
        'title': 'Article with Nonconformant HTML meta tags',
    }

    # The key doubles as the failure message so a mismatch names the
    # offending metadata field.
    for field in wanted:
        self.assertEqual(wanted[field], parsed.metadata[field], field)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue