Merge pull request #1151 from malept/html-reader-non-conforming-meta-tags

Log a warning when the HTML reader encounters a nonconformant meta tag (one without a `name` attribute) and skip that tag
2025-10-15 20:28:56 +02:00 · 2013-11-09 14:04:38 -08:00 · 2013-11-09 14:04:38 -08:00 · c8f5eb54db
commit c8f5eb54db
parent 455c159b48 35375b19ff
4 changed files with 29 additions and 1 deletions
--- a/pelican/readers.py
+++ b/pelican/readers.py
@@ -302,7 +302,12 @@ class HTMLReader(BaseReader):
            return result + '>'
        def _handle_meta_tag(self, attrs):
-            name = self._attr_value(attrs, 'name').lower()
+            name = self._attr_value(attrs, 'name')
            if name is None:
                attr_serialized = ', '.join(['{}="{}"'.format(k, v) for k, v in attrs])
                logger.warning("Meta tag in file %s does not have a 'name' attribute, skipping. Attributes: %s", self._filename, attr_serialized)
                return
            name = name.lower()
            contents = self._attr_value(attrs, 'content', '')
            if not contents:
                contents = self._attr_value(attrs, 'contents', '')
--- a/pelican/tests/content/article_with_nonconformant_meta_tags.html
+++ b/pelican/tests/content/article_with_nonconformant_meta_tags.html
@@ -0,0 +1,12 @@
 <html>
    <head>
        <meta charset="utf-8" />
        <meta http-equiv="X-UA-Compatible" content="IE=edge" />
        <title>Article with Nonconformant HTML meta tags</title>
        <meta name="summary" content="Summary and stuff" />
    </head>
    <body>
        Multi-line metadata should be supported
        as well as <strong>inline markup</strong>.
    </body>
 </html>
--- a/pelican/tests/test_generators.py
+++ b/pelican/tests/test_generators.py
@@ -86,6 +86,7 @@ class TestArticlesGenerator(unittest.TestCase):
            ['Test mkd File', 'published', 'test', 'article'],
            ['This is a super article !', 'published', 'Yeah', 'article'],
            ['This is a super article !', 'published', 'Yeah', 'article'],
            ['Article with Nonconformant HTML meta tags', 'published', 'Default', 'article'],
            ['This is a super article !', 'published', 'yeah', 'article'],
            ['This is a super article !', 'published', 'yeah', 'article'],
            ['This is a super article !', 'published', 'yeah', 'article'],
--- a/pelican/tests/test_readers.py
+++ b/pelican/tests/test_readers.py
@@ -384,3 +384,13 @@ class HTMLReaderTest(ReaderTest):
        self.assertIn('category', page.metadata, 'Key should be lowercase.')
        self.assertEqual('Yeah', page.metadata.get('category'),
                         'Value keeps cases.')
    def test_article_with_nonconformant_meta_tags(self):
        """Check the metadata read from the nonconformant-meta-tag fixture.

        Reads ``article_with_nonconformant_meta_tags.html`` and asserts that
        the ``summary`` and ``title`` entries of ``page.metadata`` hold the
        expected values. The metadata key is passed as the failure message so
        a mismatch names the offending entry.
        """
        parsed = self.read_file(
            path='article_with_nonconformant_meta_tags.html')
        wanted = {
            'summary': 'Summary and stuff',
            'title': 'Article with Nonconformant HTML meta tags',
        }
        for field in wanted:
            # Index directly (not .get) so a missing key still raises.
            self.assertEqual(wanted[field], parsed.metadata[field], field)