Fix quote escaping in read html attributes.

* Wrap HTML attributes in quotes according to their content: if an attribute value contains a double quote, wrap it in single quotes; otherwise wrap it in double quotes.
* Add an escape_html utility to ensure quote entities are converted identically across Python versions.

Fixes #1260
2025-10-15 20:28:56 +02:00 · 2015-10-12 20:31:32 +00:00 · 2015-10-12 20:31:32 +00:00 · d333ed12c6
commit d333ed12c6
parent 661ee49eda
5 changed files with 47 additions and 11 deletions
--- a/pelican/tests/test_readers.py
+++ b/pelican/tests/test_readers.py
@@ -587,6 +587,17 @@ class HTMLReaderTest(ReaderTest):
        <input name="test" disabled style="" />
    ''', page.content)

+    def test_article_with_attributes_containing_double_quotes(self):
+        page = self.read_file(path='article_with_attributes_containing_' +
+                                   'double_quotes.html')
+        self.assertEqual('''
+        Ensure that if an attribute value contains a double quote, it is
+        surrounded with single quotes, otherwise with double quotes.
+        <span data-test="'single quoted string'">Span content</span>
+        <span data-test='"double quoted string"'>Span content</span>
+        <span data-test="string without quotes">Span content</span>
+    ''', page.content)
+
    def test_article_metadata_key_lowercase(self):
        # Keys of metadata should be lowercase.
        page = self.read_file(path='article_with_uppercase_metadata.html')