Merge 4462d84461 into bfb2587697

2025-10-15 20:28:56 +02:00 · 2023-10-29 20:09:57 +01:00 · 2023-10-29 20:09:57 +01:00 · 842a81e544
commit 842a81e544
parent bfb2587697 4462d84461
2 changed files with 41 additions and 7 deletions
--- a/pelican/contents.py
+++ b/pelican/contents.py
@ -25,6 +25,25 @@ from pelican.urlwrappers import (Author, Category, Tag, URLWrapper)  # NOQA

 logger = logging.getLogger(__name__)

+try:
+    import html
+except ImportError:
+    # html.escape() is available since Python 3.2 and html.unescape() since
+    # 3.4; emulate both on py2.7, see https://wiki.python.org/moin/EscapingHtml
+    from xml.sax.saxutils import escape, unescape
+
+    class html(object):
+        _html_escape_table = {'"': "&quot;",
+                              "'": "&apos;"}
+        _html_unescape_table = {'&quot;': '"',
+                                '&apos;': "'"}
+
+        @classmethod
+        def escape(cls, v): return escape(v, cls._html_escape_table)
+
+        @classmethod
+        def unescape(cls, v): return unescape(v, cls._html_unescape_table)
+

 class Content:
    """Represents a content.
@ -231,9 +250,9 @@ class Content:

    def _link_replacer(self, siteurl, m):
        what = m.group('what')
-        value = urlparse(m.group('value'))
+        value = urlparse(html.unescape(m.group('value')))
        path = value.path
-        origin = m.group('path')
+        origin = html.unescape(m.group('path'))

        # urllib.parse.urljoin() produces `a.html` for urljoin("..", "a.html")
        # so if RELATIVE_URLS are enabled, we fall back to os.path.join() to
@ -333,7 +352,7 @@ class Content:
        # keep all other parts, such as query, fragment, etc.
        parts = list(value)
        parts[2] = origin
-        origin = urlunparse(parts)
+        origin = html.escape(urlunparse(parts))

        return ''.join((m.group('markup'), m.group('quote'), origin,
                        m.group('quote')))
--- a/pelican/tests/test_contents.py
+++ b/pelican/tests/test_contents.py
@ -362,28 +362,28 @@ class TestPage(TestBase):
        args['content'] = (
            'A simple test, with a '
            '<a href="|filename|article.rst'
-            '?utm_whatever=234&highlight=word">link</a>'
+            '?utm_whatever=234&amp;highlight=word">link</a>'
        )
        content = Page(**args).get_content('http://notmyidea.org')
        self.assertEqual(
            content,
            'A simple test, with a '
            '<a href="http://notmyidea.org/article.html'
-            '?utm_whatever=234&highlight=word">link</a>'
+            '?utm_whatever=234&amp;highlight=word">link</a>'
        )

        # combination
        args['content'] = (
            'A simple test, with a '
            '<a href="|filename|article.rst'
-            '?utm_whatever=234&highlight=word#section-2">link</a>'
+            '?utm_whatever=234&amp;highlight=word#section-2">link</a>'
        )
        content = Page(**args).get_content('http://notmyidea.org')
        self.assertEqual(
            content,
            'A simple test, with a '
            '<a href="http://notmyidea.org/article.html'
-            '?utm_whatever=234&highlight=word#section-2">link</a>'
+            '?utm_whatever=234&amp;highlight=word#section-2">link</a>'
        )

        # also test for summary in metadata
@ -407,6 +407,21 @@ class TestPage(TestBase):
        self.assertEqual(p.summary, linked)
        self.assertEqual(p.custom, linked)

+        # SITEURL with characters that should be escaped
+        args['content'] = (
+            'A simple test, with a '
+            '<a href="|filename|article.rst'
+            '#highlight=&quot;word&quot;">link</a>'
+        )
+        content = Page(**args).get_content('http://notmyidea.org/'
+                                           '?app=blog&path=')
+        self.assertEqual(
+            content,
+            'A simple test, with a '
+            '<a href="http://notmyidea.org/?app=blog&amp;path='
+            '/article.html#highlight=&quot;word&quot;">link</a>'
+        )
+
    def test_intrasite_link_more(self):
        cls_name = '_DummyAsset'