From 4462d84461fc17700bfa59ab7a24d35388391948 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Vladim=C3=ADr=20Vondru=C5=A1?= <mosra@centrum.cz>
Date: Sun, 10 Dec 2017 21:48:50 +0100
Subject: [PATCH] Fix link replacer to properly escape special HTML characters.

---
 pelican/contents.py | 25 ++++++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/pelican/contents.py b/pelican/contents.py
index a534dbaa..e434d2f9 100644
--- a/pelican/contents.py
+++ b/pelican/contents.py
@@ -26,6 +26,25 @@ from pelican.urlwrappers import (Author, Category, Tag, URLWrapper)  # NOQA
 
 logger = logging.getLogger(__name__)
 
+try:
+    import html
+    html.unescape  # html.escape() is 3.2+, but html.unescape() is 3.4+
+except (ImportError, AttributeError):
+    # Shim for py2.7 / py3.3, see https://wiki.python.org/moin/EscapingHtml
+    from xml.sax.saxutils import escape, unescape
+
+    class html(object):
+        """Minimal stand-in for the stdlib ``html`` module."""
+
+        _html_escape_table = {'"': "&quot;", "'": "&apos;"}
+        _html_unescape_table = {'&quot;': '"', '&apos;': "'"}
+
+        @classmethod
+        def escape(cls, v): return escape(v, cls._html_escape_table)
+
+        @classmethod
+        def unescape(cls, v): return unescape(v, cls._html_unescape_table)
+
 
 @python_2_unicode_compatible
 class Content(object):
@@ -230,9 +249,9 @@ class Content(object):
 
     def _link_replacer(self, siteurl, m):
         what = m.group('what')
-        value = urlparse(m.group('value'))
+        value = urlparse(html.unescape(m.group('value')))
         path = value.path
-        origin = m.group('path')
+        origin = html.unescape(m.group('path'))
 
         # XXX Put this in a different location.
         if what in {'filename', 'attach'}:
@@ -285,7 +304,7 @@ class Content(object):
         # keep all other parts, such as query, fragment, etc.
         parts = list(value)
         parts[2] = origin
-        origin = urlunparse(parts)
+        origin = html.escape(urlunparse(parts))
 
         return ''.join((m.group('markup'), m.group('quote'), origin,
                         m.group('quote')))