From 4462d84461fc17700bfa59ab7a24d35388391948 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vladim=C3=ADr=20Vondru=C5=A1?= Date: Sun, 10 Dec 2017 21:48:50 +0100 Subject: [PATCH] Fix link replacer to properly escape special HTML characters. --- pelican/contents.py | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/pelican/contents.py b/pelican/contents.py index a534dbaa..e434d2f9 100644 --- a/pelican/contents.py +++ b/pelican/contents.py @@ -26,6 +26,25 @@ from pelican.urlwrappers import (Author, Category, Tag, URLWrapper) # NOQA logger = logging.getLogger(__name__) +try: + import html +except ImportError: + # html.escape()/html.unescape() is since Python 3.2, do this for py2.7 + # https://wiki.python.org/moin/EscapingHtml + from xml.sax.saxutils import escape, unescape + + class html(object): + _html_escape_table = {'"': "&quot;", + "'": "&#x27;"} + _html_unescape_table = {'&quot;': '"', + '&#x27;': "'"} + + @classmethod + def escape(cls, v): return escape(v, cls._html_escape_table) + + @classmethod + def unescape(cls, v): return unescape(v, cls._html_unescape_table) + @python_2_unicode_compatible class Content(object): @@ -230,9 +249,9 @@ class Content(object): def _link_replacer(self, siteurl, m): what = m.group('what') - value = urlparse(m.group('value')) + value = urlparse(html.unescape(m.group('value'))) path = value.path - origin = m.group('path') + origin = html.unescape(m.group('path')) # XXX Put this in a different location. if what in {'filename', 'attach'}: @@ -285,7 +304,7 @@ class Content(object): # keep all other parts, such as query, fragment, etc. parts = list(value) parts[2] = origin - origin = urlunparse(parts) + origin = html.escape(urlunparse(parts)) return ''.join((m.group('markup'), m.group('quote'), origin, m.group('quote')))