Fix link replacer to properly escape special HTML characters.

This commit is contained in:
Vladimír Vondruš 2017-12-10 21:48:50 +01:00
commit 4462d84461

View file

@ -26,6 +26,25 @@ from pelican.urlwrappers import (Author, Category, Tag, URLWrapper) # NOQA
logger = logging.getLogger(__name__)
try:
    import html
except ImportError:
    # The stdlib ``html`` module is not available on Python 2.7
    # (html.escape() exists since Python 3.2, html.unescape() since 3.4),
    # so provide a minimal stand-in exposing the same two entry points.
    # https://wiki.python.org/moin/EscapingHtml
    from xml.sax.saxutils import escape, unescape

    class html(object):
        """Python 2 fallback mimicking ``html.escape()``/``html.unescape()``.

        ``xml.sax.saxutils`` handles ``&``, ``<`` and ``>`` by itself; the
        tables below extend it to both quote characters so that values can
        be placed safely inside quoted HTML attributes.
        """

        # Extra replacements applied by escape() on top of &, < and >.
        _html_escape_table = {'"': "&quot;",
                              "'": "&apos;"}
        # Inverse mapping applied by unescape().
        _html_unescape_table = {'&quot;': '"',
                                '&apos;': "'"}

        @classmethod
        def escape(cls, v):
            """Return ``v`` with ``&``, ``<``, ``>`` and quotes escaped."""
            return escape(v, cls._html_escape_table)

        @classmethod
        def unescape(cls, v):
            """Return ``v`` with the entities produced by escape() decoded."""
            return unescape(v, cls._html_unescape_table)
@python_2_unicode_compatible
class Content(object):
@ -230,9 +249,9 @@ class Content(object):
def _link_replacer(self, siteurl, m):
what = m.group('what')
value = urlparse(m.group('value'))
value = urlparse(html.unescape(m.group('value')))
path = value.path
origin = m.group('path')
origin = html.unescape(m.group('path'))
# XXX Put this in a different location.
if what in {'filename', 'attach'}:
@ -285,7 +304,7 @@ class Content(object):
# keep all other parts, such as query, fragment, etc.
parts = list(value)
parts[2] = origin
origin = urlunparse(parts)
origin = html.escape(urlunparse(parts))
return ''.join((m.group('markup'), m.group('quote'), origin,
m.group('quote')))