diff --git a/pelican/contents.py b/pelican/contents.py
index c347a999..54684329 100644
--- a/pelican/contents.py
+++ b/pelican/contents.py
@@ -25,6 +25,25 @@ from pelican.urlwrappers import (Author, Category, Tag, URLWrapper) # NOQA
logger = logging.getLogger(__name__)
+try:
+    import html
+except ImportError:
+    # Python 2.7 has no `html` module (html.escape() arrived in 3.2 and
+    # html.unescape() in 3.4): https://wiki.python.org/moin/EscapingHtml
+    from xml.sax.saxutils import escape, unescape
+
+    class html(object):
+        """Minimal stand-in exposing html.escape() and html.unescape()."""
+        # saxutils only handles &amp;, &lt; and &gt;; add the quote entities.
+        _html_escape_table = {'"': "&quot;", "'": "&apos;"}
+        _html_unescape_table = {'&quot;': '"', '&apos;': "'"}
+
+        @classmethod
+        def escape(cls, v): return escape(v, cls._html_escape_table)
+
+        @classmethod
+        def unescape(cls, v): return unescape(v, cls._html_unescape_table)
+
class Content:
"""Represents a content.
@@ -231,9 +250,9 @@ class Content:
def _link_replacer(self, siteurl, m):
what = m.group('what')
- value = urlparse(m.group('value'))
+ value = urlparse(html.unescape(m.group('value')))
path = value.path
- origin = m.group('path')
+ origin = html.unescape(m.group('path'))
# urllib.parse.urljoin() produces `a.html` for urljoin("..", "a.html")
# so if RELATIVE_URLS are enabled, we fall back to os.path.join() to
@@ -333,7 +352,7 @@ class Content:
# keep all other parts, such as query, fragment, etc.
parts = list(value)
parts[2] = origin
- origin = urlunparse(parts)
+ origin = html.escape(urlunparse(parts))
return ''.join((m.group('markup'), m.group('quote'), origin,
m.group('quote')))
diff --git a/pelican/tests/test_contents.py b/pelican/tests/test_contents.py
index 3a223b5a..0cbcb5bf 100644
--- a/pelican/tests/test_contents.py
+++ b/pelican/tests/test_contents.py
@@ -362,28 +362,28 @@ class TestPage(TestBase):
args['content'] = (
'A simple test, with a '
'link'
+ '?utm_whatever=234&highlight=word">link'
)
content = Page(**args).get_content('http://notmyidea.org')
self.assertEqual(
content,
'A simple test, with a '
'link'
+ '?utm_whatever=234&highlight=word">link'
)
# combination
args['content'] = (
'A simple test, with a '
'link'
+ '?utm_whatever=234&highlight=word#section-2">link'
)
content = Page(**args).get_content('http://notmyidea.org')
self.assertEqual(
content,
'A simple test, with a '
'link'
+ '?utm_whatever=234&highlight=word#section-2">link'
)
# also test for summary in metadata
@@ -407,6 +407,21 @@ class TestPage(TestBase):
self.assertEqual(p.summary, linked)
self.assertEqual(p.custom, linked)
+ # SITEURL with characters that should be escaped
+ args['content'] = (
+ 'A simple test, with a '
+ 'link'
+ )
+ content = Page(**args).get_content('http://notmyidea.org/'
+ '?app=blog&path=')
+ self.assertEqual(
+ content,
+ 'A simple test, with a '
+ 'link'
+ )
+
def test_intrasite_link_more(self):
cls_name = '_DummyAsset'