This commit is contained in:
Vladimír Vondruš 2023-10-29 20:09:57 +01:00 committed by GitHub
commit 842a81e544
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 41 additions and 7 deletions

View file

@ -25,6 +25,25 @@ from pelican.urlwrappers import (Author, Category, Tag, URLWrapper) # NOQA
logger = logging.getLogger(__name__)
try:
    import html
except ImportError:
    # Python 2.7 has no importable ``html`` module exposing these helpers
    # (html.escape() was added in Python 3.2 and html.unescape() in
    # Python 3.4), so provide a minimal stand-in with the same two entry
    # points, built on xml.sax.saxutils.
    # https://wiki.python.org/moin/EscapingHtml
    from xml.sax.saxutils import escape, unescape

    class html(object):
        """Fallback namespace mimicking ``html.escape``/``html.unescape``.

        saxutils already handles ``&``, ``<`` and ``>``; the tables below
        extend that to double and single quotes so that escaped values are
        safe to place inside quoted HTML attributes.
        """

        # Extra character -> entity replacements applied when escaping.
        _html_escape_table = {'"': "&quot;",
                              "'": "&apos;"}
        # Inverse mapping (entity -> character) applied when unescaping.
        _html_unescape_table = {'&quot;': '"',
                                '&apos;': "'"}

        @classmethod
        def escape(cls, v):
            """Return ``v`` with ``&``, ``<``, ``>`` and quotes escaped."""
            return escape(v, cls._html_escape_table)

        @classmethod
        def unescape(cls, v):
            """Return ``v`` with the entities produced by escape() decoded."""
            return unescape(v, cls._html_unescape_table)
class Content:
"""Represents a content.
@ -231,9 +250,9 @@ class Content:
def _link_replacer(self, siteurl, m):
what = m.group('what')
value = urlparse(m.group('value'))
value = urlparse(html.unescape(m.group('value')))
path = value.path
origin = m.group('path')
origin = html.unescape(m.group('path'))
# urllib.parse.urljoin() produces `a.html` for urljoin("..", "a.html")
# so if RELATIVE_URLS are enabled, we fall back to os.path.join() to
@ -333,7 +352,7 @@ class Content:
# keep all other parts, such as query, fragment, etc.
parts = list(value)
parts[2] = origin
origin = urlunparse(parts)
origin = html.escape(urlunparse(parts))
return ''.join((m.group('markup'), m.group('quote'), origin,
m.group('quote')))

View file

@ -362,28 +362,28 @@ class TestPage(TestBase):
args['content'] = (
'A simple test, with a '
'<a href="|filename|article.rst'
'?utm_whatever=234&highlight=word">link</a>'
'?utm_whatever=234&amp;highlight=word">link</a>'
)
content = Page(**args).get_content('http://notmyidea.org')
self.assertEqual(
content,
'A simple test, with a '
'<a href="http://notmyidea.org/article.html'
'?utm_whatever=234&highlight=word">link</a>'
'?utm_whatever=234&amp;highlight=word">link</a>'
)
# combination
args['content'] = (
'A simple test, with a '
'<a href="|filename|article.rst'
'?utm_whatever=234&highlight=word#section-2">link</a>'
'?utm_whatever=234&amp;highlight=word#section-2">link</a>'
)
content = Page(**args).get_content('http://notmyidea.org')
self.assertEqual(
content,
'A simple test, with a '
'<a href="http://notmyidea.org/article.html'
'?utm_whatever=234&highlight=word#section-2">link</a>'
'?utm_whatever=234&amp;highlight=word#section-2">link</a>'
)
# also test for summary in metadata
@ -407,6 +407,21 @@ class TestPage(TestBase):
self.assertEqual(p.summary, linked)
self.assertEqual(p.custom, linked)
# SITEURL with characters that should be escaped
args['content'] = (
'A simple test, with a '
'<a href="|filename|article.rst'
'#highlight=&quot;word&quot;">link</a>'
)
content = Page(**args).get_content('http://notmyidea.org/'
'?app=blog&path=')
self.assertEqual(
content,
'A simple test, with a '
'<a href="http://notmyidea.org/?app=blog&amp;path='
'/article.html#highlight=&quot;word&quot;">link</a>'
)
def test_intrasite_link_more(self):
cls_name = '_DummyAsset'