Merge pull request #2812 from avaris/2646

Try unescaped paths in intrasite link discovery
This commit is contained in:
Justin Mayer 2020-10-19 20:43:18 +02:00 committed by GitHub
commit 197cd1e12e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 106 additions and 31 deletions

View file

@ -4,7 +4,8 @@ import locale
import logging
import os
import re
from urllib.parse import urljoin, urlparse, urlunparse
from html import unescape
from urllib.parse import unquote, urljoin, urlparse, urlunparse
import pytz
@ -250,38 +251,55 @@ class Content:
# XXX Put this in a different location.
if what in {'filename', 'static', 'attach'}:
if path.startswith('/'):
path = path[1:]
def _get_linked_content(key, url):
nonlocal value
def _find_path(path):
if path.startswith('/'):
path = path[1:]
else:
# relative to the source path of this content
path = self.get_relative_source_path(
os.path.join(self.relative_dir, path)
)
return self._context[key].get(path, None)
# try path
result = _find_path(url.path)
if result is not None:
return result
# try unquoted path
result = _find_path(unquote(url.path))
if result is not None:
return result
# try html unescaped url
unescaped_url = urlparse(unescape(url.geturl()))
result = _find_path(unescaped_url.path)
if result is not None:
value = unescaped_url
return result
# check if a static file is linked with {filename}
if what == 'filename' and key == 'generated_content':
linked_content = _get_linked_content('static_content', value)
if linked_content:
logger.warning(
'{filename} used for linking to static'
' content %s in %s. Use {static} instead',
value.path,
self.get_relative_source_path())
return linked_content
return None
if what == 'filename':
key = 'generated_content'
else:
# relative to the source path of this content
path = self.get_relative_source_path(
os.path.join(self.relative_dir, path)
)
key = 'static_content'
key = 'static_content' if what in ('static', 'attach')\
else 'generated_content'
def _get_linked_content(key, path):
    """Return the ``self._context[key]`` entry for ``path``, or ``None``.

    The path is looked up as given and then with ``%20`` replaced by a
    space.  If both lookups miss and a ``{filename}`` link was being
    resolved against generated content, static content is searched as
    well and a warning is logged when the target is found there.
    """
    try:
        return self._context[key][path]
    except KeyError:
        pass

    try:
        # Markdown escapes spaces, try unescaping
        return self._context[key][path.replace('%20', ' ')]
    except KeyError:
        pass

    if what != 'filename' or key != 'generated_content':
        return None

    # The link may point at a static file by mistake: retry there.
    static_hit = _get_linked_content('static_content', path)
    if not static_hit:
        return None
    logger.warning(
        '{filename} used for linking to static'
        ' content %s in %s. Use {static} instead',
        path,
        self.get_relative_source_path())
    return static_hit
linked_content = _get_linked_content(key, path)
linked_content = _get_linked_content(key, value)
if linked_content:
if what == 'attach':
linked_content.attach_to(self)

View file

@ -30,6 +30,9 @@ class TestBase(LoggedTestCase):
'content': TEST_CONTENT,
'context': {
'localsiteurl': '',
'generated_content': {},
'static_content': {},
'static_links': set()
},
'metadata': {
'summary': TEST_SUMMARY,
@ -519,6 +522,60 @@ class TestPage(TestBase):
'<img src="http://static.cool.site/images/poster.jpg"/>'
)
def test_intrasite_link_escape(self):
    # Intrasite links must resolve to the same output whether their
    # targets are written plainly, HTML-escaped, URL-escaped, or both.
    article = type(
        '_DummyArticle', (object,), {'url': 'article-spaces.html'})
    asset = type(
        '_DummyAsset', (object,), {'url': 'name@example.com'})

    args = self.page_kwargs.copy()
    args['settings'] = get_settings()
    args['source_path'] = 'content'
    args['context']['generated_content'] = {'article spaces.rst': article}
    args['context']['static_content'] = {'name@example.com': asset}

    expected_output = (
        'A simple test with a '
        '<a href="http://notmyidea.org/article-spaces.html#anchor">link</a> '
        '<a href="http://notmyidea.org/name@example.com#anchor">file</a>'
    )

    escaped_variants = (
        # not escaped
        (
            'A simple test with a '
            '<a href="{filename}article spaces.rst#anchor">link</a> '
            '<a href="{static}name@example.com#anchor">file</a>'
        ),
        # html escaped
        (
            'A simple test with a '
            '<a href="{filename}article spaces.rst#anchor">link</a> '
            '<a href="{static}name&#64;example.com#anchor">file</a>'
        ),
        # url escaped
        (
            'A simple test with a '
            '<a href="{filename}article%20spaces.rst#anchor">link</a> '
            '<a href="{static}name%40example.com#anchor">file</a>'
        ),
        # html and url escaped
        (
            'A simple test with a '
            '<a href="{filename}article%20spaces.rst#anchor">link</a> '
            '<a href="{static}name&#64;example.com#anchor">file</a>'
        ),
    )

    for raw_content in escaped_variants:
        args['content'] = raw_content
        rendered = Page(**args).get_content('http://notmyidea.org')
        self.assertEqual(rendered, expected_output)
def test_intrasite_link_markdown_spaces(self):
cls_name = '_DummyArticle'
article = type(cls_name, (object,), {'url': 'article-spaces.html'})