mirror of
https://github.com/getpelican/pelican.git
synced 2025-10-15 20:28:56 +02:00
Merge pull request #2812 from avaris/2646
Try unescaped paths in intrasite link discovery
This commit is contained in:
commit
197cd1e12e
2 changed files with 106 additions and 31 deletions
|
|
@@ -4,7 +4,8 @@ import locale
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
from urllib.parse import urljoin, urlparse, urlunparse
|
from html import unescape
|
||||||
|
from urllib.parse import unquote, urljoin, urlparse, urlunparse
|
||||||
|
|
||||||
import pytz
|
import pytz
|
||||||
|
|
||||||
|
|
@@ -250,38 +251,55 @@ class Content:
|
||||||
|
|
||||||
# XXX Put this in a different location.
|
# XXX Put this in a different location.
|
||||||
if what in {'filename', 'static', 'attach'}:
|
if what in {'filename', 'static', 'attach'}:
|
||||||
if path.startswith('/'):
|
def _get_linked_content(key, url):
|
||||||
path = path[1:]
|
nonlocal value
|
||||||
|
|
||||||
|
def _find_path(path):
|
||||||
|
if path.startswith('/'):
|
||||||
|
path = path[1:]
|
||||||
|
else:
|
||||||
|
# relative to the source path of this content
|
||||||
|
path = self.get_relative_source_path(
|
||||||
|
os.path.join(self.relative_dir, path)
|
||||||
|
)
|
||||||
|
return self._context[key].get(path, None)
|
||||||
|
|
||||||
|
# try path
|
||||||
|
result = _find_path(url.path)
|
||||||
|
if result is not None:
|
||||||
|
return result
|
||||||
|
|
||||||
|
# try unquoted path
|
||||||
|
result = _find_path(unquote(url.path))
|
||||||
|
if result is not None:
|
||||||
|
return result
|
||||||
|
|
||||||
|
# try html unescaped url
|
||||||
|
unescaped_url = urlparse(unescape(url.geturl()))
|
||||||
|
result = _find_path(unescaped_url.path)
|
||||||
|
if result is not None:
|
||||||
|
value = unescaped_url
|
||||||
|
return result
|
||||||
|
|
||||||
|
# check if a static file is linked with {filename}
|
||||||
|
if what == 'filename' and key == 'generated_content':
|
||||||
|
linked_content = _get_linked_content('static_content', value)
|
||||||
|
if linked_content:
|
||||||
|
logger.warning(
|
||||||
|
'{filename} used for linking to static'
|
||||||
|
' content %s in %s. Use {static} instead',
|
||||||
|
value.path,
|
||||||
|
self.get_relative_source_path())
|
||||||
|
return linked_content
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
if what == 'filename':
|
||||||
|
key = 'generated_content'
|
||||||
else:
|
else:
|
||||||
# relative to the source path of this content
|
key = 'static_content'
|
||||||
path = self.get_relative_source_path(
|
|
||||||
os.path.join(self.relative_dir, path)
|
|
||||||
)
|
|
||||||
|
|
||||||
key = 'static_content' if what in ('static', 'attach')\
|
linked_content = _get_linked_content(key, value)
|
||||||
else 'generated_content'
|
|
||||||
|
|
||||||
def _get_linked_content(key, path):
|
|
||||||
try:
|
|
||||||
return self._context[key][path]
|
|
||||||
except KeyError:
|
|
||||||
try:
|
|
||||||
# Markdown escapes spaces, try unescaping
|
|
||||||
return self._context[key][path.replace('%20', ' ')]
|
|
||||||
except KeyError:
|
|
||||||
if what == 'filename' and key == 'generated_content':
|
|
||||||
key = 'static_content'
|
|
||||||
linked_content = _get_linked_content(key, path)
|
|
||||||
if linked_content:
|
|
||||||
logger.warning(
|
|
||||||
'{filename} used for linking to static'
|
|
||||||
' content %s in %s. Use {static} instead',
|
|
||||||
path,
|
|
||||||
self.get_relative_source_path())
|
|
||||||
return linked_content
|
|
||||||
return None
|
|
||||||
|
|
||||||
linked_content = _get_linked_content(key, path)
|
|
||||||
if linked_content:
|
if linked_content:
|
||||||
if what == 'attach':
|
if what == 'attach':
|
||||||
linked_content.attach_to(self)
|
linked_content.attach_to(self)
|
||||||
|
|
|
||||||
|
|
@@ -30,6 +30,9 @@ class TestBase(LoggedTestCase):
|
||||||
'content': TEST_CONTENT,
|
'content': TEST_CONTENT,
|
||||||
'context': {
|
'context': {
|
||||||
'localsiteurl': '',
|
'localsiteurl': '',
|
||||||
|
'generated_content': {},
|
||||||
|
'static_content': {},
|
||||||
|
'static_links': set()
|
||||||
},
|
},
|
||||||
'metadata': {
|
'metadata': {
|
||||||
'summary': TEST_SUMMARY,
|
'summary': TEST_SUMMARY,
|
||||||
|
|
@@ -519,6 +522,60 @@ class TestPage(TestBase):
|
||||||
'<img src="http://static.cool.site/images/poster.jpg"/>'
|
'<img src="http://static.cool.site/images/poster.jpg"/>'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def test_intrasite_link_escape(self):
|
||||||
|
article = type(
|
||||||
|
'_DummyArticle', (object,), {'url': 'article-spaces.html'})
|
||||||
|
asset = type(
|
||||||
|
'_DummyAsset', (object,), {'url': 'name@example.com'})
|
||||||
|
|
||||||
|
args = self.page_kwargs.copy()
|
||||||
|
args['settings'] = get_settings()
|
||||||
|
args['source_path'] = 'content'
|
||||||
|
args['context']['generated_content'] = {'article spaces.rst': article}
|
||||||
|
args['context']['static_content'] = {'name@example.com': asset}
|
||||||
|
|
||||||
|
expected_output = (
|
||||||
|
'A simple test with a '
|
||||||
|
'<a href="http://notmyidea.org/article-spaces.html#anchor">link</a> '
|
||||||
|
'<a href="http://notmyidea.org/name@example.com#anchor">file</a>'
|
||||||
|
)
|
||||||
|
|
||||||
|
# not escaped
|
||||||
|
args['content'] = (
|
||||||
|
'A simple test with a '
|
||||||
|
'<a href="{filename}article spaces.rst#anchor">link</a> '
|
||||||
|
'<a href="{static}name@example.com#anchor">file</a>'
|
||||||
|
)
|
||||||
|
content = Page(**args).get_content('http://notmyidea.org')
|
||||||
|
self.assertEqual(content, expected_output)
|
||||||
|
|
||||||
|
# html escaped
|
||||||
|
args['content'] = (
|
||||||
|
'A simple test with a '
|
||||||
|
'<a href="{filename}article&#32;spaces.rst#anchor">link</a> '
|
||||||
|
'<a href="{static}name&#64;example.com#anchor">file</a>'
|
||||||
|
)
|
||||||
|
content = Page(**args).get_content('http://notmyidea.org')
|
||||||
|
self.assertEqual(content, expected_output)
|
||||||
|
|
||||||
|
# url escaped
|
||||||
|
args['content'] = (
|
||||||
|
'A simple test with a '
|
||||||
|
'<a href="{filename}article%20spaces.rst#anchor">link</a> '
|
||||||
|
'<a href="{static}name%40example.com#anchor">file</a>'
|
||||||
|
)
|
||||||
|
content = Page(**args).get_content('http://notmyidea.org')
|
||||||
|
self.assertEqual(content, expected_output)
|
||||||
|
|
||||||
|
# html and url escaped
|
||||||
|
args['content'] = (
|
||||||
|
'A simple test with a '
|
||||||
|
'<a href="{filename}article%20spaces.rst#anchor">link</a> '
|
||||||
|
'<a href="{static}name&#64;example.com#anchor">file</a>'
|
||||||
|
)
|
||||||
|
content = Page(**args).get_content('http://notmyidea.org')
|
||||||
|
self.assertEqual(content, expected_output)
|
||||||
|
|
||||||
def test_intrasite_link_markdown_spaces(self):
|
def test_intrasite_link_markdown_spaces(self):
|
||||||
cls_name = '_DummyArticle'
|
cls_name = '_DummyArticle'
|
||||||
article = type(cls_name, (object,), {'url': 'article-spaces.html'})
|
article = type(cls_name, (object,), {'url': 'article-spaces.html'})
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue