mirror of
https://github.com/getpelican/pelican.git
synced 2025-10-15 20:28:56 +02:00
Merge pull request #2812 from avaris/2646
Try unescaped paths in intrasite link discovery
This commit is contained in:
commit
197cd1e12e
2 changed files with 106 additions and 31 deletions
|
|
@ -4,7 +4,8 @@ import locale
|
|||
import logging
|
||||
import os
|
||||
import re
|
||||
from urllib.parse import urljoin, urlparse, urlunparse
|
||||
from html import unescape
|
||||
from urllib.parse import unquote, urljoin, urlparse, urlunparse
|
||||
|
||||
import pytz
|
||||
|
||||
|
|
@ -250,38 +251,55 @@ class Content:
|
|||
|
||||
# XXX Put this in a different location.
|
||||
if what in {'filename', 'static', 'attach'}:
|
||||
if path.startswith('/'):
|
||||
path = path[1:]
|
||||
def _get_linked_content(key, url):
|
||||
nonlocal value
|
||||
|
||||
def _find_path(path):
|
||||
if path.startswith('/'):
|
||||
path = path[1:]
|
||||
else:
|
||||
# relative to the source path of this content
|
||||
path = self.get_relative_source_path(
|
||||
os.path.join(self.relative_dir, path)
|
||||
)
|
||||
return self._context[key].get(path, None)
|
||||
|
||||
# try path
|
||||
result = _find_path(url.path)
|
||||
if result is not None:
|
||||
return result
|
||||
|
||||
# try unquoted path
|
||||
result = _find_path(unquote(url.path))
|
||||
if result is not None:
|
||||
return result
|
||||
|
||||
# try html unescaped url
|
||||
unescaped_url = urlparse(unescape(url.geturl()))
|
||||
result = _find_path(unescaped_url.path)
|
||||
if result is not None:
|
||||
value = unescaped_url
|
||||
return result
|
||||
|
||||
# check if a static file is linked with {filename}
|
||||
if what == 'filename' and key == 'generated_content':
|
||||
linked_content = _get_linked_content('static_content', value)
|
||||
if linked_content:
|
||||
logger.warning(
|
||||
'{filename} used for linking to static'
|
||||
' content %s in %s. Use {static} instead',
|
||||
value.path,
|
||||
self.get_relative_source_path())
|
||||
return linked_content
|
||||
|
||||
return None
|
||||
|
||||
if what == 'filename':
|
||||
key = 'generated_content'
|
||||
else:
|
||||
# relative to the source path of this content
|
||||
path = self.get_relative_source_path(
|
||||
os.path.join(self.relative_dir, path)
|
||||
)
|
||||
key = 'static_content'
|
||||
|
||||
key = 'static_content' if what in ('static', 'attach')\
|
||||
else 'generated_content'
|
||||
|
||||
def _get_linked_content(key, path):
    """Return the entry of ``self._context[key]`` stored under ``path``.

    ``path`` is tried as given and then with every ``%20`` replaced by a
    space. If both lookups miss and a ``{filename}`` link was being
    resolved against generated content, static content is searched
    instead and a warning is logged on success. Returns ``None`` when
    nothing matches.
    """
    # Markdown escapes spaces, so the second candidate undoes that
    for candidate in (path, path.replace('%20', ' ')):
        try:
            return self._context[key][candidate]
        except KeyError:
            continue

    if what == 'filename' and key == 'generated_content':
        static_hit = _get_linked_content('static_content', path)
        if static_hit:
            logger.warning(
                '{filename} used for linking to static'
                ' content %s in %s. Use {static} instead',
                path,
                self.get_relative_source_path())
            return static_hit

    return None
|
||||
|
||||
linked_content = _get_linked_content(key, path)
|
||||
linked_content = _get_linked_content(key, value)
|
||||
if linked_content:
|
||||
if what == 'attach':
|
||||
linked_content.attach_to(self)
|
||||
|
|
|
|||
|
|
@ -30,6 +30,9 @@ class TestBase(LoggedTestCase):
|
|||
'content': TEST_CONTENT,
|
||||
'context': {
|
||||
'localsiteurl': '',
|
||||
'generated_content': {},
|
||||
'static_content': {},
|
||||
'static_links': set()
|
||||
},
|
||||
'metadata': {
|
||||
'summary': TEST_SUMMARY,
|
||||
|
|
@ -519,6 +522,60 @@ class TestPage(TestBase):
|
|||
'<img src="http://static.cool.site/images/poster.jpg"/>'
|
||||
)
|
||||
|
||||
def test_intrasite_link_escape(self):
    """Intrasite links resolve regardless of how their path is escaped.

    The same two links (one ``{filename}`` link to an article whose source
    name contains a space, one ``{static}`` link to an asset whose name
    contains ``@``) are rendered with the path written raw, HTML-escaped,
    URL-escaped, and with a mix of both. Every variant must produce the
    same output.
    """
    article = type(
        '_DummyArticle', (object,), {'url': 'article-spaces.html'})
    asset = type(
        '_DummyAsset', (object,), {'url': 'name@example.com'})

    args = self.page_kwargs.copy()
    args['settings'] = get_settings()
    args['source_path'] = 'content'
    args['context']['generated_content'] = {'article spaces.rst': article}
    args['context']['static_content'] = {'name@example.com': asset}

    expected_output = (
        'A simple test with a '
        '<a href="http://notmyidea.org/article-spaces.html#anchor">link</a> '
        '<a href="http://notmyidea.org/name@example.com#anchor">file</a>'
    )

    # (label, href of the article link, href of the asset link)
    # The HTML-escaped variants must use character references (&#32; for
    # the space, &#64; for "@"); written raw they would merely repeat the
    # "not escaped" case and never reach the HTML-unescape lookup.
    cases = (
        ('not escaped',
         '{filename}article spaces.rst',
         '{static}name@example.com'),
        ('html escaped',
         '{filename}article&#32;spaces.rst',
         '{static}name&#64;example.com'),
        ('url escaped',
         '{filename}article%20spaces.rst',
         '{static}name%40example.com'),
        ('html and url escaped',
         '{filename}article%20spaces.rst',
         '{static}name&#64;example.com'),
    )

    for label, link_href, file_href in cases:
        args['content'] = (
            'A simple test with a '
            '<a href="' + link_href + '#anchor">link</a> '
            '<a href="' + file_href + '#anchor">file</a>'
        )
        content = Page(**args).get_content('http://notmyidea.org')
        self.assertEqual(content, expected_output, msg=label)
|
||||
|
||||
def test_intrasite_link_markdown_spaces(self):
|
||||
cls_name = '_DummyArticle'
|
||||
article = type(cls_name, (object,), {'url': 'article-spaces.html'})
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue