mirror of
https://github.com/getpelican/pelican.git
synced 2025-10-15 20:28:56 +02:00
Try unescaped paths in intrasite link discovery
Some content parsers escape link paths in their HTML output (e.g. docutils uses HTML escaping and markdown uses URL encoding). Intrasite link discovery is refactored to also attempt HTML-unescaped or URL-unescaped versions of the path in order to match more permissively.
This commit is contained in:
parent
7a6686f467
commit
fd0923d2f2
2 changed files with 106 additions and 31 deletions
|
|
@ -30,6 +30,9 @@ class TestBase(LoggedTestCase):
|
|||
'content': TEST_CONTENT,
|
||||
'context': {
|
||||
'localsiteurl': '',
|
||||
'generated_content': {},
|
||||
'static_content': {},
|
||||
'static_links': set()
|
||||
},
|
||||
'metadata': {
|
||||
'summary': TEST_SUMMARY,
|
||||
|
|
@ -519,6 +522,60 @@ class TestPage(TestBase):
|
|||
'<img src="http://static.cool.site/images/poster.jpg"/>'
|
||||
)
|
||||
|
||||
def test_intrasite_link_escape(self):
    """Check that intrasite links resolve for escaped and unescaped paths.

    The same two links (one ``{filename}`` link to an article whose
    source name contains a space, one ``{static}`` link to an asset whose
    name contains ``@``) are rendered four times: verbatim, HTML-escaped,
    URL-escaped, and with both escapings mixed.  Every variant must
    produce the same final output.
    """
    article = type(
        '_DummyArticle', (object,), {'url': 'article-spaces.html'})
    asset = type(
        '_DummyAsset', (object,), {'url': 'name@example.com'})

    args = self.page_kwargs.copy()
    args['settings'] = get_settings()
    args['source_path'] = 'content'
    # The lookup tables are keyed by the *unescaped* source paths.
    args['context']['generated_content'] = {'article spaces.rst': article}
    args['context']['static_content'] = {'name@example.com': asset}

    expected_output = (
        'A simple test with a '
        '<a href="http://notmyidea.org/article-spaces.html#anchor">link</a> '
        '<a href="http://notmyidea.org/name@example.com#anchor">file</a>'
    )

    # not escaped
    args['content'] = (
        'A simple test with a '
        '<a href="{filename}article spaces.rst#anchor">link</a> '
        '<a href="{static}name@example.com#anchor">file</a>'
    )
    content = Page(**args).get_content('http://notmyidea.org')
    self.assertEqual(content, expected_output)

    # html escaped: '@' is written as the numeric character reference
    # '&#64;' so that this case really differs from the unescaped one.
    args['content'] = (
        'A simple test with a '
        '<a href="{filename}article spaces.rst#anchor">link</a> '
        '<a href="{static}name&#64;example.com#anchor">file</a>'
    )
    content = Page(**args).get_content('http://notmyidea.org')
    self.assertEqual(content, expected_output)

    # url escaped: ' ' -> '%20' and '@' -> '%40'
    args['content'] = (
        'A simple test with a '
        '<a href="{filename}article%20spaces.rst#anchor">link</a> '
        '<a href="{static}name%40example.com#anchor">file</a>'
    )
    content = Page(**args).get_content('http://notmyidea.org')
    self.assertEqual(content, expected_output)

    # html and url escaped: the article link is URL-encoded while the
    # static link is HTML-escaped.
    args['content'] = (
        'A simple test with a '
        '<a href="{filename}article%20spaces.rst#anchor">link</a> '
        '<a href="{static}name&#64;example.com#anchor">file</a>'
    )
    content = Page(**args).get_content('http://notmyidea.org')
    self.assertEqual(content, expected_output)
|
||||
|
||||
def test_intrasite_link_markdown_spaces(self):
|
||||
cls_name = '_DummyArticle'
|
||||
article = type(cls_name, (object,), {'url': 'article-spaces.html'})
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue