mirror of
https://github.com/getpelican/pelican.git
synced 2025-10-15 20:28:56 +02:00
Make URL part joining aware of absolute URLs.
Previously, with RELATIVE_URLS disabled, when both SITEURL and
STATIC_URL were absolute, the final generate data URLs looked wrong like
this (two absolute URLs joined by `/`):
http://your.site/http://static.your.site/image.png
With this patch, the data URLs are correctly:
http://static.your.site/image.png
This also applies to all *_URL configuration options (for example,
ability to have pages and articles on different domains) and behaves
like one expects even with URLs starting with just `//`, thanks to
making use of urllib.parse.urljoin().
However, when RELATIVE_URLS are enabled, urllib.parse.urljoin() doesn't
handle the relative base correctly. In that case, simple os.path.join()
is used. That, however, breaks the above case, but as RELATIVE_URLS are
meant for local development (thus no data scattered across multiple
domains), I don't see any problem.
Just to clarify, this is a fully backwards-compatible change, it only
enables new use cases that were impossible before.
This commit is contained in:
parent
1f30306e23
commit
0b13aa9b46
2 changed files with 73 additions and 6 deletions
|
|
@ -11,7 +11,7 @@ import sys
|
|||
import pytz
|
||||
|
||||
import six
|
||||
from six.moves.urllib.parse import urlparse, urlunparse
|
||||
from six.moves.urllib.parse import urljoin, urlparse, urlunparse
|
||||
|
||||
from pelican import signals
|
||||
from pelican.settings import DEFAULT_CONFIG
|
||||
|
|
@ -234,6 +234,25 @@ class Content(object):
|
|||
path = value.path
|
||||
origin = m.group('path')
|
||||
|
||||
# urllib.parse.urljoin() produces `a.html` for urljoin("..", "a.html")
|
||||
# so if RELATIVE_URLS are enabled, we fall back to os.path.join() to
|
||||
# properly get `../a.html`. However, os.path.join() produces
|
||||
# `baz/http://foo/bar.html` for join("baz", "http://foo/bar.html")
|
||||
# instead of correct "http://foo/bar.html", so one has to pick a side
|
||||
# as there is no silver bullet.
|
||||
if self.settings['RELATIVE_URLS']:
|
||||
joiner = os.path.join
|
||||
else:
|
||||
joiner = urljoin
|
||||
|
||||
# However, it's not *that* simple: urljoin("blog", "index.html")
|
||||
# produces just `index.html` instead of `blog/index.html` (unlike
|
||||
# os.path.join()), so in order to get a correct answer one needs to
|
||||
# append a trailing slash to siteurl in that case. This also makes
|
||||
# the new behavior fully compatible with Pelican 3.7.1.
|
||||
if not siteurl.endswith('/'):
|
||||
siteurl += '/'
|
||||
|
||||
# XXX Put this in a different location.
|
||||
if what in {'filename', 'attach'}:
|
||||
if path.startswith('/'):
|
||||
|
|
@ -260,7 +279,7 @@ class Content(object):
|
|||
"%s used {attach} link syntax on a "
|
||||
"non-static file. Use {filename} instead.",
|
||||
self.get_relative_source_path())
|
||||
origin = '/'.join((siteurl, linked_content.url))
|
||||
origin = joiner(siteurl, linked_content.url)
|
||||
origin = origin.replace('\\', '/') # for Windows paths.
|
||||
else:
|
||||
logger.warning(
|
||||
|
|
@ -269,13 +288,13 @@ class Content(object):
|
|||
'limit_msg': ("Other resources were not found "
|
||||
"and their urls not replaced")})
|
||||
elif what == 'category':
|
||||
origin = '/'.join((siteurl, Category(path, self.settings).url))
|
||||
origin = joiner(siteurl, Category(path, self.settings).url)
|
||||
elif what == 'tag':
|
||||
origin = '/'.join((siteurl, Tag(path, self.settings).url))
|
||||
origin = joiner(siteurl, Tag(path, self.settings).url)
|
||||
elif what == 'index':
|
||||
origin = '/'.join((siteurl, self.settings['INDEX_SAVE_AS']))
|
||||
origin = joiner(siteurl, self.settings['INDEX_SAVE_AS'])
|
||||
elif what == 'author':
|
||||
origin = '/'.join((siteurl, Author(path, self.settings).url))
|
||||
origin = joiner(siteurl, Author(path, self.settings).url)
|
||||
else:
|
||||
logger.warning(
|
||||
"Replacement Indicator '%s' not recognized, "
|
||||
|
|
|
|||
|
|
@ -397,6 +397,54 @@ class TestPage(LoggedTestCase):
|
|||
'</blockquote>'
|
||||
)
|
||||
|
||||
def test_intrasite_link_absolute(self):
|
||||
"""Test that absolute URLs are merged properly."""
|
||||
|
||||
args = self.page_kwargs.copy()
|
||||
args['settings'] = get_settings(
|
||||
STATIC_URL='http://static.cool.site/{path}',
|
||||
ARTICLE_URL='http://blog.cool.site/{slug}.html')
|
||||
args['source_path'] = 'content'
|
||||
args['context']['filenames'] = {
|
||||
'images/poster.jpg': Static('',
|
||||
settings=args['settings'],
|
||||
source_path='images/poster.jpg'),
|
||||
'article.rst': Article('',
|
||||
settings=args['settings'],
|
||||
metadata={'slug': 'article',
|
||||
'title': 'Article'})
|
||||
}
|
||||
|
||||
# Article link will go to blog
|
||||
args['content'] = (
|
||||
'<a href="{filename}article.rst">Article</a>'
|
||||
)
|
||||
content = Page(**args).get_content('http://cool.site')
|
||||
self.assertEqual(
|
||||
content,
|
||||
'<a href="http://blog.cool.site/article.html">Article</a>'
|
||||
)
|
||||
|
||||
# Page link will go to the main site
|
||||
args['content'] = (
|
||||
'<a href="{index}">Index</a>'
|
||||
)
|
||||
content = Page(**args).get_content('http://cool.site')
|
||||
self.assertEqual(
|
||||
content,
|
||||
'<a href="http://cool.site/index.html">Index</a>'
|
||||
)
|
||||
|
||||
# Image link will go to static
|
||||
args['content'] = (
|
||||
'<img src="{filename}/images/poster.jpg"/>'
|
||||
)
|
||||
content = Page(**args).get_content('http://cool.site')
|
||||
self.assertEqual(
|
||||
content,
|
||||
'<img src="http://static.cool.site/images/poster.jpg"/>'
|
||||
)
|
||||
|
||||
def test_intrasite_link_markdown_spaces(self):
|
||||
# Markdown introduces %20 instead of spaces, this tests that
|
||||
# we support markdown doing this.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue