diff --git a/pelican/contents.py b/pelican/contents.py index a534dbaa..e84c8296 100644 --- a/pelican/contents.py +++ b/pelican/contents.py @@ -11,7 +11,7 @@ import sys import pytz import six -from six.moves.urllib.parse import urlparse, urlunparse +from six.moves.urllib.parse import urljoin, urlparse, urlunparse from pelican import signals from pelican.settings import DEFAULT_CONFIG @@ -234,6 +234,25 @@ class Content(object): path = value.path origin = m.group('path') + # urllib.parse.urljoin() produces `a.html` for urljoin("..", "a.html") + # so if RELATIVE_URLS are enabled, we fall back to os.path.join() to + # properly get `../a.html`. However, os.path.join() produces + # `baz/http://foo/bar.html` for join("baz", "http://foo/bar.html") + # instead of correct "http://foo/bar.html", so one has to pick a side + # as there is no silver bullet. + if self.settings['RELATIVE_URLS']: + joiner = os.path.join + else: + joiner = urljoin + + # However, it's not *that* simple: urljoin("blog", "index.html") + # produces just `index.html` instead of `blog/index.html` (unlike + # os.path.join()), so in order to get a correct answer one needs to + # append a trailing slash to siteurl in that case. This also makes + # the new behavior fully compatible with Pelican 3.7.1. + if not siteurl.endswith('/'): + siteurl += '/' + # XXX Put this in a different location. if what in {'filename', 'attach'}: if path.startswith('/'): @@ -260,7 +279,7 @@ class Content(object): "%s used {attach} link syntax on a " "non-static file. Use {filename} instead.", self.get_relative_source_path()) - origin = '/'.join((siteurl, linked_content.url)) + origin = joiner(siteurl, linked_content.url) origin = origin.replace('\\', '/') # for Windows paths. else: logger.warning( @@ -269,13 +288,13 @@ class Content(object): 'limit_msg': ("Other resources were not found " "and their urls not replaced")}) elif what == 'category': - origin = '/'.join((siteurl, Category(path, self.settings).url)) + origin = joiner(siteurl, Category(path, self.settings).url) elif what == 'tag': - origin = '/'.join((siteurl, Tag(path, self.settings).url)) + origin = joiner(siteurl, Tag(path, self.settings).url) elif what == 'index': - origin = '/'.join((siteurl, self.settings['INDEX_SAVE_AS'])) + origin = joiner(siteurl, self.settings['INDEX_SAVE_AS']) elif what == 'author': - origin = '/'.join((siteurl, Author(path, self.settings).url)) + origin = joiner(siteurl, Author(path, self.settings).url) else: logger.warning( "Replacement Indicator '%s' not recognized, " diff --git a/pelican/tests/test_contents.py b/pelican/tests/test_contents.py index d028c7a1..04c82f61 100644 --- a/pelican/tests/test_contents.py +++ b/pelican/tests/test_contents.py @@ -397,6 +397,54 @@ class TestPage(LoggedTestCase): '' ) + def test_intrasite_link_absolute(self): + """Test that absolute URLs are merged properly.""" + + args = self.page_kwargs.copy() + args['settings'] = get_settings( + STATIC_URL='http://static.cool.site/{path}', + ARTICLE_URL='http://blog.cool.site/{slug}.html') + args['source_path'] = 'content' + args['context']['filenames'] = { + 'images/poster.jpg': Static('', + settings=args['settings'], + source_path='images/poster.jpg'), + 'article.rst': Article('', + settings=args['settings'], + metadata={'slug': 'article', + 'title': 'Article'}) + } + + # Article link will go to blog + args['content'] = ( + 'Article' + ) + content = Page(**args).get_content('http://cool.site') + self.assertEqual( + content, + 'Article' + ) + + # Page link will go to the main site + args['content'] = ( + 'Index' + ) + content = Page(**args).get_content('http://cool.site') + self.assertEqual( + content, + 'Index' + ) + + # Image link will go to static + args['content'] = ( + '' + ) + content = Page(**args).get_content('http://cool.site') + self.assertEqual( + content, + '' + ) + def test_intrasite_link_markdown_spaces(self): # Markdown introduces %20 instead of spaces, this tests that # we support markdown doing this.