diff --git a/pelican/contents.py b/pelican/contents.py
index 15770fc8..e84c8296 100644
--- a/pelican/contents.py
+++ b/pelican/contents.py
@@ -11,7 +11,7 @@ import sys
import pytz
import six
-from six.moves.urllib.parse import urlparse, urlunparse
+from six.moves.urllib.parse import urljoin, urlparse, urlunparse
from pelican import signals
from pelican.settings import DEFAULT_CONFIG
@@ -228,6 +228,87 @@ class Content(object):
key = key if self.in_default_lang else 'lang_%s' % key
return self._expand_settings(key)
+ def _link_replacer(self, siteurl, m):
+ what = m.group('what')
+ value = urlparse(m.group('value'))
+ path = value.path
+ origin = m.group('path')
+
+ # urllib.parse.urljoin() produces `a.html` for urljoin("..", "a.html")
+ # so if RELATIVE_URLS are enabled, we fall back to os.path.join() to
+ # properly get `../a.html`. However, os.path.join() produces
+ # `baz/http://foo/bar.html` for join("baz", "http://foo/bar.html")
+ # instead of correct "http://foo/bar.html", so one has to pick a side
+ # as there is no silver bullet.
+ if self.settings['RELATIVE_URLS']:
+ joiner = os.path.join
+ else:
+ joiner = urljoin
+
+ # However, it's not *that* simple: urljoin("blog", "index.html")
+ # produces just `index.html` instead of `blog/index.html` (unlike
+ # os.path.join()), so in order to get a correct answer one needs to
+ # append a trailing slash to siteurl in that case. This also makes
+ # the new behavior fully compatible with Pelican 3.7.1.
+ if not siteurl.endswith('/'):
+ siteurl += '/'
+
+ # XXX Put this in a different location.
+ if what in {'filename', 'attach'}:
+ if path.startswith('/'):
+ path = path[1:]
+ else:
+ # relative to the source path of this content
+ path = self.get_relative_source_path(
+ os.path.join(self.relative_dir, path)
+ )
+
+ if path not in self._context['filenames']:
+ unquoted_path = path.replace('%20', ' ')
+
+ if unquoted_path in self._context['filenames']:
+ path = unquoted_path
+
+ linked_content = self._context['filenames'].get(path)
+ if linked_content:
+ if what == 'attach':
+ if isinstance(linked_content, Static):
+ linked_content.attach_to(self)
+ else:
+ logger.warning(
+ "%s used {attach} link syntax on a "
+ "non-static file. Use {filename} instead.",
+ self.get_relative_source_path())
+ origin = joiner(siteurl, linked_content.url)
+ origin = origin.replace('\\', '/') # for Windows paths.
+ else:
+ logger.warning(
+ "Unable to find '%s', skipping url replacement.",
+ value.geturl(), extra={
+ 'limit_msg': ("Other resources were not found "
+ "and their urls not replaced")})
+ elif what == 'category':
+ origin = joiner(siteurl, Category(path, self.settings).url)
+ elif what == 'tag':
+ origin = joiner(siteurl, Tag(path, self.settings).url)
+ elif what == 'index':
+ origin = joiner(siteurl, self.settings['INDEX_SAVE_AS'])
+ elif what == 'author':
+ origin = joiner(siteurl, Author(path, self.settings).url)
+ else:
+ logger.warning(
+ "Replacement Indicator '%s' not recognized, "
+ "skipping replacement",
+ what)
+
+ # keep all other parts, such as query, fragment, etc.
+ parts = list(value)
+ parts[2] = origin
+ origin = urlunparse(parts)
+
+ return ''.join((m.group('markup'), m.group('quote'), origin,
+ m.group('quote')))
+
def _update_content(self, content, siteurl):
"""Update the content attribute.
@@ -251,69 +332,7 @@ class Content(object):
\2""".format(instrasite_link_regex)
hrefs = re.compile(regex, re.X)
- def replacer(m):
- what = m.group('what')
- value = urlparse(m.group('value'))
- path = value.path
- origin = m.group('path')
-
- # XXX Put this in a different location.
- if what in {'filename', 'attach'}:
- if path.startswith('/'):
- path = path[1:]
- else:
- # relative to the source path of this content
- path = self.get_relative_source_path(
- os.path.join(self.relative_dir, path)
- )
-
- if path not in self._context['filenames']:
- unquoted_path = path.replace('%20', ' ')
-
- if unquoted_path in self._context['filenames']:
- path = unquoted_path
-
- linked_content = self._context['filenames'].get(path)
- if linked_content:
- if what == 'attach':
- if isinstance(linked_content, Static):
- linked_content.attach_to(self)
- else:
- logger.warning(
- "%s used {attach} link syntax on a "
- "non-static file. Use {filename} instead.",
- self.get_relative_source_path())
- origin = '/'.join((siteurl, linked_content.url))
- origin = origin.replace('\\', '/') # for Windows paths.
- else:
- logger.warning(
- "Unable to find '%s', skipping url replacement.",
- value.geturl(), extra={
- 'limit_msg': ("Other resources were not found "
- "and their urls not replaced")})
- elif what == 'category':
- origin = '/'.join((siteurl, Category(path, self.settings).url))
- elif what == 'tag':
- origin = '/'.join((siteurl, Tag(path, self.settings).url))
- elif what == 'index':
- origin = '/'.join((siteurl, self.settings['INDEX_SAVE_AS']))
- elif what == 'author':
- origin = '/'.join((siteurl, Author(path, self.settings).url))
- else:
- logger.warning(
- "Replacement Indicator '%s' not recognized, "
- "skipping replacement",
- what)
-
- # keep all other parts, such as query, fragment, etc.
- parts = list(value)
- parts[2] = origin
- origin = urlunparse(parts)
-
- return ''.join((m.group('markup'), m.group('quote'), origin,
- m.group('quote')))
-
- return hrefs.sub(replacer, content)
+ return hrefs.sub(lambda m: self._link_replacer(siteurl, m), content)
def get_siteurl(self):
return self._context.get('localsiteurl', '')
diff --git a/pelican/tests/test_contents.py b/pelican/tests/test_contents.py
index d028c7a1..04c82f61 100644
--- a/pelican/tests/test_contents.py
+++ b/pelican/tests/test_contents.py
@@ -397,6 +397,54 @@ class TestPage(LoggedTestCase):
''
)
+ def test_intrasite_link_absolute(self):
+ """Test that absolute URLs are merged properly."""
+
+ args = self.page_kwargs.copy()
+ args['settings'] = get_settings(
+ STATIC_URL='http://static.cool.site/{path}',
+ ARTICLE_URL='http://blog.cool.site/{slug}.html')
+ args['source_path'] = 'content'
+ args['context']['filenames'] = {
+ 'images/poster.jpg': Static('',
+ settings=args['settings'],
+ source_path='images/poster.jpg'),
+ 'article.rst': Article('',
+ settings=args['settings'],
+ metadata={'slug': 'article',
+ 'title': 'Article'})
+ }
+
+ # Article link will go to blog
+ args['content'] = (
+ 'Article'
+ )
+ content = Page(**args).get_content('http://cool.site')
+ self.assertEqual(
+ content,
+ 'Article'
+ )
+
+ # Page link will go to the main site
+ args['content'] = (
+ 'Index'
+ )
+ content = Page(**args).get_content('http://cool.site')
+ self.assertEqual(
+ content,
+ 'Index'
+ )
+
+ # Image link will go to static
+ args['content'] = (
+ '
'
+ )
+ content = Page(**args).get_content('http://cool.site')
+ self.assertEqual(
+ content,
+ '
'
+ )
+
def test_intrasite_link_markdown_spaces(self):
# Markdown introduces %20 instead of spaces, this tests that
# we support markdown doing this.