mirror of
https://github.com/getpelican/pelican.git
synced 2025-10-15 20:28:56 +02:00
Merge pull request #2196 from mosra/absolute-url-merging
Make URL part joining aware of absolute URLs
This commit is contained in:
commit
e7ac0a9272
2 changed files with 131 additions and 64 deletions
|
|
@ -11,7 +11,7 @@ import sys
|
||||||
import pytz
|
import pytz
|
||||||
|
|
||||||
import six
|
import six
|
||||||
from six.moves.urllib.parse import urlparse, urlunparse
|
from six.moves.urllib.parse import urljoin, urlparse, urlunparse
|
||||||
|
|
||||||
from pelican import signals
|
from pelican import signals
|
||||||
from pelican.settings import DEFAULT_CONFIG
|
from pelican.settings import DEFAULT_CONFIG
|
||||||
|
|
@ -228,6 +228,87 @@ class Content(object):
|
||||||
key = key if self.in_default_lang else 'lang_%s' % key
|
key = key if self.in_default_lang else 'lang_%s' % key
|
||||||
return self._expand_settings(key)
|
return self._expand_settings(key)
|
||||||
|
|
||||||
|
def _link_replacer(self, siteurl, m):
    """Build the replacement text for a single matched intrasite link.

    :param siteurl: base URL (or relative prefix) the resolved target is
        joined onto.
    :param m: regex match object exposing the named groups ``markup``,
        ``quote``, ``path``, ``what`` and ``value``.
    :return: ``markup`` + ``quote`` + resulting URL + ``quote``. If the
        link cannot be resolved a warning is logged and the originally
        matched ``path`` group is kept as the path component.
    """
    # Local import: the joined result is a URL, so POSIX separators are
    # wanted on every platform (see the joiner selection below).
    import posixpath

    what = m.group('what')
    value = urlparse(m.group('value'))
    path = value.path
    origin = m.group('path')

    # urllib.parse.urljoin() produces `a.html` for urljoin("..", "a.html")
    # so if RELATIVE_URLS are enabled, we fall back to plain path joining
    # to properly get `../a.html`. posixpath.join() is used instead of
    # os.path.join() because the latter inserts a backslash on Windows and
    # only the filename/attach branch below normalizes backslashes.
    # However, path joining produces `baz/http://foo/bar.html` for
    # join("baz", "http://foo/bar.html") instead of the correct
    # "http://foo/bar.html", so one has to pick a side as there is no
    # silver bullet.
    if self.settings['RELATIVE_URLS']:
        joiner = posixpath.join
    else:
        joiner = urljoin

        # However, it's not *that* simple: urljoin("blog", "index.html")
        # produces just `index.html` instead of `blog/index.html` (unlike
        # path joining), so in order to get a correct answer one needs to
        # append a trailing slash to siteurl in that case. This also makes
        # the new behavior fully compatible with Pelican 3.7.1.
        if not siteurl.endswith('/'):
            siteurl += '/'

    # XXX Put this in a different location.
    if what in {'filename', 'attach'}:
        if path.startswith('/'):
            # a leading slash means the path is looked up as given
            path = path[1:]
        else:
            # relative to the source path of this content
            path = self.get_relative_source_path(
                os.path.join(self.relative_dir, path)
            )

        if path not in self._context['filenames']:
            # retry with `%20` turned back into literal spaces
            unquoted_path = path.replace('%20', ' ')

            if unquoted_path in self._context['filenames']:
                path = unquoted_path

        linked_content = self._context['filenames'].get(path)
        if linked_content:
            if what == 'attach':
                if isinstance(linked_content, Static):
                    linked_content.attach_to(self)
                else:
                    logger.warning(
                        "%s used {attach} link syntax on a "
                        "non-static file. Use {filename} instead.",
                        self.get_relative_source_path())
            origin = joiner(siteurl, linked_content.url)
            origin = origin.replace('\\', '/')  # for Windows paths.
        else:
            logger.warning(
                "Unable to find '%s', skipping url replacement.",
                value.geturl(), extra={
                    'limit_msg': ("Other resources were not found "
                                  "and their urls not replaced")})
    elif what == 'category':
        origin = joiner(siteurl, Category(path, self.settings).url)
    elif what == 'tag':
        origin = joiner(siteurl, Tag(path, self.settings).url)
    elif what == 'index':
        origin = joiner(siteurl, self.settings['INDEX_SAVE_AS'])
    elif what == 'author':
        origin = joiner(siteurl, Author(path, self.settings).url)
    else:
        logger.warning(
            "Replacement Indicator '%s' not recognized, "
            "skipping replacement",
            what)

    # keep all other parts, such as query, fragment, etc.
    parts = list(value)
    parts[2] = origin
    origin = urlunparse(parts)

    return ''.join((m.group('markup'), m.group('quote'), origin,
                    m.group('quote')))
|
||||||
|
|
||||||
def _update_content(self, content, siteurl):
|
def _update_content(self, content, siteurl):
|
||||||
"""Update the content attribute.
|
"""Update the content attribute.
|
||||||
|
|
||||||
|
|
@ -251,69 +332,7 @@ class Content(object):
|
||||||
\2""".format(instrasite_link_regex)
|
\2""".format(instrasite_link_regex)
|
||||||
hrefs = re.compile(regex, re.X)
|
hrefs = re.compile(regex, re.X)
|
||||||
|
|
||||||
def replacer(m):
|
return hrefs.sub(lambda m: self._link_replacer(siteurl, m), content)
|
||||||
what = m.group('what')
|
|
||||||
value = urlparse(m.group('value'))
|
|
||||||
path = value.path
|
|
||||||
origin = m.group('path')
|
|
||||||
|
|
||||||
# XXX Put this in a different location.
|
|
||||||
if what in {'filename', 'attach'}:
|
|
||||||
if path.startswith('/'):
|
|
||||||
path = path[1:]
|
|
||||||
else:
|
|
||||||
# relative to the source path of this content
|
|
||||||
path = self.get_relative_source_path(
|
|
||||||
os.path.join(self.relative_dir, path)
|
|
||||||
)
|
|
||||||
|
|
||||||
if path not in self._context['filenames']:
|
|
||||||
unquoted_path = path.replace('%20', ' ')
|
|
||||||
|
|
||||||
if unquoted_path in self._context['filenames']:
|
|
||||||
path = unquoted_path
|
|
||||||
|
|
||||||
linked_content = self._context['filenames'].get(path)
|
|
||||||
if linked_content:
|
|
||||||
if what == 'attach':
|
|
||||||
if isinstance(linked_content, Static):
|
|
||||||
linked_content.attach_to(self)
|
|
||||||
else:
|
|
||||||
logger.warning(
|
|
||||||
"%s used {attach} link syntax on a "
|
|
||||||
"non-static file. Use {filename} instead.",
|
|
||||||
self.get_relative_source_path())
|
|
||||||
origin = '/'.join((siteurl, linked_content.url))
|
|
||||||
origin = origin.replace('\\', '/') # for Windows paths.
|
|
||||||
else:
|
|
||||||
logger.warning(
|
|
||||||
"Unable to find '%s', skipping url replacement.",
|
|
||||||
value.geturl(), extra={
|
|
||||||
'limit_msg': ("Other resources were not found "
|
|
||||||
"and their urls not replaced")})
|
|
||||||
elif what == 'category':
|
|
||||||
origin = '/'.join((siteurl, Category(path, self.settings).url))
|
|
||||||
elif what == 'tag':
|
|
||||||
origin = '/'.join((siteurl, Tag(path, self.settings).url))
|
|
||||||
elif what == 'index':
|
|
||||||
origin = '/'.join((siteurl, self.settings['INDEX_SAVE_AS']))
|
|
||||||
elif what == 'author':
|
|
||||||
origin = '/'.join((siteurl, Author(path, self.settings).url))
|
|
||||||
else:
|
|
||||||
logger.warning(
|
|
||||||
"Replacement Indicator '%s' not recognized, "
|
|
||||||
"skipping replacement",
|
|
||||||
what)
|
|
||||||
|
|
||||||
# keep all other parts, such as query, fragment, etc.
|
|
||||||
parts = list(value)
|
|
||||||
parts[2] = origin
|
|
||||||
origin = urlunparse(parts)
|
|
||||||
|
|
||||||
return ''.join((m.group('markup'), m.group('quote'), origin,
|
|
||||||
m.group('quote')))
|
|
||||||
|
|
||||||
return hrefs.sub(replacer, content)
|
|
||||||
|
|
||||||
def get_siteurl(self):
|
def get_siteurl(self):
|
||||||
return self._context.get('localsiteurl', '')
|
return self._context.get('localsiteurl', '')
|
||||||
|
|
|
||||||
|
|
@ -397,6 +397,54 @@ class TestPage(LoggedTestCase):
|
||||||
'</blockquote>'
|
'</blockquote>'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def test_intrasite_link_absolute(self):
    """Check that links resolving to absolute URLs are not prefixed."""

    args = self.page_kwargs.copy()
    settings = get_settings(
        STATIC_URL='http://static.cool.site/{path}',
        ARTICLE_URL='http://blog.cool.site/{slug}.html')
    args['settings'] = settings
    args['source_path'] = 'content'
    args['context']['filenames'] = {
        'images/poster.jpg': Static(
            '', settings=settings, source_path='images/poster.jpg'),
        'article.rst': Article(
            '', settings=settings,
            metadata={'slug': 'article', 'title': 'Article'}),
    }

    # Each entry pairs the raw markup with the markup expected after
    # link replacement against the site URL http://cool.site.
    cases = (
        # Article link will go to blog
        ('<a href="{filename}article.rst">Article</a>',
         '<a href="http://blog.cool.site/article.html">Article</a>'),
        # Page link will go to the main site
        ('<a href="{index}">Index</a>',
         '<a href="http://cool.site/index.html">Index</a>'),
        # Image link will go to static
        ('<img src="{filename}/images/poster.jpg"/>',
         '<img src="http://static.cool.site/images/poster.jpg"/>'),
    )
    for markup, expected in cases:
        args['content'] = markup
        rendered = Page(**args).get_content('http://cool.site')
        self.assertEqual(rendered, expected)
|
||||||
|
|
||||||
def test_intrasite_link_markdown_spaces(self):
|
def test_intrasite_link_markdown_spaces(self):
|
||||||
# Markdown introduces %20 instead of spaces, this tests that
|
# Markdown introduces %20 instead of spaces, this tests that
|
||||||
# we support markdown doing this.
|
# we support markdown doing this.
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue