mirror of
https://github.com/getpelican/pelican.git
synced 2025-10-15 20:28:56 +02:00
Rewrote URL reformatter.
This attempts to fix several issues: 1. The regexp that is supposed to catch hrefs doesn't work at all, and even if it did match anything, it has too many capture groups for the loop that follows. 2. When a relative URL is replaced, the replacement is done globally on the text rather than per instance. So `<a href="/foo/bar">/foo/bar</a>` will incorrectly get reformatted to `<a href="./static/foo/bar">./static/foo/bar</a>`. 3. Query-parameter URLs are rewritten but shouldn't be: `<a href="?foo=bar">` gets rewritten to `<a href="./static/?foo=bar">`. 4. The joiner produces too many slashes: `"." + "static" + "/files/image.png"` => `./static//files/image.png`.
This commit is contained in:
parent
3d6bf828fd
commit
f2ea886ed2
1 changed files with 17 additions and 18 deletions
|
|
@ -166,24 +166,23 @@ class Writer(object):
|
||||||
"""
|
"""
|
||||||
content = input._content
|
content = input._content
|
||||||
|
|
||||||
hrefs = re.compile(r'<\s*[^\>]*href\s*=(^!#)\s*(["\'])(.*?)\1')
|
hrefs = re.compile(r"""
|
||||||
srcs = re.compile(r'<\s*[^\>]*src\s*=\s*(["\'])(.*?)\1')
|
(?P<markup><\s*[^\>]* # match tag with src and href attr
|
||||||
|
(?:href|src)\s*=\s*
|
||||||
|
)
|
||||||
|
(?P<quote>["\']) # require value to be quoted
|
||||||
|
(?![#?]) # don't match fragment or query URLs
|
||||||
|
(?![a-z]+:) # don't match protocol URLS
|
||||||
|
(?P<path>.*?) # the url value
|
||||||
|
\2""", re.X)
|
||||||
|
|
||||||
matches = hrefs.findall(content)
|
def replacer(m):
|
||||||
matches.extend(srcs.findall(content))
|
relative_path = m.group('path')
|
||||||
relative_paths = []
|
dest_path = os.path.normpath( os.sep.join( (get_relative_path(name),
|
||||||
for found in matches:
|
"static", relative_path) ) )
|
||||||
found = found[1]
|
return m.group('markup') + m.group('quote') + dest_path + m.group('quote')
|
||||||
if found not in relative_paths:
|
|
||||||
relative_paths.append(found)
|
|
||||||
|
|
||||||
for relative_path in relative_paths:
|
return hrefs.sub(replacer, content)
|
||||||
if not ":" in relative_path: # we don't want to rewrite protocols
|
|
||||||
dest_path = os.sep.join((get_relative_path(name), "static",
|
|
||||||
relative_path))
|
|
||||||
content = content.replace(relative_path, dest_path)
|
|
||||||
|
|
||||||
return content
|
|
||||||
|
|
||||||
if context is None:
|
if context is None:
|
||||||
return
|
return
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue