mirror of
https://github.com/getpelican/pelican.git
synced 2025-10-15 20:28:56 +02:00
Rewrote URL reformatter.
This attempts to fix several issues: 1. The regexp that is supposed to catch hrefs doesn't work at all, and even if it did match anything, it has too many parentheses (capture groups) for the loop that follows. 2. When a relative URL is replaced, the replacement is done globally on the text rather than per instance, so `<a href="/foo/bar">/foo/bar</a>` is incorrectly reformatted to `<a href="./static/foo/bar">./static/foo/bar</a>`. 3. Query-parameter URLs are rewritten but shouldn't be: `<a href="?foo=bar">` gets rewritten to `<a href="./static/?foo=bar">`. 4. The joiner produces too many slashes: `"." + "static" + "/files/image.png"` => `./static//files/image.png`.
This commit is contained in:
parent
3d6bf828fd
commit
f2ea886ed2
1 changed files with 17 additions and 18 deletions
|
|
@ -166,24 +166,23 @@ class Writer(object):
|
|||
"""
|
||||
content = input._content
|
||||
|
||||
hrefs = re.compile(r'<\s*[^\>]*href\s*=(^!#)\s*(["\'])(.*?)\1')
|
||||
srcs = re.compile(r'<\s*[^\>]*src\s*=\s*(["\'])(.*?)\1')
|
||||
hrefs = re.compile(r"""
|
||||
(?P<markup><\s*[^\>]* # match tag with src and href attr
|
||||
(?:href|src)\s*=\s*
|
||||
)
|
||||
(?P<quote>["\']) # require value to be quoted
|
||||
(?![#?]) # don't match fragment or query URLs
|
||||
(?![a-z]+:) # don't match protocol URLS
|
||||
(?P<path>.*?) # the url value
|
||||
\2""", re.X)
|
||||
|
||||
matches = hrefs.findall(content)
|
||||
matches.extend(srcs.findall(content))
|
||||
relative_paths = []
|
||||
for found in matches:
|
||||
found = found[1]
|
||||
if found not in relative_paths:
|
||||
relative_paths.append(found)
|
||||
def replacer(m):
|
||||
relative_path = m.group('path')
|
||||
dest_path = os.path.normpath( os.sep.join( (get_relative_path(name),
|
||||
"static", relative_path) ) )
|
||||
return m.group('markup') + m.group('quote') + dest_path + m.group('quote')
|
||||
|
||||
for relative_path in relative_paths:
|
||||
if not ":" in relative_path: # we don't want to rewrite protocols
|
||||
dest_path = os.sep.join((get_relative_path(name), "static",
|
||||
relative_path))
|
||||
content = content.replace(relative_path, dest_path)
|
||||
|
||||
return content
|
||||
return hrefs.sub(replacer, content)
|
||||
|
||||
if context is None:
|
||||
return
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue