From f2ea886ed207531468930479abeb43d6a7a33a4d Mon Sep 17 00:00:00 2001 From: Borgar Date: Fri, 17 Jun 2011 19:11:44 +0000 Subject: [PATCH] Rewrote URL reformatter. This attempts to fix several issues: 1. The regexp that's supposed to catch hrefs doesn't work at all and even if it did match anything, it has too many parentheses for the following loop. 2. When a relative URL is replaced then it is done globally on the text and not per instance. So this `/foo/bar` will incorrectly get reformatted to `./static/foo/bar`. 3. Query parameter URLs are rewritten but shouldn't be: `` gets rewritten to `` 4. The joiner is producing too many slashes: `"." + "static" + "/files/image.png"` => `./static//files/image.png`. --- pelican/writers.py | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/pelican/writers.py b/pelican/writers.py index 1f560c01..bc6f6f2a 100644 --- a/pelican/writers.py +++ b/pelican/writers.py @@ -166,24 +166,23 @@ class Writer(object): """ content = input._content - hrefs = re.compile(r'<\s*[^\>]*href\s*=(^!#)\s*(["\'])(.*?)\1') - srcs = re.compile(r'<\s*[^\>]*src\s*=\s*(["\'])(.*?)\1') + hrefs = re.compile(r""" + (?P<markup><\s*[^\>]* # match tag with src and href attr + (?:href|src)\s*=\s* + ) + (?P<quote>["\']) # require value to be quoted + (?![#?]) # don't match fragment or query URLs + (?![a-z]+:) # don't match protocol URLS + (?P<path>.*?)
# the url value + \2""", re.X) - matches = hrefs.findall(content) - matches.extend(srcs.findall(content)) - relative_paths = [] - for found in matches: - found = found[1] - if found not in relative_paths: - relative_paths.append(found) + def replacer(m): + relative_path = m.group('path') + dest_path = os.path.normpath( os.sep.join( (get_relative_path(name), + "static", relative_path) ) ) + return m.group('markup') + m.group('quote') + dest_path + m.group('quote') - for relative_path in relative_paths: - if not ":" in relative_path: # we don't want to rewrite protocols - dest_path = os.sep.join((get_relative_path(name), "static", - relative_path)) - content = content.replace(relative_path, dest_path) - - return content + return hrefs.sub(replacer, content) if context is None: return