From f2ea886ed207531468930479abeb43d6a7a33a4d Mon Sep 17 00:00:00 2001 From: Borgar Date: Fri, 17 Jun 2011 19:11:44 +0000 Subject: [PATCH 1/3] Rewrote URL reformatter. This attempts to fix several issues: 1. The regexp that's supposed to catch hrefs doesn't work at all, and even if it did match anything, it has too many capture groups for the following loop. 2. When a relative URL is replaced, the replacement is done globally on the text and not per instance. So this `<a href="/foo/bar">/foo/bar</a>` will incorrectly get reformatted to `<a href="./static/foo/bar">./static/foo/bar</a>`. 3. Query parameter URLs are rewritten but shouldn't be: `<a href="?page=2">` gets rewritten to `<a href="./static/?page=2">`. 4. The joiner is producing too many slashes: `"." + "static" + "/files/image.png"` => `./static//files/image.png`. --- pelican/writers.py | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/pelican/writers.py b/pelican/writers.py index 1f560c01..bc6f6f2a 100644 --- a/pelican/writers.py +++ b/pelican/writers.py @@ -166,24 +166,23 @@ class Writer(object): """ content = input._content - hrefs = re.compile(r'<\s*[^\>]*href\s*=(^!#)\s*(["\'])(.*?)\1') - srcs = re.compile(r'<\s*[^\>]*src\s*=\s*(["\'])(.*?)\1') + hrefs = re.compile(r""" + (?P<markup><\s*[^\>]* # match tag with src and href attr + (?:href|src)\s*=\s* + ) + (?P<quote>["\']) # require value to be quoted + (?![#?]) # don't match fragment or query URLs + (?![a-z]+:) # don't match protocol URLS + (?P<path>.*?) 
# the url value + \2""", re.X) - matches = hrefs.findall(content) - matches.extend(srcs.findall(content)) - relative_paths = [] - for found in matches: - found = found[1] - if found not in relative_paths: - relative_paths.append(found) + def replacer(m): + relative_path = m.group('path') + dest_path = os.path.normpath( os.sep.join( (get_relative_path(name), + "static", relative_path) ) ) + return m.group('markup') + m.group('quote') + dest_path + m.group('quote') - for relative_path in relative_paths: - if not ":" in relative_path: # we don't want to rewrite protocols - dest_path = os.sep.join((get_relative_path(name), "static", - relative_path)) - content = content.replace(relative_path, dest_path) - - return content + return hrefs.sub(replacer, content) if context is None: return From a7d5a9a420fa9269b9555317ad8ef3d7556e3fc5 Mon Sep 17 00:00:00 2001 From: Borgar Date: Fri, 17 Jun 2011 19:09:27 +0000 Subject: [PATCH 2/3] Don't rewrite content URLs unless RELATIVE_URLS is True. --- pelican/writers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pelican/writers.py b/pelican/writers.py index bc6f6f2a..13ab9647 100644 --- a/pelican/writers.py +++ b/pelican/writers.py @@ -111,7 +111,8 @@ class Writer(object): localcontext['SITEURL'] = get_relative_path(name) localcontext.update(kwargs) - self.update_context_contents(name, localcontext) + if relative_urls: + self.update_context_contents(name, localcontext) # check paginated paginated = paginated or {} From 53e25ce2cf7e1af51ba98ce309e5af4a1a59a1fe Mon Sep 17 00:00:00 2001 From: Borgar Date: Fri, 17 Jun 2011 19:08:20 +0000 Subject: [PATCH 3/3] Fixed typo in docs: missing plural s in RELATIVE_URLS. --- docs/settings.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/settings.rst b/docs/settings.rst index 9c596da3..482a5350 100644 --- a/docs/settings.rst +++ b/docs/settings.rst @@ -51,7 +51,7 @@ Setting name (default value) what does it do? 
`PDF_GENERATOR` (``False``) Set to True if you want to have PDF versions of your documents. You will need to install `rst2pdf`. -`RELATIVE_URL` (``True``) Defines if pelican should use relative urls or +`RELATIVE_URLS` (``True``) Defines if pelican should use relative urls or not. `SITEURL` base URL of your website. Note that this is not a way to tell pelican to use relative urls