From ad38d602c710355fe5051f7857793fcb036d8bad Mon Sep 17 00:00:00 2001
From: jvoisin <jvo@nbs-system.com>
Date: Wed, 8 Mar 2017 14:05:57 +0100
Subject: [PATCH] Improve the regexp used in _update_content

An HTML tag always starts with <[a-z]; < [a-z] is invalid.
A space can also follow the = in href='bleh' (e.g. href = 'bleh').

This function takes 10% of the compilation time with caching enabled,
so it may be worth optimising the regexp a bit more.
---
 pelican/contents.py            |  4 ++--
 pelican/tests/test_contents.py | 21 +++++++++++++++++++++
 2 files changed, 23 insertions(+), 2 deletions(-)
diff --git a/pelican/contents.py b/pelican/contents.py
index 3187f328..3d1128c9 100644
--- a/pelican/contents.py
+++ b/pelican/contents.py
@@ -219,8 +219,8 @@ class Content(object):
 
         instrasite_link_regex = self.settings['INTRASITE_LINK_REGEX']
         regex = r"""
-            (?P<markup><\s*[^\>]*  # match tag with all url-value attributes
-                (?:href|src|poster|data|cite|formaction|action)\s*=)
+            (?P<markup><[^\>]+  # match tag with all url-value attributes
+                (?:href|src|poster|data|cite|formaction|action)\s*=\s*)
 
             (?P<quote>["\'])      # require value to be quoted
             (?P<path>{0}(?P<value>.*?))  # the url value
diff --git a/pelican/tests/test_contents.py b/pelican/tests/test_contents.py
index b4146150..11fa958a 100644
--- a/pelican/tests/test_contents.py
+++ b/pelican/tests/test_contents.py
@@ -771,3 +771,24 @@ class TestStatic(LoggedTestCase):
             count=1,
             msg="Unable to find 'foo', skipping url replacement.",
             level=logging.WARNING)
+
+    def test_index_link_syntax_with_spaces(self):
+        """{index} link syntax triggers url replacement
+        with spaces around the equal sign."""
+
+        html = '<a href = "{index}">link</a>'
+        page = Page(
+            content=html,
+            metadata={'title': 'fakepage'},
+            settings=self.settings,
+            source_path=os.path.join('dir', 'otherdir', 'fakepage.md'),
+            context=self.context)
+        content = page.get_content('')
+
+        self.assertNotEqual(content, html)
+
+        expected_html = ('<a href = "' +
+                         '/'.join((self.settings['SITEURL'],
+                                   self.settings['INDEX_SAVE_AS'])) +
+                         '">link</a>')
+        self.assertEqual(content, expected_html)