From ad38d602c710355fe5051f7857793fcb036d8bad Mon Sep 17 00:00:00 2001 From: jvoisin Date: Wed, 8 Mar 2017 14:05:57 +0100 Subject: [PATCH] Improve the regexp used in _update_content An HTML tag always starts with <[a-z]; < [a-z] is invalid. A space can also be found after the = in href='bleh'. This function takes 10% of the compilation time with caching enabled, so it may be worth optimising the regexp a bit more. --- pelican/contents.py | 4 ++-- pelican/tests/test_contents.py | 21 +++++++++++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/pelican/contents.py b/pelican/contents.py index 3187f328..3d1128c9 100644 --- a/pelican/contents.py +++ b/pelican/contents.py @@ -219,8 +219,8 @@ class Content(object): instrasite_link_regex = self.settings['INTRASITE_LINK_REGEX'] regex = r""" - (?P<\s*[^\>]* # match tag with all url-value attributes - (?:href|src|poster|data|cite|formaction|action)\s*=) + (?P<[^\>]+ # match tag with all url-value attributes + (?:href|src|poster|data|cite|formaction|action)\s*=\s*) (?P["\']) # require value to be quoted (?P{0}(?P.*?)) # the url value diff --git a/pelican/tests/test_contents.py b/pelican/tests/test_contents.py index b4146150..11fa958a 100644 --- a/pelican/tests/test_contents.py +++ b/pelican/tests/test_contents.py @@ -771,3 +771,24 @@ class TestStatic(LoggedTestCase): count=1, msg="Unable to find 'foo', skipping url replacement.", level=logging.WARNING) + + def test_index_link_syntax_with_spaces(self): + """{index} link syntax triggers url replacement + with spaces around the equal sign.""" + + html = 'link' + page = Page( + content=html, + metadata={'title': 'fakepage'}, + settings=self.settings, + source_path=os.path.join('dir', 'otherdir', 'fakepage.md'), + context=self.context) + content = page.get_content('') + + self.assertNotEqual(content, html) + + expected_html = ('link') + self.assertEqual(content, expected_html)