forked from github/pelican
Improve the regexp used in _update_content
An HTML tag always starts with `<[a-z]`; `< [a-z]` is invalid. A space can be found after the `=` in `href='bleh'`. This function takes 10% of the compilation time with caching enabled; maybe it's worth optimising the regexp a bit more, I don't know.
This commit is contained in:
parent
ee643d47d7
commit
ad38d602c7
2 changed files with 23 additions and 2 deletions
|
|
@ -219,8 +219,8 @@ class Content(object):
|
|||
|
||||
instrasite_link_regex = self.settings['INTRASITE_LINK_REGEX']
|
||||
regex = r"""
|
||||
(?P<markup><\s*[^\>]* # match tag with all url-value attributes
|
||||
(?:href|src|poster|data|cite|formaction|action)\s*=)
|
||||
(?P<markup><[^\>]+ # match tag with all url-value attributes
|
||||
(?:href|src|poster|data|cite|formaction|action)\s*=\s*)
|
||||
|
||||
(?P<quote>["\']) # require value to be quoted
|
||||
(?P<path>{0}(?P<value>.*?)) # the url value
|
||||
|
|
|
|||
|
|
@ -771,3 +771,24 @@ class TestStatic(LoggedTestCase):
|
|||
count=1,
|
||||
msg="Unable to find 'foo', skipping url replacement.",
|
||||
level=logging.WARNING)
|
||||
|
||||
def test_index_link_syntax_with_spaces(self):
|
||||
"""{index} link syntax triggers url replacement
|
||||
with spaces around the equal sign."""
|
||||
|
||||
html = '<a href = "{index}">link</a>'
|
||||
page = Page(
|
||||
content=html,
|
||||
metadata={'title': 'fakepage'},
|
||||
settings=self.settings,
|
||||
source_path=os.path.join('dir', 'otherdir', 'fakepage.md'),
|
||||
context=self.context)
|
||||
content = page.get_content('')
|
||||
|
||||
self.assertNotEqual(content, html)
|
||||
|
||||
expected_html = ('<a href = "' +
|
||||
'/'.join((self.settings['SITEURL'],
|
||||
self.settings['INDEX_SAVE_AS'])) +
|
||||
'">link</a>')
|
||||
self.assertEqual(content, expected_html)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue