From 30b2aab7b645309eed6f328e00aed6eaa8e551ca Mon Sep 17 00:00:00 2001 From: "Mr. Senko" Date: Sun, 11 Jun 2017 02:28:32 +0300 Subject: [PATCH] Add recursion stop --- pelican/contents.py | 25 ++++++++++++++++++++++--- pelican/tests/content/include5.html | 1 + pelican/tests/content/include6.html | 1 + pelican/tests/test_contents.py | 9 +++++++++ 4 files changed, 33 insertions(+), 3 deletions(-) create mode 100644 pelican/tests/content/include5.html create mode 100644 pelican/tests/content/include6.html diff --git a/pelican/contents.py b/pelican/contents.py index f6e08147..96713014 100644 --- a/pelican/contents.py +++ b/pelican/contents.py @@ -321,10 +321,19 @@ class Content(object): """ regex = r"""[{|]include[|}](?P<path>[\w./]+)""" hrefs = re.compile(regex, re.X) + processed_paths = [] + # In Python 3.x we can use the `nonlocal` declaration, in `replacer()`, + # to tell Python we mean to assign to the `source_path` variable from + # `_update_includes()`. + # In Python 2.x we simply can't assign to `source_path` in `replacer()`. + # However, we work around this by not assigning to the variable itself, + # but using a mutable container to keep track of the current working + # directory while doing the recursion. 
+ source_dir = [source_path] def replacer(m): path = m.group('path') - path = self._path_replacer(path, source_path) + path = self._path_replacer(path, source_dir[0]) path = posixize_path( os.path.abspath( os.path.join(self.settings['PATH'], path) @@ -343,15 +352,25 @@ class Content(object): logger.warning("Unable to read `%s`, skipping include.", path) return ''.join(('{include}', m.group('path'))) + # recursion stop + if path in processed_paths: + raise RuntimeError("Circular inclusion detected for '%s'" % path) + processed_paths.append(path) + reader = self.readers.reader_classes[ext](self.settings) text, meta = reader.read(path) # if we recurse into another file to perform more includes # self._path_replacer needs to know in which directory # it operates otherwise it produces wrong paths - source_dir = posixize_path(os.path.dirname(path)) + source_dir[0] = posixize_path(os.path.dirname(path)) + current_source_dir = source_dir[0] - text = self._update_includes(text, source_dir) + # recursively replace other includes + text = hrefs.sub(replacer, text) + + # restore source dir + source_dir[0] = current_source_dir return text return hrefs.sub(replacer, content) diff --git a/pelican/tests/content/include5.html b/pelican/tests/content/include5.html new file mode 100644 index 00000000..b5018e36 --- /dev/null +++ b/pelican/tests/content/include5.html @@ -0,0 +1 @@ +{include}include6.html diff --git a/pelican/tests/content/include6.html b/pelican/tests/content/include6.html new file mode 100644 index 00000000..e70cf4e6 --- /dev/null +++ b/pelican/tests/content/include6.html @@ -0,0 +1 @@ +{include}include5.html diff --git a/pelican/tests/test_contents.py b/pelican/tests/test_contents.py index 9546ab8a..660d33f9 100644 --- a/pelican/tests/test_contents.py +++ b/pelican/tests/test_contents.py @@ -558,6 +558,15 @@ class TestPage(LoggedTestCase): ' Included content is above' ) + # recursion loop, include5.html includes include6.html + # and the other way around + 
args['content'] = ( + 'There is a simple include here ' + '{include}include5.html' + ) + with self.assertRaisesRegex(RuntimeError, 'Circular inclusion detected'): + Page(**args).get_content('http://notmyidea.org') + def test_multiple_authors(self): """Test article with multiple authors.""" args = self.page_kwargs.copy()