diff --git a/pelican/contents.py b/pelican/contents.py index 1ded6cdb..db63dd9e 100644 --- a/pelican/contents.py +++ b/pelican/contents.py @@ -17,8 +17,9 @@ from pelican import signals from pelican.settings import DEFAULT_CONFIG from pelican.utils import (SafeDatetime, deprecated_attribute, memoized, path_to_url, posixize_path, - python_2_unicode_compatible, set_date_tzinfo, - slugify, strftime, truncate_html_words) + python_2_unicode_compatible, sanitised_join, + set_date_tzinfo, slugify, strftime, + truncate_html_words) # Import these so that they're avalaible when you import from pelican.contents. from pelican.urlwrappers import (Author, Category, Tag, URLWrapper) # NOQA @@ -161,6 +162,22 @@ class Content(object): if not hasattr(self, prop): raise NameError(prop) + def valid_save_as(self): + """Return true if save_as doesn't write outside output path, false + otherwise.""" + try: + output_path = self.settings["OUTPUT_PATH"] + except KeyError: + # we cannot check + return True + + try: + sanitised_join(output_path, self.save_as) + except RuntimeError: # outside output_dir + return False + + return True + @property def url_format(self): """Returns the URL, formatted with the proper values""" @@ -470,9 +487,20 @@ class Static(Page): def is_valid_content(content, f): try: content.check_properties() - return True except NameError as e: logger.error( "Skipping %s: could not find information about '%s'", f, six.text_type(e)) return False + + if not content.valid_save_as(): + logger.error( + "Skipping %s: file %r would be written outside output path", + f, + content.save_as, + ) + # Note: future code might want to use a result variable instead, to + # allow showing multiple error messages at once. + return False + + return True diff --git a/pelican/tests/test_contents.py b/pelican/tests/test_contents.py index 2f774a6e..7f37edf1 100644 --- a/pelican/tests/test_contents.py +++ b/pelican/tests/test_contents.py @@ -497,6 +497,30 @@ class TestArticle(TestPage): article = Article(**article_kwargs) self.assertEqual(article.url, 'fedora.qa/this-week-in-fedora-qa/') + def test_valid_save_as_detects_breakout(self): + settings = get_settings() + article_kwargs = self._copy_page_kwargs() + article_kwargs['metadata']['slug'] = '../foo' + article_kwargs['settings'] = settings + article = Article(**article_kwargs) + self.assertFalse(article.valid_save_as()) + + def test_valid_save_as_detects_breakout_to_root(self): + settings = get_settings() + article_kwargs = self._copy_page_kwargs() + article_kwargs['metadata']['slug'] = '/foo' + article_kwargs['settings'] = settings + article = Article(**article_kwargs) + self.assertFalse(article.valid_save_as()) + + def test_valid_save_as_passes_valid(self): + settings = get_settings() + article_kwargs = self._copy_page_kwargs() + article_kwargs['metadata']['slug'] = 'foo' + article_kwargs['settings'] = settings + article = Article(**article_kwargs) + self.assertTrue(article.valid_save_as()) + class TestStatic(LoggedTestCase): diff --git a/pelican/tests/test_utils.py b/pelican/tests/test_utils.py index 040707ca..9a7109d6 100644 --- a/pelican/tests/test_utils.py +++ b/pelican/tests/test_utils.py @@ -11,6 +11,8 @@ from tempfile import mkdtemp import pytz +import six + from pelican import utils from pelican.generators import TemplatePagesGenerator from pelican.settings import read_settings @@ -666,3 +668,34 @@ class TestDateFormatter(unittest.TestCase): with utils.pelican_open(output_path) as output_file: self.assertEqual(output_file, utils.strftime(self.date, 'date = %A, %d %B %Y')) + + +class TestSanitisedJoin(unittest.TestCase): + def test_detect_parent_breakout(self): + with six.assertRaisesRegex( + self, + RuntimeError, + "Attempted to break out of output directory to /foo/test"): + utils.sanitised_join( + "/foo/bar", + "../test" + ) + + def test_detect_root_breakout(self): + with six.assertRaisesRegex( + self, + RuntimeError, + "Attempted to break out of output directory to /test"): + utils.sanitised_join( + "/foo/bar", + "/test" + ) + + def test_pass_deep_subpaths(self): + self.assertEqual( + utils.sanitised_join( + "/foo/bar", + "test" + ), + os.path.join("/foo/bar", "test") + ) diff --git a/pelican/utils.py b/pelican/utils.py index 9d780039..a521d3a8 100644 --- a/pelican/utils.py +++ b/pelican/utils.py @@ -36,6 +36,18 @@ except ImportError: logger = logging.getLogger(__name__) +def sanitised_join(base_directory, *parts): + joined = os.path.abspath(os.path.join(base_directory, *parts)) + if not joined.startswith(os.path.abspath(base_directory)): + raise RuntimeError( + "Attempted to break out of output directory to {}".format( + joined + ) + ) + + return joined + + def strftime(date, date_format): ''' Replacement for built-in strftime diff --git a/pelican/writers.py b/pelican/writers.py index 88a2bcfd..049b05b2 100644 --- a/pelican/writers.py +++ b/pelican/writers.py @@ -13,7 +13,7 @@ import six from pelican import signals from pelican.paginator import Paginator from pelican.utils import (get_relative_path, is_selected_for_writing, - path_to_url, set_date_tzinfo) + path_to_url, sanitised_join, set_date_tzinfo) if not six.PY3: from codecs import open @@ -21,18 +21,6 @@ if not six.PY3: logger = logging.getLogger(__name__) -def _sanitised_join(base_directory, *parts): - joined = os.path.abspath(os.path.join(base_directory, *parts)) - if not joined.startswith(base_directory): - raise RuntimeError( - "attempt to break out of output directory to {}".format( - joined - ) - ) - - return joined - - class Writer(object): def __init__(self, output_path, settings=None): @@ -135,7 +123,7 @@ class Writer(object): self._add_item_to_the_feed(feed, elements[i]) if path: - complete_path = _sanitised_join(self.output_path, path) + complete_path = sanitised_join(self.output_path, path) try: os.makedirs(os.path.dirname(complete_path)) @@ -182,7 +170,7 @@ class Writer(object): if localcontext['localsiteurl']: context['localsiteurl'] = localcontext['localsiteurl'] output = template.render(localcontext) - path = _sanitised_join(output_path, name) + path = sanitised_join(output_path, name) try: os.makedirs(os.path.dirname(path))