From 0fe290c321fefabcbd8b2957683063d825e3997d Mon Sep 17 00:00:00 2001 From: Forest Date: Sun, 12 Oct 2014 20:34:53 -0700 Subject: [PATCH] Make PAGE_EXCLUDES work with subdirs. Fixes #1500. The old code was naively comparing the strings in PAGE_EXCLUDES to the subdirectory names produced by os.walk(). (The same applied to ARTICLE_EXCLUDES.) This had two surprising effects: Setting PAGE_EXCLUDES=['foo'] would exclude all directories named foo, regardless of whether they were in the top-level content directory or nested deep within a directory whose contents should not be excluded. Setting PAGE_EXCLUDES=['subdir/foo'] would never exclude any directories. In other words, there was no way to exclude a subdirectory without risking the accidental exclusion of other directories with the same name elsewhere in the file system. This change fixes the problem, so 'subdir/foo' and 'foo' will be distinct and both work as expected. If anyone out there is depending on the old behavior, they will have to update their settings. I don't expect it to affect most users, since Pelican doesn't yet make nested directory structures very useful. When it does, this fix will become important to more people. 
--- pelican/generators.py | 12 +++++-- .../tests/nested_content/maindir/maindir.md | 3 ++ .../nested_content/maindir/subdir/subdir.md | 3 ++ pelican/tests/test_generators.py | 32 +++++++++++++++++++ 4 files changed, 48 insertions(+), 2 deletions(-) create mode 100644 pelican/tests/nested_content/maindir/maindir.md create mode 100644 pelican/tests/nested_content/maindir/subdir/subdir.md diff --git a/pelican/generators.py b/pelican/generators.py index 4d5cb6cb..5122fa6d 100644 --- a/pelican/generators.py +++ b/pelican/generators.py @@ -122,13 +122,21 @@ class Generator(object): """ if isinstance(paths, six.string_types): paths = [paths] # backward compatibility for older generators + + # group the exclude dir names by parent path, for use with os.walk() + exclusions_by_dirpath = {} + for e in exclude: + parent_path, subdir = os.path.split(os.path.join(self.path, e)) + exclusions_by_dirpath.setdefault(parent_path, set()).add(subdir) + files = [] for path in paths: - root = os.path.join(self.path, path) + # careful: os.path.join() will add a slash when path == ''. + root = os.path.join(self.path, path) if path else self.path if os.path.isdir(root): for dirpath, dirs, temp_files in os.walk(root, followlinks=True): - for e in exclude: + for e in exclusions_by_dirpath.get(dirpath, ()): if e in dirs: dirs.remove(e) reldir = os.path.relpath(dirpath, self.path) diff --git a/pelican/tests/nested_content/maindir/maindir.md b/pelican/tests/nested_content/maindir/maindir.md new file mode 100644 index 00000000..443e1827 --- /dev/null +++ b/pelican/tests/nested_content/maindir/maindir.md @@ -0,0 +1,3 @@ +Title: Main Dir Page + +This page lives in maindir. diff --git a/pelican/tests/nested_content/maindir/subdir/subdir.md b/pelican/tests/nested_content/maindir/subdir/subdir.md new file mode 100644 index 00000000..32e73617 --- /dev/null +++ b/pelican/tests/nested_content/maindir/subdir/subdir.md @@ -0,0 +1,3 @@ +Title: Subdir Page + +This page lives in maindir/subdir. 
diff --git a/pelican/tests/test_generators.py b/pelican/tests/test_generators.py index 2f53ac95..4be1b35e 100644 --- a/pelican/tests/test_generators.py +++ b/pelican/tests/test_generators.py @@ -41,6 +41,38 @@ class TestGenerator(unittest.TestCase): self.assertTrue(include_path(filename, extensions=('rst',))) self.assertFalse(include_path(filename, extensions=('md',))) + def test_get_files_exclude(self): + """Test that Generator.get_files() properly excludes directories. + """ + # We use our own Generator so we can give it our own content path + generator = Generator(context=self.settings.copy(), + settings=self.settings, + path=os.path.join(CUR_DIR, 'nested_content'), + theme=self.settings['THEME'], output_path=None) + + filepaths = generator.get_files(paths=['maindir']) + found_files = {os.path.basename(f) for f in filepaths} + expected_files = {'maindir.md', 'subdir.md'} + self.assertFalse(expected_files - found_files, + "get_files() failed to find one or more files") + + filepaths = generator.get_files(paths=[''], exclude=['maindir']) + found_files = {os.path.basename(f) for f in filepaths} + self.assertNotIn('maindir.md', found_files, + "get_files() failed to exclude a top-level directory") + self.assertNotIn('subdir.md', found_files, + "get_files() failed to exclude a subdir of an excluded directory") + + filepaths = generator.get_files(paths=[''], + exclude=[os.path.join('maindir', 'subdir')]) + found_files = {os.path.basename(f) for f in filepaths} + self.assertNotIn('subdir.md', found_files, + "get_files() failed to exclude a subdirectory") + + filepaths = generator.get_files(paths=[''], exclude=['subdir']) + found_files = {os.path.basename(f) for f in filepaths} + self.assertIn('subdir.md', found_files, + "get_files() excluded a subdirectory by name, ignoring its path") class TestArticlesGenerator(unittest.TestCase):