Make PAGE_EXCLUDES work with subdirs. Fixes #1500.

The old code naively compared the strings in PAGE_EXCLUDES to the subdirectory names produced by os.walk(). (The same applied to ARTICLE_EXCLUDES.) This had two surprising effects: (1) Setting PAGE_EXCLUDES=['foo'] would exclude all directories named foo, regardless of whether they were in the top-level content directory or nested deep within a directory whose contents should not be excluded. (2) Setting PAGE_EXCLUDES=['subdir/foo'] would never exclude any directories, because os.walk() yields bare directory names that never contain a path separator. In other words, there was no way to exclude a subdirectory without risking the accidental exclusion of other directories with the same name elsewhere in the content tree. This change fixes the problem, so 'subdir/foo' and 'foo' are now distinct and both work as expected: each entry is interpreted as a path relative to the content directory. If anyone out there is depending on the old behavior, they will have to update their settings. I don't expect it to affect most users, since Pelican doesn't yet make nested directory structures very useful. When it does, this fix will become important to more people.
2025-10-15 20:28:56 +02:00 · 2014-10-12 20:34:53 -07:00 · 2014-10-12 20:34:53 -07:00 · 0fe290c321
commit 0fe290c321
parent a81fcd3fef
4 changed files with 48 additions and 2 deletions
--- a/pelican/generators.py
+++ b/pelican/generators.py
@@ -122,13 +122,21 @@ class Generator(object):
        """
        if isinstance(paths, six.string_types):
            paths = [paths] # backward compatibility for older generators
+
+        # group the exclude dir names by parent path, for use with os.walk()
+        exclusions_by_dirpath = {}
+        for e in exclude:
+            parent_path, subdir = os.path.split(os.path.join(self.path, e))
+            exclusions_by_dirpath.setdefault(parent_path, set()).add(subdir)
+
        files = []
        for path in paths:
-            root = os.path.join(self.path, path)
+            # careful: os.path.join() will add a slash when path == ''.
+            root = os.path.join(self.path, path) if path else self.path

            if os.path.isdir(root):
                for dirpath, dirs, temp_files in os.walk(root, followlinks=True):
-                    for e in exclude:
+                    for e in exclusions_by_dirpath.get(dirpath, ()):
                        if e in dirs:
                            dirs.remove(e)
                    reldir = os.path.relpath(dirpath, self.path)
--- a/pelican/tests/nested_content/maindir/maindir.md
+++ b/pelican/tests/nested_content/maindir/maindir.md
@@ -0,0 +1,3 @@
+Title: Main Dir Page
+
+This page lives in maindir.
--- a/pelican/tests/nested_content/maindir/subdir/subdir.md
+++ b/pelican/tests/nested_content/maindir/subdir/subdir.md
@@ -0,0 +1,3 @@
+Title: Subdir Page
+
+This page lives in maindir/subdir.
--- a/pelican/tests/test_generators.py
+++ b/pelican/tests/test_generators.py
@@ -41,6 +41,38 @@ class TestGenerator(unittest.TestCase):
        self.assertTrue(include_path(filename, extensions=('rst',)))
        self.assertFalse(include_path(filename, extensions=('md',)))

+    def test_get_files_exclude(self):
+        """Test that Generator.get_files() properly excludes directories.
+        """
+        # We use our own Generator so we can give it our own content path
+        generator = Generator(context=self.settings.copy(),
+            settings=self.settings,
+            path=os.path.join(CUR_DIR, 'nested_content'),
+            theme=self.settings['THEME'], output_path=None)
+
+        filepaths = generator.get_files(paths=['maindir'])
+        found_files = {os.path.basename(f) for f in filepaths}
+        expected_files = {'maindir.md', 'subdir.md'}
+        self.assertFalse(expected_files - found_files,
+            "get_files() failed to find one or more files")
+
+        filepaths = generator.get_files(paths=[''], exclude=['maindir'])
+        found_files = {os.path.basename(f) for f in filepaths}
+        self.assertNotIn('maindir.md', found_files,
+            "get_files() failed to exclude a top-level directory")
+        self.assertNotIn('subdir.md', found_files,
+            "get_files() failed to exclude a subdir of an excluded directory")
+
+        filepaths = generator.get_files(paths=[''],
+            exclude=[os.path.join('maindir', 'subdir')])
+        found_files = {os.path.basename(f) for f in filepaths}
+        self.assertNotIn('subdir.md', found_files,
+            "get_files() failed to exclude a subdirectory")
+
+        filepaths = generator.get_files(paths=[''], exclude=['subdir'])
+        found_files = {os.path.basename(f) for f in filepaths}
+        self.assertIn('subdir.md', found_files,
+            "get_files() excluded a subdirectory by name, ignoring its path")

 class TestArticlesGenerator(unittest.TestCase):