Make PAGE_EXCLUDES work with subdirs. Fixes #1500.

The old code was naively comparing the strings in PAGE_EXCLUDES to the
subdirectory names produced by os.walk(). (Same with ARTICLE_EXCLUDES.)
This had two surprising effects:

Setting PAGE_EXCLUDES=['foo'] would exclude all directories named foo,
regardless of whether they were in the top-level content directory or
nested deep within a directory whose contents should not be excluded.

Setting PAGE_EXCLUDES=['subdir/foo'] would never exclude any directories.

In other words, there is no way to exclude a subdirectory without risking
the accidental exclusion of other directories with the same name elsewhere
in the file system.

This change fixes the problem, so 'subdir/foo' and 'foo' will be distinct
and both work as expected. If anyone out there is depending on the old
behavior, they will have to update their settings. I don't expect it to
affect most users yet, since Pelican doesn't yet make nested directory
structures very useful. When it does, this fix will become important to
more people.
This commit is contained in:
Forest 2014-10-12 20:34:53 -07:00
commit 0fe290c321
4 changed files with 48 additions and 2 deletions

View file

@ -122,13 +122,21 @@ class Generator(object):
"""
if isinstance(paths, six.string_types):
paths = [paths] # backward compatibility for older generators
# group the exclude dir names by parent path, for use with os.walk()
exclusions_by_dirpath = {}
for e in exclude:
parent_path, subdir = os.path.split(os.path.join(self.path, e))
exclusions_by_dirpath.setdefault(parent_path, set()).add(subdir)
files = []
for path in paths:
root = os.path.join(self.path, path)
# careful: os.path.join() will add a slash when path == ''.
root = os.path.join(self.path, path) if path else self.path
if os.path.isdir(root):
for dirpath, dirs, temp_files in os.walk(root, followlinks=True):
for e in exclude:
for e in exclusions_by_dirpath.get(dirpath, ()):
if e in dirs:
dirs.remove(e)
reldir = os.path.relpath(dirpath, self.path)

View file

@ -0,0 +1,3 @@
Title: Main Dir Page
This page lives in maindir.

View file

@ -0,0 +1,3 @@
Title: Subdir Page
This page lives in maindir/subdir.

View file

@ -41,6 +41,38 @@ class TestGenerator(unittest.TestCase):
self.assertTrue(include_path(filename, extensions=('rst',)))
self.assertFalse(include_path(filename, extensions=('md',)))
def test_get_files_exclude(self):
"""Test that Generator.get_files() properly excludes directories.
"""
# We use our own Generator so we can give it our own content path
generator = Generator(context=self.settings.copy(),
settings=self.settings,
path=os.path.join(CUR_DIR, 'nested_content'),
theme=self.settings['THEME'], output_path=None)
filepaths = generator.get_files(paths=['maindir'])
found_files = {os.path.basename(f) for f in filepaths}
expected_files = {'maindir.md', 'subdir.md'}
self.assertFalse(expected_files - found_files,
"get_files() failed to find one or more files")
filepaths = generator.get_files(paths=[''], exclude=['maindir'])
found_files = {os.path.basename(f) for f in filepaths}
self.assertNotIn('maindir.md', found_files,
"get_files() failed to exclude a top-level directory")
self.assertNotIn('subdir.md', found_files,
"get_files() failed to exclude a subdir of an excluded directory")
filepaths = generator.get_files(paths=[''],
exclude=[os.path.join('maindir', 'subdir')])
found_files = {os.path.basename(f) for f in filepaths}
self.assertNotIn('subdir.md', found_files,
"get_files() failed to exclude a subdirectory")
filepaths = generator.get_files(paths=[''], exclude=['subdir'])
found_files = {os.path.basename(f) for f in filepaths}
self.assertIn('subdir.md', found_files,
"get_files() excluded a subdirectory by name, ignoring its path")
class TestArticlesGenerator(unittest.TestCase):