From c7b9a339eb2c23b08df5f7b2fb46a1387101e95d Mon Sep 17 00:00:00 2001
From: Zack Weinberg <zackw@panix.com>
Date: Thu, 4 Jun 2015 17:52:30 -0400
Subject: [PATCH] Apply 'IGNORE_FILES' globs to directories as well (issue
 1692)

This adjusts the only piece of code that currently looks at IGNORE_FILES.
A subsequent commit will add a new use, with the same semantics.
---
 docs/settings.rst     |  7 ++++---
 pelican/generators.py | 14 +++++++++++---
 2 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/docs/settings.rst b/docs/settings.rst
index 99829258..66a44034 100644
--- a/docs/settings.rst
+++ b/docs/settings.rst
@@ -108,9 +108,10 @@ Setting name (followed by default value, if any)
                                                                                  process or ignore. For example, to avoid processing .html files,
                                                                                  set: ``READERS = {'html': None}``. To add a custom reader for the
                                                                                  ``foo`` extension, set: ``READERS = {'foo': FooReader}``
-``IGNORE_FILES = ['.#*']``                                                       A list of file globbing patterns to match against the
-                                                                                 source files to be ignored by the processor. For example,
-                                                                                 the default ``['.#*']`` will ignore emacs lock files.
+``IGNORE_FILES = ['.#*']``                                                       A list of glob patterns.  Files and directories matching any
+                                                                                 of these patterns will be ignored by the processor. For example,
+                                                                                 the default ``['.#*']`` will ignore Emacs lock files, and
+                                                                                 ``['__pycache__']`` would ignore Python 3's bytecode cache directories.
 ``MD_EXTENSIONS =`` ``['codehilite(css_class=highlight)','extra']``              A list of the extensions that the Markdown processor
                                                                                  will use. Refer to the Python Markdown documentation's
                                                                                  `Extensions section <http://pythonhosted.org/Markdown/extensions/>`_
diff --git a/pelican/generators.py b/pelican/generators.py
index 99961d6e..82a4b790 100644
--- a/pelican/generators.py
+++ b/pelican/generators.py
@@ -132,15 +132,23 @@ class Generator(object):
             exclusions_by_dirpath.setdefault(parent_path, set()).add(subdir)
 
         files = []
+        ignores = self.settings['IGNORE_FILES']
         for path in paths:
             # careful: os.path.join() will add a slash when path == ''.
             root = os.path.join(self.path, path) if path else self.path
 
             if os.path.isdir(root):
                 for dirpath, dirs, temp_files in os.walk(root, followlinks=True):
-                    for e in exclusions_by_dirpath.get(dirpath, ()):
-                        if e in dirs:
-                            dirs.remove(e)
+                    drop = []
+                    excl = exclusions_by_dirpath.get(dirpath, ())
+                    for d in dirs:
+                        if (d in excl or
+                            any(fnmatch.fnmatch(d, ignore)
+                                for ignore in ignores)):
+                            drop.append(d)
+                    for d in drop:
+                        dirs.remove(d)
+
                     reldir = os.path.relpath(dirpath, self.path)
                     for f in temp_files:
                         fp = os.path.join(reldir, f)