diff --git a/docs/settings.rst b/docs/settings.rst index b4e73316..e08291f0 100644 --- a/docs/settings.rst +++ b/docs/settings.rst @@ -108,9 +108,10 @@ Setting name (followed by default value, if any) process or ignore. For example, to avoid processing .html files, set: ``READERS = {'html': None}``. To add a custom reader for the ``foo`` extension, set: ``READERS = {'foo': FooReader}`` -``IGNORE_FILES = ['.#*']`` A list of file globbing patterns to match against the - source files to be ignored by the processor. For example, - the default ``['.#*']`` will ignore emacs lock files. +``IGNORE_FILES = ['.#*']`` A list of glob patterns. Files and directories matching any + of these patterns will be ignored by the processor. For example, + the default ``['.#*']`` will ignore emacs lock files, and + ``['__pycache__']`` would ignore Python 3's bytecode caches. ``MD_EXTENSIONS =`` ``['codehilite(css_class=highlight)','extra']`` A list of the extensions that the Markdown processor will use. Refer to the Python Markdown documentation's `Extensions section `_ diff --git a/pelican/generators.py b/pelican/generators.py index d4ec29c8..3692ef0b 100644 --- a/pelican/generators.py +++ b/pelican/generators.py @@ -132,15 +132,23 @@ class Generator(object): exclusions_by_dirpath.setdefault(parent_path, set()).add(subdir) files = [] + ignores = self.settings['IGNORE_FILES'] for path in paths: # careful: os.path.join() will add a slash when path == ''. 
def copy(source, destination, ignores=None):
    """Recursively copy source into destination.

    If source is a file, destination has to be a file as well.
    The function is able to copy either files or directories.

    Both paths are user-expanded and made absolute before use.  Problems
    are reported through the module logger rather than raised: an ignored
    source, a directory source whose destination is an existing file, and
    a source that is neither a file nor a directory are all skipped.

    :param source: the source file or directory
    :param destination: the destination file or directory
    :param ignores: either None, or a list of glob patterns;
                    files and directories whose base name matches any of
                    those patterns will _not_ be copied.
    """
    def walk_error(err):
        # Called by os.walk() with the OSError raised while listing a
        # directory; log it and let the walk carry on.
        logger.warning("While copying %s: %s: %s",
                       source_, err.filename, err.strerror)

    def ignored(name):
        # True when the base name matches at least one ignore pattern.
        return any(fnmatch.fnmatch(name, pattern) for pattern in ignores)

    source_ = os.path.abspath(os.path.expanduser(source))
    destination_ = os.path.abspath(os.path.expanduser(destination))

    if ignores is None:
        ignores = []

    # Test the normalised path: basename() of a path that ends with a
    # separator is '', which would let an ignored directory slip through.
    if ignored(os.path.basename(source_)):
        logger.info('Not copying %s due to ignores', source_)
        return

    if os.path.isfile(source_):
        dst_dir = os.path.dirname(destination_)
        if not os.path.exists(dst_dir):
            logger.info('Creating directory %s', dst_dir)
            os.makedirs(dst_dir)
        logger.info('Copying %s to %s', source_, destination_)
        shutil.copy2(source_, destination_)

    elif os.path.isdir(source_):
        if not os.path.exists(destination_):
            logger.info('Creating directory %s', destination_)
            os.makedirs(destination_)
        if not os.path.isdir(destination_):
            logger.warning('Cannot copy %s (a directory) to %s (a file)',
                           source_, destination_)
            return

        for src_dir, subdirs, others in os.walk(source_, onerror=walk_error):
            dst_dir = os.path.join(destination_,
                                   os.path.relpath(src_dir, source_))

            # Prune in place so os.walk() does not descend into ignored
            # directories at all.
            subdirs[:] = [s for s in subdirs if not ignored(s)]
            others = [o for o in others if not ignored(o)]

            if not os.path.isdir(dst_dir):
                logger.info('Creating directory %s', dst_dir)
                # Parent directories are known to exist, so 'mkdir' suffices.
                os.mkdir(dst_dir)

            for o in others:
                src_path = os.path.join(src_dir, o)
                dst_path = os.path.join(dst_dir, o)
                if os.path.isfile(src_path):
                    logger.info('Copying %s to %s', src_path, dst_path)
                    shutil.copy2(src_path, dst_path)
                else:
                    logger.warning('Skipped copy %s (not a file or '
                                   'directory) to %s',
                                   src_path, dst_path)

    else:
        # Missing path, broken symlink, socket, ...: say so instead of
        # returning silently.
        logger.warning('Skipped copy %s (not a file or directory) to %s',
                       source_, destination_)