Keep certain files when cleaning output; fix #574

If DELETE_OUTPUT_DIRECTORY is set to True, all files and directories are
deleted from the output directory. There are, however, several reasons
one might want to retain certain files/directories and avoid their
deletion from the output directory. One such use case is version control
system data: a versioned output directory can facilitate deployment via
Heroku and/or allow the user to easily revert to a prior version of the
site without having to rely on regeneration via Pelican.

This change introduces the OUTPUT_RETENTION setting, a tuple of file
and directory names that will be preserved when the clean_output_dir
function in pelican.utils is run. Each name is compared exactly against
the top-level entries of the output directory. Setting
OUTPUT_RETENTION = (".hg", ".git") would, for example, prevent the
relevant VCS data from being deleted when the output directory is
cleaned.
This commit is contained in:
Justin Mayer 2013-06-23 11:44:53 -07:00
commit 6f36b0a246
5 changed files with 19 additions and 8 deletions

View file

@ -72,6 +72,9 @@ Setting name (default value) What doe
generating new files. This can be useful in preventing older, generating new files. This can be useful in preventing older,
unnecessary files from persisting in your output. However, **this is unnecessary files from persisting in your output. However, **this is
a destructive setting and should be handled with extreme care.** a destructive setting and should be handled with extreme care.**
`OUTPUT_RETENTION` (``()``) A tuple of file and directory names that should be retained and not deleted from the
output directory when it is cleaned (see `DELETE_OUTPUT_DIRECTORY`). One use case would be the preservation of version
control data. For example: ``(".hg", ".git", ".bzr")``
`JINJA_EXTENSIONS` (``[]``) A list of any Jinja2 extensions you want to use. `JINJA_EXTENSIONS` (``[]``) A list of any Jinja2 extensions you want to use.
`JINJA_FILTERS` (``{}``) A list of custom Jinja2 filters you want to use. `JINJA_FILTERS` (``{}``) A list of custom Jinja2 filters you want to use.
The dictionary should map the filtername to the filter function. The dictionary should map the filtername to the filter function.

View file

@ -49,6 +49,7 @@ class Pelican(object):
self.markup = settings['MARKUP'] self.markup = settings['MARKUP']
self.ignore_files = settings['IGNORE_FILES'] self.ignore_files = settings['IGNORE_FILES']
self.delete_outputdir = settings['DELETE_OUTPUT_DIRECTORY'] self.delete_outputdir = settings['DELETE_OUTPUT_DIRECTORY']
self.output_retention = settings['OUTPUT_RETENTION']
self.init_path() self.init_path()
self.init_plugins() self.init_plugins()
@ -175,7 +176,7 @@ class Pelican(object):
# explicitely asked # explicitely asked
if (self.delete_outputdir and not if (self.delete_outputdir and not
os.path.realpath(self.path).startswith(self.output_path)): os.path.realpath(self.path).startswith(self.output_path)):
clean_output_dir(self.output_path) clean_output_dir(self.output_path, self.output_retention)
writer = self.get_writer() writer = self.get_writer()

View file

@ -54,6 +54,7 @@ DEFAULT_CONFIG = {
'NEWEST_FIRST_ARCHIVES': True, 'NEWEST_FIRST_ARCHIVES': True,
'REVERSE_CATEGORY_ORDER': False, 'REVERSE_CATEGORY_ORDER': False,
'DELETE_OUTPUT_DIRECTORY': False, 'DELETE_OUTPUT_DIRECTORY': False,
'OUTPUT_RETENTION': (),
'ARTICLE_URL': '{slug}.html', 'ARTICLE_URL': '{slug}.html',
'ARTICLE_SAVE_AS': '{slug}.html', 'ARTICLE_SAVE_AS': '{slug}.html',
'ARTICLE_LANG_URL': '{slug}-{lang}.html', 'ARTICLE_LANG_URL': '{slug}-{lang}.html',

View file

@ -193,28 +193,31 @@ class TestUtils(LoggedTestCase):
shutil.rmtree(empty_path, True) shutil.rmtree(empty_path, True)
def test_clean_output_dir(self): def test_clean_output_dir(self):
retention = ()
test_directory = os.path.join(os.path.dirname(__file__), test_directory = os.path.join(os.path.dirname(__file__),
'clean_output') 'clean_output')
content = os.path.join(os.path.dirname(__file__), 'content') content = os.path.join(os.path.dirname(__file__), 'content')
shutil.copytree(content, test_directory) shutil.copytree(content, test_directory)
utils.clean_output_dir(test_directory) utils.clean_output_dir(test_directory, retention)
self.assertTrue(os.path.isdir(test_directory)) self.assertTrue(os.path.isdir(test_directory))
self.assertListEqual([], os.listdir(test_directory)) self.assertListEqual([], os.listdir(test_directory))
shutil.rmtree(test_directory) shutil.rmtree(test_directory)
def test_clean_output_dir_not_there(self): def test_clean_output_dir_not_there(self):
retention = ()
test_directory = os.path.join(os.path.dirname(__file__), test_directory = os.path.join(os.path.dirname(__file__),
'does_not_exist') 'does_not_exist')
utils.clean_output_dir(test_directory) utils.clean_output_dir(test_directory, retention)
self.assertFalse(os.path.exists(test_directory)) self.assertFalse(os.path.exists(test_directory))
def test_clean_output_dir_is_file(self): def test_clean_output_dir_is_file(self):
retention = ()
test_directory = os.path.join(os.path.dirname(__file__), test_directory = os.path.join(os.path.dirname(__file__),
'this_is_a_file') 'this_is_a_file')
f = open(test_directory, 'w') f = open(test_directory, 'w')
f.write('') f.write('')
f.close() f.close()
utils.clean_output_dir(test_directory) utils.clean_output_dir(test_directory, retention)
self.assertFalse(os.path.exists(test_directory)) self.assertFalse(os.path.exists(test_directory))
def test_strftime(self): def test_strftime(self):

View file

@ -298,8 +298,8 @@ def copy(path, source, destination, destination_path=None, overwrite=False):
logger.warning('skipped copy %s to %s' % (source_, destination_)) logger.warning('skipped copy %s to %s' % (source_, destination_))
def clean_output_dir(path): def clean_output_dir(path, retention):
"""Remove all the files from the output directory""" """Remove all files from output directory except those in retention list"""
if not os.path.exists(path): if not os.path.exists(path):
logger.debug("Directory already removed: %s" % path) logger.debug("Directory already removed: %s" % path)
@ -312,10 +312,13 @@ def clean_output_dir(path):
logger.error("Unable to delete file %s; %s" % (path, str(e))) logger.error("Unable to delete file %s; %s" % (path, str(e)))
return return
# remove all the existing content from the output folder # remove existing content from output folder unless in retention list
for filename in os.listdir(path): for filename in os.listdir(path):
file = os.path.join(path, filename) file = os.path.join(path, filename)
if os.path.isdir(file): if any(filename == retain for retain in retention):
logger.debug("Skipping deletion; %s is on retention list: %s" \
% (filename, file))
elif os.path.isdir(file):
try: try:
shutil.rmtree(file) shutil.rmtree(file)
logger.debug("Deleted directory %s" % file) logger.debug("Deleted directory %s" % file)