In autoreload mode, only reprocess modified files

This commit is contained in:
gbrener 2017-06-18 22:25:54 -05:00
commit aa5cef341d
4 changed files with 80 additions and 40 deletions

View file

@ -141,7 +141,7 @@ class Pelican(object):
) )
self.settings[old] = self.settings[new] self.settings[old] = self.settings[new]
def run(self): def run(self, modified=None):
"""Run the generators and return""" """Run the generators and return"""
start_time = time.time() start_time = time.time()
@ -164,11 +164,13 @@ class Pelican(object):
# explicitly asked # explicitly asked
if (self.delete_outputdir and not if (self.delete_outputdir and not
os.path.realpath(self.path).startswith(self.output_path)): os.path.realpath(self.path).startswith(self.output_path)):
clean_output_dir(self.output_path, self.output_retention) clean_output_dir(self.output_path,
self.output_retention,
files_to_clean=modified)
for p in generators: for p in generators:
if hasattr(p, 'generate_context'): if hasattr(p, 'generate_context'):
p.generate_context() p.generate_context(modified=modified)
signals.all_generators_finalized.send(generators) signals.all_generators_finalized.send(generators)
@ -462,7 +464,9 @@ def main():
logger.warning('Empty theme folder. Using `basic` ' logger.warning('Empty theme folder. Using `basic` '
'theme.') 'theme.')
pelican.run() modified_files = [v for vals in list(modified.values())
for v in vals]
pelican.run(modified=modified_files)
except KeyboardInterrupt: except KeyboardInterrupt:
logger.warning("Keyboard interrupt, quitting.") logger.warning("Keyboard interrupt, quitting.")
@ -484,6 +488,8 @@ def main():
if next(watchers['theme']) is None: if next(watchers['theme']) is None:
logger.warning('Empty theme folder. Using `basic` theme.') logger.warning('Empty theme folder. Using `basic` theme.')
modified_files = [val for vals in list(modified.values())
for val in vals]
pelican.run() pelican.run()
except Exception as e: except Exception as e:

View file

@ -94,6 +94,19 @@ class Generator(object):
name, self._templates_path)) name, self._templates_path))
return self._templates[name] return self._templates[name]
def _filter_child_paths(self, ancestor_paths, child_paths):
    """Return the set of elements in child_paths which live under at least
    one of ancestor_paths.

    :param ancestor_paths: iterable of paths, each joined onto ``self.path``
        before comparison.
    :param child_paths: iterable of hashable path strings to filter.
    :returns: a set holding the matching entries of ``child_paths``, exactly
        as they were passed in (they are not normalized).

    Both sides are passed through ``os.path.realpath`` so that a relative or
    symlinked ``self.path`` still compares equal to the resolved child path.
    """
    roots = [os.path.realpath(os.path.join(self.path, ancestor_path))
             for ancestor_path in ancestor_paths]

    def _is_descendant(child_path):
        resolved = os.path.realpath(child_path)
        for root in roots:
            # An ancestor entry may itself be the modified path, so an
            # exact match counts.
            if resolved == root:
                return True
            # Compare against "root + separator": a bare prefix test would
            # also accept siblings such as "pages2" for ancestor "pages".
            if resolved.startswith(os.path.join(root, '')):
                return True
        return False

    return {child_path for child_path in child_paths
            if _is_descendant(child_path)}
def _include_path(self, path, extensions=None): def _include_path(self, path, extensions=None):
"""Inclusion logic for .get_files(), returns True/False """Inclusion logic for .get_files(), returns True/False
@ -501,14 +514,17 @@ class ArticlesGenerator(CachingGenerator):
self.generate_authors(write) self.generate_authors(write)
self.generate_drafts(write) self.generate_drafts(write)
def generate_context(self): def generate_context(self, modified=None):
"""Add the articles into the shared context""" """Add the articles into the shared context"""
all_articles = [] all_articles = []
all_drafts = [] all_drafts = []
for f in self.get_files( files = self.get_files(
self.settings['ARTICLE_PATHS'], (self.settings['ARTICLE_PATHS'] if modified is None
exclude=self.settings['ARTICLE_EXCLUDES']): else self._filter_child_paths(self.settings['ARTICLE_PATHS'],
modified)),
exclude=self.settings['ARTICLE_EXCLUDES'])
for f in files:
article_or_draft = self.get_cached_data(f, None) article_or_draft = self.get_cached_data(f, None)
if article_or_draft is None: if article_or_draft is None:
# TODO needs overhaul, maybe nomad for read_file # TODO needs overhaul, maybe nomad for read_file
@ -611,12 +627,15 @@ class PagesGenerator(CachingGenerator):
super(PagesGenerator, self).__init__(*args, **kwargs) super(PagesGenerator, self).__init__(*args, **kwargs)
signals.page_generator_init.send(self) signals.page_generator_init.send(self)
def generate_context(self): def generate_context(self, modified=None):
all_pages = [] all_pages = []
hidden_pages = [] hidden_pages = []
for f in self.get_files( files = self.get_files(
self.settings['PAGE_PATHS'], (self.settings['PAGE_PATHS'] if modified is None
exclude=self.settings['PAGE_EXCLUDES']): else self._filter_child_paths(self.settings['PAGE_PATHS'],
modified)),
exclude=self.settings['PAGE_EXCLUDES'])
for f in files:
page = self.get_cached_data(f, None) page = self.get_cached_data(f, None)
if page is None: if page is None:
try: try:
@ -697,11 +716,15 @@ class StaticGenerator(Generator):
os.path.join(output_path, destination, path), os.path.join(output_path, destination, path),
self.settings['IGNORE_FILES']) self.settings['IGNORE_FILES'])
def generate_context(self): def generate_context(self, modified=None):
self.staticfiles = [] self.staticfiles = []
for f in self.get_files(self.settings['STATIC_PATHS'], files = self.get_files(
exclude=self.settings['STATIC_EXCLUDES'], (self.settings['STATIC_PATHS'] if modified is None
extensions=False): else self._filter_child_paths(self.settings['STATIC_PATHS'],
modified)),
exclude=self.settings['STATIC_EXCLUDES'],
extensions=False)
for f in files:
# skip content source files unless the user explicitly wants them # skip content source files unless the user explicitly wants them
if self.settings['STATIC_EXCLUDE_SOURCES']: if self.settings['STATIC_EXCLUDE_SOURCES']:
@ -735,7 +758,8 @@ class StaticGenerator(Generator):
class SourceFileGenerator(Generator): class SourceFileGenerator(Generator):
def generate_context(self): def generate_context(self, modified=None):
"""`modified` is unused; only here for consistency."""
self.output_extension = self.settings['OUTPUT_SOURCES_EXTENSION'] self.output_extension = self.settings['OUTPUT_SOURCES_EXTENSION']
def _create_source(self, obj): def _create_source(self, obj):

View file

@ -286,22 +286,22 @@ class TestUtils(LoggedTestCase):
file_watcher = utils.file_watcher(path) file_watcher = utils.file_watcher(path)
# first check returns True # first check returns True
self.assertEqual(next(folder_watcher), True) self.assertTrue(next(folder_watcher))
self.assertEqual(next(file_watcher), True) self.assertTrue(next(file_watcher))
# next check without modification returns False # next check without modification returns False
self.assertEqual(next(folder_watcher), False) self.assertFalse(next(folder_watcher))
self.assertEqual(next(file_watcher), False) self.assertFalse(next(file_watcher))
# after modification, returns True # after modification, returns True
t = time.time() t = time.time()
os.utime(path, (t, t)) os.utime(path, (t, t))
self.assertEqual(next(folder_watcher), True) self.assertTrue(next(folder_watcher))
self.assertEqual(next(file_watcher), True) self.assertTrue(next(file_watcher))
# file watcher with None or empty path should return None # file watcher with None or empty path should return None
self.assertEqual(next(utils.file_watcher('')), None) self.assertIsNone(next(utils.file_watcher('')))
self.assertEqual(next(utils.file_watcher(None)), None) self.assertIsNone(next(utils.file_watcher(None)))
empty_path = os.path.join(os.path.dirname(__file__), 'empty') empty_path = os.path.join(os.path.dirname(__file__), 'empty')
try: try:
@ -309,9 +309,9 @@ class TestUtils(LoggedTestCase):
os.mkdir(os.path.join(empty_path, "empty_folder")) os.mkdir(os.path.join(empty_path, "empty_folder"))
shutil.copy(__file__, empty_path) shutil.copy(__file__, empty_path)
# if no files of interest, returns None # if no files of interest, returns empty list
watcher = utils.folder_watcher(empty_path, ['rst']) watcher = utils.folder_watcher(empty_path, ['rst'])
self.assertEqual(next(watcher), None) self.assertEqual(next(watcher), [])
except OSError: except OSError:
self.fail("OSError Exception in test_files_changed test") self.fail("OSError Exception in test_files_changed test")
finally: finally:

View file

@ -398,8 +398,11 @@ def copy_file_metadata(source, destination):
source, destination, e) source, destination, e)
def clean_output_dir(path, retention): def clean_output_dir(path, retention, files_to_clean=None):
"""Remove all files from output directory except those in retention list""" """Remove all files from output directory except those in retention list.
If files_to_clean is provided, only clean these files (but still skip over
the ones in retention list).
"""
if not os.path.exists(path): if not os.path.exists(path):
logger.debug("Directory already removed: %s", path) logger.debug("Directory already removed: %s", path)
@ -414,6 +417,8 @@ def clean_output_dir(path, retention):
# remove existing content from output folder unless in retention list # remove existing content from output folder unless in retention list
for filename in os.listdir(path): for filename in os.listdir(path):
if files_to_clean is not None and filename not in files_to_clean:
continue
file = os.path.join(path, filename) file = os.path.join(path, filename)
if any(filename == retain for retain in retention): if any(filename == retain for retain in retention):
logger.debug("Skipping deletion; %s is on retention list: %s", logger.debug("Skipping deletion; %s is on retention list: %s",
@ -727,11 +732,13 @@ def process_translations(content_list, order_by=None):
def folder_watcher(path, extensions, ignores=[]): def folder_watcher(path, extensions, ignores=[]):
'''Generator for monitoring a folder for modifications. '''Generator for monitoring a folder for modifications.
Returns a boolean indicating if files are changed since last check. Returns a list indicating the files that were changed since last check.
Returns None if there are no matching files in the folder''' Returns None if there are no matching files in the folder'''
def file_times(path): def file_times(path, after=0):
'''Return `mtime` for each file in path''' '''Return a (`mtime`, `file_path`) tuple for each file in path.
If `after` kwarg is provided, only return files with `mtime` > `after`.
'''
for root, dirs, files in os.walk(path, followlinks=True): for root, dirs, files in os.walk(path, followlinks=True):
dirs[:] = [x for x in dirs if not x.startswith(os.curdir)] dirs[:] = [x for x in dirs if not x.startswith(os.curdir)]
@ -740,21 +747,24 @@ def folder_watcher(path, extensions, ignores=[]):
if f.endswith(tuple(extensions)) and \ if f.endswith(tuple(extensions)) and \
not any(fnmatch.fnmatch(f, ignore) for ignore in ignores): not any(fnmatch.fnmatch(f, ignore) for ignore in ignores):
try: try:
yield os.stat(os.path.join(root, f)).st_mtime mtime = os.stat(os.path.join(root, f)).st_mtime
if mtime > after:
yield (mtime, os.path.join(root, f))
except OSError as e: except OSError as e:
logger.warning('Caught Exception: %s', e) logger.warning('Caught Exception: %s', e)
LAST_MTIME = 0 LAST_MTIME = 0
while True: while True:
try: try:
mtime = max(file_times(path)) modified_files = sorted(file_times(path, after=LAST_MTIME),
if mtime > LAST_MTIME: reverse=True)
LAST_MTIME = mtime if modified_files:
yield True LAST_MTIME = modified_files[0][0]
yield [mf[1] for mf in modified_files]
except ValueError: except ValueError:
yield None yield None
else: else:
yield False yield []
def file_watcher(path): def file_watcher(path):
@ -770,9 +780,9 @@ def file_watcher(path):
if mtime > LAST_MTIME: if mtime > LAST_MTIME:
LAST_MTIME = mtime LAST_MTIME = mtime
yield True yield path
else: else:
yield False yield ''
else: else:
yield None yield None