fix caching

*  break out cache into cache.py
*  break out cache-tests into test_cache.py
*  fix broken cache tests
   *  replace non existing assert calls with self.assertEqual
   *  fix path for page caching test (was invalid)
   *  cleanup test code
*  restructure generate_context in Article and Path Generator
   * destinguish between valid/invalid files correctly and cache accordingly
*  use cPickle if available for increased performance
This commit is contained in:
derwinlu 2015-06-05 12:11:53 +02:00
commit b7e6390f04
6 changed files with 364 additions and 299 deletions

View file

@ -14,7 +14,6 @@ import shutil
import sys
import traceback
import pickle
import hashlib
import datetime
from collections import Hashable
@ -627,129 +626,6 @@ def split_all(path):
return components
class FileDataCacher(object):
'''Class that can cache data contained in files'''
def __init__(self, settings, cache_name, caching_policy, load_policy):
'''Load the specified cache within CACHE_PATH in settings
only if *load_policy* is True,
May use gzip if GZIP_CACHE ins settings is True.
Sets caching policy according to *caching_policy*.
'''
self.settings = settings
self._cache_path = os.path.join(self.settings['CACHE_PATH'],
cache_name)
self._cache_data_policy = caching_policy
if self.settings['GZIP_CACHE']:
import gzip
self._cache_open = gzip.open
else:
self._cache_open = open
if load_policy:
try:
with self._cache_open(self._cache_path, 'rb') as fhandle:
self._cache = pickle.load(fhandle)
except (IOError, OSError) as err:
logger.debug('Cannot load cache %s (this is normal on first '
'run). Proceeding with empty cache.\n%s',
self._cache_path, err)
self._cache = {}
except Exception as err:
logger.warning(('Cannot unpickle cache %s, cache may be using '
'an incompatible protocol (see pelican caching docs). '
'Proceeding with empty cache.\n%s'),
self._cache_path, err)
self._cache = {}
else:
self._cache = {}
def cache_data(self, filename, data):
'''Cache data for given file'''
if self._cache_data_policy:
self._cache[filename] = data
def get_cached_data(self, filename, default=None):
'''Get cached data for the given file
if no data is cached, return the default object
'''
return self._cache.get(filename, default)
def save_cache(self):
'''Save the updated cache'''
if self._cache_data_policy:
try:
mkdir_p(self.settings['CACHE_PATH'])
with self._cache_open(self._cache_path, 'wb') as fhandle:
pickle.dump(self._cache, fhandle)
except (IOError, OSError, pickle.PicklingError) as err:
logger.warning('Could not save cache %s\n ... %s',
self._cache_path, err)
class FileStampDataCacher(FileDataCacher):
'''Subclass that also caches the stamp of the file'''
def __init__(self, settings, cache_name, caching_policy, load_policy):
'''This sublcass additionally sets filestamp function
and base path for filestamping operations
'''
super(FileStampDataCacher, self).__init__(settings, cache_name,
caching_policy,
load_policy)
method = self.settings['CHECK_MODIFIED_METHOD']
if method == 'mtime':
self._filestamp_func = os.path.getmtime
else:
try:
hash_func = getattr(hashlib, method)
def filestamp_func(filename):
'''return hash of file contents'''
with open(filename, 'rb') as fhandle:
return hash_func(fhandle.read()).digest()
self._filestamp_func = filestamp_func
except AttributeError as err:
logger.warning('Could not get hashing function\n\t%s', err)
self._filestamp_func = None
def cache_data(self, filename, data):
'''Cache stamp and data for the given file'''
stamp = self._get_file_stamp(filename)
super(FileStampDataCacher, self).cache_data(filename, (stamp, data))
def _get_file_stamp(self, filename):
'''Check if the given file has been modified
since the previous build.
depending on CHECK_MODIFIED_METHOD
a float may be returned for 'mtime',
a hash for a function name in the hashlib module
or an empty bytes string otherwise
'''
try:
return self._filestamp_func(filename)
except (IOError, OSError, TypeError) as err:
logger.warning('Cannot get modification stamp for %s\n\t%s',
filename, err)
return b''
def get_cached_data(self, filename, default=None):
'''Get the cached data for the given filename
if the file has not been modified.
If no record exists or file has been modified, return default.
Modification is checked by comparing the cached
and current file stamp.
'''
stamp, data = super(FileStampDataCacher, self).get_cached_data(
filename, (None, default))
if stamp != self._get_file_stamp(filename):
return default
return data
def is_selected_for_writing(settings, path):
'''Check whether path is selected for writing
according to the WRITE_SELECTED list