implement cache invalidation

* update version to indicate that this is not a stable version
* implement cache invalidation
  * via version
  * via settings change
* improve logging
* add cache invalidation tests
This commit is contained in:
derwinlu 2015-06-10 12:14:25 +02:00
commit 82bdb4f9a5
4 changed files with 175 additions and 26 deletions

View file

@ -25,7 +25,7 @@ from pelican.settings import read_settings
from pelican.utils import clean_output_dir, folder_watcher, file_watcher
from pelican.writers import Writer
__version__ = "3.5.0"
__version__ = "3.6.0.dev"
DEFAULT_CONFIG_NAME = 'pelicanconf.py'

View file

@ -8,6 +8,9 @@ try:
except:
import pickle
#TODO don't load pelican module, maybe have to move version definition
import pelican
from pelican.settings import settings_check_equal
from pelican.utils import mkdir_p
@ -25,6 +28,7 @@ class FileDataCacher(object):
Sets caching policy according to *caching_policy*.
"""
self.settings = settings
self._cache = {}
self._cache_path = os.path.join(self.settings['CACHE_PATH'],
cache_name)
self._cache_data_policy = caching_policy
@ -35,22 +39,31 @@ class FileDataCacher(object):
self._cache_open = open
if load_policy:
try:
with self._cache_open(self._cache_path, 'rb') as fhandle:
self._cache = pickle.load(fhandle)
self._load_cache()
except (IOError, OSError) as err:
logger.debug('Cannot load cache %s (this is normal on first '
'run). Proceeding with empty cache.\n%s',
self._cache_path, err)
self._cache = {}
'run). Proceeding with empty cache.\n%s',
self._cache_path, err)
except pickle.PickleError as err:
logger.warning('Cannot unpickle cache %s, cache may be using '
'an incompatible protocol (see pelican '
'caching docs). '
'Proceeding with empty cache.\n%s',
self._cache_path, err)
self._cache = {}
else:
self._cache = {}
logger.warning(('Cannot unpickle cache %s, cache may be using '
'an incompatible protocol (see pelican caching docs). '
'Proceeding with empty cache.\n%s'),
self._cache_path, err)
def _load_cache(self):
    """Load the pickled cache from disk and adopt it only if it is valid.

    The file at ``self._cache_path`` is unpickled and accepted only when
    the stored ``__version__`` equals ``pelican.__version__`` and the
    stored ``__settings__`` compare equal to ``self.settings`` (as judged
    by ``settings_check_equal``). When the cache is rejected,
    ``self._cache`` is left untouched.

    Errors raised while opening or unpickling the file are not handled
    here; they propagate to the caller.
    """
    with self._cache_open(self._cache_path, 'rb') as fhandle:
        cache = pickle.load(fhandle)

    # A payload that is not a dict has no ``.get`` and would otherwise
    # raise AttributeError; treat it like any other unusable cache.
    if not isinstance(cache, dict):
        logger.debug('Cache has an unexpected format. '
                     'Proceeding with empty cache')
        return

    if cache.get('__version__') != pelican.__version__:
        logger.debug('Pelican version changed. Proceeding with empty cache')
        return

    # A cache without stored settings cannot be validated, so it is
    # rejected instead of handing ``None`` to the comparison helper.
    cached_settings = cache.get('__settings__')
    if cached_settings is None or not settings_check_equal(
            cached_settings, self.settings):
        logger.debug('Settings changed. Proceeding with empty cache')
        return

    logger.debug('cache accepted')
    self._cache = cache
def cache_data(self, filename, data):
"""Cache data for given file"""
@ -64,9 +77,14 @@ class FileDataCacher(object):
"""
return self._cache.get(filename, default)
def _add_validation_data(self):
    """Record the current Pelican version and settings inside the cache.

    The two entries are stored under the ``__version__`` and
    ``__settings__`` keys of ``self._cache``.
    """
    self._cache.update({
        '__version__': pelican.__version__,
        '__settings__': self.settings,
    })
def save_cache(self):
"""Save the updated cache"""
if self._cache_data_policy:
self._add_validation_data()
try:
mkdir_p(self.settings['CACHE_PATH'])
with self._cache_open(self._cache_path, 'wb') as fhandle:
@ -121,8 +139,9 @@ class FileStampDataCacher(FileDataCacher):
try:
return self._filestamp_func(filename)
except (IOError, OSError, TypeError) as err:
logger.warning('Cannot get modification stamp for %s\n\t%s',
filename, err)
logger.warning(
'Cannot get modification stamp for %s\n\t%s',
filename, err)
return ''
def get_cached_data(self, filename, default=None):

View file

@ -375,3 +375,22 @@ def configure_settings(settings):
logger.warning(message)
return settings
def settings_check_equal(s1, s2, ignored_keys=frozenset(['filenames'])):
    """Return True if two configurations are equal, ignoring some keys.

    Used to determine whether the cache needs to be invalidated.

    :param s1: first settings mapping, or ``None`` if unavailable
    :param s2: second settings mapping, or ``None`` if unavailable
    :param ignored_keys: iterable of keys excluded from the comparison
    :return: ``True`` when both mappings have the same non-ignored keys
        and every such key maps to equal values, ``False`` otherwise.
        A missing (``None``) configuration never compares equal, so the
        caller falls back to rebuilding its cache instead of crashing.
    """
    if s1 is None or s2 is None:
        return False

    ignored = set(ignored_keys)
    s1_keys = set(s1) - ignored
    s2_keys = set(s2) - ignored
    if s1_keys != s2_keys:
        return False
    # The key sets are identical here, so iterating one of them suffices.
    return all(s1[key] == s2[key] for key in s1_keys)

View file

@ -44,17 +44,19 @@ class TestCache(unittest.TestCase):
settings['DEFAULT_DATE'] = (1970, 1, 1)
settings['READERS'] = {'asc': None}
# populate cache
generator = ArticlesGenerator(
context=settings.copy(), settings=settings,
path=CONTENT_DIR, theme=settings['THEME'], output_path=None)
generator.generate_context()
self.assertTrue(hasattr(generator, '_cache'))
# regenerate, with cache
generator = ArticlesGenerator(
context=settings.copy(), settings=settings,
path=CONTENT_DIR, theme=settings['THEME'], output_path=None)
generator.readers.read_file = MagicMock()
generator.readers.read_file = MagicMock(
side_effect=generator.readers.read_file)
generator.generate_context()
"""
3 Files don't get cached because they were not valid
@ -68,20 +70,24 @@ class TestCache(unittest.TestCase):
def test_article_reader_content_caching(self):
"""Test raw article content caching at the reader level"""
settings = self._get_cache_enabled_settings()
settings['DEFAULT_DATE'] = (1970, 1, 1)
settings['READERS'] = {'asc': None}
# populate cache
generator = ArticlesGenerator(
context=settings.copy(), settings=settings,
path=CONTENT_DIR, theme=settings['THEME'], output_path=None)
generator.generate_context()
self.assertTrue(hasattr(generator.readers, '_cache'))
# regenerate, with cache
generator = ArticlesGenerator(
context=settings.copy(), settings=settings,
path=CONTENT_DIR, theme=settings['THEME'], output_path=None)
readers = generator.readers.readers
for reader in readers.values():
reader.read = MagicMock()
reader.read = MagicMock(
side_effect=reader.read)
generator.generate_context()
for reader in readers.values():
self.assertEqual(reader.read.call_count, 0)
@ -92,12 +98,14 @@ class TestCache(unittest.TestCase):
used in --ignore-cache or autoreload mode"""
settings = self._get_cache_enabled_settings()
settings['DEFAULT_DATE'] = (1970, 1, 1)
settings['READERS'] = {'asc': None}
generator = ArticlesGenerator(
context=settings.copy(), settings=settings,
path=CONTENT_DIR, theme=settings['THEME'], output_path=None)
generator.readers.read_file = MagicMock()
generator.readers.read_file = MagicMock(
side_effect=generator.readers.read_file)
generator.generate_context()
self.assertTrue(hasattr(generator, '_cache_open'))
orig_call_count = generator.readers.read_file.call_count
@ -106,9 +114,111 @@ class TestCache(unittest.TestCase):
generator = ArticlesGenerator(
context=settings.copy(), settings=settings,
path=CONTENT_DIR, theme=settings['THEME'], output_path=None)
generator.readers.read_file = MagicMock()
generator.readers.read_file = MagicMock(
side_effect=generator.readers.read_file)
generator.generate_context()
self.assertEqual(generator.readers.read_file.call_count, orig_call_count)
self.assertEqual(
generator.readers.read_file.call_count,
orig_call_count
)
@unittest.skipUnless(MagicMock, 'Needs Mock module')
def test_article_ignore_cache_when_version_missmatch(self):
    """
    Test regenerate iff version change

    Runs the articles generator three times: once to populate the
    cache, once to check that the cache reduces the number of
    ``read_file`` calls, and once after changing ``pelican.__version__``
    to check that the call count returns to the uncached value.
    """
    import pelican

    settings = self._get_cache_enabled_settings()
    settings['CONTENT_CACHING_LAYER'] = 'generator'
    settings['DEFAULT_DATE'] = (1970, 1, 1)
    settings['READERS'] = {'asc': None}

    # populate cache
    generator = ArticlesGenerator(
        context=settings.copy(), settings=settings,
        path=CONTENT_DIR, theme=settings['THEME'], output_path=None)
    generator.readers.read_file = MagicMock(
        side_effect=generator.readers.read_file)
    generator.generate_context()
    orig_call_count = generator.readers.read_file.call_count

    # cache should prevent calls from valid input
    generator = ArticlesGenerator(
        context=settings.copy(), settings=settings,
        path=CONTENT_DIR, theme=settings['THEME'], output_path=None)
    generator.readers.read_file = MagicMock(
        side_effect=generator.readers.read_file)
    generator.generate_context()
    self.assertTrue(hasattr(generator, '_cache_open'))
    cached_call_count = generator.readers.read_file.call_count
    self.assertTrue(
        cached_call_count < orig_call_count,
        'Expected cached_call_count {} is not less then orig_call_count {}'
        .format(cached_call_count, orig_call_count))

    # every file should be reloaded because we use another version
    # The module attribute is global state: restore it when the test
    # ends (pass or fail) so the fake version cannot leak into other
    # tests.
    self.addCleanup(setattr, pelican, '__version__', pelican.__version__)
    pelican.__version__ = '0.0.0'
    generator = ArticlesGenerator(
        context=settings.copy(), settings=settings,
        path=CONTENT_DIR, theme=settings['THEME'], output_path=None)
    generator.readers.read_file = MagicMock(
        side_effect=generator.readers.read_file)
    generator.generate_context()
    self.assertEqual(
        generator.readers.read_file.call_count, orig_call_count,
        'Expected new call_count {} to be equal to orig_call_count {}'
        .format(generator.readers.read_file.call_count, orig_call_count)
    )
@unittest.skipUnless(MagicMock, 'Needs Mock module')
def test_article_ignore_cache_when_settings_missmatch(self):
    """
    Test regenerate iff settings differ

    Runs the articles generator three times: once to populate the
    cache, once with identical settings to check that fewer
    ``read_file`` calls are made, and once with a changed
    ``DEFAULT_DATE`` to check that the call count returns to the
    uncached value.
    """
    import copy

    settings = self._get_cache_enabled_settings()
    settings['CONTENT_CACHING_LAYER'] = 'generator'
    settings['DEFAULT_DATE'] = (1970, 1, 1)
    settings['READERS'] = {'asc': None}

    def run_generator(config):
        # Build a generator for *config*, wrap read_file in a mock that
        # still delegates to the real reader, and generate the context.
        gen = ArticlesGenerator(
            context=config.copy(), settings=config,
            path=CONTENT_DIR, theme=settings['THEME'], output_path=None)
        gen.readers.read_file = MagicMock(
            side_effect=gen.readers.read_file)
        gen.generate_context()
        return gen

    # first run fills the cache
    first_run = run_generator(settings)
    orig_call_count = first_run.readers.read_file.call_count

    # second run with the same settings should be served from the cache
    second_run = run_generator(settings)
    self.assertTrue(hasattr(second_run, '_cache_open'))
    cached_call_count = second_run.readers.read_file.call_count
    self.assertTrue(
        cached_call_count < orig_call_count,
        'Expected cached_call_count {} is not less then orig_call_count {}'
        .format(cached_call_count, orig_call_count))

    # third run uses different settings, so every file must be re-read
    settings_dif = copy.deepcopy(settings)
    settings_dif['DEFAULT_DATE'] = (1971, 1, 1)
    third_run = run_generator(settings_dif)
    new_call_count = third_run.readers.read_file.call_count
    self.assertEqual(
        new_call_count, orig_call_count,
        'Expected new call_count {} to be equal to orig_call_count {}'
        .format(new_call_count, orig_call_count)
    )
@unittest.skipUnless(MagicMock, 'Needs Mock module')
def test_page_object_caching(self):
@ -127,7 +237,8 @@ class TestCache(unittest.TestCase):
generator = PagesGenerator(
context=settings.copy(), settings=settings,
path=CUR_DIR, theme=settings['THEME'], output_path=None)
generator.readers.read_file = MagicMock()
generator.readers.read_file = MagicMock(
side_effect=generator.readers.read_file)
generator.generate_context()
"""
1 File doesn't get cached because it was not valid
@ -153,7 +264,7 @@ class TestCache(unittest.TestCase):
path=CUR_DIR, theme=settings['THEME'], output_path=None)
readers = generator.readers.readers
for reader in readers.values():
reader.read = MagicMock()
reader.read = MagicMock(side_effect=reader.read)
generator.generate_context()
for reader in readers.values():
self.assertEqual(reader.read.call_count, 0)
@ -170,7 +281,8 @@ class TestCache(unittest.TestCase):
generator = PagesGenerator(
context=settings.copy(), settings=settings,
path=CUR_DIR, theme=settings['THEME'], output_path=None)
generator.readers.read_file = MagicMock()
generator.readers.read_file = MagicMock(
side_effect=generator.readers.read_file)
generator.generate_context()
self.assertTrue(hasattr(generator, '_cache_open'))
orig_call_count = generator.readers.read_file.call_count
@ -182,4 +294,3 @@ class TestCache(unittest.TestCase):
generator.readers.read_file = MagicMock()
generator.generate_context()
self.assertEqual(generator.readers.read_file.call_count, orig_call_count)