diff --git a/pelican/__init__.py b/pelican/__init__.py index 42c89bf0..c2c7ca5a 100644 --- a/pelican/__init__.py +++ b/pelican/__init__.py @@ -26,7 +26,7 @@ from pelican.utils import (clean_output_dir, folder_watcher, file_watcher, maybe_pluralize) from pelican.writers import Writer -__version__ = "3.5.0" +__version__ = "3.6.0.dev" DEFAULT_CONFIG_NAME = 'pelicanconf.py' diff --git a/pelican/cache.py b/pelican/cache.py index d955ae08..d07250e0 100644 --- a/pelican/cache.py +++ b/pelican/cache.py @@ -8,6 +8,9 @@ try: except: import pickle +#TODO don't load pelican module, maybe have to move version definition +import pelican +from pelican.settings import settings_check_equal from pelican.utils import mkdir_p @@ -25,6 +28,7 @@ class FileDataCacher(object): Sets caching policy according to *caching_policy*. """ self.settings = settings + self._cache = {} self._cache_path = os.path.join(self.settings['CACHE_PATH'], cache_name) self._cache_data_policy = caching_policy @@ -35,22 +39,31 @@ class FileDataCacher(object): self._cache_open = open if load_policy: try: - with self._cache_open(self._cache_path, 'rb') as fhandle: - self._cache = pickle.load(fhandle) + self._load_cache() except (IOError, OSError) as err: logger.debug('Cannot load cache %s (this is normal on first ' - 'run). Proceeding with empty cache.\n%s', - self._cache_path, err) - self._cache = {} + 'run). Proceeding with empty cache.\n%s', + self._cache_path, err) except pickle.PickleError as err: - logger.warning('Cannot unpickle cache %s, cache may be using ' - 'an incompatible protocol (see pelican ' - 'caching docs). ' - 'Proceeding with empty cache.\n%s', - self._cache_path, err) - self._cache = {} - else: - self._cache = {} + logger.warning(('Cannot unpickle cache %s, cache may be using ' + 'an incompatible protocol (see pelican caching docs). 
def settings_check_equal(s1, s2, ignored_keys=frozenset(['filenames'])):
    """Return True when two settings mappings hold the same configuration.

    Used to decide whether a stored cache has to be invalidated.

    :param s1: first settings mapping, or None when no settings are
        available (e.g. the result of ``dict.get`` on a cache that does
        not carry a settings entry).
    :param s2: second settings mapping, or None.
    :param ignored_keys: iterable of keys that are left out of the
        comparison on both sides.
    :return: True if, after dropping *ignored_keys*, both mappings have
        exactly the same keys and every value compares equal with ``==``;
        False otherwise. A None argument never compares equal, because
        there is nothing to validate against.
    """
    # A missing configuration cannot be validated; report a mismatch
    # instead of raising AttributeError on ``None.keys()``.
    if s1 is None or s2 is None:
        return False

    # Accept any iterable (list, tuple, set, ...) for ignored_keys.
    ignored = frozenset(ignored_keys)
    s1_keys = set(s1) - ignored
    s2_keys = set(s2) - ignored

    if s1_keys != s2_keys:
        return False

    # The key sets are identical here, so walking one of them is enough.
    return all(s1[key] == s2[key] for key in s1_keys)
@unittest.skipUnless(MagicMock, 'Needs Mock module')
def test_article_ignore_cache_when_version_missmatch(self):
    """Test that articles are re-read iff the pelican version changed.

    Runs the article generator three times with generator-level caching
    and counts the calls to ``readers.read_file`` through a MagicMock
    that wraps the real method:

    1. first run, recorded as the reference call count;
    2. second run with identical settings, which must call ``read_file``
       fewer times than the first run;
    3. third run with ``pelican.__version__`` replaced by ``'0.0.0'``,
       which must call ``read_file`` as often as the first run.
    """

    import pelican

    settings = self._get_cache_enabled_settings()
    settings['CONTENT_CACHING_LAYER'] = 'generator'
    settings['DEFAULT_DATE'] = (1970, 1, 1)
    settings['READERS'] = {'asc': None}

    # populate cache
    generator = ArticlesGenerator(
        context=settings.copy(), settings=settings,
        path=CONTENT_DIR, theme=settings['THEME'], output_path=None)
    generator.readers.read_file = MagicMock(
        side_effect=generator.readers.read_file)
    generator.generate_context()
    orig_call_count = generator.readers.read_file.call_count

    # cache should prevent calls from valid input
    generator = ArticlesGenerator(
        context=settings.copy(), settings=settings,
        path=CONTENT_DIR, theme=settings['THEME'], output_path=None)
    generator.readers.read_file = MagicMock(
        side_effect=generator.readers.read_file)
    generator.generate_context()
    self.assertTrue(hasattr(generator, '_cache_open'))
    cached_call_count = generator.readers.read_file.call_count
    self.assertTrue(cached_call_count < orig_call_count,
        'Expected cached_call_count {} is not less then orig_call_count {}'
        .format(cached_call_count, orig_call_count))

    # every file should be reloaded because we use another version
    real_version = pelican.__version__
    pelican.__version__ = '0.0.0'
    try:
        generator = ArticlesGenerator(
            context=settings.copy(), settings=settings,
            path=CONTENT_DIR, theme=settings['THEME'], output_path=None)
        generator.readers.read_file = MagicMock(
            side_effect=generator.readers.read_file)
        generator.generate_context()
    finally:
        # pelican.__version__ is a module global shared by the whole test
        # process; put the real value back even if generation fails so
        # the fake version does not leak into other tests.
        pelican.__version__ = real_version
    self.assertEqual(
        generator.readers.read_file.call_count, orig_call_count,
        'Expected new call_count {} to be equal to orig_call_count {}'
        .format(generator.readers.read_file.call_count, orig_call_count)
    )
def test_article_reader_cache_speed(self):
    """Test that reader caching actually is providing a speed increase.

    Builds an ArticlesGenerator and runs ``generate_context()`` twice
    with the same cache-enabled settings, then asserts that the second
    run took less time than the first.

    While this number is not a real benchmark, it should provide a
    heads-up if something during caching is not providing benefits.

    NOTE(review): the caching layer is whatever
    ``_get_cache_enabled_settings`` configures (presumably the reader
    layer) and the first run is assumed to start with an empty cache --
    confirm against the fixture.
    """
    # default_timer is the platform-appropriate clock for measuring
    # elapsed time; wall-clock time.time() can be coarse or be adjusted
    # while the test runs.
    from timeit import default_timer

    settings = self._get_cache_enabled_settings()
    settings['READERS'] = {'asc': None}

    def timed_generate_context():
        """Create a fresh generator, run it, return elapsed seconds."""
        started = default_timer()
        generator = ArticlesGenerator(
            context=settings.copy(), settings=settings,
            path=CONTENT_DIR, theme=settings['THEME'], output_path=None)
        generator.generate_context()
        return default_timer() - started

    # Order matters: the first call is the reference run, the second one
    # is expected to benefit from the cache.
    uncached_time = timed_generate_context()
    cached_time = timed_generate_context()

    self.assertTrue(
        cached_time < uncached_time,
        'cached time {} is higher then uncached time {}'
        .format(cached_time, uncached_time))
class TestCache(unittest.TestCase): generator = PagesGenerator( context=settings.copy(), settings=settings, path=CUR_DIR, theme=settings['THEME'], output_path=None) - generator.readers.read_file = MagicMock() + generator.readers.read_file = MagicMock( + side_effect=generator.readers.read_file) generator.generate_context() self.assertTrue(hasattr(generator, '_cache_open')) orig_call_count = generator.readers.read_file.call_count @@ -182,4 +359,3 @@ class TestCache(unittest.TestCase): generator.readers.read_file = MagicMock() generator.generate_context() self.assertEqual(generator.readers.read_file.call_count, orig_call_count) -