
Merge pull request #1747 from ingwinlu/fix_cache

Fix caching and disable by default
Justin Mayer 2015-06-09 08:42:51 -07:00
commit b7e8af5977
9 changed files with 375 additions and 315 deletions

docs/settings.rst

@@ -204,7 +204,7 @@ Setting name (followed by default value, if any)
``SLUGIFY_SOURCE = 'title'`` Specifies where you want the slug to be automatically generated
from. Can be set to ``title`` to use the 'Title:' metadata tag or
``basename`` to use the article's file name when creating the slug.
``CACHE_CONTENT = True`` If ``True``, save content in a cache file.
``CACHE_CONTENT = False`` If ``True``, saves content in caches.
See :ref:`reading_only_modified_content` for details about caching.
``CONTENT_CACHING_LAYER = 'reader'`` If set to ``'reader'``, save only the raw content and metadata
returned by readers. If set to ``'generator'``, save processed
@@ -212,9 +212,7 @@ Setting name (followed by default value, if any)
``CACHE_PATH = 'cache'`` Directory in which to store cache files.
``GZIP_CACHE = True`` If ``True``, use gzip to (de)compress the cache files.
``CHECK_MODIFIED_METHOD = 'mtime'`` Controls how files are checked for modifications.
``LOAD_CONTENT_CACHE = True`` If ``True``, load unmodified content from cache.
``AUTORELOAD_IGNORE_CACHE = False`` If ``True``, do not load content cache in autoreload mode
when the settings file changes.
``LOAD_CONTENT_CACHE = False`` If ``True``, load unmodified content from caches.
``WRITE_SELECTED = []`` If this list is not empty, **only** output files with their paths
in this list are written. Paths should be either absolute or relative
to the current Pelican working directory. For possible use cases see
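Note: with this commit caching is opt-in. A minimal pelicanconf.py sketch for
turning it back on follows; the setting names are the ones documented above,
and the values shown are illustrative rather than required:

    # Re-enable content caching, which now defaults to off.
    CACHE_CONTENT = True              # write parsed content to the cache
    LOAD_CONTENT_CACHE = True         # reuse unmodified content from the cache
    CACHE_PATH = 'cache'              # directory holding the cache files
    CONTENT_CACHING_LAYER = 'reader'  # cache raw reader output; 'generator'
                                      # caches processed objects instead
    GZIP_CACHE = True                 # gzip-compress the pickled cache files
    CHECK_MODIFIED_METHOD = 'mtime'   # or a hashlib algorithm name, e.g. 'sha1'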

pelican/__init__.py

@@ -101,6 +101,11 @@ class Pelican(object):
'PAGE_LANG_URL'):
logger.warning("%s = '%s'", setting, self.settings[setting])
if self.settings.get('AUTORELOAD_IGNORE_CACHE'):
logger.warning('Found deprecated `AUTORELOAD_IGNORE_CACHE` in '
'settings. Use --ignore-cache instead.')
self.settings.pop('AUTORELOAD_IGNORE_CACHE')
if self.settings.get('ARTICLE_PERMALINK_STRUCTURE', False):
logger.warning('Found deprecated `ARTICLE_PERMALINK_STRUCTURE` in'
' settings. Modifying the following settings for'
@@ -381,10 +386,6 @@ def main():
print(' --- AutoReload Mode: Monitoring `content`, `theme` and'
' `settings` for changes. ---')
def _ignore_cache(pelican_obj):
if pelican_obj.settings['AUTORELOAD_IGNORE_CACHE']:
pelican_obj.settings['LOAD_CONTENT_CACHE'] = False
while True:
try:
# Check source dir for changed files ending with the given
@@ -393,12 +394,9 @@ def main():
# have changed, no matter what extension the filenames
# have.
modified = {k: next(v) for k, v in watchers.items()}
original_load_cache = settings['LOAD_CONTENT_CACHE']
if modified['settings']:
pelican, settings = get_instance(args)
original_load_cache = settings['LOAD_CONTENT_CACHE']
_ignore_cache(pelican)
# Adjust static watchers if there are any changes
new_static = settings.get("STATIC_PATHS", [])
@@ -435,8 +433,6 @@ def main():
'theme.')
pelican.run()
# restore original caching policy
pelican.settings['LOAD_CONTENT_CACHE'] = original_load_cache
except KeyboardInterrupt:
logger.warning("Keyboard interrupt, quitting.")
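Note: with AUTORELOAD_IGNORE_CACHE deprecated, the same effect comes from the
--ignore-cache command-line flag, which also removes the need to save and
restore LOAD_CONTENT_CACHE around every rebuild. A rough sketch of what the
flag amounts to, not the verbatim implementation:

    # 'args' is assumed to come from Pelican's argparse parser.
    if args.ignore_cache:
        # Disable cache loading for this invocation only; the value in
        # the user's settings file is left untouched.
        settings['LOAD_CONTENT_CACHE'] = False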

140
pelican/cache.py Normal file

@@ -0,0 +1,140 @@
from __future__ import unicode_literals
import hashlib
import logging
import os
try:
import cPickle as pickle
except ImportError:
import pickle
from pelican.utils import mkdir_p
logger = logging.getLogger(__name__)
class FileDataCacher(object):
"""Class that can cache data contained in files"""
def __init__(self, settings, cache_name, caching_policy, load_policy):
"""Load the specified cache within CACHE_PATH in settings
only if *load_policy* is True.
May use gzip if GZIP_CACHE in settings is True.
Sets caching policy according to *caching_policy*.
"""
self.settings = settings
self._cache_path = os.path.join(self.settings['CACHE_PATH'],
cache_name)
self._cache_data_policy = caching_policy
if self.settings['GZIP_CACHE']:
import gzip
self._cache_open = gzip.open
else:
self._cache_open = open
if load_policy:
try:
with self._cache_open(self._cache_path, 'rb') as fhandle:
self._cache = pickle.load(fhandle)
except (IOError, OSError) as err:
logger.debug('Cannot load cache %s (this is normal on first '
'run). Proceeding with empty cache.\n%s',
self._cache_path, err)
self._cache = {}
except pickle.PickleError as err:
logger.warning('Cannot unpickle cache %s, cache may be using '
'an incompatible protocol (see pelican '
'caching docs). '
'Proceeding with empty cache.\n%s',
self._cache_path, err)
self._cache = {}
else:
self._cache = {}
def cache_data(self, filename, data):
"""Cache data for given file"""
if self._cache_data_policy:
self._cache[filename] = data
def get_cached_data(self, filename, default=None):
"""Get cached data for the given file
if no data is cached, return the default object
"""
return self._cache.get(filename, default)
def save_cache(self):
"""Save the updated cache"""
if self._cache_data_policy:
try:
mkdir_p(self.settings['CACHE_PATH'])
with self._cache_open(self._cache_path, 'wb') as fhandle:
pickle.dump(self._cache, fhandle)
except (IOError, OSError, pickle.PicklingError) as err:
logger.warning('Could not save cache %s\n ... %s',
self._cache_path, err)
class FileStampDataCacher(FileDataCacher):
"""Subclass that also caches the stamp of the file"""
def __init__(self, settings, cache_name, caching_policy, load_policy):
"""This sublcass additionally sets filestamp function
and base path for filestamping operations
"""
super(FileStampDataCacher, self).__init__(settings, cache_name,
caching_policy,
load_policy)
method = self.settings['CHECK_MODIFIED_METHOD']
if method == 'mtime':
self._filestamp_func = os.path.getmtime
else:
try:
hash_func = getattr(hashlib, method)
def filestamp_func(filename):
"""return hash of file contents"""
with open(filename, 'rb') as fhandle:
return hash_func(fhandle.read()).digest()
self._filestamp_func = filestamp_func
except AttributeError as err:
logger.warning('Could not get hashing function\n\t%s', err)
self._filestamp_func = None
def cache_data(self, filename, data):
"""Cache stamp and data for the given file"""
stamp = self._get_file_stamp(filename)
super(FileStampDataCacher, self).cache_data(filename, (stamp, data))
def _get_file_stamp(self, filename):
"""Check if the given file has been modified
since the previous build.
Depending on CHECK_MODIFIED_METHOD,
a float is returned for 'mtime',
a hash digest for a function name in the hashlib module,
or an empty string otherwise.
"""
try:
return self._filestamp_func(filename)
except (IOError, OSError, TypeError) as err:
logger.warning('Cannot get modification stamp for %s\n\t%s',
filename, err)
return ''
def get_cached_data(self, filename, default=None):
"""Get the cached data for the given filename
if the file has not been modified.
If no record exists or file has been modified, return default.
Modification is checked by comparing the cached
and current file stamp.
"""
stamp, data = super(FileStampDataCacher, self).get_cached_data(
filename, (None, default))
if stamp != self._get_file_stamp(filename):
return default
return data
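Note: the new module gives readers and generators a small pickle-backed cache
keyed by source filename. A usage sketch; the cache name, file path, and
parse() helper are hypothetical, while the settings keys are the ones the
class actually reads:

    from pelican.cache import FileStampDataCacher

    settings = {'CACHE_PATH': 'cache',
                'GZIP_CACHE': True,
                'CHECK_MODIFIED_METHOD': 'mtime'}

    # caching_policy=True makes cache_data()/save_cache() store data;
    # load_policy=True unpickles an existing cache file on construction.
    cacher = FileStampDataCacher(settings, 'demo_cache',
                                 caching_policy=True, load_policy=True)

    data = cacher.get_cached_data('content/article.md')
    if data is None:                        # miss, or the file stamp changed
        data = parse('content/article.md')  # hypothetical expensive parse
        cacher.cache_data('content/article.md', data)
    cacher.save_cache()                     # persist the cache for the next run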

pelican/generators.py

@@ -17,11 +17,11 @@ from operator import attrgetter
from jinja2 import (Environment, FileSystemLoader, PrefixLoader, ChoiceLoader,
BaseLoader, TemplateNotFound)
from pelican.cache import FileStampDataCacher
from pelican.contents import Article, Draft, Page, Static, is_valid_content
from pelican.readers import Readers
from pelican.utils import (copy, process_translations, mkdir_p, DateFormatter,
FileStampDataCacher, python_2_unicode_compatible,
posixize_path)
python_2_unicode_compatible, posixize_path)
from pelican import signals
@@ -493,10 +493,11 @@ class ArticlesGenerator(CachingGenerator):
for f in self.get_files(
self.settings['ARTICLE_PATHS'],
exclude=self.settings['ARTICLE_EXCLUDES']):
article = self.get_cached_data(f, None)
if article is None:
article_or_draft = self.get_cached_data(f, None)
if article_or_draft is None:
#TODO needs overhaul, maybe nomad for read_file solution, unified behaviour
try:
article = self.readers.read_file(
article_or_draft = self.readers.read_file(
base_path=self.path, path=f, content_class=Article,
context=self.context,
preread_signal=signals.article_generator_preread,
@@ -509,29 +510,32 @@ class ArticlesGenerator(CachingGenerator):
self._add_failed_source_path(f)
continue
if not is_valid_content(article, f):
if not is_valid_content(article_or_draft, f):
self._add_failed_source_path(f)
continue
self.cache_data(f, article)
if article_or_draft.status.lower() == "published":
all_articles.append(article_or_draft)
elif article_or_draft.status.lower() == "draft":
article_or_draft = self.readers.read_file(
base_path=self.path, path=f, content_class=Draft,
context=self.context,
preread_signal=signals.article_generator_preread,
preread_sender=self,
context_signal=signals.article_generator_context,
context_sender=self)
self.add_source_path(article_or_draft)
all_drafts.append(article_or_draft)
else:
logger.error("Unknown status '%s' for file %s, skipping it.",
article_or_draft.status, f)
self._add_failed_source_path(f)
continue
self.add_source_path(article)
self.cache_data(f, article_or_draft)
self.add_source_path(article_or_draft)
if article.status.lower() == "published":
all_articles.append(article)
elif article.status.lower() == "draft":
draft = self.readers.read_file(
base_path=self.path, path=f, content_class=Draft,
context=self.context,
preread_signal=signals.article_generator_preread,
preread_sender=self,
context_signal=signals.article_generator_context,
context_sender=self)
self.add_source_path(draft)
all_drafts.append(draft)
else:
logger.error("Unknown status '%s' for file %s, skipping it.",
article.status, f)
self.articles, self.translations = process_translations(all_articles,
order_by=self.settings['ARTICLE_ORDER_BY'])
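Note: the hunk above (mirrored for PagesGenerator below) moves cache_data()
after the status dispatch, so drafts are now cached alongside published
articles and files with an unknown status are skipped without being cached.
A condensed paraphrase of the per-file miss path, with the reader arguments
elided:

    article_or_draft = self.get_cached_data(f, None)
    if article_or_draft is None:
        article_or_draft = self.readers.read_file(...)   # cache miss: parse
        if not is_valid_content(article_or_draft, f):
            self._add_failed_source_path(f)
            continue                         # invalid files are never cached
        if article_or_draft.status.lower() == 'published':
            all_articles.append(article_or_draft)
        elif article_or_draft.status.lower() == 'draft':
            # re-read as a Draft so the cached object is the Draft version
            article_or_draft = self.readers.read_file(..., content_class=Draft)
            all_drafts.append(article_or_draft)
        else:
            self._add_failed_source_path(f)  # unknown status: skip the file
            continue                         # without caching it
        self.cache_data(f, article_or_draft)
    self.add_source_path(article_or_draft)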
@@ -613,18 +617,20 @@ class PagesGenerator(CachingGenerator):
self._add_failed_source_path(f)
continue
if page.status.lower() == "published":
all_pages.append(page)
elif page.status.lower() == "hidden":
hidden_pages.append(page)
else:
logger.error("Unknown status '%s' for file %s, skipping it.",
page.status, f)
self._add_failed_source_path(f)
continue
self.cache_data(f, page)
self.add_source_path(page)
if page.status.lower() == "published":
all_pages.append(page)
elif page.status.lower() == "hidden":
hidden_pages.append(page)
else:
logger.error("Unknown status '%s' for file %s, skipping it.",
page.status, f)
self.pages, self.translations = process_translations(all_pages,
order_by=self.settings['PAGE_ORDER_BY'])
self.hidden_pages, self.hidden_translations = (

pelican/readers.py

@@ -24,8 +24,9 @@ except ImportError:
from six.moves.html_parser import HTMLParser
from pelican import signals
from pelican.cache import FileStampDataCacher
from pelican.contents import Page, Category, Tag, Author
from pelican.utils import get_date, pelican_open, FileStampDataCacher, SafeDatetime, posixize_path
from pelican.utils import get_date, pelican_open, SafeDatetime, posixize_path
def ensure_metadata_list(text):
"""Canonicalize the format of a list of authors or tags. This works

pelican/settings.py

@@ -123,13 +123,12 @@ DEFAULT_CONFIG = {
'SLUG_SUBSTITUTIONS': (),
'INTRASITE_LINK_REGEX': '[{|](?P<what>.*?)[|}]',
'SLUGIFY_SOURCE': 'title',
'CACHE_CONTENT': True,
'CACHE_CONTENT': False,
'CONTENT_CACHING_LAYER': 'reader',
'CACHE_PATH': 'cache',
'GZIP_CACHE': True,
'CHECK_MODIFIED_METHOD': 'mtime',
'LOAD_CONTENT_CACHE': True,
'AUTORELOAD_IGNORE_CACHE': False,
'LOAD_CONTENT_CACHE': False,
'WRITE_SELECTED': [],
'FORMATTED_FIELDS': ['summary'],
}
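Note: both cache switches now default to False, so a bare settings load yields
a build with caching fully disabled. A quick check, assuming read_settings()
without a path falls back to DEFAULT_CONFIG:

    from pelican.settings import read_settings

    settings = read_settings()  # no config file given: defaults apply
    assert settings['CACHE_CONTENT'] is False
    assert settings['LOAD_CONTENT_CACHE'] is False
    assert 'AUTORELOAD_IGNORE_CACHE' not in settings  # removed by this commit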

185
pelican/tests/test_cache.py Normal file

@@ -0,0 +1,185 @@
from __future__ import unicode_literals
import os
from codecs import open
try:
from unittest.mock import MagicMock
except ImportError:
try:
from mock import MagicMock
except ImportError:
MagicMock = False
from shutil import rmtree
from tempfile import mkdtemp
from pelican.generators import ArticlesGenerator, PagesGenerator
from pelican.tests.support import unittest, get_settings
CUR_DIR = os.path.dirname(__file__)
CONTENT_DIR = os.path.join(CUR_DIR, 'content')
class TestCache(unittest.TestCase):
def setUp(self):
self.temp_cache = mkdtemp(prefix='pelican_cache.')
def tearDown(self):
rmtree(self.temp_cache)
def _get_cache_enabled_settings(self):
settings = get_settings(filenames={})
settings['CACHE_CONTENT'] = True
settings['LOAD_CONTENT_CACHE'] = True
settings['CACHE_PATH'] = self.temp_cache
return settings
@unittest.skipUnless(MagicMock, 'Needs Mock module')
def test_article_object_caching(self):
"""Test Article objects caching at the generator level"""
settings = self._get_cache_enabled_settings()
settings['CONTENT_CACHING_LAYER'] = 'generator'
settings['DEFAULT_DATE'] = (1970, 1, 1)
settings['READERS'] = {'asc': None}
generator = ArticlesGenerator(
context=settings.copy(), settings=settings,
path=CONTENT_DIR, theme=settings['THEME'], output_path=None)
generator.generate_context()
self.assertTrue(hasattr(generator, '_cache'))
generator = ArticlesGenerator(
context=settings.copy(), settings=settings,
path=CONTENT_DIR, theme=settings['THEME'], output_path=None)
generator.readers.read_file = MagicMock()
generator.generate_context()
"""
3 Files don't get cached because they were not valid
- article_with_comments.html
- article_with_null_attributes.html
- 2012-11-30_md_w_filename_meta#foo-bar.md
"""
self.assertEqual(generator.readers.read_file.call_count, 3)
@unittest.skipUnless(MagicMock, 'Needs Mock module')
def test_article_reader_content_caching(self):
"""Test raw article content caching at the reader level"""
settings = self._get_cache_enabled_settings()
settings['READERS'] = {'asc': None}
generator = ArticlesGenerator(
context=settings.copy(), settings=settings,
path=CONTENT_DIR, theme=settings['THEME'], output_path=None)
generator.generate_context()
self.assertTrue(hasattr(generator.readers, '_cache'))
generator = ArticlesGenerator(
context=settings.copy(), settings=settings,
path=CONTENT_DIR, theme=settings['THEME'], output_path=None)
readers = generator.readers.readers
for reader in readers.values():
reader.read = MagicMock()
generator.generate_context()
for reader in readers.values():
self.assertEqual(reader.read.call_count, 0)
@unittest.skipUnless(MagicMock, 'Needs Mock module')
def test_article_ignore_cache(self):
"""Test that all the articles are read again when not loading cache
used in --ignore-cache or autoreload mode"""
settings = self._get_cache_enabled_settings()
settings['READERS'] = {'asc': None}
generator = ArticlesGenerator(
context=settings.copy(), settings=settings,
path=CONTENT_DIR, theme=settings['THEME'], output_path=None)
generator.readers.read_file = MagicMock()
generator.generate_context()
self.assertTrue(hasattr(generator, '_cache_open'))
orig_call_count = generator.readers.read_file.call_count
settings['LOAD_CONTENT_CACHE'] = False
generator = ArticlesGenerator(
context=settings.copy(), settings=settings,
path=CONTENT_DIR, theme=settings['THEME'], output_path=None)
generator.readers.read_file = MagicMock()
generator.generate_context()
self.assertEqual(generator.readers.read_file.call_count, orig_call_count)
@unittest.skipUnless(MagicMock, 'Needs Mock module')
def test_page_object_caching(self):
"""Test Page objects caching at the generator level"""
settings = self._get_cache_enabled_settings()
settings['CONTENT_CACHING_LAYER'] = 'generator'
settings['PAGE_PATHS'] = ['TestPages']
settings['READERS'] = {'asc': None}
generator = PagesGenerator(
context=settings.copy(), settings=settings,
path=CUR_DIR, theme=settings['THEME'], output_path=None)
generator.generate_context()
self.assertTrue(hasattr(generator, '_cache'))
generator = PagesGenerator(
context=settings.copy(), settings=settings,
path=CUR_DIR, theme=settings['THEME'], output_path=None)
generator.readers.read_file = MagicMock()
generator.generate_context()
"""
1 File doesn't get cached because it was not valid
- bad_page.rst
"""
self.assertEqual(generator.readers.read_file.call_count, 1)
@unittest.skipUnless(MagicMock, 'Needs Mock module')
def test_page_reader_content_caching(self):
"""Test raw page content caching at the reader level"""
settings = self._get_cache_enabled_settings()
settings['PAGE_PATHS'] = ['TestPages']
settings['READERS'] = {'asc': None}
generator = PagesGenerator(
context=settings.copy(), settings=settings,
path=CUR_DIR, theme=settings['THEME'], output_path=None)
generator.generate_context()
self.assertTrue(hasattr(generator.readers, '_cache'))
generator = PagesGenerator(
context=settings.copy(), settings=settings,
path=CUR_DIR, theme=settings['THEME'], output_path=None)
readers = generator.readers.readers
for reader in readers.values():
reader.read = MagicMock()
generator.generate_context()
for reader in readers.values():
self.assertEqual(reader.read.call_count, 0)
@unittest.skipUnless(MagicMock, 'Needs Mock module')
def test_page_ignore_cache(self):
"""Test that all the pages are read again when not loading cache
used in --ignore-cache or autoreload mode"""
settings = self._get_cache_enabled_settings()
settings['PAGE_PATHS'] = ['TestPages']
settings['READERS'] = {'asc': None}
generator = PagesGenerator(
context=settings.copy(), settings=settings,
path=CUR_DIR, theme=settings['THEME'], output_path=None)
generator.readers.read_file = MagicMock()
generator.generate_context()
self.assertTrue(hasattr(generator, '_cache_open'))
orig_call_count = generator.readers.read_file.call_count
settings['LOAD_CONTENT_CACHE'] = False
generator = PagesGenerator(
context=settings.copy(), settings=settings,
path=CUR_DIR, theme=settings['THEME'], output_path=None)
generator.readers.read_file = MagicMock()
generator.generate_context()
self.assertEqual(generator.readers.read_file.call_count, orig_call_count)
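Note: the paired *_object_caching and *_reader_content_caching tests above pin
down the difference between the two caching layers. Roughly, in settings
terms:

    settings['CONTENT_CACHING_LAYER'] = 'generator'
    # warm run: readers.read_file() is skipped for every cached file, since
    # the processed Article/Page objects come straight from the cache

    settings['CONTENT_CACHING_LAYER'] = 'reader'
    # warm run: read_file() still runs and rebuilds the objects, but each
    # reader's read() (the expensive parsing step) is never called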

pelican/tests/test_generators.py

@@ -135,7 +135,6 @@ class TestArticlesGenerator(unittest.TestCase):
self.assertFalse(writer.write_feed.called)
def test_generate_context(self):
articles_expected = [
['Article title', 'published', 'Default', 'article'],
['Article with markdown and summary metadata multi', 'published',
@@ -176,7 +175,6 @@ class TestArticlesGenerator(unittest.TestCase):
self.assertEqual(sorted(articles_expected), sorted(self.articles))
def test_generate_categories(self):
# test for name
# categories are grouped by slug; if two categories have the same slug
# but different names they will be grouped together, the first one in
@@ -194,7 +192,6 @@ class TestArticlesGenerator(unittest.TestCase):
self.assertEqual(sorted(categories), sorted(categories_expected))
def test_do_not_use_folder_as_category(self):
settings = get_settings(filenames={})
settings['DEFAULT_CATEGORY'] = 'Default'
settings['DEFAULT_DATE'] = (1970, 1, 1)
@@ -357,75 +354,6 @@ class TestArticlesGenerator(unittest.TestCase):
authors_expected = ['alexis-metaireau', 'author-first', 'author-second', 'first-author', 'second-author']
self.assertEqual(sorted(authors), sorted(authors_expected))
@unittest.skipUnless(MagicMock, 'Needs Mock module')
def test_article_object_caching(self):
"""Test Article objects caching at the generator level"""
settings = get_settings(filenames={})
settings['CACHE_PATH'] = self.temp_cache
settings['CONTENT_CACHING_LAYER'] = 'generator'
settings['READERS'] = {'asc': None}
generator = ArticlesGenerator(
context=settings.copy(), settings=settings,
path=CONTENT_DIR, theme=settings['THEME'], output_path=None)
generator.generate_context()
self.assertTrue(hasattr(generator, '_cache'))
generator = ArticlesGenerator(
context=settings.copy(), settings=settings,
path=CONTENT_DIR, theme=settings['THEME'], output_path=None)
generator.readers.read_file = MagicMock()
generator.generate_context()
generator.readers.read_file.assert_called_count == 0
@unittest.skipUnless(MagicMock, 'Needs Mock module')
def test_reader_content_caching(self):
"""Test raw content caching at the reader level"""
settings = get_settings(filenames={})
settings['CACHE_PATH'] = self.temp_cache
settings['READERS'] = {'asc': None}
generator = ArticlesGenerator(
context=settings.copy(), settings=settings,
path=CONTENT_DIR, theme=settings['THEME'], output_path=None)
generator.generate_context()
self.assertTrue(hasattr(generator.readers, '_cache'))
generator = ArticlesGenerator(
context=settings.copy(), settings=settings,
path=CONTENT_DIR, theme=settings['THEME'], output_path=None)
readers = generator.readers.readers
for reader in readers.values():
reader.read = MagicMock()
generator.generate_context()
for reader in readers.values():
reader.read.assert_called_count == 0
@unittest.skipUnless(MagicMock, 'Needs Mock module')
def test_ignore_cache(self):
"""Test that all the articles are read again when not loading cache
used in --ignore-cache or autoreload mode"""
settings = get_settings(filenames={})
settings['CACHE_PATH'] = self.temp_cache
settings['READERS'] = {'asc': None}
generator = ArticlesGenerator(
context=settings.copy(), settings=settings,
path=CONTENT_DIR, theme=settings['THEME'], output_path=None)
generator.readers.read_file = MagicMock()
generator.generate_context()
self.assertTrue(hasattr(generator, '_cache_open'))
orig_call_count = generator.readers.read_file.call_count
settings['LOAD_CONTENT_CACHE'] = False
generator = ArticlesGenerator(
context=settings.copy(), settings=settings,
path=CONTENT_DIR, theme=settings['THEME'], output_path=None)
generator.readers.read_file = MagicMock()
generator.generate_context()
generator.readers.read_file.assert_called_count == orig_call_count
def test_standard_metadata_in_default_metadata(self):
settings = get_settings(filenames={})
settings['CACHE_CONTENT'] = False
@@ -506,75 +434,6 @@ class TestPageGenerator(unittest.TestCase):
self.assertEqual(sorted(pages_expected), sorted(pages))
self.assertEqual(sorted(hidden_pages_expected), sorted(hidden_pages))
@unittest.skipUnless(MagicMock, 'Needs Mock module')
def test_page_object_caching(self):
"""Test Page objects caching at the generator level"""
settings = get_settings(filenames={})
settings['CACHE_PATH'] = self.temp_cache
settings['CONTENT_CACHING_LAYER'] = 'generator'
settings['READERS'] = {'asc': None}
generator = PagesGenerator(
context=settings.copy(), settings=settings,
path=CONTENT_DIR, theme=settings['THEME'], output_path=None)
generator.generate_context()
self.assertTrue(hasattr(generator, '_cache'))
generator = PagesGenerator(
context=settings.copy(), settings=settings,
path=CONTENT_DIR, theme=settings['THEME'], output_path=None)
generator.readers.read_file = MagicMock()
generator.generate_context()
generator.readers.read_file.assert_called_count == 0
@unittest.skipUnless(MagicMock, 'Needs Mock module')
def test_reader_content_caching(self):
"""Test raw content caching at the reader level"""
settings = get_settings(filenames={})
settings['CACHE_PATH'] = self.temp_cache
settings['READERS'] = {'asc': None}
generator = PagesGenerator(
context=settings.copy(), settings=settings,
path=CONTENT_DIR, theme=settings['THEME'], output_path=None)
generator.generate_context()
self.assertTrue(hasattr(generator.readers, '_cache'))
generator = PagesGenerator(
context=settings.copy(), settings=settings,
path=CONTENT_DIR, theme=settings['THEME'], output_path=None)
readers = generator.readers.readers
for reader in readers.values():
reader.read = MagicMock()
generator.generate_context()
for reader in readers.values():
reader.read.assert_called_count == 0
@unittest.skipUnless(MagicMock, 'Needs Mock module')
def test_ignore_cache(self):
"""Test that all the pages are read again when not loading cache
used in --ignore_cache or autoreload mode"""
settings = get_settings(filenames={})
settings['CACHE_PATH'] = self.temp_cache
settings['READERS'] = {'asc': None}
generator = PagesGenerator(
context=settings.copy(), settings=settings,
path=CONTENT_DIR, theme=settings['THEME'], output_path=None)
generator.readers.read_file = MagicMock()
generator.generate_context()
self.assertTrue(hasattr(generator, '_cache_open'))
orig_call_count = generator.readers.read_file.call_count
settings['LOAD_CONTENT_CACHE'] = False
generator = PagesGenerator(
context=settings.copy(), settings=settings,
path=CONTENT_DIR, theme=settings['THEME'], output_path=None)
generator.readers.read_file = MagicMock()
generator.generate_context()
generator.readers.read_file.assert_called_count == orig_call_count
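Note: moving these tests into test_cache.py also retires a latent bug. The
expressions of the form read_file.assert_called_count == orig_call_count
never asserted anything: MagicMock auto-creates unknown attributes, and the
result of the comparison is simply discarded. The new tests check call_count
with assertEqual instead. A minimal illustration (assert_called_count is not
part of the mock API):

    from unittest.mock import MagicMock

    m = MagicMock()
    m()

    # The removed tests did the equivalent of:
    #     m.assert_called_count == 2
    # On the mock library of the time this auto-created an attribute and
    # discarded the comparison, so it could never fail; Python 3.5+ raises
    # AttributeError for misspelled assert_* attributes instead.
    assert m.call_count == 1     # what the new tests verify via assertEqual
    m.assert_called_once_with()  # or use mock's genuine assertion helpers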
def test_generate_sorted(self):
settings = get_settings(filenames={})
settings['PAGE_PATHS'] = ['TestPages'] # relative to CUR_DIR

pelican/utils.py

@@ -14,7 +14,6 @@ import shutil
import sys
import traceback
import pickle
import hashlib
import datetime
from collections import Hashable
@@ -627,129 +626,6 @@ def split_all(path):
return components
class FileDataCacher(object):
'''Class that can cache data contained in files'''
def __init__(self, settings, cache_name, caching_policy, load_policy):
'''Load the specified cache within CACHE_PATH in settings
only if *load_policy* is True.
May use gzip if GZIP_CACHE in settings is True.
Sets caching policy according to *caching_policy*.
'''
self.settings = settings
self._cache_path = os.path.join(self.settings['CACHE_PATH'],
cache_name)
self._cache_data_policy = caching_policy
if self.settings['GZIP_CACHE']:
import gzip
self._cache_open = gzip.open
else:
self._cache_open = open
if load_policy:
try:
with self._cache_open(self._cache_path, 'rb') as fhandle:
self._cache = pickle.load(fhandle)
except (IOError, OSError) as err:
logger.debug('Cannot load cache %s (this is normal on first '
'run). Proceeding with empty cache.\n%s',
self._cache_path, err)
self._cache = {}
except Exception as err:
logger.warning(('Cannot unpickle cache %s, cache may be using '
'an incompatible protocol (see pelican caching docs). '
'Proceeding with empty cache.\n%s'),
self._cache_path, err)
self._cache = {}
else:
self._cache = {}
def cache_data(self, filename, data):
'''Cache data for given file'''
if self._cache_data_policy:
self._cache[filename] = data
def get_cached_data(self, filename, default=None):
'''Get cached data for the given file
if no data is cached, return the default object
'''
return self._cache.get(filename, default)
def save_cache(self):
'''Save the updated cache'''
if self._cache_data_policy:
try:
mkdir_p(self.settings['CACHE_PATH'])
with self._cache_open(self._cache_path, 'wb') as fhandle:
pickle.dump(self._cache, fhandle)
except (IOError, OSError, pickle.PicklingError) as err:
logger.warning('Could not save cache %s\n ... %s',
self._cache_path, err)
class FileStampDataCacher(FileDataCacher):
'''Subclass that also caches the stamp of the file'''
def __init__(self, settings, cache_name, caching_policy, load_policy):
'''This subclass additionally sets the filestamp function
and base path for filestamping operations
'''
super(FileStampDataCacher, self).__init__(settings, cache_name,
caching_policy,
load_policy)
method = self.settings['CHECK_MODIFIED_METHOD']
if method == 'mtime':
self._filestamp_func = os.path.getmtime
else:
try:
hash_func = getattr(hashlib, method)
def filestamp_func(filename):
'''return hash of file contents'''
with open(filename, 'rb') as fhandle:
return hash_func(fhandle.read()).digest()
self._filestamp_func = filestamp_func
except AttributeError as err:
logger.warning('Could not get hashing function\n\t%s', err)
self._filestamp_func = None
def cache_data(self, filename, data):
'''Cache stamp and data for the given file'''
stamp = self._get_file_stamp(filename)
super(FileStampDataCacher, self).cache_data(filename, (stamp, data))
def _get_file_stamp(self, filename):
'''Check if the given file has been modified
since the previous build.
depending on CHECK_MODIFIED_METHOD
a float may be returned for 'mtime',
a hash for a function name in the hashlib module
or an empty bytes string otherwise
'''
try:
return self._filestamp_func(filename)
except (IOError, OSError, TypeError) as err:
logger.warning('Cannot get modification stamp for %s\n\t%s',
filename, err)
return b''
def get_cached_data(self, filename, default=None):
'''Get the cached data for the given filename
if the file has not been modified.
If no record exists or file has been modified, return default.
Modification is checked by comparing the cached
and current file stamp.
'''
stamp, data = super(FileStampDataCacher, self).get_cached_data(
filename, (None, default))
if stamp != self._get_file_stamp(filename):
return default
return data
def is_selected_for_writing(settings, path):
'''Check whether path is selected for writing
according to the WRITE_SELECTED list