From 7f2bc2a23b6e8c8f0865cb293942a560a06d74e0 Mon Sep 17 00:00:00 2001 From: Stefan hr Berder Date: Sun, 16 Feb 2014 12:51:52 +0100 Subject: [PATCH 01/24] change date metadata parsing to dateutil.parser --- docs/getting_started.rst | 2 ++ pelican/tests/test_utils.py | 32 +++++++++++++++++++++++++++--- pelican/utils.py | 39 +++++-------------------------------- setup.py | 2 +- 4 files changed, 37 insertions(+), 38 deletions(-) diff --git a/docs/getting_started.rst b/docs/getting_started.rst index 6655d8d6..8ee37162 100644 --- a/docs/getting_started.rst +++ b/docs/getting_started.rst @@ -130,6 +130,8 @@ automatically installed without any action on your part: utilities * `MarkupSafe `_, for a markup safe string implementation +* `python-dateutil `_, to read + the date metadata If you want the following optional packages, you will need to install them manually via ``pip``: diff --git a/pelican/tests/test_utils.py b/pelican/tests/test_utils.py index f6f96a1c..9047593f 100644 --- a/pelican/tests/test_utils.py +++ b/pelican/tests/test_utils.py @@ -41,6 +41,12 @@ class TestUtils(LoggedTestCase): date = datetime.datetime(year=2012, month=11, day=22) date_hour = datetime.datetime( year=2012, month=11, day=22, hour=22, minute=11) + date_hour_z = datetime.datetime( + year=2012, month=11, day=22, hour=22, minute=11, + tzinfo=pytz.timezone('UTC')) + date_hour_est = datetime.datetime( + year=2012, month=11, day=22, hour=22, minute=11, + tzinfo=pytz.timezone('EST')) date_hour_sec = datetime.datetime( year=2012, month=11, day=22, hour=22, minute=11, second=10) date_hour_sec_z = datetime.datetime( @@ -61,22 +67,42 @@ class TestUtils(LoggedTestCase): '22/11/2012': date, '22.11.2012': date, '22.11.2012 22:11': date_hour, + '2012-11-22T22:11Z': date_hour_z, + '2012-11-22T22:11-0500': date_hour_est, '2012-11-22 22:11:10': date_hour_sec, '2012-11-22T22:11:10Z': date_hour_sec_z, '2012-11-22T22:11:10-0500': date_hour_sec_est, '2012-11-22T22:11:10.123Z': date_hour_sec_frac_z, } + # 
examples from http://www.w3.org/TR/NOTE-datetime + iso_8601_date = datetime.datetime(year=1997, month=7, day=16) + iso_8601_date_hour_tz = datetime.datetime( + year=1997, month=7, day=16, hour=19, minute=20, + tzinfo=pytz.timezone('CET')) + iso_8601_date_hour_sec_tz = datetime.datetime( + year=1997, month=7, day=16, hour=19, minute=20, second=30, + tzinfo=pytz.timezone('CET')) + iso_8601_date_hour_sec_ms_tz = datetime.datetime( + year=1997, month=7, day=16, hour=19, minute=20, second=30, + microsecond=450000, tzinfo=pytz.timezone('CET')) + iso_8601 = { + '1997-07-16': iso_8601_date, + '1997-07-16T19:20+01:00': iso_8601_date_hour_tz, + '1997-07-16T19:20:30+01:00': iso_8601_date_hour_sec_tz, + '1997-07-16T19:20:30.45+01:00': iso_8601_date_hour_sec_ms_tz, + } + # invalid ones invalid_dates = ['2010-110-12', 'yay'] - if version_info < (3, 2): - dates.pop('2012-11-22T22:11:10-0500') - invalid_dates.append('2012-11-22T22:11:10-0500') for value, expected in dates.items(): self.assertEqual(utils.get_date(value), expected, value) + for value, expected in iso_8601.items(): + self.assertEqual(utils.get_date(value), expected, value) + for item in invalid_dates: self.assertRaises(ValueError, utils.get_date, item) diff --git a/pelican/utils.py b/pelican/utils.py index 822e50e9..c5aacaa3 100644 --- a/pelican/utils.py +++ b/pelican/utils.py @@ -15,7 +15,7 @@ import traceback from collections import Hashable from contextlib import contextmanager -from datetime import datetime +import dateutil.parser from functools import partial from itertools import groupby from jinja2 import Markup @@ -181,39 +181,10 @@ def get_date(string): If no format matches the given date, raise a ValueError. 
""" string = re.sub(' +', ' ', string) - formats = [ - # ISO 8601 - '%Y', - '%Y-%m', - '%Y-%m-%d', - '%Y-%m-%dT%H:%M%z', - '%Y-%m-%dT%H:%MZ', - '%Y-%m-%dT%H:%M', - '%Y-%m-%dT%H:%M:%S%z', - '%Y-%m-%dT%H:%M:%SZ', - '%Y-%m-%dT%H:%M:%S', - '%Y-%m-%dT%H:%M:%S.%f%z', - '%Y-%m-%dT%H:%M:%S.%fZ', - '%Y-%m-%dT%H:%M:%S.%f', - # end ISO 8601 forms - '%Y-%m-%d %H:%M', - '%Y-%m-%d %H:%M:%S', - '%Y/%m/%d %H:%M', - '%Y/%m/%d', - '%d-%m-%Y', - '%d.%m.%Y %H:%M', - '%d.%m.%Y', - '%d/%m/%Y', - ] - for date_format in formats: - try: - date = datetime.strptime(string, date_format) - except ValueError: - continue - if date_format.endswith('Z'): - date = date.replace(tzinfo=pytz.timezone('UTC')) - return date - raise ValueError('{0!r} is not a valid date'.format(string)) + try: + return dateutil.parser.parse(string) + except (TypeError, ValueError): + raise ValueError('{0!r} is not a valid date'.format(string)) @contextmanager diff --git a/setup.py b/setup.py index f56a7c41..e989d549 100755 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import setup requires = ['feedgenerator >= 1.6', 'jinja2 >= 2.7', 'pygments', 'docutils', - 'pytz >= 0a', 'blinker', 'unidecode', 'six'] + 'pytz >= 0a', 'blinker', 'unidecode', 'six', 'python-dateutil'] entry_points = { 'console_scripts': [ From 3f304a2e9277a5c512ac722279cc278bb866ea5c Mon Sep 17 00:00:00 2001 From: Helmut Grohne Date: Mon, 31 Mar 2014 19:38:49 +0200 Subject: [PATCH 02/24] change the inhibition value of *_SAVE_AS to '' Previously, the documentation claimed the value of None for this purpose even though False was used for certain defaults. The values False and None cause warnings to be emitted from URLWrapper._from_settings though, so the new way of inhibiting page generation is to set a *_SAVE_AS value to the empty string. 
--- docs/settings.rst | 8 ++++---- pelican/settings.py | 6 +++--- pelican/writers.py | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/settings.rst b/docs/settings.rst index b579ae95..0c16db3c 100644 --- a/docs/settings.rst +++ b/docs/settings.rst @@ -269,9 +269,9 @@ Setting name (default value) What does it do? `TAG_SAVE_AS` (``'tag/{slug}.html'``) The location to save the tag page. `AUTHOR_URL` (``'author/{slug}.html'``) The URL to use for an author. `AUTHOR_SAVE_AS` (``'author/{slug}.html'``) The location to save an author. -`YEAR_ARCHIVE_SAVE_AS` (False) The location to save per-year archives of your posts. -`MONTH_ARCHIVE_SAVE_AS` (False) The location to save per-month archives of your posts. -`DAY_ARCHIVE_SAVE_AS` (False) The location to save per-day archives of your posts. +`YEAR_ARCHIVE_SAVE_AS` (``''``) The location to save per-year archives of your posts. +`MONTH_ARCHIVE_SAVE_AS` (``''``) The location to save per-month archives of your posts. +`DAY_ARCHIVE_SAVE_AS` (``''``) The location to save per-day archives of your posts. `SLUG_SUBSTITUTIONS` (``()``) Substitutions to make prior to stripping out non-alphanumerics when generating slugs. Specified as a list of 2-tuples of ``(from, to)`` which are @@ -282,7 +282,7 @@ Setting name (default value) What does it do? If you do not want one or more of the default pages to be created (e.g., you are the only author on your site and thus do not need an Authors page), - set the corresponding ``*_SAVE_AS`` setting to ``None`` to prevent the + set the corresponding ``*_SAVE_AS`` setting to ``''`` to prevent the relevant page from being generated. 
`DIRECT_TEMPLATES` diff --git a/pelican/settings.py b/pelican/settings.py index 796678e0..ffd0bc8f 100644 --- a/pelican/settings.py +++ b/pelican/settings.py @@ -81,9 +81,9 @@ DEFAULT_CONFIG = { 'PAGINATION_PATTERNS': [ (0, '{name}{number}{extension}', '{name}{number}{extension}'), ], - 'YEAR_ARCHIVE_SAVE_AS': False, - 'MONTH_ARCHIVE_SAVE_AS': False, - 'DAY_ARCHIVE_SAVE_AS': False, + 'YEAR_ARCHIVE_SAVE_AS': '', + 'MONTH_ARCHIVE_SAVE_AS': '', + 'DAY_ARCHIVE_SAVE_AS': '', 'RELATIVE_URLS': False, 'DEFAULT_LANG': 'en', 'TAG_CLOUD_STEPS': 4, diff --git a/pelican/writers.py b/pelican/writers.py index 63d74126..19e36e39 100644 --- a/pelican/writers.py +++ b/pelican/writers.py @@ -140,7 +140,7 @@ class Writer(object): :param **kwargs: additional variables to pass to the templates """ - if name is False: + if name is False or name == "": return elif not name: # other stuff, just return for now From d9b0091357565391854e6a00563a551b8b7bfe13 Mon Sep 17 00:00:00 2001 From: Rogdham Date: Tue, 1 Apr 2014 20:44:09 +0200 Subject: [PATCH 03/24] Limit and filter logs Drop duplicate logs. Allow logs to be grouped, enforcing a maximum number of logs per group. Add the LOG_FILTER setting so that the configuration file can request that some logs (of level up to warning) be ignored. --- docs/contribute.rst | 38 ++++++++++++++++++++ docs/settings.rst | 20 +++++++++++ pelican/__init__.py | 5 ++- pelican/contents.py | 6 ++-- pelican/log.py | 65 ++++++++++++++++++++++++++++++++--- pelican/readers.py | 34 ++++++------------ pelican/settings.py | 9 ++++- pelican/tests/test_pelican.py | 2 +- 8 files changed, 146 insertions(+), 33 deletions(-) diff --git a/docs/contribute.rst b/docs/contribute.rst index 304d1de8..28df1fcd 100644 --- a/docs/contribute.rst +++ b/docs/contribute.rst @@ -143,3 +143,41 @@ and Python 3 at the same time: changed it where I felt necessary. - Changed xrange() back to range(), so it is valid in both Python versions. 
+ + +Logging tips +============ + +Try to use logging with appropriate levels. + +For logging messages that are not repeated, use the usual Python way: + + # at top of file + import logging + logger = logging.getLogger(__name__) + + # when needed + logger.warning('A warning that could occur only once") + +However, if you want to log messages that may occur several times, instead of +a string, gives a tuple to the logging method, with two arguments: + + 1. The message to log for this very execution + 2. A generic message that will appear if the previous one would occur to many + times. + +For example, if you want to log missing resources, use the following code: + + for ressource in ressources: + if ressource.is_missing: + logger.warning(( + 'The resource {r} is missing'.format(r=ressource.name), + 'Other resources were missing')) + +The logs will be displayed as follows: + + WARNING: The resource prettiest_cat.jpg is missing + WARNING: The resource best_cat_ever.jpg is missing + WARNING: The resource cutest_cat.jpg is missing + WARNING: The resource lolcat.jpg is missing + WARNING: Other resources were missing diff --git a/docs/settings.rst b/docs/settings.rst index 0c16db3c..f7cfa69d 100644 --- a/docs/settings.rst +++ b/docs/settings.rst @@ -88,6 +88,9 @@ Setting name (default value) here or a single string representing one locale. When providing a list, all the locales will be tried until one works. +`LOG_FILTER` (``[]``) A list of tuples containing the logging level (up to warning) + and the message to be ignored. + For example: ``[(logging.WARN, 'TAG_SAVE_AS is set to False')]`` `READERS` (``{}``) A dictionary of file extensions / Reader classes for Pelican to process or ignore. For example, to avoid processing .html files, set: ``READERS = {'html': None}``. 
To add a custom reader for the @@ -694,6 +697,23 @@ adding the following to your configuration:: CSS_FILE = "wide.css" + +Logging +======= + +Sometimes, useless lines of log appears while the generation occurs. Finding +**the** meaningful error message in the middle of tons of annoying log outputs +can be quite tricky. To be able to filter out all useless log messages, Pelican +comes with the ``LOG_FILTER`` setting. + +``LOG_FILTER`` should be a list of tuples ``(level, msg)``, each of them being +composed of the logging level (up to warning) and the message to be ignored. +Simply populate the list with the logs you want to hide and they will be +filtered out. + +For example: ``[(logging.WARN, 'TAG_SAVE_AS is set to False')]`` + + Example settings ================ diff --git a/pelican/__init__.py b/pelican/__init__.py index 08dd484e..494e7e43 100644 --- a/pelican/__init__.py +++ b/pelican/__init__.py @@ -11,12 +11,15 @@ import argparse import locale import collections +# pelican.log has to be the first pelican module to be loaded +# because logging.setLoggerClass has to be called before logging.getLogger +from pelican.log import init + from pelican import signals from pelican.generators import (ArticlesGenerator, PagesGenerator, StaticGenerator, SourceFileGenerator, TemplatePagesGenerator) -from pelican.log import init from pelican.readers import Readers from pelican.settings import read_settings from pelican.utils import clean_output_dir, folder_watcher, file_watcher diff --git a/pelican/contents.py b/pelican/contents.py index 66602666..3096a064 100644 --- a/pelican/contents.py +++ b/pelican/contents.py @@ -239,8 +239,10 @@ class Content(object): self._context['filenames'][path].url)) origin = origin.replace('\\', '/') # for Windows paths. 
else: - logger.warning("Unable to find {fn}, skipping url" - " replacement".format(fn=path)) + logger.warning(("Unable to find {fn}, skipping url" + " replacement".format(fn=value), + "Other ressources were not found" + " and their urls not replaced")) elif what == 'category': origin = Category(path, self.settings).url elif what == 'tag': diff --git a/pelican/log.py b/pelican/log.py index bde8037e..d3aae012 100644 --- a/pelican/log.py +++ b/pelican/log.py @@ -9,7 +9,7 @@ import os import sys import logging -from logging import Formatter, getLogger, StreamHandler, DEBUG +from collections import defaultdict RESET_TERM = '\033[0;m' @@ -30,7 +30,7 @@ def ansi(color, text): return '\033[1;{0}m{1}{2}'.format(code, text, RESET_TERM) -class ANSIFormatter(Formatter): +class ANSIFormatter(logging.Formatter): """Convert a `logging.LogRecord' object into colored text, using ANSI escape sequences. @@ -51,7 +51,7 @@ class ANSIFormatter(Formatter): return ansi('white', record.levelname) + ': ' + msg -class TextFormatter(Formatter): +class TextFormatter(logging.Formatter): """ Convert a `logging.LogRecord' object into text. """ @@ -63,7 +63,62 @@ class TextFormatter(Formatter): return record.levelname + ': ' + record.getMessage() -def init(level=None, logger=getLogger(), handler=StreamHandler()): +class LimitFilter(logging.Filter): + """ + Remove duplicates records, and limit the number of records in the same + group. + + Groups are specified by the message to use when the number of records in + the same group hit the limit. 
+ E.g.: log.warning(('43 is not the answer', 'More erroneous answers')) + """ + + ignore = set() + threshold = 5 + group_count = defaultdict(int) + + def filter(self, record): + # don't limit levels over warnings + if record.levelno > logging.WARN: + return record + # extract group + group = None + if len(record.msg) == 2: + record.msg, group = record.msg + # ignore record if it was already raised + # use .getMessage() and not .msg for string formatting + ignore_key = (record.levelno, record.getMessage()) + to_ignore = ignore_key in LimitFilter.ignore + LimitFilter.ignore.add(ignore_key) + if to_ignore: + return False + # check if we went over threshold + if group: + key = (record.levelno, group) + LimitFilter.group_count[key] += 1 + if LimitFilter.group_count[key] == LimitFilter.threshold: + record.msg = group + if LimitFilter.group_count[key] > LimitFilter.threshold: + return False + return record + + +class LimitLogger(logging.Logger): + """ + A logger which add LimitFilter automatically + """ + + limit_filter = LimitFilter() + + def __init__(self, *args, **kwargs): + super(LimitLogger, self).__init__(*args, **kwargs) + self.addFilter(LimitLogger.limit_filter) + +logging.setLoggerClass(LimitLogger) + + +def init(level=None, handler=logging.StreamHandler()): + logger = logging.getLogger() if (os.isatty(sys.stdout.fileno()) @@ -79,7 +134,7 @@ def init(level=None, logger=getLogger(), handler=StreamHandler()): if __name__ == '__main__': - init(level=DEBUG) + init(level=logging.DEBUG) root_logger = logging.getLogger() root_logger.debug('debug') diff --git a/pelican/readers.py b/pelican/readers.py index 26329af6..35c38220 100644 --- a/pelican/readers.py +++ b/pelican/readers.py @@ -318,7 +318,11 @@ class HTMLReader(BaseReader): if not contents: contents = self._attr_value(attrs, 'contents', '') if contents: - logger.warning("Meta tag attribute 'contents' used in file %s, should be changed to 'content'", self._filename) + logger.warning(( + "Meta tag attribute 
'contents' used in file {}, should" + " be changed to 'content'".format(self._filename), + "Other files have meta tag attribute 'contents' that" + " should be changed to 'content'")) if name == 'keywords': name = 'tags' @@ -385,10 +389,6 @@ class Readers(object): """ - # used to warn about missing dependencies only once, at the first - # instanciation of a Readers object. - warn_missing_deps = True - def __init__(self, settings=None): self.settings = settings or {} self.readers = {} @@ -396,16 +396,13 @@ class Readers(object): for cls in [BaseReader] + BaseReader.__subclasses__(): if not cls.enabled: - if self.__class__.warn_missing_deps: - logger.debug('Missing dependencies for {}' - .format(', '.join(cls.file_extensions))) + logger.debug('Missing dependencies for {}' + .format(', '.join(cls.file_extensions))) continue for ext in cls.file_extensions: self.reader_classes[ext] = cls - self.__class__.warn_missing_deps = False - if self.settings['READERS']: self.reader_classes.update(self.settings['READERS']) @@ -505,19 +502,10 @@ def find_empty_alt(content, path): src=(['"])(.*)\5 ) """, re.X) - matches = re.findall(imgs, content) - # find a correct threshold - nb_warnings = 10 - if len(matches) == nb_warnings + 1: - nb_warnings += 1 # avoid bad looking case - # print one warning per image with empty alt until threshold - for match in matches[:nb_warnings]: - logger.warning('Empty alt attribute for image {} in {}'.format( - os.path.basename(match[1] + match[5]), path)) - # print one warning for the other images with empty alt - if len(matches) > nb_warnings: - logger.warning('{} other images with empty alt attributes' - .format(len(matches) - nb_warnings)) + for match in re.findall(imgs, content): + logger.warning(('Empty alt attribute for image {} in {}'.format( + os.path.basename(match[1] + match[5]), path), + 'Other images have empty alt attributes')) def default_metadata(settings=None, process=None): diff --git a/pelican/settings.py b/pelican/settings.py index 
ffd0bc8f..f70f74a8 100644 --- a/pelican/settings.py +++ b/pelican/settings.py @@ -19,6 +19,8 @@ except ImportError: from os.path import isabs +from pelican.log import LimitFilter + logger = logging.getLogger(__name__) @@ -98,6 +100,7 @@ DEFAULT_CONFIG = { 'MD_EXTENSIONS': ['codehilite(css_class=highlight)', 'extra'], 'JINJA_EXTENSIONS': [], 'JINJA_FILTERS': {}, + 'LOG_FILTER': [], 'LOCALE': [''], # defaults to user locale 'DEFAULT_PAGINATION': False, 'DEFAULT_ORPHANS': 0, @@ -170,12 +173,16 @@ def get_settings_from_file(path, default_settings=DEFAULT_CONFIG): def configure_settings(settings): """Provide optimizations, error checking and warnings for the given settings. - + Set up the logs to be ignored as well. """ if not 'PATH' in settings or not os.path.isdir(settings['PATH']): raise Exception('You need to specify a path containing the content' ' (see pelican --help for more information)') + # set up logs to be ignored + LimitFilter.ignore.update(set(settings.get('LOG_FILTER', + DEFAULT_CONFIG['LOG_FILTER']))) + # lookup the theme in "pelican/themes" if the given one doesn't exist if not os.path.isdir(settings['THEME']): theme_path = os.path.join( diff --git a/pelican/tests/test_pelican.py b/pelican/tests/test_pelican.py index 21a77e6b..2d4bbdfc 100644 --- a/pelican/tests/test_pelican.py +++ b/pelican/tests/test_pelican.py @@ -83,7 +83,7 @@ class TestPelican(LoggedTestCase): mute(True)(pelican.run)() self.assertDirsEqual(self.temp_path, os.path.join(OUTPUT_PATH, 'basic')) self.assertLogCountEqual( - count=4, + count=3, msg="Unable to find.*skipping url replacement", level=logging.WARNING) From 80842cbc0e2bbaed991c23de318752ca866a32a9 Mon Sep 17 00:00:00 2001 From: Justin Mayer Date: Wed, 2 Apr 2014 12:38:49 -0700 Subject: [PATCH 04/24] Fix deprecated logger warning for Python 3 logger.warn() has been deprecated in Python 3 in favor of logger.warning() --- pelican/tools/pelican_import.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/pelican/tools/pelican_import.py b/pelican/tools/pelican_import.py index d6b57c47..30d6346c 100755 --- a/pelican/tools/pelican_import.py +++ b/pelican/tools/pelican_import.py @@ -135,7 +135,7 @@ def wp2fields(xml, wp_custpost=False): title = HTMLParser().unescape(item.title.contents[0]) except IndexError: title = 'No title [%s]' % item.find('post_name').string - logger.warn('Post "%s" is lacking a proper title' % title) + logger.warning('Post "%s" is lacking a proper title' % title) filename = item.find('post_name').string post_id = item.find('post_id').string @@ -601,11 +601,11 @@ def download_attachments(output_path, urls): except URLError as e: error = ("No file could be downloaded from {}; Error {}" .format(url, e)) - logger.warn(error) + logger.warning(error) except IOError as e: #Python 2.7 throws an IOError rather Than URLError error = ("No file could be downloaded from {}; Error {}" .format(url, e)) - logger.warn(error) + logger.warning(error) return locations From c6ff88d0fce7f7ab5e05f2c414a365aa9faa6454 Mon Sep 17 00:00:00 2001 From: Justin Mayer Date: Mon, 7 Apr 2014 14:29:21 -0700 Subject: [PATCH 05/24] Minor correction to settings documentation --- docs/settings.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/settings.rst b/docs/settings.rst index 0c16db3c..53a25f8c 100644 --- a/docs/settings.rst +++ b/docs/settings.rst @@ -36,7 +36,7 @@ Setting name (default value) =============================================================================== ===================================================================== `AUTHOR` Default author (put your name) `DATE_FORMATS` (``{}``) If you manage multiple languages, you can set the date formatting - here. See the "Date format and locales" section below for details. + here. See the "Date format and locale" section below for details. 
`USE_FOLDER_AS_CATEGORY` (``True``) When you don't specify a category in your post metadata, set this setting to ``True``, and organize your articles in subfolders, the subfolder will become the category of your post. If set to ``False``, @@ -167,9 +167,9 @@ Setting name (default value) code blocks. See :ref:`internal_pygments_options` for a list of supported options. -`SLUGIFY_SOURCE` (``'input'``) Specifies where you want the slug to be automatically generated - from. Can be set to 'title' to use the 'Title:' metadata tag or - 'basename' to use the articles basename when creating the slug. +`SLUGIFY_SOURCE` (``'input'``) Specifies where you want the slug to be automatically generated + from. Can be set to 'title' to use the 'Title:' metadata tag or + 'basename' to use the articles basename when creating the slug. =============================================================================== ===================================================================== .. [#] Default is the system locale. 
From 03976b650d0e96251a63a9510d23fff7d9aec2d1 Mon Sep 17 00:00:00 2001 From: Antoine Brenner Date: Mon, 14 Apr 2014 20:43:19 +0200 Subject: [PATCH 06/24] Fix unittest issue related to python2/python3 differences The test_datetime test passed on python3 but not python2 because datetime.strftime is a byte string in python2, and a unicode string in python3 This patch allows the test to pass in both python2 and python3 (3.3+ only) --- pelican/tests/test_contents.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pelican/tests/test_contents.py b/pelican/tests/test_contents.py index 4c6f8ed1..27d2a897 100644 --- a/pelican/tests/test_contents.py +++ b/pelican/tests/test_contents.py @@ -129,9 +129,15 @@ class TestPage(unittest.TestCase): page_kwargs['metadata']['date'] = dt page = Page(**page_kwargs) - self.assertEqual(page.locale_date, - dt.strftime(DEFAULT_CONFIG['DEFAULT_DATE_FORMAT'])) + # page.locale_date is a unicode string in both python2 and python3 + dt_date = dt.strftime(DEFAULT_CONFIG['DEFAULT_DATE_FORMAT']) + # dt_date is a byte string in python2, and a unicode string in python3 + # Let's make sure it is a unicode string (relies on python 3.3 supporting the u prefix) + if type(dt_date) != type(u''): + # python2: + dt_date = unicode(dt_date, 'utf8') + self.assertEqual(page.locale_date, dt_date ) page_kwargs['settings'] = get_settings() # I doubt this can work on all platforms ... 
From 7e06912bcad1eeb71d8f154dff21555a5ed865ca Mon Sep 17 00:00:00 2001 From: Justin Mayer Date: Mon, 14 Apr 2014 16:18:07 -0400 Subject: [PATCH 07/24] Minor text changes to log message limitation --- docs/contribute.rst | 16 ++++++++-------- docs/settings.rst | 14 +++++++------- pelican/contents.py | 2 +- pelican/log.py | 4 ++-- pelican/settings.py | 6 +++--- 5 files changed, 21 insertions(+), 21 deletions(-) diff --git a/docs/contribute.rst b/docs/contribute.rst index 28df1fcd..57349156 100644 --- a/docs/contribute.rst +++ b/docs/contribute.rst @@ -157,24 +157,24 @@ For logging messages that are not repeated, use the usual Python way: logger = logging.getLogger(__name__) # when needed - logger.warning('A warning that could occur only once") + logger.warning("A warning that would usually occur only once") However, if you want to log messages that may occur several times, instead of -a string, gives a tuple to the logging method, with two arguments: +a string, give a tuple to the logging method, with two arguments: - 1. The message to log for this very execution - 2. A generic message that will appear if the previous one would occur to many + 1. The message to log for the initial execution + 2. A generic message that will appear if the previous one would occur too many times. 
For example, if you want to log missing resources, use the following code: - for ressource in ressources: - if ressource.is_missing: + for resource in resources: + if resource.is_missing: logger.warning(( - 'The resource {r} is missing'.format(r=ressource.name), + 'The resource {r} is missing'.format(r=resource.name), 'Other resources were missing')) -The logs will be displayed as follows: +The log messages will be displayed as follows: WARNING: The resource prettiest_cat.jpg is missing WARNING: The resource best_cat_ever.jpg is missing diff --git a/docs/settings.rst b/docs/settings.rst index c35bf08d..36cc3f9a 100644 --- a/docs/settings.rst +++ b/docs/settings.rst @@ -88,7 +88,7 @@ Setting name (default value) here or a single string representing one locale. When providing a list, all the locales will be tried until one works. -`LOG_FILTER` (``[]``) A list of tuples containing the logging level (up to warning) +`LOG_FILTER` (``[]``) A list of tuples containing the logging level (up to ``warning``) and the message to be ignored. For example: ``[(logging.WARN, 'TAG_SAVE_AS is set to False')]`` `READERS` (``{}``) A dictionary of file extensions / Reader classes for Pelican to @@ -701,15 +701,15 @@ adding the following to your configuration:: Logging ======= -Sometimes, useless lines of log appears while the generation occurs. Finding -**the** meaningful error message in the middle of tons of annoying log outputs -can be quite tricky. To be able to filter out all useless log messages, Pelican +Sometimes, a long list of warnings may appear during site generation. Finding +the **meaningful** error message in the middle of tons of annoying log output +can be quite tricky. In order to filter out redundant log messages, Pelican comes with the ``LOG_FILTER`` setting. ``LOG_FILTER`` should be a list of tuples ``(level, msg)``, each of them being -composed of the logging level (up to warning) and the message to be ignored. 
-Simply populate the list with the logs you want to hide and they will be -filtered out. +composed of the logging level (up to ``warning``) and the message to be ignored. +Simply populate the list with the log messages you want to hide, and they will +be filtered out. For example: ``[(logging.WARN, 'TAG_SAVE_AS is set to False')]`` diff --git a/pelican/contents.py b/pelican/contents.py index 3096a064..615a7fd8 100644 --- a/pelican/contents.py +++ b/pelican/contents.py @@ -241,7 +241,7 @@ class Content(object): else: logger.warning(("Unable to find {fn}, skipping url" " replacement".format(fn=value), - "Other ressources were not found" + "Other resources were not found" " and their urls not replaced")) elif what == 'category': origin = Category(path, self.settings).url diff --git a/pelican/log.py b/pelican/log.py index d3aae012..fdf41cb0 100644 --- a/pelican/log.py +++ b/pelican/log.py @@ -78,7 +78,7 @@ class LimitFilter(logging.Filter): group_count = defaultdict(int) def filter(self, record): - # don't limit levels over warnings + # don't limit log messages for anything above "warning" if record.levelno > logging.WARN: return record # extract group @@ -105,7 +105,7 @@ class LimitFilter(logging.Filter): class LimitLogger(logging.Logger): """ - A logger which add LimitFilter automatically + A logger which adds LimitFilter automatically """ limit_filter = LimitFilter() diff --git a/pelican/settings.py b/pelican/settings.py index f70f74a8..7277c121 100644 --- a/pelican/settings.py +++ b/pelican/settings.py @@ -171,15 +171,15 @@ def get_settings_from_file(path, default_settings=DEFAULT_CONFIG): def configure_settings(settings): - """Provide optimizations, error checking and warnings for the given + """Provide optimizations, error checking, and warnings for the given settings. - Set up the logs to be ignored as well. + Also, specify the log messages to be ignored. 
""" if not 'PATH' in settings or not os.path.isdir(settings['PATH']): raise Exception('You need to specify a path containing the content' ' (see pelican --help for more information)') - # set up logs to be ignored + # specify the log messages to be ignored LimitFilter.ignore.update(set(settings.get('LOG_FILTER', DEFAULT_CONFIG['LOG_FILTER']))) From aabb7f9345853c7afb73e781ac1cc65f6a2094dd Mon Sep 17 00:00:00 2001 From: Antoine Brenner Date: Mon, 14 Apr 2014 22:28:25 +0200 Subject: [PATCH 08/24] Fix error in download_attachments() triggered by python2 unit test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The download_attachments error is triggered in the unit tests by a Japanese error message (接続を拒否されました) (connection refused), that Python is not able to serialize into a byte string. This error weirdly does not appear every time the unit tests are run. It might be related to the order in which the tests are run. This error was found and fixed during the PyconUS 2014 pelican sprint. 
It was discovered on a Linux Fedora20 computer running Python2.7 in virtualenv --- pelican/tools/pelican_import.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/pelican/tools/pelican_import.py b/pelican/tools/pelican_import.py index 30d6346c..27e47754 100755 --- a/pelican/tools/pelican_import.py +++ b/pelican/tools/pelican_import.py @@ -603,8 +603,18 @@ def download_attachments(output_path, urls): .format(url, e)) logger.warning(error) except IOError as e: #Python 2.7 throws an IOError rather Than URLError - error = ("No file could be downloaded from {}; Error {}" - .format(url, e)) + # For japanese, the error might look kind of like this: + # e = IOError( 'socket error', socket.error(111, u'\u63a5\u7d9a\u3092\u62d2\u5426\u3055\u308c\u307e\u3057\u305f') ) + # and not be suitable to use in "{}".format(e) , raising UnicodeDecodeError + # (This is at least the case on my Fedora running Python 2.7.5 + # (default, Feb 19 2014, 13:47:28) [GCC 4.8.2 20131212 (Red Hat 4.8.2-7)] on linux2 + try: + error = ("No file could be downloaded from {}; Error {}" + .format(url, e)) + except UnicodeDecodeError: + # For lack of a better log message because we could not decode e, let's use repr(e) + error = ("No file could be downloaded from {}; Error {}" + .format(url, repr(e))) logger.warning(error) return locations From fd779267000ac539ee0a9ba5856d103fbbc7cd7c Mon Sep 17 00:00:00 2001 From: Ondrej Grover Date: Sat, 15 Feb 2014 21:20:51 +0100 Subject: [PATCH 09/24] Cache content to speed up reading. Fixes #224. Cache read content so that it doesn't have to be read next time if its source has not been modified. 
--- docs/faq.rst | 19 ++++++ docs/settings.rst | 64 ++++++++++++++++- pelican/__init__.py | 19 ++++++ pelican/contents.py | 7 ++ pelican/generators.py | 72 ++++++++++++-------- pelican/settings.py | 7 +- pelican/tests/test_generators.py | 59 ++++++++++++++++ pelican/tests/test_pelican.py | 6 ++ pelican/utils.py | 113 +++++++++++++++++++++++++++++++ 9 files changed, 334 insertions(+), 32 deletions(-) diff --git a/docs/faq.rst b/docs/faq.rst index 80e14d21..bb9377e6 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -205,3 +205,22 @@ You can also disable generation of tag-related pages via:: TAGS_SAVE_AS = '' TAG_SAVE_AS = '' + +Why does Pelican always write all HTML files even with content caching enabled? +=============================================================================== + +In order to reliably determine whether the HTML output is different +before writing it, a large part of the generation environment +including the template contexts, imported plugins, etc. would have to +be saved and compared, at least in the form of a hash (which would +require special handling of unhashable types), because of all the +possible combinations of plugins, pagination, etc. which may change in +many different ways. This would require a lot more processing time +and memory and storage space. Simply writing the files each time is a +lot faster and a lot more reliable. + +However, this means that the modification time of the files changes +every time, so a ``rsync`` based upload will transfer them even if +their content hasn't changed. A simple solution is to make ``rsync`` +use the ``--checksum`` option, which will make it compare the file +checksums in a much faster way than Pelican would. diff --git a/docs/settings.rst b/docs/settings.rst index 36cc3f9a..d8690230 100644 --- a/docs/settings.rst +++ b/docs/settings.rst @@ -173,6 +173,12 @@ Setting name (default value) `SLUGIFY_SOURCE` (``'input'``) Specifies where you want the slug to be automatically generated from. 
Can be set to 'title' to use the 'Title:' metadata tag or 'basename' to use the articles basename when creating the slug. +`CACHE_CONTENT` (``True``) If ``True``, save read content in a cache file. + See :ref:`reading_only_modified_content` for details about caching. +`CACHE_DIRECTORY` (``cache``) Directory in which to store cache files. +`CHECK_MODIFIED_METHOD` (``mtime``) Controls how files are checked for modifications. +`LOAD_CONTENT_CACHE` (``True``) If ``True``, load unmodified content from cache. +`GZIP_CACHE` (``True``) If ``True``, use gzip to (de)compress the cache files. =============================================================================== ===================================================================== .. [#] Default is the system locale. @@ -602,7 +608,7 @@ Setting name (default value) What does it do? .. [3] %s is the language Ordering content -================= +================ ================================================ ===================================================== Setting name (default value) What does it do? @@ -697,7 +703,6 @@ adding the following to your configuration:: CSS_FILE = "wide.css" - Logging ======= @@ -713,6 +718,61 @@ be filtered out. For example: ``[(logging.WARN, 'TAG_SAVE_AS is set to False')]`` +.. _reading_only_modified_content: + +Reading only modified content +============================= + +To speed up the build process, pelican can optionally read only articles +and pages with modified content. + +When Pelican is about to read some content source file: + +1. The hash or modification time information for the file from a + previous build are loaded from a cache file if `LOAD_CONTENT_CACHE` + is ``True``. These files are stored in the `CACHE_DIRECTORY` + directory. If the file has no record in the cache file, it is read + as usual. +2. The file is checked according to `CHECK_MODIFIED_METHOD`: + + - If set to ``'mtime'``, the modification time of the file is + checked. 
+ - If set to a name of a function provided by the ``hashlib`` + module, e.g. ``'md5'``, the file hash is checked. + - If set to anything else or the necessary information about the + file cannot be found in the cache file, the content is read as + usual. + +3. If the file is considered unchanged, the content object saved in a + previous build corresponding to the file is loaded from the cache + and the file is not read. +4. If the file is considered changed, the file is read and the new + modification information and the content object are saved to the + cache if `CACHE_CONTENT` is ``True``. + +Modification time based checking is faster than comparing file hashes, +but is not as reliable, because mtime information can be lost when +e.g. copying the content sources using the ``cp`` or ``rsync`` +commands without the mtime preservation mode (invoked e.g. by +``--archive``). + +The cache files are Python pickles, so they may not be readable by +different versions of Python as the pickle format often changes. If +such an error is encountered, the cache files have to be rebuilt +using the pelican command-line option ``--full-rebuild``. +The cache files also have to be rebuilt when changing the +`GZIP_CACHE` setting for cache file reading to work. + +The ``--full-rebuild`` command-line option is also useful when the +whole site needs to be regenerated due to e.g. modifications to the +settings file or theme files. When pelican runs in autoreload mode, +modification of the settings file or theme will trigger a full rebuild +automatically. + +Note that even when using cached content, all output is always +written, so the modification times of the ``*.html`` files always +change. Therefore, ``rsync`` based upload may benefit from the +``--checksum`` option. 
Example settings ================ diff --git a/pelican/__init__.py b/pelican/__init__.py index 494e7e43..b6bfe326 100644 --- a/pelican/__init__.py +++ b/pelican/__init__.py @@ -260,6 +260,10 @@ def parse_arguments(): action='store_true', help='Relaunch pelican each time a modification occurs' ' on the content files.') + + parser.add_argument('-f', '--full-rebuild', action='store_true', + dest='full_rebuild', help='Rebuild everything by not loading from cache') + return parser.parse_args() @@ -275,6 +279,8 @@ def get_config(args): config['THEME'] = abstheme if os.path.exists(abstheme) else args.theme if args.delete_outputdir is not None: config['DELETE_OUTPUT_DIRECTORY'] = args.delete_outputdir + if args.full_rebuild: + config['LOAD_CONTENT_CACHE'] = False # argparse returns bytes in Py2. There is no definite answer as to which # encoding argparse (or sys.argv) uses. @@ -327,6 +333,7 @@ def main(): print(' --- AutoReload Mode: Monitoring `content`, `theme` and' ' `settings` for changes. ---') + first_run = True # load cache on first run while True: try: # Check source dir for changed files ending with the given @@ -335,9 +342,14 @@ def main(): # have changed, no matter what extension the filenames # have. modified = {k: next(v) for k, v in watchers.items()} + original_load_cache = settings['LOAD_CONTENT_CACHE'] if modified['settings']: pelican, settings = get_instance(args) + if not first_run: + original_load_cache = settings['LOAD_CONTENT_CACHE'] + # invalidate cache + pelican.settings['LOAD_CONTENT_CACHE'] = False if any(modified.values()): print('\n-> Modified: {}. re-generating...'.format( @@ -349,8 +361,15 @@ def main(): if modified['theme'] is None: logger.warning('Empty theme folder. 
Using `basic` ' 'theme.') + elif modified['theme']: + # theme modified, needs full rebuild -> no cache + if not first_run: # but not on first run + pelican.settings['LOAD_CONTENT_CACHE'] = False pelican.run() + first_run = False + # restore original caching policy + pelican.settings['LOAD_CONTENT_CACHE'] = original_load_cache except KeyboardInterrupt: logger.warning("Keyboard interrupt, quitting.") diff --git a/pelican/contents.py b/pelican/contents.py index 615a7fd8..c02047b8 100644 --- a/pelican/contents.py +++ b/pelican/contents.py @@ -325,6 +325,13 @@ class Content(object): os.path.abspath(self.settings['PATH'])) ) + def __eq__(self, other): + """Compare with metadata and content of other Content object""" + return other and self.metadata == other.metadata and self.content == other.content + + # keep basic hashing functionality for caching to work + __hash__ = object.__hash__ + class Page(Content): mandatory_properties = ('title',) diff --git a/pelican/generators.py b/pelican/generators.py index bfdac1a5..7c2dbbf2 100644 --- a/pelican/generators.py +++ b/pelican/generators.py @@ -20,14 +20,15 @@ from jinja2 import (Environment, FileSystemLoader, PrefixLoader, ChoiceLoader, from pelican.contents import Article, Draft, Page, Static, is_valid_content from pelican.readers import Readers -from pelican.utils import copy, process_translations, mkdir_p, DateFormatter +from pelican.utils import (copy, process_translations, mkdir_p, DateFormatter, + FileStampDataCacher) from pelican import signals logger = logging.getLogger(__name__) -class Generator(object): +class Generator(FileStampDataCacher): """Baseclass generator""" def __init__(self, context, settings, path, theme, output_path, **kwargs): @@ -73,6 +74,10 @@ class Generator(object): custom_filters = self.settings['JINJA_FILTERS'] self.env.filters.update(custom_filters) + # set up caching + super(Generator, self).__init__(settings, 'CACHE_CONTENT', + 'LOAD_CONTENT_CACHE') + signals.generator_init.send(self) def 
get_template(self, name): @@ -408,20 +413,24 @@ class ArticlesGenerator(Generator): for f in self.get_files( self.settings['ARTICLE_DIR'], exclude=self.settings['ARTICLE_EXCLUDES']): - try: - article = self.readers.read_file( - base_path=self.path, path=f, content_class=Article, - context=self.context, - preread_signal=signals.article_generator_preread, - preread_sender=self, - context_signal=signals.article_generator_context, - context_sender=self) - except Exception as e: - logger.warning('Could not process {}\n{}'.format(f, e)) - continue + article = self.get_cached_data(f, None) + if article is None: + try: + article = self.readers.read_file( + base_path=self.path, path=f, content_class=Article, + context=self.context, + preread_signal=signals.article_generator_preread, + preread_sender=self, + context_signal=signals.article_generator_context, + context_sender=self) + except Exception as e: + logger.warning('Could not process {}\n{}'.format(f, e)) + continue - if not is_valid_content(article, f): - continue + if not is_valid_content(article, f): + continue + + self.cache_data(f, article) self.add_source_path(article) @@ -502,7 +511,7 @@ class ArticlesGenerator(Generator): self._update_context(('articles', 'dates', 'tags', 'categories', 'tag_cloud', 'authors', 'related_posts')) - + self.save_cache() signals.article_generator_finalized.send(self) def generate_output(self, writer): @@ -527,20 +536,24 @@ class PagesGenerator(Generator): for f in self.get_files( self.settings['PAGE_DIR'], exclude=self.settings['PAGE_EXCLUDES']): - try: - page = self.readers.read_file( - base_path=self.path, path=f, content_class=Page, - context=self.context, - preread_signal=signals.page_generator_preread, - preread_sender=self, - context_signal=signals.page_generator_context, - context_sender=self) - except Exception as e: - logger.warning('Could not process {}\n{}'.format(f, e)) - continue + page = self.get_cached_data(f, None) + if page is None: + try: + page = 
self.readers.read_file( + base_path=self.path, path=f, content_class=Page, + context=self.context, + preread_signal=signals.page_generator_preread, + preread_sender=self, + context_signal=signals.page_generator_context, + context_sender=self) + except Exception as e: + logger.warning('Could not process {}\n{}'.format(f, e)) + continue - if not is_valid_content(page, f): - continue + if not is_valid_content(page, f): + continue + + self.cache_data(f, page) self.add_source_path(page) @@ -560,6 +573,7 @@ class PagesGenerator(Generator): self._update_context(('pages', )) self.context['PAGES'] = self.pages + self.save_cache() signals.page_generator_finalized.send(self) def generate_output(self, writer): diff --git a/pelican/settings.py b/pelican/settings.py index 7277c121..baf2a497 100644 --- a/pelican/settings.py +++ b/pelican/settings.py @@ -119,7 +119,12 @@ DEFAULT_CONFIG = { 'IGNORE_FILES': ['.#*'], 'SLUG_SUBSTITUTIONS': (), 'INTRASITE_LINK_REGEX': '[{|](?P.*?)[|}]', - 'SLUGIFY_SOURCE': 'title' + 'SLUGIFY_SOURCE': 'title', + 'CACHE_CONTENT': True, + 'CACHE_DIRECTORY': 'cache', + 'GZIP_CACHE': True, + 'CHECK_MODIFIED_METHOD': 'mtime', + 'LOAD_CONTENT_CACHE': True, } PYGMENTS_RST_OPTIONS = None diff --git a/pelican/tests/test_generators.py b/pelican/tests/test_generators.py index 6f13aeb6..a500f87a 100644 --- a/pelican/tests/test_generators.py +++ b/pelican/tests/test_generators.py @@ -42,6 +42,7 @@ class TestArticlesGenerator(unittest.TestCase): settings['DEFAULT_CATEGORY'] = 'Default' settings['DEFAULT_DATE'] = (1970, 1, 1) settings['READERS'] = {'asc': None} + settings['CACHE_CONTENT'] = False # cache not needed for this logic tests cls.generator = ArticlesGenerator( context=settings.copy(), settings=settings, @@ -50,8 +51,15 @@ class TestArticlesGenerator(unittest.TestCase): cls.articles = [[page.title, page.status, page.category.name, page.template] for page in cls.generator.articles] + def setUp(self): + self.temp_cache = mkdtemp(prefix='pelican_cache.') + + def 
tearDown(self): + rmtree(self.temp_cache) + def test_generate_feeds(self): settings = get_settings() + settings['CACHE_DIRECTORY'] = self.temp_cache generator = ArticlesGenerator( context=settings, settings=settings, path=None, theme=settings['THEME'], output_path=None) @@ -127,6 +135,7 @@ class TestArticlesGenerator(unittest.TestCase): settings['DEFAULT_CATEGORY'] = 'Default' settings['DEFAULT_DATE'] = (1970, 1, 1) settings['USE_FOLDER_AS_CATEGORY'] = False + settings['CACHE_DIRECTORY'] = self.temp_cache settings['READERS'] = {'asc': None} settings['filenames'] = {} generator = ArticlesGenerator( @@ -151,6 +160,7 @@ class TestArticlesGenerator(unittest.TestCase): def test_direct_templates_save_as_default(self): settings = get_settings(filenames={}) + settings['CACHE_DIRECTORY'] = self.temp_cache generator = ArticlesGenerator( context=settings, settings=settings, path=None, theme=settings['THEME'], output_path=None) @@ -165,6 +175,7 @@ class TestArticlesGenerator(unittest.TestCase): settings = get_settings() settings['DIRECT_TEMPLATES'] = ['archives'] settings['ARCHIVES_SAVE_AS'] = 'archives/index.html' + settings['CACHE_DIRECTORY'] = self.temp_cache generator = ArticlesGenerator( context=settings, settings=settings, path=None, theme=settings['THEME'], output_path=None) @@ -180,6 +191,7 @@ class TestArticlesGenerator(unittest.TestCase): settings = get_settings() settings['DIRECT_TEMPLATES'] = ['archives'] settings['ARCHIVES_SAVE_AS'] = 'archives/index.html' + settings['CACHE_DIRECTORY'] = self.temp_cache generator = ArticlesGenerator( context=settings, settings=settings, path=None, theme=settings['THEME'], output_path=None) @@ -206,6 +218,7 @@ class TestArticlesGenerator(unittest.TestCase): settings = get_settings(filenames={}) settings['YEAR_ARCHIVE_SAVE_AS'] = 'posts/{date:%Y}/index.html' + settings['CACHE_DIRECTORY'] = self.temp_cache generator = ArticlesGenerator( context=settings, settings=settings, path=CONTENT_DIR, theme=settings['THEME'], output_path=None) 
@@ -268,6 +281,25 @@ class TestArticlesGenerator(unittest.TestCase): authors_expected = ['alexis-metaireau', 'first-author', 'second-author'] self.assertEqual(sorted(authors), sorted(authors_expected)) + def test_content_caching(self): + """Test that the articles are read only once when caching""" + settings = get_settings(filenames={}) + settings['CACHE_DIRECTORY'] = self.temp_cache + settings['READERS'] = {'asc': None} + + generator = ArticlesGenerator( + context=settings.copy(), settings=settings, + path=CONTENT_DIR, theme=settings['THEME'], output_path=None) + generator.generate_context() + self.assertTrue(hasattr(generator, '_cache')) + + generator = ArticlesGenerator( + context=settings.copy(), settings=settings, + path=CONTENT_DIR, theme=settings['THEME'], output_path=None) + generator.readers.read_file = MagicMock() + generator.generate_context() + generator.readers.read_file.assert_called_count == 0 + class TestPageGenerator(unittest.TestCase): # Note: Every time you want to test for a new field; Make sure the test @@ -275,12 +307,19 @@ class TestPageGenerator(unittest.TestCase): # distill_pages Then update the assertEqual in test_generate_context # to match expected + def setUp(self): + self.temp_cache = mkdtemp(prefix='pelican_cache.') + + def tearDown(self): + rmtree(self.temp_cache) + def distill_pages(self, pages): return [[page.title, page.status, page.template] for page in pages] def test_generate_context(self): settings = get_settings(filenames={}) settings['PAGE_DIR'] = 'TestPages' # relative to CUR_DIR + settings['CACHE_DIRECTORY'] = self.temp_cache settings['DEFAULT_DATE'] = (1970, 1, 1) generator = PagesGenerator( @@ -306,6 +345,26 @@ class TestPageGenerator(unittest.TestCase): self.assertEqual(sorted(pages_expected), sorted(pages)) self.assertEqual(sorted(hidden_pages_expected), sorted(hidden_pages)) + def test_content_caching(self): + """Test that the pages are read only once when caching""" + settings = get_settings(filenames={}) + 
settings['CACHE_DIRECTORY'] = 'cache_dir' #TODO + settings['CACHE_DIRECTORY'] = self.temp_cache + settings['READERS'] = {'asc': None} + + generator = PagesGenerator( + context=settings.copy(), settings=settings, + path=CUR_DIR, theme=settings['THEME'], output_path=None) + generator.generate_context() + self.assertTrue(hasattr(generator, '_cache')) + + generator = PagesGenerator( + context=settings.copy(), settings=settings, + path=CUR_DIR, theme=settings['THEME'], output_path=None) + generator.readers.read_file = MagicMock() + generator.generate_context() + generator.readers.read_file.assert_called_count == 0 + class TestTemplatePagesGenerator(unittest.TestCase): diff --git a/pelican/tests/test_pelican.py b/pelican/tests/test_pelican.py index 2d4bbdfc..15876095 100644 --- a/pelican/tests/test_pelican.py +++ b/pelican/tests/test_pelican.py @@ -43,12 +43,14 @@ class TestPelican(LoggedTestCase): def setUp(self): super(TestPelican, self).setUp() self.temp_path = mkdtemp(prefix='pelicantests.') + self.temp_cache = mkdtemp(prefix='pelican_cache.') self.old_locale = locale.setlocale(locale.LC_ALL) self.maxDiff = None locale.setlocale(locale.LC_ALL, str('C')) def tearDown(self): rmtree(self.temp_path) + rmtree(self.temp_cache) locale.setlocale(locale.LC_ALL, self.old_locale) super(TestPelican, self).tearDown() @@ -77,6 +79,7 @@ class TestPelican(LoggedTestCase): settings = read_settings(path=None, override={ 'PATH': INPUT_PATH, 'OUTPUT_PATH': self.temp_path, + 'CACHE_DIRECTORY': self.temp_cache, 'LOCALE': locale.normalize('en_US'), }) pelican = Pelican(settings=settings) @@ -92,6 +95,7 @@ class TestPelican(LoggedTestCase): settings = read_settings(path=SAMPLE_CONFIG, override={ 'PATH': INPUT_PATH, 'OUTPUT_PATH': self.temp_path, + 'CACHE_DIRECTORY': self.temp_cache, 'LOCALE': locale.normalize('en_US'), }) pelican = Pelican(settings=settings) @@ -103,6 +107,7 @@ class TestPelican(LoggedTestCase): settings = read_settings(path=SAMPLE_CONFIG, override={ 'PATH': INPUT_PATH, 
'OUTPUT_PATH': self.temp_path, + 'CACHE_DIRECTORY': self.temp_cache, 'THEME_STATIC_PATHS': [os.path.join(SAMPLES_PATH, 'very'), os.path.join(SAMPLES_PATH, 'kinda'), os.path.join(SAMPLES_PATH, 'theme_standard')] @@ -123,6 +128,7 @@ class TestPelican(LoggedTestCase): settings = read_settings(path=SAMPLE_CONFIG, override={ 'PATH': INPUT_PATH, 'OUTPUT_PATH': self.temp_path, + 'CACHE_DIRECTORY': self.temp_cache, 'THEME_STATIC_PATHS': [os.path.join(SAMPLES_PATH, 'theme_standard')] }) diff --git a/pelican/utils.py b/pelican/utils.py index c5aacaa3..8c416921 100644 --- a/pelican/utils.py +++ b/pelican/utils.py @@ -12,6 +12,8 @@ import pytz import re import shutil import traceback +import pickle +import hashlib from collections import Hashable from contextlib import contextmanager @@ -545,3 +547,114 @@ def split_all(path): break path = head return components + + +class FileDataCacher(object): + '''Class that can cache data contained in files''' + + def __init__(self, settings, cache_policy_key, load_policy_key): + '''Load the specified cache within CACHE_DIRECTORY + + only if load_policy_key in setttings is True, + May use gzip if GZIP_CACHE. 
+ Sets caching policy according to *cache_policy_key* + in *settings* + ''' + self.settings = settings + name = self.__class__.__name__ + self._cache_path = os.path.join(self.settings['CACHE_DIRECTORY'], name) + self._cache_data_policy = self.settings[cache_policy_key] + if not self.settings[load_policy_key]: + self._cache = {} + return + if self.settings['GZIP_CACHE']: + import gzip + self._cache_open = gzip.open + else: + self._cache_open = open + try: + with self._cache_open(self._cache_path, 'rb') as f: + self._cache = pickle.load(f) + except Exception as e: + self._cache = {} + + def cache_data(self, filename, data): + '''Cache data for given file''' + if not self._cache_data_policy: + return + self._cache[filename] = data + + def get_cached_data(self, filename, default={}): + '''Get cached data for the given file + + if no data is cached, return the default object + ''' + return self._cache.get(filename, default) + + def save_cache(self): + '''Save the updated cache''' + if not self._cache_data_policy: + return + try: + mkdir_p(self.settings['CACHE_DIRECTORY']) + with self._cache_open(self._cache_path, 'wb') as f: + pickle.dump(self._cache, f) + except Exception as e: + logger.warning('Could not save cache {}\n{}'.format( + self._cache_path, e)) + + +class FileStampDataCacher(FileDataCacher): + '''Subclass that also caches the stamp of the file''' + + def __init__(self, settings, cache_policy_key, load_policy_key): + '''This sublcass additionaly sets filestamp function''' + super(FileStampDataCacher, self).__init__(settings, cache_policy_key, + load_policy_key) + + method = self.settings['CHECK_MODIFIED_METHOD'] + if method == 'mtime': + self._filestamp_func = os.path.getmtime + else: + try: + hash_func = getattr(hashlib, method) + def filestamp_func(buf): + return hash_func(buf).digest() + self._filestamp_func = filestamp_func + except ImportError: + self._filestamp_func = None + + def cache_data(self, filename, data): + '''Cache stamp and data for the given 
file''' + stamp = self._get_file_stamp(filename) + super(FileStampDataCacher, self).cache_data(filename, (stamp, data)) + + def _get_file_stamp(self, filename): + '''Check if the given file has been modified + since the previous build. + + depending on CHECK_MODIFIED_METHOD + a float may be returned for 'mtime', + a hash for a function name in the hashlib module + or an empty bytes string otherwise + ''' + filename = os.path.join(self.path, filename) + try: + with open(filename, 'rb') as f: + return self._filestamp_func(f.read()) + except Exception: + return b'' + + def get_cached_data(self, filename, default=None): + '''Get the cached data for the given filename + if the file has not been modified. + + If no record exists or file has been modified, return default. + Modification is checked by compaing the cached + and current file stamp. + ''' + stamp, data = super(FileStampDataCacher, self).get_cached_data( + filename, (None, default)) + if stamp != self._get_file_stamp(filename): + return default + return data From 5b4381c19cb372866f5e2343d108c3193206195d Mon Sep 17 00:00:00 2001 From: Federico Ceratto Date: Sun, 9 Mar 2014 22:04:43 +0000 Subject: [PATCH 10/24] Add s3cmd MIME type detection --- pelican/tools/templates/Makefile.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pelican/tools/templates/Makefile.in b/pelican/tools/templates/Makefile.in index fe7a60a4..c542e588 100644 --- a/pelican/tools/templates/Makefile.in +++ b/pelican/tools/templates/Makefile.in @@ -97,7 +97,7 @@ ftp_upload: publish lftp ftp://$$(FTP_USER)@$$(FTP_HOST) -e "mirror -R $$(OUTPUTDIR) $$(FTP_TARGET_DIR) ; quit" s3_upload: publish - s3cmd sync $(OUTPUTDIR)/ s3://$(S3_BUCKET) --acl-public --delete-removed + s3cmd sync $(OUTPUTDIR)/ s3://$(S3_BUCKET) --acl-public --delete-removed --guess-mime-type cf_upload: publish cd $(OUTPUTDIR) && swift -v -A https://auth.api.rackspacecloud.com/v1.0 -U $(CLOUDFILES_USERNAME) -K $(CLOUDFILES_API_KEY) upload -c 
$(CLOUDFILES_CONTAINER) . From 4cae9ea88f552ec3166629ec1e9f004567245a06 Mon Sep 17 00:00:00 2001 From: Lonewolf Date: Mon, 31 Mar 2014 14:28:46 +0530 Subject: [PATCH 11/24] Added new sphinxtheme as requirement for docs Modified docs conf to support the theme update --- dev_requirements.txt | 3 +++ docs/conf.py | 34 +++++++++++++++++++++++++--------- 2 files changed, 28 insertions(+), 9 deletions(-) diff --git a/dev_requirements.txt b/dev_requirements.txt index c90ac630..01fe2507 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -9,3 +9,6 @@ typogrify # To perform release bumpr==0.2.0 + +# For docs theme +sphinx_rtd_theme diff --git a/docs/conf.py b/docs/conf.py index 6db0f3d1..99acd1b6 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -2,6 +2,8 @@ from __future__ import unicode_literals import sys, os +on_rtd = os.environ.get('READTHEDOCS', None) == 'True' + sys.path.append(os.path.abspath(os.pardir)) from pelican import __version__ @@ -21,29 +23,43 @@ rst_prolog = ''' .. |last_stable| replace:: :pelican-doc:`{0}` '''.format(last_stable) +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + extlinks = { 'pelican-doc': ('http://docs.getpelican.com/%s/', '') } # -- Options for HTML output --------------------------------------------------- -html_theme_path = ['_themes'] -html_theme = 'pelican' - -html_theme_options = { - 'nosidebar': True, - 'index_logo': 'pelican.png', - 'github_fork': 'getpelican/pelican', -} +html_theme = 'default' +if not on_rtd: + try: + import sphinx_rtd_theme + html_theme = 'sphinx_rtd_theme' + html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] + except ImportError: + pass html_static_path = ['_static'] # Output file base name for HTML help builder. htmlhelp_basename = 'Pelicandoc' +html_use_smartypants = True + +# If false, no module index is generated. +html_use_modindex = False + +# If false, no index is generated. 
+html_use_index = False + +# If true, links to the reST sources are added to the pages. +html_show_sourcelink = False + # -- Options for LaTeX output -------------------------------------------------- latex_documents = [ ('index', 'Pelican.tex', 'Pelican Documentation', 'Alexis Métaireau', 'manual'), ] From e97e9b5ae5fa5494f136521afb3594cf3e65fc83 Mon Sep 17 00:00:00 2001 From: Antoine Brenner Date: Tue, 15 Apr 2014 00:04:40 +0200 Subject: [PATCH 12/24] Fix unittest issue related to python2/python3 differences Under python 2, with non-ascii locales, u"{:%b}".format(date) can raise UnicodeDecodeError because u"{:%b}".format(date) will call date.__format__(u"%b"), which will return a byte string and not a unicode string. eg: locale.setlocale(locale.LC_ALL, 'ja_JP.utf8') date.__format__(u"%b") == '12\xe6\x9c\x88' # True This commit catches UnicodeDecodeError and calls date.__format__() with byte strings instead of characters, since it fails to work with character strings --- pelican/generators.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/pelican/generators.py b/pelican/generators.py index 7c2dbbf2..1b584d3f 100644 --- a/pelican/generators.py +++ b/pelican/generators.py @@ -309,7 +309,20 @@ class ArticlesGenerator(Generator): # format string syntax can be used for specifying the # period archive dates date = archive[0].date - save_as = save_as_fmt.format(date=date) + # Under python 2, with non-ascii locales, u"{:%b}".format(date) might raise UnicodeDecodeError + # because u"{:%b}".format(date) will call date.__format__(u"%b"), which will return a byte string + # and not a unicode string. 
+ # eg: + # locale.setlocale(locale.LC_ALL, 'ja_JP.utf8') + # date.__format__(u"%b") == '12\xe6\x9c\x88' # True + try: + save_as = save_as_fmt.format(date=date) + except UnicodeDecodeError: + # Python2 only: + # Let date.__format__() work with byte strings instead of characters since it fails to work with characters + bytes_save_as_fmt = save_as_fmt.encode('utf8') + bytes_save_as = bytes_save_as_fmt.format(date=date) + save_as = unicode(bytes_save_as,'utf8') context = self.context.copy() if key == period_date_key['year']: From 7277c95fb588463d035decefcf86f59d41a4e7c0 Mon Sep 17 00:00:00 2001 From: Antoine Brenner Date: Tue, 15 Apr 2014 16:36:29 +0200 Subject: [PATCH 13/24] Make sure locale is what we want before/after the tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The locale is a global state, and it was not properly reset to whatever it was before the unit tests possibly changed it. This is now fixed. Not restoring the locale led to weird issues: depending on the order chosen by "python -m unittest discover" to run the unit tests, some tests would apparently randomly fail due to the locale not being what was expected. For example, test_period_in_timeperiod_archive would call mock('posts/1970/ 1月/index.html',...) instead of the expected mock('posts/1970/Jan/index.html',...) and fail. 
--- pelican/tests/test_contents.py | 6 ++++++ pelican/tests/test_generators.py | 11 +++++++++++ pelican/tests/test_importer.py | 11 +++++++++++ pelican/tests/test_paginator.py | 8 +++++++- pelican/tests/test_pelican.py | 2 +- pelican/tests/test_settings.py | 5 +++++ pelican/tests/test_utils.py | 3 +++ 7 files changed, 44 insertions(+), 2 deletions(-) diff --git a/pelican/tests/test_contents.py b/pelican/tests/test_contents.py index 27d2a897..3c0f8d75 100644 --- a/pelican/tests/test_contents.py +++ b/pelican/tests/test_contents.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals, absolute_import import six from datetime import datetime from sys import platform +import locale from pelican.tests.support import unittest, get_settings @@ -22,6 +23,8 @@ class TestPage(unittest.TestCase): def setUp(self): super(TestPage, self).setUp() + self.old_locale = locale.setlocale(locale.LC_ALL) + locale.setlocale(locale.LC_ALL, str('C')) self.page_kwargs = { 'content': TEST_CONTENT, 'context': { @@ -35,6 +38,9 @@ class TestPage(unittest.TestCase): 'source_path': '/path/to/file/foo.ext' } + def tearDown(self): + locale.setlocale(locale.LC_ALL, self.old_locale) + def test_use_args(self): # Creating a page with arguments passed to the constructor should use # them to initialise object's attributes. 
diff --git a/pelican/tests/test_generators.py b/pelican/tests/test_generators.py index a500f87a..ff487c3e 100644 --- a/pelican/tests/test_generators.py +++ b/pelican/tests/test_generators.py @@ -14,6 +14,7 @@ from pelican.generators import (Generator, ArticlesGenerator, PagesGenerator, TemplatePagesGenerator) from pelican.writers import Writer from pelican.tests.support import unittest, get_settings +import locale CUR_DIR = os.path.dirname(__file__) CONTENT_DIR = os.path.join(CUR_DIR, 'content') @@ -21,11 +22,17 @@ CONTENT_DIR = os.path.join(CUR_DIR, 'content') class TestGenerator(unittest.TestCase): def setUp(self): + self.old_locale = locale.setlocale(locale.LC_ALL) + locale.setlocale(locale.LC_ALL, str('C')) self.settings = get_settings() self.settings['READERS'] = {'asc': None} self.generator = Generator(self.settings.copy(), self.settings, CUR_DIR, self.settings['THEME'], None) + def tearDown(self): + locale.setlocale(locale.LC_ALL, self.old_locale) + + def test_include_path(self): filename = os.path.join(CUR_DIR, 'content', 'article.rst') include_path = self.generator._include_path @@ -373,10 +380,14 @@ class TestTemplatePagesGenerator(unittest.TestCase): def setUp(self): self.temp_content = mkdtemp(prefix='pelicantests.') self.temp_output = mkdtemp(prefix='pelicantests.') + self.old_locale = locale.setlocale(locale.LC_ALL) + locale.setlocale(locale.LC_ALL, str('C')) + def tearDown(self): rmtree(self.temp_content) rmtree(self.temp_output) + locale.setlocale(locale.LC_ALL, self.old_locale) def test_generate_output(self): diff --git a/pelican/tests/test_importer.py b/pelican/tests/test_importer.py index 8412c75b..65193bf5 100644 --- a/pelican/tests/test_importer.py +++ b/pelican/tests/test_importer.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals, print_function import os import re +import locale from pelican.tools.pelican_import import wp2fields, fields2pelican, decode_wp_content, build_header, build_markdown_header, get_attachments, 
download_attachments from pelican.tests.support import (unittest, temporary_folder, mute, skipIfNoExecutable) @@ -30,9 +31,14 @@ except ImportError: class TestWordpressXmlImporter(unittest.TestCase): def setUp(self): + self.old_locale = locale.setlocale(locale.LC_ALL) + locale.setlocale(locale.LC_ALL, str('C')) self.posts = list(wp2fields(WORDPRESS_XML_SAMPLE)) self.custposts = list(wp2fields(WORDPRESS_XML_SAMPLE, True)) + def tearDown(self): + locale.setlocale(locale.LC_ALL, self.old_locale) + def test_ignore_empty_posts(self): self.assertTrue(self.posts) for title, content, fname, date, author, categ, tags, kind, format in self.posts: @@ -261,8 +267,13 @@ class TestBuildHeader(unittest.TestCase): @unittest.skipUnless(BeautifulSoup, 'Needs BeautifulSoup module') class TestWordpressXMLAttachements(unittest.TestCase): def setUp(self): + self.old_locale = locale.setlocale(locale.LC_ALL) + locale.setlocale(locale.LC_ALL, str('C')) self.attachments = get_attachments(WORDPRESS_XML_SAMPLE) + def tearDown(self): + locale.setlocale(locale.LC_ALL, self.old_locale) + def test_recognise_attachments(self): self.assertTrue(self.attachments) self.assertTrue(len(self.attachments.keys()) == 3) diff --git a/pelican/tests/test_paginator.py b/pelican/tests/test_paginator.py index f454d47d..108dc791 100644 --- a/pelican/tests/test_paginator.py +++ b/pelican/tests/test_paginator.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals, absolute_import import six +import locale from pelican.tests.support import unittest, get_settings @@ -16,6 +17,8 @@ TEST_SUMMARY = generate_lorem_ipsum(n=1, html=False) class TestPage(unittest.TestCase): def setUp(self): super(TestPage, self).setUp() + self.old_locale = locale.setlocale(locale.LC_ALL) + locale.setlocale(locale.LC_ALL, str('C')) self.page_kwargs = { 'content': TEST_CONTENT, 'context': { @@ -29,6 +32,9 @@ class TestPage(unittest.TestCase): 'source_path': '/path/to/file/foo.ext' } + def tearDown(self): + 
locale.setlocale(locale.LC_ALL, self.old_locale) + def test_save_as_preservation(self): settings = get_settings() # fix up pagination rules @@ -47,4 +53,4 @@ class TestPage(unittest.TestCase): object_list = [Article(**self.page_kwargs), Article(**self.page_kwargs)] paginator = Paginator('foobar.foo', object_list, settings) page = paginator.page(1) - self.assertEqual(page.save_as, 'foobar.foo') \ No newline at end of file + self.assertEqual(page.save_as, 'foobar.foo') diff --git a/pelican/tests/test_pelican.py b/pelican/tests/test_pelican.py index 15876095..974986cd 100644 --- a/pelican/tests/test_pelican.py +++ b/pelican/tests/test_pelican.py @@ -44,8 +44,8 @@ class TestPelican(LoggedTestCase): super(TestPelican, self).setUp() self.temp_path = mkdtemp(prefix='pelicantests.') self.temp_cache = mkdtemp(prefix='pelican_cache.') - self.old_locale = locale.setlocale(locale.LC_ALL) self.maxDiff = None + self.old_locale = locale.setlocale(locale.LC_ALL) locale.setlocale(locale.LC_ALL, str('C')) def tearDown(self): diff --git a/pelican/tests/test_settings.py b/pelican/tests/test_settings.py index 7907a551..930e0fea 100644 --- a/pelican/tests/test_settings.py +++ b/pelican/tests/test_settings.py @@ -16,10 +16,15 @@ class TestSettingsConfiguration(unittest.TestCase): optimizations. 
""" def setUp(self): + self.old_locale = locale.setlocale(locale.LC_ALL) + locale.setlocale(locale.LC_ALL, str('C')) self.PATH = abspath(dirname(__file__)) default_conf = join(self.PATH, 'default_conf.py') self.settings = read_settings(default_conf) + def tearDown(self): + locale.setlocale(locale.LC_ALL, self.old_locale) + def test_overwrite_existing_settings(self): self.assertEqual(self.settings.get('SITENAME'), "Alexis' log") self.assertEqual(self.settings.get('SITEURL'), diff --git a/pelican/tests/test_utils.py b/pelican/tests/test_utils.py index 9047593f..02398336 100644 --- a/pelican/tests/test_utils.py +++ b/pelican/tests/test_utils.py @@ -354,9 +354,12 @@ class TestCopy(unittest.TestCase): def setUp(self): self.root_dir = mkdtemp(prefix='pelicantests.') + self.old_locale = locale.setlocale(locale.LC_ALL) + locale.setlocale(locale.LC_ALL, str('C')) def tearDown(self): shutil.rmtree(self.root_dir) + locale.setlocale(locale.LC_ALL, self.old_locale) def _create_file(self, *path): with open(os.path.join(self.root_dir, *path), 'w') as f: From dd70f1b24ece9bba33fb115336a6e159c867f5b2 Mon Sep 17 00:00:00 2001 From: Justin Mayer Date: Tue, 15 Apr 2014 11:13:10 -0400 Subject: [PATCH 14/24] Fix settings table in docs --- docs/settings.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/settings.rst b/docs/settings.rst index d8690230..9599ee10 100644 --- a/docs/settings.rst +++ b/docs/settings.rst @@ -249,9 +249,9 @@ posts for the month at ``posts/2011/Aug/index.html``. arrive at an appropriate archive of posts, without having to specify a page name. -==================================================== ===================================================== +====================================================== ===================================================== Setting name (default value) What does it do? 
-==================================================== ===================================================== +====================================================== ===================================================== `ARTICLE_URL` (``'{slug}.html'``) The URL to refer to an article. `ARTICLE_SAVE_AS` (``'{slug}.html'``) The place where we will save an article. `ARTICLE_LANG_URL` (``'{slug}-{lang}.html'``) The URL to refer to an article which doesn't use the @@ -262,7 +262,7 @@ Setting name (default value) What does it do? `DRAFT_SAVE_AS` (``'drafts/{slug}.html'``) The place where we will save an article draft. `DRAFT_LANG_URL` (``'drafts/{slug}-{lang}.html'``) The URL to refer to an article draft which doesn't use the default language. -`DRAFT_LANG_SAVE_AS` (``'drafts/{slug}-{lang}.html'``) The place where we will save an article draft which +`DRAFT_LANG_SAVE_AS` (``'drafts/{slug}-{lang}.html'``) The place where we will save an article draft which doesn't use the default language. `PAGE_URL` (``'pages/{slug}.html'``) The URL we will use to link to a page. `PAGE_SAVE_AS` (``'pages/{slug}.html'``) The location we will save the page. This value has to be @@ -285,7 +285,7 @@ Setting name (default value) What does it do? non-alphanumerics when generating slugs. Specified as a list of 2-tuples of ``(from, to)`` which are applied in order. -==================================================== ===================================================== +====================================================== ===================================================== .. 
note:: From fd7cb9e2132e0b66adeff40c5d780a842f84fb6c Mon Sep 17 00:00:00 2001 From: Antoine Brenner Date: Tue, 15 Apr 2014 22:01:20 +0200 Subject: [PATCH 15/24] Test to reproduce an issue that occurs with python3.3 under macos10 only This test passes fine under linux --- pelican/tests/test_utils.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/pelican/tests/test_utils.py b/pelican/tests/test_utils.py index 02398336..3c12a15b 100644 --- a/pelican/tests/test_utils.py +++ b/pelican/tests/test_utils.py @@ -458,6 +458,25 @@ class TestDateFormatter(unittest.TestCase): locale.setlocale(locale.LC_ALL, '') + @unittest.skipUnless(locale_available('fr_FR.UTF-8') or + locale_available('French'), + 'French locale needed') + def test_french_strftime(self): + # This test tries to reproduce an issue that occured with python3.3 under macos10 only + locale.setlocale(locale.LC_ALL, str('fr_FR.UTF-8')) + date = datetime.datetime(2014,8,14) + # we compare the lower() dates since macos10 returns "Jeudi" for %A whereas linux reports "jeudi" + self.assertEqual( u'jeudi, 14 août 2014', utils.strftime(date, date_format="%A, %d %B %Y").lower() ) + df = utils.DateFormatter() + self.assertEqual( u'jeudi, 14 août 2014', df(date, date_format="%A, %d %B %Y").lower() ) + # Let us now set the global locale to C: + locale.setlocale(locale.LC_ALL, str('C')) + # DateFormatter should still work as expected since it is the whole point of DateFormatter + # (This is where pre-2014/4/15 code fails on macos10) + df_date = df(date, date_format="%A, %d %B %Y").lower() + self.assertEqual( u'jeudi, 14 août 2014', df_date ) + + @unittest.skipUnless(locale_available('fr_FR.UTF-8') or locale_available('French'), 'French locale needed') From 6703950abec63a25d412bd96d6aa419c6f449c97 Mon Sep 17 00:00:00 2001 From: Ondrej Grover Date: Thu, 17 Apr 2014 16:28:22 +0200 Subject: [PATCH 16/24] enable writing of only selected output paths - add WRITE_SELECTED setting - add --write-selected 
commandline option --- docs/faq.rst | 5 +++++ docs/settings.rst | 19 +++++++++++++++++++ pelican/__init__.py | 6 ++++++ pelican/settings.py | 7 +++++++ pelican/tests/test_pelican.py | 23 +++++++++++++++++++++++ pelican/utils.py | 14 ++++++++++++++ pelican/writers.py | 9 +++++++-- 7 files changed, 81 insertions(+), 2 deletions(-) diff --git a/docs/faq.rst b/docs/faq.rst index bb9377e6..bf468c51 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -224,3 +224,8 @@ every time, so a ``rsync`` based upload will transfer them even if their content hasn't changed. A simple solution is to make ``rsync`` use the ``--checksum`` option, which will make it compare the file checksums in a much faster way than Pelican would. + +When only several specific output files are of interest (e.g. when +working on some specific page or the theme templates), the +`WRITE_SELECTED` option may help, see +:ref:`writing_only_selected_content`. diff --git a/docs/settings.rst b/docs/settings.rst index 9599ee10..8d8f9a16 100644 --- a/docs/settings.rst +++ b/docs/settings.rst @@ -179,6 +179,10 @@ Setting name (default value) `CHECK_MODIFIED_METHOD` (``mtime``) Controls how files are checked for modifications. `LOAD_CONTENT_CACHE` (``True``) If ``True``, load unmodified content from cache. `GZIP_CACHE` (``True``) If ``True``, use gzip to (de)compress the cache files. +`WRITE_SELECTED` (``[]``) If this list is not empty, **only** output files with their paths + in this list are written. Paths should be either relative to the current + working directory of Pelican or absolute. For possible use cases see + :ref:`writing_only_selected_content`. =============================================================================== ===================================================================== .. [#] Default is the system locale. @@ -774,6 +778,21 @@ written, so the modification times of the ``*.html`` files always change. Therefore, ``rsync`` based upload may benefit from the ``--checksum`` option. +.. 
_writing_only_selected_content: + +Writing only selected content +============================= + +When one article or page or the theme is being worked on it is often +desirable to display selected output files as soon as possible. In +such cases generating and writing all output is often unnecessary. +These selected output files can be given as output paths in the +`WRITE_SELECTED` list and **only** those files will be written. This +list can be also specified on the command-line using the +``--write-selected`` option which accepts a comma separated list +of output file paths. By default the list is empty so all output is +written. + Example settings ================ diff --git a/pelican/__init__.py b/pelican/__init__.py index b6bfe326..1ed98fc3 100644 --- a/pelican/__init__.py +++ b/pelican/__init__.py @@ -264,6 +264,10 @@ def parse_arguments(): parser.add_argument('-f', '--full-rebuild', action='store_true', dest='full_rebuild', help='Rebuild everything by not loading from cache') + parser.add_argument('-w', '--write-selected', type=str, + dest='selected_paths', default=None, + help='Comma separated list of selected paths to write') + return parser.parse_args() @@ -281,6 +285,8 @@ def get_config(args): config['DELETE_OUTPUT_DIRECTORY'] = args.delete_outputdir if args.full_rebuild: config['LOAD_CONTENT_CACHE'] = False + if args.selected_paths: + config['WRITE_SELECTED'] = args.selected_paths.split(',') # argparse returns bytes in Py2. There is no definite answer as to which # encoding argparse (or sys.argv) uses. 
diff --git a/pelican/settings.py b/pelican/settings.py index baf2a497..7615c25c 100644 --- a/pelican/settings.py +++ b/pelican/settings.py @@ -125,6 +125,7 @@ DEFAULT_CONFIG = { 'GZIP_CACHE': True, 'CHECK_MODIFIED_METHOD': 'mtime', 'LOAD_CONTENT_CACHE': True, + 'WRITE_SELECTED': [], } PYGMENTS_RST_OPTIONS = None @@ -200,6 +201,12 @@ def configure_settings(settings): raise Exception("Could not find the theme %s" % settings['THEME']) + # make paths selected for writing absolute if necessary + settings['WRITE_SELECTED'] = [ + os.path.abspath(path) for path in + settings.get('WRITE_SELECTED', DEFAULT_CONFIG['WRITE_SELECTED']) + ] + # standardize strings to lowercase strings for key in [ 'DEFAULT_LANG', diff --git a/pelican/tests/test_pelican.py b/pelican/tests/test_pelican.py index 974986cd..294cf399 100644 --- a/pelican/tests/test_pelican.py +++ b/pelican/tests/test_pelican.py @@ -138,3 +138,26 @@ class TestPelican(LoggedTestCase): for file in ['a_stylesheet', 'a_template']: self.assertTrue(os.path.exists(os.path.join(theme_output, file))) + + def test_write_only_selected(self): + """Test that only the selected files are written""" + settings = read_settings(path=None, override={ + 'PATH': INPUT_PATH, + 'OUTPUT_PATH': self.temp_path, + 'CACHE_DIRECTORY': self.temp_cache, + 'WRITE_SELECTED': [ + os.path.join(self.temp_path, 'oh-yeah.html'), + os.path.join(self.temp_path, 'categories.html'), + ], + 'LOCALE': locale.normalize('en_US'), + }) + pelican = Pelican(settings=settings) + logger = logging.getLogger() + orig_level = logger.getEffectiveLevel() + logger.setLevel(logging.INFO) + mute(True)(pelican.run)() + logger.setLevel(orig_level) + self.assertLogCountEqual( + count=2, + msg="writing .*", + level=logging.INFO) diff --git a/pelican/utils.py b/pelican/utils.py index 8c416921..cd942fd5 100644 --- a/pelican/utils.py +++ b/pelican/utils.py @@ -658,3 +658,17 @@ class FileStampDataCacher(FileDataCacher): if stamp != self._get_file_stamp(filename): return default return 
data + + +def is_selected_for_writing(settings, path): + '''Check whether path is selected for writing + according to the WRITE_SELECTED list + + If WRITE_SELECTED is an empty list (default), + any path is selected for writing. + ''' + if settings['WRITE_SELECTED']: + return path in settings['WRITE_SELECTED'] + else: + return True + diff --git a/pelican/writers.py b/pelican/writers.py index 19e36e39..a92feee4 100644 --- a/pelican/writers.py +++ b/pelican/writers.py @@ -16,7 +16,8 @@ from feedgenerator import Atom1Feed, Rss201rev2Feed from jinja2 import Markup from pelican.paginator import Paginator -from pelican.utils import get_relative_path, path_to_url, set_date_tzinfo +from pelican.utils import (get_relative_path, path_to_url, set_date_tzinfo, + is_selected_for_writing) from pelican import signals logger = logging.getLogger(__name__) @@ -92,6 +93,8 @@ class Writer(object): :param path: the path to output. :param feed_type: the feed type to use (atom or rss) """ + if not is_selected_for_writing(self.settings, path): + return old_locale = locale.setlocale(locale.LC_ALL) locale.setlocale(locale.LC_ALL, str('C')) try: @@ -140,7 +143,9 @@ class Writer(object): :param **kwargs: additional variables to pass to the templates """ - if name is False or name == "": + if name is False or name == "" or\ + not is_selected_for_writing(self.settings,\ + os.path.join(self.output_path, name)): return elif not name: # other stuff, just return for now From 6972261261a4eed164728943a037e1382d772f21 Mon Sep 17 00:00:00 2001 From: Simon Conseil Date: Thu, 17 Apr 2014 22:57:26 +0200 Subject: [PATCH 17/24] Add python 3.4 to tox config. --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index a72aea21..5dd36c36 100644 --- a/tox.ini +++ b/tox.ini @@ -2,7 +2,7 @@ # depends on some external libraries that aren't released yet. 
[tox] -envlist = py27,py33 +envlist = py27,py33,py34 [testenv] commands = From 676981c62110779e3d6adb04d08492f9f32f7af3 Mon Sep 17 00:00:00 2001 From: Ondrej Grover Date: Fri, 18 Apr 2014 06:57:59 +0200 Subject: [PATCH 18/24] set _cache_open func even if not loading cache, fixes autoreload The _cache_open attribute of the FileDataCacher class was not set when settings[load_policy_key] was not True, so saving later failed. As a precaution, replaced the `if ...: return` style with a plain if structure to prevent such readability issues and added tests. --- pelican/tests/test_generators.py | 48 ++++++++++++++++++++++++++++++++ pelican/utils.py | 36 +++++++++++------------- 2 files changed, 65 insertions(+), 19 deletions(-) diff --git a/pelican/tests/test_generators.py b/pelican/tests/test_generators.py index ff487c3e..f951f0cb 100644 --- a/pelican/tests/test_generators.py +++ b/pelican/tests/test_generators.py @@ -307,6 +307,30 @@ class TestArticlesGenerator(unittest.TestCase): generator.generate_context() generator.readers.read_file.assert_called_count == 0 + def test_full_rebuild(self): + """Test that all the articles are read again when not loading cache + + used in --full-rebuild or autoreload mode""" + settings = get_settings(filenames={}) + settings['CACHE_DIRECTORY'] = self.temp_cache + settings['READERS'] = {'asc': None} + + generator = ArticlesGenerator( + context=settings.copy(), settings=settings, + path=CONTENT_DIR, theme=settings['THEME'], output_path=None) + generator.readers.read_file = MagicMock() + generator.generate_context() + self.assertTrue(hasattr(generator, '_cache_open')) + orig_call_count = generator.readers.read_file.call_count + + settings['LOAD_CONTENT_CACHE'] = False + generator = ArticlesGenerator( + context=settings.copy(), settings=settings, + path=CONTENT_DIR, theme=settings['THEME'], output_path=None) + generator.readers.read_file = MagicMock() + generator.generate_context() + generator.readers.read_file.assert_called_count == 
orig_call_count + class TestPageGenerator(unittest.TestCase): # Note: Every time you want to test for a new field; Make sure the test @@ -372,6 +396,30 @@ class TestPageGenerator(unittest.TestCase): generator.generate_context() generator.readers.read_file.assert_called_count == 0 + def test_full_rebuild(self): + """Test that all the pages are read again when not loading cache + + used in --full-rebuild or autoreload mode""" + settings = get_settings(filenames={}) + settings['CACHE_DIRECTORY'] = self.temp_cache + settings['READERS'] = {'asc': None} + + generator = PagesGenerator( + context=settings.copy(), settings=settings, + path=CONTENT_DIR, theme=settings['THEME'], output_path=None) + generator.readers.read_file = MagicMock() + generator.generate_context() + self.assertTrue(hasattr(generator, '_cache_open')) + orig_call_count = generator.readers.read_file.call_count + + settings['LOAD_CONTENT_CACHE'] = False + generator = PagesGenerator( + context=settings.copy(), settings=settings, + path=CONTENT_DIR, theme=settings['THEME'], output_path=None) + generator.readers.read_file = MagicMock() + generator.generate_context() + generator.readers.read_file.assert_called_count == orig_call_count + class TestTemplatePagesGenerator(unittest.TestCase): diff --git a/pelican/utils.py b/pelican/utils.py index 8c416921..e76c559f 100644 --- a/pelican/utils.py +++ b/pelican/utils.py @@ -564,25 +564,24 @@ class FileDataCacher(object): name = self.__class__.__name__ self._cache_path = os.path.join(self.settings['CACHE_DIRECTORY'], name) self._cache_data_policy = self.settings[cache_policy_key] - if not self.settings[load_policy_key]: - self._cache = {} - return if self.settings['GZIP_CACHE']: import gzip self._cache_open = gzip.open else: self._cache_open = open - try: - with self._cache_open(self._cache_path, 'rb') as f: - self._cache = pickle.load(f) - except Exception as e: + if self.settings[load_policy_key]: + try: + with self._cache_open(self._cache_path, 'rb') as f: + 
self._cache = pickle.load(f) + except Exception as e: + self._cache = {} + else: self._cache = {} def cache_data(self, filename, data): '''Cache data for given file''' - if not self._cache_data_policy: - return - self._cache[filename] = data + if self._cache_data_policy: + self._cache[filename] = data def get_cached_data(self, filename, default={}): '''Get cached data for the given file @@ -593,15 +592,14 @@ class FileDataCacher(object): def save_cache(self): '''Save the updated cache''' - if not self._cache_data_policy: - return - try: - mkdir_p(self.settings['CACHE_DIRECTORY']) - with self._cache_open(self._cache_path, 'wb') as f: - pickle.dump(self._cache, f) - except Exception as e: - logger.warning('Could not save cache {}\n{}'.format( - self._cache_path, e)) + if self._cache_data_policy: + try: + mkdir_p(self.settings['CACHE_DIRECTORY']) + with self._cache_open(self._cache_path, 'wb') as f: + pickle.dump(self._cache, f) + except Exception as e: + logger.warning('Could not save cache {}\n{}'.format( + self._cache_path, e)) class FileStampDataCacher(FileDataCacher): From 22484983e911daec0234e924574e5b2f52683f70 Mon Sep 17 00:00:00 2001 From: James Lee Date: Sat, 19 Apr 2014 03:37:47 +0900 Subject: [PATCH 19/24] Handle list metadata as list of string in MarkdownReader --- pelican/readers.py | 6 ++++++ .../tests/content/article_with_markdown_and_footnote.md | 6 ++++++ pelican/tests/test_readers.py | 8 ++++++++ 3 files changed, 20 insertions(+) diff --git a/pelican/readers.py b/pelican/readers.py index 35c38220..3f8a551e 100644 --- a/pelican/readers.py +++ b/pelican/readers.py @@ -204,12 +204,18 @@ class MarkdownReader(BaseReader): for name, value in meta.items(): name = name.lower() if name == "summary": + # handle summary metadata as markdown + # summary metadata is special case and join all list values summary_values = "\n".join(value) # reset the markdown instance to clear any state self._md.reset() summary = self._md.convert(summary_values) output[name] = 
self.process_metadata(name, summary) + elif len(value) > 1: + # handle list metadata as list of string + output[name] = self.process_metadata(name, value) else: + # otherwise, handle metadata as single string output[name] = self.process_metadata(name, value[0]) return output diff --git a/pelican/tests/content/article_with_markdown_and_footnote.md b/pelican/tests/content/article_with_markdown_and_footnote.md index 332ccea6..6fea2d6e 100644 --- a/pelican/tests/content/article_with_markdown_and_footnote.md +++ b/pelican/tests/content/article_with_markdown_and_footnote.md @@ -2,6 +2,12 @@ Title: Article with markdown containing footnotes Date: 2012-10-31 Modified: 2012-11-01 Summary: Summary with **inline** markup *should* be supported. +Multiline: Line Metadata should be handle properly. + See syntax of Meta-Data extension of Python Markdown package: + If a line is indented by 4 or more spaces, + that line is assumed to be an additional line of the value + for the previous keyword. + A keyword may have as many lines as desired. 
This is some content[^1] with some footnotes[^footnote] diff --git a/pelican/tests/test_readers.py b/pelican/tests/test_readers.py index d4201a5e..fd30e9b9 100644 --- a/pelican/tests/test_readers.py +++ b/pelican/tests/test_readers.py @@ -214,6 +214,14 @@ class MdReaderTest(ReaderTest): 'date': datetime.datetime(2012, 10, 31), 'modified': datetime.datetime(2012, 11, 1), 'slug': 'article-with-markdown-containing-footnotes', + 'multiline': [ + 'Line Metadata should be handle properly.', + 'See syntax of Meta-Data extension of Python Markdown package:', + 'If a line is indented by 4 or more spaces,', + 'that line is assumed to be an additional line of the value', + 'for the previous keyword.', + 'A keyword may have as many lines as desired.', + ] } self.assertEqual(content, expected_content) for key, value in metadata.items(): From c386e29d0c21e17895eef545e1ba0936ccc9c30a Mon Sep 17 00:00:00 2001 From: Lonewolf Date: Sun, 2 Mar 2014 19:21:22 +0530 Subject: [PATCH 20/24] Ability to specify PLUGIN_PATH as list PLUGIN_PATH added to settings table --- docs/plugins.rst | 2 +- pelican/__init__.py | 3 ++- pelican/settings.py | 14 +++++++++++--- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/docs/plugins.rst b/docs/plugins.rst index c03b1251..9dddce70 100644 --- a/docs/plugins.rst +++ b/docs/plugins.rst @@ -24,7 +24,7 @@ If your plugins are not in an importable path, you can specify a ``PLUGIN_PATH`` in the settings. 
``PLUGIN_PATH`` can be an absolute path or a path relative to the settings file:: - PLUGIN_PATH = "plugins" + PLUGIN_PATH = ["list", "of", plugins path"] PLUGINS = ["list", "of", "plugins"] Where to find plugins diff --git a/pelican/__init__.py b/pelican/__init__.py index 1ed98fc3..077859bb 100644 --- a/pelican/__init__.py +++ b/pelican/__init__.py @@ -65,7 +65,8 @@ class Pelican(object): self.plugins = [] logger.debug('Temporarily adding PLUGIN_PATH to system path') _sys_path = sys.path[:] - sys.path.insert(0, self.settings['PLUGIN_PATH']) + for pluginpath in self.settings['PLUGIN_PATH']: + sys.path.insert(0, pluginpath) for plugin in self.settings['PLUGINS']: # if it's a string, then import it if isinstance(plugin, six.string_types): diff --git a/pelican/settings.py b/pelican/settings.py index 7615c25c..7caffa61 100644 --- a/pelican/settings.py +++ b/pelican/settings.py @@ -112,7 +112,7 @@ DEFAULT_CONFIG = { 'ARTICLE_PERMALINK_STRUCTURE': '', 'TYPOGRIFY': False, 'SUMMARY_MAX_LENGTH': 50, - 'PLUGIN_PATH': '', + 'PLUGIN_PATH': [], 'PLUGINS': [], 'PYGMENTS_RST_OPTIONS': {}, 'TEMPLATE_PAGES': {}, @@ -135,13 +135,21 @@ def read_settings(path=None, override=None): if path: local_settings = get_settings_from_file(path) # Make the paths relative to the settings file - for p in ['PATH', 'OUTPUT_PATH', 'THEME', 'PLUGIN_PATH']: + for p in ['PATH', 'OUTPUT_PATH', 'THEME']: if p in local_settings and local_settings[p] is not None \ and not isabs(local_settings[p]): absp = os.path.abspath(os.path.normpath(os.path.join( os.path.dirname(path), local_settings[p]))) - if p not in ('THEME', 'PLUGIN_PATH') or os.path.exists(absp): + if p not in ('THEME') or os.path.exists(absp): local_settings[p] = absp + + if isinstance(local_settings['PLUGIN_PATH'], six.string_types): + logger.warning("Detected misconfiguration with %s setting ""(must be a list)" % 'PLUGIN_PATH') + local_settings['PLUGIN_PATH'] = [local_settings['PLUGIN_PATH']] + else: + if 'PLUGIN_PATH' in local_settings and 
local_settings['PLUGIN_PATH'] is not None: + local_settings['PLUGIN_PATH'] = [os.path.abspath(os.path.normpath(os.path.join(os.path.dirname(path), pluginpath))) + if not isabs(pluginpath) else pluginpath for pluginpath in local_settings['PLUGIN_PATH']] else: local_settings = copy.deepcopy(DEFAULT_CONFIG) From f0802e8114b6edf6f239ecfb9ab3cd4db8cc9dfc Mon Sep 17 00:00:00 2001 From: Justin Mayer Date: Fri, 18 Apr 2014 13:21:06 -0700 Subject: [PATCH 21/24] Text tweaks for "PLUGIN_PATH as list" feature --- docs/plugins.rst | 10 +++++----- pelican/settings.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/plugins.rst b/docs/plugins.rst index 9dddce70..16d697fa 100644 --- a/docs/plugins.rst +++ b/docs/plugins.rst @@ -20,12 +20,12 @@ Alternatively, another method is to import them and add them to the list:: from package import myplugin PLUGINS = [myplugin,] -If your plugins are not in an importable path, you can specify a ``PLUGIN_PATH`` -in the settings. ``PLUGIN_PATH`` can be an absolute path or a path relative to -the settings file:: +If your plugins are not in an importable path, you can specify a list of paths +via the ``PLUGIN_PATH`` setting. 
As shown in the following example, paths in +the ``PLUGIN_PATH`` list can be absolute or relative to the settings file:: - PLUGIN_PATH = ["list", "of", plugins path"] - PLUGINS = ["list", "of", "plugins"] + PLUGIN_PATH = ["plugins", "/srv/pelican/plugins"] + PLUGINS = ["assets", "liquid_tags", "sitemap"] Where to find plugins ===================== diff --git a/pelican/settings.py b/pelican/settings.py index 7caffa61..ee337386 100644 --- a/pelican/settings.py +++ b/pelican/settings.py @@ -144,7 +144,7 @@ def read_settings(path=None, override=None): local_settings[p] = absp if isinstance(local_settings['PLUGIN_PATH'], six.string_types): - logger.warning("Detected misconfiguration with %s setting ""(must be a list)" % 'PLUGIN_PATH') + logger.warning("Defining %s setting as string has been deprecated (should be a list)" % 'PLUGIN_PATH') local_settings['PLUGIN_PATH'] = [local_settings['PLUGIN_PATH']] else: if 'PLUGIN_PATH' in local_settings and local_settings['PLUGIN_PATH'] is not None: From 260953da02bea53a68545060d354b3ed079fc988 Mon Sep 17 00:00:00 2001 From: Tastalian Date: Mon, 10 Mar 2014 04:16:38 +0100 Subject: [PATCH 22/24] Make docutils requirement explicit. Fixes #1243. Previously, the error returned by Python when docutils is not installed was not explicit, instead saying that HTMLTranslator is not defined (needed by FeedGenerator and such), forcing the user to go into readers.py to figure out that this happens because "import docutils" failed. This pull request makes the docutils dependency explicit, so that there is an ImportError if docutils is not found.
--- pelican/readers.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/pelican/readers.py b/pelican/readers.py index 35c38220..43749dce 100644 --- a/pelican/readers.py +++ b/pelican/readers.py @@ -6,16 +6,13 @@ import logging import os import re -try: - import docutils - import docutils.core - import docutils.io - from docutils.writers.html4css1 import HTMLTranslator +import docutils +import docutils.core +import docutils.io +from docutils.writers.html4css1 import HTMLTranslator - # import the directives to have pygments support - from pelican import rstdirectives # NOQA -except ImportError: - docutils = False +# import the directives to have pygments support +from pelican import rstdirectives # NOQA try: from markdown import Markdown except ImportError: From e6be02264afc838276923933c676ef69676c320f Mon Sep 17 00:00:00 2001 From: Shauna Date: Sat, 5 Apr 2014 15:27:03 -0400 Subject: [PATCH 23/24] Add feeds for each author --- docs/settings.rst | 2 + pelican/generators.py | 12 ++++ pelican/settings.py | 3 + .../basic/feeds/alexis-metaireau.atom.xml | 20 ++++++ .../basic/feeds/alexis-metaireau.rss.xml | 20 ++++++ .../custom/feeds/alexis-metaireau.atom.xml | 61 +++++++++++++++++++ .../custom/feeds/alexis-metaireau.rss.xml | 61 +++++++++++++++++++ 7 files changed, 179 insertions(+) create mode 100644 pelican/tests/output/basic/feeds/alexis-metaireau.atom.xml create mode 100644 pelican/tests/output/basic/feeds/alexis-metaireau.rss.xml create mode 100644 pelican/tests/output/custom/feeds/alexis-metaireau.atom.xml create mode 100644 pelican/tests/output/custom/feeds/alexis-metaireau.rss.xml diff --git a/docs/settings.rst b/docs/settings.rst index 8d8f9a16..0de811ec 100644 --- a/docs/settings.rst +++ b/docs/settings.rst @@ -474,6 +474,8 @@ Setting name (default value) What does it do? language. `CATEGORY_FEED_ATOM` ('feeds/%s.atom.xml'[2]_) Where to put the category Atom feeds. `CATEGORY_FEED_RSS` (``None``, i.e. 
no RSS) Where to put the category RSS feeds. +`AUTHOR_FEED_ATOM` ('feeds/%s.atom.xml'[2]_) Where to put the author Atom feeds. +`AUTHOR_FEED_RSS` ('feeds/%s.rss.xml'[2]_) Where to put the author RSS feeds. `TAG_FEED_ATOM` (``None``, i.e. no tag feed) Relative URL to output the tag Atom feed. It should be defined using a "%s" match in the tag name. `TAG_FEED_RSS` (``None``, ie no RSS tag feed) Relative URL to output the tag RSS feed diff --git a/pelican/generators.py b/pelican/generators.py index 1b584d3f..a2d7320a 100644 --- a/pelican/generators.py +++ b/pelican/generators.py @@ -239,6 +239,18 @@ class ArticlesGenerator(Generator): self.settings['CATEGORY_FEED_RSS'] % cat.slug, feed_type='rss') + for auth, arts in self.authors: + arts.sort(key=attrgetter('date'), reverse=True) + if self.settings.get('AUTHOR_FEED_ATOM'): + writer.write_feed(arts, self.context, + self.settings['AUTHOR_FEED_ATOM'] + % auth.slug) + + if self.settings.get('AUTHOR_FEED_RSS'): + writer.write_feed(arts, self.context, + self.settings['AUTHOR_FEED_RSS'] + % auth.slug, feed_type='rss') + if (self.settings.get('TAG_FEED_ATOM') or self.settings.get('TAG_FEED_RSS')): for tag, arts in self.tags.items(): diff --git a/pelican/settings.py b/pelican/settings.py index ee337386..1d0ada0c 100644 --- a/pelican/settings.py +++ b/pelican/settings.py @@ -41,6 +41,8 @@ DEFAULT_CONFIG = { 'THEME_STATIC_PATHS': ['static', ], 'FEED_ALL_ATOM': os.path.join('feeds', 'all.atom.xml'), 'CATEGORY_FEED_ATOM': os.path.join('feeds', '%s.atom.xml'), + 'AUTHOR_FEED_ATOM': os.path.join('feeds', '%s.atom.xml'), + 'AUTHOR_FEED_RSS': os.path.join('feeds', '%s.rss.xml'), 'TRANSLATION_FEED_ATOM': os.path.join('feeds', 'all-%s.atom.xml'), 'FEED_MAX_ITEMS': '', 'SITEURL': '', @@ -269,6 +271,7 @@ def configure_settings(settings): 'FEED_ATOM', 'FEED_RSS', 'FEED_ALL_ATOM', 'FEED_ALL_RSS', 'CATEGORY_FEED_ATOM', 'CATEGORY_FEED_RSS', + 'AUTHOR_FEED_ATOM', 'AUTHOR_FEED_RSS', 'TAG_FEED_ATOM', 'TAG_FEED_RSS', 'TRANSLATION_FEED_ATOM', 
'TRANSLATION_FEED_RSS', ] diff --git a/pelican/tests/output/basic/feeds/alexis-metaireau.atom.xml b/pelican/tests/output/basic/feeds/alexis-metaireau.atom.xml new file mode 100644 index 00000000..d87023b5 --- /dev/null +++ b/pelican/tests/output/basic/feeds/alexis-metaireau.atom.xml @@ -0,0 +1,20 @@ + +A Pelican Blog/2013-11-17T23:29:00ZThis is a super article !2013-11-17T23:29:00ZAlexis Métaireautag:,2010-12-02:this-is-a-super-article.html<p>Some content here !</p> +<div class="section" id="this-is-a-simple-title"> +<h2>This is a simple title</h2> +<p>And here comes the cool <a class="reference external" href="http://books.couchdb.org/relax/design-documents/views">stuff</a>.</p> +<img alt="alternate text" src="|filename|/pictures/Sushi.jpg" style="width: 600px; height: 450px;" /> +<img alt="alternate text" src="|filename|/pictures/Sushi_Macro.jpg" style="width: 600px; height: 450px;" /> +<pre class="literal-block"> +&gt;&gt;&gt; from ipdb import set_trace +&gt;&gt;&gt; set_trace() +</pre> +<p>→ And now try with some utf8 hell: ééé</p> +</div> +Oh yeah !2010-10-20T10:14:00ZAlexis Métaireautag:,2010-10-20:oh-yeah.html<div class="section" id="why-not"> +<h2>Why not ?</h2> +<p>After all, why not ? It's pretty simple to do it, and it will allow me to write my blogposts in rst ! 
+YEAH !</p> +<img alt="alternate text" src="|filename|/pictures/Sushi.jpg" style="width: 600px; height: 450px;" /> +</div> + \ No newline at end of file diff --git a/pelican/tests/output/basic/feeds/alexis-metaireau.rss.xml b/pelican/tests/output/basic/feeds/alexis-metaireau.rss.xml new file mode 100644 index 00000000..09409217 --- /dev/null +++ b/pelican/tests/output/basic/feeds/alexis-metaireau.rss.xml @@ -0,0 +1,20 @@ + +A Pelican Blog/Sun, 17 Nov 2013 23:29:00 -0000This is a super article !/this-is-a-super-article.html<p>Some content here !</p> +<div class="section" id="this-is-a-simple-title"> +<h2>This is a simple title</h2> +<p>And here comes the cool <a class="reference external" href="http://books.couchdb.org/relax/design-documents/views">stuff</a>.</p> +<img alt="alternate text" src="|filename|/pictures/Sushi.jpg" style="width: 600px; height: 450px;" /> +<img alt="alternate text" src="|filename|/pictures/Sushi_Macro.jpg" style="width: 600px; height: 450px;" /> +<pre class="literal-block"> +&gt;&gt;&gt; from ipdb import set_trace +&gt;&gt;&gt; set_trace() +</pre> +<p>→ And now try with some utf8 hell: ééé</p> +</div> +Alexis MétaireauSun, 17 Nov 2013 23:29:00 -0000tag:,2010-12-02:this-is-a-super-article.htmlfoobarfoobarOh yeah !/oh-yeah.html<div class="section" id="why-not"> +<h2>Why not ?</h2> +<p>After all, why not ? It's pretty simple to do it, and it will allow me to write my blogposts in rst ! 
+YEAH !</p> +<img alt="alternate text" src="|filename|/pictures/Sushi.jpg" style="width: 600px; height: 450px;" /> +</div> +Alexis MétaireauWed, 20 Oct 2010 10:14:00 -0000tag:,2010-10-20:oh-yeah.htmlohbaryeah \ No newline at end of file diff --git a/pelican/tests/output/custom/feeds/alexis-metaireau.atom.xml b/pelican/tests/output/custom/feeds/alexis-metaireau.atom.xml new file mode 100644 index 00000000..cb746377 --- /dev/null +++ b/pelican/tests/output/custom/feeds/alexis-metaireau.atom.xml @@ -0,0 +1,61 @@ + +Alexis' loghttp://blog.notmyidea.org/2013-11-17T23:29:00+01:00FILENAME_METADATA example2012-11-30T00:00:00+01:00Alexis Métaireautag:blog.notmyidea.org,2012-11-30:filename_metadata-example.html<p>Some cool stuff!</p> +Second article2012-02-29T00:00:00+01:00Alexis Métaireautag:blog.notmyidea.org,2012-02-29:second-article.html<p>This is some article, in english</p> +A markdown powered article2011-04-20T00:00:00+02:00Alexis Métaireautag:blog.notmyidea.org,2011-04-20:a-markdown-powered-article.html<p>You're mutually oblivious.</p> +<p><a href="http://blog.notmyidea.org/unbelievable.html">a root-relative link to unbelievable</a> +<a href="http://blog.notmyidea.org/unbelievable.html">a file-relative link to unbelievable</a></p>Article 12011-02-17T00:00:00+01:00Alexis Métaireautag:blog.notmyidea.org,2011-02-17:article-1.html<p>Article 1</p> +Article 22011-02-17T00:00:00+01:00Alexis Métaireautag:blog.notmyidea.org,2011-02-17:article-2.html<p>Article 2</p> +Article 32011-02-17T00:00:00+01:00Alexis Métaireautag:blog.notmyidea.org,2011-02-17:article-3.html<p>Article 3</p> +This is a super article !2013-11-17T23:29:00+01:00Alexis Métaireautag:blog.notmyidea.org,2010-12-02:this-is-a-super-article.html<p>Some content here !</p> +<div class="section" id="this-is-a-simple-title"> +<h2>This is a simple title</h2> +<p>And here comes the cool <a class="reference external" href="http://books.couchdb.org/relax/design-documents/views">stuff</a>.</p> +<img alt="alternate text" 
src="http://blog.notmyidea.org/pictures/Sushi.jpg" style="width: 600px; height: 450px;" /> +<img alt="alternate text" src="http://blog.notmyidea.org/pictures/Sushi_Macro.jpg" style="width: 600px; height: 450px;" /> +<pre class="literal-block"> +&gt;&gt;&gt; from ipdb import set_trace +&gt;&gt;&gt; set_trace() +</pre> +<p>→ And now try with some utf8 hell: ééé</p> +</div> +Oh yeah !2010-10-20T10:14:00+02:00Alexis Métaireautag:blog.notmyidea.org,2010-10-20:oh-yeah.html<div class="section" id="why-not"> +<h2>Why not ?</h2> +<p>After all, why not ? It's pretty simple to do it, and it will allow me to write my blogposts in rst ! +YEAH !</p> +<img alt="alternate text" src="http://blog.notmyidea.org/pictures/Sushi.jpg" style="width: 600px; height: 450px;" /> +</div> +Unbelievable !2010-10-15T20:30:00+02:00Alexis Métaireautag:blog.notmyidea.org,2010-10-15:unbelievable.html<p>Or completely awesome. Depends the needs.</p> +<p><a class="reference external" href="http://blog.notmyidea.org/a-markdown-powered-article.html">a root-relative link to markdown-article</a> +<a class="reference external" href="http://blog.notmyidea.org/a-markdown-powered-article.html">a file-relative link to markdown-article</a></p> +<div class="section" id="testing-sourcecode-directive"> +<h2>Testing sourcecode directive</h2> +<table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre>1</pre></div></td><td class="code"><div class="highlight"><pre><span class="n">formatter</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">options</span> <span class="ow">and</span> <span class="n">VARIANTS</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">options</span><span class="o">.</span><span class="n">keys</span><span class="p">()[</span><span class="mi">0</span><span class="p">]]</span> +</pre></div> +</td></tr></table></div> +<div class="section" id="testing-another-case"> +<h2>Testing 
another case</h2> +<p>This will now have a line number in 'custom' since it's the default in +pelican.conf, it will have nothing in default.</p> +<table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre>1</pre></div></td><td class="code"><div class="highlight"><pre><span class="n">formatter</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">options</span> <span class="ow">and</span> <span class="n">VARIANTS</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">options</span><span class="o">.</span><span class="n">keys</span><span class="p">()[</span><span class="mi">0</span><span class="p">]]</span> +</pre></div> +</td></tr></table><p>Lovely.</p> +</div> +<div class="section" id="testing-more-sourcecode-directives"> +<h2>Testing more sourcecode directives</h2> +<div class="highlight"><pre><span id="foo-8"><a name="foo-8"></a><span class="lineno special"> 8</span> <span class="testingk">def</span> <span class="testingnf">run</span><span class="testingp">(</span><span class="testingbp">self</span><span class="testingp">):</span><br></span><span id="foo-9"><a name="foo-9"></a><span class="lineno"> </span> <span class="testingbp">self</span><span class="testingo">.</span><span class="testingn">assert_has_content</span><span class="testingp">()</span><br></span><span id="foo-10"><a name="foo-10"></a><span class="lineno special">10</span> <span class="testingk">try</span><span class="testingp">:</span><br></span><span id="foo-11"><a name="foo-11"></a><span class="lineno"> </span> <span class="testingn">lexer</span> <span class="testingo">=</span> <span class="testingn">get_lexer_by_name</span><span class="testingp">(</span><span class="testingbp">self</span><span class="testingo">.</span><span class="testingn">arguments</span><span class="testingp">[</span><span class="testingmi">0</span><span class="testingp">])</span><br></span><span id="foo-12"><a 
name="foo-12"></a><span class="lineno special">12</span> <span class="testingk">except</span> <span class="testingne">ValueError</span><span class="testingp">:</span><br></span><span id="foo-13"><a name="foo-13"></a><span class="lineno"> </span> <span class="testingc"># no lexer found - use the text one instead of an exception</span><br></span><span id="foo-14"><a name="foo-14"></a><span class="lineno special">14</span> <span class="testingn">lexer</span> <span class="testingo">=</span> <span class="testingn">TextLexer</span><span class="testingp">()</span><br></span><span id="foo-15"><a name="foo-15"></a><span class="lineno"> </span> <br></span><span id="foo-16"><a name="foo-16"></a><span class="lineno special">16</span> <span class="testingk">if</span> <span class="testingp">(</span><span class="testings">&#39;linenos&#39;</span> <span class="testingow">in</span> <span class="testingbp">self</span><span class="testingo">.</span><span class="testingn">options</span> <span class="testingow">and</span><br></span><span id="foo-17"><a name="foo-17"></a><span class="lineno"> </span> <span class="testingbp">self</span><span class="testingo">.</span><span class="testingn">options</span><span class="testingp">[</span><span class="testings">&#39;linenos&#39;</span><span class="testingp">]</span> <span class="testingow">not</span> <span class="testingow">in</span> <span class="testingp">(</span><span class="testings">&#39;table&#39;</span><span class="testingp">,</span> <span class="testings">&#39;inline&#39;</span><span class="testingp">)):</span><br></span><span id="foo-18"><a name="foo-18"></a><span class="lineno special">18</span> <span class="testingbp">self</span><span class="testingo">.</span><span class="testingn">options</span><span class="testingp">[</span><span class="testings">&#39;linenos&#39;</span><span class="testingp">]</span> <span class="testingo">=</span> <span class="testings">&#39;table&#39;</span><br></span><span id="foo-19"><a name="foo-19"></a><span 
class="lineno"> </span> <br></span><span id="foo-20"><a name="foo-20"></a><span class="lineno special">20</span> <span class="testingk">for</span> <span class="testingn">flag</span> <span class="testingow">in</span> <span class="testingp">(</span><span class="testings">&#39;nowrap&#39;</span><span class="testingp">,</span> <span class="testings">&#39;nobackground&#39;</span><span class="testingp">,</span> <span class="testings">&#39;anchorlinenos&#39;</span><span class="testingp">):</span><br></span><span id="foo-21"><a name="foo-21"></a><span class="lineno"> </span> <span class="testingk">if</span> <span class="testingn">flag</span> <span class="testingow">in</span> <span class="testingbp">self</span><span class="testingo">.</span><span class="testingn">options</span><span class="testingp">:</span><br></span><span id="foo-22"><a name="foo-22"></a><span class="lineno special">22</span> <span class="testingbp">self</span><span class="testingo">.</span><span class="testingn">options</span><span class="testingp">[</span><span class="testingn">flag</span><span class="testingp">]</span> <span class="testingo">=</span> <span class="testingbp">True</span><br></span><span id="foo-23"><a name="foo-23"></a><span class="lineno"> </span> <br></span><span id="foo-24"><a name="foo-24"></a><span class="lineno special">24</span> <span class="testingc"># noclasses should already default to False, but just in case...</span><br></span><span id="foo-25"><a name="foo-25"></a><span class="lineno"> </span> <span class="testingn">formatter</span> <span class="testingo">=</span> <span class="testingn">HtmlFormatter</span><span class="testingp">(</span><span class="testingn">noclasses</span><span class="testingo">=</span><span class="testingbp">False</span><span class="testingp">,</span> <span class="testingo">**</span><span class="testingbp">self</span><span class="testingo">.</span><span class="testingn">options</span><span class="testingp">)</span><br></span><span id="foo-26"><a 
name="foo-26"></a><span class="lineno special">26</span> <span class="testingn">parsed</span> <span class="testingo">=</span> <span class="testingn">highlight</span><span class="testingp">(</span><span class="testings">&#39;</span><span class="testingse">\n</span><span class="testings">&#39;</span><span class="testingo">.</span><span class="testingn">join</span><span class="testingp">(</span><span class="testingbp">self</span><span class="testingo">.</span><span class="testingn">content</span><span class="testingp">),</span> <span class="testingn">lexer</span><span class="testingp">,</span> <span class="testingn">formatter</span><span class="testingp">)</span><br></span><span id="foo-27"><a name="foo-27"></a><span class="lineno"> </span> <span class="testingk">return</span> <span class="testingp">[</span><span class="testingn">nodes</span><span class="testingo">.</span><span class="testingn">raw</span><span class="testingp">(</span><span class="testings">&#39;&#39;</span><span class="testingp">,</span> <span class="testingn">parsed</span><span class="testingp">,</span> <span class="testingn">format</span><span class="testingo">=</span><span class="testings">&#39;html&#39;</span><span class="testingp">)]</span><br></span></pre></div> +<p>Lovely.</p> +</div> +<div class="section" id="testing-even-more-sourcecode-directives"> +<h2>Testing even more sourcecode directives</h2> +<span class="n">formatter</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">options</span> <span class="ow">and</span> <span class="n">VARIANTS</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">options</span><span class="o">.</span><span class="n">keys</span><span class="p">()[</span><span class="mi">0</span><span class="p">]]</span> +<p>Lovely.</p> +</div> +<div class="section" id="testing-overriding-config-defaults"> +<h2>Testing overriding config defaults</h2> +<p>Even if the default is line 
numbers, we can override it here</p> +<div class="highlight"><pre><span class="n">formatter</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">options</span> <span class="ow">and</span> <span class="n">VARIANTS</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">options</span><span class="o">.</span><span class="n">keys</span><span class="p">()[</span><span class="mi">0</span><span class="p">]]</span> +</pre></div> +<p>Lovely.</p> +</div> +The baz tag2010-03-14T00:00:00+01:00Alexis Métaireautag:blog.notmyidea.org,2010-03-14:tag/baz.html<p>This article overrides the listening of the articles under the <em>baz</em> tag.</p> + \ No newline at end of file diff --git a/pelican/tests/output/custom/feeds/alexis-metaireau.rss.xml b/pelican/tests/output/custom/feeds/alexis-metaireau.rss.xml new file mode 100644 index 00000000..2c4b1160 --- /dev/null +++ b/pelican/tests/output/custom/feeds/alexis-metaireau.rss.xml @@ -0,0 +1,61 @@ + +Alexis' loghttp://blog.notmyidea.org/Sun, 17 Nov 2013 23:29:00 +0100FILENAME_METADATA examplehttp://blog.notmyidea.org/filename_metadata-example.html<p>Some cool stuff!</p> +Alexis MétaireauFri, 30 Nov 2012 00:00:00 +0100tag:blog.notmyidea.org,2012-11-30:filename_metadata-example.htmlSecond articlehttp://blog.notmyidea.org/second-article.html<p>This is some article, in english</p> +Alexis MétaireauWed, 29 Feb 2012 00:00:00 +0100tag:blog.notmyidea.org,2012-02-29:second-article.htmlfoobarbazA markdown powered articlehttp://blog.notmyidea.org/a-markdown-powered-article.html<p>You're mutually oblivious.</p> +<p><a href="http://blog.notmyidea.org/unbelievable.html">a root-relative link to unbelievable</a> +<a href="http://blog.notmyidea.org/unbelievable.html">a file-relative link to unbelievable</a></p>Alexis MétaireauWed, 20 Apr 2011 00:00:00 +0200tag:blog.notmyidea.org,2011-04-20:a-markdown-powered-article.htmlArticle 
1http://blog.notmyidea.org/article-1.html<p>Article 1</p> +Alexis MétaireauThu, 17 Feb 2011 00:00:00 +0100tag:blog.notmyidea.org,2011-02-17:article-1.htmlArticle 2http://blog.notmyidea.org/article-2.html<p>Article 2</p> +Alexis MétaireauThu, 17 Feb 2011 00:00:00 +0100tag:blog.notmyidea.org,2011-02-17:article-2.htmlArticle 3http://blog.notmyidea.org/article-3.html<p>Article 3</p> +Alexis MétaireauThu, 17 Feb 2011 00:00:00 +0100tag:blog.notmyidea.org,2011-02-17:article-3.htmlThis is a super article !http://blog.notmyidea.org/this-is-a-super-article.html<p>Some content here !</p> +<div class="section" id="this-is-a-simple-title"> +<h2>This is a simple title</h2> +<p>And here comes the cool <a class="reference external" href="http://books.couchdb.org/relax/design-documents/views">stuff</a>.</p> +<img alt="alternate text" src="http://blog.notmyidea.org/pictures/Sushi.jpg" style="width: 600px; height: 450px;" /> +<img alt="alternate text" src="http://blog.notmyidea.org/pictures/Sushi_Macro.jpg" style="width: 600px; height: 450px;" /> +<pre class="literal-block"> +&gt;&gt;&gt; from ipdb import set_trace +&gt;&gt;&gt; set_trace() +</pre> +<p>→ And now try with some utf8 hell: ééé</p> +</div> +Alexis MétaireauSun, 17 Nov 2013 23:29:00 +0100tag:blog.notmyidea.org,2010-12-02:this-is-a-super-article.htmlfoobarfoobarOh yeah !http://blog.notmyidea.org/oh-yeah.html<div class="section" id="why-not"> +<h2>Why not ?</h2> +<p>After all, why not ? It's pretty simple to do it, and it will allow me to write my blogposts in rst ! +YEAH !</p> +<img alt="alternate text" src="http://blog.notmyidea.org/pictures/Sushi.jpg" style="width: 600px; height: 450px;" /> +</div> +Alexis MétaireauWed, 20 Oct 2010 10:14:00 +0200tag:blog.notmyidea.org,2010-10-20:oh-yeah.htmlohbaryeahUnbelievable !http://blog.notmyidea.org/unbelievable.html<p>Or completely awesome. 
Depends the needs.</p> +<p><a class="reference external" href="http://blog.notmyidea.org/a-markdown-powered-article.html">a root-relative link to markdown-article</a> +<a class="reference external" href="http://blog.notmyidea.org/a-markdown-powered-article.html">a file-relative link to markdown-article</a></p> +<div class="section" id="testing-sourcecode-directive"> +<h2>Testing sourcecode directive</h2> +<table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre>1</pre></div></td><td class="code"><div class="highlight"><pre><span class="n">formatter</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">options</span> <span class="ow">and</span> <span class="n">VARIANTS</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">options</span><span class="o">.</span><span class="n">keys</span><span class="p">()[</span><span class="mi">0</span><span class="p">]]</span> +</pre></div> +</td></tr></table></div> +<div class="section" id="testing-another-case"> +<h2>Testing another case</h2> +<p>This will now have a line number in 'custom' since it's the default in +pelican.conf, it will have nothing in default.</p> +<table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre>1</pre></div></td><td class="code"><div class="highlight"><pre><span class="n">formatter</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">options</span> <span class="ow">and</span> <span class="n">VARIANTS</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">options</span><span class="o">.</span><span class="n">keys</span><span class="p">()[</span><span class="mi">0</span><span class="p">]]</span> +</pre></div> +</td></tr></table><p>Lovely.</p> +</div> +<div class="section" id="testing-more-sourcecode-directives"> +<h2>Testing more sourcecode directives</h2> +<div 
class="highlight"><pre><span id="foo-8"><a name="foo-8"></a><span class="lineno special"> 8</span> <span class="testingk">def</span> <span class="testingnf">run</span><span class="testingp">(</span><span class="testingbp">self</span><span class="testingp">):</span><br></span><span id="foo-9"><a name="foo-9"></a><span class="lineno"> </span> <span class="testingbp">self</span><span class="testingo">.</span><span class="testingn">assert_has_content</span><span class="testingp">()</span><br></span><span id="foo-10"><a name="foo-10"></a><span class="lineno special">10</span> <span class="testingk">try</span><span class="testingp">:</span><br></span><span id="foo-11"><a name="foo-11"></a><span class="lineno"> </span> <span class="testingn">lexer</span> <span class="testingo">=</span> <span class="testingn">get_lexer_by_name</span><span class="testingp">(</span><span class="testingbp">self</span><span class="testingo">.</span><span class="testingn">arguments</span><span class="testingp">[</span><span class="testingmi">0</span><span class="testingp">])</span><br></span><span id="foo-12"><a name="foo-12"></a><span class="lineno special">12</span> <span class="testingk">except</span> <span class="testingne">ValueError</span><span class="testingp">:</span><br></span><span id="foo-13"><a name="foo-13"></a><span class="lineno"> </span> <span class="testingc"># no lexer found - use the text one instead of an exception</span><br></span><span id="foo-14"><a name="foo-14"></a><span class="lineno special">14</span> <span class="testingn">lexer</span> <span class="testingo">=</span> <span class="testingn">TextLexer</span><span class="testingp">()</span><br></span><span id="foo-15"><a name="foo-15"></a><span class="lineno"> </span> <br></span><span id="foo-16"><a name="foo-16"></a><span class="lineno special">16</span> <span class="testingk">if</span> <span class="testingp">(</span><span class="testings">&#39;linenos&#39;</span> <span class="testingow">in</span> <span 
class="testingbp">self</span><span class="testingo">.</span><span class="testingn">options</span> <span class="testingow">and</span><br></span><span id="foo-17"><a name="foo-17"></a><span class="lineno"> </span> <span class="testingbp">self</span><span class="testingo">.</span><span class="testingn">options</span><span class="testingp">[</span><span class="testings">&#39;linenos&#39;</span><span class="testingp">]</span> <span class="testingow">not</span> <span class="testingow">in</span> <span class="testingp">(</span><span class="testings">&#39;table&#39;</span><span class="testingp">,</span> <span class="testings">&#39;inline&#39;</span><span class="testingp">)):</span><br></span><span id="foo-18"><a name="foo-18"></a><span class="lineno special">18</span> <span class="testingbp">self</span><span class="testingo">.</span><span class="testingn">options</span><span class="testingp">[</span><span class="testings">&#39;linenos&#39;</span><span class="testingp">]</span> <span class="testingo">=</span> <span class="testings">&#39;table&#39;</span><br></span><span id="foo-19"><a name="foo-19"></a><span class="lineno"> </span> <br></span><span id="foo-20"><a name="foo-20"></a><span class="lineno special">20</span> <span class="testingk">for</span> <span class="testingn">flag</span> <span class="testingow">in</span> <span class="testingp">(</span><span class="testings">&#39;nowrap&#39;</span><span class="testingp">,</span> <span class="testings">&#39;nobackground&#39;</span><span class="testingp">,</span> <span class="testings">&#39;anchorlinenos&#39;</span><span class="testingp">):</span><br></span><span id="foo-21"><a name="foo-21"></a><span class="lineno"> </span> <span class="testingk">if</span> <span class="testingn">flag</span> <span class="testingow">in</span> <span class="testingbp">self</span><span class="testingo">.</span><span class="testingn">options</span><span class="testingp">:</span><br></span><span id="foo-22"><a name="foo-22"></a><span class="lineno 
special">22</span> <span class="testingbp">self</span><span class="testingo">.</span><span class="testingn">options</span><span class="testingp">[</span><span class="testingn">flag</span><span class="testingp">]</span> <span class="testingo">=</span> <span class="testingbp">True</span><br></span><span id="foo-23"><a name="foo-23"></a><span class="lineno"> </span> <br></span><span id="foo-24"><a name="foo-24"></a><span class="lineno special">24</span> <span class="testingc"># noclasses should already default to False, but just in case...</span><br></span><span id="foo-25"><a name="foo-25"></a><span class="lineno"> </span> <span class="testingn">formatter</span> <span class="testingo">=</span> <span class="testingn">HtmlFormatter</span><span class="testingp">(</span><span class="testingn">noclasses</span><span class="testingo">=</span><span class="testingbp">False</span><span class="testingp">,</span> <span class="testingo">**</span><span class="testingbp">self</span><span class="testingo">.</span><span class="testingn">options</span><span class="testingp">)</span><br></span><span id="foo-26"><a name="foo-26"></a><span class="lineno special">26</span> <span class="testingn">parsed</span> <span class="testingo">=</span> <span class="testingn">highlight</span><span class="testingp">(</span><span class="testings">&#39;</span><span class="testingse">\n</span><span class="testings">&#39;</span><span class="testingo">.</span><span class="testingn">join</span><span class="testingp">(</span><span class="testingbp">self</span><span class="testingo">.</span><span class="testingn">content</span><span class="testingp">),</span> <span class="testingn">lexer</span><span class="testingp">,</span> <span class="testingn">formatter</span><span class="testingp">)</span><br></span><span id="foo-27"><a name="foo-27"></a><span class="lineno"> </span> <span class="testingk">return</span> <span class="testingp">[</span><span class="testingn">nodes</span><span class="testingo">.</span><span 
class="testingn">raw</span><span class="testingp">(</span><span class="testings">&#39;&#39;</span><span class="testingp">,</span> <span class="testingn">parsed</span><span class="testingp">,</span> <span class="testingn">format</span><span class="testingo">=</span><span class="testings">&#39;html&#39;</span><span class="testingp">)]</span><br></span></pre></div> +<p>Lovely.</p> +</div> +<div class="section" id="testing-even-more-sourcecode-directives"> +<h2>Testing even more sourcecode directives</h2> +<span class="n">formatter</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">options</span> <span class="ow">and</span> <span class="n">VARIANTS</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">options</span><span class="o">.</span><span class="n">keys</span><span class="p">()[</span><span class="mi">0</span><span class="p">]]</span> +<p>Lovely.</p> +</div> +<div class="section" id="testing-overriding-config-defaults"> +<h2>Testing overriding config defaults</h2> +<p>Even if the default is line numbers, we can override it here</p> +<div class="highlight"><pre><span class="n">formatter</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">options</span> <span class="ow">and</span> <span class="n">VARIANTS</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">options</span><span class="o">.</span><span class="n">keys</span><span class="p">()[</span><span class="mi">0</span><span class="p">]]</span> +</pre></div> +<p>Lovely.</p> +</div> +Alexis MétaireauFri, 15 Oct 2010 20:30:00 +0200tag:blog.notmyidea.org,2010-10-15:unbelievable.htmlThe baz taghttp://blog.notmyidea.org/tag/baz.html<p>This article overrides the listening of the articles under the <em>baz</em> tag.</p> +Alexis MétaireauSun, 14 Mar 2010 00:00:00 +0100tag:blog.notmyidea.org,2010-03-14:tag/baz.html \ No newline at end of 
file From c1324b0206a70b8179689d2305c8de678d5e7b1d Mon Sep 17 00:00:00 2001 From: Ondrej Grover Date: Sun, 20 Apr 2014 14:34:52 +0200 Subject: [PATCH 24/24] split content caching into two layers This is a reworked and improved version of content caching. Notable changes: - by default only raw content and metadata returned by readers are cached which should prevent conflicts with plugins, the speed benefit of content objects caching is not very big with a simple setup - renamed --full-rebuild to --ignore-cache - added more elaborate logging to caching code --- README.rst | 1 + docs/index.rst | 1 + docs/settings.rst | 41 +++++++++++++------ pelican/__init__.py | 24 +++++------ pelican/generators.py | 46 ++++++++++++++++----- pelican/readers.py | 20 +++++++-- pelican/settings.py | 10 +++++ pelican/tests/test_generators.py | 67 ++++++++++++++++++++++++++----- pelican/utils.py | 69 ++++++++++++++++++-------------- 9 files changed, 199 insertions(+), 80 deletions(-) diff --git a/README.rst b/README.rst index 20c3f217..bf506c5f 100644 --- a/README.rst +++ b/README.rst @@ -29,6 +29,7 @@ Pelican currently supports: * Code syntax highlighting * Import from WordPress, Dotclear, or RSS feeds * Integration with external tools: Twitter, Google Analytics, etc. (optional) +* Fast rebuild times thanks to content caching and selective output writing. Have a look at the `Pelican documentation`_ for more information. diff --git a/docs/index.rst b/docs/index.rst index 43193e9e..c2deb6de 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -33,6 +33,7 @@ Pelican |version| currently supports: * Code syntax highlighting * Import from WordPress, Dotclear, or RSS feeds * Integration with external tools: Twitter, Google Analytics, etc. (optional) +* Fast rebuild times thanks to content caching and selective output writing. Why the name "Pelican"? 
----------------------- diff --git a/docs/settings.rst b/docs/settings.rst index 0de811ec..1b4bae94 100644 --- a/docs/settings.rst +++ b/docs/settings.rst @@ -161,6 +161,7 @@ Setting name (default value) `_ `WITH_FUTURE_DATES` (``True``) If disabled, content with dates in the future will get a default status of ``draft``. + See :ref:`reading_only_modified_content` for details. `INTRASITE_LINK_REGEX` (``'[{|](?P<what>.*?)[|}]'``) Regular expression that is used to parse internal links. Default syntax of links to internal files, tags, etc., is to enclose the identifier, say ``filename``, in ``{}`` or ``||``. @@ -173,12 +174,16 @@ Setting name (default value) `SLUGIFY_SOURCE` (``'input'``) Specifies where you want the slug to be automatically generated from. Can be set to 'title' to use the 'Title:' metadata tag or 'basename' to use the articles basename when creating the slug. -`CACHE_CONTENT` (``True``) If ``True``, save read content in a cache file. +`CACHE_CONTENT` (``True``) If ``True``, save content in a cache file. See :ref:`reading_only_modified_content` for details about caching. +`CONTENT_CACHING_LAYER` (``'reader'``) If set to ``'reader'``, save only the raw content and metadata returned + by readers, if set to ``'generator'``, save processed content objects. `CACHE_DIRECTORY` (``cache``) Directory in which to store cache files. +`GZIP_CACHE` (``True``) If ``True``, use gzip to (de)compress the cache files. `CHECK_MODIFIED_METHOD` (``mtime``) Controls how files are checked for modifications. `LOAD_CONTENT_CACHE` (``True``) If ``True``, load unmodified content from cache. -`GZIP_CACHE` (``True``) If ``True``, use gzip to (de)compress the cache files. +`AUTORELOAD_IGNORE_CACHE` (``False``) If ``True``, do not load content cache in autoreload mode + when the settings file changes. `WRITE_SELECTED` (``[]``) If this list is not empty, **only** output files with their paths in this list are written. 
Paths should be either relative to the current working directory of Pelican or absolute. For possible use cases see @@ -749,13 +754,21 @@ When Pelican is about to read some content source file: file cannot be found in the cache file, the content is read as usual. -3. If the file is considered unchanged, the content object saved in a +3. If the file is considered unchanged, the content data saved in a previous build corresponding to the file is loaded from the cache and the file is not read. 4. If the file is considered changed, the file is read and the new - modification information and the content object are saved to the + modification information and the content data are saved to the cache if `CACHE_CONTENT` is ``True``. +Depending on `CONTENT_CACHING_LAYER` either the raw content and +metadata returned by a reader are cached if set to ``'reader'``, or +the processed content object is cached if set to ``'generator'``. +Caching the processed content object may conflict with plugins (as +some reading related signals may be skipped) or e.g. the +`WITH_FUTURE_DATES` functionality (as the ``draft`` status of the +cached content objects would not change automatically over time). + Modification time based checking is faster than comparing file hashes, but is not as reliable, because mtime information can be lost when e.g. copying the content sources using the ``cp`` or ``rsync`` @@ -764,16 +777,18 @@ commands without the mtime preservation mode (invoked e.g. by The cache files are Python pickles, so they may not be readable by different versions of Python as the pickle format often changes. If -such an error is encountered, the cache files have to be rebuilt -using the pelican command-line option ``--full-rebuild``. -The cache files also have to be rebuilt when changing the -`GZIP_CACHE` setting for cache file reading to work. 
+such an error is encountered, the cache files have to be rebuilt by +running pelican after removing them or by using the pelican +command-line option ``--ignore-cache``. The cache files also have to +be rebuilt when changing the `GZIP_CACHE` setting for cache file +reading to work. -The ``--full-rebuild`` command-line option is also useful when the -whole site needs to be regenerated due to e.g. modifications to the -settings file or theme files. When pelican runs in autorealod mode, -modification of the settings file or theme will trigger a full rebuild -automatically. +The ``--ignore-cache`` command-line option is also useful when the +whole cache needs to be regenerated due to e.g. modifications to the +settings file which should change the cached content or just for +debugging purposes. When pelican runs in autoreload mode, modification +of the settings file will make it ignore the cache automatically if +`AUTORELOAD_IGNORE_CACHE` is ``True``. Note that even when using cached content, all output is always written, so the modification times of the ``*.html`` files always diff --git a/pelican/__init__.py b/pelican/__init__.py index 077859bb..8cae468c 100644 --- a/pelican/__init__.py +++ b/pelican/__init__.py @@ -262,8 +262,9 @@ def parse_arguments(): help='Relaunch pelican each time a modification occurs' ' on the content files.') - parser.add_argument('-f', '--full-rebuild', action='store_true', - dest='full_rebuild', help='Rebuild everything by not loading from cache') + parser.add_argument('-c', '--ignore-cache', action='store_true', + dest='ignore_cache', help='Ignore content cache ' + 'from previous runs by not loading cache files.') parser.add_argument('-w', '--write-selected', type=str, dest='selected_paths', default=None, @@ -284,7 +285,7 @@ def get_config(args): config['THEME'] = abstheme if os.path.exists(abstheme) else args.theme if args.delete_outputdir is not None: config['DELETE_OUTPUT_DIRECTORY'] = args.delete_outputdir - if args.full_rebuild: + 
if args.ignore_cache: config['LOAD_CONTENT_CACHE'] = False if args.selected_paths: config['WRITE_SELECTED'] = args.selected_paths.split(',') @@ -340,7 +341,10 @@ def main(): print(' --- AutoReload Mode: Monitoring `content`, `theme` and' ' `settings` for changes. ---') - first_run = True # load cache on first run + def _ignore_cache(pelican_obj): + if pelican_obj.settings['AUTORELOAD_IGNORE_CACHE']: + pelican_obj.settings['LOAD_CONTENT_CACHE'] = False + while True: try: # Check source dir for changed files ending with the given @@ -353,10 +357,9 @@ def main(): if modified['settings']: pelican, settings = get_instance(args) - if not first_run: - original_load_cache = settings['LOAD_CONTENT_CACHE'] - # invalidate cache - pelican.settings['LOAD_CONTENT_CACHE'] = False + original_load_cache = settings['LOAD_CONTENT_CACHE'] + print(pelican.settings['AUTORELOAD_IGNORE_CACHE']) + _ignore_cache(pelican) if any(modified.values()): print('\n-> Modified: {}. re-generating...'.format( @@ -368,13 +371,8 @@ def main(): if modified['theme'] is None: logger.warning('Empty theme folder. 
Using `basic` ' 'theme.') - elif modified['theme']: - # theme modified, needs full rebuild -> no cache - if not first_run: # but not on first run - pelican.settings['LOAD_CONTENT_CACHE'] = False pelican.run() - first_run = False # restore original caching policy pelican.settings['LOAD_CONTENT_CACHE'] = original_load_cache diff --git a/pelican/generators.py b/pelican/generators.py index a2d7320a..3cc84fa8 100644 --- a/pelican/generators.py +++ b/pelican/generators.py @@ -28,10 +28,11 @@ from pelican import signals logger = logging.getLogger(__name__) -class Generator(FileStampDataCacher): +class Generator(object): """Baseclass generator""" - def __init__(self, context, settings, path, theme, output_path, **kwargs): + def __init__(self, context, settings, path, theme, output_path, + readers_cache_name='', **kwargs): self.context = context self.settings = settings self.path = path @@ -41,7 +42,7 @@ class Generator(FileStampDataCacher): for arg, value in kwargs.items(): setattr(self, arg, value) - self.readers = Readers(self.settings) + self.readers = Readers(self.settings, readers_cache_name) # templates cache self._templates = {} @@ -74,10 +75,6 @@ class Generator(FileStampDataCacher): custom_filters = self.settings['JINJA_FILTERS'] self.env.filters.update(custom_filters) - # set up caching - super(Generator, self).__init__(settings, 'CACHE_CONTENT', - 'LOAD_CONTENT_CACHE') - signals.generator_init.send(self) def get_template(self, name): @@ -153,6 +150,35 @@ class Generator(FileStampDataCacher): self.context[item] = value +class CachingGenerator(Generator, FileStampDataCacher): + '''Subclass of Generator and FileStampDataCacher classes + + enables content caching, either at the generator or reader level + ''' + + def __init__(self, *args, **kwargs): + '''Initialize the generator, then set up caching + + note the multiple inheritance structure + ''' + cls_name = self.__class__.__name__ + Generator.__init__(self, *args, + readers_cache_name=(cls_name + '-Readers'), + 
**kwargs) + + cache_this_level = self.settings['CONTENT_CACHING_LAYER'] == 'generator' + caching_policy = cache_this_level and self.settings['CACHE_CONTENT'] + load_policy = cache_this_level and self.settings['LOAD_CONTENT_CACHE'] + FileStampDataCacher.__init__(self, self.settings, cls_name, + caching_policy, load_policy + ) + + def _get_file_stamp(self, filename): + '''Get filestamp for path relative to generator.path''' + filename = os.path.join(self.path, filename) + return super(Generator, self)._get_file_stamp(filename) + + class _FileLoader(BaseLoader): def __init__(self, path, basedir): @@ -183,7 +209,7 @@ class TemplatePagesGenerator(Generator): del self.env.loader.loaders[0] -class ArticlesGenerator(Generator): +class ArticlesGenerator(CachingGenerator): """Generate blog articles""" def __init__(self, *args, **kwargs): @@ -537,6 +563,7 @@ class ArticlesGenerator(Generator): self._update_context(('articles', 'dates', 'tags', 'categories', 'tag_cloud', 'authors', 'related_posts')) self.save_cache() + self.readers.save_cache() signals.article_generator_finalized.send(self) def generate_output(self, writer): @@ -545,7 +572,7 @@ class ArticlesGenerator(Generator): signals.article_writer_finalized.send(self, writer=writer) -class PagesGenerator(Generator): +class PagesGenerator(CachingGenerator): """Generate pages""" def __init__(self, *args, **kwargs): @@ -599,6 +626,7 @@ class PagesGenerator(Generator): self.context['PAGES'] = self.pages self.save_cache() + self.readers.save_cache() signals.page_generator_finalized.send(self) def generate_output(self, writer): diff --git a/pelican/readers.py b/pelican/readers.py index fa9d92ae..c63b8981 100644 --- a/pelican/readers.py +++ b/pelican/readers.py @@ -33,7 +33,7 @@ except ImportError: from pelican import signals from pelican.contents import Page, Category, Tag, Author -from pelican.utils import get_date, pelican_open +from pelican.utils import get_date, pelican_open, FileStampDataCacher METADATA_PROCESSORS = { @@ 
-382,7 +382,7 @@ class AsciiDocReader(BaseReader): return content, metadata -class Readers(object): +class Readers(FileStampDataCacher): """Interface for all readers. This class contains a mapping of file extensions / Reader classes, to know @@ -392,7 +392,7 @@ class Readers(object): """ - def __init__(self, settings=None): + def __init__(self, settings=None, cache_name=''): self.settings = settings or {} self.readers = {} self.reader_classes = {} @@ -417,6 +417,15 @@ class Readers(object): self.readers[fmt] = reader_class(self.settings) + # set up caching + cache_this_level = (cache_name != '' and + self.settings['CONTENT_CACHING_LAYER'] == 'reader') + caching_policy = cache_this_level and self.settings['CACHE_CONTENT'] + load_policy = cache_this_level and self.settings['LOAD_CONTENT_CACHE'] + super(Readers, self).__init__(settings, cache_name, + caching_policy, load_policy, + ) + @property def extensions(self): return self.readers.keys() @@ -455,7 +464,10 @@ class Readers(object): source_path=source_path, settings=self.settings, process=reader.process_metadata)) - content, reader_metadata = reader.read(path) + content, reader_metadata = self.get_cached_data(path, (None, None)) + if content is None: + content, reader_metadata = reader.read(path) + self.cache_data(path, (content, reader_metadata)) metadata.update(reader_metadata) if content: diff --git a/pelican/settings.py b/pelican/settings.py index 1d0ada0c..abf16b32 100644 --- a/pelican/settings.py +++ b/pelican/settings.py @@ -123,10 +123,12 @@ DEFAULT_CONFIG = { 'INTRASITE_LINK_REGEX': '[{|](?P.*?)[|}]', 'SLUGIFY_SOURCE': 'title', 'CACHE_CONTENT': True, + 'CONTENT_CACHING_LAYER': 'reader', 'CACHE_DIRECTORY': 'cache', 'GZIP_CACHE': True, 'CHECK_MODIFIED_METHOD': 'mtime', 'LOAD_CONTENT_CACHE': True, + 'AUTORELOAD_IGNORE_CACHE': False, 'WRITE_SELECTED': [], } @@ -266,6 +268,14 @@ def configure_settings(settings): if not 'FEED_DOMAIN' in settings: settings['FEED_DOMAIN'] = settings['SITEURL'] + # check content 
caching layer and warn of incompatibilities + if (settings.get('CACHE_CONTENT', False) and + settings.get('CONTENT_CACHING_LAYER', '') == 'generator' and + settings.get('WITH_FUTURE_DATES', DEFAULT_CONFIG['WITH_FUTURE_DATES'])): + logger.warning('WITH_FUTURE_DATES conflicts with ' + "CONTENT_CACHING_LAYER set to 'generator', " + "use 'reader' layer instead") + # Warn if feeds are generated with both SITEURL & FEED_DOMAIN undefined feed_keys = [ 'FEED_ATOM', 'FEED_RSS', diff --git a/pelican/tests/test_generators.py b/pelican/tests/test_generators.py index f951f0cb..9463047e 100644 --- a/pelican/tests/test_generators.py +++ b/pelican/tests/test_generators.py @@ -288,10 +288,11 @@ class TestArticlesGenerator(unittest.TestCase): authors_expected = ['alexis-metaireau', 'first-author', 'second-author'] self.assertEqual(sorted(authors), sorted(authors_expected)) - def test_content_caching(self): - """Test that the articles are read only once when caching""" + def test_article_object_caching(self): + """Test Article objects caching at the generator level""" settings = get_settings(filenames={}) settings['CACHE_DIRECTORY'] = self.temp_cache + settings['CONTENT_CACHING_LAYER'] = 'generator' settings['READERS'] = {'asc': None} generator = ArticlesGenerator( @@ -307,10 +308,32 @@ class TestArticlesGenerator(unittest.TestCase): generator.generate_context() generator.readers.read_file.assert_called_count == 0 - def test_full_rebuild(self): + def test_reader_content_caching(self): + """Test raw content caching at the reader level""" + settings = get_settings(filenames={}) + settings['CACHE_DIRECTORY'] = self.temp_cache + settings['READERS'] = {'asc': None} + + generator = ArticlesGenerator( + context=settings.copy(), settings=settings, + path=CONTENT_DIR, theme=settings['THEME'], output_path=None) + generator.generate_context() + self.assertTrue(hasattr(generator.readers, '_cache')) + + generator = ArticlesGenerator( + context=settings.copy(), settings=settings, + 
path=CONTENT_DIR, theme=settings['THEME'], output_path=None) + readers = generator.readers.readers + for reader in readers.values(): + reader.read = MagicMock() + generator.generate_context() + for reader in readers.values(): + reader.read.assert_called_count == 0 + + def test_ignore_cache(self): """Test that all the articles are read again when not loading cache - used in --full-rebuild or autoreload mode""" + used in --ignore-cache or autoreload mode""" settings = get_settings(filenames={}) settings['CACHE_DIRECTORY'] = self.temp_cache settings['READERS'] = {'asc': None} @@ -376,30 +399,52 @@ class TestPageGenerator(unittest.TestCase): self.assertEqual(sorted(pages_expected), sorted(pages)) self.assertEqual(sorted(hidden_pages_expected), sorted(hidden_pages)) - def test_content_caching(self): - """Test that the pages are read only once when caching""" + def test_page_object_caching(self): + """Test Page objects caching at the generator level""" settings = get_settings(filenames={}) - settings['CACHE_DIRECTORY'] = 'cache_dir' #TODO settings['CACHE_DIRECTORY'] = self.temp_cache + settings['CONTENT_CACHING_LAYER'] = 'generator' settings['READERS'] = {'asc': None} generator = PagesGenerator( context=settings.copy(), settings=settings, - path=CUR_DIR, theme=settings['THEME'], output_path=None) + path=CONTENT_DIR, theme=settings['THEME'], output_path=None) generator.generate_context() self.assertTrue(hasattr(generator, '_cache')) generator = PagesGenerator( context=settings.copy(), settings=settings, - path=CUR_DIR, theme=settings['THEME'], output_path=None) + path=CONTENT_DIR, theme=settings['THEME'], output_path=None) generator.readers.read_file = MagicMock() generator.generate_context() generator.readers.read_file.assert_called_count == 0 - def test_full_rebuild(self): + def test_reader_content_caching(self): + """Test raw content caching at the reader level""" + settings = get_settings(filenames={}) + settings['CACHE_DIRECTORY'] = self.temp_cache + 
settings['READERS'] = {'asc': None} + + generator = PagesGenerator( + context=settings.copy(), settings=settings, + path=CONTENT_DIR, theme=settings['THEME'], output_path=None) + generator.generate_context() + self.assertTrue(hasattr(generator.readers, '_cache')) + + generator = PagesGenerator( + context=settings.copy(), settings=settings, + path=CONTENT_DIR, theme=settings['THEME'], output_path=None) + readers = generator.readers.readers + for reader in readers.values(): + reader.read = MagicMock() + generator.generate_context() + for reader in readers.values(): + reader.read.assert_called_count == 0 + + def test_ignore_cache(self): """Test that all the pages are read again when not loading cache - used in --full-rebuild or autoreload mode""" + used in --ignore-cache or autoreload mode""" settings = get_settings(filenames={}) settings['CACHE_DIRECTORY'] = self.temp_cache settings['READERS'] = {'asc': None} diff --git a/pelican/utils.py b/pelican/utils.py index cda3108e..7b58a231 100644 --- a/pelican/utils.py +++ b/pelican/utils.py @@ -552,28 +552,30 @@ def split_all(path): class FileDataCacher(object): '''Class that can cache data contained in files''' - def __init__(self, settings, cache_policy_key, load_policy_key): - '''Load the specified cache within CACHE_DIRECTORY + def __init__(self, settings, cache_name, caching_policy, load_policy): + '''Load the specified cache within CACHE_DIRECTORY in settings - only if load_policy_key in setttings is True, - May use gzip if GZIP_CACHE. - Sets caching policy according to *cache_policy_key* - in *settings* + only if *load_policy* is True, + May use gzip if GZIP_CACHE in settings is True. + Sets caching policy according to *caching_policy*.
''' self.settings = settings - name = self.__class__.__name__ - self._cache_path = os.path.join(self.settings['CACHE_DIRECTORY'], name) - self._cache_data_policy = self.settings[cache_policy_key] + self._cache_path = os.path.join(self.settings['CACHE_DIRECTORY'], + cache_name) + self._cache_data_policy = caching_policy if self.settings['GZIP_CACHE']: import gzip self._cache_open = gzip.open else: self._cache_open = open - if self.settings[load_policy_key]: + if load_policy: try: - with self._cache_open(self._cache_path, 'rb') as f: - self._cache = pickle.load(f) - except Exception as e: + with self._cache_open(self._cache_path, 'rb') as fhandle: + self._cache = pickle.load(fhandle) + except (IOError, OSError, pickle.UnpicklingError) as err: + logger.warning(('Cannot load cache {}, ' + 'proceeding with empty cache.\n{}').format( + self._cache_path, err)) self._cache = {} else: self._cache = {} @@ -583,7 +585,7 @@ class FileDataCacher(object): if self._cache_data_policy: self._cache[filename] = data - def get_cached_data(self, filename, default={}): + def get_cached_data(self, filename, default=None): '''Get cached data for the given file if no data is cached, return the default object @@ -595,20 +597,23 @@ class FileDataCacher(object): if self._cache_data_policy: try: mkdir_p(self.settings['CACHE_DIRECTORY']) - with self._cache_open(self._cache_path, 'wb') as f: - pickle.dump(self._cache, f) - except Exception as e: + with self._cache_open(self._cache_path, 'wb') as fhandle: + pickle.dump(self._cache, fhandle) + except (IOError, OSError, pickle.PicklingError) as err: logger.warning('Could not save cache {}\n{}'.format( - self._cache_path, e)) + self._cache_path, err)) class FileStampDataCacher(FileDataCacher): '''Subclass that also caches the stamp of the file''' - def __init__(self, settings, cache_policy_key, load_policy_key): - '''This sublcass additionaly sets filestamp function''' - super(FileStampDataCacher, self).__init__(settings, cache_policy_key, - 
load_policy_key) + def __init__(self, settings, cache_name, caching_policy, load_policy): + '''This subclass additionally sets filestamp function + and base path for filestamping operations + ''' + super(FileStampDataCacher, self).__init__(settings, cache_name, + caching_policy, + load_policy) method = self.settings['CHECK_MODIFIED_METHOD'] if method == 'mtime': @@ -616,10 +621,14 @@ class FileStampDataCacher(FileDataCacher): else: try: hash_func = getattr(hashlib, method) - def filestamp_func(buf): - return hash_func(buf).digest() + def filestamp_func(filename): + '''return hash of file contents''' + with open(filename, 'rb') as fhandle: + return hash_func(fhandle.read()).digest() self._filestamp_func = filestamp_func - except ImportError: + except AttributeError as err: + logger.warning('Could not get hashing function\n{}'.format( + err)) self._filestamp_func = None def cache_data(self, filename, data): @@ -636,11 +645,11 @@ class FileStampDataCacher(FileDataCacher): a hash for a function name in the hashlib module or an empty bytes string otherwise ''' - filename = os.path.join(self.path, filename) try: - with open(filename, 'rb') as f: - return self._filestamp_func(f.read()) - except Exception: + return self._filestamp_func(filename) + except (IOError, OSError, TypeError) as err: + logger.warning('Cannot get modification stamp for {}\n{}'.format( + filename, err)) return b'' def get_cached_data(self, filename, default=None): @@ -648,7 +657,7 @@ class FileStampDataCacher(FileDataCacher): if the file has not been modified. If no record exists or file has been modified, return default. - Modification is checked by compaing the cached + Modification is checked by comparing the cached and current file stamp. ''' stamp, data = super(FileStampDataCacher, self).get_cached_data(