Merge branch 'master' of https://github.com/getpelican/pelican into AddBaseThemeSetting

Conflicts: pelican/generators.py pelican/settings.py
2025-10-15 20:28:56 +02:00 · 2014-04-23 18:38:45 -04:00 · 2014-04-23 18:38:45 -04:00 · edb8fd5e71
commit edb8fd5e71
parent d7993c0e3f cd35e713e0
34 changed files with 1142 additions and 168 deletions
--- a/README.rst
+++ b/README.rst
@ -29,6 +29,7 @@ Pelican currently supports:
 * Code syntax highlighting
 * Import from WordPress, Dotclear, or RSS feeds
 * Integration with external tools: Twitter, Google Analytics, etc. (optional)
+* Fast rebuild times thanks to content caching and selective output writing.

 Have a look at the `Pelican documentation`_ for more information.

--- a/dev_requirements.txt
+++ b/dev_requirements.txt
@ -9,3 +9,6 @@ typogrify

 # To perform release
 bumpr==0.2.0
+
+# For docs theme
+sphinx_rtd_theme
--- a/docs/conf.py
+++ b/docs/conf.py
@ -2,6 +2,8 @@
 from __future__ import unicode_literals
 import sys, os

+on_rtd = os.environ.get('READTHEDOCS', None) == 'True'
+
 sys.path.append(os.path.abspath(os.pardir))

 from pelican import __version__
@ -21,29 +23,43 @@ rst_prolog = '''
 .. |last_stable| replace:: :pelican-doc:`{0}`
 '''.format(last_stable)

+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = 'sphinx'
+
 extlinks = {
    'pelican-doc':  ('http://docs.getpelican.com/%s/', '')
 }

 # -- Options for HTML output ---------------------------------------------------

-html_theme_path = ['_themes']
-html_theme = 'pelican'
-
-html_theme_options = {
-    'nosidebar': True,
-    'index_logo': 'pelican.png',
-    'github_fork': 'getpelican/pelican',
-}
+html_theme = 'default'
+if not on_rtd:
+    try:
+        import sphinx_rtd_theme
+        html_theme = 'sphinx_rtd_theme'
+        html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
+    except ImportError:
+        pass

 html_static_path = ['_static']

 # Output file base name for HTML help builder.
 htmlhelp_basename = 'Pelicandoc'

+html_use_smartypants = True
+
+# If false, no module index is generated.
+html_use_modindex = False
+
+# If false, no index is generated.
+html_use_index = False
+
+# If true, links to the reST sources are added to the pages.
+html_show_sourcelink = False
+
 # -- Options for LaTeX output --------------------------------------------------
 latex_documents = [
-  ('index', 'Pelican.tex', 'Pelican Documentation',
+    ('index', 'Pelican.tex', 'Pelican Documentation',
   'Alexis Métaireau', 'manual'),
 ]

--- a/docs/contribute.rst
+++ b/docs/contribute.rst
@ -143,3 +143,41 @@ and Python 3 at the same time:
  changed it where I felt necessary.

 - Changed xrange() back to range(), so it is valid in both Python versions.
+
+
+Logging tips
+============
+
+Try to use logging with appropriate levels.
+
+For logging messages that are not repeated, use the usual Python way::
+
+    # at top of file
+    import logging
+    logger = logging.getLogger(__name__)
+
+    # when needed
+    logger.warning("A warning that would usually occur only once")
+
+However, if you want to log messages that may occur several times, pass the
+logging method a tuple of two elements instead of a string:
+
+ 1. The message to log for the initial execution
+ 2. A generic message that will appear if the previous one would occur too many
+    times.
+
+For example, if you want to log missing resources, use the following code::
+
+    for resource in resources:
+        if resource.is_missing:
+            logger.warning((
+                'The resource {r} is missing'.format(r=resource.name),
+                'Other resources were missing'))
+
+The log messages will be displayed as follows::
+
+    WARNING: The resource prettiest_cat.jpg is missing
+    WARNING: The resource best_cat_ever.jpg is missing
+    WARNING: The resource cutest_cat.jpg is missing
+    WARNING: The resource lolcat.jpg is missing
+    WARNING: Other resources were missing
--- a/docs/faq.rst
+++ b/docs/faq.rst
@ -205,3 +205,27 @@ You can also disable generation of tag-related pages via::

    TAGS_SAVE_AS = ''
    TAG_SAVE_AS = ''
+
+Why does Pelican always write all HTML files even with content caching enabled?
+===============================================================================
+
+In order to reliably determine whether the HTML output is different
+before writing it, a large part of the generation environment
+including the template contexts, imported plugins, etc. would have to
+be saved and compared, at least in the form of a hash (which would
+require special handling of unhashable types), because of all the
+possible combinations of plugins, pagination, etc. which may change in
+many different ways. This would require a lot more processing time
+and memory and storage space. Simply writing the files each time is a
+lot faster and a lot more reliable.
+
+However, this means that the modification time of the files changes
+every time, so an ``rsync``-based upload will transfer them even if
+their content hasn't changed. A simple solution is to make ``rsync``
+use the ``--checksum`` option, which will make it compare the file
+checksums in a much faster way than Pelican would.
+
+When only a few specific output files are of interest (e.g. when
+working on some specific page or the theme templates), the
+`WRITE_SELECTED` option may help; see
+:ref:`writing_only_selected_content`.
--- a/docs/getting_started.rst
+++ b/docs/getting_started.rst
@ -130,6 +130,8 @@ automatically installed without any action on your part:
  utilities
 * `MarkupSafe <http://pypi.python.org/pypi/MarkupSafe>`_, for a markup safe
  string implementation
+* `python-dateutil <https://pypi.python.org/pypi/python-dateutil>`_, to read
+  the date metadata

 If you want the following optional packages, you will need to install them
 manually via ``pip``:
--- a/docs/index.rst
+++ b/docs/index.rst
@ -33,6 +33,7 @@ Pelican |version| currently supports:
 * Code syntax highlighting
 * Import from WordPress, Dotclear, or RSS feeds
 * Integration with external tools: Twitter, Google Analytics, etc. (optional)
+* Fast rebuild times thanks to content caching and selective output writing.

 Why the name "Pelican"?
 -----------------------
--- a/docs/plugins.rst
+++ b/docs/plugins.rst
@ -20,12 +20,12 @@ Alternatively, another method is to import them and add them to the list::
    from package import myplugin
    PLUGINS = [myplugin,]

-If your plugins are not in an importable path, you can specify a ``PLUGIN_PATH``
-in the settings. ``PLUGIN_PATH`` can be an absolute path or a path relative to
-the settings file::
+If your plugins are not in an importable path, you can specify a list of paths
+via the ``PLUGIN_PATH`` setting. As shown in the following example, paths in
+the ``PLUGIN_PATH`` list can be absolute or relative to the settings file::

-    PLUGIN_PATH = "plugins"
-    PLUGINS = ["list", "of", "plugins"]
+    PLUGIN_PATH = ["plugins", "/srv/pelican/plugins"]
+    PLUGINS = ["assets", "liquid_tags", "sitemap"]

 Where to find plugins
 =====================
--- a/docs/settings.rst
+++ b/docs/settings.rst
@ -36,7 +36,7 @@ Setting name (default value)
 ===============================================================================  =====================================================================
 `AUTHOR`                                                                         Default author (put your name)
 `DATE_FORMATS` (``{}``)                                                          If you manage multiple languages, you can set the date formatting
-                                                                                 here. See the "Date format and locales" section below for details.
+                                                                                 here. See the "Date format and locale" section below for details.
 `USE_FOLDER_AS_CATEGORY` (``True``)                                              When you don't specify a category in your post metadata, set this
                                                                                 setting to ``True``, and organize your articles in subfolders, the
                                                                                 subfolder will become the category of your post. If set to ``False``,
@ -88,6 +88,9 @@ Setting name (default value)
                                                                                 here or a single string representing one locale.
                                                                                 When providing a list, all the locales will be tried
                                                                                 until one works.
+`LOG_FILTER` (``[]``)                                                            A list of tuples containing the logging level (up to ``warning``)
+                                                                                 and the message to be ignored.
+                                                                                 For example: ``[(logging.WARN, 'TAG_SAVE_AS is set to False')]``
 `READERS` (``{}``)                                                               A dictionary of file extensions / Reader classes for Pelican to
                                                                                 process or ignore. For example, to avoid processing .html files,
                                                                                 set: ``READERS = {'html': None}``. To add a custom reader for the
@ -158,6 +161,7 @@ Setting name (default value)
                                                                                 <http://www.methods.co.nz/asciidoc/manpage.html>`_
 `WITH_FUTURE_DATES` (``True``)                                                   If disabled, content with dates in the future will get a
                                                                                 default status of ``draft``.
+                                                                                 See :ref:`reading_only_modified_content` for caching caveats.
 `INTRASITE_LINK_REGEX` (``'[{|](?P<what>.*?)[|}]'``)                             Regular expression that is used to parse internal links.
                                                                                 Default syntax of links to internal files, tags, etc., is
                                                                                 to enclose the identifier, say ``filename``, in ``{}`` or ``||``.
@ -167,9 +171,23 @@ Setting name (default value)
                                                                                 code blocks. See :ref:`internal_pygments_options` for a list of
                                                                                 supported options.

-`SLUGIFY_SOURCE` (``'input'``)                                                   Specifies where you want the slug to be automatically generated 
-                                                                                 from. Can be set to 'title' to use the 'Title:' metadata tag or 
-                                                                                 'basename' to use the articles basename when creating the slug. 
+`SLUGIFY_SOURCE` (``'input'``)                                                   Specifies where you want the slug to be automatically generated
+                                                                                 from. Can be set to 'title' to use the 'Title:' metadata tag or
+                                                                                 'basename' to use the articles basename when creating the slug.
+`CACHE_CONTENT` (``True``)                                                       If ``True``, save content in a cache file.
+                                                                                 See :ref:`reading_only_modified_content` for details about caching.
+`CONTENT_CACHING_LAYER` (``'reader'``)                                           If set to ``'reader'``, save only the raw content and metadata returned
+                                                                                 by readers, if set to ``'generator'``, save processed content objects.
+`CACHE_DIRECTORY` (``cache``)                                                    Directory in which to store cache files.
+`GZIP_CACHE` (``True``)                                                          If ``True``, use gzip to (de)compress the cache files.
+`CHECK_MODIFIED_METHOD` (``mtime``)                                              Controls how files are checked for modifications.
+`LOAD_CONTENT_CACHE` (``True``)                                                  If ``True``, load unmodified content from cache.
+`AUTORELOAD_IGNORE_CACHE` (``False``)                                            If ``True``, do not load content cache in autoreload mode
+                                                                                 when the settings file changes.
+`WRITE_SELECTED` (``[]``)                                                        If this list is not empty, **only** output files with their paths
+                                                                                 in this list are written. Paths should be either relative to the current
+                                                                                 working directory of Pelican or absolute. For possible use cases see
+                                                                                 :ref:`writing_only_selected_content`.
 ===============================================================================  =====================================================================

 .. [#] Default is the system locale.
@ -240,9 +258,9 @@ posts for the month at ``posts/2011/Aug/index.html``.
    arrive at an appropriate archive of posts, without having to specify
    a page name.

-====================================================    =====================================================
+======================================================  =====================================================
 Setting name (default value)                            What does it do?
-====================================================    =====================================================
+======================================================  =====================================================
 `ARTICLE_URL` (``'{slug}.html'``)                       The URL to refer to an article.
 `ARTICLE_SAVE_AS` (``'{slug}.html'``)                   The place where we will save an article.
 `ARTICLE_LANG_URL` (``'{slug}-{lang}.html'``)           The URL to refer to an article which doesn't use the
@ -253,7 +271,7 @@ Setting name (default value)                            What does it do?
 `DRAFT_SAVE_AS` (``'drafts/{slug}.html'``)              The place where we will save an article draft.
 `DRAFT_LANG_URL` (``'drafts/{slug}-{lang}.html'``)      The URL to refer to an article draft which doesn't
                                                        use the default language.
-`DRAFT_LANG_SAVE_AS` (``'drafts/{slug}-{lang}.html'``)  The place where we will save an article draft which 
+`DRAFT_LANG_SAVE_AS` (``'drafts/{slug}-{lang}.html'``)  The place where we will save an article draft which
                                                        doesn't use the default language.
 `PAGE_URL` (``'pages/{slug}.html'``)                    The URL we will use to link to a page.
 `PAGE_SAVE_AS` (``'pages/{slug}.html'``)                The location we will save the page. This value has to be
@ -269,20 +287,20 @@ Setting name (default value)                            What does it do?
 `TAG_SAVE_AS` (``'tag/{slug}.html'``)                   The location to save the tag page.
 `AUTHOR_URL` (``'author/{slug}.html'``)                 The URL to use for an author.
 `AUTHOR_SAVE_AS` (``'author/{slug}.html'``)             The location to save an author.
-`YEAR_ARCHIVE_SAVE_AS` (False)                          The location to save per-year archives of your posts.
-`MONTH_ARCHIVE_SAVE_AS` (False)                         The location to save per-month archives of your posts.
-`DAY_ARCHIVE_SAVE_AS` (False)                           The location to save per-day archives of your posts.
+`YEAR_ARCHIVE_SAVE_AS` (``''``)                         The location to save per-year archives of your posts.
+`MONTH_ARCHIVE_SAVE_AS` (``''``)                        The location to save per-month archives of your posts.
+`DAY_ARCHIVE_SAVE_AS` (``''``)                          The location to save per-day archives of your posts.
 `SLUG_SUBSTITUTIONS`  (``()``)                          Substitutions to make prior to stripping out
                                                        non-alphanumerics when generating slugs. Specified
                                                        as a list of 2-tuples of ``(from, to)`` which are
                                                        applied in order.
-====================================================    =====================================================
+======================================================  =====================================================

 .. note::

    If you do not want one or more of the default pages to be created (e.g.,
    you are the only author on your site and thus do not need an Authors page),
-    set the corresponding ``*_SAVE_AS`` setting to ``None`` to prevent the
+    set the corresponding ``*_SAVE_AS`` setting to ``''`` to prevent the
    relevant page from being generated.

 `DIRECT_TEMPLATES`
@ -461,6 +479,8 @@ Setting name (default value)                        What does it do?
                                                    language.
 `CATEGORY_FEED_ATOM` ('feeds/%s.atom.xml'[2]_)      Where to put the category Atom feeds.
 `CATEGORY_FEED_RSS` (``None``, i.e. no RSS)         Where to put the category RSS feeds.
+`AUTHOR_FEED_ATOM` ('feeds/%s.atom.xml'[2]_)        Where to put the author Atom feeds.
+`AUTHOR_FEED_RSS` ('feeds/%s.rss.xml'[2]_)          Where to put the author RSS feeds.
 `TAG_FEED_ATOM` (``None``, i.e. no tag feed)        Relative URL to output the tag Atom feed. It should
                                                    be defined using a "%s" match in the tag name.
 `TAG_FEED_RSS` (``None``, ie no RSS tag feed)       Relative URL to output the tag RSS feed
@ -599,7 +619,7 @@ Setting name (default value)                             What does it do?
 .. [3] %s is the language

 Ordering content
-=================
+================

 ================================================    =====================================================
 Setting name (default value)                        What does it do?
@ -694,6 +714,102 @@ adding the following to your configuration::

    CSS_FILE = "wide.css"

+Logging
+=======
+
+Sometimes, a long list of warnings may appear during site generation. Finding
+the **meaningful** error message in the middle of tons of annoying log output
+can be quite tricky. In order to filter out redundant log messages, Pelican
+comes with the ``LOG_FILTER`` setting.
+
+``LOG_FILTER`` should be a list of tuples ``(level, msg)``, each of them being
+composed of the logging level (up to ``warning``) and the message to be ignored.
+Simply populate the list with the log messages you want to hide, and they will
+be filtered out.
+
+For example: ``[(logging.WARN, 'TAG_SAVE_AS is set to False')]``
+
+.. _reading_only_modified_content:
+
+Reading only modified content
+=============================
+
+To speed up the build process, Pelican can optionally read only articles
+and pages with modified content.
+
+When Pelican is about to read some content source file:
+
+1. The hash or modification time information for the file from a
+   previous build is loaded from a cache file if `LOAD_CONTENT_CACHE`
+   is ``True``. These files are stored in the `CACHE_DIRECTORY`
+   directory.  If the file has no record in the cache file, it is read
+   as usual.
+2. The file is checked according to `CHECK_MODIFIED_METHOD`:
+
+    - If set to ``'mtime'``, the modification time of the file is
+      checked.
+    - If set to a name of a function provided by the ``hashlib``
+      module, e.g. ``'md5'``, the file hash is checked.
+    - If set to anything else or the necessary information about the
+      file cannot be found in the cache file, the content is read as
+      usual.
+
+3. If the file is considered unchanged, the content data saved in a
+   previous build corresponding to the file is loaded from the cache
+   and the file is not read.
+4. If the file is considered changed, the file is read and the new
+   modification information and the content data are saved to the
+   cache if `CACHE_CONTENT` is ``True``.
+
+Depending on `CONTENT_CACHING_LAYER` either the raw content and
+metadata returned by a reader are cached if set to ``'reader'``, or
+the processed content object is cached if set to ``'generator'``.
+Caching the processed content object may conflict with plugins (as
+some reading related signals may be skipped) or e.g. the
+`WITH_FUTURE_DATES` functionality (as the ``draft`` status of the
+cached content objects would not change automatically over time).
+
+Modification time based checking is faster than comparing file hashes,
+but is not as reliable, because mtime information can be lost when
+e.g. copying the content sources using the ``cp`` or ``rsync``
+commands without the mtime preservation mode (invoked e.g. by
+``--archive``).
+
+The cache files are Python pickles, so they may not be readable by
+different versions of Python as the pickle format often changes. If
+such an error is encountered, the cache files have to be rebuilt by
+running pelican after removing them or by using the pelican
+command-line option ``--ignore-cache``.  The cache files also have to
+be rebuilt when changing the `GZIP_CACHE` setting for cache file
+reading to work.
+
+The ``--ignore-cache`` command-line option is also useful when the
+whole cache needs to be regenerated due to e.g. modifications to the
+settings file which should change the cached content or just for
+debugging purposes. When pelican runs in autoreload mode, modification
+of the settings file will make it ignore the cache automatically if
+`AUTORELOAD_IGNORE_CACHE` is ``True``.
+
+Note that even when using cached content, all output is always
+written, so the modification times of the ``*.html`` files always
+change.  Therefore, ``rsync``-based uploads may benefit from the
+``--checksum`` option.
+
+.. _writing_only_selected_content:
+
+Writing only selected content
+=============================
+
+When one article, page, or the theme is being worked on, it is often
+desirable to display selected output files as soon as possible. In
+such cases generating and writing all output is often unnecessary.
+These selected output files can be given as output paths in the
+`WRITE_SELECTED` list and **only** those files will be written. This
+list can also be specified on the command line using the
+``--write-selected`` option which accepts a comma separated list
+of output file paths. By default the list is empty so all output is
+written.
+
 Example settings
 ================

--- a/pelican/init.py
+++ b/pelican/init.py
@ -11,12 +11,15 @@ import argparse
 import locale
 import collections

+# pelican.log has to be the first pelican module to be loaded
+# because logging.setLoggerClass has to be called before logging.getLogger
+from pelican.log import init
+
 from pelican import signals

 from pelican.generators import (ArticlesGenerator, PagesGenerator,
                                StaticGenerator, SourceFileGenerator,
                                TemplatePagesGenerator)
-from pelican.log import init
 from pelican.readers import Readers
 from pelican.settings import read_settings
 from pelican.utils import clean_output_dir, folder_watcher, file_watcher
@ -63,7 +66,8 @@ class Pelican(object):
        self.plugins = []
        logger.debug('Temporarily adding PLUGIN_PATH to system path')
        _sys_path = sys.path[:]
-        sys.path.insert(0, self.settings['PLUGIN_PATH'])
+        for pluginpath in self.settings['PLUGIN_PATH']:
+            sys.path.insert(0, pluginpath)
        for plugin in self.settings['PLUGINS']:
            # if it's a string, then import it
            if isinstance(plugin, six.string_types):
@ -264,6 +268,15 @@ def parse_arguments():
                        action='store_true',
                        help='Relaunch pelican each time a modification occurs'
                        ' on the content files.')
+
+    parser.add_argument('-c', '--ignore-cache', action='store_true',
+                        dest='ignore_cache', help='Ignore content cache '
+                        'from previous runs by not loading cache files.')
+
+    parser.add_argument('-w', '--write-selected', type=str,
+                        dest='selected_paths', default=None,
+                        help='Comma separated list of selected paths to write')
+
    return parser.parse_args()


@ -282,6 +295,10 @@ def get_config(args):
        config['BASE_THEME'] = absbasetheme if os.path.exists(absbasetheme) else args.base_theme
    if args.delete_outputdir is not None:
        config['DELETE_OUTPUT_DIRECTORY'] = args.delete_outputdir
+    if args.ignore_cache:
+        config['LOAD_CONTENT_CACHE'] = False
+    if args.selected_paths:
+        config['WRITE_SELECTED'] = args.selected_paths.split(',')

    # argparse returns bytes in Py2. There is no definite answer as to which
    # encoding argparse (or sys.argv) uses.
@ -337,6 +354,10 @@ def main():
            print('  --- AutoReload Mode: Monitoring `content`, `theme`, `base_theme` and'
                  ' `settings` for changes. ---')

+            def _ignore_cache(pelican_obj):
+                if pelican_obj.settings['AUTORELOAD_IGNORE_CACHE']:
+                    pelican_obj.settings['LOAD_CONTENT_CACHE'] = False
+
            while True:
                try:
                    # Check source dir for changed files ending with the given
@ -345,9 +366,13 @@ def main():
                    # have changed, no matter what extension the filenames
                    # have.
                    modified = {k: next(v) for k, v in watchers.items()}
+                    original_load_cache = settings['LOAD_CONTENT_CACHE']

                    if modified['settings']:
                        pelican, settings = get_instance(args)
+                        original_load_cache = settings['LOAD_CONTENT_CACHE']
+                        print(pelican.settings['AUTORELOAD_IGNORE_CACHE'])
+                        _ignore_cache(pelican)

                    if any(modified.values()):
                        print('\n-> Modified: {}. re-generating...'.format(
@ -365,6 +390,8 @@ def main():
                                           'theme.')

                        pelican.run()
+                        # restore original caching policy
+                        pelican.settings['LOAD_CONTENT_CACHE'] = original_load_cache

                except KeyboardInterrupt:
                    logger.warning("Keyboard interrupt, quitting.")
--- a/pelican/contents.py
+++ b/pelican/contents.py
@ -239,8 +239,10 @@ class Content(object):
                             self._context['filenames'][path].url))
                    origin = origin.replace('\\', '/')  # for Windows paths.
                else:
-                    logger.warning("Unable to find {fn}, skipping url"
-                                   " replacement".format(fn=path))
+                    logger.warning(("Unable to find {fn}, skipping url"
+                                    " replacement".format(fn=value),
+                                    "Other resources were not found"
+                                    " and their urls not replaced"))
            elif what == 'category':
                origin = Category(path, self.settings).url
            elif what == 'tag':
@ -323,6 +325,13 @@ class Content(object):
            os.path.abspath(self.settings['PATH']))
        )

+    def __eq__(self, other):
+        """Compare with metadata and content of other Content object"""
+        return other and self.metadata == other.metadata and self.content == other.content
+
+    # keep basic hashing functionality for caching to work
+    __hash__ = object.__hash__
+

 class Page(Content):
    mandatory_properties = ('title',)
--- a/pelican/generators.py
+++ b/pelican/generators.py
@ -20,7 +20,8 @@ from jinja2 import (Environment, FileSystemLoader, PrefixLoader, ChoiceLoader,

 from pelican.contents import Article, Draft, Page, Static, is_valid_content
 from pelican.readers import Readers
-from pelican.utils import copy, process_translations, mkdir_p, DateFormatter
+from pelican.utils import (copy, process_translations, mkdir_p, DateFormatter,
+                           FileStampDataCacher)
 from pelican import signals


@ -30,7 +31,8 @@ logger = logging.getLogger(__name__)
 class Generator(object):
    """Baseclass generator"""

-    def __init__(self, context, settings, path, theme, base_theme, output_path, **kwargs):
+    def __init__(self, context, settings, path, theme, base_theme, output_path,
+                 readers_cache_name='', **kwargs):
        self.context = context
        self.settings = settings
        self.path = path
@ -41,7 +43,7 @@ class Generator(object):
        for arg, value in kwargs.items():
            setattr(self, arg, value)

-        self.readers = Readers(self.settings)
+        self.readers = Readers(self.settings, readers_cache_name)

        # templates cache
        self._templates = {}
@ -155,6 +157,35 @@ class Generator(object):
            self.context[item] = value


+class CachingGenerator(Generator, FileStampDataCacher):
+    '''Subclass of Generator and FileStampDataCacher classes
+
+    enables content caching, either at the generator or reader level
+    '''
+
+    def __init__(self, *args, **kwargs):
+        '''Initialize the generator, then set up caching
+
+        note the multiple inheritance structure
+        '''
+        cls_name = self.__class__.__name__
+        Generator.__init__(self, *args,
+                           readers_cache_name=(cls_name + '-Readers'),
+                           **kwargs)
+
+        cache_this_level = self.settings['CONTENT_CACHING_LAYER'] == 'generator'
+        caching_policy = cache_this_level and self.settings['CACHE_CONTENT']
+        load_policy = cache_this_level and self.settings['LOAD_CONTENT_CACHE']
+        FileStampDataCacher.__init__(self, self.settings, cls_name,
+                                     caching_policy, load_policy
+                                     )
+
+    def _get_file_stamp(self, filename):
+        '''Get filestamp for path relative to generator.path'''
+        filename = os.path.join(self.path, filename)
+        return super(Generator, self)._get_file_stamp(filename)
+
+
 class _FileLoader(BaseLoader):

    def __init__(self, path, basedir):
@ -185,7 +216,7 @@ class TemplatePagesGenerator(Generator):
                del self.env.loader.loaders[0]


-class ArticlesGenerator(Generator):
+class ArticlesGenerator(CachingGenerator):
    """Generate blog articles"""

    def __init__(self, *args, **kwargs):
@ -241,6 +272,18 @@ class ArticlesGenerator(Generator):
                                  self.settings['CATEGORY_FEED_RSS']
                                  % cat.slug, feed_type='rss')

+        for auth, arts in self.authors:
+            arts.sort(key=attrgetter('date'), reverse=True)
+            if self.settings.get('AUTHOR_FEED_ATOM'):
+                writer.write_feed(arts, self.context,
+                                  self.settings['AUTHOR_FEED_ATOM']
+                                  % auth.slug)
+
+            if self.settings.get('AUTHOR_FEED_RSS'):
+                writer.write_feed(arts, self.context,
+                                  self.settings['AUTHOR_FEED_RSS']
+                                  % auth.slug, feed_type='rss')
+
        if (self.settings.get('TAG_FEED_ATOM')
                or self.settings.get('TAG_FEED_RSS')):
            for tag, arts in self.tags.items():
@ -311,7 +354,20 @@ class ArticlesGenerator(Generator):
                # format string syntax can be used for specifying the
                # period archive dates
                date = archive[0].date
-                save_as = save_as_fmt.format(date=date)
+                # Under python 2, with non-ascii locales, u"{:%b}".format(date) might raise UnicodeDecodeError
+                # because u"{:%b}".format(date) will call date.__format__(u"%b"), which will return a byte string
+                # and not a unicode string.
+                # eg:
+                # locale.setlocale(locale.LC_ALL, 'ja_JP.utf8')
+                # date.__format__(u"%b") == '12\xe6\x9c\x88' # True
+                try:
+                    save_as = save_as_fmt.format(date=date)
+                except UnicodeDecodeError:
+                    # Python2 only:
+                    # Let date.__format__() work with byte strings instead of characters since it fails to work with characters
+                    bytes_save_as_fmt = save_as_fmt.encode('utf8')
+                    bytes_save_as     = bytes_save_as_fmt.format(date=date)
+                    save_as           = unicode(bytes_save_as,'utf8')
                context = self.context.copy()

                if key == period_date_key['year']:
@ -415,20 +471,24 @@ class ArticlesGenerator(Generator):
        for f in self.get_files(
                self.settings['ARTICLE_DIR'],
                exclude=self.settings['ARTICLE_EXCLUDES']):
-            try:
-                article = self.readers.read_file(
-                    base_path=self.path, path=f, content_class=Article,
-                    context=self.context,
-                    preread_signal=signals.article_generator_preread,
-                    preread_sender=self,
-                    context_signal=signals.article_generator_context,
-                    context_sender=self)
-            except Exception as e:
-                logger.warning('Could not process {}\n{}'.format(f, e))
-                continue
+            article = self.get_cached_data(f, None)
+            if article is None:
+                try:
+                    article = self.readers.read_file(
+                        base_path=self.path, path=f, content_class=Article,
+                        context=self.context,
+                        preread_signal=signals.article_generator_preread,
+                        preread_sender=self,
+                        context_signal=signals.article_generator_context,
+                        context_sender=self)
+                except Exception as e:
+                    logger.warning('Could not process {}\n{}'.format(f, e))
+                    continue

-            if not is_valid_content(article, f):
-                continue
+                if not is_valid_content(article, f):
+                    continue
+
+                self.cache_data(f, article)

            self.add_source_path(article)

@ -509,7 +569,8 @@ class ArticlesGenerator(Generator):

        self._update_context(('articles', 'dates', 'tags', 'categories',
                              'tag_cloud', 'authors', 'related_posts'))
-
+        self.save_cache()
+        self.readers.save_cache()
        signals.article_generator_finalized.send(self)

    def generate_output(self, writer):
@ -518,7 +579,7 @@ class ArticlesGenerator(Generator):
        signals.article_writer_finalized.send(self, writer=writer)


-class PagesGenerator(Generator):
+class PagesGenerator(CachingGenerator):
    """Generate pages"""

    def __init__(self, *args, **kwargs):
@ -534,20 +595,24 @@ class PagesGenerator(Generator):
        for f in self.get_files(
                self.settings['PAGE_DIR'],
                exclude=self.settings['PAGE_EXCLUDES']):
-            try:
-                page = self.readers.read_file(
-                    base_path=self.path, path=f, content_class=Page,
-                    context=self.context,
-                    preread_signal=signals.page_generator_preread,
-                    preread_sender=self,
-                    context_signal=signals.page_generator_context,
-                    context_sender=self)
-            except Exception as e:
-                logger.warning('Could not process {}\n{}'.format(f, e))
-                continue
+            page = self.get_cached_data(f, None)
+            if page is None:
+                try:
+                    page = self.readers.read_file(
+                        base_path=self.path, path=f, content_class=Page,
+                        context=self.context,
+                        preread_signal=signals.page_generator_preread,
+                        preread_sender=self,
+                        context_signal=signals.page_generator_context,
+                        context_sender=self)
+                except Exception as e:
+                    logger.warning('Could not process {}\n{}'.format(f, e))
+                    continue

-            if not is_valid_content(page, f):
-                continue
+                if not is_valid_content(page, f):
+                    continue
+
+                self.cache_data(f, page)

            self.add_source_path(page)

@ -567,6 +632,8 @@ class PagesGenerator(Generator):
        self._update_context(('pages', ))
        self.context['PAGES'] = self.pages

+        self.save_cache()
+        self.readers.save_cache()
        signals.page_generator_finalized.send(self)

    def generate_output(self, writer):
--- a/pelican/log.py
+++ b/pelican/log.py
@ -9,7 +9,7 @@ import os
 import sys
 import logging

-from logging import Formatter, getLogger, StreamHandler, DEBUG
+from collections import defaultdict


 RESET_TERM = '\033[0;m'
@ -30,7 +30,7 @@ def ansi(color, text):
    return '\033[1;{0}m{1}{2}'.format(code, text, RESET_TERM)


-class ANSIFormatter(Formatter):
+class ANSIFormatter(logging.Formatter):
    """Convert a `logging.LogRecord' object into colored text, using ANSI
       escape sequences.

@ -51,7 +51,7 @@ class ANSIFormatter(Formatter):
            return ansi('white', record.levelname) + ': ' + msg


-class TextFormatter(Formatter):
+class TextFormatter(logging.Formatter):
    """
    Convert a `logging.LogRecord' object into text.
    """
@ -63,7 +63,62 @@ class TextFormatter(Formatter):
            return record.levelname + ': ' + record.getMessage()


-def init(level=None, logger=getLogger(), handler=StreamHandler()):
+class LimitFilter(logging.Filter):
+    """
+    Remove duplicate records, and limit the number of records in the same
+    group.
+
+    Groups are specified by the message to use when the number of records in
+    the same group hits the limit.
+    E.g.: log.warning(('43 is not the answer', 'More erroneous answers'))
+
+    All state is stored on the class, so it is shared by every instance.
+    """
+
+    # (levelno, formatted message) keys of records that were already seen;
+    # a record whose key is in this set is dropped
+    ignore = set()
+    # number of records per group after which the group message is emitted
+    # once and every further record of that group is dropped
+    threshold = 5
+    # (levelno, group message) -> number of records seen for that group
+    group_count = defaultdict(int)
+
+    def filter(self, record):
+        """Return a true value to let `record` through, False to drop it."""
+        # don't limit log messages for anything above "warning"
+        if record.levelno > logging.WARN:
+            return record
+        # extract group
+        # only a real 2-tuple is a (message, group) pair: a plain string of
+        # length 2 must not be split, and a message object without len()
+        # (e.g. an exception instance) must not raise here
+        group = None
+        if isinstance(record.msg, tuple) and len(record.msg) == 2:
+            record.msg, group = record.msg
+        # ignore record if it was already raised
+        # use .getMessage() and not .msg for string formatting
+        ignore_key = (record.levelno, record.getMessage())
+        to_ignore = ignore_key in LimitFilter.ignore
+        LimitFilter.ignore.add(ignore_key)
+        if to_ignore:
+            return False
+        # check if we went over threshold
+        if group:
+            key = (record.levelno, group)
+            LimitFilter.group_count[key] += 1
+            # the record that reaches the threshold carries the group message
+            if LimitFilter.group_count[key] == LimitFilter.threshold:
+                record.msg = group
+            if LimitFilter.group_count[key] > LimitFilter.threshold:
+                return False
+        return record
+
+
+class LimitLogger(logging.Logger):
+    """
+    A logger which adds LimitFilter automatically
+    """
+
+    # one LimitFilter instance, created when the class body is executed and
+    # attached to every LimitLogger in __init__
+    limit_filter = LimitFilter()
+
+    def __init__(self, *args, **kwargs):
+        # arguments are forwarded unchanged to logging.Logger
+        super(LimitLogger, self).__init__(*args, **kwargs)
+        self.addFilter(LimitLogger.limit_filter)
+
+logging.setLoggerClass(LimitLogger)
+
+
+def init(level=None, handler=logging.StreamHandler()):
+
    logger = logging.getLogger()

    if (os.isatty(sys.stdout.fileno())
@ -79,7 +134,7 @@ def init(level=None, logger=getLogger(), handler=StreamHandler()):


 if __name__ == '__main__':
-    init(level=DEBUG)
+    init(level=logging.DEBUG)

    root_logger = logging.getLogger()
    root_logger.debug('debug')
--- a/pelican/readers.py
+++ b/pelican/readers.py
@ -6,16 +6,13 @@ import logging
 import os
 import re

-try:
-    import docutils
-    import docutils.core
-    import docutils.io
-    from docutils.writers.html4css1 import HTMLTranslator
+import docutils
+import docutils.core
+import docutils.io
+from docutils.writers.html4css1 import HTMLTranslator

-    # import the directives to have pygments support
-    from pelican import rstdirectives  # NOQA
-except ImportError:
-    docutils = False
+# import the directives to have pygments support
+from pelican import rstdirectives  # NOQA
 try:
    from markdown import Markdown
 except ImportError:
@ -36,7 +33,7 @@ except ImportError:

 from pelican import signals
 from pelican.contents import Page, Category, Tag, Author
-from pelican.utils import get_date, pelican_open
+from pelican.utils import get_date, pelican_open, FileStampDataCacher


 METADATA_PROCESSORS = {
@ -204,12 +201,18 @@ class MarkdownReader(BaseReader):
        for name, value in meta.items():
            name = name.lower()
            if name == "summary":
+                # handle summary metadata as markdown
+                # summary metadata is special case and join all list values
                summary_values = "\n".join(value)
                # reset the markdown instance to clear any state
                self._md.reset()
                summary = self._md.convert(summary_values)
                output[name] = self.process_metadata(name, summary)
+            elif len(value) > 1:
+                # handle list metadata as list of string
+                output[name] = self.process_metadata(name, value)
            else:
+                # otherwise, handle metadata as single string
                output[name] = self.process_metadata(name, value[0])
        return output

@ -318,7 +321,11 @@ class HTMLReader(BaseReader):
            if not contents:
                contents = self._attr_value(attrs, 'contents', '')
                if contents:
-                    logger.warning("Meta tag attribute 'contents' used in file %s, should be changed to 'content'", self._filename)
+                    logger.warning((
+                        "Meta tag attribute 'contents' used in file {}, should"
+                        " be changed to 'content'".format(self._filename),
+                        "Other files have meta tag attribute 'contents' that"
+                        " should be changed to 'content'"))

            if name == 'keywords':
                name = 'tags'
@ -375,7 +382,7 @@ class AsciiDocReader(BaseReader):
        return content, metadata


-class Readers(object):
+class Readers(FileStampDataCacher):
    """Interface for all readers.

    This class contains a mapping of file extensions / Reader classes, to know
@ -385,27 +392,20 @@ class Readers(object):

    """

-    # used to warn about missing dependencies only once, at the first
-    # instanciation of a Readers object.
-    warn_missing_deps = True
-
-    def __init__(self, settings=None):
+    def __init__(self, settings=None, cache_name=''):
        self.settings = settings or {}
        self.readers = {}
        self.reader_classes = {}

        for cls in [BaseReader] + BaseReader.__subclasses__():
            if not cls.enabled:
-                if self.__class__.warn_missing_deps:
-                    logger.debug('Missing dependencies for {}'
-                                 .format(', '.join(cls.file_extensions)))
+                logger.debug('Missing dependencies for {}'
+                             .format(', '.join(cls.file_extensions)))
                continue

            for ext in cls.file_extensions:
                self.reader_classes[ext] = cls

-        self.__class__.warn_missing_deps = False
-
        if self.settings['READERS']:
            self.reader_classes.update(self.settings['READERS'])

@ -417,6 +417,15 @@ class Readers(object):

            self.readers[fmt] = reader_class(self.settings)

+        # set up caching
+        cache_this_level = (cache_name != '' and
+                            self.settings['CONTENT_CACHING_LAYER'] == 'reader')
+        caching_policy = cache_this_level and self.settings['CACHE_CONTENT']
+        load_policy = cache_this_level and self.settings['LOAD_CONTENT_CACHE']
+        super(Readers, self).__init__(settings, cache_name,
+                                      caching_policy, load_policy,
+                                      )
+
    @property
    def extensions(self):
        return self.readers.keys()
@ -455,7 +464,10 @@ class Readers(object):
            source_path=source_path, settings=self.settings,
            process=reader.process_metadata))

-        content, reader_metadata = reader.read(path)
+        content, reader_metadata = self.get_cached_data(path, (None, None))
+        if content is None:
+            content, reader_metadata = reader.read(path)
+            self.cache_data(path, (content, reader_metadata))
        metadata.update(reader_metadata)

        if content:
@ -505,19 +517,10 @@ def find_empty_alt(content, path):
            src=(['"])(.*)\5
        )
        """, re.X)
-    matches = re.findall(imgs, content)
-    # find a correct threshold
-    nb_warnings = 10
-    if len(matches) == nb_warnings + 1:
-        nb_warnings += 1  # avoid bad looking case
-    # print one warning per image with empty alt until threshold
-    for match in matches[:nb_warnings]:
-        logger.warning('Empty alt attribute for image {} in {}'.format(
-            os.path.basename(match[1] + match[5]), path))
-    # print one warning for the other images with empty alt
-    if len(matches) > nb_warnings:
-        logger.warning('{} other images with empty alt attributes'
-                       .format(len(matches) - nb_warnings))
+    for match in re.findall(imgs, content):
+        logger.warning(('Empty alt attribute for image {} in {}'.format(
+            os.path.basename(match[1] + match[5]), path),
+            'Other images have empty alt attributes'))


 def default_metadata(settings=None, process=None):
--- a/pelican/settings.py
+++ b/pelican/settings.py
@ -19,6 +19,8 @@ except ImportError:

 from os.path import isabs

+from pelican.log import LimitFilter
+

 logger = logging.getLogger(__name__)

@ -44,6 +46,8 @@ DEFAULT_CONFIG = {
    'BASE_THEME_STATIC_PATHS': ['static', ],
    'FEED_ALL_ATOM': os.path.join('feeds', 'all.atom.xml'),
    'CATEGORY_FEED_ATOM': os.path.join('feeds', '%s.atom.xml'),
+    'AUTHOR_FEED_ATOM': os.path.join('feeds', '%s.atom.xml'),
+    'AUTHOR_FEED_RSS': os.path.join('feeds', '%s.rss.xml'),
    'TRANSLATION_FEED_ATOM': os.path.join('feeds', 'all-%s.atom.xml'),
    'FEED_MAX_ITEMS': '',
    'SITEURL': '',
@ -86,9 +90,9 @@ DEFAULT_CONFIG = {
    'PAGINATION_PATTERNS': [
        (0, '{name}{number}{extension}', '{name}{number}{extension}'),
    ],
-    'YEAR_ARCHIVE_SAVE_AS': False,
-    'MONTH_ARCHIVE_SAVE_AS': False,
-    'DAY_ARCHIVE_SAVE_AS': False,
+    'YEAR_ARCHIVE_SAVE_AS': '',
+    'MONTH_ARCHIVE_SAVE_AS': '',
+    'DAY_ARCHIVE_SAVE_AS': '',
    'RELATIVE_URLS': False,
    'DEFAULT_LANG': 'en',
    'TAG_CLOUD_STEPS': 4,
@ -103,6 +107,7 @@ DEFAULT_CONFIG = {
    'MD_EXTENSIONS': ['codehilite(css_class=highlight)', 'extra'],
    'JINJA_EXTENSIONS': [],
    'JINJA_FILTERS': {},
+    'LOG_FILTER': [],
    'LOCALE': [''],  # defaults to user locale
    'DEFAULT_PAGINATION': False,
    'DEFAULT_ORPHANS': 0,
@ -114,14 +119,22 @@ DEFAULT_CONFIG = {
    'ARTICLE_PERMALINK_STRUCTURE': '',
    'TYPOGRIFY': False,
    'SUMMARY_MAX_LENGTH': 50,
-    'PLUGIN_PATH': '',
+    'PLUGIN_PATH': [],
    'PLUGINS': [],
    'PYGMENTS_RST_OPTIONS': {},
    'TEMPLATE_PAGES': {},
    'IGNORE_FILES': ['.#*'],
    'SLUG_SUBSTITUTIONS': (),
    'INTRASITE_LINK_REGEX': '[{|](?P<what>.*?)[|}]',
-    'SLUGIFY_SOURCE': 'title'
+    'SLUGIFY_SOURCE': 'title',
+    'CACHE_CONTENT': True,
+    'CONTENT_CACHING_LAYER': 'reader',
+    'CACHE_DIRECTORY': 'cache',
+    'GZIP_CACHE': True,
+    'CHECK_MODIFIED_METHOD': 'mtime',
+    'LOAD_CONTENT_CACHE': True,
+    'AUTORELOAD_IGNORE_CACHE': False,
+    'WRITE_SELECTED': [],
    }

 PYGMENTS_RST_OPTIONS = None
@ -131,13 +144,22 @@ def read_settings(path=None, override=None):
    if path:
        local_settings = get_settings_from_file(path)
        # Make the paths relative to the settings file
-        for p in ['PATH', 'OUTPUT_PATH', 'THEME', 'BASE_THEME', 'PLUGIN_PATH']:
+
+        for p in ['PATH', 'OUTPUT_PATH', 'THEME', 'BASE_THEME']:
            if p in local_settings and local_settings[p] is not None \
                    and not isabs(local_settings[p]):
                absp = os.path.abspath(os.path.normpath(os.path.join(
                    os.path.dirname(path), local_settings[p])))
-                if p not in ('THEME', 'BASE_THEME', 'PLUGIN_PATH') or os.path.exists(absp):
+                if p not in ('THEME', 'BASE_THEME') or os.path.exists(absp):
                    local_settings[p] = absp
+
+        if isinstance(local_settings['PLUGIN_PATH'], six.string_types):
+            logger.warning("Defining %s setting as string has been deprecated (should be a list)" % 'PLUGIN_PATH')
+            local_settings['PLUGIN_PATH'] = [local_settings['PLUGIN_PATH']]
+        else:
+            if 'PLUGIN_PATH' in local_settings and local_settings['PLUGIN_PATH'] is not None:
+                local_settings['PLUGIN_PATH'] = [os.path.abspath(os.path.normpath(os.path.join(os.path.dirname(path), pluginpath)))
+                                    if not isabs(pluginpath) else pluginpath for pluginpath in local_settings['PLUGIN_PATH']]
    else:
        local_settings = copy.deepcopy(DEFAULT_CONFIG)

@ -173,14 +195,18 @@ def get_settings_from_file(path, default_settings=DEFAULT_CONFIG):


 def configure_settings(settings):
-    """Provide optimizations, error checking and warnings for the given
+    """Provide optimizations, error checking, and warnings for the given
    settings.
-
+    Also, specify the log messages to be ignored.
    """
    if not 'PATH' in settings or not os.path.isdir(settings['PATH']):
        raise Exception('You need to specify a path containing the content'
                        ' (see pelican --help for more information)')

+    # specify the log messages to be ignored
+    LimitFilter.ignore.update(set(settings.get('LOG_FILTER',
+                                               DEFAULT_CONFIG['LOG_FILTER'])))
+
    # lookup the theme in "pelican/themes" if the given one doesn't exist
    if not os.path.isdir(settings['THEME']):
        theme_path = os.path.join(
@ -205,6 +231,13 @@ def configure_settings(settings):
            raise Exception("Could not find the base theme %s"
                            % settings['BASE_THEME'])

+    # make paths selected for writing absolute if necessary
+    settings['WRITE_SELECTED'] = [
+        os.path.abspath(path) for path in
+        settings.get('WRITE_SELECTED', DEFAULT_CONFIG['WRITE_SELECTED'])
+        ]
+
+
    # standardize strings to lowercase strings
    for key in [
            'DEFAULT_LANG',
@ -254,11 +287,20 @@ def configure_settings(settings):
        if not 'FEED_DOMAIN' in settings:
            settings['FEED_DOMAIN'] = settings['SITEURL']

+    # check content caching layer and warn of incompatibilities
+    if (settings.get('CACHE_CONTENT', False) and
+        settings.get('CONTENT_CACHING_LAYER', '') == 'generator' and
+        settings.get('WITH_FUTURE_DATES', DEFAULT_CONFIG['WITH_FUTURE_DATES'])):
+        logger.warning('WITH_FUTURE_DATES conflicts with '
+                        "CONTENT_CACHING_LAYER set to 'generator', "
+                        "use 'reader' layer instead")
+
    # Warn if feeds are generated with both SITEURL & FEED_DOMAIN undefined
    feed_keys = [
        'FEED_ATOM', 'FEED_RSS',
        'FEED_ALL_ATOM', 'FEED_ALL_RSS',
        'CATEGORY_FEED_ATOM', 'CATEGORY_FEED_RSS',
+        'AUTHOR_FEED_ATOM', 'AUTHOR_FEED_RSS',
        'TAG_FEED_ATOM', 'TAG_FEED_RSS',
        'TRANSLATION_FEED_ATOM', 'TRANSLATION_FEED_RSS',
    ]
--- a/pelican/tests/content/article_with_markdown_and_footnote.md
+++ b/pelican/tests/content/article_with_markdown_and_footnote.md
@ -2,6 +2,12 @@ Title: Article with markdown containing footnotes
 Date: 2012-10-31
 Modified: 2012-11-01
 Summary: Summary with **inline** markup *should* be supported.
+Multiline: Line Metadata should be handle properly.
+    See syntax of Meta-Data extension of Python Markdown package:
+    If a line is indented by 4 or more spaces,
+    that line is assumed to be an additional line of the value
+    for the previous keyword.
+    A keyword may have as many lines as desired.

 This is some content[^1] with some footnotes[^footnote]

--- a/pelican/tests/output/basic/feeds/alexis-metaireau.atom.xml
+++ b/pelican/tests/output/basic/feeds/alexis-metaireau.atom.xml
@ -0,0 +1,20 @@
+<?xml version="1.0" encoding="utf-8"?>
+<feed xmlns="http://www.w3.org/2005/Atom"><title>A Pelican Blog</title><link href="/" rel="alternate"></link><link href="/feeds/alexis-metaireau.atom.xml" rel="self"></link><id>/</id><updated>2013-11-17T23:29:00Z</updated><entry><title>This is a super article !</title><link href="/this-is-a-super-article.html" rel="alternate"></link><updated>2013-11-17T23:29:00Z</updated><author><name>Alexis Métaireau</name></author><id>tag:,2010-12-02:this-is-a-super-article.html</id><summary type="html">&lt;p&gt;Some content here !&lt;/p&gt;
+&lt;div class="section" id="this-is-a-simple-title"&gt;
+&lt;h2&gt;This is a simple title&lt;/h2&gt;
+&lt;p&gt;And here comes the cool &lt;a class="reference external" href="http://books.couchdb.org/relax/design-documents/views"&gt;stuff&lt;/a&gt;.&lt;/p&gt;
+&lt;img alt="alternate text" src="|filename|/pictures/Sushi.jpg" style="width: 600px; height: 450px;" /&gt;
+&lt;img alt="alternate text" src="|filename|/pictures/Sushi_Macro.jpg" style="width: 600px; height: 450px;" /&gt;
+&lt;pre class="literal-block"&gt;
+&amp;gt;&amp;gt;&amp;gt; from ipdb import set_trace
+&amp;gt;&amp;gt;&amp;gt; set_trace()
+&lt;/pre&gt;
+&lt;p&gt;→ And now try with some utf8 hell: ééé&lt;/p&gt;
+&lt;/div&gt;
+</summary><category term="foo"></category><category term="bar"></category><category term="foobar"></category></entry><entry><title>Oh yeah !</title><link href="/oh-yeah.html" rel="alternate"></link><updated>2010-10-20T10:14:00Z</updated><author><name>Alexis Métaireau</name></author><id>tag:,2010-10-20:oh-yeah.html</id><summary type="html">&lt;div class="section" id="why-not"&gt;
+&lt;h2&gt;Why not ?&lt;/h2&gt;
+&lt;p&gt;After all, why not ? It's pretty simple to do it, and it will allow me to write my blogposts in rst !
+YEAH !&lt;/p&gt;
+&lt;img alt="alternate text" src="|filename|/pictures/Sushi.jpg" style="width: 600px; height: 450px;" /&gt;
+&lt;/div&gt;
+</summary><category term="oh"></category><category term="bar"></category><category term="yeah"></category></entry></feed>
--- a/pelican/tests/output/basic/feeds/alexis-metaireau.rss.xml
+++ b/pelican/tests/output/basic/feeds/alexis-metaireau.rss.xml
@ -0,0 +1,20 @@
+<?xml version="1.0" encoding="utf-8"?>
+<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom"><channel><title>A Pelican Blog</title><link>/</link><description></description><atom:link href="/feeds/alexis-metaireau.rss.xml" rel="self"></atom:link><lastBuildDate>Sun, 17 Nov 2013 23:29:00 -0000</lastBuildDate><item><title>This is a super article !</title><link>/this-is-a-super-article.html</link><description>&lt;p&gt;Some content here !&lt;/p&gt;
+&lt;div class="section" id="this-is-a-simple-title"&gt;
+&lt;h2&gt;This is a simple title&lt;/h2&gt;
+&lt;p&gt;And here comes the cool &lt;a class="reference external" href="http://books.couchdb.org/relax/design-documents/views"&gt;stuff&lt;/a&gt;.&lt;/p&gt;
+&lt;img alt="alternate text" src="|filename|/pictures/Sushi.jpg" style="width: 600px; height: 450px;" /&gt;
+&lt;img alt="alternate text" src="|filename|/pictures/Sushi_Macro.jpg" style="width: 600px; height: 450px;" /&gt;
+&lt;pre class="literal-block"&gt;
+&amp;gt;&amp;gt;&amp;gt; from ipdb import set_trace
+&amp;gt;&amp;gt;&amp;gt; set_trace()
+&lt;/pre&gt;
+&lt;p&gt;→ And now try with some utf8 hell: ééé&lt;/p&gt;
+&lt;/div&gt;
+</description><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">Alexis Métaireau</dc:creator><pubDate>Sun, 17 Nov 2013 23:29:00 -0000</pubDate><guid>tag:,2010-12-02:this-is-a-super-article.html</guid><category>foo</category><category>bar</category><category>foobar</category></item><item><title>Oh yeah !</title><link>/oh-yeah.html</link><description>&lt;div class="section" id="why-not"&gt;
+&lt;h2&gt;Why not ?&lt;/h2&gt;
+&lt;p&gt;After all, why not ? It's pretty simple to do it, and it will allow me to write my blogposts in rst !
+YEAH !&lt;/p&gt;
+&lt;img alt="alternate text" src="|filename|/pictures/Sushi.jpg" style="width: 600px; height: 450px;" /&gt;
+&lt;/div&gt;
+</description><dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">Alexis Métaireau</dc:creator><pubDate>Wed, 20 Oct 2010 10:14:00 -0000</pubDate><guid>tag:,2010-10-20:oh-yeah.html</guid><category>oh</category><category>bar</category><category>yeah</category></item></channel></rss>
--- a/pelican/tests/output/custom/feeds/alexis-metaireau.atom.xml
+++ b/pelican/tests/output/custom/feeds/alexis-metaireau.atom.xml
--- a/pelican/tests/output/custom/feeds/alexis-metaireau.rss.xml
+++ b/pelican/tests/output/custom/feeds/alexis-metaireau.rss.xml
--- a/pelican/tests/test_contents.py
+++ b/pelican/tests/test_contents.py
@ -4,6 +4,7 @@ from __future__ import unicode_literals, absolute_import
 import six
 from datetime import datetime
 from sys import platform
+import locale

 from pelican.tests.support import unittest, get_settings

@ -22,6 +23,8 @@ class TestPage(unittest.TestCase):

    def setUp(self):
        super(TestPage, self).setUp()
+        self.old_locale = locale.setlocale(locale.LC_ALL)
+        locale.setlocale(locale.LC_ALL, str('C'))
        self.page_kwargs = {
            'content': TEST_CONTENT,
            'context': {
@ -35,6 +38,9 @@ class TestPage(unittest.TestCase):
            'source_path': '/path/to/file/foo.ext'
        }

+    def tearDown(self):
+        # restore the locale that setUp saved before switching to 'C'
+        locale.setlocale(locale.LC_ALL, self.old_locale)
+
    def test_use_args(self):
        # Creating a page with arguments passed to the constructor should use
        # them to initialise object's attributes.
@ -129,9 +135,15 @@ class TestPage(unittest.TestCase):
        page_kwargs['metadata']['date'] = dt
        page = Page(**page_kwargs)

-        self.assertEqual(page.locale_date,
-            dt.strftime(DEFAULT_CONFIG['DEFAULT_DATE_FORMAT']))
+        # page.locale_date is a unicode string in both python2 and python3
+        dt_date = dt.strftime(DEFAULT_CONFIG['DEFAULT_DATE_FORMAT']) 
+        # dt_date is a byte string in python2, and a unicode string in python3
+        # Let's make sure it is a unicode string (relies on python 3.3 supporting the u prefix)
+        if type(dt_date) != type(u''):
+            # python2:
+            dt_date = unicode(dt_date, 'utf8')

+        self.assertEqual(page.locale_date, dt_date )
        page_kwargs['settings'] = get_settings()

        # I doubt this can work on all platforms ...
--- a/pelican/tests/test_generators.py
+++ b/pelican/tests/test_generators.py
@ -14,6 +14,7 @@ from pelican.generators import (Generator, ArticlesGenerator, PagesGenerator,
                                TemplatePagesGenerator)
 from pelican.writers import Writer
 from pelican.tests.support import unittest, get_settings
+import locale

 CUR_DIR = os.path.dirname(__file__)
 CONTENT_DIR = os.path.join(CUR_DIR, 'content')
@ -21,11 +22,17 @@ CONTENT_DIR = os.path.join(CUR_DIR, 'content')

 class TestGenerator(unittest.TestCase):
    def setUp(self):
+        self.old_locale = locale.setlocale(locale.LC_ALL)
+        locale.setlocale(locale.LC_ALL, str('C'))
        self.settings = get_settings()
        self.settings['READERS'] = {'asc': None}
        self.generator = Generator(self.settings.copy(), self.settings,
                                   CUR_DIR, self.settings['THEME'], self.settings['BASE_THEME'], None)

+    def tearDown(self):
+        # restore the locale that setUp saved before switching to 'C'
+        locale.setlocale(locale.LC_ALL, self.old_locale)
+
+
    def test_include_path(self):
        filename = os.path.join(CUR_DIR, 'content', 'article.rst')
        include_path = self.generator._include_path
@ -42,6 +49,7 @@ class TestArticlesGenerator(unittest.TestCase):
        settings['DEFAULT_CATEGORY'] = 'Default'
        settings['DEFAULT_DATE'] = (1970, 1, 1)
        settings['READERS'] = {'asc': None}
+        settings['CACHE_CONTENT'] = False   # cache not needed for this logic tests

        cls.generator = ArticlesGenerator(
            context=settings.copy(), settings=settings,
@ -50,8 +58,15 @@ class TestArticlesGenerator(unittest.TestCase):
        cls.articles = [[page.title, page.status, page.category.name,
                         page.template] for page in cls.generator.articles]

+    def setUp(self):
+        # temporary directory that the tests assign to CACHE_DIRECTORY
+        self.temp_cache = mkdtemp(prefix='pelican_cache.')
+
+    def tearDown(self):
+        # remove the temporary cache directory created in setUp
+        rmtree(self.temp_cache)
+
    def test_generate_feeds(self):
        settings = get_settings()
+        settings['CACHE_DIRECTORY'] = self.temp_cache
        generator = ArticlesGenerator(
            context=settings, settings=settings,
            path=None, theme=settings['THEME'], base_theme=settings['BASE_THEME'], output_path=None)
@ -127,6 +142,7 @@ class TestArticlesGenerator(unittest.TestCase):
        settings['DEFAULT_CATEGORY'] = 'Default'
        settings['DEFAULT_DATE'] = (1970, 1, 1)
        settings['USE_FOLDER_AS_CATEGORY'] = False
+        settings['CACHE_DIRECTORY'] = self.temp_cache
        settings['READERS'] = {'asc': None}
        settings['filenames'] = {}
        generator = ArticlesGenerator(
@ -151,6 +167,7 @@ class TestArticlesGenerator(unittest.TestCase):
    def test_direct_templates_save_as_default(self):

        settings = get_settings(filenames={})
+        settings['CACHE_DIRECTORY'] = self.temp_cache
        generator = ArticlesGenerator(
            context=settings, settings=settings,
            path=None, theme=settings['THEME'], base_theme=settings['BASE_THEME'], output_path=None)
@ -165,6 +182,7 @@ class TestArticlesGenerator(unittest.TestCase):
        settings = get_settings()
        settings['DIRECT_TEMPLATES'] = ['archives']
        settings['ARCHIVES_SAVE_AS'] = 'archives/index.html'
+        settings['CACHE_DIRECTORY'] = self.temp_cache
        generator = ArticlesGenerator(
            context=settings, settings=settings,
            path=None, theme=settings['THEME'], base_theme=settings['BASE_THEME'], output_path=None)
@ -180,6 +198,7 @@ class TestArticlesGenerator(unittest.TestCase):
        settings = get_settings()
        settings['DIRECT_TEMPLATES'] = ['archives']
        settings['ARCHIVES_SAVE_AS'] = 'archives/index.html'
+        settings['CACHE_DIRECTORY'] = self.temp_cache
        generator = ArticlesGenerator(
            context=settings, settings=settings,
            path=None, theme=settings['THEME'], base_theme=settings['BASE_THEME'], output_path=None)
@ -206,6 +225,7 @@ class TestArticlesGenerator(unittest.TestCase):
        settings = get_settings(filenames={})

        settings['YEAR_ARCHIVE_SAVE_AS'] = 'posts/{date:%Y}/index.html'
+        settings['CACHE_DIRECTORY'] = self.temp_cache
        generator = ArticlesGenerator(
            context=settings, settings=settings,
            path=CONTENT_DIR, theme=settings['THEME'], base_theme=settings['BASE_THEME'], output_path=None)
@ -268,6 +288,72 @@ class TestArticlesGenerator(unittest.TestCase):
        authors_expected = ['alexis-metaireau', 'first-author', 'second-author']
        self.assertEqual(sorted(authors), sorted(authors_expected))

+    def test_article_object_caching(self):
+        """Test Article objects caching at the generator level
+
+        Builds the context twice with the same CACHE_DIRECTORY and checks
+        that the second generator never calls ``readers.read_file``.
+        """
+        settings = get_settings(filenames={})
+        settings['CACHE_DIRECTORY'] = self.temp_cache
+        settings['CONTENT_CACHING_LAYER'] = 'generator'
+        settings['READERS'] = {'asc': None}
+
+        generator = ArticlesGenerator(
+            context=settings.copy(), settings=settings,
+            path=CONTENT_DIR, theme=settings['THEME'],
+            base_theme=settings['BASE_THEME'], output_path=None)
+        generator.generate_context()
+        self.assertTrue(hasattr(generator, '_cache'))
+
+        generator = ArticlesGenerator(
+            context=settings.copy(), settings=settings,
+            path=CONTENT_DIR, theme=settings['THEME'],
+            base_theme=settings['BASE_THEME'], output_path=None)
+        generator.readers.read_file = MagicMock()
+        generator.generate_context()
+        # Check the real call counter: MagicMock has no assert_called_count
+        # assertion, so comparing that attribute can never fail a test.
+        # NOTE(review): 0 assumes every file under CONTENT_DIR was cached by
+        # the first run -- adjust the expected count if some are skipped.
+        self.assertEqual(generator.readers.read_file.call_count, 0)
+
+    def test_reader_content_caching(self):
+        """Test raw content caching at the reader level
+
+        Builds the context twice with the same CACHE_DIRECTORY and checks
+        that no reader's ``read`` is called during the second run.
+        """
+        settings = get_settings(filenames={})
+        settings['CACHE_DIRECTORY'] = self.temp_cache
+        settings['READERS'] = {'asc': None}
+
+        generator = ArticlesGenerator(
+            context=settings.copy(), settings=settings,
+            path=CONTENT_DIR, theme=settings['THEME'],
+            base_theme=settings['BASE_THEME'], output_path=None)
+        generator.generate_context()
+        self.assertTrue(hasattr(generator.readers, '_cache'))
+
+        generator = ArticlesGenerator(
+            context=settings.copy(), settings=settings,
+            path=CONTENT_DIR, theme=settings['THEME'],
+            base_theme=settings['BASE_THEME'], output_path=None)
+        readers = generator.readers.readers
+        for reader in readers.values():
+            reader.read = MagicMock()
+        generator.generate_context()
+        for reader in readers.values():
+            # Check the real call counter: MagicMock has no
+            # assert_called_count assertion, so comparing that attribute
+            # can never fail a test.
+            self.assertEqual(reader.read.call_count, 0)
+
+    def test_ignore_cache(self):
+        """Test that all the articles are read again when not loading cache
+
+        used in --ignore-cache or autoreload mode
+
+        Records how often ``readers.read_file`` is called on a first run,
+        then sets LOAD_CONTENT_CACHE to False and expects the same number
+        of calls on a second run.
+        """
+        settings = get_settings(filenames={})
+        settings['CACHE_DIRECTORY'] = self.temp_cache
+        settings['READERS'] = {'asc': None}
+
+        generator = ArticlesGenerator(
+            context=settings.copy(), settings=settings,
+            path=CONTENT_DIR, theme=settings['THEME'],
+            base_theme=settings['BASE_THEME'], output_path=None)
+        generator.readers.read_file = MagicMock()
+        generator.generate_context()
+        self.assertTrue(hasattr(generator, '_cache_open'))
+        orig_call_count = generator.readers.read_file.call_count
+
+        settings['LOAD_CONTENT_CACHE'] = False
+        generator = ArticlesGenerator(
+            context=settings.copy(), settings=settings,
+            path=CONTENT_DIR, theme=settings['THEME'],
+            base_theme=settings['BASE_THEME'], output_path=None)
+        generator.readers.read_file = MagicMock()
+        generator.generate_context()
+        # Check the real call counter: MagicMock has no assert_called_count
+        # assertion, so comparing that attribute can never fail a test.
+        self.assertEqual(generator.readers.read_file.call_count,
+                         orig_call_count)
+

 class TestPageGenerator(unittest.TestCase):
    # Note: Every time you want to test for a new field; Make sure the test
@ -275,12 +361,19 @@ class TestPageGenerator(unittest.TestCase):
    # distill_pages Then update the assertEqual in test_generate_context
    # to match expected

+    def setUp(self):
+        # Fresh temporary directory used as CACHE_DIRECTORY by the tests.
+        self.temp_cache = mkdtemp(prefix='pelican_cache.')
+
+    def tearDown(self):
+        # Remove the temporary cache directory created in setUp.
+        rmtree(self.temp_cache)
+
     def distill_pages(self, pages):
+        # Reduce every page to a [title, status, template] list.
         return [[page.title, page.status, page.template] for page in pages]

    def test_generate_context(self):
        settings = get_settings(filenames={})
        settings['PAGE_DIR'] = 'TestPages'  # relative to CUR_DIR
+        settings['CACHE_DIRECTORY'] = self.temp_cache
        settings['DEFAULT_DATE'] = (1970, 1, 1)

        generator = PagesGenerator(
@ -306,6 +399,72 @@ class TestPageGenerator(unittest.TestCase):
        self.assertEqual(sorted(pages_expected), sorted(pages))
        self.assertEqual(sorted(hidden_pages_expected), sorted(hidden_pages))

+    def test_page_object_caching(self):
+        """Test Page objects caching at the generator level
+
+        Builds the context twice with the same CACHE_DIRECTORY and checks
+        that the second generator never calls ``readers.read_file``.
+        """
+        settings = get_settings(filenames={})
+        settings['CACHE_DIRECTORY'] = self.temp_cache
+        settings['CONTENT_CACHING_LAYER'] = 'generator'
+        settings['READERS'] = {'asc': None}
+
+        generator = PagesGenerator(
+            context=settings.copy(), settings=settings,
+            path=CONTENT_DIR, theme=settings['THEME'],
+            base_theme=settings['BASE_THEME'], output_path=None)
+        generator.generate_context()
+        self.assertTrue(hasattr(generator, '_cache'))
+
+        generator = PagesGenerator(
+            context=settings.copy(), settings=settings,
+            path=CONTENT_DIR, theme=settings['THEME'],
+            base_theme=settings['BASE_THEME'], output_path=None)
+        generator.readers.read_file = MagicMock()
+        generator.generate_context()
+        # Check the real call counter: MagicMock has no assert_called_count
+        # assertion, so comparing that attribute can never fail a test.
+        # NOTE(review): 0 assumes every file under CONTENT_DIR was cached by
+        # the first run -- adjust the expected count if some are skipped.
+        self.assertEqual(generator.readers.read_file.call_count, 0)
+
+    def test_reader_content_caching(self):
+        """Test raw content caching at the reader level
+
+        Builds the context twice with the same CACHE_DIRECTORY and checks
+        that no reader's ``read`` is called during the second run.
+        """
+        settings = get_settings(filenames={})
+        settings['CACHE_DIRECTORY'] = self.temp_cache
+        settings['READERS'] = {'asc': None}
+
+        generator = PagesGenerator(
+            context=settings.copy(), settings=settings,
+            path=CONTENT_DIR, theme=settings['THEME'],
+            base_theme=settings['BASE_THEME'], output_path=None)
+        generator.generate_context()
+        self.assertTrue(hasattr(generator.readers, '_cache'))
+
+        generator = PagesGenerator(
+            context=settings.copy(), settings=settings,
+            path=CONTENT_DIR, theme=settings['THEME'],
+            base_theme=settings['BASE_THEME'], output_path=None)
+        readers = generator.readers.readers
+        for reader in readers.values():
+            reader.read = MagicMock()
+        generator.generate_context()
+        for reader in readers.values():
+            # Check the real call counter: MagicMock has no
+            # assert_called_count assertion, so comparing that attribute
+            # can never fail a test.
+            self.assertEqual(reader.read.call_count, 0)
+
+    def test_ignore_cache(self):
+        """Test that all the pages are read again when not loading cache
+
+        used in --ignore-cache or autoreload mode
+
+        Records how often ``readers.read_file`` is called on a first run,
+        then sets LOAD_CONTENT_CACHE to False and expects the same number
+        of calls on a second run.
+        """
+        settings = get_settings(filenames={})
+        settings['CACHE_DIRECTORY'] = self.temp_cache
+        settings['READERS'] = {'asc': None}
+
+        generator = PagesGenerator(
+            context=settings.copy(), settings=settings,
+            path=CONTENT_DIR, theme=settings['THEME'],
+            base_theme=settings['BASE_THEME'], output_path=None)
+        generator.readers.read_file = MagicMock()
+        generator.generate_context()
+        self.assertTrue(hasattr(generator, '_cache_open'))
+        orig_call_count = generator.readers.read_file.call_count
+
+        settings['LOAD_CONTENT_CACHE'] = False
+        generator = PagesGenerator(
+            context=settings.copy(), settings=settings,
+            path=CONTENT_DIR, theme=settings['THEME'],
+            base_theme=settings['BASE_THEME'], output_path=None)
+        generator.readers.read_file = MagicMock()
+        generator.generate_context()
+        # Check the real call counter: MagicMock has no assert_called_count
+        # assertion, so comparing that attribute can never fail a test.
+        self.assertEqual(generator.readers.read_file.call_count,
+                         orig_call_count)
+

 class TestTemplatePagesGenerator(unittest.TestCase):

@ -314,10 +473,14 @@ class TestTemplatePagesGenerator(unittest.TestCase):
     def setUp(self):
+        # Two scratch directories, then remember the current locale and
+        # switch to 'C' for the duration of the test.
         self.temp_content = mkdtemp(prefix='pelicantests.')
         self.temp_output = mkdtemp(prefix='pelicantests.')
+        self.old_locale = locale.setlocale(locale.LC_ALL)
+        locale.setlocale(locale.LC_ALL, str('C'))
+

     def tearDown(self):
+        # Remove the scratch directories, then restore the locale saved in
+        # setUp.
         rmtree(self.temp_content)
         rmtree(self.temp_output)
+        locale.setlocale(locale.LC_ALL, self.old_locale)

    def test_generate_output(self):

--- a/pelican/tests/test_importer.py
+++ b/pelican/tests/test_importer.py
@ -4,6 +4,7 @@ from __future__ import unicode_literals, print_function
 import os
 import re

+import locale
 from pelican.tools.pelican_import import wp2fields, fields2pelican, decode_wp_content, build_header, build_markdown_header, get_attachments, download_attachments
 from pelican.tests.support import (unittest, temporary_folder, mute,
                                   skipIfNoExecutable)
@ -30,9 +31,14 @@ except ImportError:
 class TestWordpressXmlImporter(unittest.TestCase):

     def setUp(self):
+        # Remember the current locale and switch to 'C' before parsing the
+        # sample export, once without and once with the custom-post flag.
+        self.old_locale = locale.setlocale(locale.LC_ALL)
+        locale.setlocale(locale.LC_ALL, str('C'))
         self.posts = list(wp2fields(WORDPRESS_XML_SAMPLE))
         self.custposts = list(wp2fields(WORDPRESS_XML_SAMPLE, True))

+    def tearDown(self):
+        # Restore the locale saved in setUp.
+        locale.setlocale(locale.LC_ALL, self.old_locale)
+
    def test_ignore_empty_posts(self):
        self.assertTrue(self.posts)
        for title, content, fname, date, author, categ, tags, kind, format in self.posts:
@ -261,8 +267,13 @@ class TestBuildHeader(unittest.TestCase):
@unittest.skipUnless(BeautifulSoup, 'Needs BeautifulSoup module')
 class TestWordpressXMLAttachements(unittest.TestCase):
     def setUp(self):
+        # Remember the current locale and switch to 'C' before collecting
+        # the attachments of the sample export.
+        self.old_locale = locale.setlocale(locale.LC_ALL)
+        locale.setlocale(locale.LC_ALL, str('C'))
         self.attachments = get_attachments(WORDPRESS_XML_SAMPLE)

+    def tearDown(self):
+        # Restore the locale saved in setUp.
+        locale.setlocale(locale.LC_ALL, self.old_locale)
+
    def test_recognise_attachments(self):
        self.assertTrue(self.attachments)
        self.assertTrue(len(self.attachments.keys()) == 3)
--- a/pelican/tests/test_paginator.py
+++ b/pelican/tests/test_paginator.py
@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 from __future__ import unicode_literals, absolute_import
 import six
+import locale

 from pelican.tests.support import unittest, get_settings

@ -16,6 +17,8 @@ TEST_SUMMARY = generate_lorem_ipsum(n=1, html=False)
 class TestPage(unittest.TestCase):
    def setUp(self):
        super(TestPage, self).setUp()
+        self.old_locale = locale.setlocale(locale.LC_ALL)
+        locale.setlocale(locale.LC_ALL, str('C'))
        self.page_kwargs = {
            'content': TEST_CONTENT,
            'context': {
@ -29,6 +32,9 @@ class TestPage(unittest.TestCase):
            'source_path': '/path/to/file/foo.ext'
        }

+    def tearDown(self):
+        # Restore the locale saved in setUp.
+        locale.setlocale(locale.LC_ALL, self.old_locale)
+
    def test_save_as_preservation(self):
        settings = get_settings()
        # fix up pagination rules
@ -47,4 +53,4 @@ class TestPage(unittest.TestCase):
        object_list = [Article(**self.page_kwargs), Article(**self.page_kwargs)]
        paginator = Paginator('foobar.foo', object_list, settings)
        page = paginator.page(1)
-        self.assertEqual(page.save_as, 'foobar.foo')
+        self.assertEqual(page.save_as, 'foobar.foo')
--- a/pelican/tests/test_pelican.py
+++ b/pelican/tests/test_pelican.py
@ -43,12 +43,14 @@ class TestPelican(LoggedTestCase):
     def setUp(self):
+        # Separate temporary directories for output and cache; the current
+        # locale is remembered before switching to 'C'.
         super(TestPelican, self).setUp()
         self.temp_path = mkdtemp(prefix='pelicantests.')
-        self.old_locale = locale.setlocale(locale.LC_ALL)
+        self.temp_cache = mkdtemp(prefix='pelican_cache.')
         self.maxDiff = None
+        self.old_locale = locale.setlocale(locale.LC_ALL)
         locale.setlocale(locale.LC_ALL, str('C'))

     def tearDown(self):
+        # Remove both temporary directories, restore the locale saved in
+        # setUp, then let the parent class finish its own cleanup.
         rmtree(self.temp_path)
+        rmtree(self.temp_cache)
         locale.setlocale(locale.LC_ALL, self.old_locale)
         super(TestPelican, self).tearDown()

@ -77,13 +79,14 @@ class TestPelican(LoggedTestCase):
        settings = read_settings(path=None, override={
            'PATH': INPUT_PATH,
            'OUTPUT_PATH': self.temp_path,
+            'CACHE_DIRECTORY': self.temp_cache,
            'LOCALE': locale.normalize('en_US'),
            })
        pelican = Pelican(settings=settings)
        mute(True)(pelican.run)()
        self.assertDirsEqual(self.temp_path, os.path.join(OUTPUT_PATH, 'basic'))
        self.assertLogCountEqual(
-            count=4,
+            count=3,
            msg="Unable to find.*skipping url replacement",
            level=logging.WARNING)

@ -92,6 +95,7 @@ class TestPelican(LoggedTestCase):
        settings = read_settings(path=SAMPLE_CONFIG, override={
            'PATH': INPUT_PATH,
            'OUTPUT_PATH': self.temp_path,
+            'CACHE_DIRECTORY': self.temp_cache,
            'LOCALE': locale.normalize('en_US'),
            })
        pelican = Pelican(settings=settings)
@ -103,6 +107,7 @@ class TestPelican(LoggedTestCase):
        settings = read_settings(path=SAMPLE_CONFIG, override={
            'PATH': INPUT_PATH,
            'OUTPUT_PATH': self.temp_path,
+            'CACHE_DIRECTORY': self.temp_cache,
            'THEME_STATIC_PATHS': [os.path.join(SAMPLES_PATH, 'very'),
                                   os.path.join(SAMPLES_PATH, 'kinda'),
                                   os.path.join(SAMPLES_PATH, 'theme_standard')]
@ -123,6 +128,7 @@ class TestPelican(LoggedTestCase):
        settings = read_settings(path=SAMPLE_CONFIG, override={
            'PATH': INPUT_PATH,
            'OUTPUT_PATH': self.temp_path,
+            'CACHE_DIRECTORY': self.temp_cache,
            'THEME_STATIC_PATHS': [os.path.join(SAMPLES_PATH, 'theme_standard')]
            })

@ -132,3 +138,26 @@ class TestPelican(LoggedTestCase):

        for file in ['a_stylesheet', 'a_template']:
            self.assertTrue(os.path.exists(os.path.join(theme_output, file)))
+
+    def test_write_only_selected(self):
+        """Test that only the selected files are written
+
+        WRITE_SELECTED lists two output paths; the run is expected to log
+        exactly two INFO messages matching "writing .*".
+        """
+        settings = read_settings(path=None, override={
+            'PATH': INPUT_PATH,
+            'OUTPUT_PATH': self.temp_path,
+            'CACHE_DIRECTORY': self.temp_cache,
+            'WRITE_SELECTED': [
+                os.path.join(self.temp_path, 'oh-yeah.html'),
+                os.path.join(self.temp_path, 'categories.html'),
+                ],
+            'LOCALE': locale.normalize('en_US'),
+            })
+        pelican = Pelican(settings=settings)
+        logger = logging.getLogger()
+        orig_level = logger.getEffectiveLevel()
+        logger.setLevel(logging.INFO)
+        # Restore the root logger level even if the run raises, so a
+        # failure here cannot change the log level seen by later tests.
+        try:
+            mute(True)(pelican.run)()
+        finally:
+            logger.setLevel(orig_level)
+        self.assertLogCountEqual(
+            count=2,
+            msg="writing .*",
+            level=logging.INFO)
--- a/pelican/tests/test_readers.py
+++ b/pelican/tests/test_readers.py
@ -214,6 +214,14 @@ class MdReaderTest(ReaderTest):
            'date': datetime.datetime(2012, 10, 31),
            'modified': datetime.datetime(2012, 11, 1),
            'slug': 'article-with-markdown-containing-footnotes',
+            'multiline': [
+                'Line Metadata should be handle properly.',
+                'See syntax of Meta-Data extension of Python Markdown package:',
+                'If a line is indented by 4 or more spaces,',
+                'that line is assumed to be an additional line of the value',
+                'for the previous keyword.',
+                'A keyword may have as many lines as desired.',
+            ]
        }
        self.assertEqual(content, expected_content)
        for key, value in metadata.items():
--- a/pelican/tests/test_settings.py
+++ b/pelican/tests/test_settings.py
@ -16,10 +16,15 @@ class TestSettingsConfiguration(unittest.TestCase):
    optimizations.
    """
     def setUp(self):
+        # Remember the current locale and switch to 'C' before reading
+        # default_conf.py from this test directory.
+        self.old_locale = locale.setlocale(locale.LC_ALL)
+        locale.setlocale(locale.LC_ALL, str('C'))
         self.PATH = abspath(dirname(__file__))
         default_conf = join(self.PATH, 'default_conf.py')
         self.settings = read_settings(default_conf)

+    def tearDown(self):
+        # Restore the locale saved in setUp.
+        locale.setlocale(locale.LC_ALL, self.old_locale)
+
    def test_overwrite_existing_settings(self):
        self.assertEqual(self.settings.get('SITENAME'), "Alexis' log")
        self.assertEqual(self.settings.get('SITEURL'),
--- a/pelican/tests/test_utils.py
+++ b/pelican/tests/test_utils.py
@ -41,6 +41,12 @@ class TestUtils(LoggedTestCase):
        date = datetime.datetime(year=2012, month=11, day=22)
        date_hour = datetime.datetime(
            year=2012, month=11, day=22, hour=22, minute=11)
+        date_hour_z = datetime.datetime(
+            year=2012, month=11, day=22, hour=22, minute=11,
+            tzinfo=pytz.timezone('UTC'))
+        date_hour_est = datetime.datetime(
+            year=2012, month=11, day=22, hour=22, minute=11,
+            tzinfo=pytz.timezone('EST'))
        date_hour_sec = datetime.datetime(
            year=2012, month=11, day=22, hour=22, minute=11, second=10)
        date_hour_sec_z = datetime.datetime(
@ -61,22 +67,42 @@ class TestUtils(LoggedTestCase):
            '22/11/2012': date,
            '22.11.2012': date,
            '22.11.2012 22:11': date_hour,
+            '2012-11-22T22:11Z': date_hour_z,
+            '2012-11-22T22:11-0500': date_hour_est,
            '2012-11-22 22:11:10': date_hour_sec,
            '2012-11-22T22:11:10Z': date_hour_sec_z,
            '2012-11-22T22:11:10-0500': date_hour_sec_est,
            '2012-11-22T22:11:10.123Z': date_hour_sec_frac_z,
            }

+        # examples from http://www.w3.org/TR/NOTE-datetime
+        iso_8601_date = datetime.datetime(year=1997, month=7, day=16)
+        iso_8601_date_hour_tz = datetime.datetime(
+            year=1997, month=7, day=16, hour=19, minute=20,
+            tzinfo=pytz.timezone('CET'))
+        iso_8601_date_hour_sec_tz = datetime.datetime(
+            year=1997, month=7, day=16, hour=19, minute=20, second=30,
+            tzinfo=pytz.timezone('CET'))
+        iso_8601_date_hour_sec_ms_tz = datetime.datetime(
+            year=1997, month=7, day=16, hour=19, minute=20, second=30,
+            microsecond=450000, tzinfo=pytz.timezone('CET'))
+        iso_8601 = {
+            '1997-07-16': iso_8601_date,
+            '1997-07-16T19:20+01:00': iso_8601_date_hour_tz,
+            '1997-07-16T19:20:30+01:00': iso_8601_date_hour_sec_tz,
+            '1997-07-16T19:20:30.45+01:00': iso_8601_date_hour_sec_ms_tz,
+        }
+
        # invalid ones
        invalid_dates = ['2010-110-12', 'yay']

-        if version_info < (3, 2):
-            dates.pop('2012-11-22T22:11:10-0500')
-            invalid_dates.append('2012-11-22T22:11:10-0500')

        for value, expected in dates.items():
            self.assertEqual(utils.get_date(value), expected, value)

+        for value, expected in iso_8601.items():
+            self.assertEqual(utils.get_date(value), expected, value)
+
        for item in invalid_dates:
            self.assertRaises(ValueError, utils.get_date, item)

@ -328,9 +354,12 @@ class TestCopy(unittest.TestCase):

     def setUp(self):
+        # Scratch root directory; the current locale is remembered before
+        # switching to 'C'.
         self.root_dir = mkdtemp(prefix='pelicantests.')
+        self.old_locale = locale.setlocale(locale.LC_ALL)
+        locale.setlocale(locale.LC_ALL, str('C'))

     def tearDown(self):
+        # Remove the scratch directory, then restore the locale saved in
+        # setUp.
         shutil.rmtree(self.root_dir)
+        locale.setlocale(locale.LC_ALL, self.old_locale)

    def _create_file(self, *path):
        with open(os.path.join(self.root_dir, *path), 'w') as f:
@ -429,6 +458,25 @@ class TestDateFormatter(unittest.TestCase):
        locale.setlocale(locale.LC_ALL, '')


+    @unittest.skipUnless(locale_available('fr_FR.UTF-8') or
+                         locale_available('French'),
+                         'French locale needed')
+    def test_french_strftime(self):
+        # This test tries to reproduce an issue that occurred with python3.3 under macos10 only
+        # NOTE(review): the skip condition also accepts a 'French' locale,
+        # but the next line always sets 'fr_FR.UTF-8' -- confirm this works
+        # on platforms where only 'French' is available.
+        locale.setlocale(locale.LC_ALL, str('fr_FR.UTF-8'))
+        date = datetime.datetime(2014,8,14)
+        # we compare the lower() dates since macos10 returns "Jeudi" for %A whereas linux reports "jeudi"
+        self.assertEqual( u'jeudi, 14 août 2014', utils.strftime(date, date_format="%A, %d %B %Y").lower() )
+        df = utils.DateFormatter()
+        self.assertEqual( u'jeudi, 14 août 2014', df(date, date_format="%A, %d %B %Y").lower() )
+        # Let us now set the global locale to C:
+        locale.setlocale(locale.LC_ALL, str('C'))
+        # DateFormatter should still work as expected since it is the whole point of DateFormatter
+        # (This is where pre-2014/4/15 code fails on macos10)
+        df_date = df(date, date_format="%A, %d %B %Y").lower()
+        self.assertEqual( u'jeudi, 14 août 2014', df_date )
+
+
    @unittest.skipUnless(locale_available('fr_FR.UTF-8') or
                         locale_available('French'),
                         'French locale needed')
--- a/pelican/tools/pelican_import.py
+++ b/pelican/tools/pelican_import.py
@ -135,7 +135,7 @@ def wp2fields(xml, wp_custpost=False):
                title = HTMLParser().unescape(item.title.contents[0])
            except IndexError:
                title = 'No title [%s]' % item.find('post_name').string
-                logger.warn('Post "%s" is lacking a proper title' % title)
+                logger.warning('Post "%s" is lacking a proper title' % title)

            filename = item.find('post_name').string
            post_id = item.find('post_id').string
@ -601,11 +601,21 @@ def download_attachments(output_path, urls):
        except URLError as e:
            error = ("No file could be downloaded from {}; Error {}"
                    .format(url, e))
-            logger.warn(error)
+            logger.warning(error)
        except IOError as e: #Python 2.7 throws an IOError rather Than URLError
-            error = ("No file could be downloaded from {}; Error {}"
-                    .format(url, e))
-            logger.warn(error)
+            # For japanese, the error might look kind of like this:
+            # e = IOError( 'socket error', socket.error(111, u'\u63a5\u7d9a\u3092\u62d2\u5426\u3055\u308c\u307e\u3057\u305f') )
+            # and not be suitable to use in "{}".format(e) , raising UnicodeDecodeError
+            # (This is at least the case on my Fedora running Python 2.7.5 
+            # (default, Feb 19 2014, 13:47:28) [GCC 4.8.2 20131212 (Red Hat 4.8.2-7)] on linux2
+            try:
+                error = ("No file could be downloaded from {}; Error {}"
+                        .format(url, e))
+            except UnicodeDecodeError:
+                # For lack of a better log message because we could not decode e, let's use repr(e)
+                error = ("No file could be downloaded from {}; Error {}"
+                        .format(url, repr(e)))
+            logger.warning(error)
    return locations


--- a/pelican/tools/templates/Makefile.in
+++ b/pelican/tools/templates/Makefile.in
@ -97,7 +97,7 @@ ftp_upload: publish
 	lftp ftp://$$(FTP_USER)@$$(FTP_HOST) -e "mirror -R $$(OUTPUTDIR) $$(FTP_TARGET_DIR) ; quit"

 s3_upload: publish
-	s3cmd sync $(OUTPUTDIR)/ s3://$(S3_BUCKET) --acl-public --delete-removed
+	s3cmd sync $(OUTPUTDIR)/ s3://$(S3_BUCKET) --acl-public --delete-removed --guess-mime-type

 cf_upload: publish
 	cd $(OUTPUTDIR) && swift -v -A https://auth.api.rackspacecloud.com/v1.0 -U $(CLOUDFILES_USERNAME) -K $(CLOUDFILES_API_KEY) upload -c $(CLOUDFILES_CONTAINER) .
--- a/pelican/utils.py
+++ b/pelican/utils.py
@ -12,10 +12,12 @@ import pytz
 import re
 import shutil
 import traceback
+import pickle
+import hashlib

 from collections import Hashable
 from contextlib import contextmanager
-from datetime import datetime
+import dateutil.parser
 from functools import partial
 from itertools import groupby
 from jinja2 import Markup
@ -181,39 +183,10 @@ def get_date(string):
    If no format matches the given date, raise a ValueError.
    """
    string = re.sub(' +', ' ', string)
-    formats = [
-        # ISO 8601
-        '%Y',
-        '%Y-%m',
-        '%Y-%m-%d',
-        '%Y-%m-%dT%H:%M%z',
-        '%Y-%m-%dT%H:%MZ',
-        '%Y-%m-%dT%H:%M',
-        '%Y-%m-%dT%H:%M:%S%z',
-        '%Y-%m-%dT%H:%M:%SZ',
-        '%Y-%m-%dT%H:%M:%S',
-        '%Y-%m-%dT%H:%M:%S.%f%z',
-        '%Y-%m-%dT%H:%M:%S.%fZ',
-        '%Y-%m-%dT%H:%M:%S.%f',
-        # end ISO 8601 forms
-        '%Y-%m-%d %H:%M',
-        '%Y-%m-%d %H:%M:%S',
-        '%Y/%m/%d %H:%M',
-        '%Y/%m/%d',
-        '%d-%m-%Y',
-        '%d.%m.%Y %H:%M',
-        '%d.%m.%Y',
-        '%d/%m/%Y',
-        ]
-    for date_format in formats:
-        try:
-            date = datetime.strptime(string, date_format)
-        except ValueError:
-            continue
-        if date_format.endswith('Z'):
-            date = date.replace(tzinfo=pytz.timezone('UTC'))
-        return date
-    raise ValueError('{0!r} is not a valid date'.format(string))
+    try:
+        return dateutil.parser.parse(string)
+    except (TypeError, ValueError):
+        raise ValueError('{0!r} is not a valid date'.format(string))


@contextmanager
@ -574,3 +547,135 @@ def split_all(path):
            break
        path = head
    return components
+
+
+class FileDataCacher(object):
+    '''Class that can cache data contained in files
+
+    The cache maps a file name to arbitrary data and is pickled to a
+    single file named *cache_name* inside the CACHE_DIRECTORY setting.
+    '''
+
+    def __init__(self, settings, cache_name, caching_policy, load_policy):
+        '''Load the specified cache within CACHE_DIRECTORY in settings
+
+        only if *load_policy* is True,
+        May use gzip if GZIP_CACHE in settings is True.
+        Sets caching policy according to *caching_policy*.
+
+        If the cache file cannot be opened or unpickled (IOError, OSError,
+        EOFError or UnpicklingError), a warning is logged and an empty
+        cache is used instead.
+        '''
+        self.settings = settings
+        self._cache_path = os.path.join(self.settings['CACHE_DIRECTORY'],
+                                        cache_name)
+        self._cache_data_policy = caching_policy
+        if self.settings['GZIP_CACHE']:
+            import gzip
+            self._cache_open = gzip.open
+        else:
+            self._cache_open = open
+        if load_policy:
+            # pickle.load raises EOFError (not UnpicklingError) when the
+            # file is empty or truncated, so it has to be caught as well.
+            try:
+                with self._cache_open(self._cache_path, 'rb') as fhandle:
+                    self._cache = pickle.load(fhandle)
+            except (IOError, OSError, EOFError,
+                    pickle.UnpicklingError) as err:
+                logger.warning(('Cannot load cache {}, '
+                    'proceeding with empty cache.\n{}').format(
+                        self._cache_path, err))
+                self._cache = {}
+        else:
+            self._cache = {}
+
+    def cache_data(self, filename, data):
+        '''Cache data for given file
+
+        Does nothing when the caching policy is false.
+        '''
+        if self._cache_data_policy:
+            self._cache[filename] = data
+
+    def get_cached_data(self, filename, default=None):
+        '''Get cached data for the given file
+
+        if no data is cached, return the default object
+        '''
+        return self._cache.get(filename, default)
+
+    def save_cache(self):
+        '''Save the updated cache
+
+        Only writes when the caching policy is true. CACHE_DIRECTORY is
+        created first; a failure to write or pickle is logged as a
+        warning instead of being raised.
+        '''
+        if self._cache_data_policy:
+            try:
+                mkdir_p(self.settings['CACHE_DIRECTORY'])
+                with self._cache_open(self._cache_path, 'wb') as fhandle:
+                    pickle.dump(self._cache, fhandle)
+            except (IOError, OSError, pickle.PicklingError) as err:
+                logger.warning('Could not save cache {}\n{}'.format(
+                    self._cache_path, err))
+
+
+class FileStampDataCacher(FileDataCacher):
+    '''FileDataCacher variant that stores a file stamp next to the data'''
+
+    def __init__(self, settings, cache_name, caching_policy, load_policy):
+        '''Set up the cache, then pick the file stamp function
+
+        CHECK_MODIFIED_METHOD selects it: 'mtime' uses os.path.getmtime,
+        any other value is looked up as a hashlib attribute and applied
+        to the file contents. If hashlib has no such attribute, a warning
+        is logged and the stamp function is set to None.
+        '''
+        super(FileStampDataCacher, self).__init__(
+            settings, cache_name, caching_policy, load_policy)
+
+        stamp_method = self.settings['CHECK_MODIFIED_METHOD']
+        if stamp_method == 'mtime':
+            self._filestamp_func = os.path.getmtime
+            return
+
+        try:
+            hasher = getattr(hashlib, stamp_method)
+        except AttributeError as err:
+            logger.warning('Could not get hashing function\n{}'.format(
+                err))
+            self._filestamp_func = None
+            return
+
+        def digest_of_contents(filename):
+            '''return hash of file contents'''
+            with open(filename, 'rb') as fhandle:
+                return hasher(fhandle.read()).digest()
+
+        self._filestamp_func = digest_of_contents
+
+    def cache_data(self, filename, data):
+        '''Cache the (stamp, data) pair for the given file'''
+        stamped = (self._get_file_stamp(filename), data)
+        super(FileStampDataCacher, self).cache_data(filename, stamped)
+
+    def _get_file_stamp(self, filename):
+        '''Return the current stamp of the given file
+
+        The value comes from the stamp function chosen in __init__.
+        If that call raises IOError, OSError or TypeError, a warning is
+        logged and an empty bytes string is returned instead.
+        '''
+        try:
+            stamp = self._filestamp_func(filename)
+        except (IOError, OSError, TypeError) as err:
+            logger.warning('Cannot get modification stamp for {}\n{}'.format(
+                filename, err))
+            stamp = b''
+        return stamp
+
+    def get_cached_data(self, filename, default=None):
+        '''Return the cached data for *filename* if its stamp still matches
+
+        When nothing is cached for the file, or the cached stamp differs
+        from the file's current stamp, *default* is returned.
+        '''
+        record = super(FileStampDataCacher, self).get_cached_data(
+            filename, (None, default))
+        cached_stamp, cached_data = record
+        if cached_stamp != self._get_file_stamp(filename):
+            return default
+        return cached_data
+
+
+def is_selected_for_writing(settings, path):
+    '''Tell whether *path* may be written given WRITE_SELECTED
+
+    An empty WRITE_SELECTED (the default) selects every path;
+    otherwise only the paths it contains are selected.
+    '''
+    selected = settings['WRITE_SELECTED']
+    if not selected:
+        return True
+    return path in selected
+        
--- a/pelican/writers.py
+++ b/pelican/writers.py
@ -16,7 +16,8 @@ from feedgenerator import Atom1Feed, Rss201rev2Feed
 from jinja2 import Markup

 from pelican.paginator import Paginator
-from pelican.utils import get_relative_path, path_to_url, set_date_tzinfo
+from pelican.utils import (get_relative_path, path_to_url, set_date_tzinfo,
+                           is_selected_for_writing)
 from pelican import signals

 logger = logging.getLogger(__name__)
@ -92,6 +93,8 @@ class Writer(object):
        :param path: the path to output.
        :param feed_type: the feed type to use (atom or rss)
        """
+        if not is_selected_for_writing(self.settings, path):
+            return
        old_locale = locale.setlocale(locale.LC_ALL)
        locale.setlocale(locale.LC_ALL, str('C'))
        try:
@ -140,7 +143,9 @@ class Writer(object):
        :param **kwargs: additional variables to pass to the templates
        """

-        if name is False:
+        if name is False or name == "" or\
+           not is_selected_for_writing(self.settings,\
+               os.path.join(self.output_path, name)):
            return
        elif not name:
            # other stuff, just return for now
--- a/setup.py
+++ b/setup.py
@ -2,7 +2,7 @@
 from setuptools import setup

 requires = ['feedgenerator >= 1.6', 'jinja2 >= 2.7', 'pygments', 'docutils',
-            'pytz >= 0a', 'blinker', 'unidecode', 'six']
+            'pytz >= 0a', 'blinker', 'unidecode', 'six', 'python-dateutil']

 entry_points = {
    'console_scripts': [
--- a/tox.ini
+++ b/tox.ini
@ -2,7 +2,7 @@
 # depends on some external libraries that aren't released yet.

 [tox]
-envlist = py27,py33
+envlist = py27,py33,py34

 [testenv]
 commands =