Make StaticGenerator skip content sources. Refs #1019.

This change partially addresses issue #1019, by teaching Pelican to distinguish
between static files and content source files. A user can now safely add the
same directory to both STATIC_PATHS and PAGE_PATHS (or ARTICLE_PATHS). Pelican
will then process the content source files in that directory normally, and
treat the remaining files as static, without copying the raw content source
files to the output directory. (The OUTPUT_SOURCES setting still works.)

In other words, images and markdown/reST files can now safely live together.

To keep those files together in the generated site, STATIC_SAVE_AS and
PAGE_SAVE_AS (or ARTICLE_SAVE_AS) should point to the same output directory.

There are two new configuration settings:

STATIC_EXCLUDES=[]  # This works just like PAGE_EXCLUDES and ARTICLE_EXCLUDES.
STATIC_EXCLUDE_SOURCES=True  # Set this to False to get the old behavior.

Two small but noteworthy internal changes:

StaticGenerator now runs after all the other generators. This allows it to see
which files are meant to be processed by other generators, and avoid them.

Generators now include files that they fail to process (e.g. those with missing
mandatory metadata) along with all the other paths in context['filenames'].
This allows such files to be excluded from StaticGenerator's file list, so they
won't end up accidentally published. Since these files have no Content object,
their value in context['filenames'] is None. The code that uses that dict has
been updated accordingly.
This commit is contained in:
Forest 2014-10-18 13:11:59 -07:00
commit 48f4f0850d
10 changed files with 131 additions and 10 deletions

View file

@ -142,10 +142,15 @@ Setting name (followed by default value, if any)
slash at the end. Example: ``SITEURL = 'http://mydomain.com'`` slash at the end. Example: ``SITEURL = 'http://mydomain.com'``
``TEMPLATE_PAGES = None`` A mapping containing template pages that will be rendered with ``TEMPLATE_PAGES = None`` A mapping containing template pages that will be rendered with
the blog entries. See :ref:`template_pages`. the blog entries. See :ref:`template_pages`.
``STATIC_PATHS = ['images']`` The static paths you want to have accessible ``STATIC_PATHS = ['images']`` A list of directories (relative to ``PATH``) in which to look for
on the output path "static". By default, static files. Such files will be copied to the output directory
Pelican will copy the "images" folder to the without modification. Articles, pages, and other content source
output folder. files will normally be skipped, so it is safe for a directory to
appear both here and in ``PAGE_PATHS`` or ``ARTICLE_PATHS``.
Pelican's default settings include the "images" directory here.
``STATIC_EXCLUDES = []`` A list of directories to exclude when looking for static files.
``STATIC_EXCLUDE_SOURCES = True`` If set to False, content source files will not be skipped when
copying files found in ``STATIC_PATHS``.
``TIMEZONE`` The timezone used in the date information, to ``TIMEZONE`` The timezone used in the date information, to
generate Atom and RSS feeds. See the *Timezone* generate Atom and RSS feeds. See the *Timezone*
section below for more info. section below for more info.

View file

@ -144,8 +144,10 @@ class Pelican(object):
start_time = time.time() start_time = time.time()
context = self.settings.copy() context = self.settings.copy()
context['filenames'] = {} # share the dict between all the generators # Share these among all the generators and content objects:
context['localsiteurl'] = self.settings['SITEURL'] # share context['filenames'] = {} # maps source path to Content object or None
context['localsiteurl'] = self.settings['SITEURL']
generators = [ generators = [
cls( cls(
context=context, context=context,
@ -188,7 +190,7 @@ class Pelican(object):
time.time() - start_time)) time.time() - start_time))
def get_generator_classes(self): def get_generator_classes(self):
generators = [StaticGenerator, ArticlesGenerator, PagesGenerator] generators = [ArticlesGenerator, PagesGenerator]
if self.settings['TEMPLATE_PAGES']: if self.settings['TEMPLATE_PAGES']:
generators.append(TemplatePagesGenerator) generators.append(TemplatePagesGenerator)
@ -206,6 +208,8 @@ class Pelican(object):
logger.debug('Found generator: %s', v) logger.debug('Found generator: %s', v)
generators.append(v) generators.append(v)
# StaticGenerator runs last so it can see which files the others handle
generators.append(StaticGenerator)
return generators return generators
def get_writer(self): def get_writer(self):

View file

@ -234,7 +234,7 @@ class Content(object):
if unquoted_path in self._context['filenames']: if unquoted_path in self._context['filenames']:
path = unquoted_path path = unquoted_path
if path in self._context['filenames']: if self._context['filenames'].get(path):
origin = '/'.join((siteurl, origin = '/'.join((siteurl,
self._context['filenames'][path].url)) self._context['filenames'][path].url))
origin = origin.replace('\\', '/') # for Windows paths. origin = origin.replace('\\', '/') # for Windows paths.

View file

@ -141,9 +141,27 @@ class Generator(object):
return files return files
def add_source_path(self, content): def add_source_path(self, content):
"""Record a source file path that a Generator found and processed.
Store a reference to its Content object, for url lookups later.
"""
location = content.get_relative_source_path() location = content.get_relative_source_path()
self.context['filenames'][location] = content self.context['filenames'][location] = content
def _add_failed_source_path(self, path):
"""Record a source file path that a Generator failed to process.
(For example, one that was missing mandatory metadata.)
The path argument is expected to be relative to self.path.
"""
self.context['filenames'][os.path.normpath(path)] = None
def _is_potential_source_path(self, path):
"""Return True if path was supposed to be used as a source file.
(This includes all source files that have been found by generators
before this method is called, even if they failed to process.)
The path argument is expected to be relative to self.path.
"""
return os.path.normpath(path) in self.context['filenames']
def _update_context(self, items): def _update_context(self, items):
"""Update the context with the given items from the currrent """Update the context with the given items from the currrent
processor. processor.
@ -477,9 +495,11 @@ class ArticlesGenerator(CachingGenerator):
except Exception as e: except Exception as e:
logger.error('Could not process %s\n%s', f, e, logger.error('Could not process %s\n%s', f, e,
exc_info=self.settings.get('DEBUG', False)) exc_info=self.settings.get('DEBUG', False))
self._add_failed_source_path(f)
continue continue
if not is_valid_content(article, f): if not is_valid_content(article, f):
self._add_failed_source_path(f)
continue continue
self.cache_data(f, article) self.cache_data(f, article)
@ -602,9 +622,11 @@ class PagesGenerator(CachingGenerator):
except Exception as e: except Exception as e:
logger.error('Could not process %s\n%s', f, e, logger.error('Could not process %s\n%s', f, e,
exc_info=self.settings.get('DEBUG', False)) exc_info=self.settings.get('DEBUG', False))
self._add_failed_source_path(f)
continue continue
if not is_valid_content(page, f): if not is_valid_content(page, f):
self._add_failed_source_path(f)
continue continue
self.cache_data(f, page) self.cache_data(f, page)
@ -663,7 +685,14 @@ class StaticGenerator(Generator):
def generate_context(self): def generate_context(self):
self.staticfiles = [] self.staticfiles = []
for f in self.get_files(self.settings['STATIC_PATHS'], for f in self.get_files(self.settings['STATIC_PATHS'],
exclude=self.settings['STATIC_EXCLUDES'],
extensions=False): extensions=False):
# skip content source files unless the user explicitly wants them
if self.settings['STATIC_EXCLUDE_SOURCES']:
if self._is_potential_source_path(f):
continue
static = self.readers.read_file( static = self.readers.read_file(
base_path=self.path, path=f, content_class=Static, base_path=self.path, path=f, content_class=Static,
fmt='static', context=self.context, fmt='static', context=self.context,

View file

@ -36,7 +36,9 @@ DEFAULT_CONFIG = {
'THEME': DEFAULT_THEME, 'THEME': DEFAULT_THEME,
'OUTPUT_PATH': 'output', 'OUTPUT_PATH': 'output',
'READERS': {}, 'READERS': {},
'STATIC_PATHS': ['images', ], 'STATIC_PATHS': ['images'],
'STATIC_EXCLUDES': [],
'STATIC_EXCLUDE_SOURCES': True,
'THEME_STATIC_DIR': 'theme', 'THEME_STATIC_DIR': 'theme',
'THEME_STATIC_PATHS': ['static', ], 'THEME_STATIC_PATHS': ['static', ],
'FEED_ALL_ATOM': os.path.join('feeds', 'all.atom.xml'), 'FEED_ALL_ATOM': os.path.join('feeds', 'all.atom.xml'),
@ -339,6 +341,7 @@ def configure_settings(settings):
'JINJA_EXTENSIONS', 'JINJA_EXTENSIONS',
'PAGINATED_DIRECT_TEMPLATES', 'PAGINATED_DIRECT_TEMPLATES',
'PLUGINS', 'PLUGINS',
'STATIC_EXCLUDES',
'STATIC_PATHS', 'STATIC_PATHS',
'THEME_STATIC_PATHS', 'THEME_STATIC_PATHS',
'ARTICLE_PATHS', 'ARTICLE_PATHS',

View file

@ -0,0 +1,3 @@
Title: Short Page
This is a page with little text.

View file

@ -12,7 +12,7 @@ from shutil import rmtree
from tempfile import mkdtemp from tempfile import mkdtemp
from pelican.generators import (Generator, ArticlesGenerator, PagesGenerator, from pelican.generators import (Generator, ArticlesGenerator, PagesGenerator,
TemplatePagesGenerator) StaticGenerator, TemplatePagesGenerator)
from pelican.writers import Writer from pelican.writers import Writer
from pelican.tests.support import unittest, get_settings from pelican.tests.support import unittest, get_settings
import locale import locale
@ -558,3 +558,69 @@ class TestTemplatePagesGenerator(unittest.TestCase):
# output content is correct # output content is correct
with open(output_path, 'r') as output_file: with open(output_path, 'r') as output_file:
self.assertEqual(output_file.read(), 'foo: bar') self.assertEqual(output_file.read(), 'foo: bar')
class TestStaticGenerator(unittest.TestCase):
    """Check which files StaticGenerator picks up from ``mixed_content``."""

    def setUp(self):
        self.content_path = os.path.join(CUR_DIR, 'mixed_content')

    def _static_names(self, generator_classes, **overrides):
        """Run generators in order and return the static files' basenames.

        A fresh settings dict and context are built on every call, so each
        run starts with an empty ``context['filenames']``.  Every class in
        ``generator_classes`` is instantiated against that shared context
        and has ``generate_context()`` called, in the order given.
        ``overrides`` are passed to ``get_settings`` alongside ``PATH`` and
        ``STATIC_PATHS=['']``.
        """
        settings = get_settings(PATH=self.content_path, STATIC_PATHS=[''],
                                **overrides)
        context = settings.copy()
        context['filenames'] = {}
        for generator_class in generator_classes:
            generator_class(
                context=context, settings=settings,
                path=settings['PATH'], output_path=None,
                theme=settings['THEME']).generate_context()
        return [os.path.basename(c.source_path)
                for c in context['staticfiles']]

    def test_static_excludes(self):
        """Test that StaticGenerator respects STATIC_EXCLUDES.
        """
        staticnames = self._static_names(
            (StaticGenerator,), STATIC_EXCLUDES=['subdir'])

        self.assertNotIn(
            'subdir_fake_image.jpg', staticnames,
            "StaticGenerator processed a file in a STATIC_EXCLUDES directory")
        self.assertIn(
            'fake_image.jpg', staticnames,
            "StaticGenerator skipped a file that it should have included")

    def test_static_exclude_sources(self):
        """Test that StaticGenerator respects STATIC_EXCLUDE_SOURCES.
        """
        # PagesGenerator goes first so that the page sources are already
        # recorded in context['filenames'] when StaticGenerator runs.
        generators = (PagesGenerator, StaticGenerator)

        # Test STATIC_EXCLUDE_SOURCES=True
        staticnames = self._static_names(
            generators, STATIC_EXCLUDE_SOURCES=True,
            PAGE_PATHS=[''], CACHE_CONTENT=False)
        self.assertFalse(
            any(name.endswith(".md") for name in staticnames),
            "STATIC_EXCLUDE_SOURCES=True failed to exclude a markdown file")

        # Test STATIC_EXCLUDE_SOURCES=False
        staticnames = self._static_names(
            generators, STATIC_EXCLUDE_SOURCES=False,
            PAGE_PATHS=[''], CACHE_CONTENT=False)
        self.assertTrue(
            any(name.endswith(".md") for name in staticnames),
            "STATIC_EXCLUDE_SOURCES=False failed to include a markdown file")

View file

@ -10,6 +10,7 @@ import logging
import subprocess import subprocess
from pelican import Pelican from pelican import Pelican
from pelican.generators import StaticGenerator
from pelican.settings import read_settings from pelican.settings import read_settings
from pelican.tests.support import LoggedTestCase, mute, locale_available, unittest from pelican.tests.support import LoggedTestCase, mute, locale_available, unittest
@ -75,6 +76,16 @@ class TestPelican(LoggedTestCase):
assert not out, out assert not out, out
assert not err, err assert not err, err
def test_order_of_generators(self):
    # StaticGenerator must run last, so that it can see which files the
    # other generators have already claimed as content sources and
    # leave those out of the static file list.
    pelican = Pelican(settings=read_settings(path=None))
    generator_classes = pelican.get_generator_classes()

    # assertIs shows both objects when it fails, whereas
    # assertTrue(a is b) would only report "False is not true".
    self.assertIs(
        generator_classes[-1], StaticGenerator,
        "StaticGenerator must be the last generator, but it isn't!")
def test_basic_generation_works(self): def test_basic_generation_works(self):
# when running pelican without settings, it should pick up the default # when running pelican without settings, it should pick up the default
# ones and generate correct output without raising any exception # ones and generate correct output without raising any exception