Make StaticGenerator skip content sources. Refs #1019.

This change partially addresses issue #1019 by teaching Pelican to distinguish
between static files and content source files. A user can now safely add the
same directory to both STATIC_PATHS and PAGE_PATHS (or ARTICLE_PATHS). Pelican
will then process the content source files in that directory normally, and
treat the remaining files as static, without copying the raw content source
files to the output directory. (The OUTPUT_SOURCES setting still works.)

In other words, images and markdown/reST files can now safely live together.

To keep those files together in the generated site, STATIC_SAVE_AS and
PAGE_SAVE_AS (or ARTICLE_SAVE_AS) should point to the same output directory.

There are two new configuration settings:

STATIC_EXCLUDES=[]  # This works just like PAGE_EXCLUDES and ARTICLE_EXCLUDES.
STATIC_EXCLUDE_SOURCES=True  # Set this to False to get the old behavior.

Two small but noteworthy internal changes:

StaticGenerator now runs after all the other generators. This allows it to see
which files are meant to be processed by other generators, and avoid them.

Generators now include files that they fail to process (e.g. those with missing
mandatory metadata) along with all the other paths in context['filenames'].
This allows such files to be excluded from StaticGenerator's file list, so they
won't end up accidentally published. Since these files have no Content object,
their value in context['filenames'] is None. The code that uses that dict has
been updated accordingly.
This commit is contained in:
Forest 2014-10-18 13:11:59 -07:00
commit 48f4f0850d
10 changed files with 131 additions and 10 deletions

View file

@ -142,10 +142,15 @@ Setting name (followed by default value, if any)
slash at the end. Example: ``SITEURL = 'http://mydomain.com'``
``TEMPLATE_PAGES = None`` A mapping containing template pages that will be rendered with
the blog entries. See :ref:`template_pages`.
``STATIC_PATHS = ['images']`` The static paths you want to have accessible
on the output path "static". By default,
Pelican will copy the "images" folder to the
output folder.
``STATIC_PATHS = ['images']`` A list of directories (relative to ``PATH``) in which to look for
static files. Such files will be copied to the output directory
without modification. Articles, pages, and other content source
files will normally be skipped, so it is safe for a directory to
appear both here and in ``PAGE_PATHS`` or ``ARTICLE_PATHS``.
Pelican's default settings include the "images" directory here.
``STATIC_EXCLUDES = []`` A list of directories to exclude when looking for static files.
``STATIC_EXCLUDE_SOURCES = True`` If set to False, content source files will not be skipped when
copying files found in ``STATIC_PATHS``.
``TIMEZONE`` The timezone used in the date information, to
generate Atom and RSS feeds. See the *Timezone*
section below for more info.

View file

@ -144,8 +144,10 @@ class Pelican(object):
start_time = time.time()
context = self.settings.copy()
context['filenames'] = {} # share the dict between all the generators
context['localsiteurl'] = self.settings['SITEURL'] # share
# Share these among all the generators and content objects:
context['filenames'] = {} # maps source path to Content object or None
context['localsiteurl'] = self.settings['SITEURL']
generators = [
cls(
context=context,
@ -188,7 +190,7 @@ class Pelican(object):
time.time() - start_time))
def get_generator_classes(self):
generators = [StaticGenerator, ArticlesGenerator, PagesGenerator]
generators = [ArticlesGenerator, PagesGenerator]
if self.settings['TEMPLATE_PAGES']:
generators.append(TemplatePagesGenerator)
@ -206,6 +208,8 @@ class Pelican(object):
logger.debug('Found generator: %s', v)
generators.append(v)
# StaticGenerator runs last so it can see which files the others handle
generators.append(StaticGenerator)
return generators
def get_writer(self):

View file

@ -234,7 +234,7 @@ class Content(object):
if unquoted_path in self._context['filenames']:
path = unquoted_path
if path in self._context['filenames']:
if self._context['filenames'].get(path):
origin = '/'.join((siteurl,
self._context['filenames'][path].url))
origin = origin.replace('\\', '/') # for Windows paths.

View file

@ -141,9 +141,27 @@ class Generator(object):
return files
def add_source_path(self, content):
    """Register a successfully processed source file in the shared context.

    The Content object is stored in context['filenames'] under its relative
    source path, so that its url can be looked up from that path later.
    """
    relative_path = content.get_relative_source_path()
    self.context['filenames'].update({relative_path: content})
def _add_failed_source_path(self, path):
    """Register a source file that a Generator could not process.

    Such a file (for example, one missing mandatory metadata) has no
    Content object, so its entry in context['filenames'] is None.
    The path argument is expected to be relative to self.path; it is
    normalized before being used as the key.
    """
    known_sources = self.context['filenames']
    known_sources[os.path.normpath(path)] = None
def _is_potential_source_path(self, path):
    """Tell whether path has been claimed as a content source file.

    True is returned for every path already recorded in
    context['filenames'] when this method is called, including files that
    a generator found but failed to process.
    The path argument is expected to be relative to self.path.
    """
    candidate = os.path.normpath(path)
    return candidate in self.context['filenames']
def _update_context(self, items):
"""Update the context with the given items from the current
processor.
@ -477,9 +495,11 @@ class ArticlesGenerator(CachingGenerator):
except Exception as e:
logger.error('Could not process %s\n%s', f, e,
exc_info=self.settings.get('DEBUG', False))
self._add_failed_source_path(f)
continue
if not is_valid_content(article, f):
self._add_failed_source_path(f)
continue
self.cache_data(f, article)
@ -602,9 +622,11 @@ class PagesGenerator(CachingGenerator):
except Exception as e:
logger.error('Could not process %s\n%s', f, e,
exc_info=self.settings.get('DEBUG', False))
self._add_failed_source_path(f)
continue
if not is_valid_content(page, f):
self._add_failed_source_path(f)
continue
self.cache_data(f, page)
@ -663,7 +685,14 @@ class StaticGenerator(Generator):
def generate_context(self):
self.staticfiles = []
for f in self.get_files(self.settings['STATIC_PATHS'],
exclude=self.settings['STATIC_EXCLUDES'],
extensions=False):
# skip content source files unless the user explicitly wants them
if self.settings['STATIC_EXCLUDE_SOURCES']:
if self._is_potential_source_path(f):
continue
static = self.readers.read_file(
base_path=self.path, path=f, content_class=Static,
fmt='static', context=self.context,

View file

@ -36,7 +36,9 @@ DEFAULT_CONFIG = {
'THEME': DEFAULT_THEME,
'OUTPUT_PATH': 'output',
'READERS': {},
'STATIC_PATHS': ['images', ],
'STATIC_PATHS': ['images'],
'STATIC_EXCLUDES': [],
'STATIC_EXCLUDE_SOURCES': True,
'THEME_STATIC_DIR': 'theme',
'THEME_STATIC_PATHS': ['static', ],
'FEED_ALL_ATOM': os.path.join('feeds', 'all.atom.xml'),
@ -339,6 +341,7 @@ def configure_settings(settings):
'JINJA_EXTENSIONS',
'PAGINATED_DIRECT_TEMPLATES',
'PLUGINS',
'STATIC_EXCLUDES',
'STATIC_PATHS',
'THEME_STATIC_PATHS',
'ARTICLE_PATHS',

View file

@ -0,0 +1,3 @@
Title: Short Page
This is a page with little text.

View file

@ -12,7 +12,7 @@ from shutil import rmtree
from tempfile import mkdtemp
from pelican.generators import (Generator, ArticlesGenerator, PagesGenerator,
TemplatePagesGenerator)
StaticGenerator, TemplatePagesGenerator)
from pelican.writers import Writer
from pelican.tests.support import unittest, get_settings
import locale
@ -558,3 +558,69 @@ class TestTemplatePagesGenerator(unittest.TestCase):
# output content is correct
with open(output_path, 'r') as output_file:
self.assertEqual(output_file.read(), 'foo: bar')
class TestStaticGenerator(unittest.TestCase):
    """Tests for how StaticGenerator chooses the files it treats as static."""

    def setUp(self):
        self.content_path = os.path.join(CUR_DIR, 'mixed_content')

    def _collect_static_names(self, settings, generator_classes):
        """Run generators over a fresh context; return static file basenames.

        The generators run in the given order and share a single context,
        so a StaticGenerator listed last can see the source paths that the
        generators before it recorded in context['filenames'].
        """
        context = settings.copy()
        context['filenames'] = {}
        for generator_class in generator_classes:
            generator_class(
                context=context, settings=settings,
                path=settings['PATH'], output_path=None,
                theme=settings['THEME']).generate_context()
        return [os.path.basename(c.source_path)
                for c in context['staticfiles']]

    def test_static_excludes(self):
        """Test that StaticGenerator respects STATIC_EXCLUDES.
        """
        settings = get_settings(STATIC_EXCLUDES=['subdir'],
            PATH=self.content_path, STATIC_PATHS=[''])

        staticnames = self._collect_static_names(
            settings, (StaticGenerator,))

        self.assertNotIn('subdir_fake_image.jpg', staticnames,
            "StaticGenerator processed a file in a STATIC_EXCLUDES directory")
        self.assertIn('fake_image.jpg', staticnames,
            "StaticGenerator skipped a file that it should have included")

    def test_static_exclude_sources(self):
        """Test that StaticGenerator respects STATIC_EXCLUDE_SOURCES.
        """
        # PagesGenerator must run first so that the page sources are
        # already recorded when StaticGenerator builds its file list.
        generator_classes = (PagesGenerator, StaticGenerator)

        # Test STATIC_EXCLUDE_SOURCES=True
        settings = get_settings(STATIC_EXCLUDE_SOURCES=True,
            PATH=self.content_path, PAGE_PATHS=[''], STATIC_PATHS=[''],
            CACHE_CONTENT=False)

        staticnames = self._collect_static_names(settings, generator_classes)

        self.assertFalse(any(name.endswith(".md") for name in staticnames),
            "STATIC_EXCLUDE_SOURCES=True failed to exclude a markdown file")

        # Test STATIC_EXCLUDE_SOURCES=False
        settings.update(STATIC_EXCLUDE_SOURCES=False)

        staticnames = self._collect_static_names(settings, generator_classes)

        self.assertTrue(any(name.endswith(".md") for name in staticnames),
            "STATIC_EXCLUDE_SOURCES=False failed to include a markdown file")

View file

@ -10,6 +10,7 @@ import logging
import subprocess
from pelican import Pelican
from pelican.generators import StaticGenerator
from pelican.settings import read_settings
from pelican.tests.support import LoggedTestCase, mute, locale_available, unittest
@ -75,6 +76,16 @@ class TestPelican(LoggedTestCase):
assert not out, out
assert not err, err
def test_order_of_generators(self):
    """Check that get_generator_classes() lists StaticGenerator last."""
    # StaticGenerator must run last, so it can see which files the other
    # generators handle and leave those out of its own file list.
    pelican = Pelican(settings=read_settings(path=None))
    generator_classes = pelican.get_generator_classes()

    # assertIs reports both operands on failure, unlike assertTrue(a is b).
    self.assertIs(generator_classes[-1], StaticGenerator,
        "StaticGenerator must be the last generator, but it isn't!")
def test_basic_generation_works(self):
# when running pelican without settings, it should pick up the default
# ones and generate correct output without raising any exception