From 48f4f0850d7fbbe2b03f1fbd309768b7ae940822 Mon Sep 17 00:00:00 2001
From: Forest
Date: Sat, 18 Oct 2014 13:11:59 -0700
Subject: [PATCH] Make StaticGenerator skip content sources. Refs #1019.

This change partially addresses issue #1019, by teaching Pelican to
distinguish between static files and content source files. A user can now
safely add the same directory to both STATIC_PATHS and PAGE_PATHS (or
ARTICLE_PATHS). Pelican will then process the content source files in that
directory normally, and treat the remaining files as static, without
copying the raw content source files to the output directory. (The
OUTPUT_SOURCES setting still works.)

In other words, images and markdown/reST files can now safely live
together.

To keep those files together in the generated site, STATIC_SAVE_AS and
PAGE_SAVE_AS (or ARTICLE_SAVE_AS) should point to the same output
directory.

There are two new configuration settings:

STATIC_EXCLUDES=[] # This works just like PAGE_EXCLUDES and ARTICLE_EXCLUDES.
STATIC_EXCLUDE_SOURCES=True # Set this to False to get the old behavior.

Two small but noteworthy internal changes:

StaticGenerator now runs after all the other generators. This allows it to
see which files are meant to be processed by other generators, and avoid
them.

Generators now include files that they fail to process (e.g. those with
missing mandatory metadata) along with all the other paths in
context['filenames']. This allows such files to be excluded from
StaticGenerator's file list, so they won't end up accidentally published.
Since these files have no Content object, their value in
context['filenames'] is None. The code that uses that dict has been updated
accordingly.
--- docs/settings.rst | 13 ++-- pelican/__init__.py | 10 ++- pelican/contents.py | 2 +- pelican/generators.py | 29 ++++++++ pelican/settings.py | 5 +- pelican/tests/mixed_content/fake_image.jpg | 0 pelican/tests/mixed_content/short_page.md | 3 + .../subdir/subdir_fake_image.jpg | 0 pelican/tests/test_generators.py | 68 ++++++++++++++++++- pelican/tests/test_pelican.py | 11 +++ 10 files changed, 131 insertions(+), 10 deletions(-) create mode 100644 pelican/tests/mixed_content/fake_image.jpg create mode 100644 pelican/tests/mixed_content/short_page.md create mode 100644 pelican/tests/mixed_content/subdir/subdir_fake_image.jpg diff --git a/docs/settings.rst b/docs/settings.rst index 7a3f08e6..d785cf9b 100644 --- a/docs/settings.rst +++ b/docs/settings.rst @@ -142,10 +142,15 @@ Setting name (followed by default value, if any) slash at the end. Example: ``SITEURL = 'http://mydomain.com'`` ``TEMPLATE_PAGES = None`` A mapping containing template pages that will be rendered with the blog entries. See :ref:`template_pages`. -``STATIC_PATHS = ['images']`` The static paths you want to have accessible - on the output path "static". By default, - Pelican will copy the "images" folder to the - output folder. +``STATIC_PATHS = ['images']`` A list of directories (relative to ``PATH``) in which to look for + static files. Such files will be copied to the output directory + without modification. Articles, pages, and other content source + files will normally be skipped, so it is safe for a directory to + appear both here and in ``PAGE_PATHS`` or ``ARTICLE_PATHS``. + Pelican's default settings include the "images" directory here. +``STATIC_EXCLUDES = []`` A list of directories to exclude when looking for static files. +``STATIC_EXCLUDE_SOURCES = True`` If set to False, content source files will not be skipped when + copying files found in ``STATIC_PATHS``. ``TIMEZONE`` The timezone used in the date information, to generate Atom and RSS feeds. 
See the *Timezone* section below for more info. diff --git a/pelican/__init__.py b/pelican/__init__.py index 3080be37..455201be 100644 --- a/pelican/__init__.py +++ b/pelican/__init__.py @@ -144,8 +144,10 @@ class Pelican(object): start_time = time.time() context = self.settings.copy() - context['filenames'] = {} # share the dict between all the generators - context['localsiteurl'] = self.settings['SITEURL'] # share + # Share these among all the generators and content objects: + context['filenames'] = {} # maps source path to Content object or None + context['localsiteurl'] = self.settings['SITEURL'] + generators = [ cls( context=context, @@ -188,7 +190,7 @@ class Pelican(object): time.time() - start_time)) def get_generator_classes(self): - generators = [StaticGenerator, ArticlesGenerator, PagesGenerator] + generators = [ArticlesGenerator, PagesGenerator] if self.settings['TEMPLATE_PAGES']: generators.append(TemplatePagesGenerator) @@ -206,6 +208,8 @@ class Pelican(object): logger.debug('Found generator: %s', v) generators.append(v) + # StaticGenerator runs last so it can see which files the others handle + generators.append(StaticGenerator) return generators def get_writer(self): diff --git a/pelican/contents.py b/pelican/contents.py index 2e17b56f..433d1ac7 100644 --- a/pelican/contents.py +++ b/pelican/contents.py @@ -234,7 +234,7 @@ class Content(object): if unquoted_path in self._context['filenames']: path = unquoted_path - if path in self._context['filenames']: + if self._context['filenames'].get(path): origin = '/'.join((siteurl, self._context['filenames'][path].url)) origin = origin.replace('\\', '/') # for Windows paths. diff --git a/pelican/generators.py b/pelican/generators.py index 07945f13..4d5cb6cb 100644 --- a/pelican/generators.py +++ b/pelican/generators.py @@ -141,9 +141,27 @@ class Generator(object): return files def add_source_path(self, content): + """Record a source file path that a Generator found and processed. 
+ Store a reference to its Content object, for url lookups later. + """ location = content.get_relative_source_path() self.context['filenames'][location] = content + def _add_failed_source_path(self, path): + """Record a source file path that a Generator failed to process. + (For example, one that was missing mandatory metadata.) + The path argument is expected to be relative to self.path. + """ + self.context['filenames'][os.path.normpath(path)] = None + + def _is_potential_source_path(self, path): + """Return True if path was supposed to be used as a source file. + (This includes all source files that have been found by generators + before this method is called, even if they failed to process.) + The path argument is expected to be relative to self.path. + """ + return os.path.normpath(path) in self.context['filenames'] + def _update_context(self, items): """Update the context with the given items from the currrent processor. @@ -477,9 +495,11 @@ class ArticlesGenerator(CachingGenerator): except Exception as e: logger.error('Could not process %s\n%s', f, e, exc_info=self.settings.get('DEBUG', False)) + self._add_failed_source_path(f) continue if not is_valid_content(article, f): + self._add_failed_source_path(f) continue self.cache_data(f, article) @@ -602,9 +622,11 @@ class PagesGenerator(CachingGenerator): except Exception as e: logger.error('Could not process %s\n%s', f, e, exc_info=self.settings.get('DEBUG', False)) + self._add_failed_source_path(f) continue if not is_valid_content(page, f): + self._add_failed_source_path(f) continue self.cache_data(f, page) @@ -663,7 +685,14 @@ class StaticGenerator(Generator): def generate_context(self): self.staticfiles = [] for f in self.get_files(self.settings['STATIC_PATHS'], + exclude=self.settings['STATIC_EXCLUDES'], extensions=False): + + # skip content source files unless the user explicitly wants them + if self.settings['STATIC_EXCLUDE_SOURCES']: + if self._is_potential_source_path(f): + continue + static = 
self.readers.read_file( base_path=self.path, path=f, content_class=Static, fmt='static', context=self.context, diff --git a/pelican/settings.py b/pelican/settings.py index 631c6d24..794733d7 100644 --- a/pelican/settings.py +++ b/pelican/settings.py @@ -36,7 +36,9 @@ DEFAULT_CONFIG = { 'THEME': DEFAULT_THEME, 'OUTPUT_PATH': 'output', 'READERS': {}, - 'STATIC_PATHS': ['images', ], + 'STATIC_PATHS': ['images'], + 'STATIC_EXCLUDES': [], + 'STATIC_EXCLUDE_SOURCES': True, 'THEME_STATIC_DIR': 'theme', 'THEME_STATIC_PATHS': ['static', ], 'FEED_ALL_ATOM': os.path.join('feeds', 'all.atom.xml'), @@ -339,6 +341,7 @@ def configure_settings(settings): 'JINJA_EXTENSIONS', 'PAGINATED_DIRECT_TEMPLATES', 'PLUGINS', + 'STATIC_EXCLUDES', 'STATIC_PATHS', 'THEME_STATIC_PATHS', 'ARTICLE_PATHS', diff --git a/pelican/tests/mixed_content/fake_image.jpg b/pelican/tests/mixed_content/fake_image.jpg new file mode 100644 index 00000000..e69de29b diff --git a/pelican/tests/mixed_content/short_page.md b/pelican/tests/mixed_content/short_page.md new file mode 100644 index 00000000..46ca45ac --- /dev/null +++ b/pelican/tests/mixed_content/short_page.md @@ -0,0 +1,3 @@ +Title: Short Page + +This is a page with little text. 
diff --git a/pelican/tests/mixed_content/subdir/subdir_fake_image.jpg b/pelican/tests/mixed_content/subdir/subdir_fake_image.jpg new file mode 100644 index 00000000..e69de29b diff --git a/pelican/tests/test_generators.py b/pelican/tests/test_generators.py index 470d3236..2f53ac95 100644 --- a/pelican/tests/test_generators.py +++ b/pelican/tests/test_generators.py @@ -12,7 +12,7 @@ from shutil import rmtree from tempfile import mkdtemp from pelican.generators import (Generator, ArticlesGenerator, PagesGenerator, - TemplatePagesGenerator) + StaticGenerator, TemplatePagesGenerator) from pelican.writers import Writer from pelican.tests.support import unittest, get_settings import locale @@ -558,3 +558,69 @@ class TestTemplatePagesGenerator(unittest.TestCase): # output content is correct with open(output_path, 'r') as output_file: self.assertEqual(output_file.read(), 'foo: bar') + + +class TestStaticGenerator(unittest.TestCase): + + def setUp(self): + self.content_path = os.path.join(CUR_DIR, 'mixed_content') + + def test_static_excludes(self): + """Test that StaticGenerator respects STATIC_EXCLUDES. + """ + settings = get_settings(STATIC_EXCLUDES=['subdir'], + PATH=self.content_path, STATIC_PATHS=['']) + context = settings.copy() + context['filenames'] = {} + + StaticGenerator(context=context, settings=settings, + path=settings['PATH'], output_path=None, + theme=settings['THEME']).generate_context() + + staticnames = [os.path.basename(c.source_path) + for c in context['staticfiles']] + + self.assertNotIn('subdir_fake_image.jpg', staticnames, + "StaticGenerator processed a file in a STATIC_EXCLUDES directory") + self.assertIn('fake_image.jpg', staticnames, + "StaticGenerator skipped a file that it should have included") + + def test_static_exclude_sources(self): + """Test that StaticGenerator respects STATIC_EXCLUDE_SOURCES. 
+ """ + # Test STATIC_EXCLUDE_SOURCES=True + + settings = get_settings(STATIC_EXCLUDE_SOURCES=True, + PATH=self.content_path, PAGE_PATHS=[''], STATIC_PATHS=[''], + CACHE_CONTENT=False) + context = settings.copy() + context['filenames'] = {} + + for generator_class in (PagesGenerator, StaticGenerator): + generator_class(context=context, settings=settings, + path=settings['PATH'], output_path=None, + theme=settings['THEME']).generate_context() + + staticnames = [os.path.basename(c.source_path) + for c in context['staticfiles']] + + self.assertFalse(any(name.endswith(".md") for name in staticnames), + "STATIC_EXCLUDE_SOURCES=True failed to exclude a markdown file") + + # Test STATIC_EXCLUDE_SOURCES=False + + settings.update(STATIC_EXCLUDE_SOURCES=False) + context = settings.copy() + context['filenames'] = {} + + for generator_class in (PagesGenerator, StaticGenerator): + generator_class(context=context, settings=settings, + path=settings['PATH'], output_path=None, + theme=settings['THEME']).generate_context() + + staticnames = [os.path.basename(c.source_path) + for c in context['staticfiles']] + + self.assertTrue(any(name.endswith(".md") for name in staticnames), + "STATIC_EXCLUDE_SOURCES=False failed to include a markdown file") + diff --git a/pelican/tests/test_pelican.py b/pelican/tests/test_pelican.py index c851ef81..83988d62 100644 --- a/pelican/tests/test_pelican.py +++ b/pelican/tests/test_pelican.py @@ -10,6 +10,7 @@ import logging import subprocess from pelican import Pelican +from pelican.generators import StaticGenerator from pelican.settings import read_settings from pelican.tests.support import LoggedTestCase, mute, locale_available, unittest @@ -75,6 +76,16 @@ class TestPelican(LoggedTestCase): assert not out, out assert not err, err + def test_order_of_generators(self): + # StaticGenerator must run last, so it can find files that were + # skipped by the other generators. 
+ + pelican = Pelican(settings=read_settings(path=None)) + generator_classes = pelican.get_generator_classes() + + self.assertTrue(generator_classes[-1] is StaticGenerator, + "StaticGenerator must be the last generator, but it isn't!") + def test_basic_generation_works(self): # when running pelican without settings, it should pick up the default # ones and generate correct output without raising any exception