diff --git a/docs/content.rst b/docs/content.rst index 5a1f3ad0..570a32d1 100644 --- a/docs/content.rst +++ b/docs/content.rst @@ -151,24 +151,25 @@ Linking to internal content From Pelican 3.1 onwards, it is now possible to specify intra-site links to files in the *source content* hierarchy instead of files in the *generated* -hierarchy. This makes it easier to link from the current post to other posts -and images that may be sitting alongside the current post (instead of having -to determine where those resources will be placed after site generation). +hierarchy. This makes it easier to link from the current post to other content +that may be sitting alongside that post (instead of having to determine where +the other content will be placed after site generation). To link to internal content (files in the ``content`` directory), use the -following syntax: ``{filename}path/to/file``:: +following syntax for the link target: ``{filename}path/to/file`` +For example, a Pelican project might be structured like this:: website/ ├── content - │   ├── article1.rst - │   ├── cat/ - │   │   └── article2.md + │   ├── category/ + │   │   └── article1.rst + │   ├── article2.md │ └── pages │      └── about.md └── pelican.conf.py -In this example, ``article1.rst`` could look like:: +In this example, ``article1.rst`` could look like this:: The first article ################# @@ -177,8 +178,8 @@ In this example, ``article1.rst`` could look like:: See below intra-site link examples in reStructuredText format. - `a link relative to content root <{filename}/cat/article2.rst>`_ - `a link relative to current file <{filename}cat/article2.rst>`_ + `a link relative to the current file <{filename}../article2.md>`_ + `a link relative to the content root <{filename}/article2.md>`_ and ``article2.md``:: @@ -187,43 +188,154 @@ and ``article2.md``:: See below intra-site link examples in Markdown format. 
- [a link relative to content root]({filename}/article1.md) - [a link relative to current file]({filename}../article1.md) + [a link relative to the current file]({filename}category/article1.rst) + [a link relative to the content root]({filename}/category/article1.rst) -Embedding non-article or non-page content is slightly different in that the -directories need to be specified in ``pelicanconf.py`` file. The ``images`` -directory is configured for this by default but others will need to be added -manually:: +Linking to static files +----------------------- + +Linking to non-article or non-page content uses the same ``{filename}`` syntax +as described above. It is important to remember that those files will not be +copied to the output directory unless the source directories containing them +are included in the ``STATIC_PATHS`` setting of the project's ``pelicanconf.py`` +file. Pelican's default configuration includes the ``images`` directory for +this, but others must be added manually. Forgetting to do so will result in +broken links. + +For example, a project's content directory might be structured like this:: content ├── images │   └── han.jpg - └── misc -    └── image-test.md + ├── pdfs + │   └── menu.pdf + └── pages +    └── test.md -And ``image-test.md`` would include:: +``test.md`` would include:: ![Alt Text]({filename}/images/han.jpg) + [Our Menu]({filename}/pdfs/menu.pdf) -Any content can be linked in this way. What happens is that the ``images`` -directory gets copied to ``output/`` during site generation because Pelican -includes ``images`` in the ``STATIC_PATHS`` setting's list by default. 
If -you want to have another directory, say ``pdfs``, copied from your content to -your output during site generation, you would need to add the following to -your settings file:: +``pelicanconf.py`` would include:: STATIC_PATHS = ['images', 'pdfs'] -After the above line has been added, subsequent site generation should copy the -``content/pdfs/`` directory to ``output/pdfs/``. +Site generation would then copy ``han.jpg`` to ``output/images/han.jpg``, +``menu.pdf`` to ``output/pdfs/menu.pdf``, and write the appropriate links +in ``test.md``. -You can also link to categories or tags, using the ``{tag}tagname`` and +Mixed content in the same directory +----------------------------------- + +Starting with Pelican 3.5, static files can safely share a source directory with +page source files, without exposing the page sources in the generated site. +Any such directory must be added to both ``STATIC_PATHS`` and ``PAGE_PATHS`` +(or ``STATIC_PATHS`` and ``ARTICLE_PATHS``). Pelican will identify and process +the page source files normally, and copy the remaining files as if they lived +in a separate directory reserved for static files. + +Note: Placing static and content source files together in the same source +directory does not guarantee that they will end up in the same place in the +generated site. The easiest way to keep them together is by using the ``{attach}`` link +syntax (described below). Alternatively, the ``STATIC_SAVE_AS``, +``PAGE_SAVE_AS``, and ``ARTICLE_SAVE_AS`` settings (and the corresponding +``*_URL`` settings) can be configured to place files of different types +together, just as they could in earlier versions of Pelican. + +Attaching static files +---------------------- + +Starting with Pelican 3.5, static files can be "attached" to a page or article +using this syntax for the link target: ``{attach}path/to/file``. This works +like the ``{filename}`` syntax, but also relocates the static file into the +linking document's output directory. 
If the static file originates from a +subdirectory beneath the linking document's source, that relationship will be +preserved on output. Otherwise, it will become a sibling of the linking +document. + +This only works for linking to static files, and only when they originate from +a directory included in the ``STATIC_PATHS`` setting. + +For example, a project's content directory might be structured like this:: + + content + ├── blog + │   ├── icons + │   │   └── icon.png + │   ├── photo.jpg + │   └── testpost.md + └── downloads + └── archive.zip + +``pelicanconf.py`` would include:: + + PATH = 'content' + STATIC_PATHS = ['blog', 'downloads'] + ARTICLE_PATHS = ['blog'] + ARTICLE_SAVE_AS = '{date:%Y}/{slug}.html' + ARTICLE_URL = '{date:%Y}/{slug}.html' + +``testpost.md`` would include:: + + Title: Test Post + Category: test + Date: 2014-10-31 + + ![Icon]({attach}icons/icon.png) + ![Photo]({attach}photo.jpg) + [Downloadable File]({attach}/downloads/archive.zip) + +Site generation would then produce an output directory structured like this:: + + output + └── 2014 + ├── archive.zip + ├── icons + │   └── icon.png + ├── photo.jpg + └── test-post.html + +Notice that all the files linked using ``{attach}`` ended up in or beneath +the article's output directory. + +If a static file is linked multiple times, the relocating feature of +``{attach}`` will only work in the first of those links to be processed. +After the first link, Pelican will treat ``{attach}`` like ``{filename}``. +This avoids breaking the already-processed links. + +**Be careful when linking to a file from multiple documents:** +Since the first link to a file finalizes its location and Pelican does +not define the order in which documents are processed, using ``{attach}`` on a +file linked by multiple documents can cause its location to change from one +site build to the next. 
(Whether this happens in practice will depend on the +operating system, file system, version of Pelican, and documents being added, +modified, or removed from the project.) Any external sites linking to the +file's old location might then find their links broken. **It is therefore +advisable to use {attach} only if you use it in all links to a file, and only +if the linking documents share a single directory.** Under these conditions, +the file's output location will not change in future builds. In cases where +these precautions are not possible, consider using ``{filename}`` links instead +of ``{attach}``, and letting the file's location be determined by the project's +``STATIC_SAVE_AS`` and ``STATIC_URL`` settings. (Per-file ``save_as`` and +``url`` overrides can still be set in ``EXTRA_PATH_METADATA``.) + +Linking to tags and categories +------------------------------ + +You can link to tags and categories using the ``{tag}tagname`` and ``{category}foobar`` syntax. -For backward compatibility, Pelican also supports bars (``||``) in addition to -curly braces (``{}``). For example: ``|filename|an_article.rst``, -``|tag|tagname``, ``|category|foobar``. The syntax was changed from ``||`` to -``{}`` to avoid collision with Markdown extensions or reST directives. +Deprecated internal link syntax +------------------------------- + +To remain compatible with earlier versions, Pelican still supports vertical bars +(``||``) in addition to curly braces (``{}``) for internal links. For example: +``|filename|an_article.rst``, ``|tag|tagname``, ``|category|foobar``. +The syntax was changed from ``||`` to ``{}`` to avoid collision with Markdown +extensions or reST directives. Support for the old syntax may eventually be +removed. 
+ Importing an existing site ========================== diff --git a/pelican/__init__.py b/pelican/__init__.py index 076375ba..d0056ded 100644 --- a/pelican/__init__.py +++ b/pelican/__init__.py @@ -208,7 +208,9 @@ class Pelican(object): logger.debug('Found generator: %s', v) generators.append(v) - # StaticGenerator runs last so it can see which files the others handle + # StaticGenerator must run last, so it can identify files that + # were skipped by the other generators, and so static files can + # have their output paths overridden by the {attach} link syntax. generators.append(StaticGenerator) return generators diff --git a/pelican/contents.py b/pelican/contents.py index 01f51651..beff2106 100644 --- a/pelican/contents.py +++ b/pelican/contents.py @@ -219,7 +219,7 @@ class Content(object): origin = m.group('path') # XXX Put this in a different location. - if what == 'filename': + if what in {'filename', 'attach'}: if path.startswith('/'): path = path[1:] else: @@ -234,9 +234,16 @@ class Content(object): if unquoted_path in self._context['filenames']: path = unquoted_path - if self._context['filenames'].get(path): - origin = '/'.join((siteurl, - self._context['filenames'][path].url)) + linked_content = self._context['filenames'].get(path) + if linked_content: + if what == 'attach': + if isinstance(linked_content, Static): + linked_content.attach_to(self) + else: + logger.warning("%s used {attach} link syntax on a " + "non-static file. Use {filename} instead.", + self.get_relative_source_path()) + origin = '/'.join((siteurl, linked_content.url)) origin = origin.replace('\\', '/') # for Windows paths. 
else: logger.warning( @@ -359,6 +366,10 @@ class Quote(Page): @python_2_unicode_compatible class Static(Page): + def __init__(self, *args, **kwargs): + super(Static, self).__init__(*args, **kwargs) + self._output_location_referenced = False + @deprecated_attribute(old='filepath', new='source_path', since=(3, 2, 0)) def filepath(): return None @@ -371,6 +382,65 @@ class Static(Page): def dst(): return None + @property + def url(self): + # Note when url has been referenced, so we can avoid overriding it. + self._output_location_referenced = True + return super(Static, self).url + + @property + def save_as(self): + # Note when save_as has been referenced, so we can avoid overriding it. + self._output_location_referenced = True + return super(Static, self).save_as + + def attach_to(self, content): + """Override our output directory with that of the given content object. + """ + # Determine our file's new output path relative to the linking document. + # If it currently lives beneath the linking document's source directory, + # preserve that relationship on output. Otherwise, make it a sibling. + linking_source_dir = os.path.dirname(content.source_path) + tail_path = os.path.relpath(self.source_path, linking_source_dir) + if tail_path.startswith(os.pardir + os.sep): + tail_path = os.path.basename(tail_path) + new_save_as = os.path.join( + os.path.dirname(content.save_as), tail_path) + + # We do not build our new url by joining tail_path with the linking + # document's url, because we cannot know just by looking at the latter + # whether it points to the document itself or to its parent directory. + # (An url like 'some/content' might mean a directory named 'some' + # with a file named 'content', or it might mean a directory named + # 'some/content' with a file named 'index.html'.) Rather than trying + # to figure it out by comparing the linking document's url and save_as + # path, we simply build our new url from our new save_as path. 
+ new_url = path_to_url(new_save_as) + + def _log_reason(reason): + logger.warning("The {attach} link in %s cannot relocate %s " + "because %s. Falling back to {filename} link behavior instead.", + content.get_relative_source_path(), + self.get_relative_source_path(), reason, + extra={'limit_msg': "More {attach} warnings silenced."}) + + # We never override an override, because we don't want to interfere + # with user-defined overrides that might be in EXTRA_PATH_METADATA. + if hasattr(self, 'override_save_as') or hasattr(self, 'override_url'): + if new_save_as != self.save_as or new_url != self.url: + _log_reason("its output location was already overridden") + return + + # We never change an output path that has already been referenced, + # because we don't want to break links that depend on that path. + if self._output_location_referenced: + if new_save_as != self.save_as or new_url != self.url: + _log_reason("another link already referenced its location") + return + + self.override_save_as = new_save_as + self.override_url = new_url + def is_valid_content(content, f): try: diff --git a/pelican/tests/test_contents.py b/pelican/tests/test_contents.py index e64b3804..01ee9ca2 100644 --- a/pelican/tests/test_contents.py +++ b/pelican/tests/test_contents.py @@ -4,12 +4,13 @@ from __future__ import unicode_literals, absolute_import import six from sys import platform import locale +import os.path from pelican.tests.support import unittest, get_settings -from pelican.contents import Page, Article, URLWrapper +from pelican.contents import Page, Article, Static, URLWrapper from pelican.settings import DEFAULT_CONFIG -from pelican.utils import truncate_html_words, SafeDatetime +from pelican.utils import path_to_url, truncate_html_words, SafeDatetime from pelican.signals import content_object_init from jinja2.utils import generate_lorem_ipsum @@ -401,6 +402,148 @@ class TestArticle(TestPage): self.assertEqual(article.save_as, 'obrien/csharp-stuff/fnord/index.html') +class 
TestStatic(unittest.TestCase): + + def setUp(self): + + self.settings = get_settings( + STATIC_SAVE_AS='{path}', + STATIC_URL='{path}', + PAGE_SAVE_AS=os.path.join('outpages', '{slug}.html'), + PAGE_URL='outpages/{slug}.html') + self.context = self.settings.copy() + + self.static = Static(content=None, metadata={}, settings=self.settings, + source_path=os.path.join('dir', 'foo.jpg'), context=self.context) + + self.context['filenames'] = {self.static.source_path: self.static} + + def tearDown(self): + pass + + def test_attach_to_same_dir(self): + """attach_to() overrides a static file's save_as and url. + """ + page = Page(content="fake page", + metadata={'title': 'fakepage'}, settings=self.settings, + source_path=os.path.join('dir', 'fakepage.md')) + self.static.attach_to(page) + + expected_save_as = os.path.join('outpages', 'foo.jpg') + self.assertEqual(self.static.save_as, expected_save_as) + self.assertEqual(self.static.url, path_to_url(expected_save_as)) + + def test_attach_to_parent_dir(self): + """attach_to() preserves dirs inside the linking document dir. + """ + page = Page(content="fake page", metadata={'title': 'fakepage'}, + settings=self.settings, source_path='fakepage.md') + self.static.attach_to(page) + + expected_save_as = os.path.join('outpages', 'dir', 'foo.jpg') + self.assertEqual(self.static.save_as, expected_save_as) + self.assertEqual(self.static.url, path_to_url(expected_save_as)) + + def test_attach_to_other_dir(self): + """attach_to() ignores dirs outside the linking document dir. 
+ """ + page = Page(content="fake page", + metadata={'title': 'fakepage'}, settings=self.settings, + source_path=os.path.join('dir', 'otherdir', 'fakepage.md')) + self.static.attach_to(page) + + expected_save_as = os.path.join('outpages', 'foo.jpg') + self.assertEqual(self.static.save_as, expected_save_as) + self.assertEqual(self.static.url, path_to_url(expected_save_as)) + + def test_attach_to_ignores_subsequent_calls(self): + """attach_to() does nothing when called a second time. + """ + page = Page(content="fake page", + metadata={'title': 'fakepage'}, settings=self.settings, + source_path=os.path.join('dir', 'fakepage.md')) + + self.static.attach_to(page) + + otherdir_settings = self.settings.copy() + otherdir_settings.update(dict( + PAGE_SAVE_AS=os.path.join('otherpages', '{slug}.html'), + PAGE_URL='otherpages/{slug}.html')) + otherdir_page = Page(content="other page", + metadata={'title': 'otherpage'}, settings=otherdir_settings, + source_path=os.path.join('dir', 'otherpage.md')) + + self.static.attach_to(otherdir_page) + + otherdir_save_as = os.path.join('otherpages', 'foo.jpg') + self.assertNotEqual(self.static.save_as, otherdir_save_as) + self.assertNotEqual(self.static.url, path_to_url(otherdir_save_as)) + + def test_attach_to_does_nothing_after_save_as_referenced(self): + """attach_to() does nothing if the save_as was already referenced. + (For example, by a {filename} link in a document processed earlier.) + """ + original_save_as = self.static.save_as + + page = Page(content="fake page", + metadata={'title': 'fakepage'}, settings=self.settings, + source_path=os.path.join('dir', 'fakepage.md')) + self.static.attach_to(page) + + self.assertEqual(self.static.save_as, original_save_as) + self.assertEqual(self.static.url, path_to_url(original_save_as)) + + def test_attach_to_does_nothing_after_url_referenced(self): + """attach_to() does nothing if the url was already referenced. + (For example, by a {filename} link in a document processed earlier.) 
+ """ + original_url = self.static.url + + page = Page(content="fake page", + metadata={'title': 'fakepage'}, settings=self.settings, + source_path=os.path.join('dir', 'fakepage.md')) + self.static.attach_to(page) + + self.assertEqual(self.static.save_as, self.static.source_path) + self.assertEqual(self.static.url, original_url) + + def test_attach_to_does_not_override_an_override(self): + """attach_to() does not override paths that were overridden elsewhere. + (For example, by the user with EXTRA_PATH_METADATA) + """ + customstatic = Static(content=None, + metadata=dict(save_as='customfoo.jpg', url='customfoo.jpg'), + settings=self.settings, + source_path=os.path.join('dir', 'foo.jpg'), + context=self.settings.copy()) + + page = Page(content="fake page", + metadata={'title': 'fakepage'}, settings=self.settings, + source_path=os.path.join('dir', 'fakepage.md')) + + customstatic.attach_to(page) + + self.assertEqual(customstatic.save_as, 'customfoo.jpg') + self.assertEqual(customstatic.url, 'customfoo.jpg') + + def test_attach_link_syntax(self): + """{attach} link syntax triggers output path override & url replacement. 
+ """ + html = 'link' + page = Page(content=html, + metadata={'title': 'fakepage'}, settings=self.settings, + source_path=os.path.join('dir', 'otherdir', 'fakepage.md'), + context=self.context) + content = page.get_content('') + + self.assertNotEqual(content, html, + "{attach} link syntax did not trigger URL replacement.") + + expected_save_as = os.path.join('outpages', 'foo.jpg') + self.assertEqual(self.static.save_as, expected_save_as) + self.assertEqual(self.static.url, path_to_url(expected_save_as)) + + class TestURLWrapper(unittest.TestCase): def test_comparisons(self): # URLWrappers are sorted by name diff --git a/pelican/tests/test_pelican.py b/pelican/tests/test_pelican.py index 83988d62..190d5e06 100644 --- a/pelican/tests/test_pelican.py +++ b/pelican/tests/test_pelican.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals, print_function +import collections import os import sys from tempfile import mkdtemp @@ -77,14 +78,17 @@ class TestPelican(LoggedTestCase): assert not err, err def test_order_of_generators(self): - # StaticGenerator must run last, so it can find files that were - # skipped by the other generators. + # StaticGenerator must run last, so it can identify files that + # were skipped by the other generators, and so static files can + # have their output paths overridden by the {attach} link syntax. pelican = Pelican(settings=read_settings(path=None)) generator_classes = pelican.get_generator_classes() self.assertTrue(generator_classes[-1] is StaticGenerator, "StaticGenerator must be the last generator, but it isn't!") + self.assertIsInstance(generator_classes, collections.Sequence, + "get_generator_classes() must return a Sequence to preserve order") def test_basic_generation_works(self): # when running pelican without settings, it should pick up the default