diff --git a/docs/changelog.rst b/docs/changelog.rst
index aa594a2c..c306c342 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -5,6 +5,7 @@ Next release
============
* New signal: ``feed_generated``
+* Add support for the ``{include}`` syntax
3.7.1 (2017-01-10)
==================
diff --git a/docs/content.rst b/docs/content.rst
index 507593bf..38c90018 100644
--- a/docs/content.rst
+++ b/docs/content.rst
@@ -337,6 +337,45 @@ Linking to authors, categories, index and tags
You can link to authors, categories, index and tags using the ``{author}name``,
``{category}foobar``, ``{index}`` and ``{tag}tagname`` syntax.
+Including common text into your content
+---------------------------------------
+
+As of Pelican 3.7.2 you can include common text snippets in your content using
+the ``{include}file.ext`` syntax. You can specify semi-absolute paths starting
+from the ``PATH`` directory, e.g. ``{include}/pages/disclaimer.html``, or use
+relative paths, e.g. ``{include}notice.html``. Relative paths are resolved
+against the directory of the file containing the ``{include}``.
+For example, when you have the following content layout::
+
+    content
+    ├── notice2.html
+    └── pages
+        ├── page1.html
+        └── notice1.html
+
+Then the includes may look like::
+
+    <html>
+        <head>
+            <title>PAGE 1</title>
+        </head>
+        <body>
+            This is the content of page 1
+
+            {include}../notice2.html
+        </body>
+    </html>
+
+
+``notice2.html`` looks like::
+
+ {include}pages/notice1.html
+ This is the second warning about relative paths
+
+When using ``{include}`` it is best to blacklist the included files using the
+``IGNORE_FILES`` setting. Otherwise Pelican will try to render them as regular
+content and will most likely fail!
+
Deprecated internal link syntax
-------------------------------
diff --git a/pelican/contents.py b/pelican/contents.py
index 3d1128c9..ffa2b9a0 100644
--- a/pelican/contents.py
+++ b/pelican/contents.py
@@ -11,7 +11,7 @@ import sys
import pytz
import six
-from six.moves.urllib.parse import urlparse, urlunparse
+from six.moves.urllib.parse import unquote, urlparse, urlunparse
from pelican import signals
from pelican.settings import DEFAULT_CONFIG
@@ -151,8 +151,20 @@ class Content(object):
if 'summary' in metadata:
self._summary = metadata['summary']
+ # used for rendering {includes}
+ self._readers = None
+
signals.content_object_init.send(self)
+    @property
+    def readers(self):
+        """Return the ``Readers`` instance for this content's settings.
+
+        The instance is created on first access and cached in
+        ``self._readers`` for later calls.
+        """
+        cached = self._readers
+        if cached is None:
+            # imported lazily: a module-level import would be circular
+            from pelican.readers import Readers
+            cached = self._readers = Readers(self.settings)
+
+        return cached
+
def __str__(self):
return self.source_path or repr(self)
@@ -204,6 +216,30 @@ class Content(object):
key = key if self.in_default_lang else 'lang_%s' % key
return self._expand_settings(key)
+    def _path_replacer(self, path, relative_dir=None):
+        """Normalise an absolute or relative content path.
+
+        A leading ``/`` is dropped; any other path is joined onto
+        ``relative_dir`` (``self.relative_dir`` when that argument is
+        falsy) and passed through ``self.get_relative_source_path``.
+        When the result is not a key of ``self._context['filenames']``
+        but its URL-unquoted form is, the unquoted form is returned.
+        """
+        base_dir = relative_dir or self.relative_dir
+
+        if path.startswith('/'):
+            resolved = path[1:]
+        else:
+            # relative to the source path of this content
+            resolved = self.get_relative_source_path(
+                os.path.join(base_dir, path)
+            )
+
+        known = self._context['filenames']
+        if resolved in known:
+            return resolved
+
+        decoded = unquote(resolved)
+        return decoded if decoded in known else resolved
+
def _update_content(self, content, siteurl):
"""Update the content attribute.
@@ -235,19 +271,7 @@ class Content(object):
# XXX Put this in a different location.
if what in {'filename', 'attach'}:
- if path.startswith('/'):
- path = path[1:]
- else:
- # relative to the source path of this content
- path = self.get_relative_source_path(
- os.path.join(self.relative_dir, path)
- )
-
- if path not in self._context['filenames']:
- unquoted_path = path.replace('%20', ' ')
-
- if unquoted_path in self._context['filenames']:
- path = unquoted_path
+ path = self._path_replacer(path)
linked_content = self._context['filenames'].get(path)
if linked_content:
@@ -294,12 +318,55 @@ class Content(object):
def get_siteurl(self):
return self._context.get('localsiteurl', '')
+    def _update_includes(self, content, source_path=None, _seen=()):
+        """Expand ``{include}some.file`` directives found in ``content``.
+
+        Each directive is replaced with the text the matching reader
+        returns for the referenced file; includes inside that text are
+        expanded recursively. The directive is kept in the output as
+        ``{include}path`` (and a warning is logged) when the file does
+        not exist, when no reader is registered for its extension, or
+        when the file is already being expanded (circular include).
+
+        :param content: text in which to expand the directives
+        :param source_path: directory used to resolve relative include
+            paths; when falsy ``_path_replacer`` uses ``self.relative_dir``
+        :param _seen: internal; absolute paths of the files currently
+            being expanded, used to stop circular includes
+        :return: ``content`` with every resolvable include expanded
+        """
+        regex = r"""[{|]include[|}](?P<path>[\w./]+)"""
+        hrefs = re.compile(regex, re.X)
+
+        def replacer(m):
+            directive = ''.join(('{include}', m.group('path')))
+
+            path = self._path_replacer(m.group('path'), source_path)
+            path = posixize_path(
+                os.path.abspath(
+                    os.path.join(self.settings['PATH'], path)
+                )
+            )
+
+            if not os.path.isfile(path):
+                logger.warning("Unable to find `%s`, skipping include.", path)
+                return directive
+
+            # extension without the leading dot
+            ext = os.path.splitext(path)[1][1:]
+
+            if ext not in self.readers.reader_classes:
+                logger.warning("Unable to read `%s`, skipping include.", path)
+                return directive
+
+            # a file that (directly or indirectly) includes itself would
+            # otherwise recurse until the interpreter gives up
+            if path in _seen:
+                logger.warning(
+                    "Circular include of `%s`, skipping include.", path)
+                return directive
+
+            reader = self.readers.reader_classes[ext](self.settings)
+            text, _ = reader.read(path)
+
+            # if we recurse into another file to perform more includes
+            # self._path_replacer needs to know in which directory
+            # it operates otherwise it produces wrong paths
+            source_dir = posixize_path(os.path.dirname(path))
+
+            return self._update_includes(text, source_dir, _seen + (path,))
+
+        return hrefs.sub(replacer, content)
+
@memoized
def get_content(self, siteurl):
if hasattr(self, '_get_content'):
content = self._get_content()
else:
content = self._content
+ content = self._update_includes(content)
return self._update_content(content, siteurl)
@property
diff --git a/pelican/readers.py b/pelican/readers.py
index 46055962..10a501e7 100644
--- a/pelican/readers.py
+++ b/pelican/readers.py
@@ -432,7 +432,12 @@ class HTMLReader(BaseReader):
metadata = {}
for k in parser.metadata:
metadata[k] = self.process_metadata(k, parser.metadata[k])
- return parser.body, metadata
+
+ if parser.body:
+ return parser.body, metadata
+ else:
+ # in case we're parsing HTML includes
+ return content, metadata
class Readers(FileStampDataCacher):
diff --git a/pelican/tests/content/include.markdown b/pelican/tests/content/include.markdown
new file mode 100644
index 00000000..9055424a
--- /dev/null
+++ b/pelican/tests/content/include.markdown
@@ -0,0 +1,2 @@
+**this is Markdown**
+Here is a [link](http://MrSenko.com).
diff --git a/pelican/tests/content/include.unknown b/pelican/tests/content/include.unknown
new file mode 100644
index 00000000..9055424a
--- /dev/null
+++ b/pelican/tests/content/include.unknown
@@ -0,0 +1,2 @@
+**this is Markdown**
+Here is a [link](http://MrSenko.com).
diff --git a/pelican/tests/content/include/include3.html b/pelican/tests/content/include/include3.html
new file mode 100644
index 00000000..d65793c1
--- /dev/null
+++ b/pelican/tests/content/include/include3.html
@@ -0,0 +1,2 @@
+this file includes another in a different directory
+{include}../include1.html
diff --git a/pelican/tests/content/include/include4.html b/pelican/tests/content/include/include4.html
new file mode 100644
index 00000000..5aed913b
--- /dev/null
+++ b/pelican/tests/content/include/include4.html
@@ -0,0 +1,2 @@
+this file includes another via absolute path
+{include}/include1.html
diff --git a/pelican/tests/content/include1.html b/pelican/tests/content/include1.html
new file mode 100644
index 00000000..5d27544c
--- /dev/null
+++ b/pelican/tests/content/include1.html
@@ -0,0 +1 @@
+this content has been included
diff --git a/pelican/tests/content/include2.html b/pelican/tests/content/include2.html
new file mode 100644
index 00000000..69232fe4
--- /dev/null
+++ b/pelican/tests/content/include2.html
@@ -0,0 +1,2 @@
+this file includes another
+{include}include1.html
diff --git a/pelican/tests/test_cache.py b/pelican/tests/test_cache.py
index 1f26deec..549cacac 100644
--- a/pelican/tests/test_cache.py
+++ b/pelican/tests/test_cache.py
@@ -157,8 +157,11 @@ class TestCache(unittest.TestCase):
- 2012-11-30_md_w_filename_meta#foo-bar.md
- empty.md
- empty_with_bom.md
+
+ There are 5 more include* files which are HTML or Markdown snippets
+ and also not valid.
"""
- self.assertEqual(generator.readers.read_file.call_count, 6)
+ self.assertEqual(generator.readers.read_file.call_count, 11)
@unittest.skipUnless(MagicMock, 'Needs Mock module')
def test_article_reader_content_caching(self):
diff --git a/pelican/tests/test_contents.py b/pelican/tests/test_contents.py
index 56928b81..040f95e3 100644
--- a/pelican/tests/test_contents.py
+++ b/pelican/tests/test_contents.py
@@ -22,6 +22,8 @@ from pelican.utils import SafeDatetime, path_to_url, truncate_html_words
TEST_CONTENT = str(generate_lorem_ipsum(n=1))
TEST_SUMMARY = generate_lorem_ipsum(n=1, html=False)
+CONTENT_PATH = os.path.join(os.path.dirname(__file__), 'content')
+
class TestPage(LoggedTestCase):
@@ -418,6 +420,142 @@ class TestPage(LoggedTestCase):
'link'
)
+    def test_includes(self):
+        """``{include}`` directives are expanded, nested and path-resolved."""
+        settings = get_settings()
+        settings['PATH'] = CONTENT_PATH
+
+        args = self.page_kwargs.copy()
+        args['settings'] = settings
+        args['source_path'] = CONTENT_PATH
+        args['context']['filenames'] = {}
+
+        def render(text):
+            # build a fresh Page for every case so nothing is memoized
+            args['content'] = text
+            return Page(**args).get_content('http://notmyidea.org')
+
+        # test inclusion b/w files of different types
+        # HTML includes Markdown
+        self.assertEqual(
+            render(
+                'HTML includes Markdown '
+                '{include}include.markdown\n'
+                'Included content is above'
+            ),
+            'HTML includes Markdown '
+            '<p><strong>this is Markdown</strong>\n'
+            'Here is a <a href="http://MrSenko.com">link</a>.</p>\n'
+            'Included content is above'
+        )
+
+        # test inclusion b/w files of different types
+        # where we don't know how to render the included type
+        content = render(
+            'HTML includes Unknown '
+            '{include}include.unknown'
+        )
+        # we have a warning in this case
+        self.assertLogCountEqual(
+            count=1,
+            msg=r"Unable to read `.*`, skipping include\.",
+            level=logging.WARNING)
+        self.assertEqual(
+            content,
+            'HTML includes Unknown '
+            '{include}include.unknown'
+        )
+
+        # one include via relative path
+        self.assertEqual(
+            render(
+                'There is a simple include here '
+                '{include}include1.html\n'
+                'Included content is above'
+            ),
+            'There is a simple include here '
+            'this content has been included\n\n'
+            'Included content is above'
+        )
+
+        # two nested includes via relative paths
+        self.assertEqual(
+            render(
+                'There is a simple include here '
+                '{include}include2.html\n'
+                'Included content is above'
+            ),
+            'There is a simple include here '
+            'this file includes another\n'
+            'this content has been included\n\n\n'
+            'Included content is above'
+        )
+
+        # include via full path
+        self.assertEqual(
+            render(
+                'There is a simple include here '
+                '{include}/include1.html'
+                ' Included content is above'
+            ),
+            'There is a simple include here '
+            'this content has been included\n'
+            ' Included content is above'
+        )
+
+        # 2nd include is in different directory
+        # include paths are relative to the caller directory
+        self.assertEqual(
+            render(
+                'There is a simple include here '
+                '{include}include/include3.html'
+                ' Included content is above'
+            ),
+            'There is a simple include here '
+            'this file includes another in a different directory\n'
+            'this content has been included\n\n'
+            ' Included content is above'
+        )
+
+        # 2nd include using absolute path in the included file
+        self.assertEqual(
+            render(
+                'There is a simple include here '
+                '{include}include/include4.html'
+                ' Included content is above'
+            ),
+            'There is a simple include here '
+            'this file includes another via absolute path\n'
+            'this content has been included\n\n'
+            ' Included content is above'
+        )
+
+        # include non-existing file => inclusion is skipped
+        content = render(
+            'There is a simple include here '
+            '{include}missing.html'
+            ' Included content is above'
+        )
+        # we have a warning in this case
+        self.assertLogCountEqual(
+            count=1,
+            msg=r"Unable to find `.*`, skipping include\.",
+            level=logging.WARNING)
+        self.assertEqual(
+            content,
+            'There is a simple include here '
+            '{include}missing.html'
+            ' Included content is above'
+        )
+
def test_multiple_authors(self):
"""Test article with multiple authors."""
args = self.page_kwargs.copy()