New feature: support for {include} syntax. Fixes #1902.

The new {include} syntax makes it possible to include
frequently used text snippets into your content.
This commit is contained in:
Mr. Senko 2016-05-26 17:36:48 +03:00 committed by Lucas Cimon
commit 9ee2792e11
No known key found for this signature in database
GPG key ID: 08DA831E717571EE
30 changed files with 477 additions and 47 deletions

2
.gitignore vendored
View file

@ -2,6 +2,7 @@
.*.swp
.*.swo
*.pyc
.cache/
.DS_Store
docs/_build
docs/fr/_build
@ -16,3 +17,4 @@ six-*.egg/
venv
samples/output
*.pem
pip-wheel-metadata/

3
RELEASE.md Normal file
View file

@ -0,0 +1,3 @@
Release type: minor
Add support for the ``{include}`` syntax

View file

@ -369,6 +369,45 @@ Linking to authors, categories, index and tags
You can link to authors, categories, index and tags using the ``{author}name``,
``{category}foobar``, ``{index}`` and ``{tag}tagname`` syntax.
Including common text into your content
---------------------------------------
From Pelican 4.2 onward, you can include common text snippets in your content
using the ``{include}file.ext`` syntax. You can specify semi-absolute paths,
which start with ``/`` and are resolved from the ``PATH`` directory, e.g.
``{include}/pages/disclaimer.html``, or use relative paths, e.g.
``{include}notice.html``. Relative paths are resolved from the directory of
the file containing the ``{include}``.
For example, when you have the following content layout::

    content
    ├── notice2.html
    └── pages
        ├── page1.html
        └── notice1.html

Then the includes may look like::
<html>
<head>
<title>PAGE 1</title>
</head>
<body>
This is the content of page 1
{include}../notice2.html
</body>
</html>
``notice2.html`` looks like::
{include}pages/notice1.html
This is the second warning about relative paths
When using ``{include}``, it is best to exclude the included files from regular
processing with the ``IGNORE_FILES`` setting. Otherwise Pelican will try to
render them as regular content and will most likely fail!
Deprecated internal link syntax
-------------------------------

View file

@ -26,6 +26,9 @@ from pelican.urlwrappers import (Author, Category, Tag, URLWrapper) # NOQA
logger = logging.getLogger(__name__)
# Matches an include marker such as ``{include}path`` (each delimiter may be
# a brace or a pipe).
# - ``indent`` optionally captures a newline plus the spaces/tabs that precede
#   the marker, so the included text can be re-indented to match.
# - ``path`` captures the file reference.  ``-`` and ``%`` are accepted in
#   addition to word characters, dots and slashes, so that names such as
#   ``my-notice.html`` or ``my%20notice.html`` are captured whole instead of
#   being cut at the first hyphen or percent sign.
INCLUDE_RE = re.compile(r'(?P<indent>\n[ \t]+)?'
                        r'[{|]include[|}](?P<path>[\w./%-]+)')
@python_2_unicode_compatible
class Content(object):
@ -359,14 +362,8 @@ class Content(object):
path = value.path
if what not in {'static', 'attach'}:
continue
if path.startswith('/'):
path = path[1:]
else:
# relative to the source path of this content
path = self.get_relative_source_path(
os.path.join(self.relative_dir, path)
)
path = path.replace('%20', ' ')
path = relativize_path(self.settings['PATH'],
self.relative_dir, path)
static_links.add(path)
return static_links
@ -449,24 +446,11 @@ class Content(object):
"""
if not source_path:
source_path = self.source_path
if source_path is None:
return None
return posixize_path(
os.path.relpath(
os.path.abspath(os.path.join(
self.settings['PATH'],
source_path)),
os.path.abspath(self.settings['PATH'])
))
return get_relative_source_path(self.settings['PATH'], source_path)
@property
def relative_dir(self):
return posixize_path(
os.path.dirname(
os.path.relpath(
os.path.abspath(self.source_path),
os.path.abspath(self.settings['PATH']))))
return relative_dir(self.settings['PATH'], self.source_path)
def refresh_metadata_intersite_links(self):
for key in self.settings['FORMATTED_FIELDS']:
@ -613,3 +597,111 @@ class Static(Content):
self.override_save_as = new_save_as
self.override_url = new_url
def get_relative_source_path(content_path, source_path):
    """Return ``source_path`` expressed relative to ``content_path``.

    ``source_path`` is first joined onto ``content_path`` and made absolute,
    then rewritten relative to the absolute ``content_path``; the result is
    passed through ``posixize_path``.  ``None`` is returned unchanged.
    """
    if source_path is None:
        return None
    absolute_source = os.path.abspath(os.path.join(content_path, source_path))
    content_root = os.path.abspath(content_path)
    return posixize_path(os.path.relpath(absolute_source, content_root))
def relativize_path(content_path, relative_dir, path):
    """
    Resolve ``path`` to a path relative to ``content_path``.

    A ``path`` starting with ``/`` only has that leading slash removed.
    Any other ``path`` is joined onto ``relative_dir`` and handed to
    ``get_relative_source_path``.  In both cases every ``%20`` in the
    result is replaced by a space.
    """
    if path.startswith('/'):
        resolved = path[1:]
    else:
        joined = os.path.join(relative_dir, path)
        resolved = get_relative_source_path(content_path, joined)
    return resolved.replace('%20', ' ')
def relative_dir(content_path, path):
    """Return the directory of ``path`` relative to ``content_path``.

    Both arguments are made absolute before ``path`` is rewritten relative
    to ``content_path``; the directory part of that relative path is then
    passed through ``posixize_path``.
    """
    content_root = os.path.abspath(content_path)
    path_from_root = os.path.relpath(os.path.abspath(path), content_root)
    return posixize_path(os.path.dirname(path_from_root))
def insert_included_content(content,
                            source_path,
                            content_path,
                            exclude_exts=()):
    '''
    Replace every ``{include}some.file`` marker in ``content`` with the
    contents of the referenced file, recursing into the included text.

    :param content: text in which include markers are expanded.
    :param source_path: path of the file ``content`` was read from; its
        directory is the starting point for relative includes.
    :param content_path: content root directory; paths starting with ``/``
        are resolved from it.
    :param exclude_exts: file extensions (without the leading dot) that
        must not be inlined; markers pointing at such files are left as is.
    :return: ``content`` with the include markers expanded.

    A marker whose target does not exist is left untouched and a warning is
    logged.  When a file is met again while it is still being expanded
    (a circular inclusion), a warning is logged and its text is inserted
    without expanding its own markers.  Included files are read as UTF-8.
    '''
    # Function-scope import: keeps this function self-contained with
    # respect to the module-level import list.
    import io

    # Files whose expansion is currently in progress.  A path is removed
    # again once its expansion is finished, so including the same snippet
    # several times is not mistaken for a cycle.
    active_paths = set()

    # In Python 3.x we could use `nonlocal` in `replacer()` to assign to a
    # variable of `insert_included_content()`.  In Python 2.x we cannot, so
    # the current working directory of the recursion is kept in a mutable
    # one-element container instead.
    source_dir = [relative_dir(content_path, source_path)]

    def replacer(m):
        path, indent = m.group('path'), m.group('indent')
        path = relativize_path(content_path, source_dir[0], path)
        path = posixize_path(
            os.path.abspath(
                os.path.join(content_path, path)
            )
        )
        if not os.path.isfile(path):
            logger.warning("Unable to find `%s`, skipping include.", path)
            return m.group()

        _, ext = os.path.splitext(path)
        # remove leading dot
        ext = ext[1:]
        if ext in exclude_exts:
            return m.group()

        # Explicit encoding: do not depend on the platform default.
        with io.open(path, encoding='utf-8') as content_file:
            text = content_file.read()

        if indent:
            # The captured indent starts with the newline that precedes the
            # marker: keep that newline once, and prefix every included
            # line with the remaining whitespace.
            prefix = ''
            if indent[0] == '\n':
                prefix = '\n'
                indent = indent[1:]
            text = prefix + '\n'.join(indent + line
                                      for line in text.split('\n'))

        # recursion stop
        if path in active_paths:
            logger.warning("Circular inclusion detected for '%s'", path)
            return text

        active_paths.add(path)
        # While expanding the included file, relative paths must be
        # resolved from *its* directory.  Remember the includer's directory
        # first, so it can really be restored afterwards.
        previous_source_dir = source_dir[0]
        source_dir[0] = posixize_path(os.path.dirname(path))
        try:
            # recursively replace other includes
            text = INCLUDE_RE.sub(replacer, text)
        finally:
            # restore the includer's state, even if a nested read failed
            source_dir[0] = previous_source_dir
            active_paths.discard(path)
        return text

    return INCLUDE_RE.sub(replacer, content)

View file

@ -155,16 +155,15 @@ class Generator(object):
if os.path.isdir(root):
for dirpath, dirs, temp_files in os.walk(
root, followlinks=True):
drop = []
root, topdown=True, followlinks=True):
excl = exclusions_by_dirpath.get(dirpath, ())
for d in dirs:
# We copy the `dirs` list as we will modify it in the loop:
for d in list(dirs):
if (d in excl or
any(fnmatch.fnmatch(d, ignore)
for ignore in ignores)):
drop.append(d)
for d in drop:
dirs.remove(d)
if d in dirs:
dirs.remove(d)
reldir = os.path.relpath(dirpath, self.path)
for f in temp_files:

View file

@ -5,6 +5,7 @@ import logging
import os
import re
from collections import OrderedDict
from tempfile import NamedTemporaryFile
import docutils
import docutils.core
@ -19,7 +20,8 @@ from six.moves.html_parser import HTMLParser
from pelican import rstdirectives # NOQA
from pelican import signals
from pelican.cache import FileStampDataCacher
from pelican.contents import Author, Category, Page, Tag
from pelican.contents import Author, Category, Page, Tag, \
insert_included_content
from pelican.utils import SafeDatetime, escape_html, get_date, pelican_open, \
posixize_path
@ -286,9 +288,28 @@ class RstReader(BaseReader):
def read(self, source_path):
"""Parses restructured text"""
pub = self._get_publisher(source_path)
parts = pub.writer.parts
content = parts.get('body')
with pelican_open(source_path) as content:
exclude_exts = set(Readers(self.settings).extensions)
exclude_exts -= set(self.file_extensions)
content = insert_included_content(content, source_path,
self.settings['PATH'],
exclude_exts)
# We have pre-processed the file content,
# but docutils requires a file as input,
# so we use a temporary one:
with NamedTemporaryFile() as tmp_file:
tmp_file.write(content.encode('utf8'))
tmp_file.seek(0)
try:
pub = self._get_publisher(tmp_file.name)
parts = pub.writer.parts
content = parts.get('body')
except docutils.ApplicationError as err:
# We fix any potential error message
# to reference the original file:
msg = err.args[0].replace(tmp_file.name, source_path)
err.args = (msg,)
raise err
metadata = self._parse_metadata(pub.document, source_path)
metadata.setdefault('title', parts.get('title'))
@ -349,6 +370,11 @@ class MarkdownReader(BaseReader):
self._source_path = source_path
self._md = Markdown(**self.settings['MARKDOWN'])
with pelican_open(source_path) as text:
exclude_exts = set(Readers(self.settings).extensions)
exclude_exts -= set(self.file_extensions)
text = insert_included_content(text, source_path,
self.settings['PATH'],
exclude_exts)
content = self._md.convert(text)
if hasattr(self._md, 'Meta'):
@ -500,7 +526,12 @@ class HTMLReader(BaseReader):
metadata = {}
for k in parser.metadata:
metadata[k] = self.process_metadata(k, parser.metadata[k])
return parser.body, metadata
if parser.body:
return parser.body, metadata
else:
# in case we're parsing HTML includes
return content, metadata
class Readers(FileStampDataCacher):
@ -596,6 +627,13 @@ class Readers(FileStampDataCacher):
metadata.update(_filter_discardable_metadata(reader_metadata))
if content:
# Exclude the file extensions already processed
# by the dedicated readers:
exclude_exts = set(MarkdownReader.file_extensions)
exclude_exts |= set(RstReader.file_extensions)
content = insert_included_content(content, path,
self.settings['PATH'],
exclude_exts)
# find images with empty alt
find_empty_alt(content, path)

View file

@ -0,0 +1,5 @@
_includes HTML_:
{include}subdir/include_other.html
^Included content above^

View file

@ -0,0 +1,5 @@
_includes HTML_:
{include}included.html
^Included content above^

View file

@ -0,0 +1,6 @@
Article including some HTML file
################################
{include}included.html
^Included content above^

View file

@ -0,0 +1,5 @@
_includes HTML_:
{include}/pelican/tests/content/include/included.html
^Included content above^

View file

@ -0,0 +1 @@
{include}include_sibling.html

View file

@ -0,0 +1 @@
{include}include_other.html

View file

@ -0,0 +1 @@
<span>this content has been included</span>

View file

@ -0,0 +1,2 @@
**this is Markdown**
Here is a [link](https://docs.getpelican.com).

View file

@ -0,0 +1,5 @@
import antigravity
import this
_ = antigravity + this

View file

@ -0,0 +1,2 @@
**this is reStructuredText**
Here is a `link <https://docs.getpelican.com>`_.

View file

@ -0,0 +1,5 @@
START
{include}md_includer.md
END

View file

@ -0,0 +1,5 @@
_includes HTML_:
{include}inexisting_file.html
^Included content above^

View file

@ -0,0 +1,2 @@
<em>includes Markdown</em>: {include}included.md
^Included content above^

View file

@ -0,0 +1,2 @@
_inline includes Markdown_: {include}included.md
^Included content above^

View file

@ -0,0 +1,3 @@
```
{include}included.py
```

View file

@ -0,0 +1,6 @@
Article with an indented code block
###################################
.. code-block:: python
{include}included.py

View file

@ -0,0 +1,5 @@
Article with an inline included reStructuredText file
#####################################################
Inline includes *reStructuredText*: {include}included.rst
^Included content above^

View file

@ -0,0 +1,2 @@
this file includes another via absolute path
{include}/pelican/tests/content/include/subdir/include_parent.html

View file

@ -0,0 +1,2 @@
this file includes another in a parent directory
{include}../included.html

View file

@ -188,16 +188,16 @@ class LogCountHandler(BufferingHandler):
"""Capturing and counting logged messages."""
def __init__(self, capacity=1000):
logging.handlers.BufferingHandler.__init__(self, capacity)
super(LogCountHandler, self).__init__(capacity)
def count_logs(self, msg=None, level=None):
return len([
def get_logs(self, msg=None, level=None):
return [
l
for l
in self.buffer
if (msg is None or re.match(msg, l.getMessage())) and
(level is None or l.levelno == level)
])
if ((msg is None or re.match(msg, l.getMessage())) and
(level is None or l.levelno == level))
]
class LoggedTestCase(unittest.TestCase):
@ -213,8 +213,17 @@ class LoggedTestCase(unittest.TestCase):
super(LoggedTestCase, self).tearDown()
def assertLogCountEqual(self, count=None, msg=None, **kwargs):
actual = self._logcount_handler.count_logs(msg=msg, **kwargs)
actual_logs = self._logcount_handler.get_logs(msg=msg, **kwargs)
self.assertEqual(
actual, count,
len(actual_logs), count,
msg='expected {} occurrences of {!r}, but found {}'.format(
count, msg, actual))
count, msg, len(actual_logs)))
def assertNoLogs(self, count=None, msg=None, **kwargs):
    """Fail if any captured log record matches ``msg`` / ``kwargs``.

    Better than ``.assertLogCountEqual(0)`` because the failure message
    lists the generated logs, if any.

    ``count`` is not used by this method; ``msg`` and ``kwargs`` are
    forwarded to the log handler's ``get_logs()``.
    """
    actual_logs = self._logcount_handler.get_logs(msg=msg, **kwargs)
    if actual_logs:
        # Use getMessage(): the ``message`` attribute of a record is only
        # set once a Formatter has formatted it, so reading it on a merely
        # buffered record would raise AttributeError instead of reporting
        # the unexpected logs.
        self.fail('Some logs were generated:\n'
                  + '\n'.join('{}: {}'.format(log.levelname,
                                              log.getMessage())
                              for log in actual_logs))

View file

@ -33,6 +33,7 @@ class TestCache(unittest.TestCase):
settings['CACHE_CONTENT'] = True
settings['LOAD_CONTENT_CACHE'] = True
settings['CACHE_PATH'] = self.temp_cache
settings['IGNORE_FILES'] = ['include']
return settings
def test_generator_caching(self):

View file

@ -183,6 +183,7 @@ class TestArticlesGenerator(unittest.TestCase):
settings['DEFAULT_DATE'] = (1970, 1, 1)
settings['READERS'] = {'asc': None}
settings['CACHE_CONTENT'] = False
settings['IGNORE_FILES'] = ['include']
context = get_context(settings)
cls.generator = ArticlesGenerator(
@ -307,6 +308,7 @@ class TestArticlesGenerator(unittest.TestCase):
settings['USE_FOLDER_AS_CATEGORY'] = False
settings['CACHE_PATH'] = self.temp_cache
settings['READERS'] = {'asc': None}
settings['IGNORE_FILES'] = ['include']
context = get_context(settings)
generator = ArticlesGenerator(
context=context, settings=settings,
@ -404,6 +406,7 @@ class TestArticlesGenerator(unittest.TestCase):
settings['YEAR_ARCHIVE_SAVE_AS'] = 'posts/{date:%Y}/index.html'
settings['YEAR_ARCHIVE_URL'] = 'posts/{date:%Y}/'
settings['CACHE_PATH'] = self.temp_cache
settings['IGNORE_FILES'] = ['include']
context = get_context(settings)
generator = ArticlesGenerator(
context=context, settings=settings,
@ -514,6 +517,7 @@ class TestArticlesGenerator(unittest.TestCase):
# DEFAULT_CATEGORY
('category', 'Random'),
('tags', 'general, untagged'))
settings['IGNORE_FILES'] = ['include']
context = get_context(settings)
generator = ArticlesGenerator(
context=context, settings=settings,
@ -543,6 +547,7 @@ class TestArticlesGenerator(unittest.TestCase):
settings['DEFAULT_CATEGORY'] = 'Default'
settings['DEFAULT_DATE'] = (1970, 1, 1)
settings['ARTICLE_ORDER_BY'] = 'title'
settings['IGNORE_FILES'] = ['include']
context = get_context(settings)
generator = ArticlesGenerator(
@ -590,6 +595,7 @@ class TestArticlesGenerator(unittest.TestCase):
settings['DEFAULT_CATEGORY'] = 'Default'
settings['DEFAULT_DATE'] = (1970, 1, 1)
settings['ARTICLE_ORDER_BY'] = 'reversed-title'
settings['IGNORE_FILES'] = ['include']
context = get_context(settings)
generator = ArticlesGenerator(

View file

@ -207,8 +207,12 @@ class TestPelican(LoggedTestCase):
mute(True)(pelican.run)()
logger.setLevel(orig_level)
self.assertLogCountEqual(
count=2,
msg="Writing .*",
count=1,
msg='Writing .+/oh-yeah.html',
level=logging.INFO)
self.assertLogCountEqual(
count=1,
msg='Writing .+/categories.html',
level=logging.INFO)
def test_cyclic_intersite_links_no_warnings(self):

View file

@ -1,12 +1,14 @@
# -*- coding: utf-8 -*-
from __future__ import print_function, unicode_literals
import logging
import os
import six
from pelican import contents
from pelican import readers
from pelican.tests.support import get_settings, unittest
from pelican.tests.support import LoggedTestCase, get_settings, unittest
from pelican.utils import SafeDatetime
try:
@ -25,7 +27,7 @@ def _path(*args):
return os.path.join(CONTENT_PATH, *args)
class ReaderTest(unittest.TestCase):
class ReaderTest(LoggedTestCase):
def read_file(self, path, **kwargs):
# Isolate from future API changes to readers.read_file
@ -84,6 +86,20 @@ class TestAssertDictHasSubset(ReaderTest):
class DefaultReaderTest(ReaderTest):
maxDiff = None
def setUp(self):
    # Remember the current levels of the ``contents`` and ``readers``
    # loggers, then switch both to INFO for the duration of the test.
    super(DefaultReaderTest, self).setUp()
    self._initial_contents_log_level = contents.logger.level
    self._initial_readers_log_level = readers.logger.level
    # Go through setLevel(), the documented API, rather than assigning the
    # ``level`` attribute: a plain assignment skips whatever bookkeeping
    # the logging module performs when a level changes.
    contents.logger.setLevel(logging.INFO)
    readers.logger.setLevel(logging.INFO)
def tearDown(self):
    # Put the ``contents`` and ``readers`` loggers back to the levels that
    # were recorded before the test ran.
    super(DefaultReaderTest, self).tearDown()
    # Go through setLevel(), the documented API, rather than assigning the
    # ``level`` attribute: a plain assignment skips whatever bookkeeping
    # the logging module performs when a level changes.
    contents.logger.setLevel(self._initial_contents_log_level)
    readers.logger.setLevel(self._initial_readers_log_level)
def test_readfile_unknown_extension(self):
with self.assertRaises(TypeError):
self.read_file(path='article_with_metadata.unknownextension')
@ -104,6 +120,162 @@ class DefaultReaderTest(ReaderTest):
'Other images have empty alt attributes'}
)
def test_include_markdown_from_markdown(self):
    # A Markdown file including another Markdown file inline: the included
    # text is rendered as part of the including paragraph.
    expected = (
        '<p><em>inline includes Markdown</em>: '
        '<strong>this is Markdown</strong>\n'
        'Here is a <a href="https://docs.getpelican.com">link</a>.</p>\n'
        '<p>^Included content above^</p>'
    )
    rendered = self.read_file('include/md_includer.md').get_content('')
    self.assertEqual(rendered, expected)
    self.assertNoLogs()
def test_include_html_from_markdown(self):
    # A Markdown file including an HTML snippet on its own line.
    expected = (
        '<p><em>includes HTML</em>:</p>\n'
        '<p><span>this content has been included</span>\n'
        '</p>\n'
        '<p>^Included content above^</p>'
    )
    rendered = self.read_file('include/html_includer.md').get_content('')
    self.assertEqual(rendered, expected)
    self.assertNoLogs()
def test_include_markdown_from_html(self):
    # An HTML file referencing a Markdown file: the marker is expected to
    # stay in the output untouched, without any log being emitted.
    expected = (
        '<em>includes Markdown</em>: {include}included.md\n'
        '^Included content above^\n'
    )
    rendered = self.read_file('include/md_includer.html').get_content('')
    self.assertEqual(rendered, expected)
    self.assertNoLogs()
def test_include_rst_from_rst(self):
    # A reStructuredText file including another one inline.
    expected = (
        '<p>Inline includes <em>reStructuredText</em>: '
        '<strong>this is reStructuredText</strong>\n'
        'Here is a <a class="reference external" '
        'href="https://docs.getpelican.com">link</a>.</p>\n'
        '<p>^Included content above^</p>\n'
    )
    rendered = self.read_file('include/rst_includer.rst').get_content('')
    self.assertEqual(rendered, expected)
    self.assertNoLogs()
def test_include_html_from_rst(self):
    # A reStructuredText file including an HTML snippet.
    expected = (
        '<p><span>this content has been included</span>\n'
        '</p>\n'
        '<p>^Included content above^</p>\n'
    )
    rendered = self.read_file('include/html_includer.rst').get_content('')
    self.assertEqual(rendered, expected)
    self.assertNoLogs()
def test_include_code_from_markdown(self):
    # A Python file included inside a fenced Markdown code block is
    # expected to come out as highlighted code.
    expected = (
        '<div class="highlight"><pre>'
        '<span></span>'
        '<span class="kn">import</span> '
        '<span class="nn">antigravity</span>\n'
        '\n'
        '<span class="kn">import</span> '
        '<span class="nn">this</span>\n'
        '\n'
        '<span class="n">_</span> '
        '<span class="o">=</span> '
        '<span class="n">antigravity</span> '
        '<span class="o">+</span> '
        '<span class="n">this</span>\n'
        '</pre></div>'
    )
    rendered = self.read_file('include/py_includer.md').get_content('')
    self.assertEqual(rendered, expected)
def test_include_code_from_rst(self):
    # A Python file included inside an indented reStructuredText
    # code-block directive is expected to come out as highlighted code.
    expected = (
        '<div class="highlight"><pre>'
        '<span></span>'
        '<span class="kn">import</span> '
        '<span class="nn">antigravity</span>\n'
        '\n'
        '<span class="kn">import</span> '
        '<span class="nn">this</span>\n'
        '\n'
        '<span class="n">_</span> '
        '<span class="o">=</span> '
        '<span class="n">antigravity</span> '
        '<span class="o">+</span> '
        '<span class="n">this</span>\n'
        '</pre></div>\n'
    )
    rendered = self.read_file('include/py_includer.rst').get_content('')
    self.assertEqual(rendered, expected)
    self.assertNoLogs()
def test_include_nested_markdown(self):
    # Two levels of inclusion: a Markdown file includes a Markdown file
    # that itself includes another one.
    expected = (
        '<p>START</p>\n'
        '<p><em>inline includes Markdown</em>: '
        '<strong>this is Markdown</strong>\n'
        'Here is a <a href="https://docs.getpelican.com">link</a>.</p>\n'
        '<p>^Included content above^</p>\n'
        '<p>END</p>'
    )
    source = 'include/includer_of_md_includer.md'
    rendered = self.read_file(source).get_content('')
    self.assertEqual(rendered, expected)
    self.assertNoLogs()
def test_include_html_with_full_path(self):
    # The include marker uses a path that starts with a slash.
    expected = (
        '<p><em>includes HTML</em>:</p>\n'
        '<p><span>this content has been included</span>\n'
        '</p>\n'
        '<p>^Included content above^</p>'
    )
    source = 'include/html_includer_with_full_path.md'
    rendered = self.read_file(source).get_content('')
    self.assertEqual(rendered, expected)
    self.assertNoLogs()
def test_include_html_in_subdirectory(self):
    # A chain of includes that goes into a subdirectory and back up to
    # the parent directory.
    expected = (
        '<p><em>includes HTML</em>:</p>\n'
        '<p>this file includes another via absolute path\n'
        'this file includes another in a parent directory\n'
        '<span>this content has been included</span>\n\n\n'
        '</p>\n'
        '<p>^Included content above^</p>'
    )
    source = 'include/html_from_subdir_includer.md'
    rendered = self.read_file(source).get_content('')
    self.assertEqual(rendered, expected)
    self.assertNoLogs()
def test_include_non_existing_file(self):
    # A marker pointing at a missing file is kept verbatim in the output
    # and exactly one warning is logged.
    expected = (
        '<p><em>includes HTML</em>:</p>\n'
        '<p>{include}inexisting_file.html</p>\n'
        '<p>^Included content above^</p>'
    )
    source = 'include/inexisting_file_includer.md'
    rendered = self.read_file(source).get_content('')
    self.assertEqual(rendered, expected)
    self.assertLogCountEqual(
        count=1,
        msg='Unable to find `.*`, skipping include.',
        level=logging.WARNING)
def test_include_with_recursion_loop(self):
    # Two files including each other: the expansion is expected to stop
    # with exactly one "circular inclusion" warning.
    expected = '{include}include_sibling.html\n\n\n\n'
    rendered = self.read_file('include/include_sibling.html').get_content('')
    self.assertEqual(rendered, expected)
    self.assertLogCountEqual(
        count=1,
        msg="Circular inclusion detected for "
            "'.+/include/include_other.html'",
        level=logging.WARNING)
class RstReaderTest(ReaderTest):