From 5e2e88ca92efcfd2196f3392e5a6116d70d8670f Mon Sep 17 00:00:00 2001
From: Stefan 'hr' Berder
Date: Sun, 28 Sep 2014 00:33:05 +0800
Subject: [PATCH 01/11] add objects details to theme docs
---
docs/themes.rst | 102 ++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 102 insertions(+)
diff --git a/docs/themes.rst b/docs/themes.rst
index 4be9a8e5..16453881 100644
--- a/docs/themes.rst
+++ b/docs/themes.rst
@@ -316,6 +316,108 @@ period A tuple of the form (`year`, `month`, `day`) that
You can see an example of how to use `period` in the ``simple`` theme's
period_archives.html
+Objects
+=======
+
+This section details the object attributes that are available and useful in
+templates. Not all attributes are listed here; this is a selection of those
+considered useful in a template.
+
+.. _object-article:
+
+Article
+-------
+
+The string representation of an Article is the `source_path` attribute.
+
+===================  ===================================================
+Attribute            Description
+===================  ===================================================
+author               The :ref:`Author <object-author_cat_tag>` of
+                     this article.
+authors              A list of :ref:`Authors <object-author_cat_tag>`
+                     of this article.
+category             The :ref:`Category <object-author_cat_tag>`
+                     of this article.
+content              The rendered content of the article.
+date                 Datetime object representing the article date.
+date_format          Either default date format or locale date format.
+default_template     Default template name.
+in_default_lang      Boolean representing if the article is written
+                     in the default language.
+lang                 Language of the article.
+locale_date          Date formatted by the `date_format`.
+metadata             Article header metadata `dict`.
+save_as              Location to save the article page.
+slug                 Page slug.
+source_path          Full system path of the article source file.
+status               The article status, can be any of 'published' or
+                     'draft'.
+summary              Rendered summary content.
+tags                 List of :ref:`Tag <object-author_cat_tag>`
+                     objects.
+template             Template name to use for rendering.
+title                Title of the article.
+translations         List of translations
+                     :ref:`Article <object-article>` objects.
+url                  URL to the article page.
+===================  ===================================================
+
+.. _object-author_cat_tag:
+
+Author / Category / Tag
+-----------------------
+
+The string representation of those objects is the `name` attribute.
+
+===================  ===================================================
+Attribute            Description
+===================  ===================================================
+name                 Name of this object [1]_.
+page_name            Author page name.
+save_as              Location to save the author page.
+slug                 Page slug.
+url                  URL to the author page.
+===================  ===================================================
+
+.. [1] for Author object, coming from `:authors:` or `AUTHOR`.
+
+.. _object-page:
+
+Page
+----
+
+The string representation of a Page is the `source_path` attribute.
+
+===================  ===================================================
+Attribute            Description
+===================  ===================================================
+author               The :ref:`Author <object-author_cat_tag>` of
+                     this page.
+content              The rendered content of the page.
+date                 Datetime object representing the page date.
+date_format          Either default date format or locale date format.
+default_template     Default template name.
+in_default_lang      Boolean representing if the page is written
+                     in the default language.
+lang                 Language of the page.
+locale_date          Date formatted by the `date_format`.
+metadata             Page header metadata `dict`.
+save_as              Location to save the page.
+slug                 Page slug.
+source_path          Full system path of the page source file.
+status               The page status, can be any of 'published' or
+                     'draft'.
+summary              Rendered summary content.
+tags                 List of :ref:`Tag <object-author_cat_tag>`
+                     objects.
+template             Template name to use for rendering.
+title                Title of the page.
+translations         List of translations
+                     :ref:`Page <object-page>` objects.
+url                  URL to the page.
+===================  ===================================================
+
Feeds
=====
From bc3a0e8c592e70db432ee987e9b42f069aee6ba8 Mon Sep 17 00:00:00 2001
From: SkyLothar
Date: Sun, 22 Feb 2015 16:27:18 +0800
Subject: [PATCH 02/11] remove useless if condition in index template
---
pelican/themes/notmyidea/templates/index.html | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/pelican/themes/notmyidea/templates/index.html b/pelican/themes/notmyidea/templates/index.html
index c8982476..3eac8a3a 100644
--- a/pelican/themes/notmyidea/templates/index.html
+++ b/pelican/themes/notmyidea/templates/index.html
@@ -23,7 +23,7 @@
{% endif %}
{# other items #}
{% else %}
- {% if loop.first and articles_page.has_previous %}
+ {% if loop.first %}
{% endif %}
From 87d86d724c6ae01adf1488b2f65dd0ff1e48ca46 Mon Sep 17 00:00:00 2001
From: Kevin Yap
Date: Sat, 28 Feb 2015 15:33:54 -0800
Subject: [PATCH 03/11] Change phrasing and formatting of README
Made a few changes to the README to emphasize Pelican's position as a
general-purpose static site generator, and not just a blogging tool.
See #1645 for more details.
---
README.rst | 59 +++++++++++++++++++++++++++---------------------------
1 file changed, 30 insertions(+), 29 deletions(-)
diff --git a/README.rst b/README.rst
index 564cc77c..a5643514 100644
--- a/README.rst
+++ b/README.rst
@@ -3,57 +3,58 @@ Pelican |build-status| |coverage-status|
Pelican is a static site generator, written in Python_.
-* Write your weblog entries directly with your editor of choice (vim!)
- in reStructuredText_ or Markdown_
-* Includes a simple CLI tool to (re)generate the weblog
-* Easy to interface with DVCSes and web hooks
-* Completely static output is easy to host anywhere
+* Write content in reStructuredText_ or Markdown_ using your editor of choice.
+* Includes a simple command line tool to (re)generate site files.
+* Easy to interface with version control systems and web hooks.
+* Completely static output is simple to host anywhere.
+
Features
--------
Pelican currently supports:
-* Blog articles and pages
-* Comments, via an external service (Disqus). (Please note that while
- useful, Disqus is an external service, and thus the comment data will be
- somewhat outside of your control and potentially subject to data loss.)
-* Theming support (themes are created using Jinja2_ templates)
-* PDF generation of the articles/pages (optional)
+* Blog articles and static pages
+* Integration with external services (ex. Google Analytics and Disqus)
+* Site themes (created using Jinja2_ templates)
* Publication of articles in multiple languages
-* Atom/RSS feeds
-* Code syntax highlighting
-* Import from WordPress, Dotclear, or RSS feeds
-* Integration with external tools: Twitter, Google Analytics, etc. (optional)
-* Fast rebuild times thanks to content caching and selective output writing.
+* Generation of Atom and RSS feeds
+* Syntax highlighting via Pygments_
+* Importing existing content from WordPress, Dotclear, and more services
+* Fast rebuild times due to content caching and selective output writing
-Have a look at the `Pelican documentation`_ for more information.
+Check out `Pelican's documentation`_ for further information.
-Why the name "Pelican"?
------------------------
-
-"Pelican" is an anagram for *calepin*, which means "notebook" in French. ;)
-
-Source code
------------
-
-You can access the source code at: https://github.com/getpelican/pelican
-
-If you feel hackish, have a look at the explanation of `Pelican's internals`_.
How to get help, contribute, or provide feedback
------------------------------------------------
See our `contribution submission and feedback guidelines `_.
+
+Source code
+-----------
+
+Pelican's source code is `hosted on GitHub`_. If you're feeling hackish,
+take a look at `Pelican's internals`_.
+
+
+Why the name "Pelican"?
+-----------------------
+
+"Pelican" is an anagram of *calepin*, which means "notebook" in French.
+
+
.. Links
.. _Python: http://www.python.org/
.. _reStructuredText: http://docutils.sourceforge.net/rst.html
.. _Markdown: http://daringfireball.net/projects/markdown/
.. _Jinja2: http://jinja.pocoo.org/
-.. _`Pelican documentation`: http://docs.getpelican.com/
+.. _Pygments: http://pygments.org/
+.. _`Pelican's documentation`: http://docs.getpelican.com/
.. _`Pelican's internals`: http://docs.getpelican.com/en/latest/internals.html
+.. _`hosted on GitHub`: https://github.com/getpelican/pelican
.. |build-status| image:: https://img.shields.io/travis/getpelican/pelican/master.svg
:target: https://travis-ci.org/getpelican/pelican
From e35ca1d6ff4fd9a31b6dd60b2bb345c2fee0828e Mon Sep 17 00:00:00 2001
From: Justin Mayer
Date: Thu, 5 Mar 2015 12:04:39 -0800
Subject: [PATCH 04/11] Minor improvements to README
---
README.rst | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/README.rst b/README.rst
index a5643514..0bb3bcc8 100644
--- a/README.rst
+++ b/README.rst
@@ -3,10 +3,10 @@ Pelican |build-status| |coverage-status|
Pelican is a static site generator, written in Python_.
-* Write content in reStructuredText_ or Markdown_ using your editor of choice.
-* Includes a simple command line tool to (re)generate site files.
-* Easy to interface with version control systems and web hooks.
-* Completely static output is simple to host anywhere.
+* Write content in reStructuredText_ or Markdown_ using your editor of choice
+* Includes a simple command line tool to (re)generate site files
+* Easy to interface with version control systems and web hooks
+* Completely static output is simple to host anywhere
Features
@@ -14,13 +14,13 @@ Features
Pelican currently supports:
-* Blog articles and static pages
-* Integration with external services (ex. Google Analytics and Disqus)
+* Chronological content (e.g., articles, blog posts) as well as static pages
+* Integration with external services (e.g., Google Analytics and Disqus)
* Site themes (created using Jinja2_ templates)
* Publication of articles in multiple languages
* Generation of Atom and RSS feeds
* Syntax highlighting via Pygments_
-* Importing existing content from WordPress, Dotclear, and more services
+* Importing existing content from WordPress, Dotclear, and other services
* Fast rebuild times due to content caching and selective output writing
Check out `Pelican's documentation`_ for further information.
@@ -35,7 +35,7 @@ See our `contribution submission and feedback guidelines `_.
Source code
-----------
-Pelican's source code is `hosted on GitHub`_. If you're feeling hackish,
+Pelican's source code is `hosted on GitHub`_. If you feel like hacking,
take a look at `Pelican's internals`_.
From 3ea45420152a8465db33fd4a67ac88f1c1426df5 Mon Sep 17 00:00:00 2001
From: Deniz Turgut
Date: Tue, 17 Feb 2015 20:05:00 -0500
Subject: [PATCH 05/11] Make sure Content uses URLWrappers
---
pelican/contents.py | 14 +++-----------
pelican/readers.py | 4 ++++
pelican/tests/test_contents.py | 11 ++++++-----
pelican/tests/test_generators.py | 32 ++++++++++++++++++++++++++++++++
pelican/tests/test_paginator.py | 4 ++--
5 files changed, 47 insertions(+), 18 deletions(-)
diff --git a/pelican/contents.py b/pelican/contents.py
index 074c28be..90121316 100644
--- a/pelican/contents.py
+++ b/pelican/contents.py
@@ -53,7 +53,7 @@ class Content(object):
self._context = context
self.translations = []
- local_metadata = dict(settings['DEFAULT_METADATA'])
+ local_metadata = dict()
local_metadata.update(metadata)
# set metadata as attributes
@@ -166,21 +166,13 @@ class Content(object):
"""Returns the URL, formatted with the proper values"""
metadata = copy.copy(self.metadata)
path = self.metadata.get('path', self.get_relative_source_path())
- default_category = self.settings['DEFAULT_CATEGORY']
- slug_substitutions = self.settings.get('SLUG_SUBSTITUTIONS', ())
metadata.update({
'path': path_to_url(path),
'slug': getattr(self, 'slug', ''),
'lang': getattr(self, 'lang', 'en'),
'date': getattr(self, 'date', SafeDatetime.now()),
- 'author': slugify(
- getattr(self, 'author', ''),
- slug_substitutions
- ),
- 'category': slugify(
- getattr(self, 'category', default_category),
- slug_substitutions
- )
+ 'author': self.author.slug if hasattr(self, 'author') else '',
+ 'category': self.category.slug if hasattr(self, 'category') else ''
})
return metadata
diff --git a/pelican/readers.py b/pelican/readers.py
index 731fb5da..a9b71bed 100644
--- a/pelican/readers.py
+++ b/pelican/readers.py
@@ -537,6 +537,10 @@ def find_empty_alt(content, path):
def default_metadata(settings=None, process=None):
metadata = {}
if settings:
+ for name, value in dict(settings.get('DEFAULT_METADATA', {})).items():
+ if process:
+ value = process(name, value)
+ metadata[name] = value
if 'DEFAULT_CATEGORY' in settings:
value = settings['DEFAULT_CATEGORY']
if process:
diff --git a/pelican/tests/test_contents.py b/pelican/tests/test_contents.py
index 4b692e29..004d512e 100644
--- a/pelican/tests/test_contents.py
+++ b/pelican/tests/test_contents.py
@@ -8,7 +8,7 @@ import os.path
from pelican.tests.support import unittest, get_settings
-from pelican.contents import Page, Article, Static, URLWrapper
+from pelican.contents import Page, Article, Static, URLWrapper, Author, Category
from pelican.settings import DEFAULT_CONFIG
from pelican.utils import path_to_url, truncate_html_words, SafeDatetime, posix_join
from pelican.signals import content_object_init
@@ -33,7 +33,7 @@ class TestPage(unittest.TestCase):
'metadata': {
'summary': TEST_SUMMARY,
'title': 'foo bar',
- 'author': 'Blogger',
+ 'author': Author('Blogger', DEFAULT_CONFIG),
},
'source_path': '/path/to/file/foo.ext'
}
@@ -374,7 +374,8 @@ class TestPage(unittest.TestCase):
content = Page(**args)
assert content.authors == [content.author]
args['metadata'].pop('author')
- args['metadata']['authors'] = ['First Author', 'Second Author']
+ args['metadata']['authors'] = [Author('First Author', DEFAULT_CONFIG),
+ Author('Second Author', DEFAULT_CONFIG)]
content = Page(**args)
assert content.authors
assert content.author == content.authors[0]
@@ -396,8 +397,8 @@ class TestArticle(TestPage):
settings['ARTICLE_URL'] = '{author}/{category}/{slug}/'
settings['ARTICLE_SAVE_AS'] = '{author}/{category}/{slug}/index.html'
article_kwargs = self._copy_page_kwargs()
- article_kwargs['metadata']['author'] = "O'Brien"
- article_kwargs['metadata']['category'] = 'C# & stuff'
+ article_kwargs['metadata']['author'] = Author("O'Brien", settings)
+ article_kwargs['metadata']['category'] = Category('C# & stuff', settings)
article_kwargs['metadata']['title'] = 'fnord'
article_kwargs['settings'] = settings
article = Article(**article_kwargs)
diff --git a/pelican/tests/test_generators.py b/pelican/tests/test_generators.py
index 9f38c002..acf767f2 100644
--- a/pelican/tests/test_generators.py
+++ b/pelican/tests/test_generators.py
@@ -413,6 +413,38 @@ class TestArticlesGenerator(unittest.TestCase):
generator.generate_context()
generator.readers.read_file.assert_called_count == orig_call_count
+ def test_standard_metadata_in_default_metadata(self):
+ settings = get_settings(filenames={})
+ settings['CACHE_CONTENT'] = False
+ settings['DEFAULT_CATEGORY'] = 'Default'
+ settings['DEFAULT_DATE'] = (1970, 1, 1)
+ settings['DEFAULT_METADATA'] = (('author', 'Blogger'),
+ # category will be ignored in favor of
+ # DEFAULT_CATEGORY
+ ('category', 'Random'),
+ ('tags', 'general, untagged'))
+ generator = ArticlesGenerator(
+ context=settings.copy(), settings=settings,
+ path=CONTENT_DIR, theme=settings['THEME'], output_path=None)
+ generator.generate_context()
+
+ authors = sorted([author.name for author, _ in generator.authors])
+ authors_expected = sorted(['Alexis Métaireau', 'Blogger',
+ 'First Author', 'Second Author'])
+ self.assertEqual(authors, authors_expected)
+
+ categories = sorted([category.name
+ for category, _ in generator.categories])
+ categories_expected = [
+ sorted(['Default', 'TestCategory', 'yeah', 'test', '指導書']),
+ sorted(['Default', 'TestCategory', 'Yeah', 'test', '指導書'])]
+ self.assertIn(categories, categories_expected)
+
+ tags = sorted([tag.name for tag in generator.tags])
+ tags_expected = sorted(['bar', 'foo', 'foobar', 'general', 'untagged',
+ 'パイソン', 'マック'])
+ self.assertEqual(tags, tags_expected)
+
class TestPageGenerator(unittest.TestCase):
# Note: Every time you want to test for a new field; Make sure the test
diff --git a/pelican/tests/test_paginator.py b/pelican/tests/test_paginator.py
index 5494fda8..002d9e07 100644
--- a/pelican/tests/test_paginator.py
+++ b/pelican/tests/test_paginator.py
@@ -5,7 +5,7 @@ import locale
from pelican.tests.support import unittest, get_settings
from pelican.paginator import Paginator
-from pelican.contents import Article
+from pelican.contents import Article, Author
from pelican.settings import DEFAULT_CONFIG
from jinja2.utils import generate_lorem_ipsum
@@ -26,7 +26,6 @@ class TestPage(unittest.TestCase):
'metadata': {
'summary': TEST_SUMMARY,
'title': 'foo bar',
- 'author': 'Blogger',
},
'source_path': '/path/to/file/foo.ext'
}
@@ -49,6 +48,7 @@ class TestPage(unittest.TestCase):
key=lambda r: r[0],
)
+ self.page_kwargs['metadata']['author'] = Author('Blogger', settings)
object_list = [Article(**self.page_kwargs), Article(**self.page_kwargs)]
paginator = Paginator('foobar.foo', object_list, settings)
page = paginator.page(1)
From 4e896c427ddef6b9a19088dfc653f7b8c15f5c08 Mon Sep 17 00:00:00 2001
From: Kevin Yap
Date: Fri, 6 Mar 2015 23:51:26 -0800
Subject: [PATCH 06/11] Standardize formatting of .travis.yml
Use 2 spaces for indentation.
---
.travis.yml | 25 ++++++++++++-------------
1 file changed, 12 insertions(+), 13 deletions(-)
diff --git a/.travis.yml b/.travis.yml
index a052252b..f5a7f04f 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,25 +1,24 @@
language: python
python:
- - "2.7"
- - "3.3"
- - "3.4"
+ - "2.7"
+ - "3.3"
+ - "3.4"
addons:
apt_packages:
- pandoc
before_install:
- - sudo apt-get update -qq
- - sudo locale-gen fr_FR.UTF-8 tr_TR.UTF-8
+ - sudo apt-get update -qq
+ - sudo locale-gen fr_FR.UTF-8 tr_TR.UTF-8
install:
- - pip install .
- - pip install -r dev_requirements.txt
- - pip install nose-cov
+ - pip install .
+ - pip install -r dev_requirements.txt
+ - pip install nose-cov
script: nosetests -sv --with-coverage --cover-package=pelican pelican
after_success:
- # Report coverage results to coveralls.io
- pip install coveralls
- coveralls
notifications:
- irc:
- channels:
- - "irc.freenode.org#pelican"
- on_success: change
+ irc:
+ channels:
+ - "irc.freenode.org#pelican"
+ on_success: change
From ffe71d324d4812925b2eeddbc52b66f5ebbf3801 Mon Sep 17 00:00:00 2001
From: robertlagrant
Date: Fri, 13 Mar 2015 13:42:56 +0200
Subject: [PATCH 07/11] Change docs wording on cache regen for #1630
---
docs/settings.rst | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/docs/settings.rst b/docs/settings.rst
index 11444d2e..9fb97883 100644
--- a/docs/settings.rst
+++ b/docs/settings.rst
@@ -847,13 +847,11 @@ can be invoked by passing the ``--archive`` flag).
The cache files are Python pickles, so they may not be readable by
different versions of Python as the pickle format often changes. If
-such an error is encountered, the cache files have to be rebuilt by
-removing them and re-running Pelican, or by using the Pelican
-command-line option ``--ignore-cache``. The cache files also have to
-be rebuilt when changing the ``GZIP_CACHE`` setting for cache file
-reading to work properly.
+such an error is encountered, it is caught and the cache file is
+rebuilt automatically in the new format. The cache files will also be
+rebuilt after the ``GZIP_CACHE`` setting has been changed.
-The ``--ignore-cache`` command-line option is also useful when the
+The ``--ignore-cache`` command-line option is useful when the
whole cache needs to be regenerated, such as when making modifications
to the settings file that will affect the cached content, or just for
debugging purposes. When Pelican runs in autoreload mode, modification
From 0f7f328206b4b3eb085335aa86c620150143ee6e Mon Sep 17 00:00:00 2001
From: Kevin Yap
Date: Fri, 13 Mar 2015 23:01:31 -0700
Subject: [PATCH 08/11] Remove a couple of unused imports
As reported by Pyflakes.
---
pelican/contents.py | 2 +-
pelican/writers.py | 1 -
2 files changed, 1 insertion(+), 2 deletions(-)
diff --git a/pelican/contents.py b/pelican/contents.py
index 074c28be..a680c411 100644
--- a/pelican/contents.py
+++ b/pelican/contents.py
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals, print_function
import six
-from six.moves.urllib.parse import (unquote, urlparse, urlunparse)
+from six.moves.urllib.parse import urlparse, urlunparse
import copy
import locale
diff --git a/pelican/writers.py b/pelican/writers.py
index bf32e272..e90a0004 100644
--- a/pelican/writers.py
+++ b/pelican/writers.py
@@ -3,7 +3,6 @@ from __future__ import with_statement, unicode_literals, print_function
import six
import os
-import locale
import logging
if not six.PY3:
From ef737c22393174571fe17a6175eb98465c6ec246 Mon Sep 17 00:00:00 2001
From: Deniz Turgut
Date: Sat, 14 Mar 2015 13:36:51 -0400
Subject: [PATCH 09/11] Use `--relative-urls` only if it is specified
Otherwise, `RELATIVE_URLS` in the config file is ignored and
`RELATIVE_URLS` is set to `False` if `--relative-urls` is not
specified.
Fixes an issue introduced in #1592
---
pelican/__init__.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/pelican/__init__.py b/pelican/__init__.py
index 3013744d..056c45ef 100644
--- a/pelican/__init__.py
+++ b/pelican/__init__.py
@@ -321,7 +321,8 @@ def get_config(args):
config['CACHE_PATH'] = args.cache_path
if args.selected_paths:
config['WRITE_SELECTED'] = args.selected_paths.split(',')
- config['RELATIVE_URLS'] = args.relative_paths
+ if args.relative_paths:
+ config['RELATIVE_URLS'] = args.relative_paths
config['DEBUG'] = args.verbosity == logging.DEBUG
# argparse returns bytes in Py2. There is no definite answer as to which
From 875c4a5e05d818c776be3019506921b863b13dc0 Mon Sep 17 00:00:00 2001
From: Anton Antonov
Date: Tue, 17 Mar 2015 01:23:29 +0200
Subject: [PATCH 10/11] Nitpick Content decorators
A bit more readable this way.
---
pelican/contents.py | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/pelican/contents.py b/pelican/contents.py
index 96466a94..005d045c 100644
--- a/pelican/contents.py
+++ b/pelican/contents.py
@@ -90,7 +90,7 @@ class Content(object):
self.in_default_lang = (self.lang == default_lang)
- # create the slug if not existing, generate slug according to
+ # create the slug if not existing, generate slug according to
# setting of SLUG_ATTRIBUTE
if not hasattr(self, 'slug'):
if settings['SLUGIFY_SOURCE'] == 'title' and hasattr(self, 'title'):
@@ -308,8 +308,13 @@ class Content(object):
"""Dummy function"""
pass
- url = property(functools.partial(get_url_setting, key='url'))
- save_as = property(functools.partial(get_url_setting, key='save_as'))
+ @property
+ def url(self):
+ return self.get_url_setting('url')
+
+ @property
+ def save_as(self):
+ return self.get_url_setting('save_as')
def _get_template(self):
if hasattr(self, 'template') and self.template is not None:
From db2e5174502787e447d3df32298cf950c6c894ae Mon Sep 17 00:00:00 2001
From: Forest
Date: Mon, 29 Sep 2014 22:51:13 -0700
Subject: [PATCH 11/11] Ignore empty metadata. Fixes #1469. Fixes #1398.
Some metadata values cause problems when empty. For example, a markdown file
containing a Slug: line with no additional text causes Pelican to produce a
file named ".html" instead of generating a proper file name. Others, like
those created by a PATH_METADATA regex, must be preserved even if empty,
so things like PAGE_URL="filename{customvalue}.html" will always work.
Essentially, we want to discard empty metadata that we know will be useless
or problematic. This is better than raising an exception because (a) it
allows users to deliberately keep empty metadata in their source files for
filling in later, and (b) users shouldn't be forced to fix empty metadata
created by blog migration tools (see #1398).
The metadata processors are the ideal place to do this, because they know
the type of data they are handling and whether an empty value is wanted.
Unfortunately, they can't discard items, and neither can process_metadata(),
because their return values are always saved by calling code. We can't
safely change the calling code, because some of it lives in custom reader
classes out in the field, and we don't want to break those working systems.
Discarding empty values at the time of use isn't good enough, because that
still allows useless empty values in a source file to override configured
defaults.
My solution:
- When processing a list of values, a metadata processor will omit any
unwanted empty ones from the list it returns.
- When processing an entirely unwanted value, it will return something easily
identifiable that will pass through the reader code.
- When collecting the processed metadata, read_file() will filter out items
identified as unwanted.
These metadata are affected by this change:
author, authors, category, slug, status, tags.
I also removed a bit of now-superfluous code from generators.py that was
discarding empty authors at the time of use.
---
pelican/generators.py | 4 +---
pelican/readers.py | 48 ++++++++++++++++++++++++++++++++++---------
2 files changed, 39 insertions(+), 13 deletions(-)
diff --git a/pelican/generators.py b/pelican/generators.py
index f0a6d264..75bd6b2a 100644
--- a/pelican/generators.py
+++ b/pelican/generators.py
@@ -544,10 +544,8 @@ class ArticlesGenerator(CachingGenerator):
if hasattr(article, 'tags'):
for tag in article.tags:
self.tags[tag].append(article)
- # ignore blank authors as well as undefined
for author in getattr(article, 'authors', []):
- if author.name != '':
- self.authors[author].append(article)
+ self.authors[author].append(article)
# sort the articles by date
self.articles.sort(key=attrgetter('date'), reverse=True)
self.dates = list(self.articles)
diff --git a/pelican/readers.py b/pelican/readers.py
index a9b71bed..3656cd96 100644
--- a/pelican/readers.py
+++ b/pelican/readers.py
@@ -28,16 +28,44 @@ from pelican.contents import Page, Category, Tag, Author
from pelican.utils import get_date, pelican_open, FileStampDataCacher, SafeDatetime, posixize_path
+def strip_split(text, sep=','):
+ """Return a list of stripped, non-empty substrings, delimited by sep."""
+ items = [x.strip() for x in text.split(sep)]
+ return [x for x in items if x]
+
+
+# Metadata processors have no way to discard an unwanted value, so we have
+# them return this value instead to signal that it should be discarded later.
+# This means that _filter_discardable_metadata() must be called on processed
+# metadata dicts before use, to remove the items with the special value.
+_DISCARD = object()
+
+
+def _process_if_nonempty(processor, name, settings):
+ """Removes extra whitespace from name and applies a metadata processor.
+ If name is empty or all whitespace, returns _DISCARD instead.
+ """
+ name = name.strip()
+ return processor(name, settings) if name else _DISCARD
+
+
METADATA_PROCESSORS = {
- 'tags': lambda x, y: [Tag(tag, y) for tag in x.split(',')],
+ 'tags': lambda x, y: [Tag(tag, y) for tag in strip_split(x)] or _DISCARD,
'date': lambda x, y: get_date(x.replace('_', ' ')),
'modified': lambda x, y: get_date(x),
- 'status': lambda x, y: x.strip(),
- 'category': Category,
- 'author': Author,
- 'authors': lambda x, y: [Author(author.strip(), y) for author in x.split(',')],
+ 'status': lambda x, y: x.strip() or _DISCARD,
+ 'category': lambda x, y: _process_if_nonempty(Category, x, y),
+ 'author': lambda x, y: _process_if_nonempty(Author, x, y),
+ 'authors': lambda x, y: [Author(a, y) for a in strip_split(x)] or _DISCARD,
+ 'slug': lambda x, y: x.strip() or _DISCARD,
}
+
+def _filter_discardable_metadata(metadata):
+ """Return a copy of a dict, minus any items marked as discardable."""
+ return {name: val for name, val in metadata.items() if val is not _DISCARD}
+
+
logger = logging.getLogger(__name__)
class BaseReader(object):
@@ -447,14 +475,14 @@ class Readers(FileStampDataCacher):
reader = self.readers[fmt]
- metadata = default_metadata(
- settings=self.settings, process=reader.process_metadata)
+ metadata = _filter_discardable_metadata(default_metadata(
+ settings=self.settings, process=reader.process_metadata))
metadata.update(path_metadata(
full_path=path, source_path=source_path,
settings=self.settings))
- metadata.update(parse_path_metadata(
+ metadata.update(_filter_discardable_metadata(parse_path_metadata(
source_path=source_path, settings=self.settings,
- process=reader.process_metadata))
+ process=reader.process_metadata)))
reader_name = reader.__class__.__name__
metadata['reader'] = reader_name.replace('Reader', '').lower()
@@ -462,7 +490,7 @@ class Readers(FileStampDataCacher):
if content is None:
content, reader_metadata = reader.read(path)
self.cache_data(path, (content, reader_metadata))
- metadata.update(reader_metadata)
+ metadata.update(_filter_discardable_metadata(reader_metadata))
if content:
# find images with empty alt