Sitemap plugin & get_generators signal

This is a combination of 13 commits:

1. New signal for registering custom generators
2. New plugin: pelican.plugins.sitemap
3. pelican.plugins.sitemap: more settings
4. pelican.plugins.sitemap: translations are indexed
5. pelican.plugins.sitemap: added documentation
6. pelican.plugins.sitemap: added XML DTD & W3C dates
7. pelican.plugins.sitemap: removed a <changefreq> bug
8. the `get_generators` can now return a tuple
9. pelican.plugins.sitemap: cleaned the code
10. pelican.plugin.sitemap: settings changes
11. sitemap plugin: improved configuration & documentation
12. sitemap plugin: :set spell
13. sitemap plugin: removed useless whitespaces
This commit is contained in:
m-r-r 2012-08-21 13:08:21 +02:00
commit 229b0e4dcc
4 changed files with 301 additions and 1 deletions

View file

@ -59,6 +59,9 @@ Signal Arguments Description
initialized pelican object
article_generate_context article_generator, metadata
article_generator_init article_generator invoked in the ArticlesGenerator.__init__
get_generators generators invoked in Pelican.get_generator_classes,
can return a Generator, or several
generator in a tuple or in a list.
pages_generate_context pages_generator, metadata
pages_generator_init pages_generator invoked in the PagesGenerator.__init__
========================= ============================ =========================================
@ -108,3 +111,79 @@ variable, as in the example::
``github_activity`` is a list of lists. The first element is the title
and the second element is the raw HTML from GitHub.
Sitemap
-------
The plugin generates a sitemap of the blog.
It can generates plain text sitemaps or XML sitemaps.
Configuration
"""""""""""""
You can use the setting ``SITEMAP`` variable to configure the behavior of the
plugin.
The ``SITEMAP`` variable must be a Python dictionary, it can contain tree keys:
- ``format``, which set the output format of the plugin (``xml`` or ``txt``)
- ``priorities``, which is a dictionary with three keys:
- ``articles``, the priority for the URLs of the articles and their
translations
- ``pages``, the priority for the URLs of the static pages
- ``indexes``, the priority for the URLs of the index pages, such as tags,
author pages, categories indexes, archives, etc...
All the values of this dictionary must be decimal numbers between ``0`` and ``1``.
- ``changefreqs``, which is a dictionary with three items:
- ``articles``, the update frequency of the articles
- ``pages``, the update frequency of the pages
- ``indexes``, the update frequency of the index pages
An valid value is ``always``, ``hourly``, ``daily``, ``weekly``, ``monthly``,
``yearly`` or ``never``.
If a key is missing or a value is incorrect, it will be replaced with the
default value.
The sitemap is saved in ``<output_path>/sitemap.<format>``.
.. note::
``priorities`` and ``changefreqs`` are informations for search engines.
They are only used in the XML sitemaps.
For more information: <http://www.sitemaps.org/protocol.html#xmlTagDefinitions>
Example
"""""""
Here is an example of configuration (it's also the default settings):
.. code-block:: python
PLUGINS=['pelican.plugins.sitemap',]
SITEMAP = {
'format': 'xml',
'priorities': {
'articles': 0.5,
'indexes': 0.5,
'pages': 0.5
},
'changefreqs': {
'articles': 'monthly',
'indexes': 'daily',
'pages': 'monthly'
}
}

View file

@ -8,7 +8,7 @@ import argparse
from pelican import signals
from pelican.generators import (ArticlesGenerator, PagesGenerator,
from pelican.generators import (Generator, ArticlesGenerator, PagesGenerator,
StaticGenerator, PdfGenerator, LessCSSGenerator)
from pelican.log import init
from pelican.settings import read_settings, _DEFAULT_CONFIG
@ -185,6 +185,18 @@ class Pelican(object):
generators.append(PdfGenerator)
if self.settings['LESS_GENERATOR']: # can be True or PATH to lessc
generators.append(LessCSSGenerator)
for pair in signals.get_generators.send(self):
(funct, value) = pair
if not isinstance(value, (tuple, list)):
value = (value, )
for v in value:
if isinstance(v, type):
logger.debug('Found generator: {0}'.format(v))
generators.append(v)
return generators
def get_writer(self):

208
pelican/plugins/sitemap.py Normal file
View file

@ -0,0 +1,208 @@
import os.path
from datetime import datetime
from logging import debug, warning, error, info
from codecs import open
from pelican import signals, contents
TXT_HEADER = u"""{0}/index.html
{0}/archives.html
{0}/tags.html
{0}/categories.html
"""
XML_HEADER = u"""<?xml version="1.0" encoding="utf-8"?>
<urlset xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"
xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<url>
<loc>{0}/index.html</loc>
<lastmod>{1}</lastmod>
<changefreq>{2}</changefreq>
<priority>{3}</priority>
</url>
<url>
<loc>{0}/archives.html</loc>
<lastmod>{1}</lastmod>
<changefreq>{2}</changefreq>
<priority>{3}</priority>
</url>
<url>
<loc>{0}/tags.html</loc>
<lastmod>{1}</lastmod>
<changefreq>{2}</changefreq>
<priority>{3}</priority>
</url>
<url>
<loc>{0}/categories.html</loc>
<lastmod>{1}</lastmod>
<changefreq>{2}</changefreq>
<priority>{3}</priority>
</url>
"""
XML_URL = u"""
<url>
<loc>{0}/{1}</loc>
<lastmod>{2}</lastmod>
<changefreq>{3}</changefreq>
<priority>{4}</priority>
</url>
"""
XML_FOOTER = u"""
</urlset>
"""
def format_date(date):
if date.tzinfo:
tz = date.strftime('%s')
tz = tz[:-2] + ':' + tz[-2:]
else:
tz = "-00:00"
return date.strftime("%Y-%m-%dT%H:%M:%S") + tz
class SitemapGenerator(object):
def __init__(self, context, settings, path, theme, output_path, *null):
self.output_path = output_path
self.context = context
self.now = datetime.now()
self.siteurl = settings.get('SITEURL')
self.format = 'xml'
self.changefreqs = {
'articles': 'monthly',
'indexes': 'daily',
'pages': 'monthly'
}
self.priorities = {
'articles': 0.5,
'indexes': 0.5,
'pages': 0.5
}
config = settings.get('SITEMAP', {})
if not isinstance(config, dict):
warning("sitemap plugin: the SITEMAP setting must be a dict")
else:
fmt = config.get('format')
pris = config.get('priorities')
chfreqs = config.get('changefreqs')
if fmt not in ('xml', 'txt'):
warning("sitemap plugin: SITEMAP['format'] must be `txt' or `xml'")
warning("sitemap plugin: Setting SITEMAP['format'] on `xml'")
elif fmt == 'txt':
self.format = fmt
return
valid_keys = ('articles', 'indexes', 'pages')
valid_chfreqs = ('always', 'hourly', 'daily', 'weekly', 'monthly',
'yearly', 'never')
if isinstance(pris, dict):
for k, v in pris.iteritems():
if k in valid_keys and not isinstance(v, (int, float)):
default = self.priorities[k]
warning("sitemap plugin: priorities must be numbers")
warning("sitemap plugin: setting SITEMAP['priorities']"
"['{0}'] on {1}".format(k, default))
pris[k] = default
self.priorities.update(pris)
elif pris is not None:
warning("sitemap plugin: SITEMAP['priorities'] must be a dict")
warning("sitemap plugin: using the default values")
if isinstance(chfreqs, dict):
for k, v in chfreqs.iteritems():
if k in valid_keys and v not in valid_chfreqs:
default = self.changefreqs[k]
warning("sitemap plugin: invalid changefreq `{0}'".format(v))
warning("sitemap plugin: setting SITEMAP['changefreqs']"
"['{0}'] on '{1}'".format(k, default))
chfreqs[k] = default
self.changefreqs.update(chfreqs)
elif chfreqs is not None:
warning("sitemap plugin: SITEMAP['changefreqs'] must be a dict")
warning("sitemap plugin: using the default values")
def write_url(self, page, fd):
if getattr(page, 'status', 'published') != 'published':
return
lastmod = format_date(getattr(page, 'date', self.now))
if isinstance(page, contents.Article):
pri = self.priorities['articles']
chfreq = self.changefreqs['articles']
elif isinstance(page, contents.Page):
pri = self.priorities['pages']
chfreq = self.changefreqs['pages']
else:
pri = self.priorities['indexes']
chfreq = self.changefreqs['indexes']
if self.format == 'xml':
fd.write(XML_URL.format(self.siteurl, page.url, lastmod, chfreq, pri))
else:
fd.write(self.siteurl + '/' + loc + '\n')
def generate_output(self, writer):
path = os.path.join(self.output_path, 'sitemap.{0}'.format(self.format))
pages = self.context['pages'] + self.context['articles'] \
+ [ c for (c, a) in self.context['categories']] \
+ [ t for (t, a) in self.context['tags']] \
+ [ a for (a, b) in self.context['authors']]
for article in self.context['articles']:
pages += article.translations
info('writing {0}'.format(path))
with open(path, 'w', encoding='utf-8') as fd:
if self.format == 'xml':
fd.write(XML_HEADER.format(
self.siteurl,
format_date(self.now),
self.changefreqs['indexes'],
self.priorities['indexes']
)
)
else:
fd.write(TXT_HEADER.format(self.siteurl))
for page in pages:
self.write_url(page, fd)
if self.format == 'xml':
fd.write(XML_FOOTER)
def get_generators(generators):
return SitemapGenerator
def register():
signals.get_generators.connect(get_generators)

View file

@ -3,5 +3,6 @@ from blinker import signal
initialized = signal('pelican_initialized')
article_generate_context = signal('article_generate_context')
article_generator_init = signal('article_generator_init')
get_generators = signal('get_generators')
pages_generate_context = signal('pages_generate_context')
pages_generator_init = signal('pages_generator_init')