Sitemap plugin & get_generators signal

This is a combination of 13 commits:

1. New signal for registering custom generators
2. New plugin: pelican.plugins.sitemap
3. pelican.plugins.sitemap: more settings
4. pelican.plugins.sitemap: translations are indexed
5. pelican.plugins.sitemap: added documentation
6. pelican.plugins.sitemap: added XML DTD & W3C dates
7. pelican.plugins.sitemap: removed a <changefreq> bug
8. the `get_generators` can now return a tuple
9. pelican.plugins.sitemap: cleaned the code
10. pelican.plugin.sitemap: settings changes
11. sitemap plugin: improved configuration & documentation
12. sitemap plugin: :set spell
13. sitemap plugin: removed useless whitespaces
This commit is contained in:
m-r-r 2012-08-21 13:08:21 +02:00
commit 229b0e4dcc
4 changed files with 301 additions and 1 deletions

View file

@ -59,6 +59,9 @@ Signal Arguments Description
initialized pelican object initialized pelican object
article_generate_context article_generator, metadata article_generate_context article_generator, metadata
article_generator_init article_generator invoked in the ArticlesGenerator.__init__ article_generator_init article_generator invoked in the ArticlesGenerator.__init__
get_generators generators invoked in Pelican.get_generator_classes,
can return a Generator, or several
generators in a tuple or in a list.
pages_generate_context pages_generator, metadata pages_generate_context pages_generator, metadata
pages_generator_init pages_generator invoked in the PagesGenerator.__init__ pages_generator_init pages_generator invoked in the PagesGenerator.__init__
========================= ============================ ========================================= ========================= ============================ =========================================
@ -108,3 +111,79 @@ variable, as in the example::
``github_activity`` is a list of lists. The first element is the title ``github_activity`` is a list of lists. The first element is the title
and the second element is the raw HTML from GitHub. and the second element is the raw HTML from GitHub.
Sitemap
-------
The plugin generates a sitemap of the blog.
It can generate plain text sitemaps or XML sitemaps.
Configuration
"""""""""""""
You can use the ``SITEMAP`` setting to configure the behavior of the
plugin.
The ``SITEMAP`` variable must be a Python dictionary; it can contain three keys:
- ``format``, which sets the output format of the plugin (``xml`` or ``txt``)
- ``priorities``, which is a dictionary with three keys:
- ``articles``, the priority for the URLs of the articles and their
translations
- ``pages``, the priority for the URLs of the static pages
- ``indexes``, the priority for the URLs of the index pages, such as tags,
author pages, categories indexes, archives, etc...
All the values of this dictionary must be decimal numbers between ``0`` and ``1``.
- ``changefreqs``, which is a dictionary with three items:
- ``articles``, the update frequency of the articles
- ``pages``, the update frequency of the pages
- ``indexes``, the update frequency of the index pages
A valid value is ``always``, ``hourly``, ``daily``, ``weekly``, ``monthly``,
``yearly`` or ``never``.
If a key is missing or a value is incorrect, it will be replaced with the
default value.
The sitemap is saved in ``<output_path>/sitemap.<format>``.
.. note::
``priorities`` and ``changefreqs`` provide information for search engines.
They are only used in the XML sitemaps.
For more information: <http://www.sitemaps.org/protocol.html#xmlTagDefinitions>
Example
"""""""
Here is an example configuration (these are also the default settings):
.. code-block:: python
PLUGINS=['pelican.plugins.sitemap',]
SITEMAP = {
'format': 'xml',
'priorities': {
'articles': 0.5,
'indexes': 0.5,
'pages': 0.5
},
'changefreqs': {
'articles': 'monthly',
'indexes': 'daily',
'pages': 'monthly'
}
}

View file

@ -8,7 +8,7 @@ import argparse
from pelican import signals from pelican import signals
from pelican.generators import (ArticlesGenerator, PagesGenerator, from pelican.generators import (Generator, ArticlesGenerator, PagesGenerator,
StaticGenerator, PdfGenerator, LessCSSGenerator) StaticGenerator, PdfGenerator, LessCSSGenerator)
from pelican.log import init from pelican.log import init
from pelican.settings import read_settings, _DEFAULT_CONFIG from pelican.settings import read_settings, _DEFAULT_CONFIG
@ -185,6 +185,18 @@ class Pelican(object):
generators.append(PdfGenerator) generators.append(PdfGenerator)
if self.settings['LESS_GENERATOR']: # can be True or PATH to lessc if self.settings['LESS_GENERATOR']: # can be True or PATH to lessc
generators.append(LessCSSGenerator) generators.append(LessCSSGenerator)
for pair in signals.get_generators.send(self):
(funct, value) = pair
if not isinstance(value, (tuple, list)):
value = (value, )
for v in value:
if isinstance(v, type):
logger.debug('Found generator: {0}'.format(v))
generators.append(v)
return generators return generators
def get_writer(self): def get_writer(self):

208
pelican/plugins/sitemap.py Normal file
View file

@ -0,0 +1,208 @@
import os.path
from datetime import datetime
from logging import debug, warning, error, info
from codecs import open
from pelican import signals, contents
# Opening lines of the plain-text sitemap: the fixed index pages of the site.
# Placeholder: {0} = site URL.
TXT_HEADER = u"""{0}/index.html
{0}/archives.html
{0}/tags.html
{0}/categories.html
"""
# Opening of the XML sitemap: XML declaration, <urlset> root element and one
# <url> entry for each fixed index page.
# Placeholders: {0} = site URL, {1} = lastmod date, {2} = changefreq,
# {3} = priority.
XML_HEADER = u"""<?xml version="1.0" encoding="utf-8"?>
<urlset xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"
xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<url>
<loc>{0}/index.html</loc>
<lastmod>{1}</lastmod>
<changefreq>{2}</changefreq>
<priority>{3}</priority>
</url>
<url>
<loc>{0}/archives.html</loc>
<lastmod>{1}</lastmod>
<changefreq>{2}</changefreq>
<priority>{3}</priority>
</url>
<url>
<loc>{0}/tags.html</loc>
<lastmod>{1}</lastmod>
<changefreq>{2}</changefreq>
<priority>{3}</priority>
</url>
<url>
<loc>{0}/categories.html</loc>
<lastmod>{1}</lastmod>
<changefreq>{2}</changefreq>
<priority>{3}</priority>
</url>
"""
# A single <url> entry of the XML sitemap.
# Placeholders: {0} = site URL, {1} = URL of the page relative to the site,
# {2} = lastmod date, {3} = changefreq, {4} = priority.
XML_URL = u"""
<url>
<loc>{0}/{1}</loc>
<lastmod>{2}</lastmod>
<changefreq>{3}</changefreq>
<priority>{4}</priority>
</url>
"""
# Closes the <urlset> root element opened by XML_HEADER.
XML_FOOTER = u"""
</urlset>
"""
def format_date(date):
    """Format *date* as a W3C datetime string (``YYYY-MM-DDThh:mm:ss+hh:mm``).

    :param date: a ``datetime`` instance, naive or timezone-aware.
    :return: the formatted date followed by the UTC offset of *date*.
        When the offset is unknown (naive date, or a ``tzinfo`` that does
        not report an offset) the suffix ``-00:00`` is used.
    """
    # '%z' yields the UTC offset as '+HHMM' (or '' when it is unknown).
    # The previous '%s' directive is a platform-specific "seconds since the
    # epoch" extension and never produced a timezone offset.
    offset = date.strftime('%z') if date.tzinfo else ''
    if offset:
        # Turn '+HHMM' into the '+HH:MM' form required by the W3C format.
        tz = offset[:-2] + ':' + offset[-2:]
    else:
        tz = "-00:00"
    return date.strftime("%Y-%m-%dT%H:%M:%S") + tz
class SitemapGenerator(object):
    """Generator that writes ``sitemap.xml`` or ``sitemap.txt`` to the output.

    The behavior is driven by the optional ``SITEMAP`` setting, a dict with
    the keys ``format`` (``'xml'`` or ``'txt'``), ``priorities`` and
    ``changefreqs``. Missing keys fall back to the defaults silently;
    invalid values fall back to the defaults with a warning.
    """

    def __init__(self, context, settings, path, theme, output_path, *null):
        """Read the ``SITEMAP`` setting and store the validated configuration.

        :param context: shared context; ``generate_output`` reads its
            ``pages``, ``articles``, ``categories``, ``tags`` and
            ``authors`` entries.
        :param settings: mapping of settings; ``SITEURL`` and ``SITEMAP``
            are read from it.
        :param path: unused, accepted for signature compatibility.
        :param theme: unused, accepted for signature compatibility.
        :param output_path: directory in which the sitemap is written.
        :param null: any extra positional arguments are ignored.
        """
        self.output_path = output_path
        self.context = context
        self.now = datetime.now()
        self.siteurl = settings.get('SITEURL')

        # Defaults, used for every missing or invalid configuration value.
        self.format = 'xml'
        self.changefreqs = {
            'articles': 'monthly',
            'indexes': 'daily',
            'pages': 'monthly',
        }
        self.priorities = {
            'articles': 0.5,
            'indexes': 0.5,
            'pages': 0.5,
        }

        config = settings.get('SITEMAP', {})
        if not isinstance(config, dict):
            warning("sitemap plugin: the SITEMAP setting must be a dict")
            return

        fmt = config.get('format')
        pris = config.get('priorities')
        chfreqs = config.get('changefreqs')

        if fmt == 'txt':
            # Priorities and change frequencies only exist in XML sitemaps,
            # so there is nothing else to validate.
            self.format = fmt
            return
        if fmt not in (None, 'xml'):
            # A missing key (None) silently selects the default; only an
            # explicit but unsupported value deserves a warning.
            warning("sitemap plugin: SITEMAP['format'] must be `txt' or `xml'")
            warning("sitemap plugin: Setting SITEMAP['format'] on `xml'")

        valid_keys = ('articles', 'indexes', 'pages')
        valid_chfreqs = ('always', 'hourly', 'daily', 'weekly', 'monthly',
                         'yearly', 'never')

        if isinstance(pris, dict):
            # Work on a copy so the user's settings dict is left untouched.
            checked = dict(pris)
            for k, v in pris.items():
                if k in valid_keys and not isinstance(v, (int, float)):
                    default = self.priorities[k]
                    warning("sitemap plugin: priorities must be numbers")
                    warning("sitemap plugin: setting SITEMAP['priorities']"
                            "['{0}'] on {1}".format(k, default))
                    checked[k] = default
            self.priorities.update(checked)
        elif pris is not None:
            warning("sitemap plugin: SITEMAP['priorities'] must be a dict")
            warning("sitemap plugin: using the default values")

        if isinstance(chfreqs, dict):
            # Same as above: validate into a copy, then merge.
            checked = dict(chfreqs)
            for k, v in chfreqs.items():
                if k in valid_keys and v not in valid_chfreqs:
                    default = self.changefreqs[k]
                    warning("sitemap plugin: invalid changefreq `{0}'".format(v))
                    warning("sitemap plugin: setting SITEMAP['changefreqs']"
                            "['{0}'] on '{1}'".format(k, default))
                    checked[k] = default
            self.changefreqs.update(checked)
        elif chfreqs is not None:
            warning("sitemap plugin: SITEMAP['changefreqs'] must be a dict")
            warning("sitemap plugin: using the default values")

    def write_url(self, page, fd):
        """Write the sitemap entry of *page* to the open file *fd*.

        Objects whose ``status`` attribute is set to anything other than
        ``'published'`` are skipped; objects without a ``status`` are
        treated as published.

        :param page: object exposing a ``url`` attribute and, optionally,
            ``status`` and ``date`` attributes.
        :param fd: writable text file object.
        """
        if getattr(page, 'status', 'published') != 'published':
            return

        if self.format != 'xml':
            # Plain-text sitemaps hold one absolute URL per line.
            # (The original referenced an undefined name `loc` here.)
            fd.write(self.siteurl + '/' + page.url + '\n')
            return

        # Objects without a date are stamped with the generation time.
        lastmod = format_date(getattr(page, 'date', self.now))

        if isinstance(page, contents.Article):
            kind = 'articles'
        elif isinstance(page, contents.Page):
            kind = 'pages'
        else:
            # Anything else is handled as an index page.
            kind = 'indexes'

        fd.write(XML_URL.format(self.siteurl, page.url, lastmod,
                                self.changefreqs[kind], self.priorities[kind]))

    def generate_output(self, writer):
        """Write the sitemap file to ``<output_path>/sitemap.<format>``.

        :param writer: unused, accepted for signature compatibility.
        """
        path = os.path.join(self.output_path, 'sitemap.{0}'.format(self.format))

        # Concatenation builds a new list, so the lists stored in the
        # context are not modified by the `+=` below.
        pages = self.context['pages'] + self.context['articles'] \
            + [c for (c, a) in self.context['categories']] \
            + [t for (t, a) in self.context['tags']] \
            + [a for (a, b) in self.context['authors']]

        for article in self.context['articles']:
            pages += article.translations

        info('writing {0}'.format(path))

        with open(path, 'w', encoding='utf-8') as fd:
            if self.format == 'xml':
                fd.write(XML_HEADER.format(
                    self.siteurl,
                    format_date(self.now),
                    self.changefreqs['indexes'],
                    self.priorities['indexes']
                ))
            else:
                fd.write(TXT_HEADER.format(self.siteurl))

            for page in pages:
                self.write_url(page, fd)

            if self.format == 'xml':
                fd.write(XML_FOOTER)
def get_generators(generators):
    """Signal handler: hand the sitemap generator class back to the sender.

    :param generators: the object sending the signal (not used here).
    :return: the ``SitemapGenerator`` class.
    """
    generator_class = SitemapGenerator
    return generator_class
def register():
    """Plugin entry point: connect ``get_generators`` to the signal of the same name."""
    hook = signals.get_generators
    hook.connect(get_generators)

View file

@ -3,5 +3,6 @@ from blinker import signal
initialized = signal('pelican_initialized') initialized = signal('pelican_initialized')
article_generate_context = signal('article_generate_context') article_generate_context = signal('article_generate_context')
article_generator_init = signal('article_generator_init') article_generator_init = signal('article_generator_init')
get_generators = signal('get_generators')
pages_generate_context = signal('pages_generate_context') pages_generate_context = signal('pages_generate_context')
pages_generator_init = signal('pages_generator_init') pages_generator_init = signal('pages_generator_init')