Welcome Pelican 2.0 ! Refactoring of the internals to be more extensible.

--HG--
rename : pelican/bloggenerator.py => pelican/generators.py
This commit is contained in:
Alexis Metaireau 2010-10-30 00:56:40 +01:00
commit fdb920e50a
9 changed files with 430 additions and 337 deletions

View file

@ -1,15 +1,16 @@
#!/usr/bin/python
from pelican.bloggenerator import generate_blog
#!/usr/bin/env python
import argparse
parser = argparse.ArgumentParser(description="""A tool to generate a
from pelican.generators import ArticlesGenerator
parser = argparse.ArgumentParser(description="""A tool to generate a
static blog, with restructured text input files.""")
parser.add_argument(dest='path',
help='Path where to find the content files (default is "content").')
parser.add_argument('-t', '--templates-path', dest='templates',
help='Path where to find the templates. If not specified, will uses the'
' ones included with pelican.')
# Optional theme location. The two help fragments are concatenated by Python,
# so the second one must start with a space ("it will use", not "it willuse").
parser.add_argument('-t', '--theme-path', dest='theme',
    help='Path where to find the theme templates. If not specified, it will'
         ' use the ones included with pelican.')
parser.add_argument('-o', '--output', dest='output',
help='Where to output the generated files. If not specified, a directory'
' will be created, named "output" in the current path.')
@ -18,25 +19,10 @@ parser.add_argument('-m', '--markup', default='rst', dest='markup',
' available.')
parser.add_argument('-s', '--settings', dest='settings',
help='the settings of the application. Default to None.')
parser.add_argument('-b', '--debug', dest='debug', action='store_true')
def run(args):
generate_blog(args.path, args.templates, args.output, args.markup,
args.settings)
print 'Done !'
if __name__ == '__main__':
args = parser.parse_args()
files = []
if args.debug:
run(args)
else:
try:
run(args)
except Exception, e:
if args.debug:
raise e
else:
print 'Error ! %s' % e
gen = ArticlesGenerator(args.settings)
gen.generate(args.path, args.theme, args.output, args.markup)
print 'Done !'

View file

@ -1,311 +0,0 @@
# -*- coding: utf-8 -*-
import os
import re
import shutil
from codecs import open
from datetime import datetime
from docutils import core
from functools import partial
from operator import attrgetter
from jinja2 import Environment, FileSystemLoader
from feedgenerator import Atom1Feed
# import the directives to have pygments support
import rstdirectives
from utils import truncate_html_words
## Constants ##########################################################
# Every template loaded from the theme by get_templates().
_TEMPLATES = (
    'index', 'tag', 'tags', 'article',
    'category', 'categories', 'archives',
)
# Templates rendered exactly once, each one to "<name>.html".
_DIRECT_TEMPLATES = ('index', 'tags', 'categories', 'archives')
# The "themes" directory located next to this module.
_DEFAULT_THEME = os.sep.join(
    (os.path.dirname(os.path.abspath(__file__)), "themes"))
# Values used for every setting the settings file does not override.
_DEFAULT_CONFIG = dict(
    PATH=None,
    THEME=_DEFAULT_THEME,
    OUTPUT_PATH='output/',
    MARKUP='rst',
    STATIC_PATHS=['css', 'images'],
    FEED='feeds/all.atom.xml',
    CATEGORY_FEED='feeds/%s.atom.xml',
    BLOGNAME='A Pelican Blog',
)
def generate_blog(path=None, theme=None, output_path=None, markup=None,
settings=None):
"""Search the given path for files, and generate a static blog in output,
using the given theme.
That's the main logic of pelican: read the settings, parse every article,
write the Atom feeds, render the pages and copy the theme's static files.
Arguments left to None fall back to the value found in the settings.
:param path: the path where to find the files to parse
:param theme: where to search for templates
:param output_path: where to output the generated files
:param markup: the markup language to use while parsing
:param settings: the settings file to use
:raises Exception: if no path is given, neither here nor in the settings
"""
# get the settings
context = read_settings(settings)
# explicit arguments take precedence over the settings values
path = path or context['PATH']
theme = theme or context['THEME']
output_path = output_path or context['OUTPUT_PATH']
output_path = os.path.realpath(output_path)
markup = markup or context['MARKUP']
# get the list of files to parse
if not path:
raise Exception('you need to specify a path to search the docs on !')
# remove all the existing content from the output folder
# (best effort: any error raised by rmtree is ignored)
try:
shutil.rmtree(os.path.join(output_path))
except:
pass
# collect every file below path whose name ends with ".<markup>"
files = []
for root, dirs, temp_files in os.walk(path, followlinks=True):
files.extend([os.sep.join((root, f)) for f in temp_files
if f.endswith('.%s' % markup)])
articles, dates, years, tags, categories = [], {}, {}, {}, {}
# for each file, get the informations.
for f in files:
f = os.path.abspath(f)
content = open(f, encoding='utf-8').read()
article = Article(content, markup, context, os.stat(f))
if not hasattr(article, 'category'):
# try to get the category from the dirname
category = os.path.dirname(f).replace(os.path.abspath(path)+'/', '')
if category != '':
article.category = unicode(category)
articles.append(article)
# index the article by day, year, tag and category
if hasattr(article, 'date'):
update_dict(dates, article.date.strftime('%Y-%m-%d'), article)
update_dict(years, article.date.year, article)
if hasattr(article, 'tags'):
for tag in article.tags:
update_dict(tags, tag, article)
if hasattr(article, 'category'):
update_dict(categories, article.category, article)
# order the articles by date
articles.sort(key=attrgetter('date'), reverse=True)
templates = get_templates(theme)
# expose the collections to the templates; the local variables are looked
# up by name, and dicts are exposed as the result of their items() call
for item in ('articles', 'dates', 'years', 'tags', 'categories'):
value = locals()[item]
if hasattr(value, 'items'):
value = value.items()
context[item] = value
# without a BLOGURL setting, the output path is used to build the links
if 'BLOGURL' not in context:
context['BLOGURL'] = output_path
# one feed with all the articles, then one feed per category
generate_feed(articles, context, output_path, context['FEED'])
for cat, arts in categories.items():
arts.sort(key=attrgetter('date'), reverse=True)
generate_feed(arts, context, output_path,
context['CATEGORY_FEED'] % cat)
# generate the output
generate = partial(generate_file, output_path)
for template in _DIRECT_TEMPLATES:
generate('%s.html' % template, templates[template], context, blog=True)
for tag in tags:
generate('tag/%s.html' % tag, templates['tag'], context, tag=tag)
for cat in categories:
generate('category/%s.html' % cat, templates['category'], context,
category=cat, articles=categories[cat])
for article in articles:
generate('%s' % article.url,
templates['article'], context, article=article,
category=article.category)
# copy static paths to output
# (note: this loop rebinds the `path` argument; copy errors are ignored)
for path in context['STATIC_PATHS']:
try:
shutil.copytree(os.path.join(theme, path),
os.path.join(output_path, path))
except OSError:
pass
def generate_feed(articles, context, output_path=None, filename=None):
    """Generate an Atom feed with the list of articles provided.

    The feed is written to ``output_path/filename`` only when both are
    given; it is returned in every case.

    :param articles: the articles to put on the feed.
    :param context: the context to get the feed metadatas
        (BLOGNAME, BLOGURL and the optional BLOGSUBTITLE).
    :param output_path: where to output the file.
    :param filename: the filename to output.
    :return: the feed object.
    :raises OSError: if the destination directory cannot be created.
    """
    feed = Atom1Feed(
        title=context['BLOGNAME'],
        link=context['BLOGURL'],
        feed_url='%s/%s' % (context['BLOGURL'], filename),
        description=context.get('BLOGSUBTITLE', ''))
    for article in articles:
        feed.add_item(
            title=article.title,
            link='%s/%s' % (context['BLOGURL'], article.url),
            description=article.content,
            author_name=getattr(article, 'author', 'John Doe'),
            pubdate=article.date)

    if output_path and filename:
        complete_path = os.path.join(output_path, filename)
        dirname = os.path.dirname(complete_path)
        try:
            os.makedirs(dirname)
        except OSError:
            # an already existing directory is fine, anything else is not
            if not os.path.isdir(dirname):
                raise
        # the context manager closes the file even if feed.write() fails
        with open(complete_path, 'w') as fp:
            feed.write(fp, 'utf-8')
    return feed
def generate_file(path, name, template, context, **kwargs):
    """Render ``template`` and write the result to ``path/name``.

    :param path: where to generate the file.
    :param name: name of the file to output
    :param template: template to use to generate the content
    :param context: dict to pass to the templates; it is left untouched.
    :param **kwargs: additional variables to pass to the templates
    :raises OSError: if the destination directory cannot be created.
    """
    # work on a copy: page specific variables (tag, article, articles, ...)
    # must not leak into the pages rendered afterwards
    page_context = dict(context)
    page_context.update(kwargs)
    output = template.render(page_context)

    filename = os.sep.join((path, name))
    dirname = os.path.dirname(filename)
    try:
        os.makedirs(dirname)
    except OSError:
        # an already existing directory is fine, anything else is not
        if not os.path.isdir(dirname):
            raise
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(output)
    print('writing %s' % filename)
def get_templates(path=None):
    """Load the templates of a theme.

    :param path: the theme directory; templates are read from its
        "templates" sub-directory.
    :return: a dict mapping each name of _TEMPLATES to its loaded template.
    """
    template_dir = os.path.join(path, 'templates')
    loader = FileSystemLoader(template_dir)
    env = Environment(loader=loader)
    return dict(
        (name, env.get_template('%s.html' % name)) for name in _TEMPLATES)
def update_dict(mapping, key, value):
    """Append ``value`` to the list stored under ``key``.

    The list is created first when ``key`` is not in the mapping yet.

    :param mapping: the mapping to update
    :param key: the key of the mapping to update.
    :param value: the value to append to the list.
    """
    mapping.setdefault(key, []).append(value)
def read_settings(filename):
    """Load a Python file into a dictionary.

    Only the names written fully in upper case are kept; they override the
    values of _DEFAULT_CONFIG.

    :param filename: the settings file to execute; when empty or None only
        the defaults are returned.
    :return: a new dict holding the settings.
    """
    context = _DEFAULT_CONFIG.copy()
    if filename:
        tempdict = {}
        # NOTE: the settings file is executed as Python code, so only trusted
        # files must be loaded. exec(compile(...)) replaces execfile(), which
        # no longer exists in Python 3, and behaves the same on Python 2.
        with open(filename) as settings_file:
            source = settings_file.read()
        exec(compile(source, filename, 'exec'), tempdict)
        for key in tempdict:
            if key.isupper():
                context[key] = tempdict[key]
    return context
# Matches the ":name: value" lines found in the articles.
_METADATA = re.compile(r':([a-z]+): (.*)\s', re.M)
# Converters turning the raw string of each known field into its final value.
_METADATAS_FIELDS = {
    'tags': lambda x: x.split(', '),
    'date': lambda x: get_date(x),
    'category': lambda x: x,
    'author': lambda x: x,
}
def get_date(string):
    """Parse ``string`` into a datetime object.

    The supported formats are tried in turn and the first one that matches
    wins. If no format matches the given date, a ValueError is raised.
    """
    known_formats = (
        '%Y-%m-%d %H:%M',
        '%Y/%m/%d %H:%M',
        '%Y-%m-%d',
        '%Y/%m/%d',
        '%d/%m/%Y',
    )
    for candidate in known_formats:
        try:
            parsed = datetime.strptime(string, candidate)
        except ValueError:
            continue
        return parsed
    raise ValueError("'%s' is not a valid date" % string)
def parse_metadata(string):
    """Extract the known metadata fields found within the given string.

    Fields without an entry in _METADATAS_FIELDS are ignored.

    :param string: the string to search the metadata in
    :return: a dict mapping each field name to its converted value
    """
    found = {}
    for match in _METADATA.finditer(string):
        field = match.group(1).lower()
        raw_value = match.group(2)
        if name_is_unknown(field):
            continue
        found[field] = _METADATAS_FIELDS[field](raw_value)
    return found


def name_is_unknown(field):
    """Tell whether no converter is registered for the metadata ``field``."""
    return field not in _METADATAS_FIELDS
def slugify(value):
    """
    Normalizes string, converts to lowercase, removes non-alpha characters,
    and converts spaces to hyphens.

    Took from django sources.

    :param value: the unicode string to turn into a slug
    :return: the slug, as a unicode string
    """
    import unicodedata
    # Drop everything that has no ASCII equivalent, then go straight back to
    # text so the regular expressions below work on Python 2 and Python 3.
    value = unicodedata.normalize('NFKD', value)
    value = value.encode('ascii', 'ignore').decode('ascii')
    value = re.sub(r'[^\w\s-]', '', value).strip().lower()
    return re.sub(r'[-\s]+', '-', value)
class Article(object):
    """Represents an article.

    Given a string, complete its properties from here. Only the 'rst'
    markup sets ``title`` and ``content``.

    :param string: the string to parse, containing the original content.
    :param markup: the markup language to use while parsing
        (defaults to 'rst').
    :param config: the settings; AUTHOR is used when the article does not
        define an author.
    :param file_infos: object with an ``st_ctime`` attribute, used as the
        date when the article does not define one.
    """

    def __init__(self, string, markup=None, config=None, file_infos=None):
        if markup is None:
            markup = 'rst'
        # a fresh dict per call instead of a shared mutable default
        if config is None:
            config = {}

        for key, value in parse_metadata(string).items():
            setattr(self, key, value)

        if markup == 'rst':
            extra_params = {'input_encoding': 'unicode',
                            'initial_header_level': '2'}
            rendered_content = core.publish_parts(string, writer_name='html',
                settings_overrides=extra_params)
            self.title = rendered_content.get('title')
            self.content = rendered_content.get('body')

        if not hasattr(self, 'author'):
            if 'AUTHOR' in config:
                self.author = config['AUTHOR']

        if not hasattr(self, 'date'):
            self.date = datetime.fromtimestamp(file_infos.st_ctime)

    @property
    def url(self):
        """Name of the generated page: the slugified title plus '.html'."""
        return '%s.html' % slugify(self.title)

    @property
    def summary(self):
        """The content passed through truncate_html_words with a limit of 50."""
        return truncate_html_words(self.content, 50)

42
pelican/contents.py Normal file
View file

@ -0,0 +1,42 @@
from pelican.utils import slugify, truncate_html_words
class Page(object):
    """Represents a page.

    Given a content, and metadatas, create an adequate object.

    :param content: the content of the page.
    :param metadatas: dict of metadata; each item becomes an attribute of
        the object.
    :param settings: the settings; AUTHOR is used when the metadatas do not
        provide an author.
    """
    mandatory_properties = ('author', 'title')

    def __init__(self, content, metadatas=None, settings=None):
        # fresh dicts per call instead of shared mutable defaults
        if metadatas is None:
            metadatas = {}
        if settings is None:
            settings = {}

        self.content = content
        for key, value in metadatas.items():
            setattr(self, key, value)

        if not hasattr(self, 'author'):
            if 'AUTHOR' in settings:
                self.author = settings['AUTHOR']

    def check_properties(self):
        """test that each mandatory property is set.

        :raises NameError: carrying the name of the first missing property.
        """
        for prop in self.mandatory_properties:
            if not hasattr(self, prop):
                raise NameError(prop)

    @property
    def url(self):
        """Name of the generated page: the slugified title plus '.html'."""
        return '%s.html' % slugify(self.title)

    @property
    def summary(self):
        """The content passed through truncate_html_words with a limit of 50."""
        return truncate_html_words(self.content, 50)
class Article(Page):
    """A page that declares ``date`` and ``category`` as mandatory too."""
    mandatory_properties = (
        'author',
        'title',
        'date',
        'category',
    )
class Quote(Page):
    """A page whose mandatory properties are ``author`` and ``date``."""
    # check_properties() reads `mandatory_properties`; the former name
    # `base_properties` was never consulted, so the tuple had no effect.
    mandatory_properties = ('author', 'date')
    # kept as an alias for code that still refers to the old name
    base_properties = mandatory_properties

251
pelican/generators.py Normal file
View file

@ -0,0 +1,251 @@
# -*- coding: utf-8 -*-
import os
import shutil
from codecs import open
from operator import attrgetter
from jinja2 import Environment, FileSystemLoader
from jinja2.exceptions import TemplateNotFound
from feedgenerator import Atom1Feed
from pelican.utils import update_dict
from pelican.settings import read_settings
from pelican.contents import Article
from pelican.readers import read_file
## Constants ##########################################################
# Every template loaded from the theme by Generator.get_templates().
_TEMPLATES = (
    'index', 'tag', 'tags', 'article',
    'category', 'categories', 'archives',
)
# Templates rendered exactly once, each one to "<name>.html".
_DIRECT_TEMPLATES = (
    'index', 'tags', 'categories', 'archives',
)
class Generator(object):
    """Base class generator.

    Loads the settings and provides the helpers shared by the generators:
    Atom feed output, template loading/rendering and output clean-up.
    """

    def __init__(self, settings):
        """:param settings: the settings file, handed to read_settings."""
        self.settings = read_settings(settings)

    def _init_params(self, path=None, theme=None, output_path=None, fmt=None):
        """Initialize parameters for this object.

        Each argument left empty falls back to the matching setting.

        :param path: the path where to find the files to parse
        :param theme: where to search for templates
        :param output_path: where to output the generated files
        :param fmt: the format of the files to read.
        :raises Exception: if no path is given, neither here nor in the
            settings.
        """
        self.path = path or self.settings['PATH']
        self.theme = theme or self.settings['THEME']
        output_path = output_path or self.settings['OUTPUT_PATH']
        self.output_path = os.path.realpath(output_path)
        # the default settings only define MARKUP, so use it when FORMAT
        # is not configured
        self.format = (fmt or self.settings.get('FORMAT')
                       or self.settings['MARKUP'])

        # check the resolved value, so a PATH coming from the settings counts
        if not self.path:
            raise Exception('you need to specify a path to search the docs on !')

    @staticmethod
    def _ensure_parent_dir(filename):
        """Create the directory that will contain ``filename`` if needed."""
        dirname = os.path.dirname(filename)
        if not dirname:
            return
        try:
            os.makedirs(dirname)
        except OSError:
            # an already existing directory is fine, anything else is not
            if not os.path.isdir(dirname):
                raise

    def generate_feed(self, elements, context, output_path=None, filename=None):
        """Generate an Atom feed with the list of elements provided.

        The feed is written to ``output_path/filename`` only when both are
        given; it is returned in every case.

        :param elements: the articles to put on the feed.
        :param context: the context to get the feed metadatas
            (SITENAME, SITEURL and the optional SITESUBTITLE).
        :param output_path: where to output the file.
        :param filename: the filename to output.
        :return: the feed object.
        """
        feed = Atom1Feed(
            title=context['SITENAME'],
            link=context['SITEURL'],
            feed_url='%s/%s' % (context['SITEURL'], filename),
            description=context.get('SITESUBTITLE', ''))
        for element in elements:
            feed.add_item(
                title=element.title,
                link='%s/%s' % (context['SITEURL'], element.url),
                description=element.content,
                author_name=getattr(element, 'author', 'John Doe'),
                pubdate=element.date)

        if output_path and filename:
            complete_path = os.path.join(output_path, filename)
            self._ensure_parent_dir(complete_path)
            # the context manager closes the file even if feed.write() fails
            with open(complete_path, 'w') as fp:
                feed.write(fp, 'utf-8')
        return feed

    def generate_file(self, name, template, context, **kwargs):
        """Render ``template`` and write the result below the output path.

        :param name: name of the file to output
        :param template: template to use to generate the content
        :param context: dict to pass to the templates; it is left untouched.
        :param **kwargs: additional variables to pass to the templates
        """
        # work on a copy: page specific variables (tag, article, articles,
        # ...) must not leak into the pages rendered afterwards
        page_context = dict(context)
        page_context.update(kwargs)
        output = template.render(page_context)

        filename = os.sep.join((self.output_path, name))
        self._ensure_parent_dir(filename)
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(output)
        print('writing %s' % filename)

    def get_templates(self, path=None):
        """Return the templates to use.

        :param path: the theme directory; templates are loaded from its
            "templates" sub-directory.
        :return: a dict mapping each name of _TEMPLATES to its template.
        :raises Exception: if one of the templates cannot be found.
        """
        path = os.path.expanduser(os.path.join(path, 'templates'))
        env = Environment(loader=FileSystemLoader(path))
        templates = {}
        for template in _TEMPLATES:
            try:
                templates[template] = env.get_template('%s.html' % template)
            except TemplateNotFound:
                raise Exception('Unable to load %s.html from %s' % (
                    template, path))
        return templates

    def clean_output_dir(self):
        """Remove all the files from the output directory.

        Filesystem errors (for instance a missing directory) are ignored.
        """
        try:
            shutil.rmtree(self.output_path)
        except OSError:
            pass
class ArticlesGenerator(Generator):
    """Generate a blog: article pages, listing pages and Atom feeds."""

    def __init__(self, settings=None):
        """:param settings: the settings file to use (optional)."""
        super(ArticlesGenerator, self).__init__(settings)
        self.articles = []
        # each dict maps a key (day, year, tag, category) to its articles
        self.dates = {}
        self.years = {}
        self.tags = {}
        self.categories = {}

    def get_files(self, path):
        """Return the files to use in this generator.

        :param path: the directory to walk (symlinks are followed).
        :return: the files whose extension is ``self.format``, which may be
            one extension or a list/tuple of extensions.
        """
        formats = self.format
        if not isinstance(formats, (list, tuple)):
            formats = [formats]
        # compare against ".ext" so that "rst" does not match "thirst"
        suffixes = tuple('.' + fmt.lstrip('.') for fmt in formats)

        files = []
        for root, dirs, temp_files in os.walk(path, followlinks=True):
            files.extend(os.sep.join((root, f)) for f in temp_files
                         if f.endswith(suffixes))
        return files

    def process_files(self, files):
        """Process all the files and build the lists and dicts of
        articles/categories/etc.

        Files lacking a mandatory metadata are reported and skipped.
        """
        for f in files:
            content, metadatas = read_file(f)
            if 'category' not in metadatas:
                # derive the category from the directory of the file
                category = os.path.dirname(f).replace(
                    os.path.expanduser(self.path)+'/', '')
                if category != '':
                    # Python 2 gives a byte string here: make it text
                    if isinstance(category, bytes):
                        category = category.decode('utf-8')
                    metadatas['category'] = category

            article = Article(content, metadatas, settings=self.settings)
            try:
                article.check_properties()
            except NameError as e:
                print("Error, The '%s' metadata is not present in %s" % (e, f))
                continue

            update_dict(self.dates, article.date.strftime('%Y-%m-%d'), article)
            update_dict(self.years, article.date.year, article)
            update_dict(self.categories, article.category, article)
            if hasattr(article, 'tags'):
                for tag in article.tags:
                    update_dict(self.tags, tag, article)
            self.articles.append(article)

    def _get_context(self):
        """Return the context to be used in templates.

        It is a copy of the settings plus the collected articles, dates,
        years, tags and categories; dicts are exposed as the result of
        their items() call.
        """
        context = self.settings.copy()
        for item in ('articles', 'dates', 'years', 'tags', 'categories'):
            value = getattr(self, item)
            if hasattr(value, 'items'):
                value = value.items()
            context[item] = value
        return context

    def generate_feeds(self, context):
        """Generate the feeds from the current context, and output files.

        One feed holds all the articles, then one feed is written per
        category. SITEURL defaults to the output path.
        """
        if 'SITEURL' not in context:
            context['SITEURL'] = self.output_path

        self.generate_feed(self.articles, context, self.output_path,
            context['FEED'])

        for cat, arts in self.categories.items():
            arts.sort(key=attrgetter('date'), reverse=True)
            self.generate_feed(arts, context, self.output_path,
                context['CATEGORY_FEED'] % cat)

    def generate_pages(self, context):
        """Generate the pages on the disk"""
        templates = self.get_templates(self.theme)
        generate = self.generate_file
        for template in _DIRECT_TEMPLATES:
            generate('%s.html' % template, templates[template], context, blog=True)
        for tag in self.tags:
            generate('tag/%s.html' % tag, templates['tag'], context, tag=tag)
        for cat in self.categories:
            generate('category/%s.html' % cat, templates['category'], context,
                category=cat, articles=self.categories[cat])
        for article in self.articles:
            generate('%s' % article.url,
                templates['article'], context, article=article,
                category=article.category)

    def generate_static_content(self):
        """copy static paths to output (copy errors are ignored)"""
        for path in self.settings['STATIC_PATHS']:
            try:
                shutil.copytree(os.path.join(self.theme, path),
                    os.path.join(self.output_path, path))
            except OSError:
                pass

    def generate(self, path=None, theme=None, output_path=None, fmt=None):
        """Search the given path for files, and generate a static blog in output,
        using the given theme.

        :param path: the path where to find the files to parse
        :param theme: where to search for templates
        :param output_path: where to output the generated files
        :param fmt: the format of the files to read.
        """
        self._init_params(path, theme, output_path, fmt)

        # build the list of articles / categories / etc.
        # (self.path also covers a path coming from the settings)
        self.process_files(self.get_files(self.path))

        # sort the articles by date
        self.articles.sort(key=attrgetter('date'), reverse=True)

        # and generate the output :)
        context = self._get_context()
        self.generate_feeds(context)
        self.generate_pages(context)
        self.generate_static_content()

50
pelican/readers.py Normal file
View file

@ -0,0 +1,50 @@
from docutils import core
import re
# import the directives to have pygments support
import rstdirectives
from pelican.utils import get_date, open
# Converters turning the raw string of each known field into its final value.
_METADATAS_FIELDS = {'tags': lambda x: x.split(', '),
                     'date': lambda x: get_date(x),
                     'category': lambda x: x,
                     'author': lambda x: x}


class RstReader(object):
    """Reader for the restructured text files."""

    # ":name: value" lines; compiled once rather than on every call
    _METADATA_RE = re.compile(r':([a-z]+): (.*)\s', re.M)

    def _parse_metadata(self, content):
        """Return the dict containing metadatas.

        Fields without a converter in _METADATAS_FIELDS are ignored rather
        than failing with a KeyError.
        """
        output = {}
        for m in self._METADATA_RE.finditer(content):
            name, value = m.group(1).lower(), m.group(2)
            if name in _METADATAS_FIELDS:
                output[name] = _METADATAS_FIELDS[name](value)
        return output

    def read(self, filename):
        """Parse restructured text.

        :param filename: the file to read.
        :return: a (content, metadatas) tuple, content being the rendered
            HTML body; the title found by docutils is used when the
            metadatas hold no title.
        """
        text = open(filename)
        metadatas = self._parse_metadata(text)
        extra_params = {'input_encoding': 'unicode',
                        'initial_header_level': '2'}
        rendered_content = core.publish_parts(text, writer_name='html',
            settings_overrides=extra_params)
        title = rendered_content.get('title')
        content = rendered_content.get('body')
        if 'title' not in metadatas:
            metadatas['title'] = title
        return content, metadatas
# supported formats, each one mapped to its reader class
_EXTENSIONS = {'rst': RstReader}


def read_file(filename, fmt=None):
    """Read ``filename`` with the reader registered for ``fmt``.

    :param filename: the file to read.
    :param fmt: the format of the file; 'rst' when empty.
    :return: whatever the ``read`` method of the reader returns.
    :raises TypeError: if no reader is registered for ``fmt``.
    """
    selected = fmt if fmt else 'rst'
    if selected in _EXTENSIONS.keys():
        reader_class = _EXTENSIONS[selected]
        return reader_class().read(filename)
    raise TypeError('Pelican does not know how to parse %s files' % selected)

25
pelican/settings.py Normal file
View file

@ -0,0 +1,25 @@
import os
# The "themes" directory located next to this module.
_DEFAULT_THEME = os.sep.join([os.path.dirname(os.path.abspath(__file__)),
                              "themes"])
# Values used for every setting the settings file does not override.
_DEFAULT_CONFIG = {'PATH': None,
                   'THEME': _DEFAULT_THEME,
                   'OUTPUT_PATH': 'output/',
                   'MARKUP': 'rst',
                   'STATIC_PATHS': ['css', 'images'],
                   'FEED': 'feeds/all.atom.xml',
                   'CATEGORY_FEED': 'feeds/%s.atom.xml',
                   'SITENAME': 'A Pelican Blog',
                   }


def read_settings(filename):
    """Load a Python file into a dictionary.

    Only the names written fully in upper case are kept; they override the
    values of _DEFAULT_CONFIG.

    :param filename: the settings file to execute; when empty or None only
        the defaults are returned.
    :return: a new dict holding the settings.
    """
    context = _DEFAULT_CONFIG.copy()
    if filename:
        tempdict = {}
        # NOTE: the settings file is executed as Python code, so only trusted
        # files must be loaded. exec(compile(...)) replaces execfile(), which
        # no longer exists in Python 3, and behaves the same on Python 2.
        with open(filename) as settings_file:
            source = settings_file.read()
        exec(compile(source, filename, 'exec'), tempdict)
        for key in tempdict:
            if key.isupper():
                context[key] = tempdict[key]
    return context

View file

@ -1,4 +1,51 @@
import re
from datetime import datetime
from codecs import open as _open
def update_dict(mapping, key, value):
    """Append ``value`` to the list stored under ``key``.

    The list is created first when ``key`` is not in the mapping yet.

    :param mapping: the mapping to update
    :param key: the key of the mapping to update.
    :param value: the value to append to the list.
    """
    mapping.setdefault(key, []).append(value)
def get_date(string):
    """Parse ``string`` into a datetime object.

    The supported formats are tried in turn and the first one that matches
    wins. If no format matches the given date, a ValueError is raised.
    """
    known_formats = (
        '%Y-%m-%d %H:%M',
        '%Y/%m/%d %H:%M',
        '%Y-%m-%d',
        '%Y/%m/%d',
        '%d/%m/%Y',
    )
    for candidate in known_formats:
        try:
            parsed = datetime.strptime(string, candidate)
        except ValueError:
            continue
        return parsed
    raise ValueError("'%s' is not a valid date" % string)
def open(filename):
    """Open a file and return its content, decoded as UTF-8.

    :param filename: the file to read.
    :return: the whole content of the file, as a unicode string.
    """
    # the context manager closes the file instead of leaving that to the
    # garbage collector
    with _open(filename, encoding='utf-8') as f:
        return f.read()
def slugify(value):
    """
    Normalizes string, converts to lowercase, removes non-alpha characters,
    and converts spaces to hyphens.

    Took from django sources.

    :param value: the unicode string to turn into a slug
    :return: the slug, as a unicode string
    """
    import unicodedata
    # Drop everything that has no ASCII equivalent, then go straight back to
    # text so the regular expressions below work on Python 2 and Python 3.
    value = unicodedata.normalize('NFKD', value)
    value = value.encode('ascii', 'ignore').decode('ascii')
    value = re.sub(r'[^\w\s-]', '', value).strip().lower()
    return re.sub(r'[-\s]+', '-', value)
def truncate_html_words(s, num, end_text='...'):
"""Truncates HTML to a certain number of words (not counting tags and
@ -13,6 +60,7 @@ def truncate_html_words(s, num, end_text='...'):
if length <= 0:
return u''
html4_singlets = ('br', 'col', 'link', 'base', 'img', 'param', 'area', 'hr', 'input')
# Set up regular expressions
re_words = re.compile(r'&.*?;|<.*?>|(\w[\w-]*)', re.U)
re_tag = re.compile(r'<(/)?([^ ]+?)(?: (/)| .*?)?>')
@ -65,3 +113,4 @@ def truncate_html_words(s, num, end_text='...'):
out += '</%s>' % tag
# Return string
return out

View file

@ -20,7 +20,8 @@
</head>
<body id="index" class="home">
<a href="http://github.com/ametaireau"><img style="position: absolute; top: 0; right: 0; border: 0;" src="http://s3.amazonaws.com/github/ribbons/forkme_right_red_aa0000.png" alt="Fork me on GitHub" /></a>
<header id="banner" class="body">
{# The site title uses the SITEURL / SITENAME / SITESUBTITLE settings, the names the generators and the default settings now use. #}
<h1><a href="{{ SITEURL }}">{{ SITENAME }} {% if SITESUBTITLE %} <strong>{{ SITESUBTITLE }}</strong>{% endif %}</a></h1>
<nav><ul>

View file

@ -7,7 +7,7 @@ if sys.version_info < (2,7):
setup(
name = "pelican",
version = '1.2.5',
version = '2.0',
url = 'http://hg.lolnet.org/pelican/',
author = 'Alexis Metaireau',
author_email = 'alexis@notmyidea.org',