1
0
Fork 0
forked from github/pelican
pelican-theme/pelican/generators.py

596 lines
23 KiB
Python
Raw Normal View History

# -*- coding: utf-8 -*-
from __future__ import unicode_literals, print_function
import os
import math
import random
import logging
import datetime
import shutil
2012-08-18 20:32:43 +03:00
from codecs import open
from collections import defaultdict
from functools import partial
from itertools import chain
from operator import attrgetter, itemgetter
from jinja2 import (
Environment, FileSystemLoader, PrefixLoader, ChoiceLoader, BaseLoader,
TemplateNotFound
)
from pelican.contents import (
Article, Page, Category, StaticContent, is_valid_content
)
from pelican.readers import read_file
from pelican.utils import copy, process_translations, mkdir_p
2011-06-18 01:03:53 +02:00
from pelican import signals
logger = logging.getLogger(__name__)
class Generator(object):
"""Baseclass generator"""
def __init__(self, *args, **kwargs):
for idx, item in enumerate(('context', 'settings', 'path', 'theme',
'output_path', 'markup')):
setattr(self, item, args[idx])
for arg, value in kwargs.items():
setattr(self, arg, value)
# templates cache
self._templates = {}
self._templates_path = []
self._templates_path.append(os.path.expanduser(
os.path.join(self.theme, 'templates')))
self._templates_path += self.settings.get('EXTRA_TEMPLATES_PATHS', [])
2012-03-10 12:21:54 +01:00
theme_path = os.path.dirname(os.path.abspath(__file__))
2012-03-10 11:32:22 +01:00
simple_loader = FileSystemLoader(os.path.join(theme_path,
"themes", "simple", "templates"))
2012-05-07 17:11:57 +02:00
self.env = Environment(
trim_blocks=True,
loader=ChoiceLoader([
FileSystemLoader(self._templates_path),
simple_loader, # implicit inheritance
PrefixLoader({'!simple': simple_loader}) # explicit one
]),
extensions=self.settings.get('JINJA_EXTENSIONS', []),
)
2012-05-07 17:11:57 +02:00
logger.debug('template list: {0}'.format(self.env.list_templates()))
# get custom Jinja filters from user settings
custom_filters = self.settings.get('JINJA_FILTERS', {})
2012-05-07 17:11:57 +02:00
self.env.filters.update(custom_filters)
signals.generator_init.send(self)
def get_template(self, name):
"""Return the template by name.
2010-11-20 02:25:42 +00:00
Use self.theme to get the templates to use, and return a list of
templates ready to use with Jinja2.
"""
if name not in self._templates:
try:
2012-05-07 17:11:57 +02:00
self._templates[name] = self.env.get_template(name + '.html')
except TemplateNotFound:
raise Exception(
('[templates] unable to load %s.html from %s'
% (name, self._templates_path)))
return self._templates[name]
2010-11-20 02:25:42 +00:00
def get_files(self, path, exclude=[], extensions=None):
"""Return a list of files to use, based on rules
2010-10-30 20:17:23 +01:00
:param path: the path to search the file on
:param exclude: the list of path to exclude
:param extensions: the list of allowed extensions (if False, all
extensions are allowed)
2010-10-30 20:17:23 +01:00
"""
if extensions is None:
2010-11-20 02:25:42 +00:00
extensions = self.markup
2010-10-30 20:17:23 +01:00
files = []
try:
iter = os.walk(path, followlinks=True)
except TypeError: # python 2.5 does not support followlinks
iter = os.walk(path)
for root, dirs, temp_files in iter:
2010-10-30 20:17:23 +01:00
for e in exclude:
if e in dirs:
dirs.remove(e)
for f in temp_files:
if extensions is False or \
(True in [f.endswith(ext) for ext in extensions]):
files.append(os.sep.join((root, f)))
2010-10-30 20:17:23 +01:00
return files
def add_source_path(self, content):
location = os.path.relpath(os.path.abspath(content.source_path),
os.path.abspath(self.path))
self.context['filenames'][location] = content
def _update_context(self, items):
"""Update the context with the given items from the currrent
processor.
"""
for item in items:
value = getattr(self, item)
if hasattr(value, 'items'):
2013-01-11 18:55:04 +01:00
value = list(value.items()) # py3k safeguard for iterators
self.context[item] = value
class _FileLoader(BaseLoader):
def __init__(self, path, basedir):
self.path = path
self.fullpath = os.path.join(basedir, path)
def get_source(self, environment, template):
if template != self.path or not os.path.exists(self.fullpath):
raise TemplateNotFound(template)
mtime = os.path.getmtime(self.fullpath)
with open(self.fullpath, 'r', encoding='utf-8') as f:
source = f.read()
return source, self.fullpath, \
lambda: mtime == os.path.getmtime(self.fullpath)
2012-10-30 00:27:18 +01:00
class TemplatePagesGenerator(Generator):
def generate_output(self, writer):
for source, dest in self.settings['TEMPLATE_PAGES'].items():
self.env.loader.loaders.insert(0, _FileLoader(source, self.path))
try:
template = self.env.get_template(source)
rurls = self.settings.get('RELATIVE_URLS')
writer.write_file(dest, template, self.context, rurls)
finally:
del self.env.loader.loaders[0]
class ArticlesGenerator(Generator):
"""Generate blog articles"""
def __init__(self, *args, **kwargs):
"""initialize properties"""
self.articles = [] # only articles in default language
self.translations = []
self.dates = {}
self.tags = defaultdict(list)
self.categories = defaultdict(list)
self.related_posts = []
self.authors = defaultdict(list)
self.drafts = []
super(ArticlesGenerator, self).__init__(*args, **kwargs)
signals.article_generator_init.send(self)
def generate_feeds(self, writer):
"""Generate the feeds from the current context, and output files."""
if self.settings.get('FEED_ATOM'):
writer.write_feed(self.articles, self.context,
self.settings['FEED_ATOM'])
if self.settings.get('FEED_RSS'):
writer.write_feed(self.articles, self.context,
self.settings['FEED_RSS'], feed_type='rss')
if (self.settings.get('FEED_ALL_ATOM')
or self.settings.get('FEED_ALL_RSS')):
all_articles = list(self.articles)
for article in self.articles:
all_articles.extend(article.translations)
all_articles.sort(key=attrgetter('date'), reverse=True)
if self.settings.get('FEED_ALL_ATOM'):
writer.write_feed(all_articles, self.context,
self.settings['FEED_ALL_ATOM'])
if self.settings.get('FEED_ALL_RSS'):
writer.write_feed(all_articles, self.context,
self.settings['FEED_ALL_RSS'],
feed_type='rss')
for cat, arts in self.categories:
arts.sort(key=attrgetter('date'), reverse=True)
if self.settings.get('CATEGORY_FEED_ATOM'):
writer.write_feed(arts, self.context,
self.settings['CATEGORY_FEED_ATOM'] % cat)
if self.settings.get('CATEGORY_FEED_RSS'):
writer.write_feed(arts, self.context,
self.settings['CATEGORY_FEED_RSS'] % cat,
feed_type='rss')
if (self.settings.get('TAG_FEED_ATOM')
or self.settings.get('TAG_FEED_RSS')):
2010-12-17 09:25:19 +01:00
for tag, arts in self.tags.items():
arts.sort(key=attrgetter('date'), reverse=True)
if self.settings.get('TAG_FEED_ATOM'):
writer.write_feed(arts, self.context,
self.settings['TAG_FEED_ATOM'] % tag)
2010-12-17 09:25:19 +01:00
if self.settings.get('TAG_FEED_RSS'):
writer.write_feed(arts, self.context,
self.settings['TAG_FEED_RSS'] % tag,
feed_type='rss')
if (self.settings.get('TRANSLATION_FEED_ATOM')
or self.settings.get('TRANSLATION_FEED_RSS')):
translations_feeds = defaultdict(list)
for article in chain(self.articles, self.translations):
translations_feeds[article.lang].append(article)
for lang, items in translations_feeds.items():
items.sort(key=attrgetter('date'), reverse=True)
if self.settings.get('TRANSLATION_FEED_ATOM'):
writer.write_feed(items, self.context,
2012-10-22 23:05:18 +02:00
self.settings['TRANSLATION_FEED_ATOM'] % lang)
if self.settings.get('TRANSLATION_FEED_RSS'):
writer.write_feed(items, self.context,
2012-10-22 23:05:18 +02:00
self.settings['TRANSLATION_FEED_RSS'] % lang,
feed_type='rss')
2010-12-17 09:25:19 +01:00
def generate_articles(self, write):
2012-04-29 10:34:20 +01:00
"""Generate the articles."""
for article in chain(self.translations, self.articles):
write(article.save_as, self.get_template(article.template),
self.context, article=article, category=article.category)
def generate_direct_templates(self, write):
2012-04-29 10:34:20 +01:00
"""Generate direct templates pages"""
PAGINATED_TEMPLATES = self.settings.get('PAGINATED_DIRECT_TEMPLATES')
for template in self.settings.get('DIRECT_TEMPLATES'):
2011-02-15 14:36:55 +01:00
paginated = {}
if template in PAGINATED_TEMPLATES:
2011-02-15 14:36:55 +01:00
paginated = {'articles': self.articles, 'dates': self.dates}
save_as = self.settings.get("%s_SAVE_AS" % template.upper(),
'%s.html' % template)
if not save_as:
continue
write(save_as, self.get_template(template),
self.context, blog=True, paginated=paginated,
page_name=template)
def generate_tags(self, write):
2012-04-29 10:34:20 +01:00
"""Generate Tags pages."""
tag_template = self.get_template('tag')
2010-12-05 19:15:02 +00:00
for tag, articles in self.tags.items():
2011-06-12 22:18:50 +02:00
articles.sort(key=attrgetter('date'), reverse=True)
dates = [article for article in self.dates if article in articles]
write(tag.save_as, tag_template, self.context, tag=tag,
articles=articles, dates=dates,
2011-02-15 14:36:55 +01:00
paginated={'articles': articles, 'dates': dates},
page_name=tag.page_name)
def generate_categories(self, write):
2012-04-29 10:34:20 +01:00
"""Generate category pages."""
category_template = self.get_template('category')
for cat, articles in self.categories:
dates = [article for article in self.dates if article in articles]
write(cat.save_as, category_template, self.context,
2011-02-15 14:36:55 +01:00
category=cat, articles=articles, dates=dates,
paginated={'articles': articles, 'dates': dates},
page_name=cat.page_name)
def generate_authors(self, write):
2012-04-29 10:34:20 +01:00
"""Generate Author pages."""
author_template = self.get_template('author')
for aut, articles in self.authors:
dates = [article for article in self.dates if article in articles]
write(aut.save_as, author_template, self.context,
author=aut, articles=articles, dates=dates,
paginated={'articles': articles, 'dates': dates},
page_name=aut.page_name)
def generate_drafts(self, write):
2012-04-29 10:34:20 +01:00
"""Generate drafts pages."""
for article in self.drafts:
write('drafts/%s.html' % article.slug,
self.get_template(article.template), self.context,
article=article, category=article.category)
2012-04-29 10:34:20 +01:00
def generate_pages(self, writer):
"""Generate the pages on the disk"""
write = partial(writer.write_file,
relative_urls=self.settings.get('RELATIVE_URLS'))
2012-04-29 10:34:20 +01:00
# to minimize the number of relative path stuff modification
# in writer, articles pass first
self.generate_articles(write)
self.generate_direct_templates(write)
2012-04-29 10:34:20 +01:00
# and subfolders after that
self.generate_tags(write)
self.generate_categories(write)
self.generate_authors(write)
self.generate_drafts(write)
2012-04-29 10:34:20 +01:00
def generate_context(self):
"""Add the articles into the shared context"""
article_path = os.path.normpath( # we have to remove trailing slashes
os.path.join(self.path, self.settings['ARTICLE_DIR'])
)
all_articles = []
for f in self.get_files(
article_path,
exclude=self.settings['ARTICLE_EXCLUDES']):
try:
2012-11-14 22:12:46 -08:00
signals.article_generate_preread.send(self)
content, metadata = read_file(f, settings=self.settings)
except Exception as e:
logger.warning('Could not process %s\n%s' % (f, str(e)))
continue
# if no category is set, use the name of the path as a category
if 'category' not in metadata:
if (self.settings['USE_FOLDER_AS_CATEGORY']
and os.path.dirname(f) != article_path):
# if the article is in a subdirectory
category = os.path.basename(os.path.dirname(f))
else:
# if the article is not in a subdirectory
category = self.settings['DEFAULT_CATEGORY']
if category != '':
metadata['category'] = Category(category, self.settings)
if 'date' not in metadata and self.settings.get('DEFAULT_DATE'):
if self.settings['DEFAULT_DATE'] == 'fs':
metadata['date'] = datetime.datetime.fromtimestamp(
os.stat(f).st_ctime)
else:
metadata['date'] = datetime.datetime(
*self.settings['DEFAULT_DATE'])
2011-06-18 01:03:53 +02:00
signals.article_generate_context.send(self, metadata=metadata)
2011-05-07 20:00:30 +01:00
article = Article(content, metadata, settings=self.settings,
source_path=f, context=self.context)
if not is_valid_content(article, f):
continue
self.add_source_path(article)
if article.status == "published":
if hasattr(article, 'tags'):
for tag in article.tags:
self.tags[tag].append(article)
all_articles.append(article)
elif article.status == "draft":
self.drafts.append(article)
else:
logger.warning("Unknown status %s for file %s, skipping it." %
(repr(article.status),
repr(f)))
self.articles, self.translations = process_translations(all_articles)
for article in self.articles:
# only main articles are listed in categories, not translations
self.categories[article.category].append(article)
# ignore blank authors as well as undefined
if hasattr(article, 'author') and article.author.name != '':
self.authors[article.author].append(article)
# sort the articles by date
self.articles.sort(key=attrgetter('date'), reverse=True)
self.dates = list(self.articles)
self.dates.sort(key=attrgetter('date'),
reverse=self.context['NEWEST_FIRST_ARCHIVES'])
# create tag cloud
tag_cloud = defaultdict(int)
for article in self.articles:
2011-03-23 16:41:24 +00:00
for tag in getattr(article, 'tags', []):
tag_cloud[tag] += 1
tag_cloud = sorted(tag_cloud.items(), key=itemgetter(1), reverse=True)
tag_cloud = tag_cloud[:self.settings.get('TAG_CLOUD_MAX_ITEMS')]
tags = list(map(itemgetter(1), tag_cloud))
2011-03-27 12:49:28 +02:00
if tags:
max_count = max(tags)
steps = self.settings.get('TAG_CLOUD_STEPS')
# calculate word sizes
self.tag_cloud = [
(
tag,
int(math.floor(steps - (steps - 1) * math.log(count)
/ (math.log(max_count)or 1)))
)
for tag, count in tag_cloud
]
# put words in chaos
random.shuffle(self.tag_cloud)
# and generate the output :)
# order the categories per name
self.categories = list(self.categories.items())
2012-05-06 02:43:13 +02:00
self.categories.sort(
reverse=self.settings['REVERSE_CATEGORY_ORDER'])
self.authors = list(self.authors.items())
self.authors.sort()
self._update_context(('articles', 'dates', 'tags', 'categories',
'tag_cloud', 'authors', 'related_posts'))
signals.article_generator_finalized.send(self)
def generate_output(self, writer):
self.generate_feeds(writer)
self.generate_pages(writer)
class PagesGenerator(Generator):
"""Generate pages"""
def __init__(self, *args, **kwargs):
self.pages = []
self.hidden_pages = []
self.hidden_translations = []
super(PagesGenerator, self).__init__(*args, **kwargs)
signals.pages_generator_init.send(self)
def generate_context(self):
all_pages = []
hidden_pages = []
for f in self.get_files(
os.path.join(self.path, self.settings['PAGE_DIR']),
exclude=self.settings['PAGE_EXCLUDES']):
try:
2012-09-08 21:39:10 -07:00
content, metadata = read_file(f, settings=self.settings)
except Exception as e:
logger.warning('Could not process %s\n%s' % (f, str(e)))
continue
signals.pages_generate_context.send(self, metadata=metadata)
2011-05-07 20:00:30 +01:00
page = Page(content, metadata, settings=self.settings,
source_path=f, context=self.context)
if not is_valid_content(page, f):
continue
self.add_source_path(page)
if page.status == "published":
all_pages.append(page)
elif page.status == "hidden":
hidden_pages.append(page)
else:
logger.warning("Unknown status %s for file %s, skipping it." %
(repr(page.status),
repr(f)))
self.pages, self.translations = process_translations(all_pages)
self.hidden_pages, self.hidden_translations = (
process_translations(hidden_pages))
self._update_context(('pages', ))
2010-12-14 15:48:35 +00:00
self.context['PAGES'] = self.pages
signals.pages_generator_finalized.send(self)
def generate_output(self, writer):
for page in chain(self.translations, self.pages,
self.hidden_translations, self.hidden_pages):
writer.write_file(page.save_as, self.get_template(page.template),
self.context, page=page,
relative_urls=self.settings.get('RELATIVE_URLS'))
class StaticGenerator(Generator):
2012-04-07 18:02:40 -06:00
"""copy static paths (what you want to copy, like images, medias etc.
2010-12-05 19:15:02 +00:00
to output"""
def _copy_paths(self, paths, source, destination, output_path,
final_path=None):
"""Copy all the paths from source to destination"""
for path in paths:
copy(path, source, os.path.join(output_path, destination),
final_path, overwrite=True)
def generate_context(self):
self.staticfiles = []
# walk static paths
for static_path in self.settings['STATIC_PATHS']:
for f in self.get_files(
os.path.join(self.path, static_path), extensions=False):
f_rel = os.path.relpath(f, self.path)
# TODO remove this hardcoded 'static' subdirectory
sc = StaticContent(f_rel, os.path.join('static', f_rel),
settings=self.settings)
self.staticfiles.append(sc)
self.context['filenames'][f_rel] = sc
# same thing for FILES_TO_COPY
for src, dest in self.settings['FILES_TO_COPY']:
sc = StaticContent(src, dest, settings=self.settings)
self.staticfiles.append(sc)
self.context['filenames'][src] = sc
2012-05-07 17:11:57 +02:00
def generate_output(self, writer):
2010-12-05 19:15:02 +00:00
self._copy_paths(self.settings['THEME_STATIC_PATHS'], self.theme,
'theme', self.output_path, '.')
# copy all StaticContent files
for sc in self.staticfiles:
mkdir_p(os.path.dirname(sc.save_as))
shutil.copy(sc.source_path, sc.save_as)
logger.info('copying {} to {}'.format(sc.source_path, sc.save_as))
class PdfGenerator(Generator):
"""Generate PDFs on the output dir, for all articles and pages coming from
rst"""
def __init__(self, *args, **kwargs):
super(PdfGenerator, self).__init__(*args, **kwargs)
try:
from rst2pdf.createpdf import RstToPdf
pdf_style_path = os.path.join(self.settings['PDF_STYLE_PATH']) \
if 'PDF_STYLE_PATH' in self.settings.keys() \
else ''
pdf_style = self.settings['PDF_STYLE'] if 'PDF_STYLE' \
in self.settings.keys() \
else 'twelvepoint'
self.pdfcreator = RstToPdf(breakside=0,
stylesheets=[pdf_style],
style_path=[pdf_style_path])
except ImportError:
raise Exception("unable to find rst2pdf")
def _create_pdf(self, obj, output_path):
if obj.source_path.endswith('.rst'):
filename = obj.slug + ".pdf"
output_pdf = os.path.join(output_path, filename)
# print('Generating pdf for', obj.source_path, 'in', output_pdf)
with open(obj.source_path) as f:
2012-08-18 20:32:43 +03:00
self.pdfcreator.createPdf(text=f.read(), output=output_pdf)
logger.info(' [ok] writing %s' % output_pdf)
def generate_context(self):
pass
def generate_output(self, writer=None):
2011-02-15 13:48:57 +00:00
# we don't use the writer passed as argument here
# since we write our own files
logger.info(' Generating PDF files...')
pdf_path = os.path.join(self.output_path, 'pdf')
if not os.path.exists(pdf_path):
try:
os.mkdir(pdf_path)
except OSError:
logger.error("Couldn't create the pdf output folder in " +
pdf_path)
for article in self.context['articles']:
self._create_pdf(article, pdf_path)
for page in self.context['pages']:
self._create_pdf(page, pdf_path)
2012-04-15 02:20:20 +03:00
class SourceFileGenerator(Generator):
def generate_context(self):
self.output_extension = self.settings['OUTPUT_SOURCES_EXTENSION']
def _create_source(self, obj, output_path):
output_path = os.path.splitext(obj.save_as)[0]
dest = os.path.join(output_path, output_path + self.output_extension)
copy('', obj.source_path, dest)
def generate_output(self, writer=None):
logger.info(' Generating source files...')
for object in chain(self.context['articles'], self.context['pages']):
self._create_source(object, self.output_path)