diff --git a/docs/internals.rst b/docs/internals.rst
index 704122ba..f69a9bb8 100644
--- a/docs/internals.rst
+++ b/docs/internals.rst
@@ -24,7 +24,7 @@ The logic is separated into different classes and concepts:
then passed to the generators.
* **Readers** are used to read from various formats (AsciiDoc, HTML, Markdown and
- reStructuredText for now, but the system is extensible). Given a file, they
+ reStructuredText for now, but the system is extensible). Given a file, they
return metadata (author, tags, category, etc.) and content (HTML-formatted).
* **Generators** generate the different outputs. For instance, Pelican comes with
@@ -44,7 +44,7 @@ method that returns HTML content and some metadata.
Take a look at the Markdown reader::
- class MarkdownReader(Reader):
+ class MarkdownReader(BaseReader):
enabled = bool(Markdown)
def read(self, source_path):
diff --git a/docs/plugins.rst b/docs/plugins.rst
index 93307afb..582f40a7 100644
--- a/docs/plugins.rst
+++ b/docs/plugins.rst
@@ -71,6 +71,7 @@ finalized pelican object invoked after al
- minifying js/css assets.
- notify/ping search engines with an updated sitemap.
generator_init generator invoked in the Generator.__init__
+readers_init readers invoked in the Readers.__init__
article_generate_context article_generator, metadata
article_generate_preread article_generator invoked before a article is read in ArticlesGenerator.generate_context;
use if code needs to do something before every article is parsed
@@ -144,13 +145,13 @@ write and don't slow down pelican itself when they're not active.
No more talking, here is the example::
from pelican import signals
- from pelican.readers import EXTENSIONS, Reader
+ from pelican.readers import BaseReader
- # Create a new reader class, inheriting from the pelican.reader.Reader
- class NewReader(Reader):
+ # Create a new reader class, inheriting from pelican.readers.BaseReader
+ class NewReader(BaseReader):
enabled = True # Yeah, you probably want that :-)
- # The list of extensions you want this reader to match with.
+ # The list of file extensions you want this reader to match with.
# In the case multiple readers use the same extensions, the latest will
# win (so the one you're defining here, most probably).
file_extensions = ['yeah']
@@ -168,12 +169,12 @@ No more talking, here is the example::
return "Some content", parsed
- def add_reader(arg):
- EXTENSIONS['yeah'] = NewReader
+ def add_reader(readers):
+ readers.reader_classes['yeah'] = NewReader
# this is how pelican works.
def register():
- signals.initialized.connect(add_reader)
+ signals.readers_init.connect(add_reader)
Adding a new generator
diff --git a/docs/settings.rst b/docs/settings.rst
index 0d8f924f..55721c11 100644
--- a/docs/settings.rst
+++ b/docs/settings.rst
@@ -84,9 +84,10 @@ Setting name (default value) What doe
here or a single string representing one locale.
When providing a list, all the locales will be tried
until one works.
-`MARKUP` (``('rst', 'md')``) A list of available markup languages you want
- to use. For the moment, the only available values
- are `rst`, `md`, `markdown`, `mkd`, `mdown`, `html`, and `htm`.
+`READERS` (``{}``) A dict of file extensions / Reader classes to overwrite or
+ add file readers. For instance, to avoid processing .html files:
+ ``READERS = {'html': None}``. Or to add a custom reader for the
+ `foo` extension: ``READERS = {'foo': FooReader}``
`IGNORE_FILES` (``['.#*']``) A list of file globbing patterns to match against the
source files to be ignored by the processor. For example,
the default ``['.#*']`` will ignore emacs lock files.
diff --git a/pelican/__init__.py b/pelican/__init__.py
index 9bce4926..8ba79e0a 100644
--- a/pelican/__init__.py
+++ b/pelican/__init__.py
@@ -17,6 +17,7 @@ from pelican.generators import (ArticlesGenerator, PagesGenerator,
StaticGenerator, SourceFileGenerator,
TemplatePagesGenerator)
from pelican.log import init
+from pelican.readers import Readers
from pelican.settings import read_settings
from pelican.utils import clean_output_dir, folder_watcher, file_watcher
from pelican.writers import Writer
@@ -46,7 +47,6 @@ class Pelican(object):
self.path = settings['PATH']
self.theme = settings['THEME']
self.output_path = settings['OUTPUT_PATH']
- self.markup = settings['MARKUP']
self.ignore_files = settings['IGNORE_FILES']
self.delete_outputdir = settings['DELETE_OUTPUT_DIRECTORY']
self.output_retention = settings['OUTPUT_RETENTION']
@@ -164,7 +164,6 @@ class Pelican(object):
path=self.path,
theme=self.theme,
output_path=self.output_path,
- markup=self.markup,
) for cls in self.get_generator_classes()
]
@@ -236,10 +235,6 @@ def parse_arguments():
help='Where to output the generated files. If not specified, a '
'directory will be created, named "output" in the current path.')
- parser.add_argument('-m', '--markup', dest='markup',
- help='The list of markup language to use (rst or md). Please indicate '
- 'them separated by commas.')
-
parser.add_argument('-s', '--settings', dest='settings',
help='The settings of the application, this is automatically set to '
'{0} if a file exists with this name.'.format(DEFAULT_CONFIG_NAME))
@@ -279,8 +274,6 @@ def get_config(args):
if args.output:
config['OUTPUT_PATH'] = \
os.path.abspath(os.path.expanduser(args.output))
- if args.markup:
- config['MARKUP'] = [a.strip().lower() for a in args.markup.split(',')]
if args.theme:
abstheme = os.path.abspath(os.path.expanduser(args.theme))
config['THEME'] = abstheme if os.path.exists(abstheme) else args.theme
@@ -296,8 +289,6 @@ def get_config(args):
for key in config:
if key in ('PATH', 'OUTPUT_PATH', 'THEME'):
config[key] = config[key].decode(enc)
- if key == "MARKUP":
- config[key] = [a.decode(enc) for a in config[key]]
return config
@@ -315,16 +306,17 @@ def get_instance(args):
module = __import__(module)
cls = getattr(module, cls_name)
- return cls(settings)
+ return cls(settings), settings
def main():
args = parse_arguments()
init(args.verbosity)
- pelican = get_instance(args)
+ pelican, settings = get_instance(args)
+ readers = Readers(settings)
watchers = {'content': folder_watcher(pelican.path,
- pelican.markup,
+ readers.extensions,
pelican.ignore_files),
'theme': folder_watcher(pelican.theme,
[''],
@@ -333,8 +325,8 @@ def main():
try:
if args.autoreload:
- print(' --- AutoReload Mode: Monitoring `content`, `theme` and `settings`'
- ' for changes. ---')
+ print(' --- AutoReload Mode: Monitoring `content`, `theme` and'
+ ' `settings` for changes. ---')
while True:
try:
diff --git a/pelican/generators.py b/pelican/generators.py
index 1444c95c..72c76b32 100644
--- a/pelican/generators.py
+++ b/pelican/generators.py
@@ -13,16 +13,13 @@ from functools import partial
from itertools import chain, groupby
from operator import attrgetter, itemgetter
-from jinja2 import (
- Environment, FileSystemLoader, PrefixLoader, ChoiceLoader, BaseLoader,
- TemplateNotFound
-)
+from jinja2 import (Environment, FileSystemLoader, PrefixLoader, ChoiceLoader,
+ BaseLoader, TemplateNotFound)
from pelican.contents import Article, Page, Static, is_valid_content
-from pelican.readers import read_file
+from pelican.readers import Readers
from pelican.utils import copy, process_translations, mkdir_p, DateFormatter
from pelican import signals
-import pelican.utils
logger = logging.getLogger(__name__)
@@ -31,23 +28,23 @@ logger = logging.getLogger(__name__)
class Generator(object):
"""Baseclass generator"""
- def __init__(self, context, settings, path, theme, output_path, markup,
- **kwargs):
+ def __init__(self, context, settings, path, theme, output_path, **kwargs):
self.context = context
self.settings = settings
self.path = path
self.theme = theme
self.output_path = output_path
- self.markup = markup
for arg, value in kwargs.items():
setattr(self, arg, value)
+ self.readers = Readers(self.settings)
+
# templates cache
self._templates = {}
self._templates_path = []
self._templates_path.append(os.path.expanduser(
- os.path.join(self.theme, 'templates')))
+ os.path.join(self.theme, 'templates')))
self._templates_path += self.settings['EXTRA_TEMPLATES_PATHS']
theme_path = os.path.dirname(os.path.abspath(__file__))
@@ -85,9 +82,8 @@ class Generator(object):
try:
self._templates[name] = self.env.get_template(name + '.html')
except TemplateNotFound:
- raise Exception(
- ('[templates] unable to load %s.html from %s'
- % (name, self._templates_path)))
+ raise Exception('[templates] unable to load %s.html from %s'
+ % (name, self._templates_path))
return self._templates[name]
def _include_path(self, path, extensions=None):
@@ -98,7 +94,7 @@ class Generator(object):
extensions are allowed)
"""
if extensions is None:
- extensions = tuple(self.markup)
+ extensions = tuple(self.readers.extensions)
basename = os.path.basename(path)
if extensions is False or basename.endswith(extensions):
return True
@@ -388,9 +384,9 @@ class ArticlesGenerator(Generator):
self.settings['ARTICLE_DIR'],
exclude=self.settings['ARTICLE_EXCLUDES']):
try:
- article = read_file(
+ article = self.readers.read_file(
base_path=self.path, path=f, content_class=Article,
- settings=self.settings, context=self.context,
+ context=self.context,
preread_signal=signals.article_generator_preread,
preread_sender=self,
context_signal=signals.article_generator_context,
@@ -496,9 +492,9 @@ class PagesGenerator(Generator):
self.settings['PAGE_DIR'],
exclude=self.settings['PAGE_EXCLUDES']):
try:
- page = read_file(
+ page = self.readers.read_file(
base_path=self.path, path=f, content_class=Page,
- settings=self.settings, context=self.context,
+ context=self.context,
preread_signal=signals.page_generator_preread,
preread_sender=self,
context_signal=signals.page_generator_context,
@@ -557,10 +553,9 @@ class StaticGenerator(Generator):
for static_path in self.settings['STATIC_PATHS']:
for f in self.get_files(
static_path, extensions=False):
- static = read_file(
+ static = self.readers.read_file(
base_path=self.path, path=f, content_class=Static,
- fmt='static',
- settings=self.settings, context=self.context,
+ fmt='static', context=self.context,
preread_signal=signals.static_generator_preread,
preread_sender=self,
context_signal=signals.static_generator_context,
diff --git a/pelican/readers.py b/pelican/readers.py
index 3923245e..97a184d0 100644
--- a/pelican/readers.py
+++ b/pelican/readers.py
@@ -34,10 +34,10 @@ try:
except ImportError:
from HTMLParser import HTMLParser
+from pelican import signals
from pelican.contents import Page, Category, Tag, Author
from pelican.utils import get_date, pelican_open
-logger = logging.getLogger(__name__)
METADATA_PROCESSORS = {
'tags': lambda x, y: [Tag(tag, y) for tag in x.split(',')],
@@ -50,7 +50,19 @@ METADATA_PROCESSORS = {
logger = logging.getLogger(__name__)
-class Reader(object):
+class BaseReader(object):
+ """Base class to read files.
+
+ This class is used to process static files, and it can be inherited for
+ other types of file. A Reader class must have the following attributes:
+
+ - enabled: (boolean) tell if the Reader class is enabled. It
+ generally depends on the import of some dependency.
+ - file_extensions: a list of file extensions that the Reader will process.
+ - extensions: a list of extensions to use in the reader (typical use is
+ Markdown).
+
+ """
enabled = True
file_extensions = ['static']
extensions = None
@@ -110,7 +122,9 @@ class PelicanHTMLTranslator(HTMLTranslator):
return HTMLTranslator.visit_image(self, node)
-class RstReader(Reader):
+class RstReader(BaseReader):
+ """Reader for reStructuredText files"""
+
enabled = bool(docutils)
file_extensions = ['rst']
@@ -166,7 +180,9 @@ class RstReader(Reader):
return content, metadata
-class MarkdownReader(Reader):
+class MarkdownReader(BaseReader):
+ """Reader for Markdown files"""
+
enabled = bool(Markdown)
file_extensions = ['md', 'markdown', 'mkd', 'mdown']
@@ -174,7 +190,6 @@ class MarkdownReader(Reader):
super(MarkdownReader, self).__init__(*args, **kwargs)
self.extensions = self.settings['MD_EXTENSIONS']
self.extensions.append('meta')
- self._md = Markdown(extensions=self.extensions)
def _parse_metadata(self, meta):
"""Return the dict containing document metadata"""
@@ -194,6 +209,7 @@ class MarkdownReader(Reader):
def read(self, source_path):
"""Parse content and metadata of markdown files"""
+ self._md = Markdown(extensions=self.extensions)
with pelican_open(source_path) as text:
content = self._md.convert(text)
@@ -201,8 +217,9 @@ class MarkdownReader(Reader):
return content, metadata
-class HTMLReader(Reader):
+class HTMLReader(BaseReader):
"""Parses HTML files as input, looking for meta, title, and body tags"""
+
file_extensions = ['htm', 'html']
enabled = True
@@ -312,7 +329,9 @@ class HTMLReader(Reader):
return parser.body, metadata
-class AsciiDocReader(Reader):
+class AsciiDocReader(BaseReader):
+ """Reader for AsciiDoc files"""
+
enabled = bool(asciidoc)
file_extensions = ['asc']
default_options = ["--no-header-footer", "-a newline=\\n"]
@@ -344,109 +363,142 @@ class AsciiDocReader(Reader):
return content, metadata
-EXTENSIONS = {}
+class Readers(object):
+ """Interface for all readers.
-for cls in [Reader] + Reader.__subclasses__():
- for ext in cls.file_extensions:
- EXTENSIONS[ext] = cls
+ This class contains a mapping of file extensions / Reader classes, to know
+ which Reader class must be used to read a file (based on its extension).
+ This is customizable both with the 'READERS' setting, and with the
+ 'readers_init' signal for plugins.
+ """
+ def __init__(self, settings=None):
+ self.settings = settings or {}
+ self.readers = {}
+ self.reader_classes = {}
-def read_file(base_path, path, content_class=Page, fmt=None,
- settings=None, context=None,
- preread_signal=None, preread_sender=None,
- context_signal=None, context_sender=None):
- """Return a content object parsed with the given format."""
- path = os.path.abspath(os.path.join(base_path, path))
- source_path = os.path.relpath(path, base_path)
- base, ext = os.path.splitext(os.path.basename(path))
- logger.debug('read file {} -> {}'.format(
+ for cls in [BaseReader] + BaseReader.__subclasses__():
+ for ext in cls.file_extensions:
+ self.reader_classes[ext] = cls
+
+ if self.settings['READERS']:
+ self.reader_classes.update(self.settings['READERS'])
+
+ signals.readers_init.send(self)
+
+ for fmt, reader_class in self.reader_classes.items():
+ if not reader_class:
+ continue
+
+ if not reader_class.enabled:
+ logger.warning('Missing dependencies for {}'.format(fmt))
+ continue
+
+ self.readers[fmt] = reader_class(self.settings)
+
+ settings_key = '%s_EXTENSIONS' % fmt.upper()
+
+ if settings_key in self.settings:
+ self.readers[fmt].extensions = self.settings[settings_key]
+
+ @property
+ def extensions(self):
+ return self.readers.keys()
+
+ def read_file(self, base_path, path, content_class=Page, fmt=None,
+ context=None, preread_signal=None, preread_sender=None,
+ context_signal=None, context_sender=None):
+ """Return a content object parsed with the given format."""
+
+ path = os.path.abspath(os.path.join(base_path, path))
+ source_path = os.path.relpath(path, base_path)
+ logger.debug('read file {} -> {}'.format(
source_path, content_class.__name__))
- if not fmt:
- fmt = ext[1:]
- if fmt not in EXTENSIONS:
- raise TypeError('Pelican does not know how to parse {}'.format(path))
+ if not fmt:
+ _, ext = os.path.splitext(os.path.basename(path))
+ fmt = ext[1:]
- if preread_signal:
- logger.debug('signal {}.send({})'.format(
+ if fmt not in self.readers:
+ raise TypeError(
+ 'Pelican does not know how to parse {}'.format(path))
+
+ if preread_signal:
+ logger.debug('signal {}.send({})'.format(
preread_signal, preread_sender))
- preread_signal.send(preread_sender)
+ preread_signal.send(preread_sender)
- if settings is None:
- settings = {}
+ reader = self.readers[fmt]
- reader_class = EXTENSIONS[fmt]
- if not reader_class.enabled:
- raise ValueError('Missing dependencies for {}'.format(fmt))
-
- reader = reader_class(settings)
-
- settings_key = '%s_EXTENSIONS' % fmt.upper()
-
- if settings and settings_key in settings:
- reader.extensions = settings[settings_key]
-
- metadata = default_metadata(
- settings=settings, process=reader.process_metadata)
- metadata.update(path_metadata(
- full_path=path, source_path=source_path, settings=settings))
- metadata.update(parse_path_metadata(
- source_path=source_path, settings=settings,
+ metadata = default_metadata(
+ settings=self.settings, process=reader.process_metadata)
+ metadata.update(path_metadata(
+ full_path=path, source_path=source_path,
+ settings=self.settings))
+ metadata.update(parse_path_metadata(
+ source_path=source_path, settings=self.settings,
process=reader.process_metadata))
- content, reader_metadata = reader.read(path)
- metadata.update(reader_metadata)
- # create warnings for all images with empty alt (up to a certain number)
- # as they are really likely to be accessibility flaws
- if content:
- # find images with empty alt
- imgs = re.compile(r"""
- (?:
- # src before alt
- ]*
- src=(['"])(.*)\1
- [^\>]*
- alt=(['"])\3
- )|(?:
- # alt before src
-
]*
- alt=(['"])\4
- [^\>]*
- src=(['"])(.*)\5
- )
- """, re.X)
- matches = re.findall(imgs, content)
- # find a correct threshold
- nb_warnings = 10
- if len(matches) == nb_warnings + 1:
- nb_warnings += 1 # avoid bad looking case
- # print one warning per image with empty alt until threshold
- for match in matches[:nb_warnings]:
- logger.warning('Empty alt attribute for image {} in {}'.format(
- os.path.basename(match[1] + match[5]), path))
- # print one warning for the other images with empty alt
- if len(matches) > nb_warnings:
- logger.warning('{} other images with empty alt attributes'.format(
- len(matches) - nb_warnings))
+ content, reader_metadata = reader.read(path)
+ metadata.update(reader_metadata)
- # eventually filter the content with typogrify if asked so
- if content and settings and settings['TYPOGRIFY']:
- from typogrify.filters import typogrify
- content = typogrify(content)
- metadata['title'] = typogrify(metadata['title'])
+ if content:
+ # find images with empty alt
+ find_empty_alt(content, path)
- if context_signal:
- logger.debug('signal {}.send({},
]*
+ src=(['"])(.*)\1
+ [^\>]*
+ alt=(['"])\3
+ )|(?:
+ # alt before src
+
]*
+ alt=(['"])\4
+ [^\>]*
+ src=(['"])(.*)\5
+ )
+ """, re.X)
+ matches = re.findall(imgs, content)
+ # find a correct threshold
+ nb_warnings = 10
+ if len(matches) == nb_warnings + 1:
+ nb_warnings += 1 # avoid bad looking case
+ # print one warning per image with empty alt until threshold
+ for match in matches[:nb_warnings]:
+ logger.warning('Empty alt attribute for image {} in {}'.format(
+ os.path.basename(match[1] + match[5]), path))
+ # print one warning for the other images with empty alt
+ if len(matches) > nb_warnings:
+ logger.warning('{} other images with empty alt attributes'
+ .format(len(matches) - nb_warnings))
def default_metadata(settings=None, process=None):
@@ -469,7 +521,7 @@ def path_metadata(full_path, source_path, settings=None):
metadata['date'] = datetime.datetime.fromtimestamp(
os.stat(full_path).st_ctime)
metadata.update(settings.get('EXTRA_PATH_METADATA', {}).get(
- source_path, {}))
+ source_path, {}))
return metadata
@@ -482,7 +534,7 @@ def parse_path_metadata(source_path, settings=None, process=None):
... 'PATH_METADATA':
... '(?P
Numbered footnote ' diff --git a/pelican/tests/test_utils.py b/pelican/tests/test_utils.py index 0e65003a..0642926e 100644 --- a/pelican/tests/test_utils.py +++ b/pelican/tests/test_utils.py @@ -353,12 +353,13 @@ class TestDateFormatter(unittest.TestCase): 'French locale needed') def test_french_locale(self): settings = read_settings( - override = {'LOCALE': locale.normalize('fr_FR.UTF-8'), - 'TEMPLATE_PAGES': {'template/source.html': - 'generated/file.html'}}) + override={'LOCALE': locale.normalize('fr_FR.UTF-8'), + 'TEMPLATE_PAGES': {'template/source.html': + 'generated/file.html'}}) - generator = TemplatePagesGenerator({'date': self.date}, settings, - self.temp_content, '', self.temp_output, None) + generator = TemplatePagesGenerator( + {'date': self.date}, settings, + self.temp_content, '', self.temp_output) generator.env.filters.update({'strftime': utils.DateFormatter()}) writer = Writer(self.temp_output, settings=settings) @@ -385,8 +386,9 @@ class TestDateFormatter(unittest.TestCase): 'TEMPLATE_PAGES': {'template/source.html': 'generated/file.html'}}) - generator = TemplatePagesGenerator({'date': self.date}, settings, - self.temp_content, '', self.temp_output, None) + generator = TemplatePagesGenerator( + {'date': self.date}, settings, + self.temp_content, '', self.temp_output) generator.env.filters.update({'strftime': utils.DateFormatter()}) writer = Writer(self.temp_output, settings=settings)