From f47f054d0be29d95ecb22e025f1a50932c00fd2b Mon Sep 17 00:00:00 2001 From: Simon Conseil Date: Sun, 4 Aug 2013 22:03:37 +0200 Subject: [PATCH] Add documentation for readers. --- docs/internals.rst | 4 ++-- docs/plugins.rst | 15 ++++++++------- docs/settings.rst | 7 ++++--- pelican/readers.py | 26 ++++++++++++++++++++++++++ 4 files changed, 40 insertions(+), 12 deletions(-) diff --git a/docs/internals.rst b/docs/internals.rst index 704122ba..f69a9bb8 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -24,7 +24,7 @@ The logic is separated into different classes and concepts: then passed to the generators. * **Readers** are used to read from various formats (AsciiDoc, HTML, Markdown and - reStructuredText for now, but the system is extensible). Given a file, they + reStructuredText for now, but the system is extensible). Given a file, they return metadata (author, tags, category, etc.) and content (HTML-formatted). * **Generators** generate the different outputs. For instance, Pelican comes with @@ -44,7 +44,7 @@ method that returns HTML content and some metadata. Take a look at the Markdown reader:: - class MarkdownReader(Reader): + class MarkdownReader(BaseReader): enabled = bool(Markdown) def read(self, source_path): diff --git a/docs/plugins.rst b/docs/plugins.rst index 93307afb..582f40a7 100644 --- a/docs/plugins.rst +++ b/docs/plugins.rst @@ -71,6 +71,7 @@ finalized pelican object invoked after al - minifying js/css assets. - notify/ping search engines with an updated sitemap. generator_init generator invoked in the Generator.__init__ +readers_init readers invoked in the Readers.__init__ article_generate_context article_generator, metadata article_generate_preread article_generator invoked before a article is read in ArticlesGenerator.generate_context; use if code needs to do something before every article is parsed @@ -144,13 +145,13 @@ write and don't slow down pelican itself when they're not active. 
No more talking, here is the example:: from pelican import signals - from pelican.readers import EXTENSIONS, Reader + from pelican.readers import BaseReader - # Create a new reader class, inheriting from the pelican.reader.Reader - class NewReader(Reader): + # Create a new reader class, inheriting from the pelican.readers.BaseReader + class NewReader(BaseReader): enabled = True # Yeah, you probably want that :-) - # The list of extensions you want this reader to match with. + # The list of file extensions you want this reader to match with. # In the case multiple readers use the same extensions, the latest will # win (so the one you're defining here, most probably). file_extensions = ['yeah'] @@ -168,12 +169,12 @@ No more talking, here is the example:: return "Some content", parsed - def add_reader(arg): - EXTENSIONS['yeah'] = NewReader + def add_reader(readers): + readers.reader_classes['yeah'] = NewReader # this is how pelican works. def register(): - signals.initialized.connect(add_reader) + signals.readers_init.connect(add_reader) Adding a new generator diff --git a/docs/settings.rst b/docs/settings.rst index 8ecac7c9..04574127 100644 --- a/docs/settings.rst +++ b/docs/settings.rst @@ -84,9 +84,10 @@ Setting name (default value) What doe here or a single string representing one locale. When providing a list, all the locales will be tried until one works. -`MARKUP` (``('rst', 'md')``) A list of available markup languages you want - to use. For the moment, the only available values - are `rst`, `md`, `markdown`, `mkd`, `mdown`, `html`, and `htm`. +`READERS` (``{}``) A dict of file extensions / Reader classes to overwrite or + add file readers. For instance, to avoid processing .html files: + ``READERS = {'html': None}``. Or to add a custom reader for the + `foo` extension: ``READERS = {'foo': FooReader}`` `IGNORE_FILES` (``['.#*']``) A list of file globbing patterns to match against the source files to be ignored by the processor. 
For example, the default ``['.#*']`` will ignore emacs lock files. diff --git a/pelican/readers.py b/pelican/readers.py index 3b3bfd12..97a184d0 100644 --- a/pelican/readers.py +++ b/pelican/readers.py @@ -51,6 +51,18 @@ logger = logging.getLogger(__name__) class BaseReader(object): + """Base class to read files. + + This class is used to process static files, and it can be inherited for + other types of file. A Reader class must have the following attributes: + + - enabled: (boolean) tell if the Reader class is enabled. It + generally depends on the import of some dependency. + - file_extensions: a list of file extensions that the Reader will process. + - extensions: a list of extensions to use in the reader (typical use is + Markdown). + + """ enabled = True file_extensions = ['static'] extensions = None @@ -111,6 +123,8 @@ class PelicanHTMLTranslator(HTMLTranslator): class RstReader(BaseReader): + """Reader for reStructuredText files""" + enabled = bool(docutils) file_extensions = ['rst'] @@ -167,6 +181,8 @@ class RstReader(BaseReader): class MarkdownReader(BaseReader): + """Reader for Markdown files""" + enabled = bool(Markdown) file_extensions = ['md', 'markdown', 'mkd', 'mdown'] @@ -203,6 +219,7 @@ class MarkdownReader(BaseReader): class HTMLReader(BaseReader): """Parses HTML files as input, looking for meta, title, and body tags""" + file_extensions = ['htm', 'html'] enabled = True @@ -313,6 +330,8 @@ class HTMLReader(BaseReader): class AsciiDocReader(BaseReader): + """Reader for AsciiDoc files""" + enabled = bool(asciidoc) file_extensions = ['asc'] default_options = ["--no-header-footer", "-a newline=\\n"] @@ -345,7 +364,14 @@ class AsciiDocReader(BaseReader): class Readers(object): + """Interface for all readers. + This class contains a mapping of file extensions / Reader classes, to know + which Reader class must be used to read a file (based on its extension). 
+ This is customizable both with the 'READERS' setting, and with the + 'readers_init' signal for plugins. + + """ def __init__(self, settings=None): self.settings = settings or {} self.readers = {}