From f47f054d0be29d95ecb22e025f1a50932c00fd2b Mon Sep 17 00:00:00 2001
From: Simon Conseil <contact@saimon.org>
Date: Sun, 4 Aug 2013 22:03:37 +0200
Subject: [PATCH] Add documentation for readers.

---
 docs/internals.rst |  4 ++--
 docs/plugins.rst   | 15 ++++++++-------
 docs/settings.rst  |  7 ++++---
 pelican/readers.py | 26 ++++++++++++++++++++++++++
 4 files changed, 40 insertions(+), 12 deletions(-)

diff --git a/docs/internals.rst b/docs/internals.rst
index 704122ba..f69a9bb8 100644
--- a/docs/internals.rst
+++ b/docs/internals.rst
@@ -24,7 +24,7 @@ The logic is separated into different classes and concepts:
   then passed to the generators.
 
 * **Readers** are used to read from various formats (AsciiDoc, HTML, Markdown and
-  reStructuredText for now, but the system is extensible). Given a file, they 
+  reStructuredText for now, but the system is extensible). Given a file, they
   return metadata (author, tags, category, etc.) and content (HTML-formatted).
 
 * **Generators** generate the different outputs. For instance, Pelican comes with
@@ -44,7 +44,7 @@ method that returns HTML content and some metadata.
 
 Take a look at the Markdown reader::
 
-    class MarkdownReader(Reader):
+    class MarkdownReader(BaseReader):
         enabled = bool(Markdown)
 
         def read(self, source_path):
diff --git a/docs/plugins.rst b/docs/plugins.rst
index 93307afb..582f40a7 100644
--- a/docs/plugins.rst
+++ b/docs/plugins.rst
@@ -71,6 +71,7 @@ finalized                       pelican object                  invoked after al
                                                                 - minifying js/css assets.
                                                                 - notify/ping search engines with an updated sitemap.
 generator_init                  generator                       invoked in the Generator.__init__
+readers_init                    readers                         invoked in the Readers.__init__
 article_generate_context        article_generator, metadata
 article_generate_preread        article_generator               invoked before a article is read in ArticlesGenerator.generate_context;
                                                                 use if code needs to do something before every article is parsed
@@ -144,13 +145,13 @@ write and don't slow down pelican itself when they're not active.
 No more talking, here is the example::
 
     from pelican import signals
-    from pelican.readers import EXTENSIONS, Reader
+    from pelican.readers import BaseReader
 
-    # Create a new reader class, inheriting from the pelican.reader.Reader
-    class NewReader(Reader):
+    # Create a new reader class, inheriting from pelican.readers.BaseReader
+    class NewReader(BaseReader):
         enabled = True  # Yeah, you probably want that :-)
 
-        # The list of extensions you want this reader to match with.
+        # The list of file extensions you want this reader to match with.
         # In the case multiple readers use the same extensions, the latest will
         # win (so the one you're defining here, most probably).
         file_extensions = ['yeah']
@@ -168,12 +169,12 @@ No more talking, here is the example::
 
             return "Some content", parsed
 
-    def add_reader(arg):
-        EXTENSIONS['yeah'] = NewReader
+    def add_reader(readers):
+        readers.reader_classes['yeah'] = NewReader
 
     # this is how pelican works.
     def register():
-        signals.initialized.connect(add_reader)
+        signals.readers_init.connect(add_reader)
 
 
 Adding a new generator
diff --git a/docs/settings.rst b/docs/settings.rst
index 8ecac7c9..04574127 100644
--- a/docs/settings.rst
+++ b/docs/settings.rst
@@ -84,9 +84,10 @@ Setting name (default value)                                            What doe
                                                                         here or a single string representing one locale.
                                                                         When providing a list, all the locales will be tried
                                                                         until one works.
-`MARKUP` (``('rst', 'md')``)                                            A list of available markup languages you want
-                                                                        to use. For the moment, the only available values
-                                                                        are `rst`, `md`, `markdown`, `mkd`, `mdown`, `html`, and `htm`.
+`READERS` (``{}``)                                                      A dict of file extensions / Reader classes to overwrite or
+                                                                        add file readers. For instance, to avoid processing .html files:
+                                                                        ``READERS = {'html': None}``. Or to add a custom reader for the
+                                                                        `foo` extension: ``READERS = {'foo': FooReader}``
 `IGNORE_FILES` (``['.#*']``)                                            A list of file globbing patterns to match against the
                                                                         source files to be ignored by the processor. For example,
                                                                         the default ``['.#*']`` will ignore emacs lock files.
diff --git a/pelican/readers.py b/pelican/readers.py
index 3b3bfd12..97a184d0 100644
--- a/pelican/readers.py
+++ b/pelican/readers.py
@@ -51,6 +51,18 @@ logger = logging.getLogger(__name__)
 
 
 class BaseReader(object):
+    """Base class to read files.
+
+    This class is used to process static files, and it can be inherited for
+    other types of file. A Reader class must have the following attributes:
+
+    - enabled: (boolean) tell if the Reader class is enabled. It
+      generally depends on the import of some dependency.
+    - file_extensions: a list of file extensions that the Reader will process.
+    - extensions: a list of extensions to use in the reader (typical use is
+      Markdown).
+
+    """
     enabled = True
     file_extensions = ['static']
     extensions = None
@@ -111,6 +123,8 @@ class PelicanHTMLTranslator(HTMLTranslator):
 
 
 class RstReader(BaseReader):
+    """Reader for reStructuredText files"""
+
     enabled = bool(docutils)
     file_extensions = ['rst']
 
@@ -167,6 +181,8 @@ class RstReader(BaseReader):
 
 
 class MarkdownReader(BaseReader):
+    """Reader for Markdown files"""
+
     enabled = bool(Markdown)
     file_extensions = ['md', 'markdown', 'mkd', 'mdown']
 
@@ -203,6 +219,7 @@ class MarkdownReader(BaseReader):
 
 class HTMLReader(BaseReader):
     """Parses HTML files as input, looking for meta, title, and body tags"""
+
     file_extensions = ['htm', 'html']
     enabled = True
 
@@ -313,6 +330,8 @@ class HTMLReader(BaseReader):
 
 
 class AsciiDocReader(BaseReader):
+    """Reader for AsciiDoc files"""
+
     enabled = bool(asciidoc)
     file_extensions = ['asc']
     default_options = ["--no-header-footer", "-a newline=\\n"]
@@ -345,7 +364,14 @@ class AsciiDocReader(BaseReader):
 
 
 class Readers(object):
+    """Interface for all readers.
 
+    This class contains a mapping of file extensions / Reader classes, to know
+    which Reader class must be used to read a file (based on its extension).
+    This is customizable both with the 'READERS' setting, and with the
+    'readers_init' signal for plugins.
+
+    """
     def __init__(self, settings=None):
         self.settings = settings or {}
         self.readers = {}