Add documentation for readers.

2025-10-15 20:28:56 +02:00 · 2013-08-04 22:03:37 +02:00 · 2013-08-04 22:03:37 +02:00 · f47f054d0b
commit f47f054d0b
parent bab8d0b26a
4 changed files with 40 additions and 12 deletions
--- a/docs/internals.rst
+++ b/docs/internals.rst
@ -24,7 +24,7 @@ The logic is separated into different classes and concepts:
  then passed to the generators.

 * **Readers** are used to read from various formats (AsciiDoc, HTML, Markdown and
-  reStructuredText for now, but the system is extensible). Given a file, they 
+  reStructuredText for now, but the system is extensible). Given a file, they
  return metadata (author, tags, category, etc.) and content (HTML-formatted).

 * **Generators** generate the different outputs. For instance, Pelican comes with
@ -44,7 +44,7 @@ method that returns HTML content and some metadata.

 Take a look at the Markdown reader::

-    class MarkdownReader(Reader):
+    class MarkdownReader(BaseReader):
        enabled = bool(Markdown)

        def read(self, source_path):
--- a/docs/plugins.rst
+++ b/docs/plugins.rst
@ -71,6 +71,7 @@ finalized                       pelican object                  invoked after al
                                                                - minifying js/css assets.
                                                                - notify/ping search engines with an updated sitemap.
 generator_init                  generator                       invoked in the Generator.__init__
+readers_init                    readers                         invoked in the Readers.__init__
 article_generate_context        article_generator, metadata
 article_generate_preread        article_generator               invoked before a article is read in ArticlesGenerator.generate_context;
                                                                use if code needs to do something before every article is parsed
@ -144,13 +145,13 @@ write and don't slow down pelican itself when they're not active.
 No more talking, here is the example::

    from pelican import signals
-    from pelican.readers import EXTENSIONS, Reader
+    from pelican.readers import BaseReader

-    # Create a new reader class, inheriting from the pelican.reader.Reader
-    class NewReader(Reader):
+    # Create a new reader class, inheriting from the pelican.readers.BaseReader
+    class NewReader(BaseReader):
        enabled = True  # Yeah, you probably want that :-)

-        # The list of extensions you want this reader to match with.
+        # The list of file extensions you want this reader to match with.
        # In the case multiple readers use the same extensions, the latest will
        # win (so the one you're defining here, most probably).
        file_extensions = ['yeah']
@ -168,12 +169,12 @@ No more talking, here is the example::

            return "Some content", parsed

-    def add_reader(arg):
-        EXTENSIONS['yeah'] = NewReader
+    def add_reader(readers):
+        readers.reader_classes['yeah'] = NewReader

    # this is how pelican works.
    def register():
-        signals.initialized.connect(add_reader)
+        signals.readers_init.connect(add_reader)


 Adding a new generator
--- a/docs/settings.rst
+++ b/docs/settings.rst
@ -84,9 +84,10 @@ Setting name (default value)                                            What doe
                                                                        here or a single string representing one locale.
                                                                        When providing a list, all the locales will be tried
                                                                        until one works.
-`MARKUP` (``('rst', 'md')``)                                            A list of available markup languages you want
-                                                                        to use. For the moment, the only available values
-                                                                        are `rst`, `md`, `markdown`, `mkd`, `mdown`, `html`, and `htm`.
+`READERS` (``{}``)                                                      A dict of file extensions / Reader classes to overwrite or
+                                                                        add file readers. For instance, to avoid processing .html files:
+                                                                        ``READERS = {'html': None}``. Or to add a custom reader for the
+                                                                        `foo` extension: ``READERS = {'foo': FooReader}``
 `IGNORE_FILES` (``['.#*']``)                                            A list of file globbing patterns to match against the
                                                                        source files to be ignored by the processor. For example,
                                                                        the default ``['.#*']`` will ignore emacs lock files.
--- a/pelican/readers.py
+++ b/pelican/readers.py
@ -51,6 +51,18 @@ logger = logging.getLogger(__name__)


 class BaseReader(object):
+    """Base class to read files.
+
+    This class is used to process static files, and it can be inherited for
+    other types of file. A Reader class must have the following attributes:
+
+    - enabled: (boolean) tells whether the Reader class is enabled. It
+      generally depends on the import of some dependency.
+    - file_extensions: a list of file extensions that the Reader will process.
+    - extensions: a list of extensions to use in the reader (typical use is
+      Markdown).
+
+    """
    enabled = True
    file_extensions = ['static']
    extensions = None
@ -111,6 +123,8 @@ class PelicanHTMLTranslator(HTMLTranslator):


 class RstReader(BaseReader):
+    """Reader for reStructuredText files"""
+
    enabled = bool(docutils)
    file_extensions = ['rst']

@ -167,6 +181,8 @@ class RstReader(BaseReader):


 class MarkdownReader(BaseReader):
+    """Reader for Markdown files"""
+
    enabled = bool(Markdown)
    file_extensions = ['md', 'markdown', 'mkd', 'mdown']

@ -203,6 +219,7 @@ class MarkdownReader(BaseReader):

 class HTMLReader(BaseReader):
    """Parses HTML files as input, looking for meta, title, and body tags"""
+
    file_extensions = ['htm', 'html']
    enabled = True

@ -313,6 +330,8 @@ class HTMLReader(BaseReader):


 class AsciiDocReader(BaseReader):
+    """Reader for AsciiDoc files"""
+
    enabled = bool(asciidoc)
    file_extensions = ['asc']
    default_options = ["--no-header-footer", "-a newline=\\n"]
@ -345,7 +364,14 @@ class AsciiDocReader(BaseReader):


 class Readers(object):
+    """Interface for all readers.

+    This class contains a mapping of file extensions / Reader classes, to know
+    which Reader class must be used to read a file (based on its extension).
+    This is customizable both with the 'READERS' setting, and with the
+    'readers_init' signal for plugins.
+
+    """
    def __init__(self, settings=None):
        self.settings = settings or {}
        self.readers = {}