Merge pull request #1011 from saimn/readers
Refactor readers and remove MARKUP. Fixes #866
This commit is contained in: commit 5a469dc2e3
11 changed files with 265 additions and 221 deletions
@@ -24,7 +24,7 @@ The logic is separated into different classes and concepts:
  then passed to the generators.

* **Readers** are used to read from various formats (AsciiDoc, HTML, Markdown and
  reStructuredText for now, but the system is extensible). Given a file, they
  reStructuredText for now, but the system is extensible). Given a file, they
  return metadata (author, tags, category, etc.) and content (HTML-formatted).

* **Generators** generate the different outputs. For instance, Pelican comes with

@@ -44,7 +44,7 @@ method that returns HTML content and some metadata.

Take a look at the Markdown reader::

    class MarkdownReader(Reader):
    class MarkdownReader(BaseReader):
        enabled = bool(Markdown)

        def read(self, source_path):
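To make the reader contract shown above concrete, here is a minimal sketch of a reader built on the new ``BaseReader`` base class (not part of this commit; the ``IniReader`` name and the metadata values are purely illustrative)::

    from pelican.readers import BaseReader

    class IniReader(BaseReader):
        # A reader declares whether it can run and which file extensions it handles.
        enabled = True
        file_extensions = ['ini']

        def read(self, source_path):
            # A reader returns HTML-formatted content plus a metadata dict.
            metadata = {'title': 'An example entry', 'category': 'misc'}
            metadata = {key: self.process_metadata(key, value)
                        for key, value in metadata.items()}
            content = '<p>Body converted to HTML by the reader.</p>'
            return content, metadata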
@@ -71,6 +71,7 @@ finalized pelican object invoked after al
                                              - minifying js/css assets.
                                              - notify/ping search engines with an updated sitemap.
generator_init              generator         invoked in the Generator.__init__
readers_init                readers           invoked in the Readers.__init__
article_generate_context    article_generator, metadata
article_generate_preread    article_generator  invoked before a article is read in ArticlesGenerator.generate_context;
                                              use if code needs to do something before every article is parsed
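Connecting to the signals listed above from a plugin is straightforward; a short sketch (the handler names are illustrative, not part of this change)::

    from pelican import signals

    def on_readers_init(readers):
        # 'readers' is the Readers instance; its reader_classes mapping can be
        # inspected or overridden here.
        print(sorted(readers.reader_classes))

    def on_article_preread(article_generator):
        # Runs before each article is read in ArticlesGenerator.generate_context.
        pass

    def register():
        signals.readers_init.connect(on_readers_init)
        signals.article_generator_preread.connect(on_article_preread)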
@@ -144,13 +145,13 @@ write and don't slow down pelican itself when they're not active.
No more talking, here is the example::

    from pelican import signals
    from pelican.readers import EXTENSIONS, Reader
    from pelican.readers import BaseReader

    # Create a new reader class, inheriting from the pelican.reader.Reader
    class NewReader(Reader):
    # Create a new reader class, inheriting from the pelican.reader.BaseReader
    class NewReader(BaseReader):
        enabled = True # Yeah, you probably want that :-)

        # The list of extensions you want this reader to match with.
        # The list of file extensions you want this reader to match with.
        # In the case multiple readers use the same extensions, the latest will
        # win (so the one you're defining here, most probably).
        file_extensions = ['yeah']

@@ -168,12 +169,12 @@ No more talking, here is the example::

            return "Some content", parsed

    def add_reader(arg):
        EXTENSIONS['yeah'] = NewReader
    def add_reader(readers):
        readers.reader_classes['yeah'] = NewReader

    # this is how pelican works.
    def register():
        signals.initialized.connect(add_reader)
        signals.readers_init.connect(add_reader)
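Once such a plugin module exists, it is enabled like any other Pelican plugin in the settings file (the module name below is hypothetical)::

    PLUGIN_PATH = 'plugins'      # directory that contains yeah_reader.py
    PLUGINS = ['yeah_reader']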
Adding a new generator
@@ -84,9 +84,10 @@ Setting name (default value) What doe
                               here or a single string representing one locale.
                               When providing a list, all the locales will be tried
                               until one works.
`MARKUP` (``('rst', 'md')``)   A list of available markup languages you want
                               to use. For the moment, the only available values
                               are `rst`, `md`, `markdown`, `mkd`, `mdown`, `html`, and `htm`.
`READERS` (``{}``)             A dict of file extensions / Reader classes to overwrite or
                               add file readers. for instance, to avoid processing .html files:
                               ``READERS = {'html': None}``. Or to add a custom reader for the
                               `foo` extension: ``READERS = {'foo': FooReader}``
`IGNORE_FILES` (``['.#*']``)   A list of file globbing patterns to match against the
                               source files to be ignored by the processor. For example,
                               the default ``['.#*']`` will ignore emacs lock files.
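In a settings file the new ``READERS`` entry maps to something like the following sketch (``FooReader`` and its import path are illustrative only)::

    # pelicanconf.py
    from myplugins.foo import FooReader   # hypothetical custom reader

    READERS = {
        'html': None,       # skip .html source files entirely
        'foo': FooReader,   # handle .foo files with a custom reader
    }
    IGNORE_FILES = ['.#*']  # default: ignore emacs lock files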
@@ -17,6 +17,7 @@ from pelican.generators import (ArticlesGenerator, PagesGenerator,
                                 StaticGenerator, SourceFileGenerator,
                                 TemplatePagesGenerator)
from pelican.log import init
from pelican.readers import Readers
from pelican.settings import read_settings
from pelican.utils import clean_output_dir, folder_watcher, file_watcher
from pelican.writers import Writer

@@ -46,7 +47,6 @@ class Pelican(object):
        self.path = settings['PATH']
        self.theme = settings['THEME']
        self.output_path = settings['OUTPUT_PATH']
        self.markup = settings['MARKUP']
        self.ignore_files = settings['IGNORE_FILES']
        self.delete_outputdir = settings['DELETE_OUTPUT_DIRECTORY']
        self.output_retention = settings['OUTPUT_RETENTION']

@@ -164,7 +164,6 @@ class Pelican(object):
                path=self.path,
                theme=self.theme,
                output_path=self.output_path,
                markup=self.markup,
            ) for cls in self.get_generator_classes()
        ]

@@ -236,10 +235,6 @@ def parse_arguments():
        help='Where to output the generated files. If not specified, a '
        'directory will be created, named "output" in the current path.')

    parser.add_argument('-m', '--markup', dest='markup',
        help='The list of markup language to use (rst or md). Please indicate '
        'them separated by commas.')

    parser.add_argument('-s', '--settings', dest='settings',
        help='The settings of the application, this is automatically set to '
        '{0} if a file exists with this name.'.format(DEFAULT_CONFIG_NAME))

@@ -279,8 +274,6 @@ def get_config(args):
    if args.output:
        config['OUTPUT_PATH'] = \
            os.path.abspath(os.path.expanduser(args.output))
    if args.markup:
        config['MARKUP'] = [a.strip().lower() for a in args.markup.split(',')]
    if args.theme:
        abstheme = os.path.abspath(os.path.expanduser(args.theme))
        config['THEME'] = abstheme if os.path.exists(abstheme) else args.theme

@@ -296,8 +289,6 @@ def get_config(args):
        for key in config:
            if key in ('PATH', 'OUTPUT_PATH', 'THEME'):
                config[key] = config[key].decode(enc)
            if key == "MARKUP":
                config[key] = [a.decode(enc) for a in config[key]]
    return config

@@ -315,16 +306,17 @@ def get_instance(args):
    module = __import__(module)
    cls = getattr(module, cls_name)

    return cls(settings)
    return cls(settings), settings


def main():
    args = parse_arguments()
    init(args.verbosity)
    pelican = get_instance(args)
    pelican, settings = get_instance(args)
    readers = Readers(settings)

    watchers = {'content': folder_watcher(pelican.path,
                                          pelican.markup,
                                          readers.extensions,
                                          pelican.ignore_files),
                'theme': folder_watcher(pelican.theme,
                                        [''],

@@ -333,8 +325,8 @@ def main():

    try:
        if args.autoreload:
            print(' --- AutoReload Mode: Monitoring `content`, `theme` and `settings`'
                  ' for changes. ---')
            print(' --- AutoReload Mode: Monitoring `content`, `theme` and'
                  ' `settings` for changes. ---')

            while True:
                try:
@@ -13,16 +13,13 @@ from functools import partial
from itertools import chain, groupby
from operator import attrgetter, itemgetter

from jinja2 import (
    Environment, FileSystemLoader, PrefixLoader, ChoiceLoader, BaseLoader,
    TemplateNotFound
)
from jinja2 import (Environment, FileSystemLoader, PrefixLoader, ChoiceLoader,
                    BaseLoader, TemplateNotFound)

from pelican.contents import Article, Page, Static, is_valid_content
from pelican.readers import read_file
from pelican.readers import Readers
from pelican.utils import copy, process_translations, mkdir_p, DateFormatter
from pelican import signals
import pelican.utils


logger = logging.getLogger(__name__)

@@ -31,23 +28,23 @@ logger = logging.getLogger(__name__)
class Generator(object):
    """Baseclass generator"""

    def __init__(self, context, settings, path, theme, output_path, markup,
                 **kwargs):
    def __init__(self, context, settings, path, theme, output_path, **kwargs):
        self.context = context
        self.settings = settings
        self.path = path
        self.theme = theme
        self.output_path = output_path
        self.markup = markup

        for arg, value in kwargs.items():
            setattr(self, arg, value)

        self.readers = Readers(self.settings)

        # templates cache
        self._templates = {}
        self._templates_path = []
        self._templates_path.append(os.path.expanduser(
            os.path.join(self.theme, 'templates')))
            os.path.join(self.theme, 'templates')))
        self._templates_path += self.settings['EXTRA_TEMPLATES_PATHS']

        theme_path = os.path.dirname(os.path.abspath(__file__))

@@ -85,9 +82,8 @@ class Generator(object):
            try:
                self._templates[name] = self.env.get_template(name + '.html')
            except TemplateNotFound:
                raise Exception(
                    ('[templates] unable to load %s.html from %s'
                     % (name, self._templates_path)))
                raise Exception('[templates] unable to load %s.html from %s'
                                % (name, self._templates_path))
        return self._templates[name]

    def _include_path(self, path, extensions=None):

@@ -98,7 +94,7 @@ class Generator(object):
        extensions are allowed)
        """
        if extensions is None:
            extensions = tuple(self.markup)
            extensions = tuple(self.readers.extensions)
        basename = os.path.basename(path)
        if extensions is False or basename.endswith(extensions):
            return True

@@ -388,9 +384,9 @@ class ArticlesGenerator(Generator):
                self.settings['ARTICLE_DIR'],
                exclude=self.settings['ARTICLE_EXCLUDES']):
            try:
                article = read_file(
                article = self.readers.read_file(
                    base_path=self.path, path=f, content_class=Article,
                    settings=self.settings, context=self.context,
                    context=self.context,
                    preread_signal=signals.article_generator_preread,
                    preread_sender=self,
                    context_signal=signals.article_generator_context,

@@ -496,9 +492,9 @@ class PagesGenerator(Generator):
                self.settings['PAGE_DIR'],
                exclude=self.settings['PAGE_EXCLUDES']):
            try:
                page = read_file(
                page = self.readers.read_file(
                    base_path=self.path, path=f, content_class=Page,
                    settings=self.settings, context=self.context,
                    context=self.context,
                    preread_signal=signals.page_generator_preread,
                    preread_sender=self,
                    context_signal=signals.page_generator_context,

@@ -557,10 +553,9 @@ class StaticGenerator(Generator):
        for static_path in self.settings['STATIC_PATHS']:
            for f in self.get_files(
                    static_path, extensions=False):
                static = read_file(
                static = self.readers.read_file(
                    base_path=self.path, path=f, content_class=Static,
                    fmt='static',
                    settings=self.settings, context=self.context,
                    fmt='static', context=self.context,
                    preread_signal=signals.static_generator_preread,
                    preread_sender=self,
                    context_signal=signals.static_generator_context,
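Because every generator now builds a ``Readers`` instance in ``Generator.__init__``, a third-party generator can reuse it instead of the old module-level ``read_file``. A rough sketch under that assumption (the ``NotesGenerator`` class and the ``notes`` directory are hypothetical, and registering the generator via the ``get_generators`` signal is not shown)::

    from pelican.contents import Page
    from pelican.generators import Generator

    class NotesGenerator(Generator):
        def generate_context(self):
            self.notes = []
            for f in self.get_files('notes'):
                note = self.readers.read_file(
                    base_path=self.path, path=f, content_class=Page,
                    context=self.context)
                self.notes.append(note)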
@@ -34,10 +34,10 @@ try:
except ImportError:
    from HTMLParser import HTMLParser

from pelican import signals
from pelican.contents import Page, Category, Tag, Author
from pelican.utils import get_date, pelican_open

logger = logging.getLogger(__name__)

METADATA_PROCESSORS = {
    'tags': lambda x, y: [Tag(tag, y) for tag in x.split(',')],

@@ -50,7 +50,19 @@ METADATA_PROCESSORS = {
logger = logging.getLogger(__name__)


class Reader(object):
class BaseReader(object):
    """Base class to read files.

    This class is used to process static files, and it can be inherited for
    other types of file. A Reader class must have the following attributes:

    - enabled: (boolean) tell if the Reader class is enabled. It
      generally depends on the import of some dependency.
    - file_extensions: a list of file extensions that the Reader will process.
    - extensions: a list of extensions to use in the reader (typical use is
      Markdown).

    """
    enabled = True
    file_extensions = ['static']
    extensions = None

@@ -110,7 +122,9 @@ class PelicanHTMLTranslator(HTMLTranslator):
        return HTMLTranslator.visit_image(self, node)


class RstReader(Reader):
class RstReader(BaseReader):
    """Reader for reStructuredText files"""

    enabled = bool(docutils)
    file_extensions = ['rst']

@@ -166,7 +180,9 @@ class RstReader(Reader):
        return content, metadata


class MarkdownReader(Reader):
class MarkdownReader(BaseReader):
    """Reader for Markdown files"""

    enabled = bool(Markdown)
    file_extensions = ['md', 'markdown', 'mkd', 'mdown']

@@ -174,7 +190,6 @@ class MarkdownReader(Reader):
        super(MarkdownReader, self).__init__(*args, **kwargs)
        self.extensions = self.settings['MD_EXTENSIONS']
        self.extensions.append('meta')
        self._md = Markdown(extensions=self.extensions)

    def _parse_metadata(self, meta):
        """Return the dict containing document metadata"""

@@ -194,6 +209,7 @@ class MarkdownReader(Reader):
    def read(self, source_path):
        """Parse content and metadata of markdown files"""

        self._md = Markdown(extensions=self.extensions)
        with pelican_open(source_path) as text:
            content = self._md.convert(text)

@@ -201,8 +217,9 @@ class MarkdownReader(Reader):
        return content, metadata


class HTMLReader(Reader):
class HTMLReader(BaseReader):
    """Parses HTML files as input, looking for meta, title, and body tags"""

    file_extensions = ['htm', 'html']
    enabled = True

@@ -312,7 +329,9 @@ class HTMLReader(Reader):
        return parser.body, metadata


class AsciiDocReader(Reader):
class AsciiDocReader(BaseReader):
    """Reader for AsciiDoc files"""

    enabled = bool(asciidoc)
    file_extensions = ['asc']
    default_options = ["--no-header-footer", "-a newline=\\n"]

@@ -344,109 +363,142 @@ class AsciiDocReader(Reader):
        return content, metadata


EXTENSIONS = {}
class Readers(object):
    """Interface for all readers.

for cls in [Reader] + Reader.__subclasses__():
    for ext in cls.file_extensions:
        EXTENSIONS[ext] = cls
    This class contains a mapping of file extensions / Reader classes, to know
    which Reader class must be used to read a file (based on its extension).
    This is customizable both with the 'READERS' setting, and with the
    'readers_init' signall for plugins.

    """
    def __init__(self, settings=None):
        self.settings = settings or {}
        self.readers = {}
        self.reader_classes = {}

def read_file(base_path, path, content_class=Page, fmt=None,
              settings=None, context=None,
              preread_signal=None, preread_sender=None,
              context_signal=None, context_sender=None):
    """Return a content object parsed with the given format."""
    path = os.path.abspath(os.path.join(base_path, path))
    source_path = os.path.relpath(path, base_path)
    base, ext = os.path.splitext(os.path.basename(path))
    logger.debug('read file {} -> {}'.format(
        for cls in [BaseReader] + BaseReader.__subclasses__():
            for ext in cls.file_extensions:
                self.reader_classes[ext] = cls

        if self.settings['READERS']:
            self.reader_classes.update(self.settings['READERS'])

        signals.readers_init.send(self)

        for fmt, reader_class in self.reader_classes.items():
            if not reader_class:
                continue

            if not reader_class.enabled:
                logger.warning('Missing dependencies for {}'.format(fmt))
                continue

            self.readers[fmt] = reader_class(self.settings)

            settings_key = '%s_EXTENSIONS' % fmt.upper()

            if settings_key in self.settings:
                self.readers[fmt].extensions = self.settings[settings_key]

    @property
    def extensions(self):
        return self.readers.keys()

    def read_file(self, base_path, path, content_class=Page, fmt=None,
                  context=None, preread_signal=None, preread_sender=None,
                  context_signal=None, context_sender=None):
        """Return a content object parsed with the given format."""

        path = os.path.abspath(os.path.join(base_path, path))
        source_path = os.path.relpath(path, base_path)
        logger.debug('read file {} -> {}'.format(
            source_path, content_class.__name__))
    if not fmt:
        fmt = ext[1:]

    if fmt not in EXTENSIONS:
        raise TypeError('Pelican does not know how to parse {}'.format(path))
        if not fmt:
            _, ext = os.path.splitext(os.path.basename(path))
            fmt = ext[1:]

    if preread_signal:
        logger.debug('signal {}.send({})'.format(
        if fmt not in self.readers:
            raise TypeError(
                'Pelican does not know how to parse {}'.format(path))

        if preread_signal:
            logger.debug('signal {}.send({})'.format(
                preread_signal, preread_sender))
        preread_signal.send(preread_sender)
            preread_signal.send(preread_sender)

    if settings is None:
        settings = {}
        reader = self.readers[fmt]

    reader_class = EXTENSIONS[fmt]
    if not reader_class.enabled:
        raise ValueError('Missing dependencies for {}'.format(fmt))

    reader = reader_class(settings)

    settings_key = '%s_EXTENSIONS' % fmt.upper()

    if settings and settings_key in settings:
        reader.extensions = settings[settings_key]

    metadata = default_metadata(
        settings=settings, process=reader.process_metadata)
    metadata.update(path_metadata(
        full_path=path, source_path=source_path, settings=settings))
    metadata.update(parse_path_metadata(
        source_path=source_path, settings=settings,
        metadata = default_metadata(
            settings=self.settings, process=reader.process_metadata)
        metadata.update(path_metadata(
            full_path=path, source_path=source_path,
            settings=self.settings))
        metadata.update(parse_path_metadata(
            source_path=source_path, settings=self.settings,
            process=reader.process_metadata))
    content, reader_metadata = reader.read(path)
    metadata.update(reader_metadata)

    # create warnings for all images with empty alt (up to a certain number)
    # as they are really likely to be accessibility flaws
    if content:
        # find images with empty alt
        imgs = re.compile(r"""
            (?:
                # src before alt
                <img
                [^\>]*
                src=(['"])(.*)\1
                [^\>]*
                alt=(['"])\3
            )|(?:
                # alt before src
                <img
                [^\>]*
                alt=(['"])\4
                [^\>]*
                src=(['"])(.*)\5
            )
            """, re.X)
        matches = re.findall(imgs, content)
        # find a correct threshold
        nb_warnings = 10
        if len(matches) == nb_warnings + 1:
            nb_warnings += 1 # avoid bad looking case
        # print one warning per image with empty alt until threshold
        for match in matches[:nb_warnings]:
            logger.warning('Empty alt attribute for image {} in {}'.format(
                os.path.basename(match[1] + match[5]), path))
        # print one warning for the other images with empty alt
        if len(matches) > nb_warnings:
            logger.warning('{} other images with empty alt attributes'.format(
                len(matches) - nb_warnings))
        content, reader_metadata = reader.read(path)
        metadata.update(reader_metadata)

    # eventually filter the content with typogrify if asked so
    if content and settings and settings['TYPOGRIFY']:
        from typogrify.filters import typogrify
        content = typogrify(content)
        metadata['title'] = typogrify(metadata['title'])
        if content:
            # find images with empty alt
            find_empty_alt(content, path)

    if context_signal:
        logger.debug('signal {}.send({}, <metadata>)'.format(
        # eventually filter the content with typogrify if asked so
        if content and self.settings['TYPOGRIFY']:
            from typogrify.filters import typogrify
            content = typogrify(content)
            metadata['title'] = typogrify(metadata['title'])

        if context_signal:
            logger.debug('signal {}.send({}, <metadata>)'.format(
                context_signal, context_sender))
        context_signal.send(context_sender, metadata=metadata)
    return content_class(
        content=content,
        metadata=metadata,
        settings=settings,
        source_path=path,
        context=context)
            context_signal.send(context_sender, metadata=metadata)

        return content_class(content=content, metadata=metadata,
                             settings=self.settings, source_path=path,
                             context=context)


def find_empty_alt(content, path):
    """Find images with empty alt

    Create warnings for all images with empty alt (up to a certain number),
    as they are really likely to be accessibility flaws.

    """
    imgs = re.compile(r"""
        (?:
            # src before alt
            <img
            [^\>]*
            src=(['"])(.*)\1
            [^\>]*
            alt=(['"])\3
        )|(?:
            # alt before src
            <img
            [^\>]*
            alt=(['"])\4
            [^\>]*
            src=(['"])(.*)\5
        )
        """, re.X)
    matches = re.findall(imgs, content)
    # find a correct threshold
    nb_warnings = 10
    if len(matches) == nb_warnings + 1:
        nb_warnings += 1 # avoid bad looking case
    # print one warning per image with empty alt until threshold
    for match in matches[:nb_warnings]:
        logger.warning('Empty alt attribute for image {} in {}'.format(
            os.path.basename(match[1] + match[5]), path))
    # print one warning for the other images with empty alt
    if len(matches) > nb_warnings:
        logger.warning('{} other images with empty alt attributes'
                       .format(len(matches) - nb_warnings))


def default_metadata(settings=None, process=None):

@@ -469,7 +521,7 @@ def path_metadata(full_path, source_path, settings=None):
        metadata['date'] = datetime.datetime.fromtimestamp(
            os.stat(full_path).st_ctime)
    metadata.update(settings.get('EXTRA_PATH_METADATA', {}).get(
        source_path, {}))
        source_path, {}))
    return metadata


@@ -482,7 +534,7 @@ def parse_path_metadata(source_path, settings=None, process=None):
    ... 'PATH_METADATA':
    ...     '(?P<category>[^/]*)/(?P<date>\d{4}-\d{2}-\d{2})/.*',
    ... }
    >>> reader = Reader(settings=settings)
    >>> reader = BaseReader(settings=settings)
    >>> metadata = parse_path_metadata(
    ...     source_path='my-cat/2013-01-01/my-slug.html',
    ...     settings=settings,

@@ -498,13 +550,12 @@ def parse_path_metadata(source_path, settings=None, process=None):
    subdir = os.path.basename(dirname)
    if settings:
        checks = []
        for key,data in [('FILENAME_METADATA', base),
                         ('PATH_METADATA', source_path),
                         ]:
        for key, data in [('FILENAME_METADATA', base),
                          ('PATH_METADATA', source_path)]:
            checks.append((settings.get(key, None), data))
        if settings.get('USE_FOLDER_AS_CATEGORY', None):
            checks.insert(0, ('(?P<category>.*)', subdir))
        for regexp,data in checks:
        for regexp, data in checks:
            if regexp and data:
                match = re.match(regexp, data)
                if match:
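Putting the new pieces together, reading a single source file now goes through a ``Readers`` instance rather than the removed module-level ``read_file`` helper. A small usage sketch (the paths are illustrative)::

    from pelican.readers import Readers
    from pelican.settings import read_settings

    settings = read_settings()            # or read_settings('pelicanconf.py')
    readers = Readers(settings)

    print(sorted(readers.extensions))     # extensions that have a working reader
    page = readers.read_file(base_path='content', path='pages/about.md')
    print(page.title)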
@@ -33,7 +33,7 @@ DEFAULT_CONFIG = {
    'PAGE_EXCLUDES': (),
    'THEME': DEFAULT_THEME,
    'OUTPUT_PATH': 'output',
    'MARKUP': ('rst', 'md'),
    'READERS': {},
    'STATIC_PATHS': ['images', ],
    'THEME_STATIC_DIR': 'theme',
    'THEME_STATIC_PATHS': ['static', ],

@@ -112,6 +112,7 @@ DEFAULT_CONFIG = {
    'SLUG_SUBSTITUTIONS': (),
    }


def read_settings(path=None, override=None):
    if path:
        local_settings = get_settings_from_file(path)

@@ -120,7 +121,7 @@ def read_settings(path=None, override=None):
            if p in local_settings and local_settings[p] is not None \
                    and not isabs(local_settings[p]):
                absp = os.path.abspath(os.path.normpath(os.path.join(
                    os.path.dirname(path), local_settings[p])))
                    os.path.dirname(path), local_settings[p])))
                if p not in ('THEME', 'PLUGIN_PATH') or os.path.exists(absp):
                    local_settings[p] = absp
                else:

@@ -138,7 +139,7 @@ def get_settings_from_module(module=None, default_settings=DEFAULT_CONFIG):
    context = copy.deepcopy(default_settings)
    if module is not None:
        context.update(
            (k, v) for k, v in inspect.getmembers(module) if k.isupper())
            (k, v) for k, v in inspect.getmembers(module) if k.isupper())
    return context


@@ -221,17 +222,18 @@ def configure_settings(settings):
        settings['FEED_DOMAIN'] = settings['SITEURL']

    # Warn if feeds are generated with both SITEURL & FEED_DOMAIN undefined
    feed_keys = ['FEED_ATOM', 'FEED_RSS',
                 'FEED_ALL_ATOM', 'FEED_ALL_RSS',
                 'CATEGORY_FEED_ATOM', 'CATEGORY_FEED_RSS',
                 'TAG_FEED_ATOM', 'TAG_FEED_RSS',
                 'TRANSLATION_FEED_ATOM', 'TRANSLATION_FEED_RSS',
                 ]
    feed_keys = [
        'FEED_ATOM', 'FEED_RSS',
        'FEED_ALL_ATOM', 'FEED_ALL_RSS',
        'CATEGORY_FEED_ATOM', 'CATEGORY_FEED_RSS',
        'TAG_FEED_ATOM', 'TAG_FEED_RSS',
        'TRANSLATION_FEED_ATOM', 'TRANSLATION_FEED_RSS',
    ]

    if any(settings.get(k) for k in feed_keys):
        if not settings.get('SITEURL'):
            logger.warning('Feeds generated without SITEURL set properly may not'
                           ' be valid')
            logger.warning('Feeds generated without SITEURL set properly may'
                           ' not be valid')

    if not 'TIMEZONE' in settings:
        logger.warning(

@@ -255,26 +257,26 @@ def configure_settings(settings):

    # Save people from accidentally setting a string rather than a list
    path_keys = (
        'ARTICLE_EXCLUDES',
        'DEFAULT_METADATA',
        'DIRECT_TEMPLATES',
        'EXTRA_TEMPLATES_PATHS',
        'FILES_TO_COPY',
        'IGNORE_FILES',
        'JINJA_EXTENSIONS',
        'MARKUP',
        'PAGINATED_DIRECT_TEMPLATES',
        'PLUGINS',
        'STATIC_PATHS',
        'THEME_STATIC_PATHS',)
        'ARTICLE_EXCLUDES',
        'DEFAULT_METADATA',
        'DIRECT_TEMPLATES',
        'EXTRA_TEMPLATES_PATHS',
        'FILES_TO_COPY',
        'IGNORE_FILES',
        'JINJA_EXTENSIONS',
        'PAGINATED_DIRECT_TEMPLATES',
        'PLUGINS',
        'STATIC_PATHS',
        'THEME_STATIC_PATHS',
    )
    for PATH_KEY in filter(lambda k: k in settings, path_keys):
        if isinstance(settings[PATH_KEY], six.string_types):
            logger.warning("Detected misconfiguration with %s setting (must "
                           "be a list), falling back to the default"
                           % PATH_KEY)
            logger.warning("Detected misconfiguration with %s setting "
                           "(must be a list), falling back to the default"
                           % PATH_KEY)
            settings[PATH_KEY] = DEFAULT_CONFIG[PATH_KEY]

    for old,new,doc in [
    for old, new, doc in [
        ('LESS_GENERATOR', 'the Webassets plugin', None),
        ('FILES_TO_COPY', 'STATIC_PATHS and EXTRA_PATH_METADATA',
         'https://github.com/getpelican/pelican/blob/master/docs/settings.rst#path-metadata'),
@@ -8,6 +8,10 @@ initialized = signal('pelican_initialized')
get_generators = signal('get_generators')
finalized = signal('pelican_finalized')

# Reader-level signals

readers_init = signal('readers_init')

# Generator-level signals

generator_init = signal('generator_init')
@@ -19,9 +19,9 @@ CONTENT_DIR = os.path.join(CUR_DIR, 'content')
class TestGenerator(unittest.TestCase):
    def setUp(self):
        self.settings = get_settings()
        self.settings['READERS'] = {'asc': None}
        self.generator = Generator(self.settings.copy(), self.settings,
                                   CUR_DIR, self.settings['THEME'], None,
                                   self.settings['MARKUP'])
                                   CUR_DIR, self.settings['THEME'], None)

    def test_include_path(self):
        filename = os.path.join(CUR_DIR, 'content', 'article.rst')

@@ -30,10 +30,6 @@ class TestGenerator(unittest.TestCase):
        self.assertTrue(include_path(filename, extensions=('rst',)))
        self.assertFalse(include_path(filename, extensions=('md',)))

        # markup must be a tuple, test that this works also with a list
        self.generator.markup = ['rst', 'md']
        self.assertTrue(include_path(filename))


class TestArticlesGenerator(unittest.TestCase):

@@ -42,11 +38,11 @@ class TestArticlesGenerator(unittest.TestCase):
        settings = get_settings(filenames={})
        settings['DEFAULT_CATEGORY'] = 'Default'
        settings['DEFAULT_DATE'] = (1970, 1, 1)
        settings['READERS'] = {'asc': None}

        cls.generator = ArticlesGenerator(
            context=settings.copy(), settings=settings,
            path=CONTENT_DIR, theme=settings['THEME'],
            output_path=None, markup=settings['MARKUP'])
            path=CONTENT_DIR, theme=settings['THEME'], output_path=None)
        cls.generator.generate_context()
        cls.articles = [[page.title, page.status, page.category.name,
                         page.template] for page in cls.generator.articles]

@@ -55,8 +51,7 @@ class TestArticlesGenerator(unittest.TestCase):
        settings = get_settings()
        generator = ArticlesGenerator(
            context=settings, settings=settings,
            path=None, theme=settings['THEME'],
            output_path=None, markup=settings['MARKUP'])
            path=None, theme=settings['THEME'], output_path=None)
        writer = MagicMock()
        generator.generate_feeds(writer)
        writer.write_feed.assert_called_with([], settings,

@@ -64,8 +59,7 @@ class TestArticlesGenerator(unittest.TestCase):

        generator = ArticlesGenerator(
            context=settings, settings=get_settings(FEED_ALL_ATOM=None),
            path=None, theme=settings['THEME'],
            output_path=None, markup=None)
            path=None, theme=settings['THEME'], output_path=None)
        writer = MagicMock()
        generator.generate_feeds(writer)
        self.assertFalse(writer.write_feed.called)

@@ -74,26 +68,33 @@ class TestArticlesGenerator(unittest.TestCase):

        articles_expected = [
            ['Article title', 'published', 'Default', 'article'],
            ['Article with markdown and summary metadata single', 'published',
             'Default', 'article'],
            ['Article with markdown and summary metadata multi', 'published',
             'Default', 'article'],
            ['Article with markdown and summary metadata single', 'published',
             'Default', 'article'],
            ['Article with markdown containing footnotes', 'published',
             'Default', 'article'],
            ['Article with template', 'published', 'Default', 'custom'],
            ['Test md File', 'published', 'test', 'article'],
            ['Rst with filename metadata', 'published', 'yeah', 'article'],
            ['Test Markdown extensions', 'published', 'Default', 'article'],
            ['Test markdown File', 'published', 'test', 'article'],
            ['Test md File', 'published', 'test', 'article'],
            ['Test mdown File', 'published', 'test', 'article'],
            ['Test mkd File', 'published', 'test', 'article'],
            ['This is a super article !', 'published', 'Yeah', 'article'],
            ['This is a super article !', 'published', 'Yeah', 'article'],
            ['This is a super article !', 'published', 'yeah', 'article'],
            ['This is a super article !', 'published', 'yeah', 'article'],
            ['This is a super article !', 'published', 'yeah', 'article'],
            ['This is a super article !', 'published', 'Default', 'article'],
            ['This is an article with category !', 'published', 'yeah',
             'article'],
             'article'],
            ['This is an article without category !', 'published', 'Default',
             'article'],
            ['This is an article without category !', 'published',
             'TestCategory', 'article'],
            ['This is a super article !', 'published', 'yeah', 'article'],
            ['マックOS X 10.8でパイソンとVirtualenvをインストールと設定',
             'published', '指導書', 'article'],
            ['Article with markdown containing footnotes', 'published',
             'Default', 'article']
            ['マックOS X 10.8でパイソンとVirtualenvをインストールと設定', 'published',
             '指導書', 'article'],
        ]
        self.assertEqual(sorted(articles_expected), sorted(self.articles))

@@ -121,11 +122,11 @@ class TestArticlesGenerator(unittest.TestCase):
        settings['DEFAULT_CATEGORY'] = 'Default'
        settings['DEFAULT_DATE'] = (1970, 1, 1)
        settings['USE_FOLDER_AS_CATEGORY'] = False
        settings['READERS'] = {'asc': None}
        settings['filenames'] = {}
        generator = ArticlesGenerator(
            context=settings.copy(), settings=settings,
            path=CONTENT_DIR, theme=settings['THEME'],
            output_path=None, markup=settings['MARKUP'])
            path=CONTENT_DIR, theme=settings['THEME'], output_path=None)
        generator.generate_context()
        # test for name
        # categories are grouped by slug; if two categories have the same slug

@@ -147,8 +148,7 @@ class TestArticlesGenerator(unittest.TestCase):
        settings = get_settings(filenames={})
        generator = ArticlesGenerator(
            context=settings, settings=settings,
            path=None, theme=settings['THEME'],
            output_path=None, markup=settings['MARKUP'])
            path=None, theme=settings['THEME'], output_path=None)
        write = MagicMock()
        generator.generate_direct_templates(write)
        write.assert_called_with("archives.html",

@@ -162,8 +162,7 @@ class TestArticlesGenerator(unittest.TestCase):
        settings['ARCHIVES_SAVE_AS'] = 'archives/index.html'
        generator = ArticlesGenerator(
            context=settings, settings=settings,
            path=None, theme=settings['THEME'],
            output_path=None, markup=settings['MARKUP'])
            path=None, theme=settings['THEME'], output_path=None)
        write = MagicMock()
        generator.generate_direct_templates(write)
        write.assert_called_with("archives/index.html",

@@ -178,8 +177,7 @@ class TestArticlesGenerator(unittest.TestCase):
        settings['ARCHIVES_SAVE_AS'] = 'archives/index.html'
        generator = ArticlesGenerator(
            context=settings, settings=settings,
            path=None, theme=settings['THEME'],
            output_path=None, markup=settings['MARKUP'])
            path=None, theme=settings['THEME'], output_path=None)
        write = MagicMock()
        generator.generate_direct_templates(write)
        write.assert_called_count == 0

@@ -212,8 +210,7 @@ class TestPageGenerator(unittest.TestCase):

        generator = PagesGenerator(
            context=settings.copy(), settings=settings,
            path=CUR_DIR, theme=settings['THEME'],
            output_path=None, markup=settings['MARKUP'])
            path=CUR_DIR, theme=settings['THEME'], output_path=None)
        generator.generate_context()
        pages = self.distill_pages(generator.pages)
        hidden_pages = self.distill_pages(generator.hidden_pages)

@@ -252,13 +249,12 @@ class TestTemplatePagesGenerator(unittest.TestCase):
        settings = get_settings()
        settings['STATIC_PATHS'] = ['static']
        settings['TEMPLATE_PAGES'] = {
            'template/source.html': 'generated/file.html'
        }
            'template/source.html': 'generated/file.html'
            }

        generator = TemplatePagesGenerator(
            context={'foo': 'bar'}, settings=settings,
            path=self.temp_content, theme='',
            output_path=self.temp_output, markup=None)
            path=self.temp_content, theme='', output_path=self.temp_output)

        # create a dummy template file
        template_dir = os.path.join(self.temp_content, 'template')
@@ -19,8 +19,8 @@ class ReaderTest(unittest.TestCase):

    def read_file(self, path, **kwargs):
        # Isolate from future API changes to readers.read_file
        return readers.read_file(
            base_path=CONTENT_PATH, path=path, settings=get_settings(**kwargs))
        r = readers.Readers(settings=get_settings(**kwargs))
        return r.read_file(base_path=CONTENT_PATH, path=path)


class RstReaderTest(ReaderTest):

@@ -160,7 +160,7 @@ class MdReaderTest(ReaderTest):
                ' with some footnotes'
                '<sup id="fnref:footnote"><a class="footnote-ref" '
                'href="#fn:footnote" rel="footnote">2</a></sup></p>\n'

                '<div class="footnote">\n'
                '<hr />\n<ol>\n<li id="fn:1">\n'
                '<p>Numbered footnote '
@@ -353,12 +353,13 @@ class TestDateFormatter(unittest.TestCase):
                         'French locale needed')
    def test_french_locale(self):
        settings = read_settings(
            override = {'LOCALE': locale.normalize('fr_FR.UTF-8'),
                        'TEMPLATE_PAGES': {'template/source.html':
                                           'generated/file.html'}})
            override={'LOCALE': locale.normalize('fr_FR.UTF-8'),
                      'TEMPLATE_PAGES': {'template/source.html':
                                         'generated/file.html'}})

        generator = TemplatePagesGenerator({'date': self.date}, settings,
                self.temp_content, '', self.temp_output, None)
        generator = TemplatePagesGenerator(
            {'date': self.date}, settings,
            self.temp_content, '', self.temp_output)
        generator.env.filters.update({'strftime': utils.DateFormatter()})

        writer = Writer(self.temp_output, settings=settings)

@@ -385,8 +386,9 @@ class TestDateFormatter(unittest.TestCase):
                      'TEMPLATE_PAGES': {'template/source.html':
                                         'generated/file.html'}})

        generator = TemplatePagesGenerator({'date': self.date}, settings,
                self.temp_content, '', self.temp_output, None)
        generator = TemplatePagesGenerator(
            {'date': self.date}, settings,
            self.temp_content, '', self.temp_output)
        generator.env.filters.update({'strftime': utils.DateFormatter()})

        writer = Writer(self.temp_output, settings=settings)