From 4bc4b1500c91b63a37891e60a1e262e4f3fada1c Mon Sep 17 00:00:00 2001 From: Simon Conseil Date: Sun, 4 Aug 2013 17:02:58 +0200 Subject: [PATCH 1/5] Refactor readers and remove MARKUP Add a `Readers` class which contains a dict of file extensions / `Reader` instances. This dict can be overwritten with a `READERS` setting, for instance to avoid processing *.html files: READERS = {'html': None} Or to add a custom reader for the `foo` extension: READERS = {'foo': FooReader} This dict is no longer storing the Reader classes as it was done before with `EXTENSIONS`. It stores the instances of the Reader classes to avoid instantiating them for each file reading. --- pelican/__init__.py | 22 ++-- pelican/generators.py | 37 +++--- pelican/readers.py | 209 +++++++++++++++++-------------- pelican/settings.py | 56 +++++---- pelican/tests/test_generators.py | 63 +++++----- pelican/tests/test_readers.py | 6 +- pelican/tests/test_utils.py | 11 +- 7 files changed, 201 insertions(+), 203 deletions(-) diff --git a/pelican/__init__.py b/pelican/__init__.py index 9bce4926..8ba79e0a 100644 --- a/pelican/__init__.py +++ b/pelican/__init__.py @@ -17,6 +17,7 @@ from pelican.generators import (ArticlesGenerator, PagesGenerator, StaticGenerator, SourceFileGenerator, TemplatePagesGenerator) from pelican.log import init +from pelican.readers import Readers from pelican.settings import read_settings from pelican.utils import clean_output_dir, folder_watcher, file_watcher from pelican.writers import Writer @@ -46,7 +47,6 @@ class Pelican(object): self.path = settings['PATH'] self.theme = settings['THEME'] self.output_path = settings['OUTPUT_PATH'] - self.markup = settings['MARKUP'] self.ignore_files = settings['IGNORE_FILES'] self.delete_outputdir = settings['DELETE_OUTPUT_DIRECTORY'] self.output_retention = settings['OUTPUT_RETENTION'] @@ -164,7 +164,6 @@ class Pelican(object): path=self.path, theme=self.theme, output_path=self.output_path, - markup=self.markup, ) for cls in self.get_generator_classes() ] 
@@ -236,10 +235,6 @@ def parse_arguments(): help='Where to output the generated files. If not specified, a ' 'directory will be created, named "output" in the current path.') - parser.add_argument('-m', '--markup', dest='markup', - help='The list of markup language to use (rst or md). Please indicate ' - 'them separated by commas.') - parser.add_argument('-s', '--settings', dest='settings', help='The settings of the application, this is automatically set to ' '{0} if a file exists with this name.'.format(DEFAULT_CONFIG_NAME)) @@ -279,8 +274,6 @@ def get_config(args): if args.output: config['OUTPUT_PATH'] = \ os.path.abspath(os.path.expanduser(args.output)) - if args.markup: - config['MARKUP'] = [a.strip().lower() for a in args.markup.split(',')] if args.theme: abstheme = os.path.abspath(os.path.expanduser(args.theme)) config['THEME'] = abstheme if os.path.exists(abstheme) else args.theme @@ -296,8 +289,6 @@ def get_config(args): for key in config: if key in ('PATH', 'OUTPUT_PATH', 'THEME'): config[key] = config[key].decode(enc) - if key == "MARKUP": - config[key] = [a.decode(enc) for a in config[key]] return config @@ -315,16 +306,17 @@ def get_instance(args): module = __import__(module) cls = getattr(module, cls_name) - return cls(settings) + return cls(settings), settings def main(): args = parse_arguments() init(args.verbosity) - pelican = get_instance(args) + pelican, settings = get_instance(args) + readers = Readers(settings) watchers = {'content': folder_watcher(pelican.path, - pelican.markup, + readers.extensions, pelican.ignore_files), 'theme': folder_watcher(pelican.theme, [''], @@ -333,8 +325,8 @@ def main(): try: if args.autoreload: - print(' --- AutoReload Mode: Monitoring `content`, `theme` and `settings`' - ' for changes. ---') + print(' --- AutoReload Mode: Monitoring `content`, `theme` and' + ' `settings` for changes. 
---') while True: try: diff --git a/pelican/generators.py b/pelican/generators.py index 1444c95c..72c76b32 100644 --- a/pelican/generators.py +++ b/pelican/generators.py @@ -13,16 +13,13 @@ from functools import partial from itertools import chain, groupby from operator import attrgetter, itemgetter -from jinja2 import ( - Environment, FileSystemLoader, PrefixLoader, ChoiceLoader, BaseLoader, - TemplateNotFound -) +from jinja2 import (Environment, FileSystemLoader, PrefixLoader, ChoiceLoader, + BaseLoader, TemplateNotFound) from pelican.contents import Article, Page, Static, is_valid_content -from pelican.readers import read_file +from pelican.readers import Readers from pelican.utils import copy, process_translations, mkdir_p, DateFormatter from pelican import signals -import pelican.utils logger = logging.getLogger(__name__) @@ -31,23 +28,23 @@ logger = logging.getLogger(__name__) class Generator(object): """Baseclass generator""" - def __init__(self, context, settings, path, theme, output_path, markup, - **kwargs): + def __init__(self, context, settings, path, theme, output_path, **kwargs): self.context = context self.settings = settings self.path = path self.theme = theme self.output_path = output_path - self.markup = markup for arg, value in kwargs.items(): setattr(self, arg, value) + self.readers = Readers(self.settings) + # templates cache self._templates = {} self._templates_path = [] self._templates_path.append(os.path.expanduser( - os.path.join(self.theme, 'templates'))) + os.path.join(self.theme, 'templates'))) self._templates_path += self.settings['EXTRA_TEMPLATES_PATHS'] theme_path = os.path.dirname(os.path.abspath(__file__)) @@ -85,9 +82,8 @@ class Generator(object): try: self._templates[name] = self.env.get_template(name + '.html') except TemplateNotFound: - raise Exception( - ('[templates] unable to load %s.html from %s' - % (name, self._templates_path))) + raise Exception('[templates] unable to load %s.html from %s' + % (name, 
self._templates_path)) return self._templates[name] def _include_path(self, path, extensions=None): @@ -98,7 +94,7 @@ class Generator(object): extensions are allowed) """ if extensions is None: - extensions = tuple(self.markup) + extensions = tuple(self.readers.extensions) basename = os.path.basename(path) if extensions is False or basename.endswith(extensions): return True @@ -388,9 +384,9 @@ class ArticlesGenerator(Generator): self.settings['ARTICLE_DIR'], exclude=self.settings['ARTICLE_EXCLUDES']): try: - article = read_file( + article = self.readers.read_file( base_path=self.path, path=f, content_class=Article, - settings=self.settings, context=self.context, + context=self.context, preread_signal=signals.article_generator_preread, preread_sender=self, context_signal=signals.article_generator_context, @@ -496,9 +492,9 @@ class PagesGenerator(Generator): self.settings['PAGE_DIR'], exclude=self.settings['PAGE_EXCLUDES']): try: - page = read_file( + page = self.readers.read_file( base_path=self.path, path=f, content_class=Page, - settings=self.settings, context=self.context, + context=self.context, preread_signal=signals.page_generator_preread, preread_sender=self, context_signal=signals.page_generator_context, @@ -557,10 +553,9 @@ class StaticGenerator(Generator): for static_path in self.settings['STATIC_PATHS']: for f in self.get_files( static_path, extensions=False): - static = read_file( + static = self.readers.read_file( base_path=self.path, path=f, content_class=Static, - fmt='static', - settings=self.settings, context=self.context, + fmt='static', context=self.context, preread_signal=signals.static_generator_preread, preread_sender=self, context_signal=signals.static_generator_context, diff --git a/pelican/readers.py b/pelican/readers.py index 3923245e..9cf78042 100644 --- a/pelican/readers.py +++ b/pelican/readers.py @@ -37,7 +37,6 @@ except ImportError: from pelican.contents import Page, Category, Tag, Author from pelican.utils import get_date, 
pelican_open -logger = logging.getLogger(__name__) METADATA_PROCESSORS = { 'tags': lambda x, y: [Tag(tag, y) for tag in x.split(',')], @@ -50,7 +49,7 @@ METADATA_PROCESSORS = { logger = logging.getLogger(__name__) -class Reader(object): +class BaseReader(object): enabled = True file_extensions = ['static'] extensions = None @@ -110,7 +109,7 @@ class PelicanHTMLTranslator(HTMLTranslator): return HTMLTranslator.visit_image(self, node) -class RstReader(Reader): +class RstReader(BaseReader): enabled = bool(docutils) file_extensions = ['rst'] @@ -166,7 +165,7 @@ class RstReader(Reader): return content, metadata -class MarkdownReader(Reader): +class MarkdownReader(BaseReader): enabled = bool(Markdown) file_extensions = ['md', 'markdown', 'mkd', 'mdown'] @@ -174,7 +173,6 @@ class MarkdownReader(Reader): super(MarkdownReader, self).__init__(*args, **kwargs) self.extensions = self.settings['MD_EXTENSIONS'] self.extensions.append('meta') - self._md = Markdown(extensions=self.extensions) def _parse_metadata(self, meta): """Return the dict containing document metadata""" @@ -194,6 +192,7 @@ class MarkdownReader(Reader): def read(self, source_path): """Parse content and metadata of markdown files""" + self._md = Markdown(extensions=self.extensions) with pelican_open(source_path) as text: content = self._md.convert(text) @@ -201,7 +200,7 @@ class MarkdownReader(Reader): return content, metadata -class HTMLReader(Reader): +class HTMLReader(BaseReader): """Parses HTML files as input, looking for meta, title, and body tags""" file_extensions = ['htm', 'html'] enabled = True @@ -312,7 +311,7 @@ class HTMLReader(Reader): return parser.body, metadata -class AsciiDocReader(Reader): +class AsciiDocReader(BaseReader): enabled = bool(asciidoc) file_extensions = ['asc'] default_options = ["--no-header-footer", "-a newline=\\n"] @@ -344,109 +343,125 @@ class AsciiDocReader(Reader): return content, metadata -EXTENSIONS = {} +class Readers(object): -for cls in [Reader] + 
Reader.__subclasses__(): - for ext in cls.file_extensions: - EXTENSIONS[ext] = cls + def __init__(self, settings=None): + self.settings = settings or {} + self.readers = {} + extensions = {} + for cls in [BaseReader] + BaseReader.__subclasses__(): + for ext in cls.file_extensions: + extensions[ext] = cls -def read_file(base_path, path, content_class=Page, fmt=None, - settings=None, context=None, - preread_signal=None, preread_sender=None, - context_signal=None, context_sender=None): - """Return a content object parsed with the given format.""" - path = os.path.abspath(os.path.join(base_path, path)) - source_path = os.path.relpath(path, base_path) - base, ext = os.path.splitext(os.path.basename(path)) - logger.debug('read file {} -> {}'.format( + if self.settings['READERS']: + extensions.update(self.settings['READERS']) + + for fmt, reader_class in extensions.items(): + if not reader_class: + continue + + if not reader_class.enabled: + logger.warning('Missing dependencies for {}'.format(fmt)) + continue + + self.readers[fmt] = reader_class(self.settings) + + settings_key = '%s_EXTENSIONS' % fmt.upper() + + if settings_key in self.settings: + self.readers[fmt].extensions = self.settings[settings_key] + + @property + def extensions(self): + return self.readers.keys() + + def read_file(self, base_path, path, content_class=Page, fmt=None, + context=None, preread_signal=None, preread_sender=None, + context_signal=None, context_sender=None): + """Return a content object parsed with the given format.""" + + path = os.path.abspath(os.path.join(base_path, path)) + source_path = os.path.relpath(path, base_path) + logger.debug('read file {} -> {}'.format( source_path, content_class.__name__)) - if not fmt: - fmt = ext[1:] - if fmt not in EXTENSIONS: - raise TypeError('Pelican does not know how to parse {}'.format(path)) + if not fmt: + _, ext = os.path.splitext(os.path.basename(path)) + fmt = ext[1:] - if preread_signal: - logger.debug('signal {}.send({})'.format( + if fmt not 
in self.readers: + raise TypeError( + 'Pelican does not know how to parse {}'.format(path)) + + if preread_signal: + logger.debug('signal {}.send({})'.format( preread_signal, preread_sender)) - preread_signal.send(preread_sender) + preread_signal.send(preread_sender) - if settings is None: - settings = {} + reader = self.readers[fmt] - reader_class = EXTENSIONS[fmt] - if not reader_class.enabled: - raise ValueError('Missing dependencies for {}'.format(fmt)) - - reader = reader_class(settings) - - settings_key = '%s_EXTENSIONS' % fmt.upper() - - if settings and settings_key in settings: - reader.extensions = settings[settings_key] - - metadata = default_metadata( - settings=settings, process=reader.process_metadata) - metadata.update(path_metadata( - full_path=path, source_path=source_path, settings=settings)) - metadata.update(parse_path_metadata( - source_path=source_path, settings=settings, + metadata = default_metadata( + settings=self.settings, process=reader.process_metadata) + metadata.update(path_metadata( + full_path=path, source_path=source_path, + settings=self.settings)) + metadata.update(parse_path_metadata( + source_path=source_path, settings=self.settings, process=reader.process_metadata)) - content, reader_metadata = reader.read(path) - metadata.update(reader_metadata) - # create warnings for all images with empty alt (up to a certain number) - # as they are really likely to be accessibility flaws - if content: - # find images with empty alt - imgs = re.compile(r""" - (?: - # src before alt - ]* - src=(['"])(.*)\1 - [^\>]* - alt=(['"])\3 - )|(?: - # alt before src - ]* - alt=(['"])\4 - [^\>]* - src=(['"])(.*)\5 - ) - """, re.X) - matches = re.findall(imgs, content) - # find a correct threshold - nb_warnings = 10 - if len(matches) == nb_warnings + 1: - nb_warnings += 1 # avoid bad looking case - # print one warning per image with empty alt until threshold - for match in matches[:nb_warnings]: - logger.warning('Empty alt attribute for image {} in 
{}'.format( - os.path.basename(match[1] + match[5]), path)) - # print one warning for the other images with empty alt - if len(matches) > nb_warnings: - logger.warning('{} other images with empty alt attributes'.format( - len(matches) - nb_warnings)) + content, reader_metadata = reader.read(path) + metadata.update(reader_metadata) - # eventually filter the content with typogrify if asked so - if content and settings and settings['TYPOGRIFY']: - from typogrify.filters import typogrify - content = typogrify(content) - metadata['title'] = typogrify(metadata['title']) + # create warnings for all images with empty alt (up to a certain + # number) # as they are really likely to be accessibility flaws + if content: + # find images with empty alt + imgs = re.compile(r""" + (?: + # src before alt + ]* + src=(['"])(.*)\1 + [^\>]* + alt=(['"])\3 + )|(?: + # alt before src + ]* + alt=(['"])\4 + [^\>]* + src=(['"])(.*)\5 + ) + """, re.X) + matches = re.findall(imgs, content) + # find a correct threshold + nb_warnings = 10 + if len(matches) == nb_warnings + 1: + nb_warnings += 1 # avoid bad looking case + # print one warning per image with empty alt until threshold + for match in matches[:nb_warnings]: + logger.warning('Empty alt attribute for image {} in {}'.format( + os.path.basename(match[1] + match[5]), path)) + # print one warning for the other images with empty alt + if len(matches) > nb_warnings: + logger.warning('{} other images with empty alt attributes' + .format(len(matches) - nb_warnings)) - if context_signal: - logger.debug('signal {}.send({}, )'.format( + # eventually filter the content with typogrify if asked so + if content and self.settings['TYPOGRIFY']: + from typogrify.filters import typogrify + content = typogrify(content) + metadata['title'] = typogrify(metadata['title']) + + if context_signal: + logger.debug('signal {}.send({}, )'.format( context_signal, context_sender)) - context_signal.send(context_sender, metadata=metadata) - return content_class( - 
content=content, - metadata=metadata, - settings=settings, - source_path=path, - context=context) + context_signal.send(context_sender, metadata=metadata) + + return content_class(content=content, metadata=metadata, + settings=self.settings, source_path=path, + context=context) def default_metadata(settings=None, process=None): @@ -482,7 +497,7 @@ def parse_path_metadata(source_path, settings=None, process=None): ... 'PATH_METADATA': ... '(?P[^/]*)/(?P\d{4}-\d{2}-\d{2})/.*', ... } - >>> reader = Reader(settings=settings) + >>> reader = BaseReader(settings=settings) >>> metadata = parse_path_metadata( ... source_path='my-cat/2013-01-01/my-slug.html', ... settings=settings, diff --git a/pelican/settings.py b/pelican/settings.py index 0f37c98d..e71796a2 100644 --- a/pelican/settings.py +++ b/pelican/settings.py @@ -33,7 +33,7 @@ DEFAULT_CONFIG = { 'PAGE_EXCLUDES': (), 'THEME': DEFAULT_THEME, 'OUTPUT_PATH': 'output', - 'MARKUP': ('rst', 'md'), + 'READERS': {}, 'STATIC_PATHS': ['images', ], 'THEME_STATIC_DIR': 'theme', 'THEME_STATIC_PATHS': ['static', ], @@ -112,6 +112,7 @@ DEFAULT_CONFIG = { 'SLUG_SUBSTITUTIONS': (), } + def read_settings(path=None, override=None): if path: local_settings = get_settings_from_file(path) @@ -120,7 +121,7 @@ def read_settings(path=None, override=None): if p in local_settings and local_settings[p] is not None \ and not isabs(local_settings[p]): absp = os.path.abspath(os.path.normpath(os.path.join( - os.path.dirname(path), local_settings[p]))) + os.path.dirname(path), local_settings[p]))) if p not in ('THEME', 'PLUGIN_PATH') or os.path.exists(absp): local_settings[p] = absp else: @@ -138,7 +139,7 @@ def get_settings_from_module(module=None, default_settings=DEFAULT_CONFIG): context = copy.deepcopy(default_settings) if module is not None: context.update( - (k, v) for k, v in inspect.getmembers(module) if k.isupper()) + (k, v) for k, v in inspect.getmembers(module) if k.isupper()) return context @@ -221,17 +222,18 @@ def 
configure_settings(settings): settings['FEED_DOMAIN'] = settings['SITEURL'] # Warn if feeds are generated with both SITEURL & FEED_DOMAIN undefined - feed_keys = ['FEED_ATOM', 'FEED_RSS', - 'FEED_ALL_ATOM', 'FEED_ALL_RSS', - 'CATEGORY_FEED_ATOM', 'CATEGORY_FEED_RSS', - 'TAG_FEED_ATOM', 'TAG_FEED_RSS', - 'TRANSLATION_FEED_ATOM', 'TRANSLATION_FEED_RSS', - ] + feed_keys = [ + 'FEED_ATOM', 'FEED_RSS', + 'FEED_ALL_ATOM', 'FEED_ALL_RSS', + 'CATEGORY_FEED_ATOM', 'CATEGORY_FEED_RSS', + 'TAG_FEED_ATOM', 'TAG_FEED_RSS', + 'TRANSLATION_FEED_ATOM', 'TRANSLATION_FEED_RSS', + ] if any(settings.get(k) for k in feed_keys): if not settings.get('SITEURL'): - logger.warning('Feeds generated without SITEURL set properly may not' - ' be valid') + logger.warning('Feeds generated without SITEURL set properly may' + ' not be valid') if not 'TIMEZONE' in settings: logger.warning( @@ -255,26 +257,26 @@ def configure_settings(settings): # Save people from accidentally setting a string rather than a list path_keys = ( - 'ARTICLE_EXCLUDES', - 'DEFAULT_METADATA', - 'DIRECT_TEMPLATES', - 'EXTRA_TEMPLATES_PATHS', - 'FILES_TO_COPY', - 'IGNORE_FILES', - 'JINJA_EXTENSIONS', - 'MARKUP', - 'PAGINATED_DIRECT_TEMPLATES', - 'PLUGINS', - 'STATIC_PATHS', - 'THEME_STATIC_PATHS',) + 'ARTICLE_EXCLUDES', + 'DEFAULT_METADATA', + 'DIRECT_TEMPLATES', + 'EXTRA_TEMPLATES_PATHS', + 'FILES_TO_COPY', + 'IGNORE_FILES', + 'JINJA_EXTENSIONS', + 'PAGINATED_DIRECT_TEMPLATES', + 'PLUGINS', + 'STATIC_PATHS', + 'THEME_STATIC_PATHS', + ) for PATH_KEY in filter(lambda k: k in settings, path_keys): if isinstance(settings[PATH_KEY], six.string_types): - logger.warning("Detected misconfiguration with %s setting (must " - "be a list), falling back to the default" - % PATH_KEY) + logger.warning("Detected misconfiguration with %s setting " + "(must be a list), falling back to the default" + % PATH_KEY) settings[PATH_KEY] = DEFAULT_CONFIG[PATH_KEY] - for old,new,doc in [ + for old, new, doc in [ ('LESS_GENERATOR', 'the Webassets 
plugin', None), ('FILES_TO_COPY', 'STATIC_PATHS and EXTRA_PATH_METADATA', 'https://github.com/getpelican/pelican/blob/master/docs/settings.rst#path-metadata'), diff --git a/pelican/tests/test_generators.py b/pelican/tests/test_generators.py index d8a4336d..bd4e6021 100644 --- a/pelican/tests/test_generators.py +++ b/pelican/tests/test_generators.py @@ -20,8 +20,7 @@ class TestGenerator(unittest.TestCase): def setUp(self): self.settings = get_settings() self.generator = Generator(self.settings.copy(), self.settings, - CUR_DIR, self.settings['THEME'], None, - self.settings['MARKUP']) + CUR_DIR, self.settings['THEME'], None) def test_include_path(self): filename = os.path.join(CUR_DIR, 'content', 'article.rst') @@ -30,10 +29,6 @@ class TestGenerator(unittest.TestCase): self.assertTrue(include_path(filename, extensions=('rst',))) self.assertFalse(include_path(filename, extensions=('md',))) - # markup must be a tuple, test that this works also with a list - self.generator.markup = ['rst', 'md'] - self.assertTrue(include_path(filename)) - class TestArticlesGenerator(unittest.TestCase): @@ -45,8 +40,7 @@ class TestArticlesGenerator(unittest.TestCase): cls.generator = ArticlesGenerator( context=settings.copy(), settings=settings, - path=CONTENT_DIR, theme=settings['THEME'], - output_path=None, markup=settings['MARKUP']) + path=CONTENT_DIR, theme=settings['THEME'], output_path=None) cls.generator.generate_context() cls.articles = [[page.title, page.status, page.category.name, page.template] for page in cls.generator.articles] @@ -55,8 +49,7 @@ class TestArticlesGenerator(unittest.TestCase): settings = get_settings() generator = ArticlesGenerator( context=settings, settings=settings, - path=None, theme=settings['THEME'], - output_path=None, markup=settings['MARKUP']) + path=None, theme=settings['THEME'], output_path=None) writer = MagicMock() generator.generate_feeds(writer) writer.write_feed.assert_called_with([], settings, @@ -64,8 +57,7 @@ class 
TestArticlesGenerator(unittest.TestCase): generator = ArticlesGenerator( context=settings, settings=get_settings(FEED_ALL_ATOM=None), - path=None, theme=settings['THEME'], - output_path=None, markup=None) + path=None, theme=settings['THEME'], output_path=None) writer = MagicMock() generator.generate_feeds(writer) self.assertFalse(writer.write_feed.called) @@ -74,26 +66,33 @@ class TestArticlesGenerator(unittest.TestCase): articles_expected = [ ['Article title', 'published', 'Default', 'article'], - ['Article with markdown and summary metadata single', 'published', - 'Default', 'article'], ['Article with markdown and summary metadata multi', 'published', 'Default', 'article'], + ['Article with markdown and summary metadata single', 'published', + 'Default', 'article'], + ['Article with markdown containing footnotes', 'published', + 'Default', 'article'], ['Article with template', 'published', 'Default', 'custom'], - ['Test md File', 'published', 'test', 'article'], ['Rst with filename metadata', 'published', 'yeah', 'article'], ['Test Markdown extensions', 'published', 'Default', 'article'], + ['Test markdown File', 'published', 'test', 'article'], + ['Test md File', 'published', 'test', 'article'], + ['Test mdown File', 'published', 'test', 'article'], + ['Test mkd File', 'published', 'test', 'article'], ['This is a super article !', 'published', 'Yeah', 'article'], + ['This is a super article !', 'published', 'Yeah', 'article'], + ['This is a super article !', 'published', 'yeah', 'article'], + ['This is a super article !', 'published', 'yeah', 'article'], + ['This is a super article !', 'published', 'yeah', 'article'], + ['This is a super article !', 'published', 'Default', 'article'], ['This is an article with category !', 'published', 'yeah', - 'article'], + 'article'], ['This is an article without category !', 'published', 'Default', 'article'], ['This is an article without category !', 'published', 'TestCategory', 'article'], - ['This is a super article !', 
'published', 'yeah', 'article'], - ['マックOS X 10.8でパイソンとVirtualenvをインストールと設定', - 'published', '指導書', 'article'], - ['Article with markdown containing footnotes', 'published', - 'Default', 'article'] + ['マックOS X 10.8でパイソンとVirtualenvをインストールと設定', 'published', + '指導書', 'article'], ] self.assertEqual(sorted(articles_expected), sorted(self.articles)) @@ -124,8 +123,7 @@ class TestArticlesGenerator(unittest.TestCase): settings['filenames'] = {} generator = ArticlesGenerator( context=settings.copy(), settings=settings, - path=CONTENT_DIR, theme=settings['THEME'], - output_path=None, markup=settings['MARKUP']) + path=CONTENT_DIR, theme=settings['THEME'], output_path=None) generator.generate_context() # test for name # categories are grouped by slug; if two categories have the same slug @@ -147,8 +145,7 @@ class TestArticlesGenerator(unittest.TestCase): settings = get_settings(filenames={}) generator = ArticlesGenerator( context=settings, settings=settings, - path=None, theme=settings['THEME'], - output_path=None, markup=settings['MARKUP']) + path=None, theme=settings['THEME'], output_path=None) write = MagicMock() generator.generate_direct_templates(write) write.assert_called_with("archives.html", @@ -162,8 +159,7 @@ class TestArticlesGenerator(unittest.TestCase): settings['ARCHIVES_SAVE_AS'] = 'archives/index.html' generator = ArticlesGenerator( context=settings, settings=settings, - path=None, theme=settings['THEME'], - output_path=None, markup=settings['MARKUP']) + path=None, theme=settings['THEME'], output_path=None) write = MagicMock() generator.generate_direct_templates(write) write.assert_called_with("archives/index.html", @@ -178,8 +174,7 @@ class TestArticlesGenerator(unittest.TestCase): settings['ARCHIVES_SAVE_AS'] = 'archives/index.html' generator = ArticlesGenerator( context=settings, settings=settings, - path=None, theme=settings['THEME'], - output_path=None, markup=settings['MARKUP']) + path=None, theme=settings['THEME'], output_path=None) write = MagicMock() 
generator.generate_direct_templates(write) write.assert_called_count == 0 @@ -212,8 +207,7 @@ class TestPageGenerator(unittest.TestCase): generator = PagesGenerator( context=settings.copy(), settings=settings, - path=CUR_DIR, theme=settings['THEME'], - output_path=None, markup=settings['MARKUP']) + path=CUR_DIR, theme=settings['THEME'], output_path=None) generator.generate_context() pages = self.distill_pages(generator.pages) hidden_pages = self.distill_pages(generator.hidden_pages) @@ -252,13 +246,12 @@ class TestTemplatePagesGenerator(unittest.TestCase): settings = get_settings() settings['STATIC_PATHS'] = ['static'] settings['TEMPLATE_PAGES'] = { - 'template/source.html': 'generated/file.html' - } + 'template/source.html': 'generated/file.html' + } generator = TemplatePagesGenerator( context={'foo': 'bar'}, settings=settings, - path=self.temp_content, theme='', - output_path=self.temp_output, markup=None) + path=self.temp_content, theme='', output_path=self.temp_output) # create a dummy template file template_dir = os.path.join(self.temp_content, 'template') diff --git a/pelican/tests/test_readers.py b/pelican/tests/test_readers.py index c67b8a1f..43cf5ecf 100644 --- a/pelican/tests/test_readers.py +++ b/pelican/tests/test_readers.py @@ -19,8 +19,8 @@ class ReaderTest(unittest.TestCase): def read_file(self, path, **kwargs): # Isolate from future API changes to readers.read_file - return readers.read_file( - base_path=CONTENT_PATH, path=path, settings=get_settings(**kwargs)) + r = readers.Readers(settings=get_settings(**kwargs)) + return r.read_file(base_path=CONTENT_PATH, path=path) class RstReaderTest(ReaderTest): @@ -160,7 +160,7 @@ class MdReaderTest(ReaderTest): ' with some footnotes' '2

\n' - + '
\n' '
\n
    \n
  1. \n' '

    Numbered footnote ' diff --git a/pelican/tests/test_utils.py b/pelican/tests/test_utils.py index 0e65003a..3a1cceca 100644 --- a/pelican/tests/test_utils.py +++ b/pelican/tests/test_utils.py @@ -353,12 +353,13 @@ class TestDateFormatter(unittest.TestCase): 'French locale needed') def test_french_locale(self): settings = read_settings( - override = {'LOCALE': locale.normalize('fr_FR.UTF-8'), - 'TEMPLATE_PAGES': {'template/source.html': - 'generated/file.html'}}) + override={'LOCALE': locale.normalize('fr_FR.UTF-8'), + 'TEMPLATE_PAGES': {'template/source.html': + 'generated/file.html'}}) - generator = TemplatePagesGenerator({'date': self.date}, settings, - self.temp_content, '', self.temp_output, None) + generator = TemplatePagesGenerator( + {'date': self.date}, settings, + self.temp_content, '', self.temp_output) generator.env.filters.update({'strftime': utils.DateFormatter()}) writer = Writer(self.temp_output, settings=settings) From cfe72c2736755173c774fb8a612ba3a84dd81131 Mon Sep 17 00:00:00 2001 From: Simon Conseil Date: Sun, 4 Aug 2013 21:17:15 +0200 Subject: [PATCH 2/5] Disable asciidoc files for tests --- pelican/tests/test_generators.py | 3 +++ pelican/tests/test_utils.py | 5 +++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/pelican/tests/test_generators.py b/pelican/tests/test_generators.py index bd4e6021..f47ce7d3 100644 --- a/pelican/tests/test_generators.py +++ b/pelican/tests/test_generators.py @@ -19,6 +19,7 @@ CONTENT_DIR = os.path.join(CUR_DIR, 'content') class TestGenerator(unittest.TestCase): def setUp(self): self.settings = get_settings() + self.settings['READERS'] = {'asc': None} self.generator = Generator(self.settings.copy(), self.settings, CUR_DIR, self.settings['THEME'], None) @@ -37,6 +38,7 @@ class TestArticlesGenerator(unittest.TestCase): settings = get_settings(filenames={}) settings['DEFAULT_CATEGORY'] = 'Default' settings['DEFAULT_DATE'] = (1970, 1, 1) + settings['READERS'] = {'asc': None} cls.generator = 
ArticlesGenerator( context=settings.copy(), settings=settings, @@ -120,6 +122,7 @@ class TestArticlesGenerator(unittest.TestCase): settings['DEFAULT_CATEGORY'] = 'Default' settings['DEFAULT_DATE'] = (1970, 1, 1) settings['USE_FOLDER_AS_CATEGORY'] = False + settings['READERS'] = {'asc': None} settings['filenames'] = {} generator = ArticlesGenerator( context=settings.copy(), settings=settings, diff --git a/pelican/tests/test_utils.py b/pelican/tests/test_utils.py index 3a1cceca..0642926e 100644 --- a/pelican/tests/test_utils.py +++ b/pelican/tests/test_utils.py @@ -386,8 +386,9 @@ class TestDateFormatter(unittest.TestCase): 'TEMPLATE_PAGES': {'template/source.html': 'generated/file.html'}}) - generator = TemplatePagesGenerator({'date': self.date}, settings, - self.temp_content, '', self.temp_output, None) + generator = TemplatePagesGenerator( + {'date': self.date}, settings, + self.temp_content, '', self.temp_output) generator.env.filters.update({'strftime': utils.DateFormatter()}) writer = Writer(self.temp_output, settings=settings) From 85ea737a98d5e269a0140039f12bffbd43fe0bbb Mon Sep 17 00:00:00 2001 From: Simon Conseil Date: Wed, 7 Aug 2013 00:01:12 +0200 Subject: [PATCH 3/5] Add a signal to give access to the dict of Reader classes. 
--- pelican/readers.py | 13 ++++++++----- pelican/signals.py | 4 ++++ 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/pelican/readers.py b/pelican/readers.py index 9cf78042..e5283de7 100644 --- a/pelican/readers.py +++ b/pelican/readers.py @@ -34,6 +34,7 @@ try: except ImportError: from HTMLParser import HTMLParser +from pelican import signals from pelican.contents import Page, Category, Tag, Author from pelican.utils import get_date, pelican_open @@ -348,16 +349,18 @@ class Readers(object): def __init__(self, settings=None): self.settings = settings or {} self.readers = {} + self.reader_classes = {} - extensions = {} for cls in [BaseReader] + BaseReader.__subclasses__(): for ext in cls.file_extensions: - extensions[ext] = cls + self.reader_classes[ext] = cls if self.settings['READERS']: - extensions.update(self.settings['READERS']) + self.reader_classes.update(self.settings['READERS']) - for fmt, reader_class in extensions.items(): + signals.readers_init.send(self) + + for fmt, reader_class in self.reader_classes.items(): if not reader_class: continue @@ -484,7 +487,7 @@ def path_metadata(full_path, source_path, settings=None): metadata['date'] = datetime.datetime.fromtimestamp( os.stat(full_path).st_ctime) metadata.update(settings.get('EXTRA_PATH_METADATA', {}).get( - source_path, {})) + source_path, {})) return metadata diff --git a/pelican/signals.py b/pelican/signals.py index cb010d37..77802e88 100644 --- a/pelican/signals.py +++ b/pelican/signals.py @@ -8,6 +8,10 @@ initialized = signal('pelican_initialized') get_generators = signal('get_generators') finalized = signal('pelican_finalized') +# Reader-level signals + +readers_init = signal('readers_init') + # Generator-level signals generator_init = signal('generator_init') From bab8d0b26a80ab3c9b38311d26fccad938ec2220 Mon Sep 17 00:00:00 2001 From: Simon Conseil Date: Wed, 7 Aug 2013 00:10:26 +0200 Subject: [PATCH 4/5] Move the "find image with an empty alt" block in a function. 
--- pelican/readers.py | 79 +++++++++++++++++++++++++--------------------- 1 file changed, 43 insertions(+), 36 deletions(-) diff --git a/pelican/readers.py b/pelican/readers.py index e5283de7..3b3bfd12 100644 --- a/pelican/readers.py +++ b/pelican/readers.py @@ -416,40 +416,9 @@ class Readers(object): content, reader_metadata = reader.read(path) metadata.update(reader_metadata) - # create warnings for all images with empty alt (up to a certain - # number) # as they are really likely to be accessibility flaws if content: # find images with empty alt - imgs = re.compile(r""" - (?: - # src before alt - ]* - src=(['"])(.*)\1 - [^\>]* - alt=(['"])\3 - )|(?: - # alt before src - ]* - alt=(['"])\4 - [^\>]* - src=(['"])(.*)\5 - ) - """, re.X) - matches = re.findall(imgs, content) - # find a correct threshold - nb_warnings = 10 - if len(matches) == nb_warnings + 1: - nb_warnings += 1 # avoid bad looking case - # print one warning per image with empty alt until threshold - for match in matches[:nb_warnings]: - logger.warning('Empty alt attribute for image {} in {}'.format( - os.path.basename(match[1] + match[5]), path)) - # print one warning for the other images with empty alt - if len(matches) > nb_warnings: - logger.warning('{} other images with empty alt attributes' - .format(len(matches) - nb_warnings)) + find_empty_alt(content, path) # eventually filter the content with typogrify if asked so if content and self.settings['TYPOGRIFY']: @@ -467,6 +436,45 @@ class Readers(object): context=context) +def find_empty_alt(content, path): + """Find images with empty alt + + Create warnings for all images with empty alt (up to a certain number), + as they are really likely to be accessibility flaws. 
+ + """ + imgs = re.compile(r""" + (?: + # src before alt + ]* + src=(['"])(.*)\1 + [^\>]* + alt=(['"])\3 + )|(?: + # alt before src + ]* + alt=(['"])\4 + [^\>]* + src=(['"])(.*)\5 + ) + """, re.X) + matches = re.findall(imgs, content) + # find a correct threshold + nb_warnings = 10 + if len(matches) == nb_warnings + 1: + nb_warnings += 1 # avoid bad looking case + # print one warning per image with empty alt until threshold + for match in matches[:nb_warnings]: + logger.warning('Empty alt attribute for image {} in {}'.format( + os.path.basename(match[1] + match[5]), path)) + # print one warning for the other images with empty alt + if len(matches) > nb_warnings: + logger.warning('{} other images with empty alt attributes' + .format(len(matches) - nb_warnings)) + + def default_metadata(settings=None, process=None): metadata = {} if settings: @@ -516,13 +524,12 @@ def parse_path_metadata(source_path, settings=None, process=None): subdir = os.path.basename(dirname) if settings: checks = [] - for key,data in [('FILENAME_METADATA', base), - ('PATH_METADATA', source_path), - ]: + for key, data in [('FILENAME_METADATA', base), + ('PATH_METADATA', source_path)]: checks.append((settings.get(key, None), data)) if settings.get('USE_FOLDER_AS_CATEGORY', None): checks.insert(0, ('(?P.*)', subdir)) - for regexp,data in checks: + for regexp, data in checks: if regexp and data: match = re.match(regexp, data) if match: From f47f054d0be29d95ecb22e025f1a50932c00fd2b Mon Sep 17 00:00:00 2001 From: Simon Conseil Date: Sun, 4 Aug 2013 22:03:37 +0200 Subject: [PATCH 5/5] Add documentation for readers. 
--- docs/internals.rst | 4 ++-- docs/plugins.rst | 15 ++++++++------- docs/settings.rst | 7 ++++--- pelican/readers.py | 26 ++++++++++++++++++++++++++ 4 files changed, 40 insertions(+), 12 deletions(-) diff --git a/docs/internals.rst b/docs/internals.rst index 704122ba..f69a9bb8 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -24,7 +24,7 @@ The logic is separated into different classes and concepts: then passed to the generators. * **Readers** are used to read from various formats (AsciiDoc, HTML, Markdown and - reStructuredText for now, but the system is extensible). Given a file, they + reStructuredText for now, but the system is extensible). Given a file, they return metadata (author, tags, category, etc.) and content (HTML-formatted). * **Generators** generate the different outputs. For instance, Pelican comes with @@ -44,7 +44,7 @@ method that returns HTML content and some metadata. Take a look at the Markdown reader:: - class MarkdownReader(Reader): + class MarkdownReader(BaseReader): enabled = bool(Markdown) def read(self, source_path): diff --git a/docs/plugins.rst b/docs/plugins.rst index 93307afb..582f40a7 100644 --- a/docs/plugins.rst +++ b/docs/plugins.rst @@ -71,6 +71,7 @@ finalized pelican object invoked after al - minifying js/css assets. - notify/ping search engines with an updated sitemap. generator_init generator invoked in the Generator.__init__ +readers_init readers invoked in the Readers.__init__ article_generate_context article_generator, metadata article_generate_preread article_generator invoked before a article is read in ArticlesGenerator.generate_context; use if code needs to do something before every article is parsed @@ -144,13 +145,13 @@ write and don't slow down pelican itself when they're not active. 
No more talking, here is the example:: from pelican import signals - from pelican.readers import EXTENSIONS, Reader + from pelican.readers import BaseReader - # Create a new reader class, inheriting from the pelican.reader.Reader - class NewReader(Reader): + # Create a new reader class, inheriting from the pelican.readers.BaseReader + class NewReader(BaseReader): enabled = True # Yeah, you probably want that :-) - # The list of extensions you want this reader to match with. + # The list of file extensions you want this reader to match with. # In the case multiple readers use the same extensions, the latest will # win (so the one you're defining here, most probably). file_extensions = ['yeah'] @@ -168,12 +169,12 @@ No more talking, here is the example:: return "Some content", parsed - def add_reader(arg): - EXTENSIONS['yeah'] = NewReader + def add_reader(readers): + readers.reader_classes['yeah'] = NewReader # this is how pelican works. def register(): - signals.initialized.connect(add_reader) + signals.readers_init.connect(add_reader) Adding a new generator diff --git a/docs/settings.rst b/docs/settings.rst index 8ecac7c9..04574127 100644 --- a/docs/settings.rst +++ b/docs/settings.rst @@ -84,9 +84,10 @@ Setting name (default value) What doe here or a single string representing one locale. When providing a list, all the locales will be tried until one works. -`MARKUP` (``('rst', 'md')``) A list of available markup languages you want - to use. For the moment, the only available values - are `rst`, `md`, `markdown`, `mkd`, `mdown`, `html`, and `htm`. +`READERS` (``{}``) A dict of file extensions / Reader classes to overwrite or + add file readers. For instance, to avoid processing .html files: + ``READERS = {'html': None}``. Or to add a custom reader for the + `foo` extension: ``READERS = {'foo': FooReader}`` `IGNORE_FILES` (``['.#*']``) A list of file globbing patterns to match against the source files to be ignored by the processor.
For example, the default ``['.#*']`` will ignore emacs lock files. diff --git a/pelican/readers.py b/pelican/readers.py index 3b3bfd12..97a184d0 100644 --- a/pelican/readers.py +++ b/pelican/readers.py @@ -51,6 +51,18 @@ logger = logging.getLogger(__name__) class BaseReader(object): + """Base class to read files. + + This class is used to process static files, and it can be inherited for + other types of file. A Reader class must have the following attributes: + + - enabled: (boolean) tell if the Reader class is enabled. It + generally depends on the import of some dependency. + - file_extensions: a list of file extensions that the Reader will process. + - extensions: a list of extensions to use in the reader (typical use is + Markdown). + + """ enabled = True file_extensions = ['static'] extensions = None @@ -111,6 +123,8 @@ class PelicanHTMLTranslator(HTMLTranslator): class RstReader(BaseReader): + """Reader for reStructuredText files""" + enabled = bool(docutils) file_extensions = ['rst'] @@ -167,6 +181,8 @@ class RstReader(BaseReader): class MarkdownReader(BaseReader): + """Reader for Markdown files""" + enabled = bool(Markdown) file_extensions = ['md', 'markdown', 'mkd', 'mdown'] @@ -203,6 +219,7 @@ class MarkdownReader(BaseReader): class HTMLReader(BaseReader): """Parses HTML files as input, looking for meta, title, and body tags""" + file_extensions = ['htm', 'html'] enabled = True @@ -313,6 +330,8 @@ class HTMLReader(BaseReader): class AsciiDocReader(BaseReader): + """Reader for AsciiDoc files""" + enabled = bool(asciidoc) file_extensions = ['asc'] default_options = ["--no-header-footer", "-a newline=\\n"] @@ -345,7 +364,14 @@ class AsciiDocReader(BaseReader): class Readers(object): + """Interface for all readers. + This class contains a mapping of file extensions / Reader classes, to know + which Reader class must be used to read a file (based on its extension). 
+ This is customizable both with the 'READERS' setting, and with the + 'readers_init' signal for plugins. + + """ def __init__(self, settings=None): self.settings = settings or {} self.readers = {}