Merge pull request #1011 from saimn/readers

Refactor readers and remove MARKUP. Fixes #866
This commit is contained in:
Justin Mayer 2013-08-07 12:34:22 -07:00
commit 5a469dc2e3
11 changed files with 265 additions and 221 deletions

View file

@ -24,7 +24,7 @@ The logic is separated into different classes and concepts:
then passed to the generators. then passed to the generators.
* **Readers** are used to read from various formats (AsciiDoc, HTML, Markdown and * **Readers** are used to read from various formats (AsciiDoc, HTML, Markdown and
reStructuredText for now, but the system is extensible). Given a file, they reStructuredText for now, but the system is extensible). Given a file, they
return metadata (author, tags, category, etc.) and content (HTML-formatted). return metadata (author, tags, category, etc.) and content (HTML-formatted).
* **Generators** generate the different outputs. For instance, Pelican comes with * **Generators** generate the different outputs. For instance, Pelican comes with
@ -44,7 +44,7 @@ method that returns HTML content and some metadata.
Take a look at the Markdown reader:: Take a look at the Markdown reader::
class MarkdownReader(Reader): class MarkdownReader(BaseReader):
enabled = bool(Markdown) enabled = bool(Markdown)
def read(self, source_path): def read(self, source_path):

View file

@ -71,6 +71,7 @@ finalized pelican object invoked after al
- minifying js/css assets. - minifying js/css assets.
- notify/ping search engines with an updated sitemap. - notify/ping search engines with an updated sitemap.
generator_init generator invoked in the Generator.__init__ generator_init generator invoked in the Generator.__init__
readers_init readers invoked in the Readers.__init__
article_generate_context article_generator, metadata article_generate_context article_generator, metadata
article_generate_preread article_generator invoked before an article is read in ArticlesGenerator.generate_context; article_generate_preread article_generator invoked before an article is read in ArticlesGenerator.generate_context;
use if code needs to do something before every article is parsed use if code needs to do something before every article is parsed
@ -144,13 +145,13 @@ write and don't slow down pelican itself when they're not active.
No more talking, here is the example:: No more talking, here is the example::
from pelican import signals from pelican import signals
from pelican.readers import EXTENSIONS, Reader from pelican.readers import BaseReader
# Create a new reader class, inheriting from the pelican.reader.Reader # Create a new reader class, inheriting from the pelican.reader.BaseReader
class NewReader(Reader): class NewReader(BaseReader):
enabled = True # Yeah, you probably want that :-) enabled = True # Yeah, you probably want that :-)
# The list of extensions you want this reader to match with. # The list of file extensions you want this reader to match with.
# In the case multiple readers use the same extensions, the latest will # In the case multiple readers use the same extensions, the latest will
# win (so the one you're defining here, most probably). # win (so the one you're defining here, most probably).
file_extensions = ['yeah'] file_extensions = ['yeah']
@ -168,12 +169,12 @@ No more talking, here is the example::
return "Some content", parsed return "Some content", parsed
def add_reader(arg): def add_reader(readers):
EXTENSIONS['yeah'] = NewReader readers.reader_classes['yeah'] = NewReader
# this is how pelican works. # this is how pelican works.
def register(): def register():
signals.initialized.connect(add_reader) signals.readers_init.connect(add_reader)
Adding a new generator Adding a new generator

View file

@ -84,9 +84,10 @@ Setting name (default value) What doe
here or a single string representing one locale. here or a single string representing one locale.
When providing a list, all the locales will be tried When providing a list, all the locales will be tried
until one works. until one works.
`MARKUP` (``('rst', 'md')``) A list of available markup languages you want `READERS` (``{}``) A dict of file extensions / Reader classes to overwrite or
to use. For the moment, the only available values add file readers. For instance, to avoid processing .html files:
are `rst`, `md`, `markdown`, `mkd`, `mdown`, `html`, and `htm`. ``READERS = {'html': None}``. Or to add a custom reader for the
`foo` extension: ``READERS = {'foo': FooReader}``
`IGNORE_FILES` (``['.#*']``) A list of file globbing patterns to match against the `IGNORE_FILES` (``['.#*']``) A list of file globbing patterns to match against the
source files to be ignored by the processor. For example, source files to be ignored by the processor. For example,
the default ``['.#*']`` will ignore emacs lock files. the default ``['.#*']`` will ignore emacs lock files.

View file

@ -17,6 +17,7 @@ from pelican.generators import (ArticlesGenerator, PagesGenerator,
StaticGenerator, SourceFileGenerator, StaticGenerator, SourceFileGenerator,
TemplatePagesGenerator) TemplatePagesGenerator)
from pelican.log import init from pelican.log import init
from pelican.readers import Readers
from pelican.settings import read_settings from pelican.settings import read_settings
from pelican.utils import clean_output_dir, folder_watcher, file_watcher from pelican.utils import clean_output_dir, folder_watcher, file_watcher
from pelican.writers import Writer from pelican.writers import Writer
@ -46,7 +47,6 @@ class Pelican(object):
self.path = settings['PATH'] self.path = settings['PATH']
self.theme = settings['THEME'] self.theme = settings['THEME']
self.output_path = settings['OUTPUT_PATH'] self.output_path = settings['OUTPUT_PATH']
self.markup = settings['MARKUP']
self.ignore_files = settings['IGNORE_FILES'] self.ignore_files = settings['IGNORE_FILES']
self.delete_outputdir = settings['DELETE_OUTPUT_DIRECTORY'] self.delete_outputdir = settings['DELETE_OUTPUT_DIRECTORY']
self.output_retention = settings['OUTPUT_RETENTION'] self.output_retention = settings['OUTPUT_RETENTION']
@ -164,7 +164,6 @@ class Pelican(object):
path=self.path, path=self.path,
theme=self.theme, theme=self.theme,
output_path=self.output_path, output_path=self.output_path,
markup=self.markup,
) for cls in self.get_generator_classes() ) for cls in self.get_generator_classes()
] ]
@ -236,10 +235,6 @@ def parse_arguments():
help='Where to output the generated files. If not specified, a ' help='Where to output the generated files. If not specified, a '
'directory will be created, named "output" in the current path.') 'directory will be created, named "output" in the current path.')
parser.add_argument('-m', '--markup', dest='markup',
help='The list of markup language to use (rst or md). Please indicate '
'them separated by commas.')
parser.add_argument('-s', '--settings', dest='settings', parser.add_argument('-s', '--settings', dest='settings',
help='The settings of the application, this is automatically set to ' help='The settings of the application, this is automatically set to '
'{0} if a file exists with this name.'.format(DEFAULT_CONFIG_NAME)) '{0} if a file exists with this name.'.format(DEFAULT_CONFIG_NAME))
@ -279,8 +274,6 @@ def get_config(args):
if args.output: if args.output:
config['OUTPUT_PATH'] = \ config['OUTPUT_PATH'] = \
os.path.abspath(os.path.expanduser(args.output)) os.path.abspath(os.path.expanduser(args.output))
if args.markup:
config['MARKUP'] = [a.strip().lower() for a in args.markup.split(',')]
if args.theme: if args.theme:
abstheme = os.path.abspath(os.path.expanduser(args.theme)) abstheme = os.path.abspath(os.path.expanduser(args.theme))
config['THEME'] = abstheme if os.path.exists(abstheme) else args.theme config['THEME'] = abstheme if os.path.exists(abstheme) else args.theme
@ -296,8 +289,6 @@ def get_config(args):
for key in config: for key in config:
if key in ('PATH', 'OUTPUT_PATH', 'THEME'): if key in ('PATH', 'OUTPUT_PATH', 'THEME'):
config[key] = config[key].decode(enc) config[key] = config[key].decode(enc)
if key == "MARKUP":
config[key] = [a.decode(enc) for a in config[key]]
return config return config
@ -315,16 +306,17 @@ def get_instance(args):
module = __import__(module) module = __import__(module)
cls = getattr(module, cls_name) cls = getattr(module, cls_name)
return cls(settings) return cls(settings), settings
def main(): def main():
args = parse_arguments() args = parse_arguments()
init(args.verbosity) init(args.verbosity)
pelican = get_instance(args) pelican, settings = get_instance(args)
readers = Readers(settings)
watchers = {'content': folder_watcher(pelican.path, watchers = {'content': folder_watcher(pelican.path,
pelican.markup, readers.extensions,
pelican.ignore_files), pelican.ignore_files),
'theme': folder_watcher(pelican.theme, 'theme': folder_watcher(pelican.theme,
[''], [''],
@ -333,8 +325,8 @@ def main():
try: try:
if args.autoreload: if args.autoreload:
print(' --- AutoReload Mode: Monitoring `content`, `theme` and `settings`' print(' --- AutoReload Mode: Monitoring `content`, `theme` and'
' for changes. ---') ' `settings` for changes. ---')
while True: while True:
try: try:

View file

@ -13,16 +13,13 @@ from functools import partial
from itertools import chain, groupby from itertools import chain, groupby
from operator import attrgetter, itemgetter from operator import attrgetter, itemgetter
from jinja2 import ( from jinja2 import (Environment, FileSystemLoader, PrefixLoader, ChoiceLoader,
Environment, FileSystemLoader, PrefixLoader, ChoiceLoader, BaseLoader, BaseLoader, TemplateNotFound)
TemplateNotFound
)
from pelican.contents import Article, Page, Static, is_valid_content from pelican.contents import Article, Page, Static, is_valid_content
from pelican.readers import read_file from pelican.readers import Readers
from pelican.utils import copy, process_translations, mkdir_p, DateFormatter from pelican.utils import copy, process_translations, mkdir_p, DateFormatter
from pelican import signals from pelican import signals
import pelican.utils
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -31,23 +28,23 @@ logger = logging.getLogger(__name__)
class Generator(object): class Generator(object):
"""Baseclass generator""" """Baseclass generator"""
def __init__(self, context, settings, path, theme, output_path, markup, def __init__(self, context, settings, path, theme, output_path, **kwargs):
**kwargs):
self.context = context self.context = context
self.settings = settings self.settings = settings
self.path = path self.path = path
self.theme = theme self.theme = theme
self.output_path = output_path self.output_path = output_path
self.markup = markup
for arg, value in kwargs.items(): for arg, value in kwargs.items():
setattr(self, arg, value) setattr(self, arg, value)
self.readers = Readers(self.settings)
# templates cache # templates cache
self._templates = {} self._templates = {}
self._templates_path = [] self._templates_path = []
self._templates_path.append(os.path.expanduser( self._templates_path.append(os.path.expanduser(
os.path.join(self.theme, 'templates'))) os.path.join(self.theme, 'templates')))
self._templates_path += self.settings['EXTRA_TEMPLATES_PATHS'] self._templates_path += self.settings['EXTRA_TEMPLATES_PATHS']
theme_path = os.path.dirname(os.path.abspath(__file__)) theme_path = os.path.dirname(os.path.abspath(__file__))
@ -85,9 +82,8 @@ class Generator(object):
try: try:
self._templates[name] = self.env.get_template(name + '.html') self._templates[name] = self.env.get_template(name + '.html')
except TemplateNotFound: except TemplateNotFound:
raise Exception( raise Exception('[templates] unable to load %s.html from %s'
('[templates] unable to load %s.html from %s' % (name, self._templates_path))
% (name, self._templates_path)))
return self._templates[name] return self._templates[name]
def _include_path(self, path, extensions=None): def _include_path(self, path, extensions=None):
@ -98,7 +94,7 @@ class Generator(object):
extensions are allowed) extensions are allowed)
""" """
if extensions is None: if extensions is None:
extensions = tuple(self.markup) extensions = tuple(self.readers.extensions)
basename = os.path.basename(path) basename = os.path.basename(path)
if extensions is False or basename.endswith(extensions): if extensions is False or basename.endswith(extensions):
return True return True
@ -388,9 +384,9 @@ class ArticlesGenerator(Generator):
self.settings['ARTICLE_DIR'], self.settings['ARTICLE_DIR'],
exclude=self.settings['ARTICLE_EXCLUDES']): exclude=self.settings['ARTICLE_EXCLUDES']):
try: try:
article = read_file( article = self.readers.read_file(
base_path=self.path, path=f, content_class=Article, base_path=self.path, path=f, content_class=Article,
settings=self.settings, context=self.context, context=self.context,
preread_signal=signals.article_generator_preread, preread_signal=signals.article_generator_preread,
preread_sender=self, preread_sender=self,
context_signal=signals.article_generator_context, context_signal=signals.article_generator_context,
@ -496,9 +492,9 @@ class PagesGenerator(Generator):
self.settings['PAGE_DIR'], self.settings['PAGE_DIR'],
exclude=self.settings['PAGE_EXCLUDES']): exclude=self.settings['PAGE_EXCLUDES']):
try: try:
page = read_file( page = self.readers.read_file(
base_path=self.path, path=f, content_class=Page, base_path=self.path, path=f, content_class=Page,
settings=self.settings, context=self.context, context=self.context,
preread_signal=signals.page_generator_preread, preread_signal=signals.page_generator_preread,
preread_sender=self, preread_sender=self,
context_signal=signals.page_generator_context, context_signal=signals.page_generator_context,
@ -557,10 +553,9 @@ class StaticGenerator(Generator):
for static_path in self.settings['STATIC_PATHS']: for static_path in self.settings['STATIC_PATHS']:
for f in self.get_files( for f in self.get_files(
static_path, extensions=False): static_path, extensions=False):
static = read_file( static = self.readers.read_file(
base_path=self.path, path=f, content_class=Static, base_path=self.path, path=f, content_class=Static,
fmt='static', fmt='static', context=self.context,
settings=self.settings, context=self.context,
preread_signal=signals.static_generator_preread, preread_signal=signals.static_generator_preread,
preread_sender=self, preread_sender=self,
context_signal=signals.static_generator_context, context_signal=signals.static_generator_context,

View file

@ -34,10 +34,10 @@ try:
except ImportError: except ImportError:
from HTMLParser import HTMLParser from HTMLParser import HTMLParser
from pelican import signals
from pelican.contents import Page, Category, Tag, Author from pelican.contents import Page, Category, Tag, Author
from pelican.utils import get_date, pelican_open from pelican.utils import get_date, pelican_open
logger = logging.getLogger(__name__)
METADATA_PROCESSORS = { METADATA_PROCESSORS = {
'tags': lambda x, y: [Tag(tag, y) for tag in x.split(',')], 'tags': lambda x, y: [Tag(tag, y) for tag in x.split(',')],
@ -50,7 +50,19 @@ METADATA_PROCESSORS = {
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class Reader(object): class BaseReader(object):
"""Base class to read files.
This class is used to process static files, and it can be inherited for
other types of file. A Reader class must have the following attributes:
- enabled: (boolean) tell if the Reader class is enabled. It
generally depends on the import of some dependency.
- file_extensions: a list of file extensions that the Reader will process.
- extensions: a list of extensions to use in the reader (typical use is
Markdown).
"""
enabled = True enabled = True
file_extensions = ['static'] file_extensions = ['static']
extensions = None extensions = None
@ -110,7 +122,9 @@ class PelicanHTMLTranslator(HTMLTranslator):
return HTMLTranslator.visit_image(self, node) return HTMLTranslator.visit_image(self, node)
class RstReader(Reader): class RstReader(BaseReader):
"""Reader for reStructuredText files"""
enabled = bool(docutils) enabled = bool(docutils)
file_extensions = ['rst'] file_extensions = ['rst']
@ -166,7 +180,9 @@ class RstReader(Reader):
return content, metadata return content, metadata
class MarkdownReader(Reader): class MarkdownReader(BaseReader):
"""Reader for Markdown files"""
enabled = bool(Markdown) enabled = bool(Markdown)
file_extensions = ['md', 'markdown', 'mkd', 'mdown'] file_extensions = ['md', 'markdown', 'mkd', 'mdown']
@ -174,7 +190,6 @@ class MarkdownReader(Reader):
super(MarkdownReader, self).__init__(*args, **kwargs) super(MarkdownReader, self).__init__(*args, **kwargs)
self.extensions = self.settings['MD_EXTENSIONS'] self.extensions = self.settings['MD_EXTENSIONS']
self.extensions.append('meta') self.extensions.append('meta')
self._md = Markdown(extensions=self.extensions)
def _parse_metadata(self, meta): def _parse_metadata(self, meta):
"""Return the dict containing document metadata""" """Return the dict containing document metadata"""
@ -194,6 +209,7 @@ class MarkdownReader(Reader):
def read(self, source_path): def read(self, source_path):
"""Parse content and metadata of markdown files""" """Parse content and metadata of markdown files"""
self._md = Markdown(extensions=self.extensions)
with pelican_open(source_path) as text: with pelican_open(source_path) as text:
content = self._md.convert(text) content = self._md.convert(text)
@ -201,8 +217,9 @@ class MarkdownReader(Reader):
return content, metadata return content, metadata
class HTMLReader(Reader): class HTMLReader(BaseReader):
"""Parses HTML files as input, looking for meta, title, and body tags""" """Parses HTML files as input, looking for meta, title, and body tags"""
file_extensions = ['htm', 'html'] file_extensions = ['htm', 'html']
enabled = True enabled = True
@ -312,7 +329,9 @@ class HTMLReader(Reader):
return parser.body, metadata return parser.body, metadata
class AsciiDocReader(Reader): class AsciiDocReader(BaseReader):
"""Reader for AsciiDoc files"""
enabled = bool(asciidoc) enabled = bool(asciidoc)
file_extensions = ['asc'] file_extensions = ['asc']
default_options = ["--no-header-footer", "-a newline=\\n"] default_options = ["--no-header-footer", "-a newline=\\n"]
@ -344,109 +363,142 @@ class AsciiDocReader(Reader):
return content, metadata return content, metadata
EXTENSIONS = {} class Readers(object):
"""Interface for all readers.
for cls in [Reader] + Reader.__subclasses__(): This class contains a mapping of file extensions / Reader classes, to know
for ext in cls.file_extensions: which Reader class must be used to read a file (based on its extension).
EXTENSIONS[ext] = cls This is customizable both with the 'READERS' setting, and with the
'readers_init' signal for plugins.
"""
def __init__(self, settings=None):
self.settings = settings or {}
self.readers = {}
self.reader_classes = {}
def read_file(base_path, path, content_class=Page, fmt=None, for cls in [BaseReader] + BaseReader.__subclasses__():
settings=None, context=None, for ext in cls.file_extensions:
preread_signal=None, preread_sender=None, self.reader_classes[ext] = cls
context_signal=None, context_sender=None):
"""Return a content object parsed with the given format.""" if self.settings['READERS']:
path = os.path.abspath(os.path.join(base_path, path)) self.reader_classes.update(self.settings['READERS'])
source_path = os.path.relpath(path, base_path)
base, ext = os.path.splitext(os.path.basename(path)) signals.readers_init.send(self)
logger.debug('read file {} -> {}'.format(
for fmt, reader_class in self.reader_classes.items():
if not reader_class:
continue
if not reader_class.enabled:
logger.warning('Missing dependencies for {}'.format(fmt))
continue
self.readers[fmt] = reader_class(self.settings)
settings_key = '%s_EXTENSIONS' % fmt.upper()
if settings_key in self.settings:
self.readers[fmt].extensions = self.settings[settings_key]
@property
def extensions(self):
return self.readers.keys()
def read_file(self, base_path, path, content_class=Page, fmt=None,
context=None, preread_signal=None, preread_sender=None,
context_signal=None, context_sender=None):
"""Return a content object parsed with the given format."""
path = os.path.abspath(os.path.join(base_path, path))
source_path = os.path.relpath(path, base_path)
logger.debug('read file {} -> {}'.format(
source_path, content_class.__name__)) source_path, content_class.__name__))
if not fmt:
fmt = ext[1:]
if fmt not in EXTENSIONS: if not fmt:
raise TypeError('Pelican does not know how to parse {}'.format(path)) _, ext = os.path.splitext(os.path.basename(path))
fmt = ext[1:]
if preread_signal: if fmt not in self.readers:
logger.debug('signal {}.send({})'.format( raise TypeError(
'Pelican does not know how to parse {}'.format(path))
if preread_signal:
logger.debug('signal {}.send({})'.format(
preread_signal, preread_sender)) preread_signal, preread_sender))
preread_signal.send(preread_sender) preread_signal.send(preread_sender)
if settings is None: reader = self.readers[fmt]
settings = {}
reader_class = EXTENSIONS[fmt] metadata = default_metadata(
if not reader_class.enabled: settings=self.settings, process=reader.process_metadata)
raise ValueError('Missing dependencies for {}'.format(fmt)) metadata.update(path_metadata(
full_path=path, source_path=source_path,
reader = reader_class(settings) settings=self.settings))
metadata.update(parse_path_metadata(
settings_key = '%s_EXTENSIONS' % fmt.upper() source_path=source_path, settings=self.settings,
if settings and settings_key in settings:
reader.extensions = settings[settings_key]
metadata = default_metadata(
settings=settings, process=reader.process_metadata)
metadata.update(path_metadata(
full_path=path, source_path=source_path, settings=settings))
metadata.update(parse_path_metadata(
source_path=source_path, settings=settings,
process=reader.process_metadata)) process=reader.process_metadata))
content, reader_metadata = reader.read(path)
metadata.update(reader_metadata)
# create warnings for all images with empty alt (up to a certain number) content, reader_metadata = reader.read(path)
# as they are really likely to be accessibility flaws metadata.update(reader_metadata)
if content:
# find images with empty alt
imgs = re.compile(r"""
(?:
# src before alt
<img
[^\>]*
src=(['"])(.*)\1
[^\>]*
alt=(['"])\3
)|(?:
# alt before src
<img
[^\>]*
alt=(['"])\4
[^\>]*
src=(['"])(.*)\5
)
""", re.X)
matches = re.findall(imgs, content)
# find a correct threshold
nb_warnings = 10
if len(matches) == nb_warnings + 1:
nb_warnings += 1 # avoid bad looking case
# print one warning per image with empty alt until threshold
for match in matches[:nb_warnings]:
logger.warning('Empty alt attribute for image {} in {}'.format(
os.path.basename(match[1] + match[5]), path))
# print one warning for the other images with empty alt
if len(matches) > nb_warnings:
logger.warning('{} other images with empty alt attributes'.format(
len(matches) - nb_warnings))
# eventually filter the content with typogrify if asked so if content:
if content and settings and settings['TYPOGRIFY']: # find images with empty alt
from typogrify.filters import typogrify find_empty_alt(content, path)
content = typogrify(content)
metadata['title'] = typogrify(metadata['title'])
if context_signal: # eventually filter the content with typogrify if asked so
logger.debug('signal {}.send({}, <metadata>)'.format( if content and self.settings['TYPOGRIFY']:
from typogrify.filters import typogrify
content = typogrify(content)
metadata['title'] = typogrify(metadata['title'])
if context_signal:
logger.debug('signal {}.send({}, <metadata>)'.format(
context_signal, context_sender)) context_signal, context_sender))
context_signal.send(context_sender, metadata=metadata) context_signal.send(context_sender, metadata=metadata)
return content_class(
content=content, return content_class(content=content, metadata=metadata,
metadata=metadata, settings=self.settings, source_path=path,
settings=settings, context=context)
source_path=path,
context=context)
def find_empty_alt(content, path):
    """Warn about images in ``content`` whose ``alt`` attribute is empty.

    Empty ``alt`` attributes are very likely accessibility flaws, so one
    warning is logged per offending ``<img>`` tag, up to a threshold; any
    remaining images are summarised in a single extra warning.

    :param content: HTML string to scan.
    :param path: path of the source file; only used in the log messages.
    """
    # The src value is matched as "any characters except its own closing
    # quote".  A greedy ``(.*)`` would run on to the last matching quote of
    # the line and swallow later attributes (or even later tags), which
    # ended up in the warning as a garbled file name.
    imgs = re.compile(r"""
        (?:
            # src before alt
            <img
            [^\>]*
            src=(['"])((?:(?!\1).)*)\1
            [^\>]*
            alt=(['"])\3
        )|(?:
            # alt before src
            <img
            [^\>]*
            alt=(['"])\4
            [^\>]*
            src=(['"])((?:(?!\5).)*)\5
        )
        """, re.X)
    matches = imgs.findall(content)

    # find a correct threshold
    nb_warnings = 10
    if len(matches) == nb_warnings + 1:
        # avoid the bad looking case of "1 other images ..."
        nb_warnings += 1

    # print one warning per image with empty alt until threshold
    for match in matches[:nb_warnings]:
        # Only one alternative of the pattern matches, so exactly one of
        # the two src groups is non-empty; concatenating picks that one.
        logger.warning('Empty alt attribute for image {} in {}'.format(
            os.path.basename(match[1] + match[5]), path))

    # print one warning for the other images with empty alt
    if len(matches) > nb_warnings:
        logger.warning('{} other images with empty alt attributes'
                       .format(len(matches) - nb_warnings))
def default_metadata(settings=None, process=None): def default_metadata(settings=None, process=None):
@ -469,7 +521,7 @@ def path_metadata(full_path, source_path, settings=None):
metadata['date'] = datetime.datetime.fromtimestamp( metadata['date'] = datetime.datetime.fromtimestamp(
os.stat(full_path).st_ctime) os.stat(full_path).st_ctime)
metadata.update(settings.get('EXTRA_PATH_METADATA', {}).get( metadata.update(settings.get('EXTRA_PATH_METADATA', {}).get(
source_path, {})) source_path, {}))
return metadata return metadata
@ -482,7 +534,7 @@ def parse_path_metadata(source_path, settings=None, process=None):
... 'PATH_METADATA': ... 'PATH_METADATA':
... '(?P<category>[^/]*)/(?P<date>\d{4}-\d{2}-\d{2})/.*', ... '(?P<category>[^/]*)/(?P<date>\d{4}-\d{2}-\d{2})/.*',
... } ... }
>>> reader = Reader(settings=settings) >>> reader = BaseReader(settings=settings)
>>> metadata = parse_path_metadata( >>> metadata = parse_path_metadata(
... source_path='my-cat/2013-01-01/my-slug.html', ... source_path='my-cat/2013-01-01/my-slug.html',
... settings=settings, ... settings=settings,
@ -498,13 +550,12 @@ def parse_path_metadata(source_path, settings=None, process=None):
subdir = os.path.basename(dirname) subdir = os.path.basename(dirname)
if settings: if settings:
checks = [] checks = []
for key,data in [('FILENAME_METADATA', base), for key, data in [('FILENAME_METADATA', base),
('PATH_METADATA', source_path), ('PATH_METADATA', source_path)]:
]:
checks.append((settings.get(key, None), data)) checks.append((settings.get(key, None), data))
if settings.get('USE_FOLDER_AS_CATEGORY', None): if settings.get('USE_FOLDER_AS_CATEGORY', None):
checks.insert(0, ('(?P<category>.*)', subdir)) checks.insert(0, ('(?P<category>.*)', subdir))
for regexp,data in checks: for regexp, data in checks:
if regexp and data: if regexp and data:
match = re.match(regexp, data) match = re.match(regexp, data)
if match: if match:

View file

@ -33,7 +33,7 @@ DEFAULT_CONFIG = {
'PAGE_EXCLUDES': (), 'PAGE_EXCLUDES': (),
'THEME': DEFAULT_THEME, 'THEME': DEFAULT_THEME,
'OUTPUT_PATH': 'output', 'OUTPUT_PATH': 'output',
'MARKUP': ('rst', 'md'), 'READERS': {},
'STATIC_PATHS': ['images', ], 'STATIC_PATHS': ['images', ],
'THEME_STATIC_DIR': 'theme', 'THEME_STATIC_DIR': 'theme',
'THEME_STATIC_PATHS': ['static', ], 'THEME_STATIC_PATHS': ['static', ],
@ -112,6 +112,7 @@ DEFAULT_CONFIG = {
'SLUG_SUBSTITUTIONS': (), 'SLUG_SUBSTITUTIONS': (),
} }
def read_settings(path=None, override=None): def read_settings(path=None, override=None):
if path: if path:
local_settings = get_settings_from_file(path) local_settings = get_settings_from_file(path)
@ -120,7 +121,7 @@ def read_settings(path=None, override=None):
if p in local_settings and local_settings[p] is not None \ if p in local_settings and local_settings[p] is not None \
and not isabs(local_settings[p]): and not isabs(local_settings[p]):
absp = os.path.abspath(os.path.normpath(os.path.join( absp = os.path.abspath(os.path.normpath(os.path.join(
os.path.dirname(path), local_settings[p]))) os.path.dirname(path), local_settings[p])))
if p not in ('THEME', 'PLUGIN_PATH') or os.path.exists(absp): if p not in ('THEME', 'PLUGIN_PATH') or os.path.exists(absp):
local_settings[p] = absp local_settings[p] = absp
else: else:
@ -138,7 +139,7 @@ def get_settings_from_module(module=None, default_settings=DEFAULT_CONFIG):
context = copy.deepcopy(default_settings) context = copy.deepcopy(default_settings)
if module is not None: if module is not None:
context.update( context.update(
(k, v) for k, v in inspect.getmembers(module) if k.isupper()) (k, v) for k, v in inspect.getmembers(module) if k.isupper())
return context return context
@ -221,17 +222,18 @@ def configure_settings(settings):
settings['FEED_DOMAIN'] = settings['SITEURL'] settings['FEED_DOMAIN'] = settings['SITEURL']
# Warn if feeds are generated with both SITEURL & FEED_DOMAIN undefined # Warn if feeds are generated with both SITEURL & FEED_DOMAIN undefined
feed_keys = ['FEED_ATOM', 'FEED_RSS', feed_keys = [
'FEED_ALL_ATOM', 'FEED_ALL_RSS', 'FEED_ATOM', 'FEED_RSS',
'CATEGORY_FEED_ATOM', 'CATEGORY_FEED_RSS', 'FEED_ALL_ATOM', 'FEED_ALL_RSS',
'TAG_FEED_ATOM', 'TAG_FEED_RSS', 'CATEGORY_FEED_ATOM', 'CATEGORY_FEED_RSS',
'TRANSLATION_FEED_ATOM', 'TRANSLATION_FEED_RSS', 'TAG_FEED_ATOM', 'TAG_FEED_RSS',
] 'TRANSLATION_FEED_ATOM', 'TRANSLATION_FEED_RSS',
]
if any(settings.get(k) for k in feed_keys): if any(settings.get(k) for k in feed_keys):
if not settings.get('SITEURL'): if not settings.get('SITEURL'):
logger.warning('Feeds generated without SITEURL set properly may not' logger.warning('Feeds generated without SITEURL set properly may'
' be valid') ' not be valid')
if not 'TIMEZONE' in settings: if not 'TIMEZONE' in settings:
logger.warning( logger.warning(
@ -255,26 +257,26 @@ def configure_settings(settings):
# Save people from accidentally setting a string rather than a list # Save people from accidentally setting a string rather than a list
path_keys = ( path_keys = (
'ARTICLE_EXCLUDES', 'ARTICLE_EXCLUDES',
'DEFAULT_METADATA', 'DEFAULT_METADATA',
'DIRECT_TEMPLATES', 'DIRECT_TEMPLATES',
'EXTRA_TEMPLATES_PATHS', 'EXTRA_TEMPLATES_PATHS',
'FILES_TO_COPY', 'FILES_TO_COPY',
'IGNORE_FILES', 'IGNORE_FILES',
'JINJA_EXTENSIONS', 'JINJA_EXTENSIONS',
'MARKUP', 'PAGINATED_DIRECT_TEMPLATES',
'PAGINATED_DIRECT_TEMPLATES', 'PLUGINS',
'PLUGINS', 'STATIC_PATHS',
'STATIC_PATHS', 'THEME_STATIC_PATHS',
'THEME_STATIC_PATHS',) )
for PATH_KEY in filter(lambda k: k in settings, path_keys): for PATH_KEY in filter(lambda k: k in settings, path_keys):
if isinstance(settings[PATH_KEY], six.string_types): if isinstance(settings[PATH_KEY], six.string_types):
logger.warning("Detected misconfiguration with %s setting (must " logger.warning("Detected misconfiguration with %s setting "
"be a list), falling back to the default" "(must be a list), falling back to the default"
% PATH_KEY) % PATH_KEY)
settings[PATH_KEY] = DEFAULT_CONFIG[PATH_KEY] settings[PATH_KEY] = DEFAULT_CONFIG[PATH_KEY]
for old,new,doc in [ for old, new, doc in [
('LESS_GENERATOR', 'the Webassets plugin', None), ('LESS_GENERATOR', 'the Webassets plugin', None),
('FILES_TO_COPY', 'STATIC_PATHS and EXTRA_PATH_METADATA', ('FILES_TO_COPY', 'STATIC_PATHS and EXTRA_PATH_METADATA',
'https://github.com/getpelican/pelican/blob/master/docs/settings.rst#path-metadata'), 'https://github.com/getpelican/pelican/blob/master/docs/settings.rst#path-metadata'),

View file

@ -8,6 +8,10 @@ initialized = signal('pelican_initialized')
get_generators = signal('get_generators') get_generators = signal('get_generators')
finalized = signal('pelican_finalized') finalized = signal('pelican_finalized')
# Reader-level signals
readers_init = signal('readers_init')
# Generator-level signals # Generator-level signals
generator_init = signal('generator_init') generator_init = signal('generator_init')

View file

@ -19,9 +19,9 @@ CONTENT_DIR = os.path.join(CUR_DIR, 'content')
class TestGenerator(unittest.TestCase): class TestGenerator(unittest.TestCase):
def setUp(self): def setUp(self):
self.settings = get_settings() self.settings = get_settings()
self.settings['READERS'] = {'asc': None}
self.generator = Generator(self.settings.copy(), self.settings, self.generator = Generator(self.settings.copy(), self.settings,
CUR_DIR, self.settings['THEME'], None, CUR_DIR, self.settings['THEME'], None)
self.settings['MARKUP'])
def test_include_path(self): def test_include_path(self):
filename = os.path.join(CUR_DIR, 'content', 'article.rst') filename = os.path.join(CUR_DIR, 'content', 'article.rst')
@ -30,10 +30,6 @@ class TestGenerator(unittest.TestCase):
self.assertTrue(include_path(filename, extensions=('rst',))) self.assertTrue(include_path(filename, extensions=('rst',)))
self.assertFalse(include_path(filename, extensions=('md',))) self.assertFalse(include_path(filename, extensions=('md',)))
# markup must be a tuple, test that this works also with a list
self.generator.markup = ['rst', 'md']
self.assertTrue(include_path(filename))
class TestArticlesGenerator(unittest.TestCase): class TestArticlesGenerator(unittest.TestCase):
@ -42,11 +38,11 @@ class TestArticlesGenerator(unittest.TestCase):
settings = get_settings(filenames={}) settings = get_settings(filenames={})
settings['DEFAULT_CATEGORY'] = 'Default' settings['DEFAULT_CATEGORY'] = 'Default'
settings['DEFAULT_DATE'] = (1970, 1, 1) settings['DEFAULT_DATE'] = (1970, 1, 1)
settings['READERS'] = {'asc': None}
cls.generator = ArticlesGenerator( cls.generator = ArticlesGenerator(
context=settings.copy(), settings=settings, context=settings.copy(), settings=settings,
path=CONTENT_DIR, theme=settings['THEME'], path=CONTENT_DIR, theme=settings['THEME'], output_path=None)
output_path=None, markup=settings['MARKUP'])
cls.generator.generate_context() cls.generator.generate_context()
cls.articles = [[page.title, page.status, page.category.name, cls.articles = [[page.title, page.status, page.category.name,
page.template] for page in cls.generator.articles] page.template] for page in cls.generator.articles]
@ -55,8 +51,7 @@ class TestArticlesGenerator(unittest.TestCase):
settings = get_settings() settings = get_settings()
generator = ArticlesGenerator( generator = ArticlesGenerator(
context=settings, settings=settings, context=settings, settings=settings,
path=None, theme=settings['THEME'], path=None, theme=settings['THEME'], output_path=None)
output_path=None, markup=settings['MARKUP'])
writer = MagicMock() writer = MagicMock()
generator.generate_feeds(writer) generator.generate_feeds(writer)
writer.write_feed.assert_called_with([], settings, writer.write_feed.assert_called_with([], settings,
@ -64,8 +59,7 @@ class TestArticlesGenerator(unittest.TestCase):
generator = ArticlesGenerator( generator = ArticlesGenerator(
context=settings, settings=get_settings(FEED_ALL_ATOM=None), context=settings, settings=get_settings(FEED_ALL_ATOM=None),
path=None, theme=settings['THEME'], path=None, theme=settings['THEME'], output_path=None)
output_path=None, markup=None)
writer = MagicMock() writer = MagicMock()
generator.generate_feeds(writer) generator.generate_feeds(writer)
self.assertFalse(writer.write_feed.called) self.assertFalse(writer.write_feed.called)
@ -74,26 +68,33 @@ class TestArticlesGenerator(unittest.TestCase):
articles_expected = [ articles_expected = [
['Article title', 'published', 'Default', 'article'], ['Article title', 'published', 'Default', 'article'],
['Article with markdown and summary metadata single', 'published',
'Default', 'article'],
['Article with markdown and summary metadata multi', 'published', ['Article with markdown and summary metadata multi', 'published',
'Default', 'article'], 'Default', 'article'],
['Article with markdown and summary metadata single', 'published',
'Default', 'article'],
['Article with markdown containing footnotes', 'published',
'Default', 'article'],
['Article with template', 'published', 'Default', 'custom'], ['Article with template', 'published', 'Default', 'custom'],
['Test md File', 'published', 'test', 'article'],
['Rst with filename metadata', 'published', 'yeah', 'article'], ['Rst with filename metadata', 'published', 'yeah', 'article'],
['Test Markdown extensions', 'published', 'Default', 'article'], ['Test Markdown extensions', 'published', 'Default', 'article'],
['Test markdown File', 'published', 'test', 'article'],
['Test md File', 'published', 'test', 'article'],
['Test mdown File', 'published', 'test', 'article'],
['Test mkd File', 'published', 'test', 'article'],
['This is a super article !', 'published', 'Yeah', 'article'], ['This is a super article !', 'published', 'Yeah', 'article'],
['This is a super article !', 'published', 'Yeah', 'article'],
['This is a super article !', 'published', 'yeah', 'article'],
['This is a super article !', 'published', 'yeah', 'article'],
['This is a super article !', 'published', 'yeah', 'article'],
['This is a super article !', 'published', 'Default', 'article'],
['This is an article with category !', 'published', 'yeah', ['This is an article with category !', 'published', 'yeah',
'article'], 'article'],
['This is an article without category !', 'published', 'Default', ['This is an article without category !', 'published', 'Default',
'article'], 'article'],
['This is an article without category !', 'published', ['This is an article without category !', 'published',
'TestCategory', 'article'], 'TestCategory', 'article'],
['This is a super article !', 'published', 'yeah', 'article'], ['マックOS X 10.8でパイソンとVirtualenvをインストールと設定', 'published',
['マックOS X 10.8でパイソンとVirtualenvをインストールと設定', '指導書', 'article'],
'published', '指導書', 'article'],
['Article with markdown containing footnotes', 'published',
'Default', 'article']
] ]
self.assertEqual(sorted(articles_expected), sorted(self.articles)) self.assertEqual(sorted(articles_expected), sorted(self.articles))
@ -121,11 +122,11 @@ class TestArticlesGenerator(unittest.TestCase):
settings['DEFAULT_CATEGORY'] = 'Default' settings['DEFAULT_CATEGORY'] = 'Default'
settings['DEFAULT_DATE'] = (1970, 1, 1) settings['DEFAULT_DATE'] = (1970, 1, 1)
settings['USE_FOLDER_AS_CATEGORY'] = False settings['USE_FOLDER_AS_CATEGORY'] = False
settings['READERS'] = {'asc': None}
settings['filenames'] = {} settings['filenames'] = {}
generator = ArticlesGenerator( generator = ArticlesGenerator(
context=settings.copy(), settings=settings, context=settings.copy(), settings=settings,
path=CONTENT_DIR, theme=settings['THEME'], path=CONTENT_DIR, theme=settings['THEME'], output_path=None)
output_path=None, markup=settings['MARKUP'])
generator.generate_context() generator.generate_context()
# test for name # test for name
# categories are grouped by slug; if two categories have the same slug # categories are grouped by slug; if two categories have the same slug
@ -147,8 +148,7 @@ class TestArticlesGenerator(unittest.TestCase):
settings = get_settings(filenames={}) settings = get_settings(filenames={})
generator = ArticlesGenerator( generator = ArticlesGenerator(
context=settings, settings=settings, context=settings, settings=settings,
path=None, theme=settings['THEME'], path=None, theme=settings['THEME'], output_path=None)
output_path=None, markup=settings['MARKUP'])
write = MagicMock() write = MagicMock()
generator.generate_direct_templates(write) generator.generate_direct_templates(write)
write.assert_called_with("archives.html", write.assert_called_with("archives.html",
@ -162,8 +162,7 @@ class TestArticlesGenerator(unittest.TestCase):
settings['ARCHIVES_SAVE_AS'] = 'archives/index.html' settings['ARCHIVES_SAVE_AS'] = 'archives/index.html'
generator = ArticlesGenerator( generator = ArticlesGenerator(
context=settings, settings=settings, context=settings, settings=settings,
path=None, theme=settings['THEME'], path=None, theme=settings['THEME'], output_path=None)
output_path=None, markup=settings['MARKUP'])
write = MagicMock() write = MagicMock()
generator.generate_direct_templates(write) generator.generate_direct_templates(write)
write.assert_called_with("archives/index.html", write.assert_called_with("archives/index.html",
@ -178,8 +177,7 @@ class TestArticlesGenerator(unittest.TestCase):
settings['ARCHIVES_SAVE_AS'] = 'archives/index.html' settings['ARCHIVES_SAVE_AS'] = 'archives/index.html'
generator = ArticlesGenerator( generator = ArticlesGenerator(
context=settings, settings=settings, context=settings, settings=settings,
path=None, theme=settings['THEME'], path=None, theme=settings['THEME'], output_path=None)
output_path=None, markup=settings['MARKUP'])
write = MagicMock() write = MagicMock()
generator.generate_direct_templates(write) generator.generate_direct_templates(write)
write.assert_called_count == 0 write.assert_called_count == 0
@ -212,8 +210,7 @@ class TestPageGenerator(unittest.TestCase):
generator = PagesGenerator( generator = PagesGenerator(
context=settings.copy(), settings=settings, context=settings.copy(), settings=settings,
path=CUR_DIR, theme=settings['THEME'], path=CUR_DIR, theme=settings['THEME'], output_path=None)
output_path=None, markup=settings['MARKUP'])
generator.generate_context() generator.generate_context()
pages = self.distill_pages(generator.pages) pages = self.distill_pages(generator.pages)
hidden_pages = self.distill_pages(generator.hidden_pages) hidden_pages = self.distill_pages(generator.hidden_pages)
@ -252,13 +249,12 @@ class TestTemplatePagesGenerator(unittest.TestCase):
settings = get_settings() settings = get_settings()
settings['STATIC_PATHS'] = ['static'] settings['STATIC_PATHS'] = ['static']
settings['TEMPLATE_PAGES'] = { settings['TEMPLATE_PAGES'] = {
'template/source.html': 'generated/file.html' 'template/source.html': 'generated/file.html'
} }
generator = TemplatePagesGenerator( generator = TemplatePagesGenerator(
context={'foo': 'bar'}, settings=settings, context={'foo': 'bar'}, settings=settings,
path=self.temp_content, theme='', path=self.temp_content, theme='', output_path=self.temp_output)
output_path=self.temp_output, markup=None)
# create a dummy template file # create a dummy template file
template_dir = os.path.join(self.temp_content, 'template') template_dir = os.path.join(self.temp_content, 'template')

View file

@ -19,8 +19,8 @@ class ReaderTest(unittest.TestCase):
def read_file(self, path, **kwargs): def read_file(self, path, **kwargs):
# Isolate from future API changes to readers.read_file # Isolate from future API changes to readers.read_file
return readers.read_file( r = readers.Readers(settings=get_settings(**kwargs))
base_path=CONTENT_PATH, path=path, settings=get_settings(**kwargs)) return r.read_file(base_path=CONTENT_PATH, path=path)
class RstReaderTest(ReaderTest): class RstReaderTest(ReaderTest):
@ -160,7 +160,7 @@ class MdReaderTest(ReaderTest):
' with some footnotes' ' with some footnotes'
'<sup id="fnref:footnote"><a class="footnote-ref" ' '<sup id="fnref:footnote"><a class="footnote-ref" '
'href="#fn:footnote" rel="footnote">2</a></sup></p>\n' 'href="#fn:footnote" rel="footnote">2</a></sup></p>\n'
'<div class="footnote">\n' '<div class="footnote">\n'
'<hr />\n<ol>\n<li id="fn:1">\n' '<hr />\n<ol>\n<li id="fn:1">\n'
'<p>Numbered footnote&#160;' '<p>Numbered footnote&#160;'

View file

@ -353,12 +353,13 @@ class TestDateFormatter(unittest.TestCase):
'French locale needed') 'French locale needed')
def test_french_locale(self): def test_french_locale(self):
settings = read_settings( settings = read_settings(
override = {'LOCALE': locale.normalize('fr_FR.UTF-8'), override={'LOCALE': locale.normalize('fr_FR.UTF-8'),
'TEMPLATE_PAGES': {'template/source.html': 'TEMPLATE_PAGES': {'template/source.html':
'generated/file.html'}}) 'generated/file.html'}})
generator = TemplatePagesGenerator({'date': self.date}, settings, generator = TemplatePagesGenerator(
self.temp_content, '', self.temp_output, None) {'date': self.date}, settings,
self.temp_content, '', self.temp_output)
generator.env.filters.update({'strftime': utils.DateFormatter()}) generator.env.filters.update({'strftime': utils.DateFormatter()})
writer = Writer(self.temp_output, settings=settings) writer = Writer(self.temp_output, settings=settings)
@ -385,8 +386,9 @@ class TestDateFormatter(unittest.TestCase):
'TEMPLATE_PAGES': {'template/source.html': 'TEMPLATE_PAGES': {'template/source.html':
'generated/file.html'}}) 'generated/file.html'}})
generator = TemplatePagesGenerator({'date': self.date}, settings, generator = TemplatePagesGenerator(
self.temp_content, '', self.temp_output, None) {'date': self.date}, settings,
self.temp_content, '', self.temp_output)
generator.env.filters.update({'strftime': utils.DateFormatter()}) generator.env.filters.update({'strftime': utils.DateFormatter()})
writer = Writer(self.temp_output, settings=settings) writer = Writer(self.temp_output, settings=settings)