forked from github/pelican
As it is done when reading the file, we need to remove html tags for the permalink and the slug (this is done here for the notmyidea and simple themes). While modifying the themes I also replaced the `pagename` template tag with `article.url` (`pagename` was an empty variable, no more used ?).
179 lines
5.3 KiB
Python
179 lines
5.3 KiB
Python
# -*- coding: utf-8 -*-
|
|
try:
|
|
import docutils
|
|
import docutils.core
|
|
import docutils.io
|
|
from docutils.writers.html4css1 import HTMLTranslator
|
|
|
|
# import the directives to have pygments support
|
|
from pelican import rstdirectives # NOQA
|
|
except ImportError:
|
|
core = False
|
|
try:
|
|
from markdown import Markdown
|
|
except ImportError:
|
|
Markdown = False # NOQA
|
|
import re
|
|
|
|
from pelican.contents import Category, Tag, Author
|
|
from pelican.utils import get_date, open
|
|
|
|
|
|
_METADATA_PROCESSORS = {
|
|
'tags': lambda x, y: [Tag(tag, y) for tag in unicode(x).split(',')],
|
|
'date': lambda x, y: get_date(x),
|
|
'status': lambda x, y: unicode.strip(x),
|
|
'category': Category,
|
|
'author': Author,
|
|
}
|
|
|
|
|
|
class Reader(object):
|
|
enabled = True
|
|
extensions = None
|
|
|
|
def __init__(self, settings):
|
|
self.settings = settings
|
|
|
|
def process_metadata(self, name, value):
|
|
if name in _METADATA_PROCESSORS:
|
|
return _METADATA_PROCESSORS[name](value, self.settings)
|
|
return value
|
|
|
|
|
|
class _FieldBodyTranslator(HTMLTranslator):
|
|
|
|
def __init__(self, document):
|
|
HTMLTranslator.__init__(self, document)
|
|
self.compact_p = None
|
|
|
|
def astext(self):
|
|
return ''.join(self.body)
|
|
|
|
def visit_field_body(self, node):
|
|
pass
|
|
|
|
def depart_field_body(self, node):
|
|
pass
|
|
|
|
|
|
def render_node_to_html(document, node):
|
|
visitor = _FieldBodyTranslator(document)
|
|
node.walkabout(visitor)
|
|
return visitor.astext()
|
|
|
|
|
|
class RstReader(Reader):
|
|
enabled = bool(docutils)
|
|
file_extensions = ['rst']
|
|
|
|
def _parse_metadata(self, document):
|
|
"""Return the dict containing document metadata"""
|
|
output = {}
|
|
for docinfo in document.traverse(docutils.nodes.docinfo):
|
|
for element in docinfo.children:
|
|
if element.tagname == 'field': # custom fields (e.g. summary)
|
|
name_elem, body_elem = element.children
|
|
name = name_elem.astext()
|
|
if name == 'summary':
|
|
value = render_node_to_html(document, body_elem)
|
|
else:
|
|
value = body_elem.astext()
|
|
else: # standard fields (e.g. address)
|
|
name = element.tagname
|
|
value = element.astext()
|
|
name = name.lower()
|
|
|
|
output[name] = self.process_metadata(name, value)
|
|
return output
|
|
|
|
def _get_publisher(self, filename):
|
|
extra_params = {'initial_header_level': '2'}
|
|
pub = docutils.core.Publisher(
|
|
destination_class=docutils.io.StringOutput)
|
|
pub.set_components('standalone', 'restructuredtext', 'html')
|
|
pub.process_programmatic_settings(None, extra_params, None)
|
|
pub.set_source(source_path=filename)
|
|
pub.publish()
|
|
return pub
|
|
|
|
def read(self, filename):
|
|
"""Parses restructured text"""
|
|
pub = self._get_publisher(filename)
|
|
parts = pub.writer.parts
|
|
content = parts.get('body')
|
|
|
|
metadata = self._parse_metadata(pub.document)
|
|
metadata.setdefault('title', parts.get('title'))
|
|
|
|
return content, metadata
|
|
|
|
|
|
class MarkdownReader(Reader):
|
|
enabled = bool(Markdown)
|
|
file_extensions = ['md', 'markdown', 'mkd']
|
|
extensions = ['codehilite', 'extra']
|
|
|
|
def read(self, filename):
|
|
"""Parse content and metadata of markdown files"""
|
|
text = open(filename)
|
|
md = Markdown(extensions=set(self.extensions + ['meta']))
|
|
content = md.convert(text)
|
|
|
|
metadata = {}
|
|
for name, value in md.Meta.items():
|
|
name = name.lower()
|
|
metadata[name] = self.process_metadata(name, value[0])
|
|
return content, metadata
|
|
|
|
|
|
class HtmlReader(Reader):
|
|
file_extensions = ['html', 'htm']
|
|
_re = re.compile('\<\!\-\-\#\s?[A-z0-9_-]*\s?\:s?[A-z0-9\s_-]*\s?\-\-\>')
|
|
|
|
def read(self, filename):
|
|
"""Parse content and metadata of (x)HTML files"""
|
|
with open(filename) as content:
|
|
metadata = {'title': 'unnamed'}
|
|
for i in self._re.findall(content):
|
|
key = i.split(':')[0][5:].strip()
|
|
value = i.split(':')[-1][:-3].strip()
|
|
name = key.lower()
|
|
metadata[name] = self.process_metadata(name, value)
|
|
|
|
return content, metadata
|
|
|
|
|
|
_EXTENSIONS = {}
|
|
|
|
for cls in Reader.__subclasses__():
|
|
for ext in cls.file_extensions:
|
|
_EXTENSIONS[ext] = cls
|
|
|
|
|
|
def read_file(filename, fmt=None, settings=None):
|
|
"""Return a reader object using the given format."""
|
|
if not fmt:
|
|
fmt = filename.split('.')[-1]
|
|
|
|
if fmt not in _EXTENSIONS:
|
|
raise TypeError('Pelican does not know how to parse %s' % filename)
|
|
|
|
reader = _EXTENSIONS[fmt](settings)
|
|
settings_key = '%s_EXTENSIONS' % fmt.upper()
|
|
|
|
if settings and settings_key in settings:
|
|
reader.extensions = settings[settings_key]
|
|
|
|
if not reader.enabled:
|
|
raise ValueError("Missing dependencies for %s" % fmt)
|
|
|
|
content, metadata = reader.read(filename)
|
|
|
|
# eventually filter the content with typogrify if asked so
|
|
if settings and settings['TYPOGRIFY']:
|
|
from typogrify import Typogrify
|
|
content = Typogrify.typogrify(content)
|
|
metadata['title'] = Typogrify.typogrify(metadata['title'])
|
|
|
|
return content, metadata
|