1
0
Fork 0
forked from github/pelican
pelican-theme/pelican/readers.py
2011-06-21 03:23:41 -07:00

145 lines
4.1 KiB
Python

# -*- coding: utf-8 -*-
try:
import docutils
import docutils.core
import docutils.io
from docutils.writers.html4css1 import HTMLTranslator
# import the directives to have pygments support
from pelican import rstdirectives
except ImportError:
core = False
try:
from markdown import Markdown
except ImportError:
Markdown = False
import re
from pelican.utils import get_date, open
_METADATA_PROCESSORS = {
'tags': lambda x: map(unicode.strip, x.split(',')),
'date': lambda x: get_date(x),
'status': unicode.strip,
}
def _process_metadata(name, value):
if name.lower() in _METADATA_PROCESSORS:
return _METADATA_PROCESSORS[name.lower()](value)
return value
class Reader(object):
enabled = True
class _FieldBodyTranslator(HTMLTranslator):
def astext(self):
return ''.join(self.body)
def visit_field_body(self, node):
pass
def depart_field_body(self, node):
pass
def render_node_to_html(document, node):
visitor = _FieldBodyTranslator(document)
node.walkabout(visitor)
return visitor.astext()
def get_metadata(document):
"""Return the dict containing document metadata"""
output = {}
for docinfo in document.traverse(docutils.nodes.docinfo):
for element in docinfo.children:
if element.tagname == 'field': # custom fields (e.g. summary)
name_elem, body_elem = element.children
name = name_elem.astext()
value = render_node_to_html(document, body_elem)
else: # standard fields (e.g. address)
name = element.tagname
value = element.astext()
output[name] = _process_metadata(name, value)
return output
class RstReader(Reader):
enabled = bool(docutils)
extension = "rst"
def _parse_metadata(self, document):
return get_metadata(document)
def _get_publisher(self, filename):
extra_params = {'initial_header_level': '2'}
pub = docutils.core.Publisher(destination_class=docutils.io.StringOutput)
pub.set_components('standalone', 'restructuredtext', 'html')
pub.process_programmatic_settings(None, extra_params, None)
pub.set_source(source_path=filename)
pub.publish()
return pub
def read(self, filename):
"""Parses restructured text"""
pub = self._get_publisher(filename)
parts = pub.writer.parts
content = parts.get('body')
metadata = self._parse_metadata(pub.document)
metadata.setdefault('title', parts.get('title'))
return content, metadata
class MarkdownReader(Reader):
enabled = bool(Markdown)
extension = "md"
def read(self, filename):
"""Parse content and metadata of markdown files"""
text = open(filename)
md = Markdown(extensions = ['meta', 'codehilite', 'extra'])
content = md.convert(text)
metadata = {}
for name, value in md.Meta.items():
name = name.lower()
metadata[name] = _process_metadata(name, value[0])
return content, metadata
class HtmlReader(Reader):
extension = "html"
_re = re.compile('\<\!\-\-\#\s?[A-z0-9_-]*\s?\:s?[A-z0-9\s_-]*\s?\-\-\>')
def read(self, filename):
"""Parse content and metadata of (x)HTML files"""
content = open(filename)
metadata = {'title':'unnamed'}
for i in self._re.findall(content):
key = i.split(':')[0][5:].strip()
value = i.split(':')[-1][:-3].strip()
name = key.lower()
metadata[name] = _process_metadata(name, value)
return content, metadata
_EXTENSIONS = dict((cls.extension, cls) for cls in Reader.__subclasses__())
def read_file(filename, fmt=None):
"""Return a reader object using the given format."""
if not fmt:
fmt = filename.split('.')[-1]
if fmt not in _EXTENSIONS.keys():
raise TypeError('Pelican does not know how to parse %s' % filename)
reader = _EXTENSIONS[fmt]()
if not reader.enabled:
raise ValueError("Missing dependencies for %s" % fmt)
return reader.read(filename)