ReST metadata parsing using docutils.

This commit is contained in:
Mikhail Korobov 2011-05-10 07:55:30 +06:00
commit cacc6db9a4

View file

@ -1,6 +1,9 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
try: try:
from docutils import core import docutils
import docutils.core
import docutils.io
from docutils.writers.html4css1 import HTMLTranslator
# import the directives to have pygments support # import the directives to have pygments support
from pelican import rstdirectives from pelican import rstdirectives
@ -21,40 +24,78 @@ _METADATA_PROCESSORS = {
'status': unicode.strip, 'status': unicode.strip,
} }
def _process_metadata(name, value):
    """Run ``value`` through the processor registered for ``name``.

    Names without an entry in ``_METADATA_PROCESSORS`` get their value
    back unchanged.
    """
    if name not in _METADATA_PROCESSORS:
        return value
    processor = _METADATA_PROCESSORS[name]
    return processor(value)
class Reader(object): class Reader(object):
enabled = True enabled = True
class _FieldBodyTranslator(HTMLTranslator):
    """HTML translator whose ``field_body`` visit/depart hooks emit nothing.

    ``astext`` returns only the fragments collected in ``self.body``.
    """

    def visit_field_body(self, node):
        """Do nothing when entering a ``field_body`` node."""

    def depart_field_body(self, node):
        """Do nothing when leaving a ``field_body`` node."""

    def astext(self):
        """Return the collected ``self.body`` fragments as a single string."""
        fragments = self.body
        return ''.join(fragments)
def render_node_to_html(document, node):
    """Walk ``node`` with a ``_FieldBodyTranslator`` and return its output.

    ``document`` is handed to the translator constructor; ``node`` is the
    subtree that gets traversed via ``walkabout``.
    """
    translator = _FieldBodyTranslator(document)
    node.walkabout(translator)
    rendered = translator.astext()
    return rendered
def get_metadata(document):
    """Return the dict containing document metadata.

    Every ``docinfo`` node found in ``document`` is scanned.  For each of
    its children:

    * a ``field`` element (custom field, e.g. summary) contributes the text
      of its name child as the key and the HTML rendering of its body child
      as the value;
    * any other element (standard field, e.g. address) contributes its tag
      name as the key and its plain text as the value.

    Keys are lowercased before processing so that ``:Tags:`` and ``:tags:``
    are treated alike, matching how the Markdown and HTML readers normalise
    their metadata names.  Each value is passed through
    ``_process_metadata`` before being stored; when the same key occurs
    more than once, the last occurrence wins.
    """
    output = {}
    for docinfo in document.traverse(docutils.nodes.docinfo):
        for element in docinfo.children:
            if element.tagname == 'field':  # custom fields (e.g. summary)
                # a field node is unpacked as exactly (name, body)
                name_elem, body_elem = element.children
                name = name_elem.astext()
                value = render_node_to_html(document, body_elem)
            else:  # standard fields (e.g. address)
                name = element.tagname
                value = element.astext()

            # Normalise case so the lookup in the metadata processors (keyed
            # by lowercase names) succeeds regardless of how the author
            # capitalised the field name.
            name = name.lower()
            output[name] = _process_metadata(name, value)
    return output
class RstReader(Reader): class RstReader(Reader):
enabled = bool(core) enabled = bool(docutils)
extension = "rst" extension = "rst"
def _parse_metadata(self, content): def _parse_metadata(self, document):
"""Return the dict containing metadata""" return get_metadata(document)
output = {}
for m in re.compile('^:([a-z]+): (.*)\s', re.M).finditer(content): def _get_publisher(self, filename):
name, value = m.group(1).lower(), m.group(2) extra_params = {'initial_header_level': '2'}
output[name] = _METADATA_PROCESSORS.get( pub = docutils.core.Publisher(destination_class=docutils.io.StringOutput)
name, lambda x:x pub.set_components('standalone', 'restructuredtext', 'html')
)(value) pub.process_programmatic_settings(None, extra_params, None)
return output pub.set_source(source_path=filename)
pub.publish()
return pub
def read(self, filename): def read(self, filename):
"""Parse restructured text""" """Parses restructured text"""
text = open(filename) pub = self._get_publisher(filename)
metadata = self._parse_metadata(text) parts = pub.writer.parts
extra_params = {'input_encoding': 'unicode', content = parts.get('body')
'initial_header_level': '2'}
rendered_content = core.publish_parts(text, metadata = self._parse_metadata(pub.document)
source_path=filename, metadata.setdefault('title', parts.get('title'))
writer_name='html',
settings_overrides=extra_params)
title = rendered_content.get('title')
content = rendered_content.get('body')
if not metadata.has_key('title'):
metadata['title'] = title
return content, metadata return content, metadata
class MarkdownReader(Reader): class MarkdownReader(Reader):
enabled = bool(Markdown) enabled = bool(Markdown)
extension = "md" extension = "md"
@ -68,9 +109,7 @@ class MarkdownReader(Reader):
metadata = {} metadata = {}
for name, value in md.Meta.items(): for name, value in md.Meta.items():
name = name.lower() name = name.lower()
metadata[name] = _METADATA_PROCESSORS.get( metadata[name] = _process_metadata(name, value[0])
name, lambda x:x
)(value[0])
return content, metadata return content, metadata
@ -85,7 +124,8 @@ class HtmlReader(Reader):
for i in self._re.findall(content): for i in self._re.findall(content):
key = i.split(':')[0][5:].strip() key = i.split(':')[0][5:].strip()
value = i.split(':')[-1][:-3].strip() value = i.split(':')[-1][:-3].strip()
metadata[key.lower()] = value name = key.lower()
metadata[name] = _process_metadata(name, value)
return content, metadata return content, metadata