mirror of
https://github.com/getpelican/pelican.git
synced 2025-10-15 20:28:56 +02:00
ReST metadata parsing using docutils.
This commit is contained in:
parent
eb988ab763
commit
cacc6db9a4
1 changed files with 69 additions and 29 deletions
|
|
@ -1,6 +1,9 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
try:
|
try:
|
||||||
from docutils import core
|
import docutils
|
||||||
|
import docutils.core
|
||||||
|
import docutils.io
|
||||||
|
from docutils.writers.html4css1 import HTMLTranslator
|
||||||
|
|
||||||
# import the directives to have pygments support
|
# import the directives to have pygments support
|
||||||
from pelican import rstdirectives
|
from pelican import rstdirectives
|
||||||
|
|
@ -21,40 +24,78 @@ _METADATA_PROCESSORS = {
|
||||||
'status': unicode.strip,
|
'status': unicode.strip,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def _process_metadata(name, value):
|
||||||
|
if name in _METADATA_PROCESSORS:
|
||||||
|
return _METADATA_PROCESSORS[name](value)
|
||||||
|
return value
|
||||||
|
|
||||||
|
|
||||||
class Reader(object):
|
class Reader(object):
|
||||||
enabled = True
|
enabled = True
|
||||||
|
|
||||||
|
|
||||||
|
class _FieldBodyTranslator(HTMLTranslator):
|
||||||
|
|
||||||
|
def astext(self):
|
||||||
|
return ''.join(self.body)
|
||||||
|
|
||||||
|
def visit_field_body(self, node):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def depart_field_body(self, node):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def render_node_to_html(document, node):
|
||||||
|
visitor = _FieldBodyTranslator(document)
|
||||||
|
node.walkabout(visitor)
|
||||||
|
return visitor.astext()
|
||||||
|
|
||||||
|
def get_metadata(document):
|
||||||
|
"""Return the dict containing document metadata"""
|
||||||
|
output = {}
|
||||||
|
for docinfo in document.traverse(docutils.nodes.docinfo):
|
||||||
|
for element in docinfo.children:
|
||||||
|
if element.tagname == 'field': # custom fields (e.g. summary)
|
||||||
|
name_elem, body_elem = element.children
|
||||||
|
name = name_elem.astext()
|
||||||
|
value = render_node_to_html(document, body_elem)
|
||||||
|
else: # standard fields (e.g. address)
|
||||||
|
name = element.tagname
|
||||||
|
value = element.astext()
|
||||||
|
|
||||||
|
output[name] = _process_metadata(name, value)
|
||||||
|
return output
|
||||||
|
|
||||||
|
|
||||||
class RstReader(Reader):
|
class RstReader(Reader):
|
||||||
enabled = bool(core)
|
enabled = bool(docutils)
|
||||||
extension = "rst"
|
extension = "rst"
|
||||||
|
|
||||||
def _parse_metadata(self, content):
|
def _parse_metadata(self, document):
|
||||||
"""Return the dict containing metadata"""
|
return get_metadata(document)
|
||||||
output = {}
|
|
||||||
for m in re.compile('^:([a-z]+): (.*)\s', re.M).finditer(content):
|
def _get_publisher(self, filename):
|
||||||
name, value = m.group(1).lower(), m.group(2)
|
extra_params = {'initial_header_level': '2'}
|
||||||
output[name] = _METADATA_PROCESSORS.get(
|
pub = docutils.core.Publisher(destination_class=docutils.io.StringOutput)
|
||||||
name, lambda x:x
|
pub.set_components('standalone', 'restructuredtext', 'html')
|
||||||
)(value)
|
pub.process_programmatic_settings(None, extra_params, None)
|
||||||
return output
|
pub.set_source(source_path=filename)
|
||||||
|
pub.publish()
|
||||||
|
return pub
|
||||||
|
|
||||||
def read(self, filename):
|
def read(self, filename):
|
||||||
"""Parse restructured text"""
|
"""Parses restructured text"""
|
||||||
text = open(filename)
|
pub = self._get_publisher(filename)
|
||||||
metadata = self._parse_metadata(text)
|
parts = pub.writer.parts
|
||||||
extra_params = {'input_encoding': 'unicode',
|
content = parts.get('body')
|
||||||
'initial_header_level': '2'}
|
|
||||||
rendered_content = core.publish_parts(text,
|
metadata = self._parse_metadata(pub.document)
|
||||||
source_path=filename,
|
metadata.setdefault('title', parts.get('title'))
|
||||||
writer_name='html',
|
|
||||||
settings_overrides=extra_params)
|
|
||||||
title = rendered_content.get('title')
|
|
||||||
content = rendered_content.get('body')
|
|
||||||
if not metadata.has_key('title'):
|
|
||||||
metadata['title'] = title
|
|
||||||
return content, metadata
|
return content, metadata
|
||||||
|
|
||||||
|
|
||||||
class MarkdownReader(Reader):
|
class MarkdownReader(Reader):
|
||||||
enabled = bool(Markdown)
|
enabled = bool(Markdown)
|
||||||
extension = "md"
|
extension = "md"
|
||||||
|
|
@ -64,13 +105,11 @@ class MarkdownReader(Reader):
|
||||||
text = open(filename)
|
text = open(filename)
|
||||||
md = Markdown(extensions = ['meta', 'codehilite'])
|
md = Markdown(extensions = ['meta', 'codehilite'])
|
||||||
content = md.convert(text)
|
content = md.convert(text)
|
||||||
|
|
||||||
metadata = {}
|
metadata = {}
|
||||||
for name, value in md.Meta.items():
|
for name, value in md.Meta.items():
|
||||||
name = name.lower()
|
name = name.lower()
|
||||||
metadata[name] = _METADATA_PROCESSORS.get(
|
metadata[name] = _process_metadata(name, value[0])
|
||||||
name, lambda x:x
|
|
||||||
)(value[0])
|
|
||||||
return content, metadata
|
return content, metadata
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -85,7 +124,8 @@ class HtmlReader(Reader):
|
||||||
for i in self._re.findall(content):
|
for i in self._re.findall(content):
|
||||||
key = i.split(':')[0][5:].strip()
|
key = i.split(':')[0][5:].strip()
|
||||||
value = i.split(':')[-1][:-3].strip()
|
value = i.split(':')[-1][:-3].strip()
|
||||||
metadata[key.lower()] = value
|
name = key.lower()
|
||||||
|
metadata[name] = _process_metadata(name, value)
|
||||||
|
|
||||||
return content, metadata
|
return content, metadata
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue