1
0
Fork 0
forked from github/pelican

Merge pull request #116 from kmike/better_metadata2

Better metadata handling. Fixes #114
This commit is contained in:
Alexis Metaireau 2011-05-11 03:02:44 -07:00
commit 4aa829d45d
4 changed files with 102 additions and 33 deletions

View file

@ -1,6 +1,9 @@
# -*- coding: utf-8 -*-
try:
from docutils import core
import docutils
import docutils.core
import docutils.io
from docutils.writers.html4css1 import HTMLTranslator
# import the directives to have pygments support
from pelican import rstdirectives
@ -21,40 +24,78 @@ _METADATA_PROCESSORS = {
'status': unicode.strip,
}
def _process_metadata(name, value):
    """Run ``value`` through the processor registered for ``name``.

    ``name`` is looked up in ``_METADATA_PROCESSORS``. When no processor is
    registered for it, ``value`` is handed back unchanged.
    """
    if name not in _METADATA_PROCESSORS:
        return value
    processor = _METADATA_PROCESSORS[name]
    return processor(value)
# Base class for the format-specific readers defined in this module.
class Reader(object):
# Class-level switch. The subclasses visible in this file override it with
# bool(<optional dependency>), e.g. bool(docutils) or bool(Markdown).
enabled = True
class _FieldBodyTranslator(HTMLTranslator):
    """HTML translator used to render the content of a single field body.

    The ``field_body`` visit/depart hooks are overridden as no-ops, so the
    base translator's own handling of that node is skipped and only what
    the node's children write to ``self.body`` ends up in the output.
    """

    def astext(self):
        """Return the accumulated ``self.body`` fragments as one string."""
        fragments = self.body
        return ''.join(fragments)

    def visit_field_body(self, node):
        """Do nothing when entering a ``field_body`` node."""

    def depart_field_body(self, node):
        """Do nothing when leaving a ``field_body`` node."""
def render_node_to_html(document, node):
    """Render ``node`` to an HTML string.

    A fresh ``_FieldBodyTranslator`` bound to ``document`` walks the node,
    and the text it accumulated is returned.
    """
    translator = _FieldBodyTranslator(document)
    node.walkabout(translator)
    html = translator.astext()
    return html
def get_metadata(document):
    """Return the dict containing document metadata.

    Every ``docinfo`` node of ``document`` is scanned. Custom fields
    (``field`` elements, e.g. summary) contribute their name and their body
    rendered to HTML; any other element (e.g. address) contributes its tag
    name and plain text.

    Names are lowercased before the lookup in ``_process_metadata``, which
    matches what the Markdown and HTML readers in this module do. Without
    it, a field written as ``:Status:`` would skip its processor and be
    stored under a differently cased key.

    :param document: parsed docutils document to inspect.
    :returns: dict mapping lowercase metadata names to processed values.
    """
    output = {}
    for docinfo in document.traverse(docutils.nodes.docinfo):
        for element in docinfo.children:
            if element.tagname == 'field':  # custom fields (e.g. summary)
                name_elem, body_elem = element.children
                name = name_elem.astext()
                value = render_node_to_html(document, body_elem)
            else:  # standard fields (e.g. address)
                name = element.tagname
                value = element.astext()
            # Normalise case so the key and the processor lookup agree with
            # the other readers, which call name.lower() before processing.
            name = name.lower()
            output[name] = _process_metadata(name, value)
    return output
class RstReader(Reader):
enabled = bool(core)
enabled = bool(docutils)
extension = "rst"
# NOTE(review): this span comes from a diff view and holds two definitions of
# _parse_metadata back to back. The first appears to be the version removed by
# the commit and the second the version added -- confirm against the repository.
#
# Variant taking raw text: scans ``content`` for lines shaped like
# ``:name: value`` (lowercase a-z names only), then runs each value through the
# processor registered in _METADATA_PROCESSORS, or an identity lambda if none.
def _parse_metadata(self, content):
"""Return the dict containing metadata"""
output = {}
for m in re.compile('^:([a-z]+): (.*)\s', re.M).finditer(content):
name, value = m.group(1).lower(), m.group(2)
output[name] = _METADATA_PROCESSORS.get(
name, lambda x:x
)(value)
return output
# Variant taking a parsed document: delegates entirely to the module-level
# get_metadata() helper.
def _parse_metadata(self, document):
return get_metadata(document)
def _get_publisher(self, filename):
    """Publish ``filename`` with docutils and return the publisher.

    The publisher writes to a ``StringOutput`` destination and is set up
    with the 'standalone' reader, 'restructuredtext' parser and 'html'
    writer. ``initial_header_level`` is overridden to '2'. ``publish()``
    has already been called when the publisher is returned.
    """
    publisher = docutils.core.Publisher(
        destination_class=docutils.io.StringOutput)
    publisher.set_components('standalone', 'restructuredtext', 'html')
    overrides = {'initial_header_level': '2'}
    publisher.process_programmatic_settings(None, overrides, None)
    publisher.set_source(source_path=filename)
    publisher.publish()
    return publisher
# NOTE(review): this span comes from a diff view, so two versions of the body
# appear back to back under a single ``def``. Which lines are removed and which
# are added is inferred, not marked -- confirm against the repository.
def read(self, filename):
# -- First variant (presumably the pre-change body): hands the open file
# object to self._parse_metadata and to core.publish_parts, then fills in
# 'title' from the rendered parts only when the metadata lacks one.
"""Parse restructured text"""
text = open(filename)
metadata = self._parse_metadata(text)
extra_params = {'input_encoding': 'unicode',
'initial_header_level': '2'}
rendered_content = core.publish_parts(text,
source_path=filename,
writer_name='html',
settings_overrides=extra_params)
title = rendered_content.get('title')
content = rendered_content.get('body')
if not metadata.has_key('title'):
metadata['title'] = title
# -- Second variant (presumably the post-change body): publishes once via
# self._get_publisher, takes the HTML body from the writer's parts and the
# metadata from the parsed document. The rendered title is only a default.
"""Parses restructured text"""
pub = self._get_publisher(filename)
parts = pub.writer.parts
content = parts.get('body')
metadata = self._parse_metadata(pub.document)
metadata.setdefault('title', parts.get('title'))
# Both variants end by returning the (content, metadata) pair.
return content, metadata
class MarkdownReader(Reader):
enabled = bool(Markdown)
extension = "md"
@ -64,13 +105,11 @@ class MarkdownReader(Reader):
text = open(filename)
md = Markdown(extensions = ['meta', 'codehilite'])
content = md.convert(text)
metadata = {}
for name, value in md.Meta.items():
name = name.lower()
metadata[name] = _METADATA_PROCESSORS.get(
name, lambda x:x
)(value[0])
metadata[name] = _process_metadata(name, value[0])
return content, metadata
@ -85,7 +124,8 @@ class HtmlReader(Reader):
for i in self._re.findall(content):
key = i.split(':')[0][5:].strip()
value = i.split(':')[-1][:-3].strip()
metadata[key.lower()] = value
name = key.lower()
metadata[name] = _process_metadata(name, value)
return content, metadata

View file

@ -0,0 +1,27 @@
# coding: utf-8
import unittest2
import os
import datetime
from pelican import readers
# Directory that contains this test module.
CUR_DIR = os.path.dirname(__file__)
# Sample content directory: two levels above this module, then samples/content.
CONTENT_PATH = os.path.join(CUR_DIR, '..', '..', 'samples', 'content')
def _filename(*args):
    """Build a path under ``CONTENT_PATH`` from the given path components."""
    path = os.path.join(CONTENT_PATH, *args)
    return path
class RstReaderTest(unittest2.TestCase):
    """Checks the metadata ``RstReader`` extracts from the sample article."""

    def test_metadata(self):
        # Only the metadata half of the (content, metadata) pair is checked.
        article_path = _filename('super_article.rst')
        content, metadata = readers.RstReader().read(article_path)

        expected = {}
        expected['category'] = 'yeah'
        expected['author'] = u'Alexis Métaireau'
        expected['title'] = 'This is a super article !'
        expected['summary'] = 'Multi-line metadata should be supported\nas well as <strong>inline markup</strong>.'
        expected['date'] = datetime.datetime(2010, 12, 2, 10, 14)
        expected['tags'] = ['foo', 'bar', 'foobar']

        self.assertDictEqual(metadata, expected)

View file

@ -5,7 +5,9 @@ This is a super article !
:date: 2010-12-02 10:14
:category: yeah
:author: Alexis Métaireau
:summary: This is a simple test
:summary:
Multi-line metadata should be supported
as well as **inline markup**.
Some content here !

View file

@ -1,13 +1,13 @@
# -*- coding: utf-8 -*-
AUTHOR = u'Alexis Métaireau'
SITENAME = u"Alexis' log"
SITENAME = u"Alexis' log"
SITEURL = 'http://blog.notmyidea.org'
GITHUB_URL = 'http://github.com/ametaireau/'
DISQUS_SITENAME = "blog-notmyidea"
PDF_GENERATOR = False
REVERSE_CATEGORY_ORDER = True
LOCALE = 'fr_FR.utf8'
LOCALE = 'fr_FR.utf-8'
DEFAULT_PAGINATION = 2
FEED_RSS = 'feeds/all.rss.xml'
@ -33,6 +33,6 @@ STATIC_PATHS = ["pictures",]
# A list of files to copy from the source to the destination
FILES_TO_COPY = (('extra/robots.txt', 'robots.txt'),)
# foobar will not be used, because it's not in caps. All configuration keys
# foobar will not be used, because it's not in caps. All configuration keys
# have to be in caps
foobar = "barbaz"