Merge pull request #116 from kmike/better_metadata2

Better metadata handling. Fixes #114
This commit is contained in:
Alexis Metaireau 2011-05-11 03:02:44 -07:00
commit 4aa829d45d
4 changed files with 102 additions and 33 deletions

View file

@ -1,6 +1,9 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
try: try:
from docutils import core import docutils
import docutils.core
import docutils.io
from docutils.writers.html4css1 import HTMLTranslator
# import the directives to have pygments support # import the directives to have pygments support
from pelican import rstdirectives from pelican import rstdirectives
@ -21,40 +24,78 @@ _METADATA_PROCESSORS = {
'status': unicode.strip, 'status': unicode.strip,
} }
def _process_metadata(name, value):
    """Run ``value`` through the processor registered for ``name``, if any.

    ``name`` is looked up in ``_METADATA_PROCESSORS``.  When an entry exists,
    its callable is applied to ``value`` and the result is returned;
    otherwise ``value`` is returned unchanged.
    """
    try:
        processor = _METADATA_PROCESSORS[name]
    except KeyError:
        # No processor registered for this field: pass the value through.
        return value
    return processor(value)
class Reader(object): class Reader(object):
enabled = True enabled = True
class _FieldBodyTranslator(HTMLTranslator):
    """HTML translator used to render the contents of a field body.

    The ``field_body`` visit/depart hooks are overridden to do nothing, so
    only the children of the node contribute to the output.
    NOTE(review): presumably this suppresses wrapper markup the base
    translator would otherwise emit for the field body -- confirm against
    the docutils html4css1 writer.
    """

    def astext(self):
        """Return the fragments collected in ``self.body`` as one string."""
        fragments = self.body
        return ''.join(fragments)

    def visit_field_body(self, node):
        """Do nothing when entering a ``field_body`` node."""

    def depart_field_body(self, node):
        """Do nothing when leaving a ``field_body`` node."""
def render_node_to_html(document, node):
    """Render ``node`` to an HTML string.

    A ``_FieldBodyTranslator`` is created for ``document``, ``node`` is
    walked with it, and the text the translator collected is returned.
    """
    translator = _FieldBodyTranslator(document)
    node.walkabout(translator)
    return translator.astext()
def get_metadata(document):
    """Return the dict containing document metadata.

    Every ``docinfo`` node of ``document`` is inspected.  For each child:

    * a ``field`` element (custom fields, e.g. ``summary``) contributes its
      field name as the key and its body rendered to HTML as the value;
    * any other element (standard fields, e.g. ``address``) contributes its
      tag name as the key and its plain text as the value.

    Keys are lower-cased before being processed and stored, so that a field
    written as ``:Date:`` is handled the same way as ``:date:``.  Values are
    passed through ``_process_metadata``.  When the same key occurs more
    than once, the last occurrence wins.
    """
    output = {}
    for docinfo in document.traverse(docutils.nodes.docinfo):
        for element in docinfo.children:
            if element.tagname == 'field':  # custom fields (e.g. summary)
                # A field node holds exactly a name node and a body node.
                name_elem, body_elem = element.children
                name = name_elem.astext()
                value = render_node_to_html(document, body_elem)
            else:  # standard fields (e.g. address)
                name = element.tagname
                value = element.astext()
            # Normalise the key case, as the Markdown and HTML readers do;
            # otherwise mixed-case field names would skip their processor.
            name = name.lower()
            output[name] = _process_metadata(name, value)
    return output
class RstReader(Reader): class RstReader(Reader):
enabled = bool(core) enabled = bool(docutils)
extension = "rst" extension = "rst"
def _parse_metadata(self, content): def _parse_metadata(self, document):
"""Return the dict containing metadata""" return get_metadata(document)
output = {}
for m in re.compile('^:([a-z]+): (.*)\s', re.M).finditer(content): def _get_publisher(self, filename):
name, value = m.group(1).lower(), m.group(2) extra_params = {'initial_header_level': '2'}
output[name] = _METADATA_PROCESSORS.get( pub = docutils.core.Publisher(destination_class=docutils.io.StringOutput)
name, lambda x:x pub.set_components('standalone', 'restructuredtext', 'html')
)(value) pub.process_programmatic_settings(None, extra_params, None)
return output pub.set_source(source_path=filename)
pub.publish()
return pub
def read(self, filename): def read(self, filename):
"""Parse restructured text""" """Parses restructured text"""
text = open(filename) pub = self._get_publisher(filename)
metadata = self._parse_metadata(text) parts = pub.writer.parts
extra_params = {'input_encoding': 'unicode', content = parts.get('body')
'initial_header_level': '2'}
rendered_content = core.publish_parts(text, metadata = self._parse_metadata(pub.document)
source_path=filename, metadata.setdefault('title', parts.get('title'))
writer_name='html',
settings_overrides=extra_params)
title = rendered_content.get('title')
content = rendered_content.get('body')
if not metadata.has_key('title'):
metadata['title'] = title
return content, metadata return content, metadata
class MarkdownReader(Reader): class MarkdownReader(Reader):
enabled = bool(Markdown) enabled = bool(Markdown)
extension = "md" extension = "md"
@ -64,13 +105,11 @@ class MarkdownReader(Reader):
text = open(filename) text = open(filename)
md = Markdown(extensions = ['meta', 'codehilite']) md = Markdown(extensions = ['meta', 'codehilite'])
content = md.convert(text) content = md.convert(text)
metadata = {} metadata = {}
for name, value in md.Meta.items(): for name, value in md.Meta.items():
name = name.lower() name = name.lower()
metadata[name] = _METADATA_PROCESSORS.get( metadata[name] = _process_metadata(name, value[0])
name, lambda x:x
)(value[0])
return content, metadata return content, metadata
@ -85,7 +124,8 @@ class HtmlReader(Reader):
for i in self._re.findall(content): for i in self._re.findall(content):
key = i.split(':')[0][5:].strip() key = i.split(':')[0][5:].strip()
value = i.split(':')[-1][:-3].strip() value = i.split(':')[-1][:-3].strip()
metadata[key.lower()] = value name = key.lower()
metadata[name] = _process_metadata(name, value)
return content, metadata return content, metadata

View file

@ -0,0 +1,27 @@
# coding: utf-8
import unittest2
import os
import datetime
from pelican import readers
# Directory containing this test module.
CUR_DIR = os.path.dirname(__file__)
# Sample content tree, at ../../samples/content relative to CUR_DIR.
CONTENT_PATH = os.path.join(CUR_DIR, '..', '..', 'samples', 'content')
def _filename(*args):
    """Return the path built by joining ``args`` onto ``CONTENT_PATH``."""
    components = (CONTENT_PATH,) + args
    return os.path.join(*components)
class RstReaderTest(unittest2.TestCase):
    """Checks the metadata ``RstReader`` extracts from the sample article."""

    def test_metadata(self):
        # Only the metadata is checked; the rendered content is ignored.
        source_path = _filename('super_article.rst')
        rst_reader = readers.RstReader()
        _content, actual = rst_reader.read(source_path)

        # The summary spans several lines and carries inline markup, so it
        # is expected back as HTML with the line break preserved.
        wanted = {
            'title': 'This is a super article !',
            'author': u'Alexis Métaireau',
            'category': 'yeah',
            'date': datetime.datetime(2010, 12, 2, 10, 14),
            'tags': ['foo', 'bar', 'foobar'],
            'summary': 'Multi-line metadata should be supported\nas well as <strong>inline markup</strong>.',
        }
        self.assertDictEqual(actual, wanted)

View file

@ -5,7 +5,9 @@ This is a super article !
:date: 2010-12-02 10:14 :date: 2010-12-02 10:14
:category: yeah :category: yeah
:author: Alexis Métaireau :author: Alexis Métaireau
:summary: This is a simple test :summary:
Multi-line metadata should be supported
as well as **inline markup**.
Some content here ! Some content here !

View file

@ -1,13 +1,13 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
AUTHOR = u'Alexis Métaireau' AUTHOR = u'Alexis Métaireau'
SITENAME = u"Alexis' log" SITENAME = u"Alexis' log"
SITEURL = 'http://blog.notmyidea.org' SITEURL = 'http://blog.notmyidea.org'
GITHUB_URL = 'http://github.com/ametaireau/' GITHUB_URL = 'http://github.com/ametaireau/'
DISQUS_SITENAME = "blog-notmyidea" DISQUS_SITENAME = "blog-notmyidea"
PDF_GENERATOR = False PDF_GENERATOR = False
REVERSE_CATEGORY_ORDER = True REVERSE_CATEGORY_ORDER = True
LOCALE = 'fr_FR.utf8' LOCALE = 'fr_FR.utf-8'
DEFAULT_PAGINATION = 2 DEFAULT_PAGINATION = 2
FEED_RSS = 'feeds/all.rss.xml' FEED_RSS = 'feeds/all.rss.xml'
@ -33,6 +33,6 @@ STATIC_PATHS = ["pictures",]
# A list of files to copy from the source to the destination # A list of files to copy from the source to the destination
FILES_TO_COPY = (('extra/robots.txt', 'robots.txt'),) FILES_TO_COPY = (('extra/robots.txt', 'robots.txt'),)
# foobar will not be used, because it's not in caps. All configuration keys # foobar will not be used, because it's not in caps. All configuration keys
# have to be in caps # have to be in caps
foobar = "barbaz" foobar = "barbaz"