forked from github/pelican
Merge pull request #116 from kmike/better_metadata2
Better metadata handling. Fixes #114
This commit is contained in:
commit
4aa829d45d
4 changed files with 102 additions and 33 deletions
|
|
@ -1,6 +1,9 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
try:
|
||||
from docutils import core
|
||||
import docutils
|
||||
import docutils.core
|
||||
import docutils.io
|
||||
from docutils.writers.html4css1 import HTMLTranslator
|
||||
|
||||
# import the directives to have pygments support
|
||||
from pelican import rstdirectives
|
||||
|
|
@ -21,40 +24,78 @@ _METADATA_PROCESSORS = {
|
|||
'status': unicode.strip,
|
||||
}
|
||||
|
||||
def _process_metadata(name, value):
    """Run ``value`` through the processor registered for ``name``, if any.

    ``name`` is looked up in ``_METADATA_PROCESSORS``. When a processor is
    registered, its result is returned; otherwise ``value`` comes back
    unchanged.
    """
    try:
        processor = _METADATA_PROCESSORS[name]
    except KeyError:
        # No processor registered for this metadata key.
        return value
    return processor(value)
|
||||
|
||||
|
||||
class Reader(object):
|
||||
enabled = True
|
||||
|
||||
|
||||
class _FieldBodyTranslator(HTMLTranslator):
    """HTML translator used to render the body of a single docinfo field.

    The ``field_body`` visit/depart hooks are overridden to do nothing --
    presumably so that no wrapper markup is emitted around the field's
    content (confirm against ``HTMLTranslator``).
    """

    def visit_field_body(self, node):
        """Do nothing when entering a ``field_body`` node."""

    def depart_field_body(self, node):
        """Do nothing when leaving a ``field_body`` node."""

    def astext(self):
        """Return the fragments collected in ``self.body`` as one string."""
        fragments = self.body
        return ''.join(fragments)
|
||||
|
||||
|
||||
def render_node_to_html(document, node):
    """Render ``node`` to an HTML string.

    A fresh ``_FieldBodyTranslator`` bound to ``document`` walks ``node``;
    the text it accumulated is returned.
    """
    translator = _FieldBodyTranslator(document)
    node.walkabout(translator)
    html = translator.astext()
    return html
|
||||
|
||||
def get_metadata(document):
    """Return the dict containing document metadata.

    Every ``docinfo`` node of ``document`` is traversed. Custom fields
    (``field`` elements) contribute their name and their body rendered to
    HTML; any other docinfo child contributes its tag name and plain text.
    Each value is passed through ``_process_metadata`` before being stored.

    Keys are lowercased so that ``:Summary:`` and ``:summary:`` end up under
    the same key and reach the same metadata processor, matching what the
    Markdown and HTML readers do with their keys.
    """
    output = {}
    for docinfo in document.traverse(docutils.nodes.docinfo):
        for element in docinfo.children:
            if element.tagname == 'field':  # custom fields (e.g. summary)
                name_elem, body_elem = element.children
                name = name_elem.astext()
                value = render_node_to_html(document, body_elem)
            else:  # standard fields (e.g. address)
                name = element.tagname
                value = element.astext()

            # Normalise the key before the processor lookup; otherwise a
            # field written as ``:Tags:`` would skip its processor.
            name = name.lower()
            output[name] = _process_metadata(name, value)
    return output
|
||||
|
||||
|
||||
class RstReader(Reader):
    """Reader for reStructuredText (``.rst``) files, backed by docutils."""

    # Truthiness of the module-level ``docutils`` name decides availability.
    enabled = bool(docutils)
    extension = "rst"

    def _parse_metadata(self, document):
        """Return the metadata dict extracted from a docutils document."""
        return get_metadata(document)

    def _get_publisher(self, filename):
        """Publish ``filename`` with docutils and return the publisher.

        The publisher uses the standalone reader, the reStructuredText
        parser and the HTML writer, writes to a string destination, and
        starts section headers at level 2. After ``publish()`` has run, the
        returned object exposes ``document`` and ``writer.parts``.
        """
        extra_params = {'initial_header_level': '2'}
        pub = docutils.core.Publisher(destination_class=docutils.io.StringOutput)
        pub.set_components('standalone', 'restructuredtext', 'html')
        pub.process_programmatic_settings(None, extra_params, None)
        pub.set_source(source_path=filename)
        pub.publish()
        return pub

    def read(self, filename):
        """Parse a reStructuredText file.

        Returns a ``(content, metadata)`` tuple: ``content`` is the ``body``
        part produced by the HTML writer, ``metadata`` is the dict built
        from the document's docinfo. The document title is used for
        ``metadata['title']`` only when the docinfo did not provide one.
        """
        pub = self._get_publisher(filename)
        parts = pub.writer.parts
        content = parts.get('body')

        metadata = self._parse_metadata(pub.document)
        metadata.setdefault('title', parts.get('title'))

        return content, metadata
|
||||
|
||||
|
||||
class MarkdownReader(Reader):
|
||||
enabled = bool(Markdown)
|
||||
extension = "md"
|
||||
|
|
@ -64,13 +105,11 @@ class MarkdownReader(Reader):
|
|||
text = open(filename)
|
||||
md = Markdown(extensions = ['meta', 'codehilite'])
|
||||
content = md.convert(text)
|
||||
|
||||
|
||||
metadata = {}
|
||||
for name, value in md.Meta.items():
|
||||
name = name.lower()
|
||||
metadata[name] = _METADATA_PROCESSORS.get(
|
||||
name, lambda x:x
|
||||
)(value[0])
|
||||
metadata[name] = _process_metadata(name, value[0])
|
||||
return content, metadata
|
||||
|
||||
|
||||
|
|
@ -85,7 +124,8 @@ class HtmlReader(Reader):
|
|||
for i in self._re.findall(content):
|
||||
key = i.split(':')[0][5:].strip()
|
||||
value = i.split(':')[-1][:-3].strip()
|
||||
metadata[key.lower()] = value
|
||||
name = key.lower()
|
||||
metadata[name] = _process_metadata(name, value)
|
||||
|
||||
return content, metadata
|
||||
|
||||
|
|
|
|||
27
pelican/tests/test_readers.py
Normal file
27
pelican/tests/test_readers.py
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
# coding: utf-8
|
||||
import unittest2
|
||||
import os
|
||||
import datetime
|
||||
from pelican import readers
|
||||
|
||||
CUR_DIR = os.path.dirname(__file__)
|
||||
CONTENT_PATH = os.path.join(CUR_DIR, '..', '..', 'samples', 'content')
|
||||
|
||||
def _filename(*args):
    """Build a path below ``CONTENT_PATH`` from the given components."""
    components = (CONTENT_PATH,) + args
    return os.path.join(*components)
|
||||
|
||||
|
||||
class RstReaderTest(unittest2.TestCase):
    """Checks the metadata ``RstReader`` extracts from the sample article."""

    def test_metadata(self):
        """Reading ``super_article.rst`` yields the expected metadata dict."""
        source = _filename('super_article.rst')
        _, metadata = readers.RstReader().read(source)

        expected = {
            'title': 'This is a super article !',
            'author': u'Alexis Métaireau',
            'category': 'yeah',
            'date': datetime.datetime(2010, 12, 2, 10, 14),
            'tags': ['foo', 'bar', 'foobar'],
            'summary': 'Multi-line metadata should be supported\nas well as <strong>inline markup</strong>.',
        }
        self.assertDictEqual(metadata, expected)
|
||||
|
|
@ -5,7 +5,9 @@ This is a super article !
|
|||
:date: 2010-12-02 10:14
|
||||
:category: yeah
|
||||
:author: Alexis Métaireau
|
||||
:summary: This is a simple test
|
||||
:summary:
|
||||
Multi-line metadata should be supported
|
||||
as well as **inline markup**.
|
||||
|
||||
Some content here !
|
||||
|
||||
|
|
|
|||
|
|
@ -1,13 +1,13 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
AUTHOR = u'Alexis Métaireau'
|
||||
SITENAME = u"Alexis' log"
|
||||
SITENAME = u"Alexis' log"
|
||||
SITEURL = 'http://blog.notmyidea.org'
|
||||
|
||||
GITHUB_URL = 'http://github.com/ametaireau/'
|
||||
DISQUS_SITENAME = "blog-notmyidea"
|
||||
PDF_GENERATOR = False
|
||||
REVERSE_CATEGORY_ORDER = True
|
||||
LOCALE = 'fr_FR.utf8'
|
||||
LOCALE = 'fr_FR.utf-8'
|
||||
DEFAULT_PAGINATION = 2
|
||||
|
||||
FEED_RSS = 'feeds/all.rss.xml'
|
||||
|
|
@ -33,6 +33,6 @@ STATIC_PATHS = ["pictures",]
|
|||
# A list of files to copy from the source to the destination
|
||||
FILES_TO_COPY = (('extra/robots.txt', 'robots.txt'),)
|
||||
|
||||
# foobar will not be used, because it's not in caps. All configuration keys
|
||||
# foobar will not be used, because it's not in caps. All configuration keys
|
||||
# have to be in caps
|
||||
foobar = "barbaz"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue