From cacc6db9a4cbbce979e4ebf90c93c770f6359453 Mon Sep 17 00:00:00 2001
From: Mikhail Korobov <kmike84@gmail.com>
Date: Tue, 10 May 2011 07:55:30 +0600
Subject: [PATCH 1/2] ReST metadata parsing using docutils.

---
 pelican/readers.py | 98 ++++++++++++++++++++++++++++++++--------------
 1 file changed, 69 insertions(+), 29 deletions(-)

diff --git a/pelican/readers.py b/pelican/readers.py
index 7d799f88..11cbde4e 100644
--- a/pelican/readers.py
+++ b/pelican/readers.py
@@ -1,6 +1,9 @@
 # -*- coding: utf-8 -*-
 try:
-    from docutils import core
+    import docutils
+    import docutils.core
+    import docutils.io
+    from docutils.writers.html4css1 import HTMLTranslator
 
     # import the directives to have pygments support
     from pelican import rstdirectives
@@ -21,40 +24,78 @@ _METADATA_PROCESSORS = {
     'status': unicode.strip,
 }
 
+def _process_metadata(name, value):
+    """Apply the metadata processor registered for ``name``, if any."""
+    processor = _METADATA_PROCESSORS.get(name)
+    return value if processor is None else processor(value)
+
 
 class Reader(object):
     enabled = True
 
+
+class _FieldBodyTranslator(HTMLTranslator):  # NOTE(review): name comes from the try-guarded import
+    """HTMLTranslator variant used to render the contents of one field body."""
+    def astext(self):
+        return ''.join(self.body)  # concatenate only the collected body fragments
+
+    def visit_field_body(self, node):
+        pass  # no-op: write nothing when entering the field_body node itself
+
+    def depart_field_body(self, node):
+        pass  # no-op: write nothing when leaving the field_body node itself
+
+
+def render_node_to_html(document, node):
+    translator = _FieldBodyTranslator(document)  # HTML translator for this document
+    node.walkabout(translator)  # translator collects output while `node` is walked
+    return translator.astext()
+
+def get_metadata(document):
+    """Return the docinfo metadata dict; custom field names are lower-cased."""
+    output = {}
+    for docinfo in document.traverse(docutils.nodes.docinfo):
+        for element in docinfo.children:
+            if element.tagname == 'field':  # custom fields (e.g. summary)
+                name_elem, body_elem = element.children
+                name = name_elem.astext().lower()  # same key casing as the md/html readers
+                value = render_node_to_html(document, body_elem)
+            else:  # standard fields (e.g. address)
+                name = element.tagname
+                value = element.astext()
+
+            output[name] = _process_metadata(name, value)
+    return output
+
+
 class RstReader(Reader):
-    enabled = bool(core)
+    enabled = bool(docutils)  # NOTE(review): confirm the import fallback binds `docutils`
     extension = "rst"
 
-    def _parse_metadata(self, content):
-        """Return the dict containing metadata"""
-        output = {}
-        for m in re.compile('^:([a-z]+): (.*)\s', re.M).finditer(content):
-            name, value = m.group(1).lower(), m.group(2)
-            output[name] = _METADATA_PROCESSORS.get(
-                name, lambda x:x
-            )(value)
-        return output
+    def _parse_metadata(self, document):
+        return get_metadata(document)  # thin wrapper over the module-level helper
+
+    def _get_publisher(self, filename):
+        extra_params = {'initial_header_level': '2'}  # settings overrides
+        pub = docutils.core.Publisher(destination_class=docutils.io.StringOutput)
+        pub.set_components('standalone', 'restructuredtext', 'html')  # reader, parser, writer
+        pub.process_programmatic_settings(None, extra_params, None)
+        pub.set_source(source_path=filename)
+        pub.publish()  # read() uses pub.writer.parts and pub.document afterwards
+        return pub
 
     def read(self, filename):
-        """Parse restructured text"""
-        text = open(filename)
-        metadata = self._parse_metadata(text)
-        extra_params = {'input_encoding': 'unicode',
-                        'initial_header_level': '2'}
-        rendered_content = core.publish_parts(text,
-                                              source_path=filename,
-                                              writer_name='html',
-                                              settings_overrides=extra_params)
-        title = rendered_content.get('title')
-        content = rendered_content.get('body')
-        if not metadata.has_key('title'):
-            metadata['title'] = title
+        """Parse the reStructuredText file; return (content, metadata)."""
+        pub = self._get_publisher(filename)
+        parts = pub.writer.parts
+        content = parts.get('body')
+
+        metadata = self._parse_metadata(pub.document)
+        metadata.setdefault('title', parts.get('title'))  # keep an explicit title field
+
         return content, metadata
 
+
 class MarkdownReader(Reader):
     enabled = bool(Markdown)
     extension = "md"
@@ -64,13 +105,11 @@ class MarkdownReader(Reader):
         text = open(filename)
         md = Markdown(extensions = ['meta', 'codehilite'])
         content = md.convert(text)
-        
+
         metadata = {}
         for name, value in md.Meta.items():
             name = name.lower()
-            metadata[name] = _METADATA_PROCESSORS.get(
-                name, lambda x:x
-            )(value[0])
+            metadata[name] = _process_metadata(name, value[0])
         return content, metadata
 
 
@@ -85,7 +124,8 @@ class HtmlReader(Reader):
         for i in self._re.findall(content):
             key = i.split(':')[0][5:].strip()
             value = i.split(':')[-1][:-3].strip()
-            metadata[key.lower()] = value
+            name = key.lower()
+            metadata[name] = _process_metadata(name, value)
 
         return content, metadata
 

From 6cd425e408bf85cd12ba36c536bc3ff8d78c4fc7 Mon Sep 17 00:00:00 2001
From: Mikhail Korobov <kmike84@gmail.com>
Date: Wed, 11 May 2011 09:42:37 +0600
Subject: [PATCH 2/2] Basic test for the new rst reader. Locale is also
 converted to fr_FR.utf-8 (I wasn't able to run tests without this)

---
 pelican/tests/test_readers.py     | 27 +++++++++++++++++++++++++++
 samples/content/super_article.rst |  4 +++-
 samples/pelican.conf.py           |  6 +++---
 3 files changed, 33 insertions(+), 4 deletions(-)
 create mode 100644 pelican/tests/test_readers.py

diff --git a/pelican/tests/test_readers.py b/pelican/tests/test_readers.py
new file mode 100644
index 00000000..5a255209
--- /dev/null
+++ b/pelican/tests/test_readers.py
@@ -0,0 +1,27 @@
+# coding: utf-8
+import unittest2
+import os
+import datetime
+from pelican import readers
+
+CUR_DIR = os.path.dirname(__file__)
+CONTENT_PATH = os.path.join(CUR_DIR, '..', '..', 'samples', 'content')
+
+def _filename(*args):  # join path components onto CONTENT_PATH
+    return os.path.join(CONTENT_PATH, *args)
+
+
+class RstReaderTest(unittest2.TestCase):
+    """Checks the metadata RstReader returns for the sample super_article.rst."""
+    def test_metadata(self):
+        reader = readers.RstReader()
+        content, metadata = reader.read(_filename('super_article.rst'))  # content is not checked
+        expected = {
+            'category': 'yeah',
+            'author': u'Alexis Métaireau',
+            'title': 'This is a super article !',
+            'summary': 'Multi-line metadata should be supported\nas well as <strong>inline markup</strong>.',
+            'date': datetime.datetime(2010, 12, 2, 10, 14),
+            'tags': ['foo', 'bar', 'foobar'],
+        }
+        self.assertDictEqual(metadata, expected)
diff --git a/samples/content/super_article.rst b/samples/content/super_article.rst
index b3e22051..03273fad 100644
--- a/samples/content/super_article.rst
+++ b/samples/content/super_article.rst
@@ -5,7 +5,9 @@ This is a super article !
 :date: 2010-12-02 10:14
 :category: yeah
 :author: Alexis Métaireau
-:summary: This is a simple test
+:summary:
+    Multi-line metadata should be supported
+    as well as **inline markup**.
 
 Some content here !
 
diff --git a/samples/pelican.conf.py b/samples/pelican.conf.py
index a3a07fad..8648e7cb 100755
--- a/samples/pelican.conf.py
+++ b/samples/pelican.conf.py
@@ -1,13 +1,13 @@
 # -*- coding: utf-8 -*-
 AUTHOR = u'Alexis Métaireau'
-SITENAME = u"Alexis' log" 
+SITENAME = u"Alexis' log"
 SITEURL = 'http://blog.notmyidea.org'
 
 GITHUB_URL = 'http://github.com/ametaireau/'
 DISQUS_SITENAME = "blog-notmyidea"
 PDF_GENERATOR = False
 REVERSE_CATEGORY_ORDER = True
-LOCALE = 'fr_FR.utf8'
+LOCALE = 'fr_FR.utf-8'
 DEFAULT_PAGINATION = 2
 
 FEED_RSS = 'feeds/all.rss.xml'
@@ -33,6 +33,6 @@ STATIC_PATHS = ["pictures",]
 # A list of files to copy from the source to the destination
 FILES_TO_COPY = (('extra/robots.txt', 'robots.txt'),)
 
-# foobar will not be used, because it's not in caps. All configuration keys 
+# foobar will not be used, because it's not in caps. All configuration keys
 # have to be in caps
 foobar = "barbaz"