2011-02-01 22:49:33 +00:00
|
|
|
# -*- coding: utf-8 -*-
|
2013-01-11 02:57:43 +01:00
|
|
|
from __future__ import unicode_literals, print_function
|
|
|
|
|
|
2012-11-28 00:29:30 +01:00
|
|
|
import os
|
|
|
|
|
import re
|
2011-02-24 05:15:04 +00:00
|
|
|
try:
    import docutils
    import docutils.core
    import docutils.io
    from docutils.writers.html4css1 import HTMLTranslator

    # import the directives to have pygments support
    from pelican import rstdirectives  # NOQA
except ImportError:
    # Keep both names bound and falsy so later references such as
    # RstReader's ``enabled = bool(docutils)`` see a defined name instead
    # of raising NameError.  (Note that docutils is effectively required:
    # the translator classes below subclass HTMLTranslator.)
    core = False
    docutils = False  # NOQA
|
|
|
|
|
try:
|
|
|
|
|
from markdown import Markdown
|
|
|
|
|
except ImportError:
|
2012-03-09 16:21:38 +01:00
|
|
|
Markdown = False # NOQA
|
2012-10-28 07:37:53 -07:00
|
|
|
try:
|
|
|
|
|
from asciidocapi import AsciiDocAPI
|
|
|
|
|
asciidoc = True
|
|
|
|
|
except ImportError:
|
|
|
|
|
asciidoc = False
|
2010-10-30 00:56:40 +01:00
|
|
|
|
2012-06-20 19:59:32 -04:00
|
|
|
import cgi
|
2013-01-28 22:21:45 -05:00
|
|
|
try:
|
|
|
|
|
from html.parser import HTMLParser
|
|
|
|
|
except ImportError:
|
|
|
|
|
from HTMLParser import HTMLParser
|
2012-06-20 19:52:17 -04:00
|
|
|
|
2012-03-09 16:21:38 +01:00
|
|
|
from pelican.contents import Category, Tag, Author
|
2012-09-08 13:07:51 +03:00
|
|
|
from pelican.utils import get_date, pelican_open
|
2010-10-30 00:56:40 +01:00
|
|
|
|
|
|
|
|
|
2013-03-12 12:19:53 -07:00
|
|
|
# Maps a lowercase metadata field name to a processor callable.  Each
# processor receives (raw_value, settings) and returns the parsed object
# stored by Reader.process_metadata; fields without an entry pass
# through unchanged.
METADATA_PROCESSORS = {
    # comma-separated string -> list of Tag objects
    'tags': lambda x, y: [Tag(tag, y) for tag in x.split(',')],
    # date string -> datetime (the settings argument is unused here)
    'date': lambda x, y: get_date(x),
    'status': lambda x, y: x.strip(),
    # Category/Author wrappers already take (value, settings) directly
    'category': Category,
    'author': Author,
}
|
2010-10-30 00:56:40 +01:00
|
|
|
|
2012-03-09 16:21:38 +01:00
|
|
|
|
2011-02-24 05:15:04 +00:00
|
|
|
class Reader(object):
    """Base class for all content readers.

    The default implementation is the "static" reader: it claims files
    that should be copied verbatim and therefore yields neither content
    nor metadata.
    """

    enabled = True
    file_extensions = ['static']
    extensions = None

    def __init__(self, settings):
        self.settings = settings

    def process_metadata(self, name, value):
        """Run *value* through the processor registered for *name*, if any."""
        if name in METADATA_PROCESSORS:
            processor = METADATA_PROCESSORS[name]
            return processor(value, self.settings)
        return value

    def read(self, source_path):
        "No-op parser"
        return None, {}
|
|
|
|
|
|
2012-03-09 16:21:38 +01:00
|
|
|
|
2011-05-10 07:55:30 +06:00
|
|
|
class _FieldBodyTranslator(HTMLTranslator):
    """HTML translator that renders just a docinfo field body.

    The ``field_body`` wrapper node itself is suppressed so only the
    inner HTML fragment is produced.
    """

    def __init__(self, document):
        HTMLTranslator.__init__(self, document)
        self.compact_p = None

    def astext(self):
        """Return the accumulated HTML fragment."""
        return ''.join(self.body)

    # Swallow the wrapper node; its children are still visited.
    def visit_field_body(self, node):
        pass

    def depart_field_body(self, node):
        pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def render_node_to_html(document, node):
    """Render a single docutils *node* of *document* to an HTML fragment."""
    translator = _FieldBodyTranslator(document)
    node.walkabout(translator)
    return translator.astext()
|
|
|
|
|
|
2012-03-09 16:21:38 +01:00
|
|
|
|
2012-07-17 13:30:06 +02:00
|
|
|
class PelicanHTMLTranslator(HTMLTranslator):
    """HTML translator adding ``<abbr>`` support to docutils' output."""

    def visit_abbreviation(self, node):
        # Expose the reST explanation (if any) as the tooltip text.
        if node.hasattr('explanation'):
            attrs = {'title': node['explanation']}
        else:
            attrs = {}
        self.body.append(self.starttag(node, 'abbr', '', **attrs))

    def depart_abbreviation(self, node):
        self.body.append('</abbr>')
|
|
|
|
|
|
|
|
|
|
|
2011-02-24 05:15:04 +00:00
|
|
|
class RstReader(Reader):
    """Reader for reStructuredText sources, built on top of docutils."""

    enabled = bool(docutils)
    file_extensions = ['rst']

    def _parse_metadata(self, document):
        """Return the dict containing document metadata"""
        metadata = {}
        for docinfo in document.traverse(docutils.nodes.docinfo):
            for element in docinfo.children:
                if element.tagname == 'field':  # custom fields (e.g. summary)
                    field_name, field_body = element.children
                    key = field_name.astext()
                    if key == 'summary':
                        # Keep inline markup: render the body to HTML.
                        raw_value = render_node_to_html(document, field_body)
                    else:
                        raw_value = field_body.astext()
                else:  # standard fields (e.g. address)
                    key = element.tagname
                    raw_value = element.astext()
                key = key.lower()
                metadata[key] = self.process_metadata(key, raw_value)
        return metadata

    def _get_publisher(self, source_path):
        """Build, run and return a docutils Publisher for *source_path*."""
        extra_params = {
            'initial_header_level': '2',
            'syntax_highlight': 'short',
        }
        pub = docutils.core.Publisher(
            destination_class=docutils.io.StringOutput)
        pub.set_components('standalone', 'restructuredtext', 'html')
        # Use our translator so abbreviations render as <abbr> tags.
        pub.writer.translator_class = PelicanHTMLTranslator
        pub.process_programmatic_settings(None, extra_params, None)
        pub.set_source(source_path=source_path)
        pub.publish()
        return pub

    def read(self, source_path):
        """Parses restructured text"""
        pub = self._get_publisher(source_path)
        parts = pub.writer.parts

        metadata = self._parse_metadata(pub.document)
        # Fall back to the document title when no :title: field was given.
        metadata.setdefault('title', parts.get('title'))

        return parts.get('body'), metadata
|
2010-10-30 00:56:40 +01:00
|
|
|
|
2011-05-10 07:55:30 +06:00
|
|
|
|
2011-02-24 05:15:04 +00:00
|
|
|
class MarkdownReader(Reader):
    """Reader for Markdown files (requires the ``markdown`` package)."""

    enabled = bool(Markdown)
    file_extensions = ['md', 'markdown', 'mkd']
    default_extensions = ['codehilite(css_class=highlight)', 'extra']

    def __init__(self, *args, **kwargs):
        super(MarkdownReader, self).__init__(*args, **kwargs)
        # Copy the extension list before modifying it: appending to the
        # shared class-level default (or to the user's settings list)
        # would make every new MarkdownReader instance grow the same
        # list with yet another 'meta' entry.
        self.extensions = list(self.settings.get('MD_EXTENSIONS',
                                                 self.default_extensions))
        if 'meta' not in self.extensions:
            self.extensions.append('meta')
        self._md = Markdown(extensions=self.extensions)

    def _parse_metadata(self, meta):
        """Return the dict containing document metadata"""
        output = {}
        for name, value in meta.items():
            name = name.lower()
            if name == "summary":
                # Meta values are lists of lines; run the summary through
                # Markdown so it may contain inline markup.
                summary_values = "\n".join(str(item) for item in value)
                summary = self._md.convert(summary_values)
                output[name] = self.process_metadata(name, summary)
            else:
                # Other fields keep only the first line of their value.
                output[name] = self.process_metadata(name, value[0])
        return output

    def read(self, source_path):
        """Parse content and metadata of markdown files"""
        with pelican_open(source_path) as text:
            content = self._md.convert(text)

        metadata = self._parse_metadata(self._md.Meta)
        return content, metadata
|
2010-10-31 00:08:16 +01:00
|
|
|
|
2013-03-03 20:12:31 -08:00
|
|
|
|
2012-06-14 23:08:34 -04:00
|
|
|
class HTMLReader(Reader):
|
|
|
|
|
"""Parses HTML files as input, looking for meta, title, and body tags"""
|
|
|
|
|
file_extensions = ['htm', 'html']
|
|
|
|
|
enabled = True
|
2011-02-14 19:10:01 +01:00
|
|
|
|
2012-06-14 23:08:34 -04:00
|
|
|
class _HTMLParser(HTMLParser):
|
|
|
|
|
def __init__(self, settings):
|
|
|
|
|
HTMLParser.__init__(self)
|
|
|
|
|
self.body = ''
|
|
|
|
|
self.metadata = {}
|
|
|
|
|
self.settings = settings
|
2012-06-10 18:27:38 -04:00
|
|
|
|
|
|
|
|
self._data_buffer = ''
|
2012-06-14 23:08:34 -04:00
|
|
|
|
|
|
|
|
self._in_top_level = True
|
|
|
|
|
self._in_head = False
|
2012-06-10 18:27:38 -04:00
|
|
|
self._in_title = False
|
|
|
|
|
self._in_body = False
|
2012-06-14 23:08:34 -04:00
|
|
|
self._in_tags = False
|
|
|
|
|
|
|
|
|
|
def handle_starttag(self, tag, attrs):
|
|
|
|
|
if tag == 'head' and self._in_top_level:
|
|
|
|
|
self._in_top_level = False
|
|
|
|
|
self._in_head = True
|
|
|
|
|
elif tag == 'title' and self._in_head:
|
|
|
|
|
self._in_title = True
|
|
|
|
|
self._data_buffer = ''
|
|
|
|
|
elif tag == 'body' and self._in_top_level:
|
|
|
|
|
self._in_top_level = False
|
|
|
|
|
self._in_body = True
|
|
|
|
|
self._data_buffer = ''
|
|
|
|
|
elif tag == 'meta' and self._in_head:
|
|
|
|
|
self._handle_meta_tag(attrs)
|
|
|
|
|
|
|
|
|
|
elif self._in_body:
|
|
|
|
|
self._data_buffer += self.build_tag(tag, attrs, False)
|
|
|
|
|
|
|
|
|
|
def handle_endtag(self, tag):
|
|
|
|
|
if tag == 'head':
|
|
|
|
|
if self._in_head:
|
|
|
|
|
self._in_head = False
|
|
|
|
|
self._in_top_level = True
|
|
|
|
|
elif tag == 'title':
|
|
|
|
|
self._in_title = False
|
|
|
|
|
self.metadata['title'] = self._data_buffer
|
|
|
|
|
elif tag == 'body':
|
|
|
|
|
self.body = self._data_buffer
|
|
|
|
|
self._in_body = False
|
|
|
|
|
self._in_top_level = True
|
|
|
|
|
elif self._in_body:
|
|
|
|
|
self._data_buffer += '</{}>'.format(cgi.escape(tag))
|
|
|
|
|
|
|
|
|
|
def handle_startendtag(self, tag, attrs):
|
|
|
|
|
if tag == 'meta' and self._in_head:
|
|
|
|
|
self._handle_meta_tag(attrs)
|
|
|
|
|
if self._in_body:
|
|
|
|
|
self._data_buffer += self.build_tag(tag, attrs, True)
|
|
|
|
|
|
|
|
|
|
def handle_comment(self, data):
|
2013-01-28 22:11:06 -05:00
|
|
|
self._data_buffer += '<!--{}-->'.format(data)
|
2012-06-14 23:08:34 -04:00
|
|
|
|
|
|
|
|
def handle_data(self, data):
|
|
|
|
|
self._data_buffer += data
|
|
|
|
|
|
2012-06-20 23:19:06 -04:00
|
|
|
def handle_entityref(self, data):
|
|
|
|
|
self._data_buffer += '&{};'.format(data)
|
|
|
|
|
|
|
|
|
|
def handle_charref(self, data):
|
2012-06-21 09:05:27 -04:00
|
|
|
self._data_buffer += '&#{};'.format(data)
|
2013-03-03 20:12:31 -08:00
|
|
|
|
2012-06-14 23:08:34 -04:00
|
|
|
def build_tag(self, tag, attrs, close_tag):
|
|
|
|
|
result = '<{}'.format(cgi.escape(tag))
|
2013-03-03 20:12:31 -08:00
|
|
|
for k, v in attrs:
|
2013-02-10 11:02:52 -05:00
|
|
|
result += ' ' + cgi.escape(k)
|
|
|
|
|
if v is not None:
|
|
|
|
|
result += '="{}"'.format(cgi.escape(v))
|
2012-06-14 23:08:34 -04:00
|
|
|
if close_tag:
|
|
|
|
|
return result + ' />'
|
|
|
|
|
return result + '>'
|
|
|
|
|
|
|
|
|
|
def _handle_meta_tag(self, attrs):
|
|
|
|
|
name = self._attr_value(attrs, 'name').lower()
|
|
|
|
|
contents = self._attr_value(attrs, 'contents', '')
|
|
|
|
|
|
|
|
|
|
if name == 'keywords':
|
|
|
|
|
name = 'tags'
|
2012-06-10 18:27:38 -04:00
|
|
|
self.metadata[name] = contents
|
|
|
|
|
|
2012-06-14 23:08:34 -04:00
|
|
|
@classmethod
|
|
|
|
|
def _attr_value(cls, attrs, name, default=None):
|
|
|
|
|
return next((x[1] for x in attrs if x[0] == name), default)
|
2012-06-10 18:27:38 -04:00
|
|
|
|
|
|
|
|
def read(self, filename):
|
2013-01-28 21:46:54 -05:00
|
|
|
"""Parse content and metadata of HTML files"""
|
2013-01-28 22:11:06 -05:00
|
|
|
with pelican_open(filename) as content:
|
2012-06-14 23:08:34 -04:00
|
|
|
parser = self._HTMLParser(self.settings)
|
2012-06-10 18:27:38 -04:00
|
|
|
parser.feed(content)
|
|
|
|
|
parser.close()
|
2011-02-14 19:10:01 +01:00
|
|
|
|
2012-06-14 23:08:34 -04:00
|
|
|
metadata = {}
|
|
|
|
|
for k in parser.metadata:
|
|
|
|
|
metadata[k] = self.process_metadata(k, parser.metadata[k])
|
|
|
|
|
return parser.body, metadata
|
2012-10-28 07:37:53 -07:00
|
|
|
|
2013-03-03 20:12:31 -08:00
|
|
|
|
2012-10-28 07:37:53 -07:00
|
|
|
class AsciiDocReader(Reader):
    # Reader for AsciiDoc sources driven through the asciidoc API.
    # NOTE(review): this method body relies on Python 2-only names
    # (cStringIO, unicode); it cannot run under Python 3 as written --
    # confirm before porting.
    enabled = bool(asciidoc)
    file_extensions = ['asc']
    # Always prepended to user-supplied options: emit the document body
    # only, and keep literal "\n" sequences as newlines.
    default_options = ["--no-header-footer", "-a newline=\\n"]

    def read(self, source_path):
        """Parse content and metadata of asciidoc files"""
        # Deferred import: only fails when an .asc file is actually read.
        from cStringIO import StringIO
        with pelican_open(source_path) as source:
            text = StringIO(source)
        content = StringIO()
        ad = AsciiDocAPI()

        # ASCIIDOC_OPTIONS may be a list or a comma-separated string.
        options = self.settings.get('ASCIIDOC_OPTIONS', [])
        if isinstance(options, (str, unicode)):
            options = [m.strip() for m in options.split(',')]
        options = self.default_options + options
        for o in options:
            # An option may carry its own argument, e.g. "-a foo=bar".
            ad.options(*o.split())

        ad.execute(text, content, backend="html4")
        content = content.getvalue()

        # Every asciidoc document attribute becomes a metadata entry.
        metadata = {}
        for name, value in ad.asciidoc.document.attributes.items():
            name = name.lower()
            metadata[name] = self.process_metadata(name, value)
        # asciidoc exposes the document title as 'doctitle'.
        if 'doctitle' in metadata:
            metadata['title'] = metadata['doctitle']
        return content, metadata
|
2011-02-14 19:10:01 +01:00
|
|
|
|
|
|
|
|
|
2013-03-12 12:19:53 -07:00
|
|
|
# Map each supported file extension to the Reader class handling it.
# When two readers claim the same extension, the later subclass wins.
EXTENSIONS = {
    ext: reader_cls
    for reader_cls in [Reader] + Reader.__subclasses__()
    for ext in reader_cls.file_extensions
}
|
2010-10-30 00:56:40 +01:00
|
|
|
|
2012-03-09 16:17:09 +01:00
|
|
|
|
2013-01-04 10:50:09 -05:00
|
|
|
def read_file(path, fmt=None, settings=None):
    """Read the file at *path* and return ``(content, metadata)``.

    *fmt* selects the reader; when omitted it is derived from the file
    extension.  Raises TypeError for unknown formats and ValueError when
    the matching reader's dependencies are missing.
    """
    base, ext = os.path.splitext(os.path.basename(path))
    if not fmt:
        fmt = ext[1:]

    if fmt not in EXTENSIONS:
        raise TypeError('Pelican does not know how to parse {}'.format(path))

    reader = EXTENSIONS[fmt](settings)
    settings_key = '%s_EXTENSIONS' % fmt.upper()

    if settings and settings_key in settings:
        reader.extensions = settings[settings_key]

    if not reader.enabled:
        raise ValueError("Missing dependencies for %s" % fmt)

    # Path-derived metadata first; the reader's own metadata overrides it.
    metadata = parse_path_metadata(
        path=path, settings=settings, process=reader.process_metadata)
    content, reader_metadata = reader.read(path)
    metadata.update(reader_metadata)

    # eventually filter the content with typogrify if asked so
    if content and settings and settings.get('TYPOGRIFY'):
        from typogrify.filters import typogrify
        content = typogrify(content)
        # Not every reader guarantees a title (e.g. HTML without a
        # <title> tag); guard so typogrify never sees None/missing.
        if metadata.get('title'):
            metadata['title'] = typogrify(metadata['title'])

    return content, metadata
|
2013-01-04 14:25:12 -05:00
|
|
|
|
|
|
|
|
def parse_path_metadata(path, settings=None, process=None):
    """Extract a metadata dictionary from a file's path

    FILENAME_METADATA is matched against the basename (extension
    stripped), PATH_METADATA against the full path; for a key captured
    by both, the FILENAME_METADATA value wins.  Each value is passed
    through *process(key, value)* when *process* is given.

    >>> import pprint
    >>> settings = {
    ...     'FILENAME_METADATA': '(?P<slug>[^.]*).*',
    ...     'PATH_METADATA':
    ...         '(?P<category>[^/]*)/(?P<date>\d{4}-\d{2}-\d{2})/.*',
    ...     }
    >>> reader = Reader(settings=settings)
    >>> metadata = parse_path_metadata(
    ...     path='my-cat/2013-01-01/my-slug.html',
    ...     settings=settings,
    ...     process=reader.process_metadata)
    >>> pprint.pprint(metadata)  # doctest: +ELLIPSIS
    {'category': <pelican.urlwrappers.Category object at ...>,
     'date': datetime.datetime(2013, 1, 1, 0, 0),
     'slug': 'my-slug'}
    """
    metadata = {}
    base, ext = os.path.splitext(os.path.basename(path))
    if settings:
        for key, data in [('FILENAME_METADATA', base),
                          ('PATH_METADATA', path),
                          ]:
            regexp = settings.get(key)
            if regexp:
                match = re.match(regexp, data)
                if match:
                    # .items() for py3k compat.
                    for k, v in match.groupdict().items():
                        # Lowercase BEFORE the dedup check so a
                        # capitalized group name cannot overwrite a value
                        # an earlier pattern already set for the same key.
                        k = k.lower()  # metadata must be lowercase
                        if k not in metadata:
                            if process:
                                v = process(k, v)
                            metadata[k] = v
    return metadata
|