From a9c530281e1928e9f9f3c2cb78312db784505ca1 Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Fri, 4 Jan 2013 18:14:28 -0500 Subject: [PATCH] Move Article metadata extraction from generators to readers There's no reason why this information should be Article-specific. This commit breaks the other generators for the moment. I'll fix them shortly. --- pelican/generators.py | 41 ++++++++--------------------------- pelican/readers.py | 50 ++++++++++++++++++++++++++++++++++++------- 2 files changed, 51 insertions(+), 40 deletions(-) diff --git a/pelican/generators.py b/pelican/generators.py index 75b61df2..7daa55d2 100644 --- a/pelican/generators.py +++ b/pelican/generators.py @@ -5,7 +5,6 @@ import os import math import random import logging -import datetime import shutil from codecs import open @@ -19,9 +18,7 @@ from jinja2 import ( TemplateNotFound ) -from pelican.contents import ( - Article, Page, Category, Static, is_valid_content -) +from pelican.contents import Article, Page, Static, is_valid_content from pelican.readers import read_file from pelican.utils import copy, process_translations, mkdir_p, DateFormatter from pelican import signals @@ -383,37 +380,17 @@ class ArticlesGenerator(Generator): article_path, exclude=self.settings['ARTICLE_EXCLUDES']): try: - signals.article_generate_preread.send(self) - content, metadata = read_file(f, settings=self.settings) + article = read_file( + base_path=self.path, path=f, content_class=Article, + settings=self.settings, context=self.context, + preread_signal=signals.article_generator_preread, + preread_sender=self, + context_signal=signals.article_generator_context, + context_sender=self) except Exception as e: - logger.warning('Could not process %s\n%s' % (f, str(e))) + logger.warning('Could not process {}\n{}'.format(f, e)) continue - # if no category is set, use the name of the path as a category - if 'category' not in metadata: - - if (self.settings['USE_FOLDER_AS_CATEGORY'] - and os.path.dirname(f) != 
article_path): - # if the article is in a subdirectory - category = os.path.basename(os.path.dirname(f)) - else: - # if the article is not in a subdirectory - category = self.settings['DEFAULT_CATEGORY'] - - if category != '': - metadata['category'] = Category(category, self.settings) - - if 'date' not in metadata and self.settings.get('DEFAULT_DATE'): - if self.settings['DEFAULT_DATE'] == 'fs': - metadata['date'] = datetime.datetime.fromtimestamp( - os.stat(f).st_ctime) - else: - metadata['date'] = datetime.datetime( - *self.settings['DEFAULT_DATE']) - - signals.article_generate_context.send(self, metadata=metadata) - article = Article(content, metadata, settings=self.settings, - source_path=f, context=self.context) if not is_valid_content(article, f): continue diff --git a/pelican/readers.py b/pelican/readers.py index 2de00b51..3cf69dcf 100644 --- a/pelican/readers.py +++ b/pelican/readers.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals, print_function +import datetime import logging import os import re @@ -369,8 +370,13 @@ def read_file(base_path, path, content_class=Page, fmt=None, if not reader.enabled: raise ValueError("Missing dependencies for %s" % fmt) - metadata = parse_path_metadata( - path=source_path, settings=settings, process=reader.process_metadata) + metadata = default_metadata( + settings=settings, process=reader.process_metadata) + metadata.update(path_metadata( + full_path=path, source_path=source_path, settings=settings)) + metadata.update(parse_path_metadata( + source_path=source_path, settings=settings, + process=reader.process_metadata)) content, reader_metadata = reader.read(path) metadata.update(reader_metadata) @@ -391,7 +397,29 @@ def read_file(base_path, path, content_class=Page, fmt=None, source_path=path, context=context) -def parse_path_metadata(path, settings=None, process=None): + +def default_metadata(settings=None, process=None): + metadata = {} + if settings: + if 'DEFAULT_CATEGORY' in 
settings: + value = settings['DEFAULT_CATEGORY'] + if process: + value = process('category', value) + metadata['category'] = value + if 'DEFAULT_DATE' in settings and settings['DEFAULT_DATE'] != 'fs': + metadata['date'] = datetime.datetime(*settings['DEFAULT_DATE']) + return metadata + + +def path_metadata(full_path, source_path, settings=None): + metadata = {} + if settings and settings.get('DEFAULT_DATE', None) == 'fs': + metadata['date'] = datetime.datetime.fromtimestamp( + os.stat(path).st_ctime) + return metadata + + +def parse_path_metadata(source_path, settings=None, process=None): """Extract a metadata dictionary from a file's path >>> import pprint @@ -402,7 +430,7 @@ def parse_path_metadata(path, settings=None, process=None): ... } >>> reader = Reader(settings=settings) >>> metadata = parse_path_metadata( - ... path='my-cat/2013-01-01/my-slug.html', + ... source_path='my-cat/2013-01-01/my-slug.html', ... settings=settings, ... process=reader.process_metadata) >>> pprint.pprint(metadata) # doctest: +ELLIPSIS @@ -411,13 +439,19 @@ def parse_path_metadata(path, settings=None, process=None): 'slug': 'my-slug'} """ metadata = {} - base, ext = os.path.splitext(os.path.basename(path)) + dirname, basename = os.path.split(source_path) + base, ext = os.path.splitext(basename) + subdir = os.path.basename(dirname) if settings: + checks = [] for key,data in [('FILENAME_METADATA', base), - ('PATH_METADATA', path), + ('PATH_METADATA', source_path), ]: - regexp = settings.get(key) - if regexp: + checks.append((settings.get(key, None), data)) + if settings.get('USE_FOLDER_AS_CATEGORY', None): + checks.insert(0, ('(?P<category>.*)', subdir)) + for regexp,data in checks: + if regexp and data: match = re.match(regexp, data) if match: # .items() for py3k compat.