mirror of
https://github.com/getpelican/pelican.git
synced 2025-10-15 20:28:56 +02:00
Move Article metadata extraction from generators to readers
There's no reason why this information should be Article-specific. This commit breaks the other generators for the moment. I'll fix them shortly.
This commit is contained in:
parent
f2d6f77462
commit
a9c530281e
2 changed files with 51 additions and 40 deletions
|
|
@ -5,7 +5,6 @@ import os
|
||||||
import math
|
import math
|
||||||
import random
|
import random
|
||||||
import logging
|
import logging
|
||||||
import datetime
|
|
||||||
import shutil
|
import shutil
|
||||||
|
|
||||||
from codecs import open
|
from codecs import open
|
||||||
|
|
@ -19,9 +18,7 @@ from jinja2 import (
|
||||||
TemplateNotFound
|
TemplateNotFound
|
||||||
)
|
)
|
||||||
|
|
||||||
from pelican.contents import (
|
from pelican.contents import Article, Page, Static, is_valid_content
|
||||||
Article, Page, Category, Static, is_valid_content
|
|
||||||
)
|
|
||||||
from pelican.readers import read_file
|
from pelican.readers import read_file
|
||||||
from pelican.utils import copy, process_translations, mkdir_p, DateFormatter
|
from pelican.utils import copy, process_translations, mkdir_p, DateFormatter
|
||||||
from pelican import signals
|
from pelican import signals
|
||||||
|
|
@ -383,37 +380,17 @@ class ArticlesGenerator(Generator):
|
||||||
article_path,
|
article_path,
|
||||||
exclude=self.settings['ARTICLE_EXCLUDES']):
|
exclude=self.settings['ARTICLE_EXCLUDES']):
|
||||||
try:
|
try:
|
||||||
signals.article_generate_preread.send(self)
|
article = read_file(
|
||||||
content, metadata = read_file(f, settings=self.settings)
|
base_path=self.path, path=f, content_class=Article,
|
||||||
|
settings=self.settings, context=self.context,
|
||||||
|
preread_signal=signals.article_generator_preread,
|
||||||
|
preread_sender=self,
|
||||||
|
context_signal=signals.article_generator_context,
|
||||||
|
context_sender=self)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning('Could not process %s\n%s' % (f, str(e)))
|
logger.warning('Could not process {}\n{}'.format(f, e))
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# if no category is set, use the name of the path as a category
|
|
||||||
if 'category' not in metadata:
|
|
||||||
|
|
||||||
if (self.settings['USE_FOLDER_AS_CATEGORY']
|
|
||||||
and os.path.dirname(f) != article_path):
|
|
||||||
# if the article is in a subdirectory
|
|
||||||
category = os.path.basename(os.path.dirname(f))
|
|
||||||
else:
|
|
||||||
# if the article is not in a subdirectory
|
|
||||||
category = self.settings['DEFAULT_CATEGORY']
|
|
||||||
|
|
||||||
if category != '':
|
|
||||||
metadata['category'] = Category(category, self.settings)
|
|
||||||
|
|
||||||
if 'date' not in metadata and self.settings.get('DEFAULT_DATE'):
|
|
||||||
if self.settings['DEFAULT_DATE'] == 'fs':
|
|
||||||
metadata['date'] = datetime.datetime.fromtimestamp(
|
|
||||||
os.stat(f).st_ctime)
|
|
||||||
else:
|
|
||||||
metadata['date'] = datetime.datetime(
|
|
||||||
*self.settings['DEFAULT_DATE'])
|
|
||||||
|
|
||||||
signals.article_generate_context.send(self, metadata=metadata)
|
|
||||||
article = Article(content, metadata, settings=self.settings,
|
|
||||||
source_path=f, context=self.context)
|
|
||||||
if not is_valid_content(article, f):
|
if not is_valid_content(article, f):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
from __future__ import unicode_literals, print_function
|
from __future__ import unicode_literals, print_function
|
||||||
|
|
||||||
|
import datetime
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
|
@ -369,8 +370,13 @@ def read_file(base_path, path, content_class=Page, fmt=None,
|
||||||
if not reader.enabled:
|
if not reader.enabled:
|
||||||
raise ValueError("Missing dependencies for %s" % fmt)
|
raise ValueError("Missing dependencies for %s" % fmt)
|
||||||
|
|
||||||
metadata = parse_path_metadata(
|
metadata = default_metadata(
|
||||||
path=source_path, settings=settings, process=reader.process_metadata)
|
settings=settings, process=reader.process_metadata)
|
||||||
|
metadata.update(path_metadata(
|
||||||
|
full_path=path, source_path=source_path, settings=settings))
|
||||||
|
metadata.update(parse_path_metadata(
|
||||||
|
source_path=source_path, settings=settings,
|
||||||
|
process=reader.process_metadata))
|
||||||
content, reader_metadata = reader.read(path)
|
content, reader_metadata = reader.read(path)
|
||||||
metadata.update(reader_metadata)
|
metadata.update(reader_metadata)
|
||||||
|
|
||||||
|
|
@ -391,7 +397,29 @@ def read_file(base_path, path, content_class=Page, fmt=None,
|
||||||
source_path=path,
|
source_path=path,
|
||||||
context=context)
|
context=context)
|
||||||
|
|
||||||
def parse_path_metadata(path, settings=None, process=None):
|
|
||||||
|
def default_metadata(settings=None, process=None):
    """Build the metadata implied purely by the site settings.

    :param settings: mapping of settings; may be ``None`` or empty, in
        which case an empty dict is returned.
    :param process: optional callable ``process(name, value)`` applied to
        the ``DEFAULT_CATEGORY`` value before it is stored.
    :return: a dict that may contain ``'category'`` (from
        ``DEFAULT_CATEGORY``) and ``'date'`` (from ``DEFAULT_DATE``).

    ``DEFAULT_DATE == 'fs'`` is deliberately ignored here because it needs
    the file on disk and is handled per-file elsewhere.
    """
    metadata = {}
    if not settings:
        return metadata

    if 'DEFAULT_CATEGORY' in settings:
        value = settings['DEFAULT_CATEGORY']
        if process:
            value = process('category', value)
        metadata['category'] = value

    # A key that is present but unset (None / empty) must not be unpacked
    # into datetime(); only a real date tuple yields a default date.
    default_date = settings.get('DEFAULT_DATE', None)
    if default_date and default_date != 'fs':
        metadata['date'] = datetime.datetime(*default_date)

    return metadata
|
||||||
|
|
||||||
|
|
||||||
|
def path_metadata(full_path, source_path, settings=None):
    """Return metadata derived from the file's location on disk.

    :param full_path: path used to ``stat`` the file.
    :param source_path: accepted for the caller's interface; not used by
        this function at present.
    :param settings: mapping of settings; may be ``None``.
    :return: ``{'date': <ctime as datetime>}`` when ``DEFAULT_DATE`` is
        ``'fs'``, otherwise an empty dict.
    :raises OSError: if ``full_path`` cannot be stat'ed.
    """
    metadata = {}
    if settings and settings.get('DEFAULT_DATE', None) == 'fs':
        # Stat the path we were given; the previous body referenced an
        # undefined name ``path`` and raised NameError.
        metadata['date'] = datetime.datetime.fromtimestamp(
            os.stat(full_path).st_ctime)
    return metadata
|
||||||
|
|
||||||
|
|
||||||
|
def parse_path_metadata(source_path, settings=None, process=None):
|
||||||
"""Extract a metadata dictionary from a file's path
|
"""Extract a metadata dictionary from a file's path
|
||||||
|
|
||||||
>>> import pprint
|
>>> import pprint
|
||||||
|
|
@ -402,7 +430,7 @@ def parse_path_metadata(path, settings=None, process=None):
|
||||||
... }
|
... }
|
||||||
>>> reader = Reader(settings=settings)
|
>>> reader = Reader(settings=settings)
|
||||||
>>> metadata = parse_path_metadata(
|
>>> metadata = parse_path_metadata(
|
||||||
... path='my-cat/2013-01-01/my-slug.html',
|
... source_path='my-cat/2013-01-01/my-slug.html',
|
||||||
... settings=settings,
|
... settings=settings,
|
||||||
... process=reader.process_metadata)
|
... process=reader.process_metadata)
|
||||||
>>> pprint.pprint(metadata) # doctest: +ELLIPSIS
|
>>> pprint.pprint(metadata) # doctest: +ELLIPSIS
|
||||||
|
|
@ -411,13 +439,19 @@ def parse_path_metadata(path, settings=None, process=None):
|
||||||
'slug': 'my-slug'}
|
'slug': 'my-slug'}
|
||||||
"""
|
"""
|
||||||
metadata = {}
|
metadata = {}
|
||||||
base, ext = os.path.splitext(os.path.basename(path))
|
dirname, basename = os.path.split(source_path)
|
||||||
|
base, ext = os.path.splitext(basename)
|
||||||
|
subdir = os.path.basename(dirname)
|
||||||
if settings:
|
if settings:
|
||||||
|
checks = []
|
||||||
for key,data in [('FILENAME_METADATA', base),
|
for key,data in [('FILENAME_METADATA', base),
|
||||||
('PATH_METADATA', path),
|
('PATH_METADATA', source_path),
|
||||||
]:
|
]:
|
||||||
regexp = settings.get(key)
|
checks.append((settings.get(key, None), data))
|
||||||
if regexp:
|
if settings.get('USE_FOLDER_AS_CATEGORY', None):
|
||||||
|
checks.insert(0, ('(?P<category>.*)', subdir))
|
||||||
|
for regexp,data in checks:
|
||||||
|
if regexp and data:
|
||||||
match = re.match(regexp, data)
|
match = re.match(regexp, data)
|
||||||
if match:
|
if match:
|
||||||
# .items() for py3k compat.
|
# .items() for py3k compat.
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue