1
0
Fork 0
forked from github/pelican

Log warnings about files that would have been processed by disabled readers

This commit is contained in:
boxydog 2024-06-17 08:11:21 -05:00
commit f19de98b9e
7 changed files with 119 additions and 13 deletions

View file

@ -30,7 +30,6 @@ from pelican.generators import (
) )
from pelican.plugins import signals from pelican.plugins import signals
from pelican.plugins._utils import get_plugin_name, load_plugins from pelican.plugins._utils import get_plugin_name, load_plugins
from pelican.readers import Readers
from pelican.server import ComplexHTTPRequestHandler, RootedHTTPServer from pelican.server import ComplexHTTPRequestHandler, RootedHTTPServer
from pelican.settings import read_settings from pelican.settings import read_settings
from pelican.utils import clean_output_dir, maybe_pluralize, wait_for_changes from pelican.utils import clean_output_dir, maybe_pluralize, wait_for_changes
@ -126,6 +125,8 @@ class Pelican:
for p in generators: for p in generators:
if hasattr(p, "generate_context"): if hasattr(p, "generate_context"):
p.generate_context() p.generate_context()
if hasattr(p, "check_disabled_readers"):
p.check_disabled_readers()
# for plugins that create/edit the summary # for plugins that create/edit the summary
logger.debug("Signal all_generators_finalized.send(<generators>)") logger.debug("Signal all_generators_finalized.send(<generators>)")
@ -573,7 +574,7 @@ def autoreload(args, excqueue=None):
try: try:
pelican.run() pelican.run()
changed_files = wait_for_changes(args.settings, Readers, settings) changed_files = wait_for_changes(args.settings, settings)
changed_files = {c[1] for c in changed_files} changed_files = {c[1] for c in changed_files}
if settings_file in changed_files: if settings_file in changed_files:

View file

@ -7,6 +7,7 @@ from collections import defaultdict
from functools import partial from functools import partial
from itertools import chain, groupby from itertools import chain, groupby
from operator import attrgetter from operator import attrgetter
from typing import List, Optional, Set
from jinja2 import ( from jinja2 import (
BaseLoader, BaseLoader,
@ -156,7 +157,9 @@ class Generator:
return False return False
def get_files(self, paths, exclude=None, extensions=None): def get_files(
self, paths, exclude: Optional[List[str]] = None, extensions=None
) -> Set[str]:
"""Return a list of files to use, based on rules """Return a list of files to use, based on rules
:param paths: the list of paths to search (relative to self.path) :param paths: the list of paths to search (relative to self.path)
@ -250,6 +253,13 @@ class Generator:
# return the name of the class for logging purposes # return the name of the class for logging purposes
return self.__class__.__name__ return self.__class__.__name__
def _check_disabled_readers(
    self, paths, exclude: Optional[List[str]] = None
) -> None:
    """Log a warning for each file that only a disabled reader could handle.

    Collects the files under ``paths`` (minus ``exclude``) whose extension
    belongs to a disabled reader and passes each one to
    ``self.readers.check_file``.

    :param paths: the paths to search, forwarded to ``get_files``
    :param exclude: the paths to skip, forwarded to ``get_files``
    """
    skipped = self.get_files(
        paths, exclude=exclude, extensions=self.readers.disabled_extensions
    )
    # get_files returns a set; sort so the warnings come out in a stable order.
    for path in sorted(skipped):
        self.readers.check_file(path)
class CachingGenerator(Generator, FileStampDataCacher): class CachingGenerator(Generator, FileStampDataCacher):
"""Subclass of Generator and FileStampDataCacher classes """Subclass of Generator and FileStampDataCacher classes
@ -643,6 +653,11 @@ class ArticlesGenerator(CachingGenerator):
self.generate_authors(write) self.generate_authors(write)
self.generate_drafts(write) self.generate_drafts(write)
def check_disabled_readers(self) -> None:
    """Warn about article files that only a disabled reader could process.

    Searches the ``ARTICLE_PATHS`` setting, skipping ``ARTICLE_EXCLUDES``.
    """
    settings = self.settings
    self._check_disabled_readers(
        settings["ARTICLE_PATHS"], exclude=settings["ARTICLE_EXCLUDES"]
    )
def generate_context(self): def generate_context(self):
"""Add the articles into the shared context""" """Add the articles into the shared context"""
@ -849,6 +864,11 @@ class PagesGenerator(CachingGenerator):
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
signals.page_generator_init.send(self) signals.page_generator_init.send(self)
def check_disabled_readers(self) -> None:
    """Warn about page files that only a disabled reader could process.

    Searches the ``PAGE_PATHS`` setting, skipping ``PAGE_EXCLUDES``.
    """
    settings = self.settings
    self._check_disabled_readers(
        settings["PAGE_PATHS"], exclude=settings["PAGE_EXCLUDES"]
    )
def generate_context(self): def generate_context(self):
all_pages = [] all_pages = []
hidden_pages = [] hidden_pages = []
@ -953,6 +973,11 @@ class StaticGenerator(Generator):
self.fallback_to_symlinks = False self.fallback_to_symlinks = False
signals.static_generator_init.send(self) signals.static_generator_init.send(self)
def check_disabled_readers(self) -> None:
    """Warn about static-path files whose extension has a disabled reader.

    Searches the ``STATIC_PATHS`` setting, skipping ``STATIC_EXCLUDES``.
    """
    settings = self.settings
    self._check_disabled_readers(
        settings["STATIC_PATHS"], exclude=settings["STATIC_EXCLUDES"]
    )
def generate_context(self): def generate_context(self):
self.staticfiles = [] self.staticfiles = []
linked_files = set(self.context["static_links"]) linked_files = set(self.context["static_links"])

View file

@ -17,7 +17,7 @@ from pelican import rstdirectives # NOQA
from pelican.cache import FileStampDataCacher from pelican.cache import FileStampDataCacher
from pelican.contents import Author, Category, Page, Tag from pelican.contents import Author, Category, Page, Tag
from pelican.plugins import signals from pelican.plugins import signals
from pelican.utils import get_date, pelican_open, posixize_path from pelican.utils import file_suffix, get_date, pelican_open, posixize_path
try: try:
from markdown import Markdown from markdown import Markdown
@ -125,6 +125,10 @@ class BaseReader:
metadata = {} metadata = {}
return content, metadata return content, metadata
def disabled_message(self) -> str:
    """Return the reason this reader is disabled.

    The base implementation has nothing to report and returns an empty
    string; subclasses such as ``MarkdownReader`` override it.
    """
    return ""
class _FieldBodyTranslator(HTMLTranslator): class _FieldBodyTranslator(HTMLTranslator):
def __init__(self, document): def __init__(self, document):
@ -347,6 +351,12 @@ class MarkdownReader(BaseReader):
metadata = {} metadata = {}
return content, metadata return content, metadata
def disabled_message(self) -> str:
    """Return the reason the Markdown reader is disabled.

    The reader needs ``markdown.Markdown`` to be importable, so the message
    points the user at the missing package.
    """
    return (
        "Could not import markdown.Markdown. "
        "Have you installed the markdown package?"
    )
class HTMLReader(BaseReader): class HTMLReader(BaseReader):
"""Parses HTML files as input, looking for meta, title, and body tags""" """Parses HTML files as input, looking for meta, title, and body tags"""
@ -508,17 +518,23 @@ class Readers(FileStampDataCacher):
def __init__(self, settings=None, cache_name=""): def __init__(self, settings=None, cache_name=""):
self.settings = settings or {} self.settings = settings or {}
self.readers = {} self.readers = {}
self.disabled_readers = {}
# extension => reader for readers that are enabled
self.reader_classes = {} self.reader_classes = {}
# extension => reader for readers that are not enabled
disabled_reader_classes = {}
for cls in [BaseReader] + BaseReader.__subclasses__(): for cls in [BaseReader] + BaseReader.__subclasses__():
if not cls.enabled: if not cls.enabled:
logger.debug( logger.debug(
"Missing dependencies for %s", ", ".join(cls.file_extensions) "Missing dependencies for %s", ", ".join(cls.file_extensions)
) )
continue
for ext in cls.file_extensions: for ext in cls.file_extensions:
self.reader_classes[ext] = cls if cls.enabled:
self.reader_classes[ext] = cls
else:
disabled_reader_classes[ext] = cls
if self.settings["READERS"]: if self.settings["READERS"]:
self.reader_classes.update(self.settings["READERS"]) self.reader_classes.update(self.settings["READERS"])
@ -531,6 +547,9 @@ class Readers(FileStampDataCacher):
self.readers[fmt] = reader_class(self.settings) self.readers[fmt] = reader_class(self.settings)
for fmt, reader_class in disabled_reader_classes.items():
self.disabled_readers[fmt] = reader_class(self.settings)
# set up caching # set up caching
cache_this_level = ( cache_this_level = (
cache_name != "" and self.settings["CONTENT_CACHING_LAYER"] == "reader" cache_name != "" and self.settings["CONTENT_CACHING_LAYER"] == "reader"
@ -541,8 +560,13 @@ class Readers(FileStampDataCacher):
@property @property
def extensions(self): def extensions(self):
"""File extensions that will be processed by a reader."""
return self.readers.keys() return self.readers.keys()
@property
def disabled_extensions(self):
    """File extensions that belong to a disabled reader."""
    return self.disabled_readers.keys()
def read_file( def read_file(
self, self,
base_path, base_path,
@ -562,8 +586,7 @@ class Readers(FileStampDataCacher):
logger.debug("Read file %s -> %s", source_path, content_class.__name__) logger.debug("Read file %s -> %s", source_path, content_class.__name__)
if not fmt: if not fmt:
_, ext = os.path.splitext(os.path.basename(path)) fmt = file_suffix(path)
fmt = ext[1:]
if fmt not in self.readers: if fmt not in self.readers:
raise TypeError("Pelican does not know how to parse %s", path) raise TypeError("Pelican does not know how to parse %s", path)
@ -654,6 +677,12 @@ class Readers(FileStampDataCacher):
context=context, context=context,
) )
def check_file(self, source_path: str) -> None:
    """Log a warning if ``source_path`` belongs to a disabled reader.

    The file's suffix is looked up among the disabled readers; a file with
    no matching disabled reader is ignored silently.

    :param source_path: path of the file to check
    """
    reader = self.disabled_readers.get(file_suffix(source_path))
    if reader is not None:
        # Pass the values as arguments so logging formats them lazily.
        logger.warning("%s: %s", source_path, reader.disabled_message())
def find_empty_alt(content, path): def find_empty_alt(content, path):
"""Find images with empty alt """Find images with empty alt

View file

@ -9,10 +9,11 @@ import unittest
from collections.abc import Sequence from collections.abc import Sequence
from shutil import rmtree from shutil import rmtree
from tempfile import TemporaryDirectory, mkdtemp from tempfile import TemporaryDirectory, mkdtemp
from unittest.mock import patch from unittest.mock import PropertyMock, patch
from rich.console import Console from rich.console import Console
import pelican.readers
from pelican import Pelican, __version__, main from pelican import Pelican, __version__, main
from pelican.generators import StaticGenerator from pelican.generators import StaticGenerator
from pelican.settings import read_settings from pelican.settings import read_settings
@ -303,3 +304,24 @@ class TestPelican(LoggedTestCase):
main(["-o", temp_dir, "pelican/tests/simple_content"]) main(["-o", temp_dir, "pelican/tests/simple_content"])
self.assertIn("Processed 1 article", out.getvalue()) self.assertIn("Processed 1 article", out.getvalue())
self.assertEqual("", err.getvalue()) self.assertEqual("", err.getvalue())
def test_main_on_content_markdown_disabled(self):
    """Invoke main on simple_content with the Markdown reader disabled."""
    with patch.object(
        pelican.readers.MarkdownReader, "enabled", new_callable=PropertyMock
    ) as attr_mock:
        attr_mock.return_value = False
        out, err = io.StringIO(), io.StringIO()
        with contextlib.redirect_stdout(out), contextlib.redirect_stderr(err):
            with TemporaryDirectory() as temp_dir:
                # Don't highlight anything.
                # See https://rich.readthedocs.io/en/stable/highlighting.html
                with patch("pelican.console", new=Console(highlight=False)):
                    main(["-o", temp_dir, "pelican/tests/simple_content"])
        self.assertIn("Processed 0 articles", out.getvalue())
        # Exactly one warning should explain why the Markdown file was skipped.
        # The pattern is a regex, so literal "." and "?" must be escaped.
        self.assertLogCountEqual(
            1,
            r".*article_with_md_extension\.md: "
            r"Could not import markdown\.Markdown\. "
            r"Have you installed the markdown package\?",
        )

View file

@ -1,5 +1,5 @@
import os import os
from unittest.mock import patch from unittest.mock import PropertyMock, patch
from pelican import readers from pelican import readers
from pelican.tests.support import get_settings, unittest from pelican.tests.support import get_settings, unittest
@ -32,6 +32,19 @@ class ReaderTest(unittest.TestCase):
else: else:
self.fail(f"Expected {key} to have value {value}, but was not in Dict") self.fail(f"Expected {key} to have value {value}, but was not in Dict")
def test_markdown_disabled(self):
    """A disabled Markdown reader is tracked under ``disabled_readers``."""
    with patch.object(
        readers.MarkdownReader, "enabled", new_callable=PropertyMock
    ) as attr_mock:
        attr_mock.return_value = False
        all_readers = readers.Readers(settings=get_settings())
        # Every Markdown extension, and nothing else, should be disabled.
        self.assertEqual(
            set(readers.MarkdownReader.file_extensions),
            all_readers.disabled_readers.keys(),
        )
        for ext, reader in all_readers.disabled_readers.items():
            # Label each extension so a failure names the offending one.
            with self.subTest(extension=ext):
                self.assertEqual(readers.MarkdownReader, reader.__class__)
class TestAssertDictHasSubset(ReaderTest): class TestAssertDictHasSubset(ReaderTest):
def setUp(self): def setUp(self):

View file

@ -966,3 +966,10 @@ class TestMemoized(unittest.TestCase):
container.get.cache.clear() container.get.cache.clear()
self.assertEqual("bar", container.get("bar")) self.assertEqual("bar", container.get("bar"))
get_mock.assert_called_once_with("bar") get_mock.assert_called_once_with("bar")
class TestStringUtils(unittest.TestCase):
    """Tests for the string helpers in ``utils``."""

    def test_file_suffix(self):
        """file_suffix returns the extension without its leading dot."""
        cases = {
            "": "",
            "foo": "",
            "foo.md": "md",
            # Only the final extension counts.
            "foo.tar.gz": "gz",
            # Dots in directory names are not part of the file's suffix.
            "content.d/foo": "",
            "content.d/foo.md": "md",
            # A leading or trailing dot alone yields no suffix.
            ".hidden": "",
            "foo.": "",
        }
        for path, expected in cases.items():
            with self.subTest(path=path):
                self.assertEqual(expected, utils.file_suffix(path))

View file

@ -29,6 +29,7 @@ from typing import (
) )
import dateutil.parser import dateutil.parser
from watchfiles import Change
try: try:
from zoneinfo import ZoneInfo from zoneinfo import ZoneInfo
@ -39,7 +40,6 @@ from markupsafe import Markup
if TYPE_CHECKING: if TYPE_CHECKING:
from pelican.contents import Content from pelican.contents import Content
from pelican.readers import Readers
from pelican.settings import Settings from pelican.settings import Settings
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -797,9 +797,8 @@ def order_content(
def wait_for_changes( def wait_for_changes(
settings_file: str, settings_file: str,
reader_class: type[Readers],
settings: Settings, settings: Settings,
): ) -> set[tuple[Change, str]]:
content_path = settings.get("PATH", "") content_path = settings.get("PATH", "")
theme_path = settings.get("THEME", "") theme_path = settings.get("THEME", "")
ignore_files = { ignore_files = {
@ -924,3 +923,13 @@ def temporary_locale(
locale.setlocale(lc_category, temp_locale) locale.setlocale(lc_category, temp_locale)
yield yield
locale.setlocale(lc_category, orig_locale) locale.setlocale(lc_category, orig_locale)
def file_suffix(path: str) -> str:
    """Return the suffix of the filename in ``path``, without the leading dot.

    Only the final extension is returned (``"gz"`` for ``"a.tar.gz"``). A
    filename with no extension, or one that ends in a bare dot, yields an
    empty string.

    :param path: a file path; only its final component is examined
    """
    _, ext = os.path.splitext(os.path.basename(path))
    # ``ext`` is either "" or starts with "."; slicing drops the dot and maps
    # both "" and "." to "", so no separate length check is needed.
    return ext[1:]