Allow usage of regex in LOG_FILTER setting

Fix #2893
This commit is contained in:
Koen Martens 2023-03-13 10:33:00 +01:00
commit 284ea41323
7 changed files with 131 additions and 21 deletions

4
RELEASE.md Normal file
View file

@ -0,0 +1,4 @@
Release type: minor
Allow regular expressions in `LOG_FILTER` setting

1
THANKS
View file

@ -97,6 +97,7 @@ Julian Berman
Justin Mayer Justin Mayer
Kevin Deldycke Kevin Deldycke
Kevin Yap Kevin Yap
Koen Martens
Kyle Fuller Kyle Fuller
Laureline Guerin Laureline Guerin
Leonard Huang Leonard Huang

View file

@ -130,12 +130,18 @@ Basic settings
.. data:: LOG_FILTER = [] .. data:: LOG_FILTER = []
A list of tuples containing the logging level (up to ``warning``) and the A list of tuples containing the type (either ``string`` or ``regex``),
message to be ignored. the logging level (up to ``warning``) and a string. If the type is ``string``,
messages that are equal to the third argument are not shown. If the type is
``regex``, the third argument is interpreted as a regular expression and any
message matching it from the start (as with ``re.match``) will not be shown.
Example:: Example::
LOG_FILTER = [(logging.WARN, 'TAG_SAVE_AS is set to False')] LOG_FILTER = [
('string', logging.WARN, 'Empty theme folder. Using `basic` theme.'),
('regex', logging.WARN, r'Cannot get modification stamp for /foo/.*'),
]
.. data:: READERS = {} .. data:: READERS = {}
@ -1304,15 +1310,19 @@ the **meaningful** error message in the middle of tons of annoying log output
can be quite tricky. In order to filter out redundant log messages, Pelican can be quite tricky. In order to filter out redundant log messages, Pelican
comes with the ``LOG_FILTER`` setting. comes with the ``LOG_FILTER`` setting.
``LOG_FILTER`` should be a list of tuples ``(level, msg)``, each of them being ``LOG_FILTER`` should be a list of tuples ``(type, level, msg_or_regexp)``, each
composed of the logging level (up to ``warning``) and the message to be of them being composed of the type (``string`` or ``regex``), the logging level
(up to ``warning``) and the message or regular expression to be
ignored. Simply populate the list with the log messages you want to hide, and ignored. Simply populate the list with the log messages you want to hide, and
they will be filtered out. they will be filtered out.
For example:: For example::
import logging import logging
LOG_FILTER = [(logging.WARN, 'TAG_SAVE_AS is set to False')] LOG_FILTER = [
('string', logging.WARN, 'TAG_SAVE_AS is set to False'),
('regex', logging.WARN, r'Cannot get modification stamp for /foo/.*'),
]
It is possible to filter out messages by a template. Check out source code to It is possible to filter out messages by a template. Check out source code to
obtain a template. obtain a template.
@ -1320,13 +1330,14 @@ obtain a template.
For example:: For example::
import logging import logging
LOG_FILTER = [(logging.WARN, 'Empty alt attribute for image %s in %s')] LOG_FILTER = [('string', logging.WARN, 'Empty alt attribute for image %s in %s')]
.. Warning:: .. Warning::
Silencing messages by templates is a dangerous feature. It is possible to Silencing messages by templates or regular expressions is a dangerous
unintentionally filter out multiple message types with the same template feature. It is possible to unintentionally filter out multiple message
(including messages from future Pelican versions). Proceed with caution. types with the same template (including messages from future Pelican
versions). Proceed with caution.
.. note:: .. note::

View file

@ -1,11 +1,14 @@
import logging import logging
import re
import warnings
from collections import defaultdict from collections import defaultdict
from rich.console import Console from rich.console import Console
from rich.logging import RichHandler from rich.logging import RichHandler
__all__ = [ __all__ = [
'init' 'init',
'LimitFilter',
] ]
console = Console() console = Console()
@ -23,11 +26,37 @@ class LimitFilter(logging.Filter):
LOGS_DEDUP_MIN_LEVEL = logging.WARNING LOGS_DEDUP_MIN_LEVEL = logging.WARNING
_ignore = set() ignore = set()
ignore_regexp = set()
_raised_messages = set() _raised_messages = set()
_threshold = 5 _threshold = 5
_group_count = defaultdict(int) _group_count = defaultdict(int)
@classmethod
def add_ignore_rule(cls, rule_specification):
    """Register one ``LOG_FILTER`` item as an ignore rule.

    Accepted forms of *rule_specification*:

    * ``(level, message)`` -- deprecated 2-item form; stored in
      ``cls.ignore`` as-is and reported with a ``FutureWarning``.
    * ``('string', level, message)`` -- stored in ``cls.ignore`` as
      ``(level, message)``.
    * ``('regex', level, pattern)`` -- *pattern* is compiled with
      ``re.compile`` and stored in ``cls.ignore_regexp`` as
      ``(level, compiled_pattern)``.

    Raises:
        ValueError: if the item has neither 2 nor 3 elements, or if the
            first element of a 3-item rule is neither ``'string'`` nor
            ``'regex'``.

    An invalid regular expression is not caught here; whatever
    ``re.compile`` raises propagates to the caller.
    """
    size = len(rule_specification)
    if size == 2:  # old-style string or template
        # tuple() keeps the entry hashable even when the item is a list
        cls.ignore.add(tuple(rule_specification))
        warnings.warn(
            "2-tuple specification of LOG_FILTER item is deprecated, "
            "replace with 3-tuple starting with 'string' (see "
            "documentation of LOG_FILTER for more details)",
            FutureWarning,
        )
    elif size == 3:  # new-style string/template/regexp
        kind, level, text = rule_specification
        if kind == "string":
            cls.ignore.add((level, text))
        elif kind == "regex":
            cls.ignore_regexp.add((level, re.compile(text)))
        else:
            raise ValueError(f"Invalid LOG_FILTER type '{kind}'")
    else:
        raise ValueError(
            f"Invalid item '{rule_specification!s}' in LOG_FILTER"
        )
def filter(self, record): def filter(self, record):
# don't limit log messages for anything above "warning" # don't limit log messages for anything above "warning"
if record.levelno > self.LOGS_DEDUP_MIN_LEVEL: if record.levelno > self.LOGS_DEDUP_MIN_LEVEL:
@ -50,7 +79,11 @@ class LimitFilter(logging.Filter):
if logger_level > logging.DEBUG: if logger_level > logging.DEBUG:
template_key = (record.levelno, record.msg) template_key = (record.levelno, record.msg)
message_key = (record.levelno, record.getMessage()) message_key = (record.levelno, record.getMessage())
if (template_key in self._ignore or message_key in self._ignore): if template_key in self.ignore or message_key in self.ignore:
return False
if any(regexp[1].match(record.getMessage())
for regexp in self.ignore_regexp
if regexp[0] == record.levelno):
return False return False
# check if we went over threshold # check if we went over threshold

View file

@ -518,7 +518,8 @@ def configure_settings(settings):
# specify the log messages to be ignored # specify the log messages to be ignored
log_filter = settings.get('LOG_FILTER', DEFAULT_CONFIG['LOG_FILTER']) log_filter = settings.get('LOG_FILTER', DEFAULT_CONFIG['LOG_FILTER'])
LimitFilter._ignore.update(set(log_filter)) for item in log_filter:
LimitFilter.add_ignore_rule(item)
# lookup the theme in "pelican/themes" if the given one doesn't exist # lookup the theme in "pelican/themes" if the given one doesn't exist
if not os.path.isdir(settings['THEME']): if not os.path.isdir(settings['THEME']):

View file

@ -1,4 +1,5 @@
import logging import logging
import re
import unittest import unittest
from collections import defaultdict from collections import defaultdict
from contextlib import contextmanager from contextlib import contextmanager
@ -19,7 +20,8 @@ class TestLog(unittest.TestCase):
super().tearDown() super().tearDown()
def _reset_limit_filter(self): def _reset_limit_filter(self):
log.LimitFilter._ignore = set() log.LimitFilter.ignore = set()
log.LimitFilter.ignore_regexp = set()
log.LimitFilter._raised_messages = set() log.LimitFilter._raised_messages = set()
log.LimitFilter._threshold = 5 log.LimitFilter._threshold = 5
log.LimitFilter._group_count = defaultdict(int) log.LimitFilter._group_count = defaultdict(int)
@ -49,7 +51,7 @@ class TestLog(unittest.TestCase):
# filter by template # filter by template
with self.reset_logger(): with self.reset_logger():
log.LimitFilter._ignore.add((logging.WARNING, 'Log %s')) log.LimitFilter.ignore.add((logging.WARNING, 'Log %s'))
do_logging() do_logging()
self.assertEqual( self.assertEqual(
self.handler.count_logs('Log \\d', logging.WARNING), self.handler.count_logs('Log \\d', logging.WARNING),
@ -60,7 +62,7 @@ class TestLog(unittest.TestCase):
# filter by exact message # filter by exact message
with self.reset_logger(): with self.reset_logger():
log.LimitFilter._ignore.add((logging.WARNING, 'Log 3')) log.LimitFilter.ignore.add((logging.WARNING, 'Log 3'))
do_logging() do_logging()
self.assertEqual( self.assertEqual(
self.handler.count_logs('Log \\d', logging.WARNING), self.handler.count_logs('Log \\d', logging.WARNING),
@ -69,14 +71,30 @@ class TestLog(unittest.TestCase):
self.handler.count_logs('Another log \\d', logging.WARNING), self.handler.count_logs('Another log \\d', logging.WARNING),
5) 5)
# filter by both # filter by regular expression
with self.reset_logger(): with self.reset_logger():
log.LimitFilter._ignore.add((logging.WARNING, 'Log 3')) log.LimitFilter.ignore_regexp.add((logging.WARNING,
log.LimitFilter._ignore.add((logging.WARNING, 'Another log %s')) re.compile(r'Log.*')))
log.LimitFilter.ignore_regexp.add((logging.WARNING,
re.compile(r'.*log 4')))
do_logging() do_logging()
self.assertEqual( self.assertEqual(
self.handler.count_logs('Log \\d', logging.WARNING), self.handler.count_logs('Log \\d', logging.WARNING),
0)
self.assertEqual(
self.handler.count_logs('Another log \\d', logging.WARNING),
4) 4)
# filter by all
with self.reset_logger():
log.LimitFilter.ignore.add((logging.WARNING, 'Log 3'))
log.LimitFilter.ignore.add((logging.WARNING, 'Another log %s'))
log.LimitFilter.ignore_regexp.add((logging.WARNING,
re.compile(r'Lo.*4$')))
do_logging()
self.assertEqual(
self.handler.count_logs('Log \\d', logging.WARNING),
3)
self.assertEqual( self.assertEqual(
self.handler.count_logs('Another log \\d', logging.WARNING), self.handler.count_logs('Another log \\d', logging.WARNING),
0) 0)

View file

@ -1,9 +1,11 @@
import copy import copy
import locale import locale
import logging
import os import os
import re
from os.path import abspath, dirname, join from os.path import abspath, dirname, join
from pelican.log import LimitFilter
from pelican.settings import (DEFAULT_CONFIG, DEFAULT_THEME, from pelican.settings import (DEFAULT_CONFIG, DEFAULT_THEME,
_printf_s_to_format_field, _printf_s_to_format_field,
configure_settings, configure_settings,
@ -108,6 +110,46 @@ class TestSettingsConfiguration(unittest.TestCase):
configure_settings(settings) configure_settings(settings)
self.assertEqual(settings['FEED_DOMAIN'], 'http://feeds.example.com') self.assertEqual(settings['FEED_DOMAIN'], 'http://feeds.example.com')
def test_configure_log_filter_settings(self):
    """Check that every accepted LOG_FILTER form is applied correctly.

    Covers the deprecated 2-tuple form (which must emit a
    ``FutureWarning``), the ``'string'`` and ``'regex'`` 3-tuple forms,
    and the ``ValueError`` raised for malformed items.
    """
    # LimitFilter keeps its rules in class-level sets. Start from empty
    # sets so the equality assertions below do not depend on other tests,
    # and put the previous sets back afterwards so the rules added here
    # do not leak out.
    saved_ignore = LimitFilter.ignore
    saved_ignore_regexp = LimitFilter.ignore_regexp

    def restore_limit_filter():
        LimitFilter.ignore = saved_ignore
        LimitFilter.ignore_regexp = saved_ignore_regexp

    self.addCleanup(restore_limit_filter)
    LimitFilter.ignore = set()
    LimitFilter.ignore_regexp = set()

    settings = {
        'LOG_FILTER': [
            (logging.WARNING, 'foo'),
            ('string', logging.ERROR, 'bar'),
            ('regex', logging.INFO, r'baz.*boo'),
        ],
        'PATH': os.curdir,
        'THEME': DEFAULT_THEME,
    }

    # assertWarnsRegex actually checks the warning text; the ``msg``
    # argument of assertWarns is only the assertion failure message.
    with self.assertWarnsRegex(
        FutureWarning,
        r'2-tuple specification of LOG_FILTER item is deprecated'
    ):
        configure_settings(settings)

    self.assertEqual(LimitFilter.ignore, {
        (logging.WARNING, 'foo'),
        (logging.ERROR, 'bar'),
    })
    self.assertEqual(LimitFilter.ignore_regexp, {
        (logging.INFO, re.compile(r'baz.*boo'))
    })

    # An item that has neither 2 nor 3 elements is rejected.
    settings['LOG_FILTER'] = [(1, 2, 3, 4)]
    with self.assertRaisesRegex(
        ValueError,
        r"Invalid item '\(1, 2, 3, 4\)' in LOG_FILTER"
    ):
        configure_settings(settings)

    # A 3-tuple whose type is not 'string' or 'regex' is rejected.
    settings['LOG_FILTER'] = [('foo', 'bar', 'baz')]
    with self.assertRaisesRegex(
        ValueError,
        r"Invalid LOG_FILTER type 'foo'"
    ):
        configure_settings(settings)
def test_theme_settings_exceptions(self): def test_theme_settings_exceptions(self):
settings = self.settings settings = self.settings