Merge pull request #1030 from Rogdham/filter_log

Limit and filter logs
2025-10-15 20:28:56 +02:00 · 2014-04-14 14:56:35 -04:00 · 2014-04-14 14:56:35 -04:00 · b11b8a93cd
commit b11b8a93cd
parent c6ff88d0fc d9b0091357
8 changed files with 146 additions and 33 deletions
--- a/docs/contribute.rst
+++ b/docs/contribute.rst
@ -143,3 +143,41 @@ and Python 3 at the same time:
  changed it where I felt necessary.
 - Changed xrange() back to range(), so it is valid in both Python versions.
 Logging tips
 ============
 Try to use logging with appropriate levels.
 For logging messages that are not repeated, use the usual Python way:
    # at top of file
    import logging
    logger = logging.getLogger(__name__)
    # when needed
    logger.warning('A warning that could occur only once')
 However, if you want to log messages that may occur several times, instead of
 a string, give a tuple to the logging method, with two elements:
 1. The message to log for this very execution
 2. A generic message that will appear if the previous one would occur too many
    times.
 For example, if you want to log missing resources, use the following code:
    for resource in resources:
        if resource.is_missing:
            logger.warning((
                'The resource {r} is missing'.format(r=resource.name),
                'Other resources were missing'))
 The logs will be displayed as follows:
    WARNING: The resource prettiest_cat.jpg is missing
    WARNING: The resource best_cat_ever.jpg is missing
    WARNING: The resource cutest_cat.jpg is missing
    WARNING: The resource lolcat.jpg is missing
    WARNING: Other resources were missing
--- a/docs/settings.rst
+++ b/docs/settings.rst
@ -88,6 +88,9 @@ Setting name (default value)
                                                                                 here or a single string representing one locale.
                                                                                 When providing a list, all the locales will be tried
                                                                                 until one works.
 `LOG_FILTER` (``[]``)                                                            A list of tuples containing the logging level (up to warning)
                                                                                 and the message to be ignored.
                                                                                 For example: ``[(logging.WARN, 'TAG_SAVE_AS is set to False')]``
 `READERS` (``{}``)                                                               A dictionary of file extensions / Reader classes for Pelican to
                                                                                 process or ignore. For example, to avoid processing .html files,
                                                                                 set: ``READERS = {'html': None}``. To add a custom reader for the
@ -694,6 +697,23 @@ adding the following to your configuration::
    CSS_FILE = "wide.css"
 Logging
 =======
 Sometimes, useless log lines appear while the generation occurs. Finding
 **the** meaningful error message in the middle of tons of annoying log outputs
 can be quite tricky. To be able to filter out all useless log messages, Pelican
 comes with the ``LOG_FILTER`` setting.
 ``LOG_FILTER`` should be a list of tuples ``(level, msg)``, each of them being
 composed of the logging level (up to warning) and the message to be ignored.
 Simply populate the list with the logs you want to hide and they will be
 filtered out.
 For example: ``[(logging.WARN, 'TAG_SAVE_AS is set to False')]``
 Example settings
 ================
--- a/pelican/init.py
+++ b/pelican/init.py
@ -11,12 +11,15 @@ import argparse
 import locale
 import collections
 # pelican.log has to be the first pelican module to be loaded
 # because logging.setLoggerClass has to be called before logging.getLogger
 from pelican.log import init
 from pelican import signals
 from pelican.generators import (ArticlesGenerator, PagesGenerator,
                                StaticGenerator, SourceFileGenerator,
                                TemplatePagesGenerator)
 from pelican.log import init
 from pelican.readers import Readers
 from pelican.settings import read_settings
 from pelican.utils import clean_output_dir, folder_watcher, file_watcher
--- a/pelican/contents.py
+++ b/pelican/contents.py
@ -239,8 +239,10 @@ class Content(object):
                             self._context['filenames'][path].url))
                    origin = origin.replace('\\', '/')  # for Windows paths.
                else:
-                    logger.warning("Unable to find {fn}, skipping url"
+                    logger.warning(("Unable to find {fn}, skipping url"
-                                   " replacement".format(fn=path))
+                                    " replacement".format(fn=value),
                                    "Other ressources were not found"
                                    " and their urls not replaced"))
            elif what == 'category':
                origin = Category(path, self.settings).url
            elif what == 'tag':
--- a/pelican/log.py
+++ b/pelican/log.py
@ -9,7 +9,7 @@ import os
 import sys
 import logging
-from logging import Formatter, getLogger, StreamHandler, DEBUG
+from collections import defaultdict
 RESET_TERM = '\033[0;m'
@ -30,7 +30,7 @@ def ansi(color, text):
    return '\033[1;{0}m{1}{2}'.format(code, text, RESET_TERM)
-class ANSIFormatter(Formatter):
+class ANSIFormatter(logging.Formatter):
    """Convert a `logging.LogRecord' object into colored text, using ANSI
       escape sequences.
@ -51,7 +51,7 @@ class ANSIFormatter(Formatter):
            return ansi('white', record.levelname) + ': ' + msg
-class TextFormatter(Formatter):
+class TextFormatter(logging.Formatter):
    """
    Convert a `logging.LogRecord' object into text.
    """
@ -63,7 +63,62 @@ class TextFormatter(Formatter):
            return record.levelname + ': ' + record.getMessage()
-def init(level=None, logger=getLogger(), handler=StreamHandler()):
class LimitFilter(logging.Filter):
    """
    Remove duplicate records, and limit the number of records in the same
    group.

    Groups are specified by the message to use when the number of records in
    the same group hits the limit.

    E.g.: log.warning(('43 is not the answer', 'More erroneous answers'))

    State is kept on the class, so it is shared by every instance.
    """

    # (levelno, formatted message) pairs that must not be emitted (again)
    ignore = set()
    # number of records of one group at which the group message is shown
    # instead; further records of that group are dropped
    threshold = 5
    # (levelno, group message) -> number of records seen so far
    group_count = defaultdict(int)

    def filter(self, record):
        """
        Decide whether `record` should be emitted.

        Returns the record itself (truthy) when it must be logged and False
        when it must be dropped. `record.msg` is rewritten in place: a
        ``(message, group)`` tuple is replaced by its message, or by the
        group message once the threshold is reached.
        """
        # don't limit levels over warnings
        if record.levelno > logging.WARN:
            return record

        # extract group; only a real 2-tuple is a (message, group) pair.
        # Checking the length alone would split any two-character string
        # into its characters and raise TypeError on unsized messages.
        group = None
        if isinstance(record.msg, tuple) and len(record.msg) == 2:
            record.msg, group = record.msg

        # ignore record if it was already raised
        # use .getMessage() and not .msg for string formatting
        ignore_key = (record.levelno, record.getMessage())
        to_ignore = ignore_key in LimitFilter.ignore
        LimitFilter.ignore.add(ignore_key)
        if to_ignore:
            return False

        # check if we went over threshold
        if group:
            key = (record.levelno, group)
            LimitFilter.group_count[key] += 1
            if LimitFilter.group_count[key] == LimitFilter.threshold:
                # last record shown for this group: use the generic message
                record.msg = group
            if LimitFilter.group_count[key] > LimitFilter.threshold:
                return False
        return record
 class LimitLogger(logging.Logger):
    """
    A logger which adds a LimitFilter automatically.

    Every instance attaches the same class-level ``limit_filter`` object to
    itself when it is constructed.
    """
    # single filter object shared by all LimitLogger instances
    limit_filter = LimitFilter()
    def __init__(self, *args, **kwargs):
        # arguments are forwarded unchanged to logging.Logger
        super(LimitLogger, self).__init__(*args, **kwargs)
        self.addFilter(LimitLogger.limit_filter)
 logging.setLoggerClass(LimitLogger)
 def init(level=None, handler=logging.StreamHandler()):
    logger = logging.getLogger()
    if (os.isatty(sys.stdout.fileno())
@ -79,7 +134,7 @@ def init(level=None, logger=getLogger(), handler=StreamHandler()):
 if __name__ == '__main__':
-    init(level=DEBUG)
+    init(level=logging.DEBUG)
    root_logger = logging.getLogger()
    root_logger.debug('debug')
--- a/pelican/readers.py
+++ b/pelican/readers.py
@ -318,7 +318,11 @@ class HTMLReader(BaseReader):
            if not contents:
                contents = self._attr_value(attrs, 'contents', '')
                if contents:
-                    logger.warning("Meta tag attribute 'contents' used in file %s, should be changed to 'content'", self._filename)
+                    logger.warning((
                        "Meta tag attribute 'contents' used in file {}, should"
                        " be changed to 'content'".format(self._filename),
                        "Other files have meta tag attribute 'contents' that"
                        " should be changed to 'content'"))
            if name == 'keywords':
                name = 'tags'
@ -385,10 +389,6 @@ class Readers(object):
    """
    # used to warn about missing dependencies only once, at the first
    # instanciation of a Readers object.
    warn_missing_deps = True
    def __init__(self, settings=None):
        self.settings = settings or {}
        self.readers = {}
@ -396,16 +396,13 @@ class Readers(object):
        for cls in [BaseReader] + BaseReader.__subclasses__():
            if not cls.enabled:
-                if self.__class__.warn_missing_deps:
+                logger.debug('Missing dependencies for {}'
-                    logger.debug('Missing dependencies for {}'
+                             .format(', '.join(cls.file_extensions)))
                                 .format(', '.join(cls.file_extensions)))
                continue
            for ext in cls.file_extensions:
                self.reader_classes[ext] = cls
        self.__class__.warn_missing_deps = False
        if self.settings['READERS']:
            self.reader_classes.update(self.settings['READERS'])
@ -505,19 +502,10 @@ def find_empty_alt(content, path):
            src=(['"])(.*)\5
        )
        """, re.X)
-    matches = re.findall(imgs, content)
+    for match in re.findall(imgs, content):
-    # find a correct threshold
+        logger.warning(('Empty alt attribute for image {} in {}'.format(
-    nb_warnings = 10
+            os.path.basename(match[1] + match[5]), path),
-    if len(matches) == nb_warnings + 1:
+            'Other images have empty alt attributes'))
        nb_warnings += 1  # avoid bad looking case
    # print one warning per image with empty alt until threshold
    for match in matches[:nb_warnings]:
        logger.warning('Empty alt attribute for image {} in {}'.format(
            os.path.basename(match[1] + match[5]), path))
    # print one warning for the other images with empty alt
    if len(matches) > nb_warnings:
        logger.warning('{} other images with empty alt attributes'
                       .format(len(matches) - nb_warnings))
 def default_metadata(settings=None, process=None):
--- a/pelican/settings.py
+++ b/pelican/settings.py
@ -19,6 +19,8 @@ except ImportError:
 from os.path import isabs
 from pelican.log import LimitFilter
 logger = logging.getLogger(__name__)
@ -98,6 +100,7 @@ DEFAULT_CONFIG = {
    'MD_EXTENSIONS': ['codehilite(css_class=highlight)', 'extra'],
    'JINJA_EXTENSIONS': [],
    'JINJA_FILTERS': {},
    'LOG_FILTER': [],
    'LOCALE': [''],  # defaults to user locale
    'DEFAULT_PAGINATION': False,
    'DEFAULT_ORPHANS': 0,
@ -170,12 +173,16 @@ def get_settings_from_file(path, default_settings=DEFAULT_CONFIG):
 def configure_settings(settings):
    """Provide optimizations, error checking and warnings for the given
    settings.
-
+    Set up the logs to be ignored as well.
    """
    if not 'PATH' in settings or not os.path.isdir(settings['PATH']):
        raise Exception('You need to specify a path containing the content'
                        ' (see pelican --help for more information)')
    # set up logs to be ignored
    LimitFilter.ignore.update(set(settings.get('LOG_FILTER',
                                               DEFAULT_CONFIG['LOG_FILTER'])))
    # lookup the theme in "pelican/themes" if the given one doesn't exist
    if not os.path.isdir(settings['THEME']):
        theme_path = os.path.join(
--- a/pelican/tests/test_pelican.py
+++ b/pelican/tests/test_pelican.py
@ -83,7 +83,7 @@ class TestPelican(LoggedTestCase):
        mute(True)(pelican.run)()
        self.assertDirsEqual(self.temp_path, os.path.join(OUTPUT_PATH, 'basic'))
        self.assertLogCountEqual(
-            count=4,
+            count=3,
            msg="Unable to find.*skipping url replacement",
            level=logging.WARNING)