Limit and filter logs

Drop duplicate logs. Allow logs to be grouped, enforcing a maximum number of logs per group. Add the LOG_FILTER setting so that the configuration file can ask for some logs (of level up to warning) to be ignored.
2014-04-01 20:44:09 +02:00 · 2014-04-01 20:44:09 +02:00 · d9b0091357
commit d9b0091357
parent cfd18b20fc
8 changed files with 146 additions and 33 deletions
--- a/docs/contribute.rst
+++ b/docs/contribute.rst
@ -143,3 +143,41 @@ and Python 3 at the same time:
  changed it where I felt necessary.

 - Changed xrange() back to range(), so it is valid in both Python versions.
+
+
+Logging tips
+============
+
+Try to use logging with appropriate levels.
+
+For logging messages that are not repeated, use the usual Python way:
+
+    # at top of file
+    import logging
+    logger = logging.getLogger(__name__)
+
+    # when needed
+    logger.warning('A warning that could occur only once')
+
+However, if you want to log messages that may occur several times, pass a tuple
+instead of a string to the logging method. The tuple has two elements:
+
+ 1. The message to log for this very execution
+ 2. A generic message that will appear if the previous one would occur too many
+    times.
+
+For example, if you want to log missing resources, use the following code:
+
+    for resource in resources:
+        if resource.is_missing:
+            logger.warning((
+                'The resource {r} is missing'.format(r=resource.name),
+                'Other resources were missing'))
+
+The logs will be displayed as follows:
+
+    WARNING: The resource prettiest_cat.jpg is missing
+    WARNING: The resource best_cat_ever.jpg is missing
+    WARNING: The resource cutest_cat.jpg is missing
+    WARNING: The resource lolcat.jpg is missing
+    WARNING: Other resources were missing
--- a/docs/settings.rst
+++ b/docs/settings.rst
@ -88,6 +88,9 @@ Setting name (default value)
                                                                                 here or a single string representing one locale.
                                                                                 When providing a list, all the locales will be tried
                                                                                 until one works.
+`LOG_FILTER` (``[]``)                                                            A list of tuples containing the logging level (up to warning)
+                                                                                 and the message to be ignored.
+                                                                                 For example: ``[(logging.WARN, 'TAG_SAVE_AS is set to False')]``
 `READERS` (``{}``)                                                               A dictionary of file extensions / Reader classes for Pelican to
                                                                                 process or ignore. For example, to avoid processing .html files,
                                                                                 set: ``READERS = {'html': None}``. To add a custom reader for the
@ -694,6 +697,23 @@ adding the following to your configuration::

    CSS_FILE = "wide.css"

+
+Logging
+=======
+
+Sometimes, useless log lines appear while the generation occurs. Finding
+**the** meaningful error message in the middle of tons of annoying log outputs
+can be quite tricky. To be able to filter out all useless log messages, Pelican
+comes with the ``LOG_FILTER`` setting.
+
+``LOG_FILTER`` should be a list of tuples ``(level, msg)``, each of them being
+composed of the logging level (up to warning) and the message to be ignored.
+Simply populate the list with the logs you want to hide and they will be
+filtered out.
+
+For example: ``[(logging.WARN, 'TAG_SAVE_AS is set to False')]``
+
+
 Example settings
 ================

--- a/pelican/init.py
+++ b/pelican/init.py
@ -11,12 +11,15 @@ import argparse
 import locale
 import collections

+# pelican.log has to be the first pelican module to be loaded
+# because logging.setLoggerClass has to be called before logging.getLogger
+from pelican.log import init
+
 from pelican import signals

 from pelican.generators import (ArticlesGenerator, PagesGenerator,
                                StaticGenerator, SourceFileGenerator,
                                TemplatePagesGenerator)
-from pelican.log import init
 from pelican.readers import Readers
 from pelican.settings import read_settings
 from pelican.utils import clean_output_dir, folder_watcher, file_watcher
--- a/pelican/contents.py
+++ b/pelican/contents.py
@ -239,8 +239,10 @@ class Content(object):
                             self._context['filenames'][path].url))
                    origin = origin.replace('\\', '/')  # for Windows paths.
                else:
-                    logger.warning("Unable to find {fn}, skipping url"
-                                   " replacement".format(fn=path))
+                    logger.warning(("Unable to find {fn}, skipping url"
+                                    " replacement".format(fn=value),
+                                    "Other ressources were not found"
+                                    " and their urls not replaced"))
            elif what == 'category':
                origin = Category(path, self.settings).url
            elif what == 'tag':
--- a/pelican/log.py
+++ b/pelican/log.py
@ -9,7 +9,7 @@ import os
 import sys
 import logging

-from logging import Formatter, getLogger, StreamHandler, DEBUG
+from collections import defaultdict


 RESET_TERM = '\033[0;m'
@ -30,7 +30,7 @@ def ansi(color, text):
    return '\033[1;{0}m{1}{2}'.format(code, text, RESET_TERM)


-class ANSIFormatter(Formatter):
+class ANSIFormatter(logging.Formatter):
    """Convert a `logging.LogRecord' object into colored text, using ANSI
       escape sequences.

@ -51,7 +51,7 @@ class ANSIFormatter(Formatter):
            return ansi('white', record.levelname) + ': ' + msg


-class TextFormatter(Formatter):
+class TextFormatter(logging.Formatter):
    """
    Convert a `logging.LogRecord' object into text.
    """
@ -63,7 +63,62 @@ class TextFormatter(Formatter):
            return record.levelname + ': ' + record.getMessage()


-def init(level=None, logger=getLogger(), handler=StreamHandler()):
+class LimitFilter(logging.Filter):
+    """
+    Remove duplicates records, and limit the number of records in the same
+    group.
+
+    Groups are specified by the message to use when the number of records in
+    the same group hit the limit.
+    E.g.: log.warning(('43 is not the answer', 'More erroneous answers'))
+    """
+
+    ignore = set()
+    threshold = 5
+    group_count = defaultdict(int)
+
+    def filter(self, record):
+        # don't limit levels over warnings
+        if record.levelno > logging.WARN:
+            return record
+        # extract group
+        group = None
+        if len(record.msg) == 2:
+            record.msg, group = record.msg
+        # ignore record if it was already raised
+        # use .getMessage() and not .msg for string formatting
+        ignore_key = (record.levelno, record.getMessage())
+        to_ignore = ignore_key in LimitFilter.ignore
+        LimitFilter.ignore.add(ignore_key)
+        if to_ignore:
+            return False
+        # check if we went over threshold
+        if group:
+            key = (record.levelno, group)
+            LimitFilter.group_count[key] += 1
+            if LimitFilter.group_count[key] == LimitFilter.threshold:
+                record.msg = group
+            if LimitFilter.group_count[key] > LimitFilter.threshold:
+                return False
+        return record
+
+
+class LimitLogger(logging.Logger):
+    """
+    A logger which add LimitFilter automatically
+    """
+
+    limit_filter = LimitFilter()
+
+    def __init__(self, *args, **kwargs):
+        super(LimitLogger, self).__init__(*args, **kwargs)
+        self.addFilter(LimitLogger.limit_filter)
+
+logging.setLoggerClass(LimitLogger)
+
+
+def init(level=None, handler=logging.StreamHandler()):
+
    logger = logging.getLogger()

    if (os.isatty(sys.stdout.fileno())
@ -79,7 +134,7 @@ def init(level=None, logger=getLogger(), handler=StreamHandler()):


 if __name__ == '__main__':
-    init(level=DEBUG)
+    init(level=logging.DEBUG)

    root_logger = logging.getLogger()
    root_logger.debug('debug')
--- a/pelican/readers.py
+++ b/pelican/readers.py
@ -318,7 +318,11 @@ class HTMLReader(BaseReader):
            if not contents:
                contents = self._attr_value(attrs, 'contents', '')
                if contents:
-                    logger.warning("Meta tag attribute 'contents' used in file %s, should be changed to 'content'", self._filename)
+                    logger.warning((
+                        "Meta tag attribute 'contents' used in file {}, should"
+                        " be changed to 'content'".format(self._filename),
+                        "Other files have meta tag attribute 'contents' that"
+                        " should be changed to 'content'"))

            if name == 'keywords':
                name = 'tags'
@ -385,10 +389,6 @@ class Readers(object):

    """

-    # used to warn about missing dependencies only once, at the first
-    # instanciation of a Readers object.
-    warn_missing_deps = True
-
    def __init__(self, settings=None):
        self.settings = settings or {}
        self.readers = {}
@ -396,7 +396,6 @@ class Readers(object):

        for cls in [BaseReader] + BaseReader.__subclasses__():
            if not cls.enabled:
-                if self.__class__.warn_missing_deps:
                logger.debug('Missing dependencies for {}'
                             .format(', '.join(cls.file_extensions)))
                continue
@ -404,8 +403,6 @@ class Readers(object):
            for ext in cls.file_extensions:
                self.reader_classes[ext] = cls

-        self.__class__.warn_missing_deps = False
-
        if self.settings['READERS']:
            self.reader_classes.update(self.settings['READERS'])

@ -505,19 +502,10 @@ def find_empty_alt(content, path):
            src=(['"])(.*)\5
        )
        """, re.X)
-    matches = re.findall(imgs, content)
-    # find a correct threshold
-    nb_warnings = 10
-    if len(matches) == nb_warnings + 1:
-        nb_warnings += 1  # avoid bad looking case
-    # print one warning per image with empty alt until threshold
-    for match in matches[:nb_warnings]:
-        logger.warning('Empty alt attribute for image {} in {}'.format(
-            os.path.basename(match[1] + match[5]), path))
-    # print one warning for the other images with empty alt
-    if len(matches) > nb_warnings:
-        logger.warning('{} other images with empty alt attributes'
-                       .format(len(matches) - nb_warnings))
+    for match in re.findall(imgs, content):
+        logger.warning(('Empty alt attribute for image {} in {}'.format(
+            os.path.basename(match[1] + match[5]), path),
+            'Other images have empty alt attributes'))


 def default_metadata(settings=None, process=None):
--- a/pelican/settings.py
+++ b/pelican/settings.py
@ -19,6 +19,8 @@ except ImportError:

 from os.path import isabs

+from pelican.log import LimitFilter
+

 logger = logging.getLogger(__name__)

@ -98,6 +100,7 @@ DEFAULT_CONFIG = {
    'MD_EXTENSIONS': ['codehilite(css_class=highlight)', 'extra'],
    'JINJA_EXTENSIONS': [],
    'JINJA_FILTERS': {},
+    'LOG_FILTER': [],
    'LOCALE': [''],  # defaults to user locale
    'DEFAULT_PAGINATION': False,
    'DEFAULT_ORPHANS': 0,
@ -170,12 +173,16 @@ def get_settings_from_file(path, default_settings=DEFAULT_CONFIG):
 def configure_settings(settings):
    """Provide optimizations, error checking and warnings for the given
    settings.
-
+    Set up the logs to be ignored as well.
    """
    if not 'PATH' in settings or not os.path.isdir(settings['PATH']):
        raise Exception('You need to specify a path containing the content'
                        ' (see pelican --help for more information)')

+    # set up logs to be ignored
+    LimitFilter.ignore.update(set(settings.get('LOG_FILTER',
+                                               DEFAULT_CONFIG['LOG_FILTER'])))
+
    # lookup the theme in "pelican/themes" if the given one doesn't exist
    if not os.path.isdir(settings['THEME']):
        theme_path = os.path.join(
--- a/pelican/tests/test_pelican.py
+++ b/pelican/tests/test_pelican.py
@ -83,7 +83,7 @@ class TestPelican(LoggedTestCase):
        mute(True)(pelican.run)()
        self.assertDirsEqual(self.temp_path, os.path.join(OUTPUT_PATH, 'basic'))
        self.assertLogCountEqual(
-            count=4,
+            count=3,
            msg="Unable to find.*skipping url replacement",
            level=logging.WARNING)