Merge pull request #1030 from Rogdham/filter_log

Limit and filter logs
This commit is contained in:
Justin Mayer 2014-04-14 14:56:35 -04:00
commit b11b8a93cd
8 changed files with 146 additions and 33 deletions

View file

@ -143,3 +143,41 @@ and Python 3 at the same time:
changed it where I felt necessary. changed it where I felt necessary.
- Changed xrange() back to range(), so it is valid in both Python versions. - Changed xrange() back to range(), so it is valid in both Python versions.
Logging tips
============
Try to use logging with appropriate levels.
For logging messages that are not repeated, use the usual Python way:
# at top of file
import logging
logger = logging.getLogger(__name__)
# when needed
logger.warning('A warning that could occur only once')
However, if you want to log messages that may occur several times, instead of
a string, give a tuple to the logging method, with two arguments:
1. The message to log for this very execution
2. A generic message that will appear if the previous one would occur too many
times.
For example, if you want to log missing resources, use the following code:
for resource in resources:
if resource.is_missing:
logger.warning((
'The resource {r} is missing'.format(r=resource.name),
'Other resources were missing'))
The logs will be displayed as follows:
WARNING: The resource prettiest_cat.jpg is missing
WARNING: The resource best_cat_ever.jpg is missing
WARNING: The resource cutest_cat.jpg is missing
WARNING: The resource lolcat.jpg is missing
WARNING: Other resources were missing

View file

@ -88,6 +88,9 @@ Setting name (default value)
here or a single string representing one locale. here or a single string representing one locale.
When providing a list, all the locales will be tried When providing a list, all the locales will be tried
until one works. until one works.
`LOG_FILTER` (``[]``) A list of tuples containing the logging level (up to warning)
and the message to be ignored.
For example: ``[(logging.WARN, 'TAG_SAVE_AS is set to False')]``
`READERS` (``{}``) A dictionary of file extensions / Reader classes for Pelican to `READERS` (``{}``) A dictionary of file extensions / Reader classes for Pelican to
process or ignore. For example, to avoid processing .html files, process or ignore. For example, to avoid processing .html files,
set: ``READERS = {'html': None}``. To add a custom reader for the set: ``READERS = {'html': None}``. To add a custom reader for the
@ -694,6 +697,23 @@ adding the following to your configuration::
CSS_FILE = "wide.css" CSS_FILE = "wide.css"
Logging
=======
Sometimes, useless log lines appear while the generation occurs. Finding
**the** meaningful error message in the middle of tons of annoying log output
can be quite tricky. To be able to filter out all useless log messages, Pelican
comes with the ``LOG_FILTER`` setting.
``LOG_FILTER`` should be a list of tuples ``(level, msg)``, each of them being
composed of the logging level (up to warning) and the message to be ignored.
Simply populate the list with the logs you want to hide and they will be
filtered out.
For example: ``[(logging.WARN, 'TAG_SAVE_AS is set to False')]``
Example settings Example settings
================ ================

View file

@ -11,12 +11,15 @@ import argparse
import locale import locale
import collections import collections
# pelican.log has to be the first pelican module to be loaded
# because logging.setLoggerClass has to be called before logging.getLogger
from pelican.log import init
from pelican import signals from pelican import signals
from pelican.generators import (ArticlesGenerator, PagesGenerator, from pelican.generators import (ArticlesGenerator, PagesGenerator,
StaticGenerator, SourceFileGenerator, StaticGenerator, SourceFileGenerator,
TemplatePagesGenerator) TemplatePagesGenerator)
from pelican.log import init
from pelican.readers import Readers from pelican.readers import Readers
from pelican.settings import read_settings from pelican.settings import read_settings
from pelican.utils import clean_output_dir, folder_watcher, file_watcher from pelican.utils import clean_output_dir, folder_watcher, file_watcher

View file

@ -239,8 +239,10 @@ class Content(object):
self._context['filenames'][path].url)) self._context['filenames'][path].url))
origin = origin.replace('\\', '/') # for Windows paths. origin = origin.replace('\\', '/') # for Windows paths.
else: else:
logger.warning("Unable to find {fn}, skipping url" logger.warning(("Unable to find {fn}, skipping url"
" replacement".format(fn=path)) " replacement".format(fn=value),
"Other ressources were not found"
" and their urls not replaced"))
elif what == 'category': elif what == 'category':
origin = Category(path, self.settings).url origin = Category(path, self.settings).url
elif what == 'tag': elif what == 'tag':

View file

@ -9,7 +9,7 @@ import os
import sys import sys
import logging import logging
from logging import Formatter, getLogger, StreamHandler, DEBUG from collections import defaultdict
RESET_TERM = '\033[0;m' RESET_TERM = '\033[0;m'
@ -30,7 +30,7 @@ def ansi(color, text):
return '\033[1;{0}m{1}{2}'.format(code, text, RESET_TERM) return '\033[1;{0}m{1}{2}'.format(code, text, RESET_TERM)
class ANSIFormatter(Formatter): class ANSIFormatter(logging.Formatter):
"""Convert a `logging.LogRecord' object into colored text, using ANSI """Convert a `logging.LogRecord' object into colored text, using ANSI
escape sequences. escape sequences.
@ -51,7 +51,7 @@ class ANSIFormatter(Formatter):
return ansi('white', record.levelname) + ': ' + msg return ansi('white', record.levelname) + ': ' + msg
class TextFormatter(Formatter): class TextFormatter(logging.Formatter):
""" """
Convert a `logging.LogRecord' object into text. Convert a `logging.LogRecord' object into text.
""" """
@ -63,7 +63,62 @@ class TextFormatter(Formatter):
return record.levelname + ': ' + record.getMessage() return record.levelname + ': ' + record.getMessage()
def init(level=None, logger=getLogger(), handler=StreamHandler()): class LimitFilter(logging.Filter):
"""
Remove duplicate records, and limit the number of records in the same
group.
Groups are specified by the message to use when the number of records in
the same group hits the limit.
E.g.: log.warning(('43 is not the answer', 'More erroneous answers'))
"""
ignore = set()
threshold = 5
group_count = defaultdict(int)
def filter(self, record):
    """Decide whether ``record`` should be emitted.

    Records with a level above WARNING are never limited. For the other
    records:

    * if ``record.msg`` is a ``(message, group_message)`` pair, it is
      split: ``record.msg`` becomes ``message`` and ``group_message``
      identifies the group the record belongs to;
    * a record whose ``(level, formatted message)`` is already in
      ``ignore`` is dropped, otherwise that key is remembered so later
      duplicates are dropped;
    * for grouped records, the record that reaches ``threshold`` has its
      message replaced by the group message, and every later record of
      the same group is dropped.

    :param record: the ``logging.LogRecord`` being examined.
    :return: ``record`` (truthy) when it must be logged, else ``False``.
    """
    # don't limit levels over warnings
    if record.levelno > logging.WARN:
        return record

    # Extract the group. Only a genuine 2-item tuple/list is a
    # (message, group) pair: testing ``len(record.msg) == 2`` alone would
    # split a two-character string into its characters and would raise
    # TypeError on messages without a length (ints, exceptions, ...).
    group = None
    if isinstance(record.msg, (tuple, list)) and len(record.msg) == 2:
        record.msg, group = record.msg

    # ignore record if it was already raised
    # use .getMessage() and not .msg for string formatting
    ignore_key = (record.levelno, record.getMessage())
    if ignore_key in self.ignore:
        return False
    self.ignore.add(ignore_key)

    # check if we went over threshold
    if group:
        key = (record.levelno, group)
        self.group_count[key] += 1
        seen = self.group_count[key]
        if seen > self.threshold:
            return False
        if seen == self.threshold:
            # last record shown for this group: display the generic text
            record.msg = group
    return record
class LimitLogger(logging.Logger):
    """
    Logger subclass that attaches the shared LimitFilter to every instance.
    """

    # one filter object, created with the class and reused by all loggers
    limit_filter = LimitFilter()

    def __init__(self, *args, **kwargs):
        super(LimitLogger, self).__init__(*args, **kwargs)
        shared_filter = LimitLogger.limit_filter
        self.addFilter(shared_filter)
logging.setLoggerClass(LimitLogger)
def init(level=None, handler=logging.StreamHandler()):
logger = logging.getLogger() logger = logging.getLogger()
if (os.isatty(sys.stdout.fileno()) if (os.isatty(sys.stdout.fileno())
@ -79,7 +134,7 @@ def init(level=None, logger=getLogger(), handler=StreamHandler()):
if __name__ == '__main__': if __name__ == '__main__':
init(level=DEBUG) init(level=logging.DEBUG)
root_logger = logging.getLogger() root_logger = logging.getLogger()
root_logger.debug('debug') root_logger.debug('debug')

View file

@ -318,7 +318,11 @@ class HTMLReader(BaseReader):
if not contents: if not contents:
contents = self._attr_value(attrs, 'contents', '') contents = self._attr_value(attrs, 'contents', '')
if contents: if contents:
logger.warning("Meta tag attribute 'contents' used in file %s, should be changed to 'content'", self._filename) logger.warning((
"Meta tag attribute 'contents' used in file {}, should"
" be changed to 'content'".format(self._filename),
"Other files have meta tag attribute 'contents' that"
" should be changed to 'content'"))
if name == 'keywords': if name == 'keywords':
name = 'tags' name = 'tags'
@ -385,10 +389,6 @@ class Readers(object):
""" """
# used to warn about missing dependencies only once, at the first
# instanciation of a Readers object.
warn_missing_deps = True
def __init__(self, settings=None): def __init__(self, settings=None):
self.settings = settings or {} self.settings = settings or {}
self.readers = {} self.readers = {}
@ -396,16 +396,13 @@ class Readers(object):
for cls in [BaseReader] + BaseReader.__subclasses__(): for cls in [BaseReader] + BaseReader.__subclasses__():
if not cls.enabled: if not cls.enabled:
if self.__class__.warn_missing_deps: logger.debug('Missing dependencies for {}'
logger.debug('Missing dependencies for {}' .format(', '.join(cls.file_extensions)))
.format(', '.join(cls.file_extensions)))
continue continue
for ext in cls.file_extensions: for ext in cls.file_extensions:
self.reader_classes[ext] = cls self.reader_classes[ext] = cls
self.__class__.warn_missing_deps = False
if self.settings['READERS']: if self.settings['READERS']:
self.reader_classes.update(self.settings['READERS']) self.reader_classes.update(self.settings['READERS'])
@ -505,19 +502,10 @@ def find_empty_alt(content, path):
src=(['"])(.*)\5 src=(['"])(.*)\5
) )
""", re.X) """, re.X)
matches = re.findall(imgs, content) for match in re.findall(imgs, content):
# find a correct threshold logger.warning(('Empty alt attribute for image {} in {}'.format(
nb_warnings = 10 os.path.basename(match[1] + match[5]), path),
if len(matches) == nb_warnings + 1: 'Other images have empty alt attributes'))
nb_warnings += 1 # avoid bad looking case
# print one warning per image with empty alt until threshold
for match in matches[:nb_warnings]:
logger.warning('Empty alt attribute for image {} in {}'.format(
os.path.basename(match[1] + match[5]), path))
# print one warning for the other images with empty alt
if len(matches) > nb_warnings:
logger.warning('{} other images with empty alt attributes'
.format(len(matches) - nb_warnings))
def default_metadata(settings=None, process=None): def default_metadata(settings=None, process=None):

View file

@ -19,6 +19,8 @@ except ImportError:
from os.path import isabs from os.path import isabs
from pelican.log import LimitFilter
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -98,6 +100,7 @@ DEFAULT_CONFIG = {
'MD_EXTENSIONS': ['codehilite(css_class=highlight)', 'extra'], 'MD_EXTENSIONS': ['codehilite(css_class=highlight)', 'extra'],
'JINJA_EXTENSIONS': [], 'JINJA_EXTENSIONS': [],
'JINJA_FILTERS': {}, 'JINJA_FILTERS': {},
'LOG_FILTER': [],
'LOCALE': [''], # defaults to user locale 'LOCALE': [''], # defaults to user locale
'DEFAULT_PAGINATION': False, 'DEFAULT_PAGINATION': False,
'DEFAULT_ORPHANS': 0, 'DEFAULT_ORPHANS': 0,
@ -170,12 +173,16 @@ def get_settings_from_file(path, default_settings=DEFAULT_CONFIG):
def configure_settings(settings): def configure_settings(settings):
"""Provide optimizations, error checking and warnings for the given """Provide optimizations, error checking and warnings for the given
settings. settings.
Set up the logs to be ignored as well.
""" """
if not 'PATH' in settings or not os.path.isdir(settings['PATH']): if not 'PATH' in settings or not os.path.isdir(settings['PATH']):
raise Exception('You need to specify a path containing the content' raise Exception('You need to specify a path containing the content'
' (see pelican --help for more information)') ' (see pelican --help for more information)')
# set up logs to be ignored
LimitFilter.ignore.update(set(settings.get('LOG_FILTER',
DEFAULT_CONFIG['LOG_FILTER'])))
# lookup the theme in "pelican/themes" if the given one doesn't exist # lookup the theme in "pelican/themes" if the given one doesn't exist
if not os.path.isdir(settings['THEME']): if not os.path.isdir(settings['THEME']):
theme_path = os.path.join( theme_path = os.path.join(

View file

@ -83,7 +83,7 @@ class TestPelican(LoggedTestCase):
mute(True)(pelican.run)() mute(True)(pelican.run)()
self.assertDirsEqual(self.temp_path, os.path.join(OUTPUT_PATH, 'basic')) self.assertDirsEqual(self.temp_path, os.path.join(OUTPUT_PATH, 'basic'))
self.assertLogCountEqual( self.assertLogCountEqual(
count=4, count=3,
msg="Unable to find.*skipping url replacement", msg="Unable to find.*skipping url replacement",
level=logging.WARNING) level=logging.WARNING)