1
0
Fork 0
forked from github/pelican

Merge pull request #1030 from Rogdham/filter_log

Limit and filter logs
This commit is contained in:
Justin Mayer 2014-04-14 14:56:35 -04:00
commit b11b8a93cd
8 changed files with 146 additions and 33 deletions

View file

@ -143,3 +143,41 @@ and Python 3 at the same time:
changed it where I felt necessary.
- Changed xrange() back to range(), so it is valid in both Python versions.
Logging tips
============
Try to use logging with appropriate levels.
For logging messages that are not repeated, use the usual Python way:
# at top of file
import logging
logger = logging.getLogger(__name__)
# when needed
logger.warning('A warning that could occur only once')
However, if you want to log messages that may occur several times, instead of
a string, give a tuple to the logging method, with two arguments:
1. The message to log for this very execution
2. A generic message that will appear if the previous one would occur too many
times.
For example, if you want to log missing resources, use the following code:
for resource in resources:
if resource.is_missing:
logger.warning((
'The resource {r} is missing'.format(r=resource.name),
'Other resources were missing'))
The logs will be displayed as follows:
WARNING: The resource prettiest_cat.jpg is missing
WARNING: The resource best_cat_ever.jpg is missing
WARNING: The resource cutest_cat.jpg is missing
WARNING: The resource lolcat.jpg is missing
WARNING: Other resources were missing

View file

@ -88,6 +88,9 @@ Setting name (default value)
here or a single string representing one locale.
When providing a list, all the locales will be tried
until one works.
`LOG_FILTER` (``[]``) A list of tuples containing the logging level (up to warning)
and the message to be ignored.
For example: ``[(logging.WARN, 'TAG_SAVE_AS is set to False')]``
`READERS` (``{}``) A dictionary of file extensions / Reader classes for Pelican to
process or ignore. For example, to avoid processing .html files,
set: ``READERS = {'html': None}``. To add a custom reader for the
@ -694,6 +697,23 @@ adding the following to your configuration::
CSS_FILE = "wide.css"
Logging
=======
Sometimes, useless log lines appear while the generation occurs. Finding
**the** meaningful error message in the middle of tons of annoying log outputs
can be quite tricky. To be able to filter out all useless log messages, Pelican
comes with the ``LOG_FILTER`` setting.
``LOG_FILTER`` should be a list of tuples ``(level, msg)``, each of them being
composed of the logging level (up to warning) and the message to be ignored.
Simply populate the list with the logs you want to hide and they will be
filtered out.
For example: ``[(logging.WARN, 'TAG_SAVE_AS is set to False')]``
Example settings
================

View file

@ -11,12 +11,15 @@ import argparse
import locale
import collections
# pelican.log has to be the first pelican module to be loaded
# because logging.setLoggerClass has to be called before logging.getLogger
from pelican.log import init
from pelican import signals
from pelican.generators import (ArticlesGenerator, PagesGenerator,
StaticGenerator, SourceFileGenerator,
TemplatePagesGenerator)
from pelican.log import init
from pelican.readers import Readers
from pelican.settings import read_settings
from pelican.utils import clean_output_dir, folder_watcher, file_watcher

View file

@ -239,8 +239,10 @@ class Content(object):
self._context['filenames'][path].url))
origin = origin.replace('\\', '/') # for Windows paths.
else:
logger.warning("Unable to find {fn}, skipping url"
" replacement".format(fn=path))
logger.warning(("Unable to find {fn}, skipping url"
" replacement".format(fn=value),
"Other ressources were not found"
" and their urls not replaced"))
elif what == 'category':
origin = Category(path, self.settings).url
elif what == 'tag':

View file

@ -9,7 +9,7 @@ import os
import sys
import logging
from logging import Formatter, getLogger, StreamHandler, DEBUG
from collections import defaultdict
RESET_TERM = '\033[0;m'
@ -30,7 +30,7 @@ def ansi(color, text):
return '\033[1;{0}m{1}{2}'.format(code, text, RESET_TERM)
class ANSIFormatter(Formatter):
class ANSIFormatter(logging.Formatter):
"""Convert a `logging.LogRecord' object into colored text, using ANSI
escape sequences.
@ -51,7 +51,7 @@ class ANSIFormatter(Formatter):
return ansi('white', record.levelname) + ': ' + msg
class TextFormatter(Formatter):
class TextFormatter(logging.Formatter):
"""
Convert a `logging.LogRecord' object into text.
"""
@ -63,7 +63,62 @@ class TextFormatter(Formatter):
return record.levelname + ': ' + record.getMessage()
def init(level=None, logger=getLogger(), handler=StreamHandler()):
class LimitFilter(logging.Filter):
    """
    Remove duplicate records, and limit the number of records in the same
    group.

    Groups are specified by the message to use when the number of records in
    the same group hits the limit.
    E.g.: log.warning(('43 is not the answer', 'More erroneous answers'))

    All state is stored on the class, so every instance shares the same
    set of seen messages and the same group counters.
    """

    # (levelno, message) pairs that must not be emitted (again)
    ignore = set()
    # the threshold-th record of a group is replaced by the group message;
    # every later record of that group is dropped
    threshold = 5
    # (levelno, group message) -> number of distinct records seen so far
    group_count = defaultdict(int)

    def filter(self, record):
        """
        Decide whether `record` should be emitted.

        Returns the (possibly modified) record when it must be logged, and
        False when it must be dropped. `record.msg` is rewritten in place
        when it is a `(message, group message)` pair.
        """
        # don't limit levels over warnings
        if record.levelno > logging.WARN:
            return record

        # extract group; only a real pair is unpacked, so that a plain
        # two-character string (or an unsized message such as an int or an
        # exception) is left untouched instead of being split or crashing
        group = None
        if isinstance(record.msg, (tuple, list)) and len(record.msg) == 2:
            record.msg, group = record.msg

        # ignore record if it was already raised
        # use .getMessage() and not .msg for string formatting
        ignore_key = (record.levelno, record.getMessage())
        to_ignore = ignore_key in LimitFilter.ignore
        LimitFilter.ignore.add(ignore_key)
        if to_ignore:
            return False

        # check if we went over threshold
        if group:
            key = (record.levelno, group)
            LimitFilter.group_count[key] += 1
            if LimitFilter.group_count[key] == LimitFilter.threshold:
                record.msg = group
            if LimitFilter.group_count[key] > LimitFilter.threshold:
                return False

        return record
class LimitLogger(logging.Logger):
    """
    Logger subclass that automatically attaches a LimitFilter to itself.
    """

    # one filter object, created once and shared by every LimitLogger
    limit_filter = LimitFilter()

    def __init__(self, *args, **kwargs):
        # regular logging.Logger set-up first ...
        super(LimitLogger, self).__init__(*args, **kwargs)
        # ... then register the shared filter on the new logger
        self.addFilter(LimitLogger.limit_filter)
logging.setLoggerClass(LimitLogger)
def init(level=None, handler=logging.StreamHandler()):
logger = logging.getLogger()
if (os.isatty(sys.stdout.fileno())
@ -79,7 +134,7 @@ def init(level=None, logger=getLogger(), handler=StreamHandler()):
if __name__ == '__main__':
init(level=DEBUG)
init(level=logging.DEBUG)
root_logger = logging.getLogger()
root_logger.debug('debug')

View file

@ -318,7 +318,11 @@ class HTMLReader(BaseReader):
if not contents:
contents = self._attr_value(attrs, 'contents', '')
if contents:
logger.warning("Meta tag attribute 'contents' used in file %s, should be changed to 'content'", self._filename)
logger.warning((
"Meta tag attribute 'contents' used in file {}, should"
" be changed to 'content'".format(self._filename),
"Other files have meta tag attribute 'contents' that"
" should be changed to 'content'"))
if name == 'keywords':
name = 'tags'
@ -385,10 +389,6 @@ class Readers(object):
"""
# used to warn about missing dependencies only once, at the first
# instanciation of a Readers object.
warn_missing_deps = True
def __init__(self, settings=None):
self.settings = settings or {}
self.readers = {}
@ -396,16 +396,13 @@ class Readers(object):
for cls in [BaseReader] + BaseReader.__subclasses__():
if not cls.enabled:
if self.__class__.warn_missing_deps:
logger.debug('Missing dependencies for {}'
.format(', '.join(cls.file_extensions)))
logger.debug('Missing dependencies for {}'
.format(', '.join(cls.file_extensions)))
continue
for ext in cls.file_extensions:
self.reader_classes[ext] = cls
self.__class__.warn_missing_deps = False
if self.settings['READERS']:
self.reader_classes.update(self.settings['READERS'])
@ -505,19 +502,10 @@ def find_empty_alt(content, path):
src=(['"])(.*)\5
)
""", re.X)
matches = re.findall(imgs, content)
# find a correct threshold
nb_warnings = 10
if len(matches) == nb_warnings + 1:
nb_warnings += 1 # avoid bad looking case
# print one warning per image with empty alt until threshold
for match in matches[:nb_warnings]:
logger.warning('Empty alt attribute for image {} in {}'.format(
os.path.basename(match[1] + match[5]), path))
# print one warning for the other images with empty alt
if len(matches) > nb_warnings:
logger.warning('{} other images with empty alt attributes'
.format(len(matches) - nb_warnings))
for match in re.findall(imgs, content):
logger.warning(('Empty alt attribute for image {} in {}'.format(
os.path.basename(match[1] + match[5]), path),
'Other images have empty alt attributes'))
def default_metadata(settings=None, process=None):

View file

@ -19,6 +19,8 @@ except ImportError:
from os.path import isabs
from pelican.log import LimitFilter
logger = logging.getLogger(__name__)
@ -98,6 +100,7 @@ DEFAULT_CONFIG = {
'MD_EXTENSIONS': ['codehilite(css_class=highlight)', 'extra'],
'JINJA_EXTENSIONS': [],
'JINJA_FILTERS': {},
'LOG_FILTER': [],
'LOCALE': [''], # defaults to user locale
'DEFAULT_PAGINATION': False,
'DEFAULT_ORPHANS': 0,
@ -170,12 +173,16 @@ def get_settings_from_file(path, default_settings=DEFAULT_CONFIG):
def configure_settings(settings):
"""Provide optimizations, error checking and warnings for the given
settings.
Set up the logs to be ignored as well.
"""
if not 'PATH' in settings or not os.path.isdir(settings['PATH']):
raise Exception('You need to specify a path containing the content'
' (see pelican --help for more information)')
# set up logs to be ignored
LimitFilter.ignore.update(set(settings.get('LOG_FILTER',
DEFAULT_CONFIG['LOG_FILTER'])))
# lookup the theme in "pelican/themes" if the given one doesn't exist
if not os.path.isdir(settings['THEME']):
theme_path = os.path.join(

View file

@ -83,7 +83,7 @@ class TestPelican(LoggedTestCase):
mute(True)(pelican.run)()
self.assertDirsEqual(self.temp_path, os.path.join(OUTPUT_PATH, 'basic'))
self.assertLogCountEqual(
count=4,
count=3,
msg="Unable to find.*skipping url replacement",
level=logging.WARNING)