diff --git a/pelican/readers.py b/pelican/readers.py
index fb2ccfc4..046aeeb6 100644
--- a/pelican/readers.py
+++ b/pelican/readers.py
@@ -37,7 +37,6 @@ except ImportError:
from pelican.contents import Page, Category, Tag, Author
from pelican.utils import get_date, pelican_open
-
logger = logging.getLogger(__name__)
METADATA_PROCESSORS = {
@@ -104,6 +103,12 @@ class PelicanHTMLTranslator(HTMLTranslator):
def depart_abbreviation(self, node):
self.body.append('')
+ def visit_image(self, node):
+ # set an empty alt if alt is not specified
+ # avoids that alt is taken from src
+ node['alt'] = node.get('alt', '')
+ return HTMLTranslator.visit_image(self, node)
+
class RstReader(Reader):
enabled = bool(docutils)
@@ -391,6 +396,41 @@ def read_file(base_path, path, content_class=Page, fmt=None,
content, reader_metadata = reader.read(path)
metadata.update(reader_metadata)
+ # create warnings for all images with empty alt (up to a certain number)
+ # as they are really likely to be accessibility flaws
+ if content:
+ # find images with empty alt
+ imgs = re.compile(r"""
+ (?:
+ # src before alt
+
]*
+ src=(['"])(.*)\1
+ [^\>]*
+ alt=(['"])\3
+ )|(?:
+ # alt before src
+
]*
+ alt=(['"])\4
+ [^\>]*
+ src=(['"])(.*)\5
+ )
+ """, re.X)
+ matches = re.findall(imgs, content)
+ # find a correct threshold
+ nb_warnings = 10
+ if len(matches) == nb_warnings + 1:
+ nb_warnings += 1 # avoid bad looking case
+ # print one warning per image with empty alt until threshold
+ for match in matches[:nb_warnings]:
+ logger.warning('Empty alt attribute for image {} in {}'.format(
+ os.path.basename(match[1] + match[5]), path))
+ # print one warning for the other images with empty alt
+ if len(matches) > nb_warnings:
+ logger.warning('{} other images with empty alt attributes'.format(
+ len(matches) - nb_warnings))
+
# eventually filter the content with typogrify if asked so
if content and settings and settings['TYPOGRIFY']:
from typogrify.filters import typogrify