Fix quote escaping in HTML attributes read by HTMLReader.

* Wrap HTML attribute values in quotes according to their content: if a value contains a double quote, wrap it in single quotes; otherwise wrap it in double quotes.
* Add escape_html utility to ensure quote entities are converted identically across Python versions.

Fixes #1260
This commit is contained in:
Simon StJG 2015-10-12 20:31:32 +00:00
commit d333ed12c6
5 changed files with 47 additions and 11 deletions

View file

@ -18,18 +18,14 @@ from pelican import rstdirectives # NOQA
from pelican import signals
from pelican.cache import FileStampDataCacher
from pelican.contents import Author, Category, Page, Tag
from pelican.utils import SafeDatetime, get_date, pelican_open, posixize_path
from pelican.utils import SafeDatetime, escape_html, get_date, pelican_open, \
posixize_path
try:
from markdown import Markdown
except ImportError:
Markdown = False # NOQA
try:
from html import escape
except ImportError:
from cgi import escape
# Metadata processors have no way to discard an unwanted value, so we have
# them return this value instead to signal that it should be discarded later.
# This means that _filter_discardable_metadata() must be called on processed
@ -354,7 +350,7 @@ class HTMLReader(BaseReader):
self._in_body = False
self._in_top_level = True
elif self._in_body:
self._data_buffer += '</{}>'.format(escape(tag))
self._data_buffer += '</{}>'.format(escape_html(tag))
def handle_startendtag(self, tag, attrs):
if tag == 'meta' and self._in_head:
@ -375,11 +371,16 @@ class HTMLReader(BaseReader):
self._data_buffer += '&#{};'.format(data)
def build_tag(self, tag, attrs, close_tag):
    """Serialise a tag and its attributes back into HTML markup.

    ``attrs`` is an iterable of ``(name, value)`` pairs. A value of
    ``None`` produces a bare attribute with no ``=...`` part. When
    ``close_tag`` is true the tag is emitted self-closed (`` />``),
    otherwise it ends with ``>``.

    Attribute values are quoted according to their content:

    * no double quote in the value -> wrapped in double quotes;
    * double quotes but no single quote -> wrapped in single quotes;
    * both kinds of quote -> wrapped in double quotes, with each
      embedded double quote written as ``&quot;`` so the value cannot
      terminate the attribute early.
    """
    parts = ['<{}'.format(escape_html(tag))]
    for k, v in attrs:
        parts.append(' ' + escape_html(k))
        if v is None:
            continue
        # NOTE(review): assumes escape_html(..., quote=False) leaves quote
        # characters untouched and only escapes &, < and > -- confirm
        # against pelican.utils.escape_html.
        value = escape_html(v, quote=False)
        if '"' not in v:
            parts.append('="{}"'.format(value))
        elif "'" not in v:
            parts.append("='{}'".format(value))
        else:
            # Neither delimiter is safe as-is; encode the double quotes
            # explicitly and keep the double-quote delimiter.
            parts.append('="{}"'.format(value.replace('"', '&quot;')))
    parts.append(' />' if close_tag else '>')
    return ''.join(parts)