forked from github/pelican
Fix quote escaping in read html attributes.
* Wrap HTML attribute values in quotes according to their content: if a value contains a double quote, wrap it in single quotes; otherwise wrap it in double quotes.
* Add an escape_html utility to ensure quote entities are converted identically across Python versions.
Fixes #1260
This commit is contained in:
parent
661ee49eda
commit
d333ed12c6
5 changed files with 47 additions and 11 deletions
|
|
@ -18,18 +18,14 @@ from pelican import rstdirectives # NOQA
|
|||
from pelican import signals
|
||||
from pelican.cache import FileStampDataCacher
|
||||
from pelican.contents import Author, Category, Page, Tag
|
||||
from pelican.utils import SafeDatetime, get_date, pelican_open, posixize_path
|
||||
from pelican.utils import SafeDatetime, escape_html, get_date, pelican_open, \
|
||||
posixize_path
|
||||
|
||||
try:
|
||||
from markdown import Markdown
|
||||
except ImportError:
|
||||
Markdown = False # NOQA
|
||||
|
||||
try:
|
||||
from html import escape
|
||||
except ImportError:
|
||||
from cgi import escape
|
||||
|
||||
# Metadata processors have no way to discard an unwanted value, so we have
|
||||
# them return this value instead to signal that it should be discarded later.
|
||||
# This means that _filter_discardable_metadata() must be called on processed
|
||||
|
|
@ -354,7 +350,7 @@ class HTMLReader(BaseReader):
|
|||
self._in_body = False
|
||||
self._in_top_level = True
|
||||
elif self._in_body:
|
||||
self._data_buffer += '</{}>'.format(escape(tag))
|
||||
self._data_buffer += '</{}>'.format(escape_html(tag))
|
||||
|
||||
def handle_startendtag(self, tag, attrs):
|
||||
if tag == 'meta' and self._in_head:
|
||||
|
|
@ -375,11 +371,16 @@ class HTMLReader(BaseReader):
|
|||
self._data_buffer += '&#{};'.format(data)
|
||||
|
||||
def build_tag(self, tag, attrs, close_tag):
    """Rebuild the markup of an opening (or self-closing) tag.

    :param tag: the tag name.
    :param attrs: iterable of ``(name, value)`` pairs; a ``None`` value
        produces a bare attribute with no ``=value`` part.
    :param close_tag: when true the tag is terminated with ``' />'``,
        otherwise with ``'>'``.
    :return: the tag as a string.
    """
    parts = ['<{}'.format(escape_html(tag))]
    for k, v in attrs:
        parts.append(' ' + escape_html(k))
        if v is None:
            continue
        # Quotes are left untouched here; the delimiter is chosen below
        # so that the value can be emitted as close to verbatim as possible.
        value = escape_html(v, quote=False)
        if '"' not in v:
            parts.append('="{}"'.format(value))
        elif "'" not in v:
            # Only double quotes inside: single-quote delimiters are safe.
            parts.append("='{}'".format(value))
        else:
            # Both quote characters occur, so neither delimiter is safe
            # as-is: delimit with double quotes and escape the inner ones,
            # otherwise the attribute would terminate early.
            parts.append('="{}"'.format(value.replace('"', '&quot;')))
    parts.append(' />' if close_tag else '>')
    return ''.join(parts)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,11 @@
|
|||
<html>
|
||||
<head>
|
||||
</head>
|
||||
<body>
|
||||
Ensure that if an attribute value contains a double quote, it is
|
||||
surrounded with single quotes, otherwise with double quotes.
|
||||
<span data-test="'single quoted string'">Span content</span>
|
||||
<span data-test='"double quoted string"'>Span content</span>
|
||||
<span data-test="string without quotes">Span content</span>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -61,7 +61,7 @@ class TestCache(unittest.TestCase):
|
|||
- article_with_null_attributes.html
|
||||
- 2012-11-30_md_w_filename_meta#foo-bar.md
|
||||
"""
|
||||
self.assertEqual(generator.readers.read_file.call_count, 3)
|
||||
self.assertEqual(generator.readers.read_file.call_count, 4)
|
||||
|
||||
@unittest.skipUnless(MagicMock, 'Needs Mock module')
|
||||
def test_article_reader_content_caching(self):
|
||||
|
|
|
|||
|
|
@ -587,6 +587,17 @@ class HTMLReaderTest(ReaderTest):
|
|||
<input name="test" disabled style="" />
|
||||
''', page.content)
|
||||
|
||||
def test_article_with_attributes_containing_double_quotes(self):
# Reads the fixture article and asserts that page.content equals the
# expected markup below: the attribute value containing double quotes is
# wrapped in single quotes, the other values are wrapped in double quotes.
|
||||
page = self.read_file(path='article_with_attributes_containing_' +
|
||||
'double_quotes.html')
|
||||
self.assertEqual('''
|
||||
Ensure that if an attribute value contains a double quote, it is
|
||||
surrounded with single quotes, otherwise with double quotes.
|
||||
<span data-test="'single quoted string'">Span content</span>
|
||||
<span data-test='"double quoted string"'>Span content</span>
|
||||
<span data-test="string without quotes">Span content</span>
|
||||
''', page.content)
|
||||
|
||||
def test_article_metadata_key_lowercase(self):
|
||||
# Keys of metadata should be lowercase.
|
||||
page = self.read_file(path='article_with_uppercase_metadata.html')
|
||||
|
|
|
|||
|
|
@ -28,6 +28,11 @@ import six
|
|||
from six.moves import html_entities
|
||||
from six.moves.html_parser import HTMLParser
|
||||
|
||||
try:
|
||||
from html import escape
|
||||
except ImportError:
|
||||
from cgi import escape
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
|
|
@ -548,6 +553,14 @@ def truncate_html_words(s, num, end_text='...'):
|
|||
return out
|
||||
|
||||
|
||||
def escape_html(text, quote=True):
    """Escape HTML-special characters in ``text``.

    ``&``, ``<`` and ``>`` are always converted to entities. When ``quote``
    is true (the default), ``"`` becomes ``&quot;`` and ``'`` becomes
    ``&#x27;`` as well.

    The module-level ``escape`` is ``html.escape`` when available and
    ``cgi.escape`` otherwise. The two differ in their default for ``quote``,
    and ``cgi.escape`` never converts single quotes, so the quote handling
    is done here to give identical output regardless of which one was
    imported.

    :param text: the string to escape.
    :param quote: whether quote characters are escaped too.
    :return: the escaped string.
    """
    escaped = escape(text, quote=False)
    if quote:
        # '&' has already been converted above, so the entities inserted
        # here are not escaped a second time.
        escaped = escaped.replace('"', '&quot;').replace("'", '&#x27;')
    return escaped
|
||||
|
||||
|
||||
def process_translations(content_list, order_by=None):
|
||||
""" Finds translation and returns them.
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue