forked from github/pelican
Fix quote escaping in HTML attributes read by HTMLReader.
* Wrap HTML attribute values in quotes according to their content: if a value contains a double quote, use single quotes; otherwise use double quotes.
* Add an escape_html utility to ensure quote entities are converted identically across Python versions.

Fixes #1260
This commit is contained in:
parent
661ee49eda
commit
d333ed12c6
5 changed files with 47 additions and 11 deletions
|
|
@ -18,18 +18,14 @@ from pelican import rstdirectives # NOQA
|
||||||
from pelican import signals
|
from pelican import signals
|
||||||
from pelican.cache import FileStampDataCacher
|
from pelican.cache import FileStampDataCacher
|
||||||
from pelican.contents import Author, Category, Page, Tag
|
from pelican.contents import Author, Category, Page, Tag
|
||||||
from pelican.utils import SafeDatetime, get_date, pelican_open, posixize_path
|
from pelican.utils import SafeDatetime, escape_html, get_date, pelican_open, \
|
||||||
|
posixize_path
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from markdown import Markdown
|
from markdown import Markdown
|
||||||
except ImportError:
|
except ImportError:
|
||||||
Markdown = False # NOQA
|
Markdown = False # NOQA
|
||||||
|
|
||||||
try:
|
|
||||||
from html import escape
|
|
||||||
except ImportError:
|
|
||||||
from cgi import escape
|
|
||||||
|
|
||||||
# Metadata processors have no way to discard an unwanted value, so we have
|
# Metadata processors have no way to discard an unwanted value, so we have
|
||||||
# them return this value instead to signal that it should be discarded later.
|
# them return this value instead to signal that it should be discarded later.
|
||||||
# This means that _filter_discardable_metadata() must be called on processed
|
# This means that _filter_discardable_metadata() must be called on processed
|
||||||
|
|
@ -354,7 +350,7 @@ class HTMLReader(BaseReader):
|
||||||
self._in_body = False
|
self._in_body = False
|
||||||
self._in_top_level = True
|
self._in_top_level = True
|
||||||
elif self._in_body:
|
elif self._in_body:
|
||||||
self._data_buffer += '</{}>'.format(escape(tag))
|
self._data_buffer += '</{}>'.format(escape_html(tag))
|
||||||
|
|
||||||
def handle_startendtag(self, tag, attrs):
|
def handle_startendtag(self, tag, attrs):
|
||||||
if tag == 'meta' and self._in_head:
|
if tag == 'meta' and self._in_head:
|
||||||
|
|
@ -375,11 +371,16 @@ class HTMLReader(BaseReader):
|
||||||
self._data_buffer += '&#{};'.format(data)
|
self._data_buffer += '&#{};'.format(data)
|
||||||
|
|
||||||
def build_tag(self, tag, attrs, close_tag):
|
def build_tag(self, tag, attrs, close_tag):
|
||||||
result = '<{}'.format(escape(tag))
|
result = '<{}'.format(escape_html(tag))
|
||||||
for k, v in attrs:
|
for k, v in attrs:
|
||||||
result += ' ' + escape(k)
|
result += ' ' + escape_html(k)
|
||||||
if v is not None:
|
if v is not None:
|
||||||
result += '="{}"'.format(escape(v))
|
# If the attribute value contains a double quote, surround
|
||||||
|
# with single quotes, otherwise use double quotes.
|
||||||
|
if '"' in v:
|
||||||
|
result += "='{}'".format(escape_html(v, quote=False))
|
||||||
|
else:
|
||||||
|
result += '="{}"'.format(escape_html(v, quote=False))
|
||||||
if close_tag:
|
if close_tag:
|
||||||
return result + ' />'
|
return result + ' />'
|
||||||
return result + '>'
|
return result + '>'
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,11 @@
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
Ensure that if an attribute value contains a double quote, it is
|
||||||
|
surrounded with single quotes, otherwise with double quotes.
|
||||||
|
<span data-test="'single quoted string'">Span content</span>
|
||||||
|
<span data-test='"double quoted string"'>Span content</span>
|
||||||
|
<span data-test="string without quotes">Span content</span>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
|
@ -61,7 +61,7 @@ class TestCache(unittest.TestCase):
|
||||||
- article_with_null_attributes.html
|
- article_with_null_attributes.html
|
||||||
- 2012-11-30_md_w_filename_meta#foo-bar.md
|
- 2012-11-30_md_w_filename_meta#foo-bar.md
|
||||||
"""
|
"""
|
||||||
self.assertEqual(generator.readers.read_file.call_count, 3)
|
self.assertEqual(generator.readers.read_file.call_count, 4)
|
||||||
|
|
||||||
@unittest.skipUnless(MagicMock, 'Needs Mock module')
|
@unittest.skipUnless(MagicMock, 'Needs Mock module')
|
||||||
def test_article_reader_content_caching(self):
|
def test_article_reader_content_caching(self):
|
||||||
|
|
|
||||||
|
|
@ -587,6 +587,17 @@ class HTMLReaderTest(ReaderTest):
|
||||||
<input name="test" disabled style="" />
|
<input name="test" disabled style="" />
|
||||||
''', page.content)
|
''', page.content)
|
||||||
|
|
||||||
|
def test_article_with_attributes_containing_double_quotes(self):
|
||||||
|
page = self.read_file(path='article_with_attributes_containing_' +
|
||||||
|
'double_quotes.html')
|
||||||
|
self.assertEqual('''
|
||||||
|
Ensure that if an attribute value contains a double quote, it is
|
||||||
|
surrounded with single quotes, otherwise with double quotes.
|
||||||
|
<span data-test="'single quoted string'">Span content</span>
|
||||||
|
<span data-test='"double quoted string"'>Span content</span>
|
||||||
|
<span data-test="string without quotes">Span content</span>
|
||||||
|
''', page.content)
|
||||||
|
|
||||||
def test_article_metadata_key_lowercase(self):
|
def test_article_metadata_key_lowercase(self):
|
||||||
# Keys of metadata should be lowercase.
|
# Keys of metadata should be lowercase.
|
||||||
page = self.read_file(path='article_with_uppercase_metadata.html')
|
page = self.read_file(path='article_with_uppercase_metadata.html')
|
||||||
|
|
|
||||||
|
|
@ -28,6 +28,11 @@ import six
|
||||||
from six.moves import html_entities
|
from six.moves import html_entities
|
||||||
from six.moves.html_parser import HTMLParser
|
from six.moves.html_parser import HTMLParser
|
||||||
|
|
||||||
|
try:
|
||||||
|
from html import escape
|
||||||
|
except ImportError:
|
||||||
|
from cgi import escape
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -548,6 +553,14 @@ def truncate_html_words(s, num, end_text='...'):
|
||||||
return out
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
def escape_html(text, quote=True):
|
||||||
|
"""Escape '&', '<' and '>' to HTML-safe sequences.
|
||||||
|
|
||||||
|
In Python 2 this uses cgi.escape and in Python 3 this uses html.escape. We
|
||||||
|
wrap here to ensure the quote argument has an identical default."""
|
||||||
|
return escape(text, quote=quote)
|
||||||
|
|
||||||
|
|
||||||
def process_translations(content_list, order_by=None):
|
def process_translations(content_list, order_by=None):
|
||||||
""" Finds translation and returns them.
|
""" Finds translation and returns them.
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue