diff --git a/pelican/readers.py b/pelican/readers.py index 2e51c4ff..7a5f2ee2 100644 --- a/pelican/readers.py +++ b/pelican/readers.py @@ -18,18 +18,14 @@ from pelican import rstdirectives # NOQA from pelican import signals from pelican.cache import FileStampDataCacher from pelican.contents import Author, Category, Page, Tag -from pelican.utils import SafeDatetime, get_date, pelican_open, posixize_path +from pelican.utils import SafeDatetime, escape_html, get_date, pelican_open, \ + posixize_path try: from markdown import Markdown except ImportError: Markdown = False # NOQA -try: - from html import escape -except ImportError: - from cgi import escape - # Metadata processors have no way to discard an unwanted value, so we have # them return this value instead to signal that it should be discarded later. # This means that _filter_discardable_metadata() must be called on processed @@ -354,7 +350,7 @@ class HTMLReader(BaseReader): self._in_body = False self._in_top_level = True elif self._in_body: - self._data_buffer += ''.format(escape(tag)) + self._data_buffer += ''.format(escape_html(tag)) def handle_startendtag(self, tag, attrs): if tag == 'meta' and self._in_head: @@ -375,11 +371,16 @@ class HTMLReader(BaseReader): self._data_buffer += '&#{};'.format(data) def build_tag(self, tag, attrs, close_tag): - result = '<{}'.format(escape(tag)) + result = '<{}'.format(escape_html(tag)) for k, v in attrs: - result += ' ' + escape(k) + result += ' ' + escape_html(k) if v is not None: - result += '="{}"'.format(escape(v)) + # If the attribute value contains a double quote, surround + # with single quotes, otherwise use double quotes. + if '"' in v: + result += "='{}'".format(escape_html(v, quote=False)) + else: + result += '="{}"'.format(escape_html(v, quote=False)) if close_tag: return result + ' />' return result + '>' diff --git a/pelican/tests/content/article_with_attributes_containing_double_quotes.html b/pelican/tests/content/article_with_attributes_containing_double_quotes.html new file mode 100644 index 00000000..7daa5801 --- /dev/null +++ b/pelican/tests/content/article_with_attributes_containing_double_quotes.html @@ -0,0 +1,11 @@ + + + + + Ensure that if an attribute value contains a double quote, it is + surrounded with single quotes, otherwise with double quotes. + Span content + Span content + Span content + + diff --git a/pelican/tests/test_cache.py b/pelican/tests/test_cache.py index 006e421b..3da3f789 100644 --- a/pelican/tests/test_cache.py +++ b/pelican/tests/test_cache.py @@ -61,7 +61,7 @@ class TestCache(unittest.TestCase): - article_with_null_attributes.html - 2012-11-30_md_w_filename_meta#foo-bar.md """ - self.assertEqual(generator.readers.read_file.call_count, 3) + self.assertEqual(generator.readers.read_file.call_count, 4) @unittest.skipUnless(MagicMock, 'Needs Mock module') def test_article_reader_content_caching(self): diff --git a/pelican/tests/test_readers.py b/pelican/tests/test_readers.py index 5fabc470..dc434835 100644 --- a/pelican/tests/test_readers.py +++ b/pelican/tests/test_readers.py @@ -587,6 +587,17 @@ class HTMLReaderTest(ReaderTest): ''', page.content) + def test_article_with_attributes_containing_double_quotes(self): + page = self.read_file(path='article_with_attributes_containing_' + + 'double_quotes.html') + self.assertEqual(''' + Ensure that if an attribute value contains a double quote, it is + surrounded with single quotes, otherwise with double quotes. + Span content + Span content + Span content + ''', page.content) + def test_article_metadata_key_lowercase(self): # Keys of metadata should be lowercase. page = self.read_file(path='article_with_uppercase_metadata.html') diff --git a/pelican/utils.py b/pelican/utils.py index cc9eb405..1422a979 100644 --- a/pelican/utils.py +++ b/pelican/utils.py @@ -28,6 +28,11 @@ import six from six.moves import html_entities from six.moves.html_parser import HTMLParser +try: + from html import escape +except ImportError: + from cgi import escape + logger = logging.getLogger(__name__) @@ -548,6 +553,14 @@ def truncate_html_words(s, num, end_text='...'): return out +def escape_html(text, quote=True): + """Escape '&', '<' and '>' to HTML-safe sequences. + + In Python 2 this uses cgi.escape and in Python 3 this uses html.escape. We + wrap here to ensure the quote argument has an identical default.""" + return escape(text, quote=quote) + + def process_translations(content_list, order_by=None): """ Finds translation and returns them.