This commit is contained in:
Rúnar Berg Baugsson Sigríðarson 2015-10-12 08:13:56 +00:00
commit 54a1672ffd
13 changed files with 39 additions and 11 deletions

View file

@ -379,7 +379,11 @@ class HTMLReader(BaseReader):
for k, v in attrs:
result += ' ' + escape(k)
if v is not None:
result += '="{}"'.format(escape(v))
# figure out the proper surrounding quote-marks
if '"' in v:
result += "='{}'".format(escape(v))
else:
result += '="{}"'.format(escape(v))
if close_tag:
return result + ' />'
return result + '>'

View file

@ -0,0 +1,12 @@
<html>
<head>
</head>
<body>
<section id="double-quote-attribute-value">
Both double quoted attribute values should
be supported.
As well as single quoted, so they can accept some
<span data-json='{"key": "value"}'>JSON data-attributes</span>
</section>
</body>
</html>

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -579,6 +579,18 @@ class HTMLReaderTest(ReaderTest):
}
self.assertDictHasSubset(page.metadata, expected)
def test_article_with_attributes(self):
# Regression test for attribute quoting: an attribute value that itself
# contains double quotes (here, JSON inside a data-* attribute) must come
# back wrapped in single quotes, while ordinary values keep double quotes.
# NOTE(review): presumably the fixture read below is the HTML file added in
# this same commit -- confirm the path against the test content directory.
page = self.read_file(path='article_with_attributes.html')
# Expected value first, actual second. The literal starts and ends with a
# newline, and the comparison is exact, so whitespace inside it matters.
self.assertEqual('''
<section id="double-quote-attribute-value">
Both double quoted attribute values should
be supported.
As well as single quoted, so they can accept some
<span data-json='{"key": "value"}'>JSON data-attributes</span>
</section>
''', page.content)
def test_article_with_null_attributes(self):
page = self.read_file(path='article_with_null_attributes.html')