1
0
Fork 0
forked from github/pelican

fix null attributes in html parser

This commit is contained in:
dave mankoff 2013-02-10 11:02:52 -05:00
commit 08439bdcf1
3 changed files with 22 additions and 1 deletions

View file

@ -240,7 +240,10 @@ class HTMLReader(Reader):
def build_tag(self, tag, attrs, close_tag):
    """Serialize a single HTML tag back into markup.

    Args:
        tag: Tag name, e.g. ``'input'``.
        attrs: Iterable of ``(name, value)`` pairs. A value of ``None``
            is written as the bare attribute name (``disabled``); any
            other value, including the empty string, is written as
            ``name="value"``.
        close_tag: When true the tag is terminated with `` />``,
            otherwise with ``>``.

    Returns:
        The tag as a string with tag name, attribute names and
        attribute values HTML-escaped.
    """
    # cgi.escape was removed in Python 3.8; fall back to it only where
    # html.escape is unavailable (Python 2).
    try:
        from html import escape
    except ImportError:
        from cgi import escape

    parts = ['<{}'.format(escape(tag, quote=False))]
    for name, value in attrs:
        parts.append(' ' + escape(name, quote=False))
        # None means "attribute present without a value"; calling escape()
        # on it would raise, so only the bare name is emitted.
        if value is not None:
            # quote=True: the value is wrapped in double quotes, so an
            # embedded '"' must be escaped or it would end the attribute.
            # (html.escape additionally rewrites "'" as "&#x27;".)
            parts.append('="{}"'.format(escape(value, quote=True)))
    parts.append(' />' if close_tag else '>')
    return ''.join(parts)

View file

@ -0,0 +1,8 @@
<html>
<head>
</head>
<body>
Ensure that empty attributes are copied properly.
<input name="test" disabled style="" />
</body>
</html>

View file

@ -298,9 +298,19 @@ class HTMLReaderTest(unittest.TestCase):
self.assertEquals(value, metadata[key], key)
def test_article_with_null_attributes(self):
    """Valueless and empty-valued attributes are copied to the content."""
    reader = readers.HTMLReader({})
    content, _ = reader.read(_path('article_with_null_attributes.html'))
    # NOTE(review): the expected literal is whitespace-sensitive; confirm
    # its leading whitespace matches the fixture file exactly.
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual('''
Ensure that empty attributes are copied properly.
<input name="test" disabled style="" />
''', content)
def test_article_metadata_key_lowercase(self):
    """Keys of metadata should be lowercase."""
    reader = readers.HTMLReader({})
    _, metadata = reader.read(_path('article_with_uppercase_metadata.html'))
    self.assertIn('category', metadata, "Key should be lowercase.")
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual('Yeah', metadata.get('category'), "Value keeps cases.")