Fix utils.truncate_html_words() to work with invalid HTML references

Invalid references, such as those missing a semicolon (e.g. `&mdash`) or
those whose numeric value overflows (e.g. `&#9999999999;`), are now handled
gracefully, and no exception is raised.

This commit also adds tests and comments where needed.
This commit is contained in:
Andrea Corbellini 2018-02-08 18:30:09 +01:00
commit b573576b00
2 changed files with 65 additions and 10 deletions

View file

@ -217,6 +217,14 @@ class TestUtils(LoggedTestCase):
utils.truncate_html_words("∫dx " * 100, 20),
"∫dx " * 20 + '')
# Words with invalid or broken HTML references.
self.assertEqual(utils.truncate_html_words('&invalid;', 20), '&invalid;')
self.assertEqual(utils.truncate_html_words('�', 20), '�')
self.assertEqual(utils.truncate_html_words('�', 20), '�')
self.assertEqual(utils.truncate_html_words('&mdash', 20), '&mdash')
self.assertEqual(utils.truncate_html_words('&#1234', 20), '&#1234')
self.assertEqual(utils.truncate_html_words('&#xabc', 20), '&#xabc')
def test_process_translations(self):
fr_articles = []
en_articles = []