more accurate code and tests

This commit is contained in:
Andrea Corbellini 2018-02-08 20:10:08 +01:00
commit 01480a539f
2 changed files with 5 additions and 6 deletions

View file

@@ -225,11 +225,11 @@ class TestUtils(LoggedTestCase):
self.assertEqual(
utils.truncate_html_words('�', 20), '�')
self.assertEqual(
utils.truncate_html_words('&mdash', 20), '&mdash')
utils.truncate_html_words('&mdash text', 20), '&mdash text')
self.assertEqual(
utils.truncate_html_words('&#1234', 20), '&#1234')
utils.truncate_html_words('&#1234 text', 20), '&#1234 text')
self.assertEqual(
utils.truncate_html_words('&#xabc', 20), '&#xabc')
utils.truncate_html_words('&#xabc text', 20), '&#xabc text')
def test_process_translations(self):
fr_articles = []

View file

@@ -571,13 +571,12 @@ class _HTMLWordTruncator(HTMLParser):
# - `1` (the length of `&`)
# - `len(name)` (the length of `mdash`)
# - `1` (the length of `;`)
# - `1` (required to go to the start of `suffix`)
#
# Note that, in case of malformed HTML, the ';' character may
# not be present.
offset = self.getoffset()
ref_end = offset + len(name) + 1 + 1
ref_end = offset + len(name) + 1
try:
if self.rawdata[ref_end] == ';':
@@ -623,7 +622,7 @@ class _HTMLWordTruncator(HTMLParser):
char = six.unichr(codepoint)
except (ValueError, OverflowError):
char = ''
self._handle_ref(name, char)
self._handle_ref('#' + name, char)
def truncate_html_words(s, num, end_text=''):