more accurate code and tests

This commit is contained in:
Andrea Corbellini 2018-02-08 20:10:08 +01:00
commit 01480a539f
2 changed files with 5 additions and 6 deletions

View file

@@ -225,11 +225,11 @@ class TestUtils(LoggedTestCase):
self.assertEqual( self.assertEqual(
utils.truncate_html_words('&#xfffffffff;', 20), '&#xfffffffff;') utils.truncate_html_words('&#xfffffffff;', 20), '&#xfffffffff;')
self.assertEqual( self.assertEqual(
utils.truncate_html_words('&mdash', 20), '&mdash') utils.truncate_html_words('&mdash text', 20), '&mdash text')
self.assertEqual( self.assertEqual(
utils.truncate_html_words('&#1234', 20), '&#1234') utils.truncate_html_words('&#1234 text', 20), '&#1234 text')
self.assertEqual( self.assertEqual(
utils.truncate_html_words('&#xabc', 20), '&#xabc') utils.truncate_html_words('&#xabc text', 20), '&#xabc text')
def test_process_translations(self): def test_process_translations(self):
fr_articles = [] fr_articles = []

View file

@@ -571,13 +571,12 @@ class _HTMLWordTruncator(HTMLParser):
# - `1` (the length of `&`) # - `1` (the length of `&`)
# - `len(name)` (the length of `mdash`) # - `len(name)` (the length of `mdash`)
# - `1` (the length of `;`) # - `1` (the length of `;`)
# - `1` (required to go to the start of `suffix`)
# #
# Note that, in case of malformed HTML, the ';' character may # Note that, in case of malformed HTML, the ';' character may
# not be present. # not be present.
offset = self.getoffset() offset = self.getoffset()
ref_end = offset + len(name) + 1 + 1 ref_end = offset + len(name) + 1
try: try:
if self.rawdata[ref_end] == ';': if self.rawdata[ref_end] == ';':
@@ -623,7 +622,7 @@ class _HTMLWordTruncator(HTMLParser):
char = six.unichr(codepoint) char = six.unichr(codepoint)
except (ValueError, OverflowError): except (ValueError, OverflowError):
char = '' char = ''
self._handle_ref(name, char) self._handle_ref('#' + name, char)
def truncate_html_words(s, num, end_text=''): def truncate_html_words(s, num, end_text=''):