more accurate code and tests

2025-10-15 20:28:56 +02:00 · 2018-02-08 20:10:08 +01:00 · 2018-02-08 20:10:08 +01:00 · 01480a539f
commit 01480a539f
parent fc7af9e1c3
2 changed files with 5 additions and 6 deletions
--- a/pelican/tests/test_utils.py
+++ b/pelican/tests/test_utils.py
@@ -225,11 +225,11 @@ class TestUtils(LoggedTestCase):
        self.assertEqual(
            utils.truncate_html_words('&#xfffffffff;', 20), '&#xfffffffff;')
        self.assertEqual(
-            utils.truncate_html_words('&mdash', 20), '&mdash')
+            utils.truncate_html_words('&mdash text', 20), '&mdash text')
        self.assertEqual(
-            utils.truncate_html_words('&#1234', 20), '&#1234')
+            utils.truncate_html_words('&#1234 text', 20), '&#1234 text')
        self.assertEqual(
-            utils.truncate_html_words('&#xabc', 20), '&#xabc')
+            utils.truncate_html_words('&#xabc text', 20), '&#xabc text')

    def test_process_translations(self):
        fr_articles = []
--- a/pelican/utils.py
+++ b/pelican/utils.py
@@ -571,13 +571,12 @@ class _HTMLWordTruncator(HTMLParser):
        # - `1` (the length of `&`)
        # - `len(name)` (the length of `mdash`)
        # - `1` (the length of `;`)
-        # - `1` (required to go to the start of `suffix`)
        #
        # Note that, in case of malformed HTML, the ';' character may
        # not be present.

        offset = self.getoffset()
-        ref_end = offset + len(name) + 1 + 1
+        ref_end = offset + len(name) + 1

        try:
            if self.rawdata[ref_end] == ';':
@@ -623,7 +622,7 @@ class _HTMLWordTruncator(HTMLParser):
            char = six.unichr(codepoint)
        except (ValueError, OverflowError):
            char = ''
-        self._handle_ref(name, char)
+        self._handle_ref('#' + name, char)


 def truncate_html_words(s, num, end_text='…'):