From d583efb8616cf19401865b04c4270b1a41a96d7f Mon Sep 17 00:00:00 2001
From: Andrea Corbellini <corbellini.andrea@gmail.com>
Date: Fri, 4 Sep 2015 16:49:41 +0200
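Subject: [PATCH] When truncating, stop parsing the document immediately after
 finding the last word.

add_word() now raises a private TruncationCompleted exception as soon as
max_words is reached, and feed() catches it to record truncate_at. The
parser therefore stops at the last word instead of continuing through
the rest of the document, and handle_starttag()/handle_endtag() no
longer need their early-return checks on truncate_at.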
---
 pelican/utils.py | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/pelican/utils.py b/pelican/utils.py
index 7ad0914c..97768f53 100644
--- a/pelican/utils.py
+++ b/pelican/utils.py
@@ -414,6 +414,13 @@ class _HTMLWordTruncator(HTMLParser):
     _singlets = ('br', 'col', 'link', 'base', 'img', 'param', 'area',
                  'hr', 'input')
 
+    class TruncationCompleted(Exception):
+
+        def __init__(self, truncate_at):
+            super(_HTMLWordTruncator.TruncationCompleted, self).__init__(
+                truncate_at)
+            self.truncate_at = truncate_at
+
     def __init__(self, max_words):
         # In Python 2, HTMLParser is not a new-style class,
         # hence super() cannot be used.
@@ -425,6 +432,16 @@ class _HTMLWordTruncator(HTMLParser):
         self.last_word_end = None
         self.truncate_at = None
 
+    def feed(self, *args, **kwargs):
+        try:
+            # With Python 2, super() cannot be used.
+            # See the comment for __init__().
+            HTMLParser.feed(self, *args, **kwargs)
+        except self.TruncationCompleted as exc:
+            self.truncate_at = exc.truncate_at
+        else:
+            self.truncate_at = None
+
     def getoffset(self):
         line_start = 0
         lineno, line_offset = self.getpos()
@@ -436,22 +453,18 @@ class _HTMLWordTruncator(HTMLParser):
         self.words_found += 1
         self.last_word_end = None
         if self.words_found == self.max_words:
-            self.truncate_at = word_end
+            raise self.TruncationCompleted(word_end)
 
     def add_last_word(self):
         if self.last_word_end is not None:
             self.add_word(self.last_word_end)
 
     def handle_starttag(self, tag, attrs):
-        if self.truncate_at is not None:
-            return
         self.add_last_word()
         if tag not in self._singlets:
             self.open_tags.insert(0, tag)
 
     def handle_endtag(self, tag):
-        if self.truncate_at is not None:
-            return
         self.add_last_word()
         try:
             i = self.open_tags.index(tag)