mirror of
https://github.com/getpelican/pelican.git
synced 2025-10-15 20:28:56 +02:00
When truncating, stop parsing the document immediately after finding the last word.
This commit is contained in:
parent
a6c258eb7f
commit
d583efb861
1 changed file with 18 additions and 5 deletions
|
|
@ -414,6 +414,13 @@ class _HTMLWordTruncator(HTMLParser):
|
|||
# Tags that handle_starttag() never records in open_tags.
# NOTE(review): presumably these are the HTML elements that take no
# closing tag -- confirm against the rest of the class.
_singlets = ('br', 'col', 'link', 'base', 'img', 'param', 'area',
             'hr', 'input')
|
||||
|
||||
class TruncationCompleted(Exception):
    """Signal that the truncation point has been found.

    Raised while parsing to abort the rest of the document; ``feed()``
    catches it and reads ``truncate_at`` from the exception.

    Attributes:
        truncate_at: the value handed to the constructor by the raiser.
    """

    def __init__(self, truncate_at):
        # Call the base initializer explicitly rather than through
        # super(<Outer>.TruncationCompleted, self): this works on both
        # Python 2 and Python 3 and does not hard-code the name of the
        # enclosing class.
        Exception.__init__(self, truncate_at)
        self.truncate_at = truncate_at
|
||||
|
||||
def __init__(self, max_words):
|
||||
# In Python 2, HTMLParser is not a new-style class,
|
||||
# hence super() cannot be used.
|
||||
|
|
@ -425,6 +432,16 @@ class _HTMLWordTruncator(HTMLParser):
|
|||
self.last_word_end = None
|
||||
self.truncate_at = None
|
||||
|
||||
def feed(self, *args, **kwargs):
    """Feed data to the parser, stopping early once truncation completes.

    All arguments are passed through unchanged to ``HTMLParser.feed``.

    If a ``TruncationCompleted`` exception is raised while parsing, it is
    caught here and its ``truncate_at`` value is stored on the instance.
    If parsing finishes without that exception, ``truncate_at`` is set to
    ``None``. Any other exception propagates to the caller.
    """
    try:
        # With Python 2, super() cannot be used.
        # See the comment for __init__().
        HTMLParser.feed(self, *args, **kwargs)
    except self.TruncationCompleted as exc:
        # Parsing was aborted on purpose: remember where to cut.
        self.truncate_at = exc.truncate_at
    else:
        # The whole input was consumed without hitting the limit.
        self.truncate_at = None
|
||||
|
||||
def getoffset(self):
|
||||
line_start = 0
|
||||
lineno, line_offset = self.getpos()
|
||||
|
|
@ -436,22 +453,18 @@ class _HTMLWordTruncator(HTMLParser):
|
|||
self.words_found += 1
|
||||
self.last_word_end = None
|
||||
if self.words_found == self.max_words:
|
||||
self.truncate_at = word_end
|
||||
raise self.TruncationCompleted(word_end)
|
||||
|
||||
def add_last_word(self):
    """Count the pending word, if there is one.

    When ``last_word_end`` is not ``None`` it is passed to ``add_word()``;
    otherwise nothing happens. The comparison is against ``None`` rather
    than a truthiness test, so a value of ``0`` is still forwarded.
    """
    if self.last_word_end is not None:
        self.add_word(self.last_word_end)
|
||||
|
||||
def handle_starttag(self, tag, attrs):
    """Handle an opening tag.

    Does nothing once ``truncate_at`` has been set. Otherwise the pending
    word (if any) is flushed through ``add_last_word()``, and the tag is
    inserted at the front of ``open_tags`` unless it is listed in
    ``_singlets``.

    Args:
        tag: name of the tag being opened.
        attrs: the tag's attributes; not used here.
    """
    if self.truncate_at is not None:
        return
    # A tag boundary ends whatever word was in progress.
    self.add_last_word()
    if tag not in self._singlets:
        # Most recently opened tag goes first.
        self.open_tags.insert(0, tag)
|
||||
|
||||
def handle_endtag(self, tag):
|
||||
if self.truncate_at is not None:
|
||||
return
|
||||
self.add_last_word()
|
||||
try:
|
||||
i = self.open_tags.index(tag)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue