mirror of
https://github.com/getpelican/pelican.git
synced 2025-10-15 20:28:56 +02:00
Merge pull request #1813 from andreacorbellini/faster-truncation
When truncating, stop parsing the document immediately after finding the last word
This commit is contained in:
commit
b152aba6c6
1 changed file with 18 additions and 5 deletions
|
|
@ -414,6 +414,13 @@ class _HTMLWordTruncator(HTMLParser):
|
||||||
_singlets = ('br', 'col', 'link', 'base', 'img', 'param', 'area',
|
_singlets = ('br', 'col', 'link', 'base', 'img', 'param', 'area',
|
||||||
'hr', 'input')
|
'hr', 'input')
|
||||||
|
|
||||||
|
class TruncationCompleted(Exception):
|
||||||
|
|
||||||
|
def __init__(self, truncate_at):
|
||||||
|
super(_HTMLWordTruncator.TruncationCompleted, self).__init__(
|
||||||
|
truncate_at)
|
||||||
|
self.truncate_at = truncate_at
|
||||||
|
|
||||||
def __init__(self, max_words):
|
def __init__(self, max_words):
|
||||||
# In Python 2, HTMLParser is not a new-style class,
|
# In Python 2, HTMLParser is not a new-style class,
|
||||||
# hence super() cannot be used.
|
# hence super() cannot be used.
|
||||||
|
|
@ -425,6 +432,16 @@ class _HTMLWordTruncator(HTMLParser):
|
||||||
self.last_word_end = None
|
self.last_word_end = None
|
||||||
self.truncate_at = None
|
self.truncate_at = None
|
||||||
|
|
||||||
|
def feed(self, *args, **kwargs):
|
||||||
|
try:
|
||||||
|
# With Python 2, super() cannot be used.
|
||||||
|
# See the comment for __init__().
|
||||||
|
HTMLParser.feed(self, *args, **kwargs)
|
||||||
|
except self.TruncationCompleted as exc:
|
||||||
|
self.truncate_at = exc.truncate_at
|
||||||
|
else:
|
||||||
|
self.truncate_at = None
|
||||||
|
|
||||||
def getoffset(self):
|
def getoffset(self):
|
||||||
line_start = 0
|
line_start = 0
|
||||||
lineno, line_offset = self.getpos()
|
lineno, line_offset = self.getpos()
|
||||||
|
|
@ -436,22 +453,18 @@ class _HTMLWordTruncator(HTMLParser):
|
||||||
self.words_found += 1
|
self.words_found += 1
|
||||||
self.last_word_end = None
|
self.last_word_end = None
|
||||||
if self.words_found == self.max_words:
|
if self.words_found == self.max_words:
|
||||||
self.truncate_at = word_end
|
raise self.TruncationCompleted(word_end)
|
||||||
|
|
||||||
def add_last_word(self):
|
def add_last_word(self):
|
||||||
if self.last_word_end is not None:
|
if self.last_word_end is not None:
|
||||||
self.add_word(self.last_word_end)
|
self.add_word(self.last_word_end)
|
||||||
|
|
||||||
def handle_starttag(self, tag, attrs):
|
def handle_starttag(self, tag, attrs):
|
||||||
if self.truncate_at is not None:
|
|
||||||
return
|
|
||||||
self.add_last_word()
|
self.add_last_word()
|
||||||
if tag not in self._singlets:
|
if tag not in self._singlets:
|
||||||
self.open_tags.insert(0, tag)
|
self.open_tags.insert(0, tag)
|
||||||
|
|
||||||
def handle_endtag(self, tag):
|
def handle_endtag(self, tag):
|
||||||
if self.truncate_at is not None:
|
|
||||||
return
|
|
||||||
self.add_last_word()
|
self.add_last_word()
|
||||||
try:
|
try:
|
||||||
i = self.open_tags.index(tag)
|
i = self.open_tags.index(tag)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue