mirror of
https://github.com/getpelican/pelican.git
synced 2025-10-15 20:28:56 +02:00
Improve word count behavior when generating summary
Improve _HTMLWordTruncator by covering more than one Unicode block in _word_regex, so that the word count behaves properly with CJK, Cyrillic, and additional Latin characters when generating a summary.
This commit is contained in:
parent
a088f8bb9e
commit
22192c148a
2 changed files with 31 additions and 3 deletions
|
|
@@ -218,6 +218,19 @@ class TestUtils(LoggedTestCase):
|
|||
utils.truncate_html_words('word ' * 100, 20),
|
||||
'word ' * 20 + '…')
|
||||
|
||||
# Plain text with Unicode content.
|
||||
self.assertEqual(
|
||||
utils.truncate_html_words(
|
||||
'我愿意这样,朋友——我独自远行,不但没有你,\
|
||||
并且再没有别的影在黑暗里。', 12
|
||||
),
|
||||
'我愿意这样,朋友——我独自远行' + ' …')
|
||||
self.assertEqual(
|
||||
utils.truncate_html_words(
|
||||
'Ты мелькнула, ты предстала, Снова сердце задрожало,', 3
|
||||
),
|
||||
'Ты мелькнула, ты' + ' …')
|
||||
|
||||
# Words enclosed or intervaled by HTML tags.
|
||||
self.assertEqual(
|
||||
utils.truncate_html_words('<p>' + 'word ' * 100 + '</p>', 20),
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue