forked from github/pelican
parent
5c222ef41b
commit
d5d792060c
3 changed files with 11 additions and 4 deletions
5
RELEASE.md
Normal file
5
RELEASE.md
Normal file
|
|
@@ -0,0 +1,5 @@
|
|||
Release type: minor
|
||||
|
||||
* Use JSON values for extra settings in Invoke tasks template (#2994)
|
||||
* Add content tag for links, which can help with things like Twitter social cards (#3001)
|
||||
* Improve word count behavior when generating summary (#3002)
|
||||
|
|
@@ -230,6 +230,11 @@ class TestUtils(LoggedTestCase):
|
|||
'Ты мелькнула, ты предстала, Снова сердце задрожало,', 3
|
||||
),
|
||||
'Ты мелькнула, ты' + ' …')
|
||||
self.assertEqual(
|
||||
utils.truncate_html_words(
|
||||
'Trong đầm gì đẹp bằng sen', 4
|
||||
),
|
||||
'Trong đầm gì đẹp' + ' …')
|
||||
|
||||
# Words enclosed or intervaled by HTML tags.
|
||||
self.assertEqual(
|
||||
|
|
|
|||
|
|
@@ -412,10 +412,7 @@ def posixize_path(rel_path):
|
|||
|
||||
class _HTMLWordTruncator(HTMLParser):
|
||||
|
||||
_word_regex = re.compile(r"(({SBC})({SBC}|-|')*)|{DBC}".format(
|
||||
# SBC means Latin-like characters. A word contains a few characters.
|
||||
# ASCII |Extended Latin | Cyrillic
|
||||
SBC="[0-9a-zA-Z]|[\u00C0-\u024f]|[\u0400-\u04FF]",
|
||||
_word_regex = re.compile(r"{DBC}|(\w[\w'-]*)".format(
|
||||
# DBC means CJK-like characters. A character can stand for a word.
|
||||
DBC=("([\u4E00-\u9FFF])|" # CJK Unified Ideographs
|
||||
"([\u3400-\u4DBF])|" # CJK Unified Ideographs Extension A
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue