Introduce paragraph count summary (#2761)

Co-authored-by: Justin Mayer <entroP@gmail.com>
This commit is contained in:
Agathe 2024-06-25 15:07:41 +02:00 committed by GitHub
commit 513abbfdc6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 80 additions and 2 deletions

View file

@ -28,6 +28,7 @@ from pelican.utils import (
sanitised_join,
set_date_tzinfo,
slugify,
truncate_html_paragraphs,
truncate_html_words,
)
@ -440,8 +441,13 @@ class Content:
if "summary" in self.metadata:
return self.metadata["summary"]
content = self.content
max_paragraphs = self.settings.get("SUMMARY_MAX_PARAGRAPHS")
if max_paragraphs is not None:
content = truncate_html_paragraphs(self.content, max_paragraphs)
if self.settings["SUMMARY_MAX_LENGTH"] is None:
return self.content
return content
return truncate_html_words(
self.content,

View file

@ -116,6 +116,31 @@ class TestPage(TestBase):
page = Page(**page_kwargs)
self.assertEqual(page.summary, "")
def test_summary_paragraph(self):
    """Summary generation with only a paragraph limit configured.

    With SUMMARY_MAX_PARAGRAPHS set and SUMMARY_MAX_LENGTH disabled, the
    generated summary should not exceed the given paragraph count.
    """
    kwargs = self._copy_page_kwargs()
    config = get_settings()
    kwargs["settings"] = config

    # Remove the explicit summary so that one is generated from the content.
    kwargs["metadata"].pop("summary")

    # Keep a single paragraph; None turns the word limit off.
    config["SUMMARY_MAX_PARAGRAPHS"] = 1
    config["SUMMARY_MAX_LENGTH"] = None

    self.assertEqual(Page(**kwargs).summary, TEST_CONTENT)
def test_summary_paragraph_max_length(self):
    """Summary generation with both a paragraph and a word limit.

    If both SUMMARY_MAX_PARAGRAPHS and SUMMARY_MAX_LENGTH are set, the
    generated summary should exceed neither the paragraph count nor the
    given length.
    """
    kwargs = self._copy_page_kwargs()
    config = get_settings()
    kwargs["settings"] = config

    # Remove the explicit summary so that one is generated from the content.
    kwargs["metadata"].pop("summary")

    config["SUMMARY_MAX_PARAGRAPHS"] = 1
    config["SUMMARY_MAX_LENGTH"] = 10

    generated = Page(**kwargs).summary
    # The expectation is the test content cut down to ten words.
    expected = truncate_html_words(TEST_CONTENT, 10)
    self.assertEqual(generated, expected)
def test_summary_end_suffix(self):
# If a :SUMMARY_END_SUFFIX: is set, and there is no other summary,
# generated summary should contain the specified marker at the end.

View file

@ -401,6 +401,23 @@ class TestUtils(LoggedTestCase):
self.assertEqual(utils.truncate_html_words("&#1234 text", 20), "&#1234 text")
self.assertEqual(utils.truncate_html_words("&#xabc text", 20), "&#xabc text")
def test_truncate_html_paragraphs(self):
    """Check paragraph truncation for counts below, at and above the total."""
    one = "<p>one</p>"
    two = one + "<p>two</p>"
    three = two + "<p>three</p>"

    # (input HTML, paragraphs requested, expected output)
    cases = (
        (one, 0, ""),
        (one, 1, one),
        (one, 2, one),
        (two, 1, one),
        (two, 2, two),
        (three, 1, one),
        (three, 2, two),
        (three, 3, three),
        (three, 4, three),
    )
    for html, count, expected in cases:
        self.assertEqual(utils.truncate_html_paragraphs(html, count), expected)
def test_process_translations(self):
fr_articles = []
en_articles = []

View file

@ -631,6 +631,25 @@ def truncate_html_words(s: str, num: int, end_text: str = "…") -> str:
return out
def truncate_html_paragraphs(s, count):
    """Truncate HTML to a certain number of paragraphs.

    :param s: HTML string to truncate
    :param count: number of paragraphs to keep; zero or a negative value
        yields an empty string
    :return: the first ``count`` ``<p>...</p>`` elements of ``s``,
        concatenated; fewer if ``s`` holds fewer complete paragraphs

    Newlines inside the kept paragraphs are preserved. Anything outside the
    paragraph elements (text before, between or after them) is dropped.
    Opening tags may carry attributes (``<p class="lead">``). Scanning stops
    at the first paragraph that has no closing ``</p>``, so the result never
    contains a half-open element.
    """
    import re

    # "<p" must be followed by ">" or by whitespace introducing attributes,
    # so that tags such as <pre> or <param> are not mistaken for paragraphs.
    opening_tag = re.compile(r"<p(?:\s[^>]*)?>")
    closing_tag = "</p>"

    paragraphs = []
    position = 0
    for _ in range(count):
        opening = opening_tag.search(s, position)
        if opening is None:
            # No further paragraphs: stop instead of slicing with -1.
            break
        # Look for the closing tag only after the opening one, so a stray
        # "</p>" earlier in the text cannot produce a reversed slice.
        closing = s.find(closing_tag, opening.end())
        if closing == -1:
            break
        position = closing + len(closing_tag)
        paragraphs.append(s[opening.start():position])
    return "".join(paragraphs)
def process_translations(
content_list: list[Content],
translation_id: str | Collection[str] | None = None,