1
0
Fork 0
forked from github/pelican

Introduce paragraph count summary (#2761)

Co-authored-by: Justin Mayer <entroP@gmail.com>
This commit is contained in:
Agathe 2024-06-25 15:07:41 +02:00 committed by GitHub
commit 513abbfdc6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 80 additions and 2 deletions

View file

@ -162,7 +162,10 @@ author you can use ``author`` field.
If you do not explicitly specify summary metadata for a given post, the If you do not explicitly specify summary metadata for a given post, the
``SUMMARY_MAX_LENGTH`` setting can be used to specify how many words from the ``SUMMARY_MAX_LENGTH`` setting can be used to specify how many words from the
beginning of an article are used as the summary. beginning of an article are used as the summary. You can also use an article's
first N paragraphs as its summary using the ``SUMMARY_MAX_PARAGRAPHS`` setting.
If both settings are in use, the specified number of paragraphs will
be used but may be truncated to respect the specified maximum length.
You can also extract any metadata from the filename through a regular You can also extract any metadata from the filename through a regular
expression to be set in the ``FILENAME_METADATA`` setting. All named groups expression to be set in the ``FILENAME_METADATA`` setting. All named groups

View file

@ -308,6 +308,14 @@ Basic settings
does not otherwise specify a summary. Setting to ``None`` will cause the does not otherwise specify a summary. Setting to ``None`` will cause the
summary to be a copy of the original content. summary to be a copy of the original content.
.. data:: SUMMARY_MAX_PARAGRAPHS = None
When creating a short summary of an article, this will be the number of
paragraphs to use as the summary. This only applies if your content
does not otherwise specify a summary. Setting to ``None`` will cause the
summary to use the whole text (up to ``SUMMARY_MAX_LENGTH``) instead of just
the first N paragraphs.
.. data:: SUMMARY_END_SUFFIX = '…' .. data:: SUMMARY_END_SUFFIX = '…'
When creating a short summary of an article and the result was truncated to When creating a short summary of an article and the result was truncated to

View file

@ -28,6 +28,7 @@ from pelican.utils import (
sanitised_join, sanitised_join,
set_date_tzinfo, set_date_tzinfo,
slugify, slugify,
truncate_html_paragraphs,
truncate_html_words, truncate_html_words,
) )
@ -440,8 +441,13 @@ class Content:
if "summary" in self.metadata: if "summary" in self.metadata:
return self.metadata["summary"] return self.metadata["summary"]
content = self.content
max_paragraphs = self.settings.get("SUMMARY_MAX_PARAGRAPHS")
if max_paragraphs is not None:
content = truncate_html_paragraphs(self.content, max_paragraphs)
if self.settings["SUMMARY_MAX_LENGTH"] is None: if self.settings["SUMMARY_MAX_LENGTH"] is None:
return self.content return content
return truncate_html_words( return truncate_html_words(
self.content, self.content,

View file

@ -116,6 +116,31 @@ class TestPage(TestBase):
page = Page(**page_kwargs) page = Page(**page_kwargs)
self.assertEqual(page.summary, "") self.assertEqual(page.summary, "")
def test_summary_paragraph(self):
    # With SUMMARY_MAX_PARAGRAPHS set (and no word limit), the generated
    # summary must stay within the requested number of paragraphs.
    kwargs = self._copy_page_kwargs()
    config = get_settings()
    config["SUMMARY_MAX_PARAGRAPHS"] = 1
    config["SUMMARY_MAX_LENGTH"] = None
    kwargs["settings"] = config
    # Drop the explicit summary so that one has to be generated.
    del kwargs["metadata"]["summary"]

    generated = Page(**kwargs).summary
    self.assertEqual(generated, TEST_CONTENT)
def test_summary_paragraph_max_length(self):
    # When SUMMARY_MAX_PARAGRAPHS and SUMMARY_MAX_LENGTH are both set, the
    # generated summary must respect the paragraph count as well as the
    # maximum length.
    kwargs = self._copy_page_kwargs()
    config = get_settings()
    config["SUMMARY_MAX_PARAGRAPHS"] = 1
    config["SUMMARY_MAX_LENGTH"] = 10
    kwargs["settings"] = config
    # Drop the explicit summary so that one has to be generated.
    del kwargs["metadata"]["summary"]

    generated = Page(**kwargs).summary
    self.assertEqual(generated, truncate_html_words(TEST_CONTENT, 10))
def test_summary_end_suffix(self): def test_summary_end_suffix(self):
# If a :SUMMARY_END_SUFFIX: is set, and there is no other summary, # If a :SUMMARY_END_SUFFIX: is set, and there is no other summary,
# generated summary should contain the specified marker at the end. # generated summary should contain the specified marker at the end.

View file

@ -401,6 +401,23 @@ class TestUtils(LoggedTestCase):
self.assertEqual(utils.truncate_html_words("&#1234 text", 20), "&#1234 text") self.assertEqual(utils.truncate_html_words("&#1234 text", 20), "&#1234 text")
self.assertEqual(utils.truncate_html_words("&#xabc text", 20), "&#xabc text") self.assertEqual(utils.truncate_html_words("&#xabc text", 20), "&#xabc text")
def test_truncate_html_paragraphs(self):
    # Inputs made of one, two and three consecutive paragraphs.
    one = "<p>one</p>"
    two = one + "<p>two</p>"
    three = two + "<p>three</p>"

    # (input HTML, paragraphs requested, expected output); asking for more
    # paragraphs than exist must return the input unchanged.
    cases = [
        (one, 0, ""),
        (one, 1, one),
        (one, 2, one),
        (two, 1, one),
        (two, 2, two),
        (three, 1, one),
        (three, 2, two),
        (three, 3, three),
        (three, 4, three),
    ]
    for html, count, expected in cases:
        self.assertEqual(utils.truncate_html_paragraphs(html, count), expected)
def test_process_translations(self): def test_process_translations(self):
fr_articles = [] fr_articles = []
en_articles = [] en_articles = []

View file

@ -631,6 +631,25 @@ def truncate_html_words(s: str, num: int, end_text: str = "…") -> str:
return out return out
def truncate_html_paragraphs(s: str, count: int) -> str:
    """Truncate HTML to a certain number of paragraphs.

    Only complete ``<p>...</p>`` elements are kept, in document order, and
    they are concatenated directly: anything located before, between or after
    the kept paragraphs is dropped. Newlines inside a kept paragraph are
    preserved.

    :param s: HTML source to truncate
    :param count: number of paragraphs to keep
    :return: the first ``count`` paragraphs of ``s`` (fewer if ``s`` holds
        fewer complete paragraphs), or an empty string if there are none
    """
    # Local import so this function does not depend on the module's import
    # block.
    import re

    # An opening tag is "<p>" or "<p" followed by attributes; requiring
    # whitespace or ">" right after "<p" keeps tags such as "<pre>" out.
    opening_tag = re.compile(r"<p(?:\s[^>]*)?>")
    closing_tag = "</p>"

    paragraphs = []
    position = 0
    for _ in range(count):
        opening = opening_tag.search(s, position)
        if opening is None:
            # No further paragraph: stop instead of slicing with -1.
            break
        # Look for the closing tag only after the opening one, so that a
        # stray "</p>" earlier in the text cannot produce a bogus slice.
        closing = s.find(closing_tag, opening.end())
        if closing == -1:
            # Unterminated paragraph: nothing complete is left to keep.
            break
        position = closing + len(closing_tag)
        paragraphs.append(s[opening.start() : position])

    return "".join(paragraphs)
def process_translations( def process_translations(
content_list: list[Content], content_list: list[Content],
translation_id: str | Collection[str] | None = None, translation_id: str | Collection[str] | None = None,