forked from github/pelican
Introduce paragraph count summary (#2761)
Co-authored-by: Justin Mayer <entroP@gmail.com>
This commit is contained in:
parent
ef501a3d89
commit
513abbfdc6
6 changed files with 80 additions and 2 deletions
|
|
@ -162,7 +162,10 @@ author you can use ``author`` field.
|
||||||
|
|
||||||
If you do not explicitly specify summary metadata for a given post, the
|
If you do not explicitly specify summary metadata for a given post, the
|
||||||
``SUMMARY_MAX_LENGTH`` setting can be used to specify how many words from the
|
``SUMMARY_MAX_LENGTH`` setting can be used to specify how many words from the
|
||||||
beginning of an article are used as the summary.
|
beginning of an article are used as the summary. You can also use an article's
|
||||||
|
first N paragraphs as its summary using the ``SUMMARY_MAX_PARAGRAPHS`` setting.
|
||||||
|
If both settings are in use, the specified number of paragraphs will
|
||||||
|
be used but may be truncated to respect the specified maximum length.
|
||||||
|
|
||||||
You can also extract any metadata from the filename through a regular
|
You can also extract any metadata from the filename through a regular
|
||||||
expression to be set in the ``FILENAME_METADATA`` setting. All named groups
|
expression to be set in the ``FILENAME_METADATA`` setting. All named groups
|
||||||
|
|
|
||||||
|
|
@ -308,6 +308,14 @@ Basic settings
|
||||||
does not otherwise specify a summary. Setting to ``None`` will cause the
|
does not otherwise specify a summary. Setting to ``None`` will cause the
|
||||||
summary to be a copy of the original content.
|
summary to be a copy of the original content.
|
||||||
|
|
||||||
|
.. data:: SUMMARY_MAX_PARAGRAPHS = None
|
||||||
|
|
||||||
|
When creating a short summary of an article, this will be the number of
|
||||||
|
paragraphs to use as the summary. This only applies if your content
|
||||||
|
does not otherwise specify a summary. Setting to ``None`` will cause the
|
||||||
|
summary to use the whole text (up to ``SUMMARY_MAX_LENGTH``) instead of just
|
||||||
|
the first N paragraphs.
|
||||||
|
|
||||||
.. data:: SUMMARY_END_SUFFIX = '…'
|
.. data:: SUMMARY_END_SUFFIX = '…'
|
||||||
|
|
||||||
When creating a short summary of an article and the result was truncated to
|
When creating a short summary of an article and the result was truncated to
|
||||||
|
|
|
||||||
|
|
@ -28,6 +28,7 @@ from pelican.utils import (
|
||||||
sanitised_join,
|
sanitised_join,
|
||||||
set_date_tzinfo,
|
set_date_tzinfo,
|
||||||
slugify,
|
slugify,
|
||||||
|
truncate_html_paragraphs,
|
||||||
truncate_html_words,
|
truncate_html_words,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -440,8 +441,13 @@ class Content:
|
||||||
if "summary" in self.metadata:
|
if "summary" in self.metadata:
|
||||||
return self.metadata["summary"]
|
return self.metadata["summary"]
|
||||||
|
|
||||||
|
content = self.content
|
||||||
|
max_paragraphs = self.settings.get("SUMMARY_MAX_PARAGRAPHS")
|
||||||
|
if max_paragraphs is not None:
|
||||||
|
content = truncate_html_paragraphs(self.content, max_paragraphs)
|
||||||
|
|
||||||
if self.settings["SUMMARY_MAX_LENGTH"] is None:
|
if self.settings["SUMMARY_MAX_LENGTH"] is None:
|
||||||
return self.content
|
return content
|
||||||
|
|
||||||
return truncate_html_words(
|
return truncate_html_words(
|
||||||
self.content,
|
self.content,
|
||||||
|
|
|
||||||
|
|
@ -116,6 +116,31 @@ class TestPage(TestBase):
|
||||||
page = Page(**page_kwargs)
|
page = Page(**page_kwargs)
|
||||||
self.assertEqual(page.summary, "")
|
self.assertEqual(page.summary, "")
|
||||||
|
|
||||||
|
def test_summary_paragraph(self):
    # With SUMMARY_MAX_PARAGRAPHS set and no word limit, the generated
    # summary must stay within the configured number of paragraphs.
    kwargs = self._copy_page_kwargs()
    cfg = get_settings()
    kwargs["settings"] = cfg
    # Remove the explicit summary so that one has to be generated.
    del kwargs["metadata"]["summary"]
    cfg["SUMMARY_MAX_PARAGRAPHS"] = 1
    cfg["SUMMARY_MAX_LENGTH"] = None
    # NOTE(review): expecting TEST_CONTENT back unchanged presumes it holds
    # a single paragraph -- confirm against the fixture.
    self.assertEqual(Page(**kwargs).summary, TEST_CONTENT)
|
||||||
|
|
||||||
|
def test_summary_paragraph_max_length(self):
    # With both SUMMARY_MAX_PARAGRAPHS and SUMMARY_MAX_LENGTH set, the
    # generated summary must respect the paragraph limit as well as the
    # word limit.
    kwargs = self._copy_page_kwargs()
    cfg = get_settings()
    kwargs["settings"] = cfg
    # Remove the explicit summary so that one has to be generated.
    del kwargs["metadata"]["summary"]
    cfg["SUMMARY_MAX_PARAGRAPHS"] = 1
    cfg["SUMMARY_MAX_LENGTH"] = 10
    expected = truncate_html_words(TEST_CONTENT, 10)
    self.assertEqual(Page(**kwargs).summary, expected)
|
||||||
|
|
||||||
def test_summary_end_suffix(self):
|
def test_summary_end_suffix(self):
|
||||||
# If a :SUMMARY_END_SUFFIX: is set, and there is no other summary,
|
# If a :SUMMARY_END_SUFFIX: is set, and there is no other summary,
|
||||||
# generated summary should contain the specified marker at the end.
|
# generated summary should contain the specified marker at the end.
|
||||||
|
|
|
||||||
|
|
@ -401,6 +401,23 @@ class TestUtils(LoggedTestCase):
|
||||||
self.assertEqual(utils.truncate_html_words("Ӓ text", 20), "Ӓ text")
|
self.assertEqual(utils.truncate_html_words("Ӓ text", 20), "Ӓ text")
|
||||||
self.assertEqual(utils.truncate_html_words("઼ text", 20), "઼ text")
|
self.assertEqual(utils.truncate_html_words("઼ text", 20), "઼ text")
|
||||||
|
|
||||||
|
def test_truncate_html_paragraphs(self):
    # Fixtures holding one, two and three consecutive paragraphs.
    one = "<p>one</p>"
    two = one + "<p>two</p>"
    three = two + "<p>three</p>"

    # (input HTML, paragraph limit, expected output), checked in order.
    cases = (
        (one, 0, ""),
        (one, 1, one),
        (one, 2, one),
        (two, 1, one),
        (two, 2, two),
        (three, 1, one),
        (three, 2, two),
        (three, 3, three),
        (three, 4, three),
    )
    for html, limit, expected in cases:
        self.assertEqual(utils.truncate_html_paragraphs(html, limit), expected)
|
||||||
|
|
||||||
def test_process_translations(self):
|
def test_process_translations(self):
|
||||||
fr_articles = []
|
fr_articles = []
|
||||||
en_articles = []
|
en_articles = []
|
||||||
|
|
|
||||||
|
|
@ -631,6 +631,25 @@ def truncate_html_words(s: str, num: int, end_text: str = "…") -> str:
|
||||||
return out
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
def truncate_html_paragraphs(s, count):
    """Truncate HTML to a certain number of paragraphs.

    :param s: HTML string to truncate
    :param count: number of paragraphs to keep; zero or a negative value
        yields an empty string
    :return: the first ``count`` ``<p>...</p>`` elements of ``s``,
        concatenated; fewer if ``s`` holds fewer complete paragraphs

    Each kept paragraph is copied verbatim, so newlines inside a paragraph
    are preserved. Anything outside the kept paragraphs (leading text,
    whitespace between paragraphs, trailing content) is discarded.
    Opening tags may carry attributes (``<p class="x">``). A paragraph
    without a closing ``</p>`` is dropped rather than emitted half-open.
    """
    import re  # function-scope import keeps this block self-contained

    # Match "<p>" or "<p ...attrs>", but not other tags such as "<pre>".
    opening = re.compile(r"<p(?:\s[^>]*)?>")
    closing = "</p>"

    paragraphs = []
    pos = 0
    for _ in range(count):
        match = opening.search(s, pos)
        if match is None:
            # No further paragraph: stop instead of slicing with -1.
            break
        close_at = s.find(closing, match.end())
        if close_at == -1:
            # Unterminated paragraph: emitting it would produce broken HTML.
            break
        pos = close_at + len(closing)
        paragraphs.append(s[match.start():pos])

    return "".join(paragraphs)
|
||||||
|
|
||||||
|
|
||||||
def process_translations(
|
def process_translations(
|
||||||
content_list: list[Content],
|
content_list: list[Content],
|
||||||
translation_id: str | Collection[str] | None = None,
|
translation_id: str | Collection[str] | None = None,
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue