From 7466b13e0a6c2f0a19ee7b8640adfbd8a7a8ec9e Mon Sep 17 00:00:00 2001
From: Salar Nosrati-Ershad <s3r@tutamail.com>
Date: Wed, 22 Nov 2023 22:54:30 +0330
Subject: [PATCH 01/22] fix: keep newline at the end of the file in tools

As referenced in the Jinja documentation about whitespace control
<https://jinja.palletsprojects.com/en/3.1.x/templates/#whitespace-control>:

> To keep single trailing newlines, configure Jinja to
> `keep_trailing_newline`

I added this to our Jinja environment to keep the EOL newline in tools
scripts.

---
 RELEASE.md                          | 3 +++
 pelican/tools/pelican_quickstart.py | 1 +
 2 files changed, 4 insertions(+)
 create mode 100644 RELEASE.md

diff --git a/RELEASE.md b/RELEASE.md
new file mode 100644
index 00000000..7881aeac
--- /dev/null
+++ b/RELEASE.md
@@ -0,0 +1,3 @@
+Release type: patch
+
+Keep the trailing newline at the end of files generated by the tools scripts
diff --git a/pelican/tools/pelican_quickstart.py b/pelican/tools/pelican_quickstart.py
index db00ce70..a4dc98e1 100755
--- a/pelican/tools/pelican_quickstart.py
+++ b/pelican/tools/pelican_quickstart.py
@@ -44,6 +44,7 @@ _TEMPLATES_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "templ
 _jinja_env = Environment(
     loader=FileSystemLoader(_TEMPLATES_DIR),
     trim_blocks=True,
+    keep_trailing_newline=True,
 )
 
 

From 4ed5c0d5b87e7711e779be6a26c4a1d9ad21aeaa Mon Sep 17 00:00:00 2001
From: MinchinWeb <w_minchin@hotmail.com>
Date: Sat, 25 Nov 2023 20:57:40 -0700
Subject: [PATCH 02/22] Log the original calling location, rather than the
 wrapper function

---
 pelican/log.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/pelican/log.py b/pelican/log.py
index 0d2b6a3f..befecbf1 100644
--- a/pelican/log.py
+++ b/pelican/log.py
@@ -85,13 +85,15 @@ class FatalLogger(LimitLogger):
     warnings_fatal = False
     errors_fatal = False
 
+    # adding `stacklevel=2` means that the displayed filename and line number
+    # will match the "original" calling location, rather than the wrapper here
     def warning(self, *args, **kwargs):
-        super().warning(*args, **kwargs)
+        super().warning(*args, stacklevel=2, **kwargs)
         if FatalLogger.warnings_fatal:
             raise RuntimeError("Warning encountered")
 
     def error(self, *args, **kwargs):
-        super().error(*args, **kwargs)
+        super().error(*args, stacklevel=2, **kwargs)
         if FatalLogger.errors_fatal:
             raise RuntimeError("Error encountered")
 

From 8626d5bd85da049e7ca7828a785d08e02b736aa1 Mon Sep 17 00:00:00 2001
From: Raphael Das Gupta <github.com@raphael.dasgupta.ch>
Date: Fri, 22 Dec 2023 15:56:57 +0100
Subject: [PATCH 03/22] docs: update URL to AsciiDoc website

https://www.methods.co.nz/asciidoc/
gives an SSL certificate warning
and a 404 (page not found) error.
https://asciidoc.org is the new official website
for the AsciiDoc file format.
(It's also what https://en.wikipedia.org/wiki/AsciiDoc links to.)
---
 docs/content.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/content.rst b/docs/content.rst
index cacacea9..8a5d9b32 100644
--- a/docs/content.rst
+++ b/docs/content.rst
@@ -631,7 +631,7 @@ are not included by default in tag, category, and author indexes, nor in the
 main article feed. This has the effect of creating an "unlisted" post.
 
 .. _W3C ISO 8601: https://www.w3.org/TR/NOTE-datetime
-.. _AsciiDoc: https://www.methods.co.nz/asciidoc/
+.. _AsciiDoc: https://asciidoc.org
 .. _Pelican Plugins: https://github.com/pelican-plugins
 .. _pelican-plugins: https://github.com/getpelican/pelican-plugins
 .. _Python-Markdown: https://github.com/Python-Markdown/markdown

From f0beb81a973f44ed1c8704984bc325b5f4df095c Mon Sep 17 00:00:00 2001
From: MinchinWeb <w_minchin@hotmail.com>
Date: Sun, 14 Jan 2024 13:45:51 -0700
Subject: [PATCH 04/22] Better error logging if a plugin refuses to load

---
 pelican/__init__.py | 3 ++-
 pelican/log.py      | 8 ++++++--
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/pelican/__init__.py b/pelican/__init__.py
index a25f5624..40251887 100644
--- a/pelican/__init__.py
+++ b/pelican/__init__.py
@@ -80,7 +80,8 @@ class Pelican:
                 plugin.register()
                 self.plugins.append(plugin)
             except Exception as e:
-                logger.error("Cannot register plugin `%s`\n%s", name, e)
+                logger.error("Cannot register plugin `%s`\n%s", name, e, stacklevel=3)
+                print(e.stacktrace)
 
         self.settings["PLUGINS"] = [get_plugin_name(p) for p in self.plugins]
 
diff --git a/pelican/log.py b/pelican/log.py
index befecbf1..6a8fcdf1 100644
--- a/pelican/log.py
+++ b/pelican/log.py
@@ -88,12 +88,16 @@ class FatalLogger(LimitLogger):
     # adding `stacklevel=2` means that the displayed filename and line number
     # will match the "original" calling location, rather than the wrapper here
     def warning(self, *args, **kwargs):
-        super().warning(*args, stacklevel=2, **kwargs)
+        if "stacklevel" not in kwargs.keys():
+            kwargs["stacklevel"] = 2
+        super().warning(*args, **kwargs)
         if FatalLogger.warnings_fatal:
             raise RuntimeError("Warning encountered")
 
     def error(self, *args, **kwargs):
-        super().error(*args, stacklevel=2, **kwargs)
+        if "stacklevel" not in kwargs.keys():
+            kwargs["stacklevel"] = 2
+        super().error(*args, **kwargs)
         if FatalLogger.errors_fatal:
             raise RuntimeError("Error encountered")
 

From f69e2cca6b5d26c8a6b2f3f4444a2c3de2e2d202 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Ricks?= <bjoern.ricks@greenbone.net>
Date: Sun, 17 Dec 2023 13:56:33 +0100
Subject: [PATCH 05/22] Add type hints for settings module

Types make it easier to understand the code and improve autocompletion
in IDEs.
---
 pelican/settings.py | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/pelican/settings.py b/pelican/settings.py
index 33ec210a..29051ddb 100644
--- a/pelican/settings.py
+++ b/pelican/settings.py
@@ -8,11 +8,13 @@ import re
 import sys
 from os.path import isabs
 from pathlib import Path
+from types import ModuleType
+from typing import Any, Dict, Optional
 
 from pelican.log import LimitFilter
 
 
-def load_source(name, path):
+def load_source(name: str, path: str) -> ModuleType:
     spec = importlib.util.spec_from_file_location(name, path)
     mod = importlib.util.module_from_spec(spec)
     sys.modules[name] = mod
@@ -22,6 +24,8 @@ def load_source(name, path):
 
 logger = logging.getLogger(__name__)
 
+Settings = Dict[str, Any]
+
 DEFAULT_THEME = os.path.join(
     os.path.dirname(os.path.abspath(__file__)), "themes", "notmyidea"
 )
@@ -177,7 +181,9 @@ DEFAULT_CONFIG = {
 PYGMENTS_RST_OPTIONS = None
 
 
-def read_settings(path=None, override=None):
+def read_settings(
+    path: Optional[str] = None, override: Optional[Settings] = None
+) -> Settings:
     settings = override or {}
 
     if path:
@@ -221,7 +227,7 @@ def read_settings(path=None, override=None):
     return settings
 
 
-def get_settings_from_module(module=None):
+def get_settings_from_module(module: Optional[ModuleType] = None) -> Settings:
     """Loads settings from a module, returns a dictionary."""
 
     context = {}
@@ -230,7 +236,7 @@ def get_settings_from_module(module=None):
     return context
 
 
-def get_settings_from_file(path):
+def get_settings_from_file(path: str) -> Settings:
     """Loads settings from a file path, returning a dict."""
 
     name, ext = os.path.splitext(os.path.basename(path))
@@ -238,7 +244,7 @@ def get_settings_from_file(path):
     return get_settings_from_module(module)
 
 
-def get_jinja_environment(settings):
+def get_jinja_environment(settings: Settings) -> Settings:
     """Sets the environment for Jinja"""
 
     jinja_env = settings.setdefault(
@@ -253,7 +259,7 @@ def get_jinja_environment(settings):
     return settings
 
 
-def _printf_s_to_format_field(printf_string, format_field):
+def _printf_s_to_format_field(printf_string: str, format_field: str) -> str:
     """Tries to replace %s with {format_field} in the provided printf_string.
     Raises ValueError in case of failure.
     """
@@ -269,7 +275,7 @@ def _printf_s_to_format_field(printf_string, format_field):
     return result
 
 
-def handle_deprecated_settings(settings):
+def handle_deprecated_settings(settings: Settings) -> Settings:
     """Converts deprecated settings and issues warnings. Issues an exception
     if both old and new setting is specified.
     """
@@ -566,7 +572,7 @@ def handle_deprecated_settings(settings):
     return settings
 
 
-def configure_settings(settings):
+def configure_settings(settings: Settings) -> Settings:
     """Provide optimizations, error checking, and warnings for the given
     settings.
     Also, specify the log messages to be ignored.

From bf4fd679a5322433cc4313a80cac49d4ed6c348f Mon Sep 17 00:00:00 2001
From: boxydog <93335439+boxydog@users.noreply.github.com>
Date: Mon, 15 Jan 2024 03:43:19 -0600
Subject: [PATCH 06/22] Document how to import posts from Medium (#3262)

---
 docs/importer.rst | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/docs/importer.rst b/docs/importer.rst
index 997a4632..08092984 100644
--- a/docs/importer.rst
+++ b/docs/importer.rst
@@ -26,6 +26,12 @@ not be converted (as Pelican also supports Markdown).
    manually, or use a plugin such as `More Categories`_ that enables multiple
    categories per article.
 
+.. note::
+
+   Imported pages may contain links to images that still point to the original site.
+   So you might want to download those images into your local content and manually
+   re-link them from the relevant pages of your site.
+
 Dependencies
 ============
 
@@ -121,6 +127,15 @@ For WordPress::
 
     $ pelican-import --wpfile -o ~/output ~/posts.xml
 
+For Medium (an example of using an RSS feed)::
+
+    $ python -m pip install feedparser
+    $ pelican-import --feed https://medium.com/feed/@username
+
+.. note::
+
+   The RSS feed may only return the most recent posts — not all of them.
+
 Tests
 =====
 

From 5e6dba73acfd6a85560d82870d1cda9d184c3cb5 Mon Sep 17 00:00:00 2001
From: Salar Nosrati-Ershad <s3r@tutamail.com>
Date: Mon, 15 Jan 2024 13:33:54 +0330
Subject: [PATCH 07/22] Add Github Pages commit message variable (#3250)

---
 pelican/tools/templates/Makefile.jinja2 | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pelican/tools/templates/Makefile.jinja2 b/pelican/tools/templates/Makefile.jinja2
index 93ab1aa7..1e9dbff5 100644
--- a/pelican/tools/templates/Makefile.jinja2
+++ b/pelican/tools/templates/Makefile.jinja2
@@ -37,6 +37,7 @@ DROPBOX_DIR={{dropbox_dir}}
 {% endif %}
 {% if github %}
 GITHUB_PAGES_BRANCH={{github_pages_branch}}
+GITHUB_PAGES_COMMIT_MESSAGE=Generate Pelican site
 
 {% endif %}
 
@@ -161,7 +162,7 @@ cf_upload: publish
 {% if github %}
 {% set upload = upload + ["github"] %}
 github: publish
-	ghp-import -m "Generate Pelican site" -b $(GITHUB_PAGES_BRANCH) "$(OUTPUTDIR)"
+	ghp-import -m "$(GITHUB_PAGES_COMMIT_MESSAGE)" -b $(GITHUB_PAGES_BRANCH) "$(OUTPUTDIR)"
 	git push origin $(GITHUB_PAGES_BRANCH)
 
 {% endif %}

From b1cb6c7326e32afba373113b86d823d46f94a812 Mon Sep 17 00:00:00 2001
From: Salar Nosrati-Ershad <s3r@tutamail.com>
Date: Mon, 15 Jan 2024 13:40:12 +0330
Subject: [PATCH 08/22] Use `--no-jekyll` flag when invoking `ghp-import`
 (#3259)

---
 pelican/tools/templates/Makefile.jinja2 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pelican/tools/templates/Makefile.jinja2 b/pelican/tools/templates/Makefile.jinja2
index 1e9dbff5..67571b47 100644
--- a/pelican/tools/templates/Makefile.jinja2
+++ b/pelican/tools/templates/Makefile.jinja2
@@ -162,7 +162,7 @@ cf_upload: publish
 {% if github %}
 {% set upload = upload + ["github"] %}
 github: publish
-	ghp-import -m "$(GITHUB_PAGES_COMMIT_MESSAGE)" -b $(GITHUB_PAGES_BRANCH) "$(OUTPUTDIR)"
+	ghp-import -m "$(GITHUB_PAGES_COMMIT_MESSAGE)" -b $(GITHUB_PAGES_BRANCH) "$(OUTPUTDIR)" --no-jekyll
 	git push origin $(GITHUB_PAGES_BRANCH)
 
 {% endif %}

From d6a33f1d21a8cbb34b584895554147ad97e97a72 Mon Sep 17 00:00:00 2001
From: boxydog <boxydog@users.noreply.github.com>
Date: Fri, 1 Dec 2023 11:27:16 -0600
Subject: [PATCH 09/22] Medium post importer (from medium export)

---
 docs/content.rst                              |   4 +-
 docs/importer.rst                             |  13 +-
 pelican/tests/content/medium_post_content.txt |   4 +
 ...2017-04-21_-medium-post--d1bf01d62ba3.html |  72 ++++++++
 pelican/tests/test_generators.py              |  37 +++-
 pelican/tests/test_importer.py                |  83 +++++++++
 pelican/tools/pelican_import.py               | 165 +++++++++++++++++-
 7 files changed, 357 insertions(+), 21 deletions(-)
 create mode 100644 pelican/tests/content/medium_post_content.txt
 create mode 100644 pelican/tests/content/medium_posts/2017-04-21_-medium-post--d1bf01d62ba3.html

diff --git a/docs/content.rst b/docs/content.rst
index cacacea9..46db1140 100644
--- a/docs/content.rst
+++ b/docs/content.rst
@@ -439,8 +439,8 @@ For **Markdown**, one must rely on an extension. For example, using the `mdx_inc
 Importing an existing site
 ==========================
 
-It is possible to import your site from WordPress, Tumblr, Dotclear, and RSS
-feeds using a simple script. See :ref:`import`.
+It is possible to import your site from several other blogging sites
+(like WordPress, Tumblr, etc.) using a simple script. See :ref:`import`.
 
 Translations
 ============
diff --git a/docs/importer.rst b/docs/importer.rst
index 997a4632..093ef465 100644
--- a/docs/importer.rst
+++ b/docs/importer.rst
@@ -11,6 +11,7 @@ software to reStructuredText or Markdown. The supported import formats are:
 
 - Blogger XML export
 - Dotclear export
+- Medium export
 - Tumblr API
 - WordPress XML export
 - RSS/Atom feed
@@ -65,6 +66,7 @@ Optional arguments
   -h, --help            Show this help message and exit
   --blogger             Blogger XML export (default: False)
   --dotclear            Dotclear export (default: False)
+  --medium              Medium export (default: False)
   --tumblr              Tumblr API (default: False)
   --wpfile              WordPress XML export (default: False)
   --feed                Feed to parse (default: False)
@@ -80,8 +82,7 @@ Optional arguments
                           (default: False)
   --filter-author       Import only post from the specified author
   --strip-raw           Strip raw HTML code that can't be converted to markup
-                        such as flash embeds or iframes (wordpress import
-                        only) (default: False)
+                        such as flash embeds or iframes (default: False)
   --wp-custpost         Put wordpress custom post types in directories. If
                         used with --dir-cat option directories will be created
                         as "/post_type/category/" (wordpress import only)
@@ -113,6 +114,14 @@ For Dotclear::
 
     $ pelican-import --dotclear -o ~/output ~/backup.txt
 
+For Medium::
+
+    $ pelican-import --medium -o ~/output ~/medium-export/posts/
+
+The Medium export is a zip file.  Unzip it, and point this tool to the
+"posts" subdirectory.  For more information on how to export, see
+https://help.medium.com/hc/en-us/articles/115004745787-Export-your-account-data.
+
 For Tumblr::
 
     $ pelican-import --tumblr -o ~/output --blogname=<blogname> <api_key>
diff --git a/pelican/tests/content/medium_post_content.txt b/pelican/tests/content/medium_post_content.txt
new file mode 100644
index 00000000..5e21881c
--- /dev/null
+++ b/pelican/tests/content/medium_post_content.txt
@@ -0,0 +1,4 @@
+
+<hr/><h3>Title header</h3><p>A paragraph of content.</p><p>Paragraph number two.</p><p>A list:</p><ol><li>One.</li><li>Two.</li><li>Three.</li></ol><p>A link: <a data-href="https://example.com/example" href="https://example.com/example" target="_blank">link text</a>.</p><h3>Header 2</h3><p>A block quote:</p><blockquote>quote words <strong>strong words</strong></blockquote><p>after blockquote</p><figure><img data-height="282" data-image-id="image1.png" data-width="739" src="https://cdn-images-1.medium.com/max/800/image1.png"/><figcaption>A figure caption.</figcaption></figure><p>A final note: <a data-href="http://stats.stackexchange.com/" href="http://stats.stackexchange.com/" rel="noopener" target="_blank">Cross-Validated</a> has sometimes been helpful.</p><hr/><p><em>Next: </em><a data-href="https://medium.com/@username/post-url" href="https://medium.com/@username/post-url" target="_blank"><em>Next post</em>
+</a></p>
+<p>By <a href="https://medium.com/@username">User Name</a> on <a href="https://medium.com/p/medium-short-url"><time datetime="2017-04-21T17:11:55.799Z">April 21, 2017</time></a>.</p><p><a href="https://medium.com/@username/this-post-url">Canonical link</a></p><p>Exported from <a href="https://medium.com">Medium</a> on December 1, 2023.</p>
diff --git a/pelican/tests/content/medium_posts/2017-04-21_-medium-post--d1bf01d62ba3.html b/pelican/tests/content/medium_posts/2017-04-21_-medium-post--d1bf01d62ba3.html
new file mode 100644
index 00000000..02d272dc
--- /dev/null
+++ b/pelican/tests/content/medium_posts/2017-04-21_-medium-post--d1bf01d62ba3.html
@@ -0,0 +1,72 @@
+<!DOCTYPE html><html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"><title>A title</title><style>
+      * {
+        font-family: Georgia, Cambria, "Times New Roman", Times, serif;
+      }
+      html, body {
+        margin: 0;
+        padding: 0;
+      }
+      h1 {
+        font-size: 50px;
+        margin-bottom: 17px;
+        color: #333;
+      }
+      h2 {
+        font-size: 24px;
+        line-height: 1.6;
+        margin: 30px 0 0 0;
+        margin-bottom: 18px;
+        margin-top: 33px;
+        color: #333;
+      }
+      h3 {
+        font-size: 30px;
+        margin: 10px 0 20px 0;
+        color: #333;
+      }
+      header {
+        width: 640px;
+        margin: auto;
+      }
+      section {
+        width: 640px;
+        margin: auto;
+      }
+      section p {
+        margin-bottom: 27px;
+        font-size: 20px;
+        line-height: 1.6;
+        color: #333;
+      }
+      section img {
+        max-width: 640px;
+      }
+      footer {
+        padding: 0 20px;
+        margin: 50px 0;
+        text-align: center;
+        font-size: 12px;
+      }
+      .aspectRatioPlaceholder {
+        max-width: auto !important;
+        max-height: auto !important;
+      }
+      .aspectRatioPlaceholder-fill {
+        padding-bottom: 0 !important;
+      }
+      header,
+      section[data-field=subtitle],
+      section[data-field=description] {
+        display: none;
+      }
+      </style></head><body><article class="h-entry">
+<header>
+<h1 class="p-name">A name (like title)</h1>
+</header>
+<section data-field="subtitle" class="p-summary">
+    Summary (first several words of content)
+</section>
+<section data-field="body" class="e-content">
+<section name="ad15" class="section section--body section--first"><div class="section-divider"><hr class="section-divider"></div><div class="section-content"><div class="section-inner sectionLayout--insetColumn"><h3 name="20a3" id="20a3" class="graf graf--h3 graf--leading graf--title">Title header</h3><p name="e3d6" id="e3d6" class="graf graf--p graf-after--h3">A paragraph of content.</p><p name="c7a8" id="c7a8" class="graf graf--p graf-after--p">Paragraph number two.</p><p name="42aa" id="42aa" class="graf graf--p graf-after--p">A list:</p><ol class="postList"><li name="d65f" id="d65f" class="graf graf--li graf-after--p">One.</li><li name="232b" id="232b" class="graf graf--li graf-after--li">Two.</li><li name="ef87" id="ef87" class="graf graf--li graf-after--li">Three.</li></ol><p name="e743" id="e743" class="graf graf--p graf-after--p">A link: <a href="https://example.com/example" data-href="https://example.com/example" class="markup--anchor markup--p-anchor" target="_blank">link text</a>.</p><h3 name="4cfd" id="4cfd" class="graf graf--h3 graf-after--p">Header 2</h3><p name="433c" id="433c" class="graf graf--p graf-after--p">A block quote:</p><blockquote name="3537" id="3537" class="graf graf--blockquote graf-after--p">quote words <strong class="markup--strong markup--blockquote-strong">strong words</strong></blockquote><p name="00cc" id="00cc" class="graf graf--p graf-after--blockquote">after blockquote</p><figure name="edb0" id="edb0" class="graf graf--figure graf-after--p"><img class="graf-image" data-image-id="image1.png" data-width="739" data-height="282" src="https://cdn-images-1.medium.com/max/800/image1.png"><figcaption class="imageCaption">A figure caption.</figcaption></figure><p name="f401" id="f401" class="graf graf--p graf-after--p graf--trailing">A final note: <a href="http://stats.stackexchange.com/" data-href="http://stats.stackexchange.com/" class="markup--anchor markup--p-anchor" rel="noopener" target="_blank">Cross-Validated</a> has sometimes 
been helpful.</p></div></div></section><section name="09a3" class="section section--body section--last"><div class="section-divider"><hr class="section-divider"></div><div class="section-content"><div class="section-inner sectionLayout--insetColumn"><p name="81e8" id="81e8" class="graf graf--p graf--leading"><em class="markup--em markup--p-em">Next: </em><a href="https://medium.com/@username/post-url" data-href="https://medium.com/@username/post-url" class="markup--anchor markup--p-anchor" target="_blank"><em class="markup--em markup--p-em">Next post</em>
+</section>
+<footer><p>By <a href="https://medium.com/@username" class="p-author h-card">User Name</a> on <a href="https://medium.com/p/medium-short-url"><time class="dt-published" datetime="2017-04-21T17:11:55.799Z">April 21, 2017</time></a>.</p><p><a href="https://medium.com/@username/this-post-url" class="p-canonical">Canonical link</a></p><p>Exported from <a href="https://medium.com">Medium</a> on December 1, 2023.</p></footer></article></body></html>
diff --git a/pelican/tests/test_generators.py b/pelican/tests/test_generators.py
index af6f5b1a..8c257b55 100644
--- a/pelican/tests/test_generators.py
+++ b/pelican/tests/test_generators.py
@@ -264,6 +264,7 @@ class TestArticlesGenerator(unittest.TestCase):
 
     def test_generate_context(self):
         articles_expected = [
+            ["A title", "published", "medium_posts", "article"],
             ["Article title", "published", "Default", "article"],
             [
                 "Article with markdown and summary metadata multi",
@@ -391,13 +392,24 @@ class TestArticlesGenerator(unittest.TestCase):
         # terms of process order will define the name for that category
         categories = [cat.name for cat, _ in self.generator.categories]
         categories_alternatives = (
-            sorted(["Default", "TestCategory", "Yeah", "test", "指導書"]),
-            sorted(["Default", "TestCategory", "yeah", "test", "指導書"]),
+            sorted(
+                ["Default", "TestCategory", "medium_posts", "Yeah", "test", "指導書"]
+            ),
+            sorted(
+                ["Default", "TestCategory", "medium_posts", "yeah", "test", "指導書"]
+            ),
         )
         self.assertIn(sorted(categories), categories_alternatives)
         # test for slug
         categories = [cat.slug for cat, _ in self.generator.categories]
-        categories_expected = ["default", "testcategory", "yeah", "test", "zhi-dao-shu"]
+        categories_expected = [
+            "default",
+            "testcategory",
+            "medium_posts",
+            "yeah",
+            "test",
+            "zhi-dao-shu",
+        ]
         self.assertEqual(sorted(categories), sorted(categories_expected))
 
     def test_do_not_use_folder_as_category(self):
@@ -549,7 +561,8 @@ class TestArticlesGenerator(unittest.TestCase):
             granularity: {period["period"] for period in periods}
             for granularity, periods in period_archives.items()
         }
-        expected = {"year": {(1970,), (2010,), (2012,), (2014,)}}
+        self.maxDiff = None
+        expected = {"year": {(1970,), (2010,), (2012,), (2014,), (2017,)}}
         self.assertEqual(expected, abbreviated_archives)
 
         # Month archives enabled:
@@ -570,7 +583,7 @@ class TestArticlesGenerator(unittest.TestCase):
             for granularity, periods in period_archives.items()
         }
         expected = {
-            "year": {(1970,), (2010,), (2012,), (2014,)},
+            "year": {(1970,), (2010,), (2012,), (2014,), (2017,)},
             "month": {
                 (1970, "January"),
                 (2010, "December"),
@@ -578,6 +591,7 @@ class TestArticlesGenerator(unittest.TestCase):
                 (2012, "November"),
                 (2012, "October"),
                 (2014, "February"),
+                (2017, "April"),
             },
         }
         self.assertEqual(expected, abbreviated_archives)
@@ -602,7 +616,7 @@ class TestArticlesGenerator(unittest.TestCase):
             for granularity, periods in period_archives.items()
         }
         expected = {
-            "year": {(1970,), (2010,), (2012,), (2014,)},
+            "year": {(1970,), (2010,), (2012,), (2014,), (2017,)},
             "month": {
                 (1970, "January"),
                 (2010, "December"),
@@ -610,6 +624,7 @@ class TestArticlesGenerator(unittest.TestCase):
                 (2012, "November"),
                 (2012, "October"),
                 (2014, "February"),
+                (2017, "April"),
             },
             "day": {
                 (1970, "January", 1),
@@ -619,6 +634,7 @@ class TestArticlesGenerator(unittest.TestCase):
                 (2012, "October", 30),
                 (2012, "October", 31),
                 (2014, "February", 9),
+                (2017, "April", 21),
             },
         }
         self.assertEqual(expected, abbreviated_archives)
@@ -836,8 +852,12 @@ class TestArticlesGenerator(unittest.TestCase):
 
         categories = sorted([category.name for category, _ in generator.categories])
         categories_expected = [
-            sorted(["Default", "TestCategory", "yeah", "test", "指導書"]),
-            sorted(["Default", "TestCategory", "Yeah", "test", "指導書"]),
+            sorted(
+                ["Default", "TestCategory", "medium_posts", "yeah", "test", "指導書"]
+            ),
+            sorted(
+                ["Default", "TestCategory", "medium_posts", "Yeah", "test", "指導書"]
+            ),
         ]
         self.assertIn(categories, categories_expected)
 
@@ -864,6 +884,7 @@ class TestArticlesGenerator(unittest.TestCase):
         generator.generate_context()
 
         expected = [
+            "A title",
             "An Article With Code Block To Test Typogrify Ignore",
             "Article title",
             "Article with Nonconformant HTML meta tags",
diff --git a/pelican/tests/test_importer.py b/pelican/tests/test_importer.py
index 05ef5bbd..916c1183 100644
--- a/pelican/tests/test_importer.py
+++ b/pelican/tests/test_importer.py
@@ -21,6 +21,10 @@ from pelican.tools.pelican_import import (
     get_attachments,
     tumblr2fields,
     wp2fields,
+    mediumpost2fields,
+    mediumposts2fields,
+    strip_medium_post_content,
+    medium_slug,
 )
 from pelican.utils import path_to_file_url, slugify
 
@@ -708,3 +712,82 @@ class TestTumblrImporter(TestCaseWithCLocale):
             posts,
             posts,
         )
+
+
+class TestMediumImporter(TestCaseWithCLocale):
+    def setUp(self):
+        super().setUp()
+        self.test_content_root = "pelican/tests/content"
+        # The content coming out of parsing is similar, but not the same.
+        # Beautiful soup rearranges the order of attributes, for example.
+        # So, we keep a copy of the content for the test.
+        content_filename = f"{self.test_content_root}/medium_post_content.txt"
+        with open(content_filename, encoding="utf-8") as the_content_file:
+            # Many editors and scripts add a final newline, so live with that
+            # in our test
+            the_content = the_content_file.read()
+            assert the_content[-1] == "\n"
+            the_content = the_content[:-1]
+        self.post_tuple = (
+            "A title",
+            the_content,
+            # slug:
+            "2017-04-21-medium-post",
+            "2017-04-21 17:11",
+            "User Name",
+            None,
+            (),
+            "published",
+            "article",
+            "html",
+        )
+
+    def test_mediumpost2field(self):
+        """Parse one post"""
+        post_filename = f"{self.test_content_root}/medium_posts/2017-04-21_-medium-post--d1bf01d62ba3.html"
+        val = mediumpost2fields(post_filename)
+        self.assertEqual(self.post_tuple, val, val)
+
+    def test_mediumposts2field(self):
+        """Parse all posts in an export directory"""
+        posts = [
+            fields
+            for fields in mediumposts2fields(f"{self.test_content_root}/medium_posts")
+        ]
+        self.assertEqual(1, len(posts))
+        self.assertEqual(self.post_tuple, posts[0])
+
+    def test_strip_content(self):
+        """Strip out unhelpful tags"""
+        html_doc = (
+            "<section>This keeps <i>lots</i> of <b>tags</b>, but not "
+            "the <section>section</section> tags</section>"
+        )
+        soup = BeautifulSoup(html_doc, "html.parser")
+        self.assertEqual(
+            "This keeps <i>lots</i> of <b>tags</b>, but not the section tags",
+            strip_medium_post_content(soup),
+        )
+
+    def test_medium_slug(self):
+        # Remove hex stuff at the end
+        self.assertEqual(
+            "2017-04-27_A-long-title",
+            medium_slug(
+                "medium-export/posts/2017-04-27_A-long-title--2971442227dd.html"
+            ),
+        )
+        # Remove "--DRAFT" at the end
+        self.assertEqual(
+            "2017-04-27_A-long-title",
+            medium_slug("medium-export/posts/2017-04-27_A-long-title--DRAFT.html"),
+        )
+        # Remove both (which happens)
+        self.assertEqual(
+            "draft_How-to-do", medium_slug("draft_How-to-do--DRAFT--87225c81dddd.html")
+        )
+        # If no hex stuff, leave it alone
+        self.assertEqual(
+            "2017-04-27_A-long-title",
+            medium_slug("medium-export/posts/2017-04-27_A-long-title.html"),
+        )
diff --git a/pelican/tools/pelican_import.py b/pelican/tools/pelican_import.py
index 681a5c45..eb343860 100755
--- a/pelican/tools/pelican_import.py
+++ b/pelican/tools/pelican_import.py
@@ -15,6 +15,8 @@ from urllib.error import URLError
 from urllib.parse import quote, urlparse, urlsplit, urlunsplit
 from urllib.request import urlretrieve
 
+import dateutil.parser
+
 # because logging.setLoggerClass has to be called before logging.getLogger
 from pelican.log import init
 from pelican.settings import DEFAULT_CONFIG
@@ -114,19 +116,25 @@ def decode_wp_content(content, br=True):
     return content
 
 
-def xml_to_soup(xml):
-    """Opens an xml file"""
+def _import_bs4():
+    """Import and return bs4, otherwise sys.exit."""
     try:
-        from bs4 import BeautifulSoup
+        import bs4
     except ImportError:
         error = (
             'Missing dependency "BeautifulSoup4" and "lxml" required to '
             "import XML files."
         )
         sys.exit(error)
+    return bs4
+
+
+def file_to_soup(xml, features="xml"):
+    """Reads a file, returns soup."""
+    bs4 = _import_bs4()
     with open(xml, encoding="utf-8") as infile:
         xmlfile = infile.read()
-    soup = BeautifulSoup(xmlfile, "xml")
+    soup = bs4.BeautifulSoup(xmlfile, features)
     return soup
 
 
@@ -140,7 +148,7 @@ def get_filename(post_name, post_id):
 def wp2fields(xml, wp_custpost=False):
     """Opens a wordpress XML file, and yield Pelican fields"""
 
-    soup = xml_to_soup(xml)
+    soup = file_to_soup(xml)
     items = soup.rss.channel.findAll("item")
     for item in items:
         if item.find("status").string in ["publish", "draft"]:
@@ -210,7 +218,7 @@ def wp2fields(xml, wp_custpost=False):
 def blogger2fields(xml):
     """Opens a blogger XML file, and yield Pelican fields"""
 
-    soup = xml_to_soup(xml)
+    soup = file_to_soup(xml)
     entries = soup.feed.findAll("entry")
     for entry in entries:
         raw_kind = entry.find(
@@ -536,6 +544,133 @@ def tumblr2fields(api_key, blogname):
         posts = _get_tumblr_posts(api_key, blogname, offset)
 
 
+def strip_medium_post_content(soup) -> str:
+    """Strip some tags and attributes from medium post content.
+
+    For example, the 'section' and 'div' tags cause trouble while rendering.
+
+    The problem with these tags is you can get a section divider (--------------)
+    that is not between two pieces of content.  For example:
+
+      Some text.
+
+      .. container:: section-divider
+
+         --------------
+
+      .. container:: section-content
+
+      More content.
+
+    In this case, pandoc complains: "Unexpected section title or transition."
+
+    Also, the "id" and "name" attributes in tags cause similar problems (they show
+    up in .rst as extra junk that separates transitions); "class" is dropped too.
+    """
+    # Remove tags: each match is replaced by its own children (replaceWithChildren)
+    # section and div cause problems; footer can too, and has nothing we want to keep
+    # NOTE(review): footer's children are nevertheless kept, like the others - confirm
+    # See https://stackoverflow.com/a/8439761
+    invalid_tags = ["section", "div", "footer"]
+    for tag in invalid_tags:
+        for match in soup.findAll(tag):
+            match.replaceWithChildren()
+
+    # Remove attributes (rebinds tag.attrs on the passed-in soup, i.e. in place)
+    # See https://stackoverflow.com/a/9045719
+    invalid_attributes = ["name", "id", "class"]
+    bs4 = _import_bs4()
+    for tag in soup.descendants:
+        if isinstance(tag, bs4.element.Tag):
+            tag.attrs = {
+                key: value
+                for key, value in tag.attrs.items()
+                if key not in invalid_attributes
+            }
+
+    # Serialize the children of soup (not soup's own tag), keeping other tags
+    all_content = "".join(str(element) for element in soup.contents)
+    return all_content
+
+
+def mediumpost2fields(filepath: str) -> tuple:
+    """Take an HTML post from a medium export, return Pelican fields."""
+
+    soup = file_to_soup(filepath, "html.parser")
+    if not soup:
+        raise ValueError(f"{filepath} could not be parsed by beautifulsoup")
+    kind = "article"
+
+    content = soup.find("section", class_="e-content")
+    if not content:
+        raise ValueError(f"{filepath}: Post has no content")
+
+    title = soup.find("title").string or ""  # NOTE(review): assumes <title> exists
+
+    raw_date = soup.find("time", class_="dt-published")
+    date = None
+    if raw_date:
+        # This datetime can include timezone, e.g., "2017-04-21T17:11:55.799Z"
+        # python before 3.11 can't parse the timezone using datetime.fromisoformat
+        # See also https://docs.python.org/3.10/library/datetime.html#datetime.datetime.fromisoformat
+        # "This does not support parsing arbitrary ISO 8601 strings"
+        # So, we use dateutil.parser, which can handle it.
+        date_object = dateutil.parser.parse(raw_date.attrs["datetime"])
+        date = date_object.strftime("%Y-%m-%d %H:%M")  # timezone is not written out
+        status = "published"
+    else:
+        status = "draft"  # no <time class="dt-published"> element: treat as a draft
+    author = soup.find("a", class_="p-author h-card")
+    if author:
+        author = author.string
+
+    # Now that we're done with classes, we can strip the content
+    content = strip_medium_post_content(content)
+
+    # medium HTML export doesn't have tag or category
+    # RSS feed has tags, but it doesn't have all the posts.
+    tags = ()
+
+    slug = medium_slug(filepath)
+
+    # TODO: make the fields a python dataclass
+    return (
+        title,
+        content,
+        slug,
+        date,
+        author,
+        None,  # presumably the category slot, unused here -- confirm with consumer
+        tags,
+        status,
+        kind,
+        "html",  # presumably the markup of ``content`` -- confirm with consumer
+    )
+
+
+def medium_slug(filepath: str) -> str:
+    """Make the filepath of a medium exported file into a slug."""
+    # Start from the file name without its directory or extension.
+    slug = os.path.basename(filepath)
+    slug = os.path.splitext(slug)[0]
+    # A medium export filename can look like date_-title-...html, but RST doesn't
+    # like "_-" (see https://github.com/sphinx-doc/sphinx/issues/4350), so turn
+    # it into a plain "-".
+    slug = slug.replace("_-", "-")
+    # Drop trailing "-<hex>" ids (e.g., "-a8a8a8a8" or "---a9a9a9a9") and "--DRAFT"
+    # markers; the outer "+" removes several in a row ("--DRAFT--87225c81dddd").
+    # NOTE(review): a final title word made only of 0-9a-f ("-bad") is cut too.
+    slug = re.sub(r"((-)+([0-9a-f]+|DRAFT))+$", "", slug)
+    return slug
+
+
+def mediumposts2fields(medium_export_dir: str):
+    """Yield Pelican fields for each ``*.html`` post in a medium export directory."""
+    for name in sorted(os.listdir(medium_export_dir)):  # listdir order is arbitrary
+        if name.lower().endswith(".html"):  # skip strays such as .DS_Store, subdirs
+            yield mediumpost2fields(os.path.join(medium_export_dir, name))
+
+
 def feed2fields(file):
     """Read a feed and yield pelican fields"""
     import feedparser
@@ -711,7 +846,7 @@ def get_attachments(xml):
     """returns a dictionary of posts that have attachments with a list
     of the attachment_urls
     """
-    soup = xml_to_soup(xml)
+    soup = file_to_soup(xml)
     items = soup.rss.channel.findAll("item")
     names = {}
     attachments = []
@@ -837,6 +972,9 @@ def fields2pelican(
             posts_require_pandoc.append(filename)
 
         slug = not disable_slugs and filename or None
+        assert slug is None or filename == os.path.basename(
+            filename
+        ), f"filename is not a basename: {filename}"
 
         if wp_attach and attachments:
             try:
@@ -984,6 +1122,9 @@ def main():
     parser.add_argument(
         "--dotclear", action="store_true", dest="dotclear", help="Dotclear export"
     )
+    parser.add_argument(
+        "--medium", action="store_true", dest="medium", help="Medium export"
+    )
     parser.add_argument(
         "--tumblr", action="store_true", dest="tumblr", help="Tumblr export"
     )
@@ -1069,6 +1210,8 @@ def main():
         input_type = "blogger"
     elif args.dotclear:
         input_type = "dotclear"
+    elif args.medium:
+        input_type = "medium"
     elif args.tumblr:
         input_type = "tumblr"
     elif args.wpfile:
@@ -1077,8 +1220,8 @@ def main():
         input_type = "feed"
     else:
         error = (
-            "You must provide either --blogger, --dotclear, "
-            "--tumblr, --wpfile or --feed options"
+            "You must provide one of --blogger, --dotclear, "
+            "--medium, --tumblr, --wpfile or --feed options"
         )
         exit(error)
 
@@ -1097,12 +1240,16 @@ def main():
         fields = blogger2fields(args.input)
     elif input_type == "dotclear":
         fields = dc2fields(args.input)
+    elif input_type == "medium":
+        fields = mediumposts2fields(args.input)
     elif input_type == "tumblr":
         fields = tumblr2fields(args.input, args.blogname)
     elif input_type == "wordpress":
         fields = wp2fields(args.input, args.wp_custpost or False)
     elif input_type == "feed":
         fields = feed2fields(args.input)
+    else:
+        raise ValueError(f"Unhandled input_type {input_type}")
 
     if args.wp_attach:
         attachments = get_attachments(args.input)

From fbe81a971a8f96eae6a13aee4471468f31cbf194 Mon Sep 17 00:00:00 2001
From: Justin Mayer <entroP@gmail.com>
Date: Wed, 17 Jan 2024 09:48:05 +0100
Subject: [PATCH 10/22] Delete RELEASE.md

---
 RELEASE.md | 3 ---
 1 file changed, 3 deletions(-)
 delete mode 100644 RELEASE.md

diff --git a/RELEASE.md b/RELEASE.md
deleted file mode 100644
index 7881aeac..00000000
--- a/RELEASE.md
+++ /dev/null
@@ -1,3 +0,0 @@
-Release type: patch
-
-Keep the newline at the end of the file in generating tools scripts

From d39dd9b85f0309e4101e74a270fd2ce97f051a84 Mon Sep 17 00:00:00 2001
From: MinchinWeb <w_minchin@hotmail.com>
Date: Sun, 21 Jan 2024 22:52:56 -0700
Subject: [PATCH 11/22] Resolve inter-site links in summaries.

c.f. https://github.com/getpelican/pelican/issues/3265
c.f. https://github.com/MinchinWeb/minchin.pelican.plugins.summary/issues/5
---
 pelican/__init__.py |  7 +++++--
 pelican/contents.py | 17 +++++++++++------
 2 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/pelican/__init__.py b/pelican/__init__.py
index a25f5624..1a3090f8 100644
--- a/pelican/__init__.py
+++ b/pelican/__init__.py
@@ -120,12 +120,15 @@ class Pelican:
             if hasattr(p, "generate_context"):
                 p.generate_context()
 
+        # for plugins that create/edit the summary
+        logger.debug("Signal all_generators_finalized.send(<generators>)")
+        signals.all_generators_finalized.send(generators)
+
+        # update links in the summary, etc
         for p in generators:
             if hasattr(p, "refresh_metadata_intersite_links"):
                 p.refresh_metadata_intersite_links()
 
-        signals.all_generators_finalized.send(generators)
-
         writer = self._get_writer()
 
         for p in generators:
diff --git a/pelican/contents.py b/pelican/contents.py
index 474e5bbf..27b8bbc3 100644
--- a/pelican/contents.py
+++ b/pelican/contents.py
@@ -520,12 +520,17 @@ class Content:
 
         # _summary is an internal variable that some plugins may be writing to,
         # so ensure changes to it are picked up
-        if (
-            "summary" in self.settings["FORMATTED_FIELDS"]
-            and "summary" in self.metadata
-        ):
-            self._summary = self._update_content(self._summary, self.get_siteurl())
-            self.metadata["summary"] = self._summary
+        if "summary" in self.settings["FORMATTED_FIELDS"]:
+            if hasattr(self, "_summary"):
+                self.metadata["summary"] = self._summary
+
+            if "summary" in self.metadata:
+                self.metadata["summary"] = self._update_content(
+                    self.metadata["summary"], self.get_siteurl()
+                )
+
+            if hasattr(self, "_summary") and "summary" in self.metadata:
+                self._summary = self.metadata["summary"]
 
 
 class Page(Content):

From 2fa5c515b0232ce212a3d83827de88b01deaa598 Mon Sep 17 00:00:00 2001
From: namori <157323136+nam-ori@users.noreply.github.com>
Date: Tue, 23 Jan 2024 09:43:07 +0100
Subject: [PATCH 12/22] Feeds - Update generators.py to fix a bug with slugs
 (#3279)

---
 pelican/generators.py | 30 ++++++++++++++----------------
 1 file changed, 14 insertions(+), 16 deletions(-)

diff --git a/pelican/generators.py b/pelican/generators.py
index 3b5ca9e4..076c8d38 100644
--- a/pelican/generators.py
+++ b/pelican/generators.py
@@ -384,8 +384,8 @@ class ArticlesGenerator(CachingGenerator):
                     str(self.settings["CATEGORY_FEED_ATOM"]).format(slug=cat.slug),
                     self.settings.get(
                         "CATEGORY_FEED_ATOM_URL",
-                        str(self.settings["CATEGORY_FEED_ATOM"]).format(slug=cat.slug),
-                    ),
+                        str(self.settings["CATEGORY_FEED_ATOM"]),
+                    ).format(slug=cat.slug),
                     feed_title=cat.name,
                 )
 
@@ -396,8 +396,8 @@ class ArticlesGenerator(CachingGenerator):
                     str(self.settings["CATEGORY_FEED_RSS"]).format(slug=cat.slug),
                     self.settings.get(
                         "CATEGORY_FEED_RSS_URL",
-                        str(self.settings["CATEGORY_FEED_RSS"]).format(slug=cat.slug),
-                    ),
+                        str(self.settings["CATEGORY_FEED_RSS"]),
+                    ).format(slug=cat.slug),
                     feed_title=cat.name,
                     feed_type="rss",
                 )
@@ -410,8 +410,8 @@ class ArticlesGenerator(CachingGenerator):
                     str(self.settings["AUTHOR_FEED_ATOM"]).format(slug=auth.slug),
                     self.settings.get(
                         "AUTHOR_FEED_ATOM_URL",
-                        str(self.settings["AUTHOR_FEED_ATOM"]).format(slug=auth.slug),
-                    ),
+                        str(self.settings["AUTHOR_FEED_ATOM"]),
+                    ).format(slug=auth.slug),
                     feed_title=auth.name,
                 )
 
@@ -422,8 +422,8 @@ class ArticlesGenerator(CachingGenerator):
                     str(self.settings["AUTHOR_FEED_RSS"]).format(slug=auth.slug),
                     self.settings.get(
                         "AUTHOR_FEED_RSS_URL",
-                        str(self.settings["AUTHOR_FEED_RSS"]).format(slug=auth.slug),
-                    ),
+                        str(self.settings["AUTHOR_FEED_RSS"]),
+                    ).format(slug=auth.slug),
                     feed_title=auth.name,
                     feed_type="rss",
                 )
@@ -437,8 +437,8 @@ class ArticlesGenerator(CachingGenerator):
                         str(self.settings["TAG_FEED_ATOM"]).format(slug=tag.slug),
                         self.settings.get(
                             "TAG_FEED_ATOM_URL",
-                            str(self.settings["TAG_FEED_ATOM"]).format(slug=tag.slug),
-                        ),
+                            str(self.settings["TAG_FEED_ATOM"]),
+                        ).format(slug=tag.slug),
                         feed_title=tag.name,
                     )
 
@@ -449,8 +449,8 @@ class ArticlesGenerator(CachingGenerator):
                         str(self.settings["TAG_FEED_RSS"]).format(slug=tag.slug),
                         self.settings.get(
                             "TAG_FEED_RSS_URL",
-                            str(self.settings["TAG_FEED_RSS"]).format(slug=tag.slug),
-                        ),
+                            str(self.settings["TAG_FEED_RSS"]),
+                        ).format(slug=tag.slug),
                         feed_title=tag.name,
                         feed_type="rss",
                     )
@@ -471,10 +471,8 @@ class ArticlesGenerator(CachingGenerator):
                         str(self.settings["TRANSLATION_FEED_ATOM"]).format(lang=lang),
                         self.settings.get(
                             "TRANSLATION_FEED_ATOM_URL",
-                            str(self.settings["TRANSLATION_FEED_ATOM"]).format(
-                                lang=lang
-                            ),
-                        ),
+                            str(self.settings["TRANSLATION_FEED_ATOM"]),
+                        ).format(lang=lang),
                     )
                 if self.settings.get("TRANSLATION_FEED_RSS"):
                     writer.write_feed(

From 3a662ace031a20d15f4933c028b3fffd1b588430 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Ricks?= <bjoern.ricks@greenbone.net>
Date: Thu, 18 Jan 2024 17:17:29 +0100
Subject: [PATCH 13/22] Add type hints for contents module

Types make it easier to understand the code and improve autocompletion
in IDEs.
---
 pelican/contents.py | 95 +++++++++++++++++++++++++--------------------
 1 file changed, 53 insertions(+), 42 deletions(-)

diff --git a/pelican/contents.py b/pelican/contents.py
index 474e5bbf..82be8f73 100644
--- a/pelican/contents.py
+++ b/pelican/contents.py
@@ -6,7 +6,8 @@ import os
 import re
 from datetime import timezone
 from html import unescape
-from urllib.parse import unquote, urljoin, urlparse, urlunparse
+from typing import Any, Dict, Optional, Set, Tuple
+from urllib.parse import ParseResult, unquote, urljoin, urlparse, urlunparse
 
 try:
     from zoneinfo import ZoneInfo
@@ -15,7 +16,7 @@ except ModuleNotFoundError:
 
 
 from pelican.plugins import signals
-from pelican.settings import DEFAULT_CONFIG
+from pelican.settings import DEFAULT_CONFIG, Settings
 from pelican.utils import (
     deprecated_attribute,
     memoized,
@@ -44,12 +45,20 @@ class Content:
 
     """
 
+    default_template: Optional[str] = None
+    mandatory_properties: Tuple[str, ...] = ()
+
     @deprecated_attribute(old="filename", new="source_path", since=(3, 2, 0))
     def filename():
         return None
 
     def __init__(
-        self, content, metadata=None, settings=None, source_path=None, context=None
+        self,
+        content: str,
+        metadata: Optional[Dict[str, Any]] = None,
+        settings: Optional[Settings] = None,
+        source_path: Optional[str] = None,
+        context: Optional[Dict[Any, Any]] = None,
     ):
         if metadata is None:
             metadata = {}
@@ -156,10 +165,10 @@ class Content:
 
         signals.content_object_init.send(self)
 
-    def __str__(self):
+    def __str__(self) -> str:
         return self.source_path or repr(self)
 
-    def _has_valid_mandatory_properties(self):
+    def _has_valid_mandatory_properties(self) -> bool:
         """Test mandatory properties are set."""
         for prop in self.mandatory_properties:
             if not hasattr(self, prop):
@@ -169,7 +178,7 @@ class Content:
                 return False
         return True
 
-    def _has_valid_save_as(self):
+    def _has_valid_save_as(self) -> bool:
         """Return true if save_as doesn't write outside output path, false
         otherwise."""
         try:
@@ -190,7 +199,7 @@ class Content:
 
         return True
 
-    def _has_valid_status(self):
+    def _has_valid_status(self) -> bool:
         if hasattr(self, "allowed_statuses"):
             if self.status not in self.allowed_statuses:
                 logger.error(
@@ -204,7 +213,7 @@ class Content:
         # if undefined we allow all
         return True
 
-    def is_valid(self):
+    def is_valid(self) -> bool:
         """Validate Content"""
         # Use all() to not short circuit and get results of all validations
         return all(
@@ -216,7 +225,7 @@ class Content:
         )
 
     @property
-    def url_format(self):
+    def url_format(self) -> Dict[str, Any]:
         """Returns the URL, formatted with the proper values"""
         metadata = copy.copy(self.metadata)
         path = self.metadata.get("path", self.get_relative_source_path())
@@ -232,19 +241,19 @@ class Content:
         )
         return metadata
 
-    def _expand_settings(self, key, klass=None):
+    def _expand_settings(self, key: str, klass: Optional[str] = None) -> str:
         if not klass:
             klass = self.__class__.__name__
         fq_key = (f"{klass}_{key}").upper()
         return str(self.settings[fq_key]).format(**self.url_format)
 
-    def get_url_setting(self, key):
+    def get_url_setting(self, key: str) -> str:
         if hasattr(self, "override_" + key):
             return getattr(self, "override_" + key)
         key = key if self.in_default_lang else "lang_%s" % key
         return self._expand_settings(key)
 
-    def _link_replacer(self, siteurl, m):
+    def _link_replacer(self, siteurl: str, m: re.Match) -> str:
         what = m.group("what")
         value = urlparse(m.group("value"))
         path = value.path
@@ -272,15 +281,15 @@ class Content:
         # XXX Put this in a different location.
         if what in {"filename", "static", "attach"}:
 
-            def _get_linked_content(key, url):
+            def _get_linked_content(key: str, url: ParseResult) -> Optional[Content]:
                 nonlocal value
 
-                def _find_path(path):
+                def _find_path(path: str) -> Optional[Content]:
                     if path.startswith("/"):
                         path = path[1:]
                     else:
                         # relative to the source path of this content
-                        path = self.get_relative_source_path(
+                        path = self.get_relative_source_path(  # type: ignore
                             os.path.join(self.relative_dir, path)
                         )
                     return self._context[key].get(path, None)
@@ -324,7 +333,7 @@ class Content:
             linked_content = _get_linked_content(key, value)
             if linked_content:
                 if what == "attach":
-                    linked_content.attach_to(self)
+                    linked_content.attach_to(self)  # type: ignore
                 origin = joiner(siteurl, linked_content.url)
                 origin = origin.replace("\\", "/")  # for Windows paths.
             else:
@@ -359,7 +368,7 @@ class Content:
 
         return "".join((m.group("markup"), m.group("quote"), origin, m.group("quote")))
 
-    def _get_intrasite_link_regex(self):
+    def _get_intrasite_link_regex(self) -> re.Pattern:
         intrasite_link_regex = self.settings["INTRASITE_LINK_REGEX"]
         regex = r"""
             (?P<markup><[^\>]+  # match tag with all url-value attributes
@@ -370,7 +379,7 @@ class Content:
             (?P=quote)""".format(intrasite_link_regex)
         return re.compile(regex, re.X)
 
-    def _update_content(self, content, siteurl):
+    def _update_content(self, content: str, siteurl: str) -> str:
         """Update the content attribute.
 
         Change all the relative paths of the content to relative paths
@@ -386,7 +395,7 @@ class Content:
         hrefs = self._get_intrasite_link_regex()
         return hrefs.sub(lambda m: self._link_replacer(siteurl, m), content)
 
-    def get_static_links(self):
+    def get_static_links(self) -> Set[str]:
         static_links = set()
         hrefs = self._get_intrasite_link_regex()
         for m in hrefs.finditer(self._content):
@@ -402,15 +411,15 @@ class Content:
                 path = self.get_relative_source_path(
                     os.path.join(self.relative_dir, path)
                 )
-            path = path.replace("%20", " ")
+            path = path.replace("%20", " ")  # type: ignore
             static_links.add(path)
         return static_links
 
-    def get_siteurl(self):
+    def get_siteurl(self) -> str:
         return self._context.get("localsiteurl", "")
 
     @memoized
-    def get_content(self, siteurl):
+    def get_content(self, siteurl: str) -> str:
         if hasattr(self, "_get_content"):
             content = self._get_content()
         else:
@@ -418,11 +427,11 @@ class Content:
         return self._update_content(content, siteurl)
 
     @property
-    def content(self):
+    def content(self) -> str:
         return self.get_content(self.get_siteurl())
 
     @memoized
-    def get_summary(self, siteurl):
+    def get_summary(self, siteurl: str) -> str:
         """Returns the summary of an article.
 
         This is based on the summary metadata if set, otherwise truncate the
@@ -441,10 +450,10 @@ class Content:
         )
 
     @property
-    def summary(self):
+    def summary(self) -> str:
         return self.get_summary(self.get_siteurl())
 
-    def _get_summary(self):
+    def _get_summary(self) -> str:
         """deprecated function to access summary"""
 
         logger.warning(
@@ -454,34 +463,36 @@ class Content:
         return self.summary
 
     @summary.setter
-    def summary(self, value):
+    def summary(self, value: str):
         """Dummy function"""
         pass
 
     @property
-    def status(self):
+    def status(self) -> str:
         return self._status
 
     @status.setter
-    def status(self, value):
+    def status(self, value: str) -> None:
         # TODO maybe typecheck
         self._status = value.lower()
 
     @property
-    def url(self):
+    def url(self) -> str:
         return self.get_url_setting("url")
 
     @property
-    def save_as(self):
+    def save_as(self) -> str:
         return self.get_url_setting("save_as")
 
-    def _get_template(self):
+    def _get_template(self) -> str:
         if hasattr(self, "template") and self.template is not None:
             return self.template
         else:
             return self.default_template
 
-    def get_relative_source_path(self, source_path=None):
+    def get_relative_source_path(
+        self, source_path: Optional[str] = None
+    ) -> Optional[str]:
         """Return the relative path (from the content path) to the given
         source_path.
 
@@ -501,7 +512,7 @@ class Content:
         )
 
     @property
-    def relative_dir(self):
+    def relative_dir(self) -> str:
         return posixize_path(
             os.path.dirname(
                 os.path.relpath(
@@ -511,7 +522,7 @@ class Content:
             )
         )
 
-    def refresh_metadata_intersite_links(self):
+    def refresh_metadata_intersite_links(self) -> None:
         for key in self.settings["FORMATTED_FIELDS"]:
             if key in self.metadata and key != "summary":
                 value = self._update_content(self.metadata[key], self.get_siteurl())
@@ -534,7 +545,7 @@ class Page(Content):
     default_status = "published"
     default_template = "page"
 
-    def _expand_settings(self, key):
+    def _expand_settings(self, key: str) -> str:
         klass = "draft_page" if self.status == "draft" else None
         return super()._expand_settings(key, klass)
 
@@ -561,7 +572,7 @@ class Article(Content):
         if not hasattr(self, "date") and self.status == "draft":
             self.date = datetime.datetime.max.replace(tzinfo=self.timezone)
 
-    def _expand_settings(self, key):
+    def _expand_settings(self, key: str) -> str:
         klass = "draft" if self.status == "draft" else "article"
         return super()._expand_settings(key, klass)
 
@@ -571,7 +582,7 @@ class Static(Content):
     default_status = "published"
     default_template = None
 
-    def __init__(self, *args, **kwargs):
+    def __init__(self, *args, **kwargs) -> None:
         super().__init__(*args, **kwargs)
         self._output_location_referenced = False
 
@@ -588,18 +599,18 @@ class Static(Content):
         return None
 
     @property
-    def url(self):
+    def url(self) -> str:
         # Note when url has been referenced, so we can avoid overriding it.
         self._output_location_referenced = True
         return super().url
 
     @property
-    def save_as(self):
+    def save_as(self) -> str:
         # Note when save_as has been referenced, so we can avoid overriding it.
         self._output_location_referenced = True
         return super().save_as
 
-    def attach_to(self, content):
+    def attach_to(self, content: Content) -> None:
         """Override our output directory with that of the given content object."""
 
         # Determine our file's new output path relative to the linking
@@ -624,7 +635,7 @@ class Static(Content):
 
         new_url = path_to_url(new_save_as)
 
-        def _log_reason(reason):
+        def _log_reason(reason: str) -> None:
             logger.warning(
                 "The {attach} link in %s cannot relocate "
                 "%s because %s. Falling back to "

From e4807316ae9338f05701a70d216687a94fb796d5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Ricks?= <bjoern.ricks@greenbone.net>
Date: Thu, 18 Jan 2024 09:18:00 +0100
Subject: [PATCH 14/22] Add type hints for utils module

Types make it easier to understand the code and improve autocompletion
in IDEs.
---
 pelican/utils.py | 143 ++++++++++++++++++++++++++++++++---------------
 1 file changed, 97 insertions(+), 46 deletions(-)

diff --git a/pelican/utils.py b/pelican/utils.py
index eda53d3f..5f161667 100644
--- a/pelican/utils.py
+++ b/pelican/utils.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import datetime
 import fnmatch
 import locale
@@ -16,6 +18,21 @@ from html import entities
 from html.parser import HTMLParser
 from itertools import groupby
 from operator import attrgetter
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    Collection,
+    Dict,
+    Generator,
+    Iterable,
+    List,
+    Optional,
+    Sequence,
+    Tuple,
+    Type,
+    Union,
+)
 
 import dateutil.parser
 
@@ -27,11 +44,15 @@ from markupsafe import Markup
 
 import watchfiles
 
+if TYPE_CHECKING:
+    from pelican.contents import Content
+    from pelican.readers import Readers
+    from pelican.settings import Settings
 
 logger = logging.getLogger(__name__)
 
 
-def sanitised_join(base_directory, *parts):
+def sanitised_join(base_directory: str, *parts: str) -> str:
     joined = posixize_path(os.path.abspath(os.path.join(base_directory, *parts)))
     base = posixize_path(os.path.abspath(base_directory))
     if not joined.startswith(base):
@@ -40,7 +61,7 @@ def sanitised_join(base_directory, *parts):
     return joined
 
 
-def strftime(date, date_format):
+def strftime(date: datetime.datetime, date_format: str) -> str:
     """
     Enhanced replacement for built-in strftime with zero stripping
 
@@ -109,10 +130,10 @@ class DateFormatter:
     defined in LOCALE setting
     """
 
-    def __init__(self):
+    def __init__(self) -> None:
         self.locale = locale.setlocale(locale.LC_TIME)
 
-    def __call__(self, date, date_format):
+    def __call__(self, date: datetime.datetime, date_format: str) -> str:
         # on OSX, encoding from LC_CTYPE determines the unicode output in PY3
         # make sure it's same as LC_TIME
         with temporary_locale(self.locale, locale.LC_TIME), temporary_locale(
@@ -131,11 +152,11 @@ class memoized:
 
     """
 
-    def __init__(self, func):
+    def __init__(self, func: Callable) -> None:
         self.func = func
-        self.cache = {}
+        self.cache: Dict[Any, Any] = {}
 
-    def __call__(self, *args):
+    def __call__(self, *args) -> Any:
         if not isinstance(args, Hashable):
             # uncacheable. a list, for instance.
             # better to not cache than blow up.
@@ -147,17 +168,23 @@ class memoized:
             self.cache[args] = value
             return value
 
-    def __repr__(self):
+    def __repr__(self) -> Optional[str]:
         return self.func.__doc__
 
-    def __get__(self, obj, objtype):
+    def __get__(self, obj: Any, objtype):
         """Support instance methods."""
         fn = partial(self.__call__, obj)
         fn.cache = self.cache
         return fn
 
 
-def deprecated_attribute(old, new, since=None, remove=None, doc=None):
+def deprecated_attribute(
+    old: str,
+    new: str,
+    since: Tuple[int, ...],
+    remove: Optional[Tuple[int, ...]] = None,
+    doc: Optional[str] = None,
+):
     """Attribute deprecation decorator for gentle upgrades
 
     For example:
@@ -198,7 +225,7 @@ def deprecated_attribute(old, new, since=None, remove=None, doc=None):
     return decorator
 
 
-def get_date(string):
+def get_date(string: str) -> datetime.datetime:
     """Return a datetime object from a string.
 
     If no format matches the given date, raise a ValueError.
@@ -212,7 +239,9 @@ def get_date(string):
 
 
 @contextmanager
-def pelican_open(filename, mode="r", strip_crs=(sys.platform == "win32")):
+def pelican_open(
+    filename: str, mode: str = "r", strip_crs: bool = (sys.platform == "win32")
+) -> Generator[str, None, None]:
     """Open a file and return its content"""
 
     # utf-8-sig will clear any BOM if present
@@ -221,7 +250,12 @@ def pelican_open(filename, mode="r", strip_crs=(sys.platform == "win32")):
     yield content
 
 
-def slugify(value, regex_subs=(), preserve_case=False, use_unicode=False):
+def slugify(
+    value: str,
+    regex_subs: Iterable[Tuple[str, str]] = (),
+    preserve_case: bool = False,
+    use_unicode: bool = False,
+) -> str:
     """
     Normalizes string, converts to lowercase, removes non-alpha characters,
     and converts spaces to hyphens.
@@ -233,9 +267,10 @@ def slugify(value, regex_subs=(), preserve_case=False, use_unicode=False):
     """
 
     import unicodedata
+
     import unidecode
 
-    def normalize_unicode(text):
+    def normalize_unicode(text: str) -> str:
         # normalize text by compatibility composition
         # see: https://en.wikipedia.org/wiki/Unicode_equivalence
         return unicodedata.normalize("NFKC", text)
@@ -262,7 +297,9 @@ def slugify(value, regex_subs=(), preserve_case=False, use_unicode=False):
     return value.strip()
 
 
-def copy(source, destination, ignores=None):
+def copy(
+    source: str, destination: str, ignores: Optional[Iterable[str]] = None
+) -> None:
     """Recursively copy source into destination.
 
     If source is a file, destination has to be a file as well.
@@ -334,7 +371,7 @@ def copy(source, destination, ignores=None):
                     )
 
 
-def copy_file(source, destination):
+def copy_file(source: str, destination: str) -> None:
     """Copy a file"""
     try:
         shutil.copyfile(source, destination)
@@ -344,7 +381,7 @@ def copy_file(source, destination):
         )
 
 
-def clean_output_dir(path, retention):
+def clean_output_dir(path: str, retention: Iterable[str]) -> None:
     """Remove all files from output directory except those in retention list"""
 
     if not os.path.exists(path):
@@ -381,24 +418,24 @@ def clean_output_dir(path, retention):
             logger.error("Unable to delete %s, file type unknown", file)
 
 
-def get_relative_path(path):
+def get_relative_path(path: str) -> str:
     """Return the relative path from the given path to the root path."""
     components = split_all(path)
-    if len(components) <= 1:
+    if components is None or len(components) <= 1:
         return os.curdir
     else:
         parents = [os.pardir] * (len(components) - 1)
         return os.path.join(*parents)
 
 
-def path_to_url(path):
+def path_to_url(path: str) -> str:
     """Return the URL corresponding to a given path."""
     if path is not None:
         path = posixize_path(path)
     return path
 
 
-def posixize_path(rel_path):
+def posixize_path(rel_path: str) -> str:
     """Use '/' as path separator, so that source references,
     like '{static}/foo/bar.jpg' or 'extras/favicon.ico',
     will work on Windows as well as on Mac and Linux."""
@@ -427,20 +464,20 @@ class _HTMLWordTruncator(HTMLParser):
     _singlets = ("br", "col", "link", "base", "img", "param", "area", "hr", "input")
 
     class TruncationCompleted(Exception):
-        def __init__(self, truncate_at):
+        def __init__(self, truncate_at: int) -> None:
             super().__init__(truncate_at)
             self.truncate_at = truncate_at
 
-    def __init__(self, max_words):
+    def __init__(self, max_words: int) -> None:
         super().__init__(convert_charrefs=False)
 
         self.max_words = max_words
         self.words_found = 0
         self.open_tags = []
         self.last_word_end = None
-        self.truncate_at = None
+        self.truncate_at: Optional[int] = None
 
-    def feed(self, *args, **kwargs):
+    def feed(self, *args, **kwargs) -> None:
         try:
             super().feed(*args, **kwargs)
         except self.TruncationCompleted as exc:
@@ -448,29 +485,29 @@ class _HTMLWordTruncator(HTMLParser):
         else:
             self.truncate_at = None
 
-    def getoffset(self):
+    def getoffset(self) -> int:
         line_start = 0
         lineno, line_offset = self.getpos()
         for i in range(lineno - 1):
             line_start = self.rawdata.index("\n", line_start) + 1
         return line_start + line_offset
 
-    def add_word(self, word_end):
+    def add_word(self, word_end: int) -> None:
         self.words_found += 1
         self.last_word_end = None
         if self.words_found == self.max_words:
             raise self.TruncationCompleted(word_end)
 
-    def add_last_word(self):
+    def add_last_word(self) -> None:
         if self.last_word_end is not None:
             self.add_word(self.last_word_end)
 
-    def handle_starttag(self, tag, attrs):
+    def handle_starttag(self, tag: str, attrs: Any) -> None:
         self.add_last_word()
         if tag not in self._singlets:
             self.open_tags.insert(0, tag)
 
-    def handle_endtag(self, tag):
+    def handle_endtag(self, tag: str) -> None:
         self.add_last_word()
         try:
             i = self.open_tags.index(tag)
@@ -481,7 +518,7 @@ class _HTMLWordTruncator(HTMLParser):
             # all unclosed intervening start tags with omitted end tags
             del self.open_tags[: i + 1]
 
-    def handle_data(self, data):
+    def handle_data(self, data: str) -> None:
         word_end = 0
         offset = self.getoffset()
 
@@ -499,7 +536,7 @@ class _HTMLWordTruncator(HTMLParser):
         if word_end < len(data):
             self.add_last_word()
 
-    def _handle_ref(self, name, char):
+    def _handle_ref(self, name: str, char: str) -> None:
         """
         Called by handle_entityref() or handle_charref() when a ref like
         `&mdash;`, `&#8212;`, or `&#x2014` is found.
@@ -543,7 +580,7 @@ class _HTMLWordTruncator(HTMLParser):
             else:
                 self.add_last_word()
 
-    def handle_entityref(self, name):
+    def handle_entityref(self, name: str) -> None:
         """
         Called when an entity ref like '&mdash;' is found
 
@@ -556,7 +593,7 @@ class _HTMLWordTruncator(HTMLParser):
             char = ""
         self._handle_ref(name, char)
 
-    def handle_charref(self, name):
+    def handle_charref(self, name: str) -> None:
         """
         Called when a char ref like '&#8212;' or '&#x2014' is found
 
@@ -574,7 +611,7 @@ class _HTMLWordTruncator(HTMLParser):
         self._handle_ref("#" + name, char)
 
 
-def truncate_html_words(s, num, end_text="…"):
+def truncate_html_words(s: str, num: int, end_text: str = "…") -> str:
     """Truncates HTML to a certain number of words.
 
     (not counting tags and comments). Closes opened tags if they were correctly
@@ -600,7 +637,10 @@ def truncate_html_words(s, num, end_text="…"):
     return out
 
 
-def process_translations(content_list, translation_id=None):
+def process_translations(
+    content_list: List[Content],
+    translation_id: Optional[Union[str, Collection[str]]] = None,
+) -> Tuple[List[Content], List[Content]]:
     """Finds translations and returns them.
 
     For each content_list item, populates the 'translations' attribute, and
@@ -658,7 +698,7 @@ def process_translations(content_list, translation_id=None):
     return index, translations
 
 
-def get_original_items(items, with_str):
+def get_original_items(items: List[Content], with_str: str) -> List[Content]:
     def _warn_source_paths(msg, items, *extra):
         args = [len(items)]
         args.extend(extra)
@@ -698,7 +738,10 @@ def get_original_items(items, with_str):
     return original_items
 
 
-def order_content(content_list, order_by="slug"):
+def order_content(
+    content_list: List[Content],
+    order_by: Union[str, Callable[[Content], Any], None] = "slug",
+) -> List[Content]:
     """Sorts content.
 
     order_by can be a string of an attribute or sorting function. If order_by
@@ -758,7 +801,11 @@ def order_content(content_list, order_by="slug"):
     return content_list
 
 
-def wait_for_changes(settings_file, reader_class, settings):
+def wait_for_changes(
+    settings_file: str,
+    reader_class: Type["Readers"],
+    settings: "Settings",
+):
     content_path = settings.get("PATH", "")
     theme_path = settings.get("THEME", "")
     ignore_files = {
@@ -788,13 +835,15 @@ def wait_for_changes(settings_file, reader_class, settings):
     return next(
         watchfiles.watch(
             *watching_paths,
-            watch_filter=watchfiles.DefaultFilter(ignore_entity_patterns=ignore_files),
+            watch_filter=watchfiles.DefaultFilter(ignore_entity_patterns=ignore_files),  # type: ignore
             rust_timeout=0,
         )
     )
 
 
-def set_date_tzinfo(d, tz_name=None):
+def set_date_tzinfo(
+    d: datetime.datetime, tz_name: Optional[str] = None
+) -> datetime.datetime:
     """Set the timezone for dates that don't have tzinfo"""
     if tz_name and not d.tzinfo:
         timezone = ZoneInfo(tz_name)
@@ -805,11 +854,11 @@ def set_date_tzinfo(d, tz_name=None):
     return d
 
 
-def mkdir_p(path):
+def mkdir_p(path: str) -> None:
     os.makedirs(path, exist_ok=True)
 
 
-def split_all(path):
+def split_all(path: Union[str, pathlib.Path, None]) -> Optional[Sequence[str]]:
     """Split a path into a list of components
 
     While os.path.split() splits a single component off the back of
@@ -840,12 +889,12 @@ def split_all(path):
         )
 
 
-def path_to_file_url(path):
+def path_to_file_url(path: str) -> str:
     """Convert file-system path to file:// URL"""
     return urllib.parse.urljoin("file://", urllib.request.pathname2url(path))
 
 
-def maybe_pluralize(count, singular, plural):
+def maybe_pluralize(count: int, singular: str, plural: str) -> str:
     """
     Returns a formatted string containing count and plural if count is not 1
     Returns count and singular if count is 1
@@ -862,7 +911,9 @@ def maybe_pluralize(count, singular, plural):
 
 
 @contextmanager
-def temporary_locale(temp_locale=None, lc_category=locale.LC_ALL):
+def temporary_locale(
+    temp_locale: Optional[str] = None, lc_category: int = locale.LC_ALL
+) -> Generator[None, None, None]:
     """
     Enable code to run in a context with a temporary locale
     Resets the locale back when exiting context.

From c36ab075269771834b5e05e4d1586d050743d457 Mon Sep 17 00:00:00 2001
From: MinchinWeb <w_minchin@hotmail.com>
Date: Fri, 26 Jan 2024 16:31:22 -0700
Subject: [PATCH 15/22] write back to `._summary`

---
 pelican/contents.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/pelican/contents.py b/pelican/contents.py
index 27b8bbc3..e0629e2a 100644
--- a/pelican/contents.py
+++ b/pelican/contents.py
@@ -519,7 +519,7 @@ class Content:
                 setattr(self, key.lower(), value)
 
         # _summary is an internal variable that some plugins may be writing to,
-        # so ensure changes to it are picked up
+        # so ensure changes to it are picked up, and write summary back to it
         if "summary" in self.settings["FORMATTED_FIELDS"]:
             if hasattr(self, "_summary"):
                 self.metadata["summary"] = self._summary
@@ -528,8 +528,6 @@ class Content:
                 self.metadata["summary"] = self._update_content(
                     self.metadata["summary"], self.get_siteurl()
                 )
-
-            if hasattr(self, "_summary") and "summary" in self.metadata:
                 self._summary = self.metadata["summary"]
 
 

From f1f2ceccc757d9743dde39f626eccf05e3e9a5b0 Mon Sep 17 00:00:00 2001
From: MinchinWeb <w_minchin@hotmail.com>
Date: Sat, 27 Jan 2024 10:47:54 -0700
Subject: [PATCH 16/22] Warning/error logging: be explicit in how the
 `stacklevel` variable is handled

---
 pelican/log.py | 40 ++++++++++++++++++++++++++++++----------
 1 file changed, 30 insertions(+), 10 deletions(-)

diff --git a/pelican/log.py b/pelican/log.py
index 6a8fcdf1..ef49d280 100644
--- a/pelican/log.py
+++ b/pelican/log.py
@@ -85,19 +85,39 @@ class FatalLogger(LimitLogger):
     warnings_fatal = False
     errors_fatal = False
 
-    # adding `stacklevel=2` means that the displayed filename and line number
-    # will match the "original" calling location, rather than the wrapper here
-    def warning(self, *args, **kwargs):
-        if "stacklevel" not in kwargs.keys():
-            kwargs["stacklevel"] = 2
-        super().warning(*args, **kwargs)
+    def warning(self, *args, stacklevel=1, **kwargs):
+        """
+        Displays a logging warning.
+
+        Wrapping it here allows Pelican to filter warnings, and conditionally
+        make warnings fatal.
+
+        Args:
+            stacklevel (int): the stacklevel that would be used to display the
+            calling location, except for this function. Adjusting the
+            stacklevel allows you to see the "true" calling location of the
+            warning, rather than this wrapper location.
+        """
+        stacklevel += 1
+        super().warning(*args, stacklevel=stacklevel, **kwargs)
         if FatalLogger.warnings_fatal:
             raise RuntimeError("Warning encountered")
 
-    def error(self, *args, **kwargs):
-        if "stacklevel" not in kwargs.keys():
-            kwargs["stacklevel"] = 2
-        super().error(*args, **kwargs)
+    def error(self, *args, stacklevel=1, **kwargs):
+        """
+        Displays a logging error.
+
+        Wrapping it here allows Pelican to filter errors, and conditionally
+        make errors fatal.
+
+        Args:
+            stacklevel (int): the stacklevel that would be used to display the
+            calling location, except for this function. Adjusting the
+            stacklevel allows you to see the "true" calling location of the
+            error, rather than this wrapper location.
+        """
+        stacklevel += 1
+        super().error(*args, stacklevel=stacklevel, **kwargs)
         if FatalLogger.errors_fatal:
             raise RuntimeError("Error encountered")
 

From 1f14606f8339385c5176ba05adca4664a3ad8868 Mon Sep 17 00:00:00 2001
From: MinchinWeb <w_minchin@hotmail.com>
Date: Sat, 27 Jan 2024 10:51:35 -0700
Subject: [PATCH 17/22] On failing to load a plugin, show the stacktrace if
 pelican is run in debug mode

---
 pelican/__init__.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/pelican/__init__.py b/pelican/__init__.py
index 40251887..68f3e553 100644
--- a/pelican/__init__.py
+++ b/pelican/__init__.py
@@ -80,8 +80,14 @@ class Pelican:
                 plugin.register()
                 self.plugins.append(plugin)
             except Exception as e:
-                logger.error("Cannot register plugin `%s`\n%s", name, e, stacklevel=3)
-                print(e.stacktrace)
+                logger.error(
+                    "Cannot register plugin `%s`\n%s",
+                    name,
+                    e,
+                    stacklevel=2,
+                )
+                if self.settings.get("DEBUG", False):
+                    console.print_exception()
 
         self.settings["PLUGINS"] = [get_plugin_name(p) for p in self.plugins]
 

From 7c7c9355b6c27122dbff6446cd366017f81eb0f2 Mon Sep 17 00:00:00 2001
From: Justin Mayer <entroP@gmail.com>
Date: Tue, 12 Mar 2024 11:57:46 +0100
Subject: [PATCH 18/22] Pin Ruff to major semantic version 0.1.x

Upgrading to 0.3.0+ requires code style changes to the code base.
---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index c8bbe985..eb1884a9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -95,7 +95,7 @@ dev = [
     "pytest-xdist>=3.4.0",
     "tox>=4.11.3",
     "invoke>=2.2.0",
-    "ruff>=0.1.5",
+    "ruff>=0.1.5,<0.2.0",
     "tomli>=2.0.1; python_version < \"3.11\"",
 ]
 

From 74541381848f1d65ec64463469b5980ba0646617 Mon Sep 17 00:00:00 2001
From: Justin Mayer <entroP@gmail.com>
Date: Tue, 12 Mar 2024 12:05:09 +0100
Subject: [PATCH 19/22] Update `setup-python` & `setup-pdm` GitHub Actions

---
 .github/workflows/main.yml | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index cd646522..8cd63cc7 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -25,7 +25,7 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - name: Set up Python ${{ matrix.python }}
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python }}
           cache: "pip"
@@ -53,7 +53,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
-      - uses: pdm-project/setup-pdm@v3
+      - uses: pdm-project/setup-pdm@v4
         with:
           python-version: "3.11"
           cache: true
@@ -71,7 +71,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
-      - uses: pdm-project/setup-pdm@v3
+      - uses: pdm-project/setup-pdm@v4
         with:
           python-version: "3.11"
           cache: true
@@ -90,7 +90,7 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - name: Set up Python
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: "3.11"
           cache: "pip"
@@ -122,7 +122,7 @@ jobs:
           token: ${{ secrets.GH_TOKEN }}
 
       - name: Set up Python
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: "3.11"
 

From fabc40927750f52f11f27695e89ff76c0863a79f Mon Sep 17 00:00:00 2001
From: Justin Mayer <entroP@gmail.com>
Date: Tue, 12 Mar 2024 12:18:11 +0100
Subject: [PATCH 20/22] Update more GitHub Actions to resolve warnings

---
 .github/workflows/main.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 8cd63cc7..4c0127df 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -64,7 +64,7 @@ jobs:
       - name: Run linters
         run: pdm lint --diff
       - name: Run pre-commit checks on all files
-        uses: pre-commit/action@v3.0.0
+        uses: pre-commit/action@v3.0.1
 
   build:
     name: Test build
@@ -100,7 +100,7 @@ jobs:
       - name: Check
         run: tox -e docs
       - name: cache the docs for inspection
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@v4
         with:
           name: docs
           path: docs/_build/html/

From b87308cfaaa269c44784cda69855ecaf298f9f5e Mon Sep 17 00:00:00 2001
From: Justin Mayer <entroP@gmail.com>
Date: Wed, 27 Mar 2024 08:25:48 +0100
Subject: [PATCH 21/22] Update Ruff dependency version

---
 .pre-commit-config.yaml | 2 +-
 pyproject.toml          | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 333bc3c0..d6cfac07 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -14,7 +14,7 @@ repos:
       - id: forbid-new-submodules
       - id: trailing-whitespace
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.1.5
+    rev: v0.1.15
     hooks:
       - id: ruff
       - id: ruff-format
diff --git a/pyproject.toml b/pyproject.toml
index eb1884a9..2f7d677c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -95,7 +95,7 @@ dev = [
     "pytest-xdist>=3.4.0",
     "tox>=4.11.3",
     "invoke>=2.2.0",
-    "ruff>=0.1.5,<0.2.0",
+    "ruff>=0.1.15,<0.2.0",
     "tomli>=2.0.1; python_version < \"3.11\"",
 ]
 

From 94bcd41f27d7f38a9dbd0847c6166e91a66d2090 Mon Sep 17 00:00:00 2001
From: Justin Mayer <entroP@gmail.com>
Date: Wed, 27 Mar 2024 08:26:55 +0100
Subject: [PATCH 22/22] Ignore Sphinx 7.2.x package install warnings

Sphinx 7.2+ requires Python 3.9+, which results in annoying warnings
since we still support Python 3.8.x.
---
 pyproject.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pyproject.toml b/pyproject.toml
index 2f7d677c..3ca06df4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -69,6 +69,7 @@ changelog-header = "###############"
 version-header = "="
 
 [tool.pdm]
+ignore_package_warnings = ["sphinx"]
 
 [tool.pdm.scripts]
 docbuild = "invoke docbuild"