Merge branch 'master' into rss_rel

2025-10-15 20:28:56 +02:00 · 2024-04-19 20:41:17 +02:00 · 2024-04-19 20:41:17 +02:00 · c80baf5776
commit c80baf5776
parent 23fb03f133 0f5179b816
20 changed files with 637 additions and 170 deletions
--- a/.github/workflows/github_pages.yml
+++ b/.github/workflows/github_pages.yml
@ -28,7 +28,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@ -23,9 +23,9 @@ jobs:
            python: "3.9"

    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python }}
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python }}
          cache: "pip"
@ -52,10 +52,10 @@ jobs:
    name: Lint
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v3
-      - uses: pdm-project/setup-pdm@v3
+      - uses: actions/checkout@v4
+      - uses: pdm-project/setup-pdm@v4
        with:
-          python-version: 3.9
+          python-version: "3.11"
          cache: true
          cache-dependency-path: ./pyproject.toml
      - name: Install dependencies
@ -64,16 +64,16 @@ jobs:
      - name: Run linters
        run: pdm lint --diff
      - name: Run pre-commit checks on all files
-        uses: pre-commit/action@v3.0.0
+        uses: pre-commit/action@v3.0.1

  build:
    name: Test build
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v3
-      - uses: pdm-project/setup-pdm@v3
+      - uses: actions/checkout@v4
+      - uses: pdm-project/setup-pdm@v4
        with:
-          python-version: 3.9
+          python-version: "3.11"
          cache: true
          cache-dependency-path: ./pyproject.toml
      - name: Install dependencies
@ -88,11 +88,11 @@ jobs:
    runs-on: ubuntu-latest

    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Set up Python
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
        with:
-          python-version: "3.9"
+          python-version: "3.11"
          cache: "pip"
          cache-dependency-path: "**/requirements/*"
      - name: Install tox
@ -100,7 +100,7 @@ jobs:
      - name: Check
        run: tox -e docs
      - name: cache the docs for inspection
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@v4
        with:
          name: docs
          path: docs/_build/html/
@ -117,14 +117,14 @@ jobs:
      id-token: write

    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
        with:
          token: ${{ secrets.GH_TOKEN }}

      - name: Set up Python
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
        with:
-          python-version: "3.9"
+          python-version: "3.11"

      - name: Check release
        id: check_release
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@ -14,7 +14,7 @@ repos:
      - id: forbid-new-submodules
      - id: trailing-whitespace
  - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.1.5
+    rev: v0.1.15
    hooks:
      - id: ruff
      - id: ruff-format
--- a/docs/content.rst
+++ b/docs/content.rst
@ -439,8 +439,8 @@ For **Markdown**, one must rely on an extension. For example, using the `mdx_inc
 Importing an existing site
 ==========================

-It is possible to import your site from WordPress, Tumblr, Dotclear, and RSS
-feeds using a simple script. See :ref:`import`.
+It is possible to import your site from several other blogging sites
+(such as WordPress, Tumblr, etc.) using a simple script. See :ref:`import`.

 Translations
 ============
@ -631,7 +631,7 @@ are not included by default in tag, category, and author indexes, nor in the
 main article feed. This has the effect of creating an "unlisted" post.

 .. _W3C ISO 8601: https://www.w3.org/TR/NOTE-datetime
-.. _AsciiDoc: https://www.methods.co.nz/asciidoc/
+.. _AsciiDoc: https://asciidoc.org
 .. _Pelican Plugins: https://github.com/pelican-plugins
 .. _pelican-plugins: https://github.com/getpelican/pelican-plugins
 .. _Python-Markdown: https://github.com/Python-Markdown/markdown
--- a/docs/importer.rst
+++ b/docs/importer.rst
@ -11,6 +11,7 @@ software to reStructuredText or Markdown. The supported import formats are:

 - Blogger XML export
 - Dotclear export
+- Medium export
 - Tumblr API
 - WordPress XML export
 - RSS/Atom feed
@ -26,6 +27,12 @@ not be converted (as Pelican also supports Markdown).
   manually, or use a plugin such as `More Categories`_ that enables multiple
   categories per article.

+.. note::
+
+   Imported pages may contain links to images that still point to the original site.
+   So you might want to download those images into your local content and manually
+   re-link them from the relevant pages of your site.
+
 Dependencies
 ============

@ -65,6 +72,7 @@ Optional arguments
  -h, --help            Show this help message and exit
  --blogger             Blogger XML export (default: False)
  --dotclear            Dotclear export (default: False)
+  --medium              Medium export (default: False)
  --tumblr              Tumblr API (default: False)
  --wpfile              WordPress XML export (default: False)
  --feed                Feed to parse (default: False)
@ -80,8 +88,7 @@ Optional arguments
                          (default: False)
  --filter-author       Import only post from the specified author
  --strip-raw           Strip raw HTML code that can't be converted to markup
-                        such as flash embeds or iframes (wordpress import
-                        only) (default: False)
+                        such as flash embeds or iframes (default: False)
  --wp-custpost         Put wordpress custom post types in directories. If
                        used with --dir-cat option directories will be created
                        as "/post_type/category/" (wordpress import only)
@ -113,6 +120,14 @@ For Dotclear::

    $ pelican-import --dotclear -o ~/output ~/backup.txt

+For Medium::
+
+    $ pelican-import --medium -o ~/output ~/medium-export/posts/
+
+The Medium export is a zip file.  Unzip it, and point this tool to the
+"posts" subdirectory.  For more information on how to export, see
+https://help.medium.com/hc/en-us/articles/115004745787-Export-your-account-data.
+
 For Tumblr::

    $ pelican-import --tumblr -o ~/output --blogname=<blogname> <api_key>
@ -121,6 +136,15 @@ For WordPress::

    $ pelican-import --wpfile -o ~/output ~/posts.xml

+For Medium (an example of using an RSS feed)::
+
+    $ python -m pip install feedparser
+    $ pelican-import --feed https://medium.com/feed/@username
+
+.. note::
+
+   The RSS feed may only return the most recent posts — not all of them.
+
 Tests
 =====

--- a/pelican/__init__.py
+++ b/pelican/__init__.py
@ -80,7 +80,14 @@ class Pelican:
                plugin.register()
                self.plugins.append(plugin)
            except Exception as e:
-                logger.error("Cannot register plugin `%s`\n%s", name, e)
+                logger.error(
+                    "Cannot register plugin `%s`\n%s",
+                    name,
+                    e,
+                    stacklevel=2,
+                )
+                if self.settings.get("DEBUG", False):
+                    console.print_exception()

        self.settings["PLUGINS"] = [get_plugin_name(p) for p in self.plugins]

@ -120,12 +127,15 @@ class Pelican:
            if hasattr(p, "generate_context"):
                p.generate_context()

+        # for plugins that create/edit the summary
+        logger.debug("Signal all_generators_finalized.send(<generators>)")
+        signals.all_generators_finalized.send(generators)
+
+        # update links in the summary, etc
        for p in generators:
            if hasattr(p, "refresh_metadata_intersite_links"):
                p.refresh_metadata_intersite_links()

-        signals.all_generators_finalized.send(generators)
-
        writer = self._get_writer()

        for p in generators:
--- a/pelican/contents.py
+++ b/pelican/contents.py
@ -6,7 +6,8 @@ import os
 import re
 from datetime import timezone
 from html import unescape
-from urllib.parse import unquote, urljoin, urlparse, urlunparse
+from typing import Any, Dict, Optional, Set, Tuple
+from urllib.parse import ParseResult, unquote, urljoin, urlparse, urlunparse

 try:
    from zoneinfo import ZoneInfo
@ -15,7 +16,7 @@ except ModuleNotFoundError:


 from pelican.plugins import signals
-from pelican.settings import DEFAULT_CONFIG
+from pelican.settings import DEFAULT_CONFIG, Settings
 from pelican.utils import (
    deprecated_attribute,
    memoized,
@ -44,12 +45,20 @@ class Content:

    """

+    default_template: Optional[str] = None
+    mandatory_properties: Tuple[str, ...] = ()
+
    @deprecated_attribute(old="filename", new="source_path", since=(3, 2, 0))
    def filename():
        return None

    def __init__(
-        self, content, metadata=None, settings=None, source_path=None, context=None
+        self,
+        content: str,
+        metadata: Optional[Dict[str, Any]] = None,
+        settings: Optional[Settings] = None,
+        source_path: Optional[str] = None,
+        context: Optional[Dict[Any, Any]] = None,
    ):
        if metadata is None:
            metadata = {}
@ -156,10 +165,10 @@ class Content:

        signals.content_object_init.send(self)

-    def __str__(self):
+    def __str__(self) -> str:
        return self.source_path or repr(self)

-    def _has_valid_mandatory_properties(self):
+    def _has_valid_mandatory_properties(self) -> bool:
        """Test mandatory properties are set."""
        for prop in self.mandatory_properties:
            if not hasattr(self, prop):
@ -169,7 +178,7 @@ class Content:
                return False
        return True

-    def _has_valid_save_as(self):
+    def _has_valid_save_as(self) -> bool:
        """Return true if save_as doesn't write outside output path, false
        otherwise."""
        try:
@ -190,7 +199,7 @@ class Content:

        return True

-    def _has_valid_status(self):
+    def _has_valid_status(self) -> bool:
        if hasattr(self, "allowed_statuses"):
            if self.status not in self.allowed_statuses:
                logger.error(
@ -204,7 +213,7 @@ class Content:
        # if undefined we allow all
        return True

-    def is_valid(self):
+    def is_valid(self) -> bool:
        """Validate Content"""
        # Use all() to not short circuit and get results of all validations
        return all(
@ -216,7 +225,7 @@ class Content:
        )

    @property
-    def url_format(self):
+    def url_format(self) -> Dict[str, Any]:
        """Returns the URL, formatted with the proper values"""
        metadata = copy.copy(self.metadata)
        path = self.metadata.get("path", self.get_relative_source_path())
@ -232,19 +241,19 @@ class Content:
        )
        return metadata

-    def _expand_settings(self, key, klass=None):
+    def _expand_settings(self, key: str, klass: Optional[str] = None) -> str:
        if not klass:
            klass = self.__class__.__name__
        fq_key = (f"{klass}_{key}").upper()
        return str(self.settings[fq_key]).format(**self.url_format)

-    def get_url_setting(self, key):
+    def get_url_setting(self, key: str) -> str:
        if hasattr(self, "override_" + key):
            return getattr(self, "override_" + key)
        key = key if self.in_default_lang else "lang_%s" % key
        return self._expand_settings(key)

-    def _link_replacer(self, siteurl, m):
+    def _link_replacer(self, siteurl: str, m: re.Match) -> str:
        what = m.group("what")
        value = urlparse(m.group("value"))
        path = value.path
@ -272,15 +281,15 @@ class Content:
        # XXX Put this in a different location.
        if what in {"filename", "static", "attach"}:

-            def _get_linked_content(key, url):
+            def _get_linked_content(key: str, url: ParseResult) -> Optional[Content]:
                nonlocal value

-                def _find_path(path):
+                def _find_path(path: str) -> Optional[Content]:
                    if path.startswith("/"):
                        path = path[1:]
                    else:
                        # relative to the source path of this content
-                        path = self.get_relative_source_path(
+                        path = self.get_relative_source_path(  # type: ignore
                            os.path.join(self.relative_dir, path)
                        )
                    return self._context[key].get(path, None)
@ -324,7 +333,7 @@ class Content:
            linked_content = _get_linked_content(key, value)
            if linked_content:
                if what == "attach":
-                    linked_content.attach_to(self)
+                    linked_content.attach_to(self)  # type: ignore
                origin = joiner(siteurl, linked_content.url)
                origin = origin.replace("\\", "/")  # for Windows paths.
            else:
@ -359,7 +368,7 @@ class Content:

        return "".join((m.group("markup"), m.group("quote"), origin, m.group("quote")))

-    def _get_intrasite_link_regex(self):
+    def _get_intrasite_link_regex(self) -> re.Pattern:
        intrasite_link_regex = self.settings["INTRASITE_LINK_REGEX"]
        regex = r"""
            (?P<markup><[^\>]+  # match tag with all url-value attributes
@ -370,7 +379,7 @@ class Content:
            (?P=quote)""".format(intrasite_link_regex)
        return re.compile(regex, re.X)

-    def _update_content(self, content, siteurl):
+    def _update_content(self, content: str, siteurl: str) -> str:
        """Update the content attribute.

        Change all the relative paths of the content to relative paths
@ -386,7 +395,7 @@ class Content:
        hrefs = self._get_intrasite_link_regex()
        return hrefs.sub(lambda m: self._link_replacer(siteurl, m), content)

-    def get_static_links(self):
+    def get_static_links(self) -> Set[str]:
        static_links = set()
        hrefs = self._get_intrasite_link_regex()
        for m in hrefs.finditer(self._content):
@ -402,15 +411,15 @@ class Content:
                path = self.get_relative_source_path(
                    os.path.join(self.relative_dir, path)
                )
-            path = path.replace("%20", " ")
+            path = path.replace("%20", " ")  # type: ignore
            static_links.add(path)
        return static_links

-    def get_siteurl(self):
+    def get_siteurl(self) -> str:
        return self._context.get("localsiteurl", "")

    @memoized
-    def get_content(self, siteurl):
+    def get_content(self, siteurl: str) -> str:
        if hasattr(self, "_get_content"):
            content = self._get_content()
        else:
@ -418,11 +427,11 @@ class Content:
        return self._update_content(content, siteurl)

    @property
-    def content(self):
+    def content(self) -> str:
        return self.get_content(self.get_siteurl())

    @memoized
-    def get_summary(self, siteurl):
+    def get_summary(self, siteurl: str) -> str:
        """Returns the summary of an article.

        This is based on the summary metadata if set, otherwise truncate the
@ -441,10 +450,10 @@ class Content:
        )

    @property
-    def summary(self):
+    def summary(self) -> str:
        return self.get_summary(self.get_siteurl())

-    def _get_summary(self):
+    def _get_summary(self) -> str:
        """deprecated function to access summary"""

        logger.warning(
@ -454,34 +463,36 @@ class Content:
        return self.summary

    @summary.setter
-    def summary(self, value):
+    def summary(self, value: str):
        """Dummy function"""
        pass

    @property
-    def status(self):
+    def status(self) -> str:
        return self._status

    @status.setter
-    def status(self, value):
+    def status(self, value: str) -> None:
        # TODO maybe typecheck
        self._status = value.lower()

    @property
-    def url(self):
+    def url(self) -> str:
        return self.get_url_setting("url")

    @property
-    def save_as(self):
+    def save_as(self) -> str:
        return self.get_url_setting("save_as")

-    def _get_template(self):
+    def _get_template(self) -> str:
        if hasattr(self, "template") and self.template is not None:
            return self.template
        else:
            return self.default_template

-    def get_relative_source_path(self, source_path=None):
+    def get_relative_source_path(
+        self, source_path: Optional[str] = None
+    ) -> Optional[str]:
        """Return the relative path (from the content path) to the given
        source_path.

@ -501,7 +512,7 @@ class Content:
        )

    @property
-    def relative_dir(self):
+    def relative_dir(self) -> str:
        return posixize_path(
            os.path.dirname(
                os.path.relpath(
@ -511,7 +522,7 @@ class Content:
            )
        )

-    def refresh_metadata_intersite_links(self):
+    def refresh_metadata_intersite_links(self) -> None:
        for key in self.settings["FORMATTED_FIELDS"]:
            if key in self.metadata and key != "summary":
                value = self._update_content(self.metadata[key], self.get_siteurl())
@ -519,13 +530,16 @@ class Content:
                setattr(self, key.lower(), value)

        # _summary is an internal variable that some plugins may be writing to,
-        # so ensure changes to it are picked up
-        if (
-            "summary" in self.settings["FORMATTED_FIELDS"]
-            and "summary" in self.metadata
-        ):
-            self._summary = self._update_content(self._summary, self.get_siteurl())
-            self.metadata["summary"] = self._summary
+        # so ensure changes to it are picked up, and write summary back to it
+        if "summary" in self.settings["FORMATTED_FIELDS"]:
+            if hasattr(self, "_summary"):
+                self.metadata["summary"] = self._summary
+
+            if "summary" in self.metadata:
+                self.metadata["summary"] = self._update_content(
+                    self.metadata["summary"], self.get_siteurl()
+                )
+                self._summary = self.metadata["summary"]


 class Page(Content):
@ -534,7 +548,7 @@ class Page(Content):
    default_status = "published"
    default_template = "page"

-    def _expand_settings(self, key):
+    def _expand_settings(self, key: str) -> str:
        klass = "draft_page" if self.status == "draft" else None
        return super()._expand_settings(key, klass)

@ -561,7 +575,7 @@ class Article(Content):
        if not hasattr(self, "date") and self.status == "draft":
            self.date = datetime.datetime.max.replace(tzinfo=self.timezone)

-    def _expand_settings(self, key):
+    def _expand_settings(self, key: str) -> str:
        klass = "draft" if self.status == "draft" else "article"
        return super()._expand_settings(key, klass)

@ -571,7 +585,7 @@ class Static(Content):
    default_status = "published"
    default_template = None

-    def __init__(self, *args, **kwargs):
+    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self._output_location_referenced = False

@ -588,18 +602,18 @@ class Static(Content):
        return None

    @property
-    def url(self):
+    def url(self) -> str:
        # Note when url has been referenced, so we can avoid overriding it.
        self._output_location_referenced = True
        return super().url

    @property
-    def save_as(self):
+    def save_as(self) -> str:
        # Note when save_as has been referenced, so we can avoid overriding it.
        self._output_location_referenced = True
        return super().save_as

-    def attach_to(self, content):
+    def attach_to(self, content: Content) -> None:
        """Override our output directory with that of the given content object."""

        # Determine our file's new output path relative to the linking
@ -624,7 +638,7 @@ class Static(Content):

        new_url = path_to_url(new_save_as)

-        def _log_reason(reason):
+        def _log_reason(reason: str) -> None:
            logger.warning(
                "The {attach} link in %s cannot relocate "
                "%s because %s. Falling back to "
--- a/pelican/generators.py
+++ b/pelican/generators.py
@ -384,8 +384,8 @@ class ArticlesGenerator(CachingGenerator):
                    str(self.settings["CATEGORY_FEED_ATOM"]).format(slug=cat.slug),
                    self.settings.get(
                        "CATEGORY_FEED_ATOM_URL",
-                        str(self.settings["CATEGORY_FEED_ATOM"]).format(slug=cat.slug),
-                    ),
+                        str(self.settings["CATEGORY_FEED_ATOM"]),
+                    ).format(slug=cat.slug),
                    feed_title=cat.name,
                )

@ -396,8 +396,8 @@ class ArticlesGenerator(CachingGenerator):
                    str(self.settings["CATEGORY_FEED_RSS"]).format(slug=cat.slug),
                    self.settings.get(
                        "CATEGORY_FEED_RSS_URL",
-                        str(self.settings["CATEGORY_FEED_RSS"]).format(slug=cat.slug),
-                    ),
+                        str(self.settings["CATEGORY_FEED_RSS"]),
+                    ).format(slug=cat.slug),
                    feed_title=cat.name,
                    feed_type="rss",
                )
@ -410,8 +410,8 @@ class ArticlesGenerator(CachingGenerator):
                    str(self.settings["AUTHOR_FEED_ATOM"]).format(slug=auth.slug),
                    self.settings.get(
                        "AUTHOR_FEED_ATOM_URL",
-                        str(self.settings["AUTHOR_FEED_ATOM"]).format(slug=auth.slug),
-                    ),
+                        str(self.settings["AUTHOR_FEED_ATOM"]),
+                    ).format(slug=auth.slug),
                    feed_title=auth.name,
                )

@ -422,8 +422,8 @@ class ArticlesGenerator(CachingGenerator):
                    str(self.settings["AUTHOR_FEED_RSS"]).format(slug=auth.slug),
                    self.settings.get(
                        "AUTHOR_FEED_RSS_URL",
-                        str(self.settings["AUTHOR_FEED_RSS"]).format(slug=auth.slug),
-                    ),
+                        str(self.settings["AUTHOR_FEED_RSS"]),
+                    ).format(slug=auth.slug),
                    feed_title=auth.name,
                    feed_type="rss",
                )
@ -437,8 +437,8 @@ class ArticlesGenerator(CachingGenerator):
                        str(self.settings["TAG_FEED_ATOM"]).format(slug=tag.slug),
                        self.settings.get(
                            "TAG_FEED_ATOM_URL",
-                            str(self.settings["TAG_FEED_ATOM"]).format(slug=tag.slug),
-                        ),
+                            str(self.settings["TAG_FEED_ATOM"]),
+                        ).format(slug=tag.slug),
                        feed_title=tag.name,
                    )

@ -449,8 +449,8 @@ class ArticlesGenerator(CachingGenerator):
                        str(self.settings["TAG_FEED_RSS"]).format(slug=tag.slug),
                        self.settings.get(
                            "TAG_FEED_RSS_URL",
-                            str(self.settings["TAG_FEED_RSS"]).format(slug=tag.slug),
-                        ),
+                            str(self.settings["TAG_FEED_RSS"]),
+                        ).format(slug=tag.slug),
                        feed_title=tag.name,
                        feed_type="rss",
                    )
@ -471,10 +471,8 @@ class ArticlesGenerator(CachingGenerator):
                        str(self.settings["TRANSLATION_FEED_ATOM"]).format(lang=lang),
                        self.settings.get(
                            "TRANSLATION_FEED_ATOM_URL",
-                            str(self.settings["TRANSLATION_FEED_ATOM"]).format(
-                                lang=lang
-                            ),
-                        ),
+                            str(self.settings["TRANSLATION_FEED_ATOM"]),
+                        ).format(lang=lang),
                    )
                if self.settings.get("TRANSLATION_FEED_RSS"):
                    writer.write_feed(
--- a/pelican/log.py
+++ b/pelican/log.py
@ -85,13 +85,39 @@ class FatalLogger(LimitLogger):
    warnings_fatal = False
    errors_fatal = False

-    def warning(self, *args, **kwargs):
-        super().warning(*args, **kwargs)
+    def warning(self, *args, stacklevel=1, **kwargs):
+        """
+        Displays a logging warning.
+
+        Wrapping it here allows Pelican to filter warnings, and conditionally
+        make warnings fatal.
+
+        Args:
+            stacklevel (int): the stacklevel that would be used to display the
+            calling location, except for this function. Adjusting the
+            stacklevel allows you to see the "true" calling location of the
+            warning, rather than this wrapper location.
+        """
+        stacklevel += 1
+        super().warning(*args, stacklevel=stacklevel, **kwargs)
        if FatalLogger.warnings_fatal:
            raise RuntimeError("Warning encountered")

-    def error(self, *args, **kwargs):
-        super().error(*args, **kwargs)
+    def error(self, *args, stacklevel=1, **kwargs):
+        """
+        Displays a logging error.
+
+        Wrapping it here allows Pelican to filter errors, and conditionally
+        make errors fatal.
+
+        Args:
+            stacklevel (int): the stacklevel that would be used to display the
+            calling location, except for this function. Adjusting the
+            stacklevel allows you to see the "true" calling location of the
+            error, rather than this wrapper location.
+        """
+        stacklevel += 1
+        super().error(*args, stacklevel=stacklevel, **kwargs)
        if FatalLogger.errors_fatal:
            raise RuntimeError("Error encountered")

--- a/pelican/settings.py
+++ b/pelican/settings.py
@ -8,11 +8,13 @@ import re
 import sys
 from os.path import isabs
 from pathlib import Path
+from types import ModuleType
+from typing import Any, Dict, Optional

 from pelican.log import LimitFilter


-def load_source(name, path):
+def load_source(name: str, path: str) -> ModuleType:
    spec = importlib.util.spec_from_file_location(name, path)
    mod = importlib.util.module_from_spec(spec)
    sys.modules[name] = mod
@ -22,6 +24,8 @@ def load_source(name, path):

 logger = logging.getLogger(__name__)

+Settings = Dict[str, Any]
+
 DEFAULT_THEME = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), "themes", "notmyidea"
 )
@ -178,7 +182,9 @@ DEFAULT_CONFIG = {
 PYGMENTS_RST_OPTIONS = None


-def read_settings(path=None, override=None):
+def read_settings(
+    path: Optional[str] = None, override: Optional[Settings] = None
+) -> Settings:
    settings = override or {}

    if path:
@ -222,7 +228,7 @@ def read_settings(path=None, override=None):
    return settings


-def get_settings_from_module(module=None):
+def get_settings_from_module(module: Optional[ModuleType] = None) -> Settings:
    """Loads settings from a module, returns a dictionary."""

    context = {}
@ -231,7 +237,7 @@ def get_settings_from_module(module=None):
    return context


-def get_settings_from_file(path):
+def get_settings_from_file(path: str) -> Settings:
    """Loads settings from a file path, returning a dict."""

    name, ext = os.path.splitext(os.path.basename(path))
@ -239,7 +245,7 @@ def get_settings_from_file(path):
    return get_settings_from_module(module)


-def get_jinja_environment(settings):
+def get_jinja_environment(settings: Settings) -> Settings:
    """Sets the environment for Jinja"""

    jinja_env = settings.setdefault(
@ -254,7 +260,7 @@ def get_jinja_environment(settings):
    return settings


-def _printf_s_to_format_field(printf_string, format_field):
+def _printf_s_to_format_field(printf_string: str, format_field: str) -> str:
    """Tries to replace %s with {format_field} in the provided printf_string.
    Raises ValueError in case of failure.
    """
@ -270,7 +276,7 @@ def _printf_s_to_format_field(printf_string, format_field):
    return result


-def handle_deprecated_settings(settings):
+def handle_deprecated_settings(settings: Settings) -> Settings:
    """Converts deprecated settings and issues warnings. Issues an exception
    if both old and new setting is specified.
    """
@ -567,7 +573,7 @@ def handle_deprecated_settings(settings):
    return settings


-def configure_settings(settings):
+def configure_settings(settings: Settings) -> Settings:
    """Provide optimizations, error checking, and warnings for the given
    settings.
    Also, specify the log messages to be ignored.
--- a/pelican/tests/content/medium_post_content.txt
+++ b/pelican/tests/content/medium_post_content.txt
@ -0,0 +1,4 @@
+
+<hr/><h3>Title header</h3><p>A paragraph of content.</p><p>Paragraph number two.</p><p>A list:</p><ol><li>One.</li><li>Two.</li><li>Three.</li></ol><p>A link: <a data-href="https://example.com/example" href="https://example.com/example" target="_blank">link text</a>.</p><h3>Header 2</h3><p>A block quote:</p><blockquote>quote words <strong>strong words</strong></blockquote><p>after blockquote</p><figure><img data-height="282" data-image-id="image1.png" data-width="739" src="https://cdn-images-1.medium.com/max/800/image1.png"/><figcaption>A figure caption.</figcaption></figure><p>A final note: <a data-href="http://stats.stackexchange.com/" href="http://stats.stackexchange.com/" rel="noopener" target="_blank">Cross-Validated</a> has sometimes been helpful.</p><hr/><p><em>Next: </em><a data-href="https://medium.com/@username/post-url" href="https://medium.com/@username/post-url" target="_blank"><em>Next post</em>
+</a></p>
+<p>By <a href="https://medium.com/@username">User Name</a> on <a href="https://medium.com/p/medium-short-url"><time datetime="2017-04-21T17:11:55.799Z">April 21, 2017</time></a>.</p><p><a href="https://medium.com/@username/this-post-url">Canonical link</a></p><p>Exported from <a href="https://medium.com">Medium</a> on December 1, 2023.</p>
--- a/pelican/tests/content/medium_posts/2017-04-21_-medium-post--d1bf01d62ba3.html
+++ b/pelican/tests/content/medium_posts/2017-04-21_-medium-post--d1bf01d62ba3.html
@ -0,0 +1,72 @@
+<!DOCTYPE html><html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"><title>A title</title><style>
+      * {
+        font-family: Georgia, Cambria, "Times New Roman", Times, serif;
+      }
+      html, body {
+        margin: 0;
+        padding: 0;
+      }
+      h1 {
+        font-size: 50px;
+        margin-bottom: 17px;
+        color: #333;
+      }
+      h2 {
+        font-size: 24px;
+        line-height: 1.6;
+        margin: 30px 0 0 0;
+        margin-bottom: 18px;
+        margin-top: 33px;
+        color: #333;
+      }
+      h3 {
+        font-size: 30px;
+        margin: 10px 0 20px 0;
+        color: #333;
+      }
+      header {
+        width: 640px;
+        margin: auto;
+      }
+      section {
+        width: 640px;
+        margin: auto;
+      }
+      section p {
+        margin-bottom: 27px;
+        font-size: 20px;
+        line-height: 1.6;
+        color: #333;
+      }
+      section img {
+        max-width: 640px;
+      }
+      footer {
+        padding: 0 20px;
+        margin: 50px 0;
+        text-align: center;
+        font-size: 12px;
+      }
+      .aspectRatioPlaceholder {
+        max-width: auto !important;
+        max-height: auto !important;
+      }
+      .aspectRatioPlaceholder-fill {
+        padding-bottom: 0 !important;
+      }
+      header,
+      section[data-field=subtitle],
+      section[data-field=description] {
+        display: none;
+      }
+      </style></head><body><article class="h-entry">
+<header>
+<h1 class="p-name">A name (like title)</h1>
+</header>
+<section data-field="subtitle" class="p-summary">
+    Summary (first several words of content)
+</section>
+<section data-field="body" class="e-content">
+<section name="ad15" class="section section--body section--first"><div class="section-divider"><hr class="section-divider"></div><div class="section-content"><div class="section-inner sectionLayout--insetColumn"><h3 name="20a3" id="20a3" class="graf graf--h3 graf--leading graf--title">Title header</h3><p name="e3d6" id="e3d6" class="graf graf--p graf-after--h3">A paragraph of content.</p><p name="c7a8" id="c7a8" class="graf graf--p graf-after--p">Paragraph number two.</p><p name="42aa" id="42aa" class="graf graf--p graf-after--p">A list:</p><ol class="postList"><li name="d65f" id="d65f" class="graf graf--li graf-after--p">One.</li><li name="232b" id="232b" class="graf graf--li graf-after--li">Two.</li><li name="ef87" id="ef87" class="graf graf--li graf-after--li">Three.</li></ol><p name="e743" id="e743" class="graf graf--p graf-after--p">A link: <a href="https://example.com/example" data-href="https://example.com/example" class="markup--anchor markup--p-anchor" target="_blank">link text</a>.</p><h3 name="4cfd" id="4cfd" class="graf graf--h3 graf-after--p">Header 2</h3><p name="433c" id="433c" class="graf graf--p graf-after--p">A block quote:</p><blockquote name="3537" id="3537" class="graf graf--blockquote graf-after--p">quote words <strong class="markup--strong markup--blockquote-strong">strong words</strong></blockquote><p name="00cc" id="00cc" class="graf graf--p graf-after--blockquote">after blockquote</p><figure name="edb0" id="edb0" class="graf graf--figure graf-after--p"><img class="graf-image" data-image-id="image1.png" data-width="739" data-height="282" src="https://cdn-images-1.medium.com/max/800/image1.png"><figcaption class="imageCaption">A figure caption.</figcaption></figure><p name="f401" id="f401" class="graf graf--p graf-after--p graf--trailing">A final note: <a href="http://stats.stackexchange.com/" data-href="http://stats.stackexchange.com/" class="markup--anchor markup--p-anchor" rel="noopener" target="_blank">Cross-Validated</a> has sometimes 
been helpful.</p></div></div></section><section name="09a3" class="section section--body section--last"><div class="section-divider"><hr class="section-divider"></div><div class="section-content"><div class="section-inner sectionLayout--insetColumn"><p name="81e8" id="81e8" class="graf graf--p graf--leading"><em class="markup--em markup--p-em">Next: </em><a href="https://medium.com/@username/post-url" data-href="https://medium.com/@username/post-url" class="markup--anchor markup--p-anchor" target="_blank"><em class="markup--em markup--p-em">Next post</em>
+</section>
+<footer><p>By <a href="https://medium.com/@username" class="p-author h-card">User Name</a> on <a href="https://medium.com/p/medium-short-url"><time class="dt-published" datetime="2017-04-21T17:11:55.799Z">April 21, 2017</time></a>.</p><p><a href="https://medium.com/@username/this-post-url" class="p-canonical">Canonical link</a></p><p>Exported from <a href="https://medium.com">Medium</a> on December 1, 2023.</p></footer></article></body></html>
--- a/pelican/tests/test_generators.py
+++ b/pelican/tests/test_generators.py
@ -264,6 +264,7 @@ class TestArticlesGenerator(unittest.TestCase):

    def test_generate_context(self):
        articles_expected = [
+            ["A title", "published", "medium_posts", "article"],
            ["Article title", "published", "Default", "article"],
            [
                "Article with markdown and summary metadata multi",
@ -391,13 +392,24 @@ class TestArticlesGenerator(unittest.TestCase):
        # terms of process order will define the name for that category
        categories = [cat.name for cat, _ in self.generator.categories]
        categories_alternatives = (
-            sorted(["Default", "TestCategory", "Yeah", "test", "指導書"]),
-            sorted(["Default", "TestCategory", "yeah", "test", "指導書"]),
+            sorted(
+                ["Default", "TestCategory", "medium_posts", "Yeah", "test", "指導書"]
+            ),
+            sorted(
+                ["Default", "TestCategory", "medium_posts", "yeah", "test", "指導書"]
+            ),
        )
        self.assertIn(sorted(categories), categories_alternatives)
        # test for slug
        categories = [cat.slug for cat, _ in self.generator.categories]
-        categories_expected = ["default", "testcategory", "yeah", "test", "zhi-dao-shu"]
+        categories_expected = [
+            "default",
+            "testcategory",
+            "medium_posts",
+            "yeah",
+            "test",
+            "zhi-dao-shu",
+        ]
        self.assertEqual(sorted(categories), sorted(categories_expected))

    def test_do_not_use_folder_as_category(self):
@ -549,7 +561,8 @@ class TestArticlesGenerator(unittest.TestCase):
            granularity: {period["period"] for period in periods}
            for granularity, periods in period_archives.items()
        }
-        expected = {"year": {(1970,), (2010,), (2012,), (2014,)}}
+        self.maxDiff = None
+        expected = {"year": {(1970,), (2010,), (2012,), (2014,), (2017,)}}
        self.assertEqual(expected, abbreviated_archives)

        # Month archives enabled:
@ -570,7 +583,7 @@ class TestArticlesGenerator(unittest.TestCase):
            for granularity, periods in period_archives.items()
        }
        expected = {
-            "year": {(1970,), (2010,), (2012,), (2014,)},
+            "year": {(1970,), (2010,), (2012,), (2014,), (2017,)},
            "month": {
                (1970, "January"),
                (2010, "December"),
@ -578,6 +591,7 @@ class TestArticlesGenerator(unittest.TestCase):
                (2012, "November"),
                (2012, "October"),
                (2014, "February"),
+                (2017, "April"),
            },
        }
        self.assertEqual(expected, abbreviated_archives)
@ -602,7 +616,7 @@ class TestArticlesGenerator(unittest.TestCase):
            for granularity, periods in period_archives.items()
        }
        expected = {
-            "year": {(1970,), (2010,), (2012,), (2014,)},
+            "year": {(1970,), (2010,), (2012,), (2014,), (2017,)},
            "month": {
                (1970, "January"),
                (2010, "December"),
@ -610,6 +624,7 @@ class TestArticlesGenerator(unittest.TestCase):
                (2012, "November"),
                (2012, "October"),
                (2014, "February"),
+                (2017, "April"),
            },
            "day": {
                (1970, "January", 1),
@ -619,6 +634,7 @@ class TestArticlesGenerator(unittest.TestCase):
                (2012, "October", 30),
                (2012, "October", 31),
                (2014, "February", 9),
+                (2017, "April", 21),
            },
        }
        self.assertEqual(expected, abbreviated_archives)
@ -836,8 +852,12 @@ class TestArticlesGenerator(unittest.TestCase):

        categories = sorted([category.name for category, _ in generator.categories])
        categories_expected = [
-            sorted(["Default", "TestCategory", "yeah", "test", "指導書"]),
-            sorted(["Default", "TestCategory", "Yeah", "test", "指導書"]),
+            sorted(
+                ["Default", "TestCategory", "medium_posts", "yeah", "test", "指導書"]
+            ),
+            sorted(
+                ["Default", "TestCategory", "medium_posts", "Yeah", "test", "指導書"]
+            ),
        ]
        self.assertIn(categories, categories_expected)

@ -864,6 +884,7 @@ class TestArticlesGenerator(unittest.TestCase):
        generator.generate_context()

        expected = [
+            "A title",
            "An Article With Code Block To Test Typogrify Ignore",
            "Article title",
            "Article with Nonconformant HTML meta tags",
--- a/pelican/tests/test_importer.py
+++ b/pelican/tests/test_importer.py
@ -21,6 +21,10 @@ from pelican.tools.pelican_import import (
    get_attachments,
    tumblr2fields,
    wp2fields,
+    mediumpost2fields,
+    mediumposts2fields,
+    strip_medium_post_content,
+    medium_slug,
 )
 from pelican.utils import path_to_file_url, slugify

@ -708,3 +712,82 @@ class TestTumblrImporter(TestCaseWithCLocale):
            posts,
            posts,
        )
+
+
+class TestMediumImporter(TestCaseWithCLocale):
+    # Exercises the Medium export importer against the sample post that ships
+    # with the test content.
+
+    def setUp(self):
+        super().setUp()
+        self.test_content_root = "pelican/tests/content"
+        # Parsing does not reproduce the export byte for byte (Beautiful Soup
+        # reorders attributes, for example), so the expected content is kept
+        # in its own fixture file.
+        content_filename = f"{self.test_content_root}/medium_post_content.txt"
+        with open(content_filename, encoding="utf-8") as the_content_file:
+            raw_content = the_content_file.read()
+        # Many editors and scripts add a final newline: insist on it, then
+        # drop it before comparing.
+        assert raw_content[-1] == "\n"
+        expected_content = raw_content[:-1]
+        expected_slug = "2017-04-21-medium-post"
+        self.post_tuple = (
+            "A title",
+            expected_content,
+            expected_slug,
+            "2017-04-21 17:11",
+            "User Name",
+            None,
+            (),
+            "published",
+            "article",
+            "html",
+        )
+
+    def test_mediumpost2field(self):
+        """Parse one post"""
+        post_filename = f"{self.test_content_root}/medium_posts/2017-04-21_-medium-post--d1bf01d62ba3.html"
+        parsed_fields = mediumpost2fields(post_filename)
+        # The parsed tuple doubles as the failure message.
+        self.assertEqual(self.post_tuple, parsed_fields, msg=parsed_fields)
+
+    def test_mediumposts2field(self):
+        """Parse all posts in an export directory"""
+        export_dir = f"{self.test_content_root}/medium_posts"
+        posts = list(mediumposts2fields(export_dir))
+        self.assertEqual(1, len(posts))
+        self.assertEqual(self.post_tuple, posts[0])
+
+    def test_strip_content(self):
+        """Strip out unhelpful tags"""
+        html_doc = (
+            "<section>This keeps <i>lots</i> of <b>tags</b>, but not "
+            "the <section>section</section> tags</section>"
+        )
+        expected = "This keeps <i>lots</i> of <b>tags</b>, but not the section tags"
+        soup = BeautifulSoup(html_doc, "html.parser")
+        self.assertEqual(expected, strip_medium_post_content(soup))
+
+    def test_medium_slug(self):
+        # Each case is (exported file path, expected slug).
+        cases = (
+            # Remove hex stuff at the end
+            (
+                "medium-export/posts/2017-04-27_A-long-title--2971442227dd.html",
+                "2017-04-27_A-long-title",
+            ),
+            # Remove "--DRAFT" at the end
+            (
+                "medium-export/posts/2017-04-27_A-long-title--DRAFT.html",
+                "2017-04-27_A-long-title",
+            ),
+            # Remove both (which happens)
+            ("draft_How-to-do--DRAFT--87225c81dddd.html", "draft_How-to-do"),
+            # If no hex stuff, leave it alone
+            (
+                "medium-export/posts/2017-04-27_A-long-title.html",
+                "2017-04-27_A-long-title",
+            ),
+        )
+        for export_path, expected_slug in cases:
+            self.assertEqual(expected_slug, medium_slug(export_path))
--- a/pelican/tools/pelican_import.py
+++ b/pelican/tools/pelican_import.py
@ -15,6 +15,8 @@ from urllib.error import URLError
 from urllib.parse import quote, urlparse, urlsplit, urlunsplit
 from urllib.request import urlretrieve

+import dateutil.parser
+
 # because logging.setLoggerClass has to be called before logging.getLogger
 from pelican.log import init
 from pelican.settings import DEFAULT_CONFIG
@ -114,19 +116,25 @@ def decode_wp_content(content, br=True):
    return content


-def xml_to_soup(xml):
-    """Opens an xml file"""
+def _import_bs4():
+    """Import and return bs4, otherwise sys.exit."""
    try:
-        from bs4 import BeautifulSoup
+        import bs4
    except ImportError:
        error = (
            'Missing dependency "BeautifulSoup4" and "lxml" required to '
            "import XML files."
        )
        sys.exit(error)
+    return bs4
+
+
+def file_to_soup(xml, features="xml"):
+    """Reads a file, returns soup."""
+    bs4 = _import_bs4()
    with open(xml, encoding="utf-8") as infile:
        xmlfile = infile.read()
-    soup = BeautifulSoup(xmlfile, "xml")
+    soup = bs4.BeautifulSoup(xmlfile, features)
    return soup


@ -140,7 +148,7 @@ def get_filename(post_name, post_id):
 def wp2fields(xml, wp_custpost=False):
    """Opens a wordpress XML file, and yield Pelican fields"""

-    soup = xml_to_soup(xml)
+    soup = file_to_soup(xml)
    items = soup.rss.channel.findAll("item")
    for item in items:
        if item.find("status").string in ["publish", "draft"]:
@ -210,7 +218,7 @@ def wp2fields(xml, wp_custpost=False):
 def blogger2fields(xml):
    """Opens a blogger XML file, and yield Pelican fields"""

-    soup = xml_to_soup(xml)
+    soup = file_to_soup(xml)
    entries = soup.feed.findAll("entry")
    for entry in entries:
        raw_kind = entry.find(
@ -536,6 +544,133 @@ def tumblr2fields(api_key, blogname):
        posts = _get_tumblr_posts(api_key, blogname, offset)


+def strip_medium_post_content(soup) -> str:
+    """Strip some tags and attributes from medium post content.
+
+    For example, the 'section' and 'div' tags cause trouble while rendering.
+
+    The problem with these tags is you can get a section divider (--------------)
+    that is not between two pieces of content.  For example:
+
+      Some text.
+
+      .. container:: section-divider
+
+         --------------
+
+      .. container:: section-content
+
+      More content.
+
+    In this case, pandoc complains: "Unexpected section title or transition."
+
+    Also, the "id" and "name" attributes in tags cause similar problems.  They show
+    up in .rst as extra junk that separates transitions.
+
+    ``soup`` is modified in place.  The return value is the markup of its
+    children joined into one string; the ``soup`` element itself is not part
+    of the output.
+    """
+    bs4 = _import_bs4()
+
+    # Remove tags
+    # section and div cause problems
+    # footer also can cause problems, and has nothing we want to keep
+    # See https://stackoverflow.com/a/8439761
+    invalid_tags = ["section", "div", "footer"]
+    for tag in invalid_tags:
+        # find_all() returns a list, so unwrapping while looping is safe.
+        # unwrap() removes the tag itself but leaves its children in place.
+        for match in soup.find_all(tag):
+            match.unwrap()
+
+    # Remove attributes
+    # See https://stackoverflow.com/a/9045719
+    invalid_attributes = ["name", "id", "class"]
+    for tag in soup.descendants:
+        # descendants also yields text nodes, which carry no attributes
+        if isinstance(tag, bs4.element.Tag):
+            tag.attrs = {
+                key: value
+                for key, value in tag.attrs.items()
+                if key not in invalid_attributes
+            }
+
+    # Get the string of all content, keeping other tags
+    all_content = "".join(str(element) for element in soup.contents)
+    return all_content
+
+
+def mediumpost2fields(filepath: str) -> tuple:
+    """Take an HTML post from a medium export, return Pelican fields.
+
+    The result is a 10-tuple laid out as::
+
+        (title, content, slug, date, author, None, tags, status, kind, "html")
+
+    * ``title`` is the string of the ``<title>`` element, or ``""`` when the
+      element is missing or has no string.
+    * ``content`` is the ``<section class="e-content">`` markup after it has
+      been cleaned by ``strip_medium_post_content``.
+    * ``date`` is formatted as ``%Y-%m-%d %H:%M``.  Without a
+      ``<time class="dt-published">`` element it is ``None`` and ``status``
+      is ``"draft"`` instead of ``"published"``.
+    * ``author`` is the string of the ``p-author h-card`` link, or ``None``
+      when there is no such link.
+    * ``tags`` is always empty and ``kind`` is always ``"article"``.
+
+    Raises ``ValueError`` when the parsed document is falsy or the post has
+    no content section.
+    """
+
+    soup = file_to_soup(filepath, "html.parser")
+    if not soup:
+        raise ValueError(f"{filepath} could not be parsed by beautifulsoup")
+    kind = "article"
+
+    content = soup.find("section", class_="e-content")
+    if not content:
+        raise ValueError(f"{filepath}: Post has no content")
+
+    # A post without a <title> element gets an empty title instead of
+    # failing with AttributeError on None.
+    title_tag = soup.find("title")
+    title = (title_tag.string if title_tag is not None else None) or ""
+
+    raw_date = soup.find("time", class_="dt-published")
+    date = None
+    if raw_date:
+        # This datetime can include timezone, e.g., "2017-04-21T17:11:55.799Z"
+        # python before 3.11 can't parse the timezone using datetime.fromisoformat
+        # See also https://docs.python.org/3.10/library/datetime.html#datetime.datetime.fromisoformat
+        # "This does not support parsing arbitrary ISO 8601 strings"
+        # So, we use dateutil.parser, which can handle it.
+        date_object = dateutil.parser.parse(raw_date.attrs["datetime"])
+        date = date_object.strftime("%Y-%m-%d %H:%M")
+        status = "published"
+    else:
+        status = "draft"
+    author = soup.find("a", class_="p-author h-card")
+    if author:
+        author = author.string
+
+    # Now that we're done with classes, we can strip the content
+    content = strip_medium_post_content(content)
+
+    # medium HTML export doesn't have tag or category
+    # RSS feed has tags, but it doesn't have all the posts.
+    tags = ()
+
+    slug = medium_slug(filepath)
+
+    # TODO: make the fields a python dataclass
+    return (
+        title,
+        content,
+        slug,
+        date,
+        author,
+        None,
+        tags,
+        status,
+        kind,
+        "html",
+    )
+
+
+def medium_slug(filepath: str) -> str:
+    """Make the filepath of a medium exported file into a slug.
+
+    The slug is the file's basename without its extension, with "_-"
+    collapsed to "-" and any trailing post id and/or "DRAFT" marker removed,
+    e.g. "posts/2017-04-27_A-long-title--2971442227dd.html" becomes
+    "2017-04-27_A-long-title".
+    """
+    # slug: filename without extension
+    slug = os.path.basename(filepath)
+    slug = os.path.splitext(slug)[0]
+    # A medium export filename looks like date_-title-...html
+    # But, RST doesn't like "_-" (see https://github.com/sphinx-doc/sphinx/issues/4350)
+    # so get rid of it
+    slug = slug.replace("_-", "-")
+    # drop the hex string medium puts on the end of the filename, why keep it.
+    # e.g., "-a8a8a8a8" or "---a9a9a9a9"
+    # also: drafts don't need "--DRAFT"
+    # The id must be at least 8 hex characters long.  Without that minimum,
+    # title words and numbers made only of 0-9a-f ("decade", "2", "04-21")
+    # would be mistaken for part of the suffix and removed with it.
+    slug = re.sub(r"(?:-+(?:[0-9a-f]{8,}|DRAFT))+$", "", slug)
+    return slug
+
+
+def mediumposts2fields(medium_export_dir: str):
+    """Take HTML posts in a medium export directory, and yield Pelican fields.
+
+    Yields one ``mediumpost2fields`` result per ``.html`` file found directly
+    inside ``medium_export_dir``, in sorted filename order.  Sub-directories
+    and files with any other extension are skipped.
+    """
+    # os.listdir() returns entries in arbitrary order; sort them so repeated
+    # imports process the posts in the same sequence.
+    for file in sorted(os.listdir(medium_export_dir)):
+        filename = os.fsdecode(file)
+        filepath = os.path.join(medium_export_dir, filename)
+        # Stray entries (e.g. ".DS_Store" or an images folder) are not posts;
+        # parsing them would raise and abort the whole import.
+        if not filename.lower().endswith(".html") or not os.path.isfile(filepath):
+            continue
+        yield mediumpost2fields(filepath)
+
+
 def feed2fields(file):
    """Read a feed and yield pelican fields"""
    import feedparser
@ -711,7 +846,7 @@ def get_attachments(xml):
    """returns a dictionary of posts that have attachments with a list
    of the attachment_urls
    """
-    soup = xml_to_soup(xml)
+    soup = file_to_soup(xml)
    items = soup.rss.channel.findAll("item")
    names = {}
    attachments = []
@ -837,6 +972,9 @@ def fields2pelican(
            posts_require_pandoc.append(filename)

        slug = not disable_slugs and filename or None
+        assert slug is None or filename == os.path.basename(
+            filename
+        ), f"filename is not a basename: {filename}"

        if wp_attach and attachments:
            try:
@ -984,6 +1122,9 @@ def main():
    parser.add_argument(
        "--dotclear", action="store_true", dest="dotclear", help="Dotclear export"
    )
+    parser.add_argument(
+        "--medium", action="store_true", dest="medium", help="Medium export"
+    )
    parser.add_argument(
        "--tumblr", action="store_true", dest="tumblr", help="Tumblr export"
    )
@ -1069,6 +1210,8 @@ def main():
        input_type = "blogger"
    elif args.dotclear:
        input_type = "dotclear"
+    elif args.medium:
+        input_type = "medium"
    elif args.tumblr:
        input_type = "tumblr"
    elif args.wpfile:
@ -1077,8 +1220,8 @@ def main():
        input_type = "feed"
    else:
        error = (
-            "You must provide either --blogger, --dotclear, "
-            "--tumblr, --wpfile or --feed options"
+            "You must provide one of --blogger, --dotclear, "
+            "--medium, --tumblr, --wpfile or --feed options"
        )
        exit(error)

@ -1097,12 +1240,16 @@ def main():
        fields = blogger2fields(args.input)
    elif input_type == "dotclear":
        fields = dc2fields(args.input)
+    elif input_type == "medium":
+        fields = mediumposts2fields(args.input)
    elif input_type == "tumblr":
        fields = tumblr2fields(args.input, args.blogname)
    elif input_type == "wordpress":
        fields = wp2fields(args.input, args.wp_custpost or False)
    elif input_type == "feed":
        fields = feed2fields(args.input)
+    else:
+        raise ValueError(f"Unhandled input_type {input_type}")

    if args.wp_attach:
        attachments = get_attachments(args.input)
--- a/pelican/tools/pelican_quickstart.py
+++ b/pelican/tools/pelican_quickstart.py
@ -44,6 +44,7 @@ _TEMPLATES_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "templ
 _jinja_env = Environment(
    loader=FileSystemLoader(_TEMPLATES_DIR),
    trim_blocks=True,
+    keep_trailing_newline=True,
 )


--- a/pelican/tools/templates/Makefile.jinja2
+++ b/pelican/tools/templates/Makefile.jinja2
@ -37,6 +37,7 @@ DROPBOX_DIR={{dropbox_dir}}
 {% endif %}
 {% if github %}
 GITHUB_PAGES_BRANCH={{github_pages_branch}}
+GITHUB_PAGES_COMMIT_MESSAGE=Generate Pelican site

 {% endif %}

@ -161,7 +162,7 @@ cf_upload: publish
 {% if github %}
 {% set upload = upload + ["github"] %}
 github: publish
-	ghp-import -m "Generate Pelican site" -b $(GITHUB_PAGES_BRANCH) "$(OUTPUTDIR)"
+	ghp-import -m "$(GITHUB_PAGES_COMMIT_MESSAGE)" -b $(GITHUB_PAGES_BRANCH) "$(OUTPUTDIR)" --no-jekyll
 	git push origin $(GITHUB_PAGES_BRANCH)

 {% endif %}
--- a/pelican/utils.py
+++ b/pelican/utils.py
@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import datetime
 import fnmatch
 import locale
@ -16,6 +18,21 @@ from html import entities
 from html.parser import HTMLParser
 from itertools import groupby
 from operator import attrgetter
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    Collection,
+    Dict,
+    Generator,
+    Iterable,
+    List,
+    Optional,
+    Sequence,
+    Tuple,
+    Type,
+    Union,
+)

 import dateutil.parser

@ -27,11 +44,15 @@ from markupsafe import Markup

 import watchfiles

+if TYPE_CHECKING:
+    from pelican.contents import Content
+    from pelican.readers import Readers
+    from pelican.settings import Settings

 logger = logging.getLogger(__name__)


-def sanitised_join(base_directory, *parts):
+def sanitised_join(base_directory: str, *parts: str) -> str:
    joined = posixize_path(os.path.abspath(os.path.join(base_directory, *parts)))
    base = posixize_path(os.path.abspath(base_directory))
    if not joined.startswith(base):
@ -40,7 +61,7 @@ def sanitised_join(base_directory, *parts):
    return joined


-def strftime(date, date_format):
+def strftime(date: datetime.datetime, date_format: str) -> str:
    """
    Enhanced replacement for built-in strftime with zero stripping

@ -109,10 +130,14 @@ class DateFormatter:
    defined in LOCALE setting
    """

-    def __init__(self):
+    def __init__(self) -> None:
        self.locale = locale.setlocale(locale.LC_TIME)
+        # python has issue with Turkish_Türkiye.1254 locale, replace it to
+        # something accepted: Turkish
+        if self.locale == "Turkish_Türkiye.1254":
+            self.locale = "Turkish"

-    def __call__(self, date, date_format):
+    def __call__(self, date: datetime.datetime, date_format: str) -> str:
        # on OSX, encoding from LC_CTYPE determines the unicode output in PY3
        # make sure it's same as LC_TIME
        with temporary_locale(self.locale, locale.LC_TIME), temporary_locale(
@ -131,11 +156,11 @@ class memoized:

    """

-    def __init__(self, func):
+    def __init__(self, func: Callable) -> None:
        self.func = func
-        self.cache = {}
+        self.cache: Dict[Any, Any] = {}

-    def __call__(self, *args):
+    def __call__(self, *args) -> Any:
        if not isinstance(args, Hashable):
            # uncacheable. a list, for instance.
            # better to not cache than blow up.
@ -147,17 +172,23 @@ class memoized:
            self.cache[args] = value
            return value

-    def __repr__(self):
+    def __repr__(self) -> Optional[str]:
        return self.func.__doc__

-    def __get__(self, obj, objtype):
+    def __get__(self, obj: Any, objtype):
        """Support instance methods."""
        fn = partial(self.__call__, obj)
        fn.cache = self.cache
        return fn


-def deprecated_attribute(old, new, since=None, remove=None, doc=None):
+def deprecated_attribute(
+    old: str,
+    new: str,
+    since: Tuple[int, ...],
+    remove: Optional[Tuple[int, ...]] = None,
+    doc: Optional[str] = None,
+):
    """Attribute deprecation decorator for gentle upgrades

    For example:
@ -198,7 +229,7 @@ def deprecated_attribute(old, new, since=None, remove=None, doc=None):
    return decorator


-def get_date(string):
+def get_date(string: str) -> datetime.datetime:
    """Return a datetime object from a string.

    If no format matches the given date, raise a ValueError.
@ -212,7 +243,9 @@ def get_date(string):


@contextmanager
-def pelican_open(filename, mode="r", strip_crs=(sys.platform == "win32")):
+def pelican_open(
+    filename: str, mode: str = "r", strip_crs: bool = (sys.platform == "win32")
+) -> Generator[str, None, None]:
    """Open a file and return its content"""

    # utf-8-sig will clear any BOM if present
@ -221,7 +254,12 @@ def pelican_open(filename, mode="r", strip_crs=(sys.platform == "win32")):
    yield content


-def slugify(value, regex_subs=(), preserve_case=False, use_unicode=False):
+def slugify(
+    value: str,
+    regex_subs: Iterable[Tuple[str, str]] = (),
+    preserve_case: bool = False,
+    use_unicode: bool = False,
+) -> str:
    """
    Normalizes string, converts to lowercase, removes non-alpha characters,
    and converts spaces to hyphens.
@ -233,9 +271,10 @@ def slugify(value, regex_subs=(), preserve_case=False, use_unicode=False):
    """

    import unicodedata
+
    import unidecode

-    def normalize_unicode(text):
+    def normalize_unicode(text: str) -> str:
        # normalize text by compatibility composition
        # see: https://en.wikipedia.org/wiki/Unicode_equivalence
        return unicodedata.normalize("NFKC", text)
@ -262,7 +301,9 @@ def slugify(value, regex_subs=(), preserve_case=False, use_unicode=False):
    return value.strip()


-def copy(source, destination, ignores=None):
+def copy(
+    source: str, destination: str, ignores: Optional[Iterable[str]] = None
+) -> None:
    """Recursively copy source into destination.

    If source is a file, destination has to be a file as well.
@ -334,7 +375,7 @@ def copy(source, destination, ignores=None):
                    )


-def copy_file(source, destination):
+def copy_file(source: str, destination: str) -> None:
    """Copy a file"""
    try:
        shutil.copyfile(source, destination)
@ -344,7 +385,7 @@ def copy_file(source, destination):
        )


-def clean_output_dir(path, retention):
+def clean_output_dir(path: str, retention: Iterable[str]) -> None:
    """Remove all files from output directory except those in retention list"""

    if not os.path.exists(path):
@ -381,24 +422,24 @@ def clean_output_dir(path, retention):
            logger.error("Unable to delete %s, file type unknown", file)


-def get_relative_path(path):
+def get_relative_path(path: str) -> str:
    """Return the relative path from the given path to the root path."""
    components = split_all(path)
-    if len(components) <= 1:
+    if components is None or len(components) <= 1:
        return os.curdir
    else:
        parents = [os.pardir] * (len(components) - 1)
        return os.path.join(*parents)


-def path_to_url(path):
+def path_to_url(path: str) -> str:
    """Return the URL corresponding to a given path."""
    if path is not None:
        path = posixize_path(path)
    return path


-def posixize_path(rel_path):
+def posixize_path(rel_path: str) -> str:
    """Use '/' as path separator, so that source references,
    like '{static}/foo/bar.jpg' or 'extras/favicon.ico',
    will work on Windows as well as on Mac and Linux."""
@ -427,20 +468,20 @@ class _HTMLWordTruncator(HTMLParser):
    _singlets = ("br", "col", "link", "base", "img", "param", "area", "hr", "input")

    class TruncationCompleted(Exception):
-        def __init__(self, truncate_at):
+        def __init__(self, truncate_at: int) -> None:
            super().__init__(truncate_at)
            self.truncate_at = truncate_at

-    def __init__(self, max_words):
+    def __init__(self, max_words: int) -> None:
        super().__init__(convert_charrefs=False)

        self.max_words = max_words
        self.words_found = 0
        self.open_tags = []
        self.last_word_end = None
-        self.truncate_at = None
+        self.truncate_at: Optional[int] = None

-    def feed(self, *args, **kwargs):
+    def feed(self, *args, **kwargs) -> None:
        try:
            super().feed(*args, **kwargs)
        except self.TruncationCompleted as exc:
@ -448,29 +489,29 @@ class _HTMLWordTruncator(HTMLParser):
        else:
            self.truncate_at = None

-    def getoffset(self):
+    def getoffset(self) -> int:
        line_start = 0
        lineno, line_offset = self.getpos()
        for i in range(lineno - 1):
            line_start = self.rawdata.index("\n", line_start) + 1
        return line_start + line_offset

-    def add_word(self, word_end):
+    def add_word(self, word_end: int) -> None:
        self.words_found += 1
        self.last_word_end = None
        if self.words_found == self.max_words:
            raise self.TruncationCompleted(word_end)

-    def add_last_word(self):
+    def add_last_word(self) -> None:
        if self.last_word_end is not None:
            self.add_word(self.last_word_end)

-    def handle_starttag(self, tag, attrs):
+    def handle_starttag(self, tag: str, attrs: Any) -> None:
        self.add_last_word()
        if tag not in self._singlets:
            self.open_tags.insert(0, tag)

-    def handle_endtag(self, tag):
+    def handle_endtag(self, tag: str) -> None:
        self.add_last_word()
        try:
            i = self.open_tags.index(tag)
@ -481,7 +522,7 @@ class _HTMLWordTruncator(HTMLParser):
            # all unclosed intervening start tags with omitted end tags
            del self.open_tags[: i + 1]

-    def handle_data(self, data):
+    def handle_data(self, data: str) -> None:
        word_end = 0
        offset = self.getoffset()

@ -499,7 +540,7 @@ class _HTMLWordTruncator(HTMLParser):
        if word_end < len(data):
            self.add_last_word()

-    def _handle_ref(self, name, char):
+    def _handle_ref(self, name: str, char: str) -> None:
        """
        Called by handle_entityref() or handle_charref() when a ref like
        `&mdash;`, `&#8212;`, or `&#x2014` is found.
@ -543,7 +584,7 @@ class _HTMLWordTruncator(HTMLParser):
            else:
                self.add_last_word()

-    def handle_entityref(self, name):
+    def handle_entityref(self, name: str) -> None:
        """
        Called when an entity ref like '&mdash;' is found

@ -556,7 +597,7 @@ class _HTMLWordTruncator(HTMLParser):
            char = ""
        self._handle_ref(name, char)

-    def handle_charref(self, name):
+    def handle_charref(self, name: str) -> None:
        """
        Called when a char ref like '&#8212;' or '&#x2014' is found

@ -574,7 +615,7 @@ class _HTMLWordTruncator(HTMLParser):
        self._handle_ref("#" + name, char)


-def truncate_html_words(s, num, end_text="…"):
+def truncate_html_words(s: str, num: int, end_text: str = "…") -> str:
    """Truncates HTML to a certain number of words.

    (not counting tags and comments). Closes opened tags if they were correctly
@ -600,7 +641,10 @@ def truncate_html_words(s, num, end_text="…"):
    return out


-def process_translations(content_list, translation_id=None):
+def process_translations(
+    content_list: List[Content],
+    translation_id: Optional[Union[str, Collection[str]]] = None,
+) -> Tuple[List[Content], List[Content]]:
    """Finds translations and returns them.

    For each content_list item, populates the 'translations' attribute, and
@ -658,7 +702,7 @@ def process_translations(content_list, translation_id=None):
    return index, translations


-def get_original_items(items, with_str):
+def get_original_items(items: List[Content], with_str: str) -> List[Content]:
    def _warn_source_paths(msg, items, *extra):
        args = [len(items)]
        args.extend(extra)
@ -698,7 +742,10 @@ def get_original_items(items, with_str):
    return original_items


-def order_content(content_list, order_by="slug"):
+def order_content(
+    content_list: List[Content],
+    order_by: Union[str, Callable[[Content], Any], None] = "slug",
+) -> List[Content]:
    """Sorts content.

    order_by can be a string of an attribute or sorting function. If order_by
@ -758,7 +805,11 @@ def order_content(content_list, order_by="slug"):
    return content_list


-def wait_for_changes(settings_file, reader_class, settings):
+def wait_for_changes(
+    settings_file: str,
+    reader_class: Type["Readers"],
+    settings: "Settings",
+):
    content_path = settings.get("PATH", "")
    theme_path = settings.get("THEME", "")
    ignore_files = {
@ -788,13 +839,15 @@ def wait_for_changes(settings_file, reader_class, settings):
    return next(
        watchfiles.watch(
            *watching_paths,
-            watch_filter=watchfiles.DefaultFilter(ignore_entity_patterns=ignore_files),
+            watch_filter=watchfiles.DefaultFilter(ignore_entity_patterns=ignore_files),  # type: ignore
            rust_timeout=0,
        )
    )


-def set_date_tzinfo(d, tz_name=None):
+def set_date_tzinfo(
+    d: datetime.datetime, tz_name: Optional[str] = None
+) -> datetime.datetime:
    """Set the timezone for dates that don't have tzinfo"""
    if tz_name and not d.tzinfo:
        timezone = ZoneInfo(tz_name)
@ -805,11 +858,11 @@ def set_date_tzinfo(d, tz_name=None):
    return d


-def mkdir_p(path):
+def mkdir_p(path: str) -> None:
    os.makedirs(path, exist_ok=True)


-def split_all(path):
+def split_all(path: Union[str, pathlib.Path, None]) -> Optional[Sequence[str]]:
    """Split a path into a list of components

    While os.path.split() splits a single component off the back of
@ -840,12 +893,12 @@ def split_all(path):
        )


-def path_to_file_url(path):
+def path_to_file_url(path: str) -> str:
    """Convert file-system path to file:// URL"""
    return urllib.parse.urljoin("file://", urllib.request.pathname2url(path))


-def maybe_pluralize(count, singular, plural):
+def maybe_pluralize(count: int, singular: str, plural: str) -> str:
    """
    Returns a formatted string containing count and plural if count is not 1
    Returns count and singular if count is 1
@ -862,7 +915,9 @@ def maybe_pluralize(count, singular, plural):


@contextmanager
-def temporary_locale(temp_locale=None, lc_category=locale.LC_ALL):
+def temporary_locale(
+    temp_locale: Optional[str] = None, lc_category: int = locale.LC_ALL
+) -> Generator[None, None, None]:
    """
    Enable code to run in a context with a temporary locale
    Resets the locale back when exiting context.
@ -871,6 +926,10 @@ def temporary_locale(temp_locale=None, lc_category=locale.LC_ALL):
    class to use the C locale.
    """
    orig_locale = locale.setlocale(lc_category)
+    # python has issue with Turkish_Türkiye.1254 locale, replace it to
+    # something accepted: Turkish
+    if orig_locale == "Turkish_Türkiye.1254":
+        orig_locale = "Turkish"
    if temp_locale:
        locale.setlocale(lc_category, temp_locale)
    yield
--- a/pyproject.toml
+++ b/pyproject.toml
@ -69,6 +69,7 @@ changelog-header = "###############"
 version-header = "="

 [tool.pdm]
+ignore_package_warnings = ["sphinx"]

 [tool.pdm.scripts]
 docbuild = "invoke docbuild"
@ -95,7 +96,7 @@ dev = [
    "pytest-xdist>=3.4.0",
    "tox>=4.11.3",
    "invoke>=2.2.0",
-    "ruff>=0.1.5",
+    "ruff>=0.1.15,<0.2.0",
    "tomli>=2.0.1; python_version < \"3.11\"",
 ]

--- a/tox.ini
+++ b/tox.ini
@ -1,5 +1,5 @@
 [tox]
-envlist = py{3.8,3.9,3.10,3.11.3.12},docs
+envlist = py{3.8,3.9,3.10,3.11,3.12},docs

 [testenv]
 basepython =
@ -18,7 +18,7 @@ commands =
    pytest -s --cov=pelican pelican

 [testenv:docs]
-basepython = python3.9
+basepython = python3.11
 deps =
    -rrequirements/docs.pip
 changedir = docs