Merge branch 'getpelican:master' into Chinese-translation

This commit is contained in:
GeorgeHu 2024-03-13 10:20:18 +08:00 committed by GitHub
commit 051b749b00
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
19 changed files with 627 additions and 169 deletions

View file

@ -28,7 +28,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v3
uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v4
with:

View file

@ -23,9 +23,9 @@ jobs:
python: "3.9"
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python }}
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python }}
cache: "pip"
@ -52,10 +52,10 @@ jobs:
name: Lint
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: pdm-project/setup-pdm@v3
- uses: actions/checkout@v4
- uses: pdm-project/setup-pdm@v4
with:
python-version: 3.9
python-version: "3.11"
cache: true
cache-dependency-path: ./pyproject.toml
- name: Install dependencies
@ -64,16 +64,16 @@ jobs:
- name: Run linters
run: pdm lint --diff
- name: Run pre-commit checks on all files
uses: pre-commit/action@v3.0.0
uses: pre-commit/action@v3.0.1
build:
name: Test build
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: pdm-project/setup-pdm@v3
- uses: actions/checkout@v4
- uses: pdm-project/setup-pdm@v4
with:
python-version: 3.9
python-version: "3.11"
cache: true
cache-dependency-path: ./pyproject.toml
- name: Install dependencies
@ -88,11 +88,11 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: "3.9"
python-version: "3.11"
cache: "pip"
cache-dependency-path: "**/requirements/*"
- name: Install tox
@ -100,7 +100,7 @@ jobs:
- name: Check
run: tox -e docs
- name: cache the docs for inspection
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: docs
path: docs/_build/html/
@ -117,14 +117,14 @@ jobs:
id-token: write
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
token: ${{ secrets.GH_TOKEN }}
- name: Set up Python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: "3.9"
python-version: "3.11"
- name: Check release
id: check_release

View file

@ -439,8 +439,8 @@ For **Markdown**, one must rely on an extension. For example, using the `mdx_inc
Importing an existing site
==========================
It is possible to import your site from WordPress, Tumblr, Dotclear, and RSS
feeds using a simple script. See :ref:`import`.
It is possible to import your site from several other blogging platforms
(such as WordPress, Tumblr, and others) using a simple script. See :ref:`import`.
Translations
============
@ -631,7 +631,7 @@ are not included by default in tag, category, and author indexes, nor in the
main article feed. This has the effect of creating an "unlisted" post.
.. _W3C ISO 8601: https://www.w3.org/TR/NOTE-datetime
.. _AsciiDoc: https://www.methods.co.nz/asciidoc/
.. _AsciiDoc: https://asciidoc.org
.. _Pelican Plugins: https://github.com/pelican-plugins
.. _pelican-plugins: https://github.com/getpelican/pelican-plugins
.. _Python-Markdown: https://github.com/Python-Markdown/markdown

View file

@ -11,6 +11,7 @@ software to reStructuredText or Markdown. The supported import formats are:
- Blogger XML export
- Dotclear export
- Medium export
- Tumblr API
- WordPress XML export
- RSS/Atom feed
@ -26,6 +27,12 @@ not be converted (as Pelican also supports Markdown).
manually, or use a plugin such as `More Categories`_ that enables multiple
categories per article.
.. note::
Imported pages may contain links to images that still point to the original site.
So you might want to download those images into your local content and manually
re-link them from the relevant pages of your site.
Dependencies
============
@ -65,6 +72,7 @@ Optional arguments
-h, --help Show this help message and exit
--blogger Blogger XML export (default: False)
--dotclear Dotclear export (default: False)
--medium Medium export (default: False)
--tumblr Tumblr API (default: False)
--wpfile WordPress XML export (default: False)
--feed Feed to parse (default: False)
@ -80,8 +88,7 @@ Optional arguments
(default: False)
--filter-author Import only posts from the specified author
--strip-raw Strip raw HTML code that can't be converted to markup
such as flash embeds or iframes (wordpress import
only) (default: False)
such as flash embeds or iframes (default: False)
--wp-custpost Put wordpress custom post types in directories. If
used with --dir-cat option directories will be created
as "/post_type/category/" (wordpress import only)
@ -113,6 +120,14 @@ For Dotclear::
$ pelican-import --dotclear -o ~/output ~/backup.txt
For Medium::
$ pelican-import --medium -o ~/output ~/medium-export/posts/
The Medium export is a zip file. Unzip it, and point this tool to the
"posts" subdirectory. For more information on how to export, see
https://help.medium.com/hc/en-us/articles/115004745787-Export-your-account-data.
For Tumblr::
$ pelican-import --tumblr -o ~/output --blogname=<blogname> <api_key>
@ -121,6 +136,15 @@ For WordPress::
$ pelican-import --wpfile -o ~/output ~/posts.xml
For Medium (an example of using an RSS feed)::
$ python -m pip install feedparser
$ pelican-import --feed https://medium.com/feed/@username
.. note::
The RSS feed may only return the most recent posts — not all of them.
Tests
=====

View file

@ -80,7 +80,14 @@ class Pelican:
plugin.register()
self.plugins.append(plugin)
except Exception as e:
logger.error("Cannot register plugin `%s`\n%s", name, e)
logger.error(
"Cannot register plugin `%s`\n%s",
name,
e,
stacklevel=2,
)
if self.settings.get("DEBUG", False):
console.print_exception()
self.settings["PLUGINS"] = [get_plugin_name(p) for p in self.plugins]
@ -120,12 +127,15 @@ class Pelican:
if hasattr(p, "generate_context"):
p.generate_context()
# for plugins that create/edit the summary
logger.debug("Signal all_generators_finalized.send(<generators>)")
signals.all_generators_finalized.send(generators)
# update links in the summary, etc
for p in generators:
if hasattr(p, "refresh_metadata_intersite_links"):
p.refresh_metadata_intersite_links()
signals.all_generators_finalized.send(generators)
writer = self._get_writer()
for p in generators:

View file

@ -6,7 +6,8 @@ import os
import re
from datetime import timezone
from html import unescape
from urllib.parse import unquote, urljoin, urlparse, urlunparse
from typing import Any, Dict, Optional, Set, Tuple
from urllib.parse import ParseResult, unquote, urljoin, urlparse, urlunparse
try:
from zoneinfo import ZoneInfo
@ -15,7 +16,7 @@ except ModuleNotFoundError:
from pelican.plugins import signals
from pelican.settings import DEFAULT_CONFIG
from pelican.settings import DEFAULT_CONFIG, Settings
from pelican.utils import (
deprecated_attribute,
memoized,
@ -44,12 +45,20 @@ class Content:
"""
default_template: Optional[str] = None
mandatory_properties: Tuple[str, ...] = ()
@deprecated_attribute(old="filename", new="source_path", since=(3, 2, 0))
def filename():
return None
def __init__(
self, content, metadata=None, settings=None, source_path=None, context=None
self,
content: str,
metadata: Optional[Dict[str, Any]] = None,
settings: Optional[Settings] = None,
source_path: Optional[str] = None,
context: Optional[Dict[Any, Any]] = None,
):
if metadata is None:
metadata = {}
@ -156,10 +165,10 @@ class Content:
signals.content_object_init.send(self)
def __str__(self):
def __str__(self) -> str:
return self.source_path or repr(self)
def _has_valid_mandatory_properties(self):
def _has_valid_mandatory_properties(self) -> bool:
"""Test mandatory properties are set."""
for prop in self.mandatory_properties:
if not hasattr(self, prop):
@ -169,7 +178,7 @@ class Content:
return False
return True
def _has_valid_save_as(self):
def _has_valid_save_as(self) -> bool:
"""Return true if save_as doesn't write outside output path, false
otherwise."""
try:
@ -190,7 +199,7 @@ class Content:
return True
def _has_valid_status(self):
def _has_valid_status(self) -> bool:
if hasattr(self, "allowed_statuses"):
if self.status not in self.allowed_statuses:
logger.error(
@ -204,7 +213,7 @@ class Content:
# if undefined we allow all
return True
def is_valid(self):
def is_valid(self) -> bool:
"""Validate Content"""
# Use all() to not short circuit and get results of all validations
return all(
@ -216,7 +225,7 @@ class Content:
)
@property
def url_format(self):
def url_format(self) -> Dict[str, Any]:
"""Returns the URL, formatted with the proper values"""
metadata = copy.copy(self.metadata)
path = self.metadata.get("path", self.get_relative_source_path())
@ -232,19 +241,19 @@ class Content:
)
return metadata
def _expand_settings(self, key, klass=None):
def _expand_settings(self, key: str, klass: Optional[str] = None) -> str:
if not klass:
klass = self.__class__.__name__
fq_key = (f"{klass}_{key}").upper()
return str(self.settings[fq_key]).format(**self.url_format)
def get_url_setting(self, key):
def get_url_setting(self, key: str) -> str:
if hasattr(self, "override_" + key):
return getattr(self, "override_" + key)
key = key if self.in_default_lang else "lang_%s" % key
return self._expand_settings(key)
def _link_replacer(self, siteurl, m):
def _link_replacer(self, siteurl: str, m: re.Match) -> str:
what = m.group("what")
value = urlparse(m.group("value"))
path = value.path
@ -272,15 +281,15 @@ class Content:
# XXX Put this in a different location.
if what in {"filename", "static", "attach"}:
def _get_linked_content(key, url):
def _get_linked_content(key: str, url: ParseResult) -> Optional[Content]:
nonlocal value
def _find_path(path):
def _find_path(path: str) -> Optional[Content]:
if path.startswith("/"):
path = path[1:]
else:
# relative to the source path of this content
path = self.get_relative_source_path(
path = self.get_relative_source_path( # type: ignore
os.path.join(self.relative_dir, path)
)
return self._context[key].get(path, None)
@ -324,7 +333,7 @@ class Content:
linked_content = _get_linked_content(key, value)
if linked_content:
if what == "attach":
linked_content.attach_to(self)
linked_content.attach_to(self) # type: ignore
origin = joiner(siteurl, linked_content.url)
origin = origin.replace("\\", "/") # for Windows paths.
else:
@ -359,7 +368,7 @@ class Content:
return "".join((m.group("markup"), m.group("quote"), origin, m.group("quote")))
def _get_intrasite_link_regex(self):
def _get_intrasite_link_regex(self) -> re.Pattern:
intrasite_link_regex = self.settings["INTRASITE_LINK_REGEX"]
regex = r"""
(?P<markup><[^\>]+ # match tag with all url-value attributes
@ -370,7 +379,7 @@ class Content:
(?P=quote)""".format(intrasite_link_regex)
return re.compile(regex, re.X)
def _update_content(self, content, siteurl):
def _update_content(self, content: str, siteurl: str) -> str:
"""Update the content attribute.
Change all the relative paths of the content to relative paths
@ -386,7 +395,7 @@ class Content:
hrefs = self._get_intrasite_link_regex()
return hrefs.sub(lambda m: self._link_replacer(siteurl, m), content)
def get_static_links(self):
def get_static_links(self) -> Set[str]:
static_links = set()
hrefs = self._get_intrasite_link_regex()
for m in hrefs.finditer(self._content):
@ -402,15 +411,15 @@ class Content:
path = self.get_relative_source_path(
os.path.join(self.relative_dir, path)
)
path = path.replace("%20", " ")
path = path.replace("%20", " ") # type: ignore
static_links.add(path)
return static_links
def get_siteurl(self):
def get_siteurl(self) -> str:
return self._context.get("localsiteurl", "")
@memoized
def get_content(self, siteurl):
def get_content(self, siteurl: str) -> str:
if hasattr(self, "_get_content"):
content = self._get_content()
else:
@ -418,11 +427,11 @@ class Content:
return self._update_content(content, siteurl)
@property
def content(self):
def content(self) -> str:
return self.get_content(self.get_siteurl())
@memoized
def get_summary(self, siteurl):
def get_summary(self, siteurl: str) -> str:
"""Returns the summary of an article.
This is based on the summary metadata if set, otherwise truncate the
@ -441,10 +450,10 @@ class Content:
)
@property
def summary(self):
def summary(self) -> str:
return self.get_summary(self.get_siteurl())
def _get_summary(self):
def _get_summary(self) -> str:
"""deprecated function to access summary"""
logger.warning(
@ -454,34 +463,36 @@ class Content:
return self.summary
@summary.setter
def summary(self, value):
def summary(self, value: str):
"""Dummy function"""
pass
@property
def status(self):
def status(self) -> str:
return self._status
@status.setter
def status(self, value):
def status(self, value: str) -> None:
# TODO maybe typecheck
self._status = value.lower()
@property
def url(self):
def url(self) -> str:
return self.get_url_setting("url")
@property
def save_as(self):
def save_as(self) -> str:
return self.get_url_setting("save_as")
def _get_template(self):
def _get_template(self) -> str:
if hasattr(self, "template") and self.template is not None:
return self.template
else:
return self.default_template
def get_relative_source_path(self, source_path=None):
def get_relative_source_path(
self, source_path: Optional[str] = None
) -> Optional[str]:
"""Return the relative path (from the content path) to the given
source_path.
@ -501,7 +512,7 @@ class Content:
)
@property
def relative_dir(self):
def relative_dir(self) -> str:
return posixize_path(
os.path.dirname(
os.path.relpath(
@ -511,7 +522,7 @@ class Content:
)
)
def refresh_metadata_intersite_links(self):
def refresh_metadata_intersite_links(self) -> None:
for key in self.settings["FORMATTED_FIELDS"]:
if key in self.metadata and key != "summary":
value = self._update_content(self.metadata[key], self.get_siteurl())
@ -519,13 +530,16 @@ class Content:
setattr(self, key.lower(), value)
# _summary is an internal variable that some plugins may be writing to,
# so ensure changes to it are picked up
if (
"summary" in self.settings["FORMATTED_FIELDS"]
and "summary" in self.metadata
):
self._summary = self._update_content(self._summary, self.get_siteurl())
self.metadata["summary"] = self._summary
# so ensure changes to it are picked up, and write summary back to it
if "summary" in self.settings["FORMATTED_FIELDS"]:
if hasattr(self, "_summary"):
self.metadata["summary"] = self._summary
if "summary" in self.metadata:
self.metadata["summary"] = self._update_content(
self.metadata["summary"], self.get_siteurl()
)
self._summary = self.metadata["summary"]
class Page(Content):
@ -534,7 +548,7 @@ class Page(Content):
default_status = "published"
default_template = "page"
def _expand_settings(self, key):
def _expand_settings(self, key: str) -> str:
klass = "draft_page" if self.status == "draft" else None
return super()._expand_settings(key, klass)
@ -561,7 +575,7 @@ class Article(Content):
if not hasattr(self, "date") and self.status == "draft":
self.date = datetime.datetime.max.replace(tzinfo=self.timezone)
def _expand_settings(self, key):
def _expand_settings(self, key: str) -> str:
klass = "draft" if self.status == "draft" else "article"
return super()._expand_settings(key, klass)
@ -571,7 +585,7 @@ class Static(Content):
default_status = "published"
default_template = None
def __init__(self, *args, **kwargs):
def __init__(self, *args, **kwargs) -> None:
super().__init__(*args, **kwargs)
self._output_location_referenced = False
@ -588,18 +602,18 @@ class Static(Content):
return None
@property
def url(self):
def url(self) -> str:
# Note when url has been referenced, so we can avoid overriding it.
self._output_location_referenced = True
return super().url
@property
def save_as(self):
def save_as(self) -> str:
# Note when save_as has been referenced, so we can avoid overriding it.
self._output_location_referenced = True
return super().save_as
def attach_to(self, content):
def attach_to(self, content: Content) -> None:
"""Override our output directory with that of the given content object."""
# Determine our file's new output path relative to the linking
@ -624,7 +638,7 @@ class Static(Content):
new_url = path_to_url(new_save_as)
def _log_reason(reason):
def _log_reason(reason: str) -> None:
logger.warning(
"The {attach} link in %s cannot relocate "
"%s because %s. Falling back to "

View file

@ -384,8 +384,8 @@ class ArticlesGenerator(CachingGenerator):
str(self.settings["CATEGORY_FEED_ATOM"]).format(slug=cat.slug),
self.settings.get(
"CATEGORY_FEED_ATOM_URL",
str(self.settings["CATEGORY_FEED_ATOM"]).format(slug=cat.slug),
),
str(self.settings["CATEGORY_FEED_ATOM"]),
).format(slug=cat.slug),
feed_title=cat.name,
)
@ -396,8 +396,8 @@ class ArticlesGenerator(CachingGenerator):
str(self.settings["CATEGORY_FEED_RSS"]).format(slug=cat.slug),
self.settings.get(
"CATEGORY_FEED_RSS_URL",
str(self.settings["CATEGORY_FEED_RSS"]).format(slug=cat.slug),
),
str(self.settings["CATEGORY_FEED_RSS"]),
).format(slug=cat.slug),
feed_title=cat.name,
feed_type="rss",
)
@ -410,8 +410,8 @@ class ArticlesGenerator(CachingGenerator):
str(self.settings["AUTHOR_FEED_ATOM"]).format(slug=auth.slug),
self.settings.get(
"AUTHOR_FEED_ATOM_URL",
str(self.settings["AUTHOR_FEED_ATOM"]).format(slug=auth.slug),
),
str(self.settings["AUTHOR_FEED_ATOM"]),
).format(slug=auth.slug),
feed_title=auth.name,
)
@ -422,8 +422,8 @@ class ArticlesGenerator(CachingGenerator):
str(self.settings["AUTHOR_FEED_RSS"]).format(slug=auth.slug),
self.settings.get(
"AUTHOR_FEED_RSS_URL",
str(self.settings["AUTHOR_FEED_RSS"]).format(slug=auth.slug),
),
str(self.settings["AUTHOR_FEED_RSS"]),
).format(slug=auth.slug),
feed_title=auth.name,
feed_type="rss",
)
@ -437,8 +437,8 @@ class ArticlesGenerator(CachingGenerator):
str(self.settings["TAG_FEED_ATOM"]).format(slug=tag.slug),
self.settings.get(
"TAG_FEED_ATOM_URL",
str(self.settings["TAG_FEED_ATOM"]).format(slug=tag.slug),
),
str(self.settings["TAG_FEED_ATOM"]),
).format(slug=tag.slug),
feed_title=tag.name,
)
@ -449,8 +449,8 @@ class ArticlesGenerator(CachingGenerator):
str(self.settings["TAG_FEED_RSS"]).format(slug=tag.slug),
self.settings.get(
"TAG_FEED_RSS_URL",
str(self.settings["TAG_FEED_RSS"]).format(slug=tag.slug),
),
str(self.settings["TAG_FEED_RSS"]),
).format(slug=tag.slug),
feed_title=tag.name,
feed_type="rss",
)
@ -471,10 +471,8 @@ class ArticlesGenerator(CachingGenerator):
str(self.settings["TRANSLATION_FEED_ATOM"]).format(lang=lang),
self.settings.get(
"TRANSLATION_FEED_ATOM_URL",
str(self.settings["TRANSLATION_FEED_ATOM"]).format(
lang=lang
),
),
str(self.settings["TRANSLATION_FEED_ATOM"]),
).format(lang=lang),
)
if self.settings.get("TRANSLATION_FEED_RSS"):
writer.write_feed(

View file

@ -85,13 +85,39 @@ class FatalLogger(LimitLogger):
warnings_fatal = False
errors_fatal = False
def warning(self, *args, **kwargs):
super().warning(*args, **kwargs)
def warning(self, *args, stacklevel=1, **kwargs):
    """
    Displays a logging warning.

    Wrapping it here allows Pelican to filter warnings, and conditionally
    make warnings fatal.

    Args:
        stacklevel (int): the stacklevel that would be used to display the
            calling location, except for this function. Adjusting the
            stacklevel allows you to see the "true" calling location of the
            warning, rather than this wrapper location.

    Raises:
        RuntimeError: after the warning has been logged, if
            ``FatalLogger.warnings_fatal`` is set.
    """
    # Skip this wrapper's own frame so the caller's location is reported.
    stacklevel += 1
    super().warning(*args, stacklevel=stacklevel, **kwargs)
    if FatalLogger.warnings_fatal:
        raise RuntimeError("Warning encountered")
def error(self, *args, **kwargs):
super().error(*args, **kwargs)
def error(self, *args, stacklevel=1, **kwargs):
    """
    Displays a logging error.

    Wrapping it here allows Pelican to filter errors, and conditionally
    make errors fatal.

    Args:
        stacklevel (int): the stacklevel that would be used to display the
            calling location, except for this function. Adjusting the
            stacklevel allows you to see the "true" calling location of the
            error, rather than this wrapper location.

    Raises:
        RuntimeError: after the error has been logged, if
            ``FatalLogger.errors_fatal`` is set.
    """
    # Skip this wrapper's own frame so the caller's location is reported.
    stacklevel += 1
    super().error(*args, stacklevel=stacklevel, **kwargs)
    if FatalLogger.errors_fatal:
        raise RuntimeError("Error encountered")

View file

@ -8,11 +8,13 @@ import re
import sys
from os.path import isabs
from pathlib import Path
from types import ModuleType
from typing import Any, Dict, Optional
from pelican.log import LimitFilter
def load_source(name, path):
def load_source(name: str, path: str) -> ModuleType:
spec = importlib.util.spec_from_file_location(name, path)
mod = importlib.util.module_from_spec(spec)
sys.modules[name] = mod
@ -22,6 +24,8 @@ def load_source(name, path):
logger = logging.getLogger(__name__)
Settings = Dict[str, Any]
DEFAULT_THEME = os.path.join(
os.path.dirname(os.path.abspath(__file__)), "themes", "notmyidea"
)
@ -177,7 +181,9 @@ DEFAULT_CONFIG = {
PYGMENTS_RST_OPTIONS = None
def read_settings(path=None, override=None):
def read_settings(
path: Optional[str] = None, override: Optional[Settings] = None
) -> Settings:
settings = override or {}
if path:
@ -221,7 +227,7 @@ def read_settings(path=None, override=None):
return settings
def get_settings_from_module(module=None):
def get_settings_from_module(module: Optional[ModuleType] = None) -> Settings:
"""Loads settings from a module, returns a dictionary."""
context = {}
@ -230,7 +236,7 @@ def get_settings_from_module(module=None):
return context
def get_settings_from_file(path):
def get_settings_from_file(path: str) -> Settings:
"""Loads settings from a file path, returning a dict."""
name, ext = os.path.splitext(os.path.basename(path))
@ -238,7 +244,7 @@ def get_settings_from_file(path):
return get_settings_from_module(module)
def get_jinja_environment(settings):
def get_jinja_environment(settings: Settings) -> Settings:
"""Sets the environment for Jinja"""
jinja_env = settings.setdefault(
@ -253,7 +259,7 @@ def get_jinja_environment(settings):
return settings
def _printf_s_to_format_field(printf_string, format_field):
def _printf_s_to_format_field(printf_string: str, format_field: str) -> str:
"""Tries to replace %s with {format_field} in the provided printf_string.
Raises ValueError in case of failure.
"""
@ -269,7 +275,7 @@ def _printf_s_to_format_field(printf_string, format_field):
return result
def handle_deprecated_settings(settings):
def handle_deprecated_settings(settings: Settings) -> Settings:
"""Converts deprecated settings and issues warnings. Issues an exception
if both old and new setting is specified.
"""
@ -566,7 +572,7 @@ def handle_deprecated_settings(settings):
return settings
def configure_settings(settings):
def configure_settings(settings: Settings) -> Settings:
"""Provide optimizations, error checking, and warnings for the given
settings.
Also, specify the log messages to be ignored.

View file

@ -0,0 +1,4 @@
<hr/><h3>Title header</h3><p>A paragraph of content.</p><p>Paragraph number two.</p><p>A list:</p><ol><li>One.</li><li>Two.</li><li>Three.</li></ol><p>A link: <a data-href="https://example.com/example" href="https://example.com/example" target="_blank">link text</a>.</p><h3>Header 2</h3><p>A block quote:</p><blockquote>quote words <strong>strong words</strong></blockquote><p>after blockquote</p><figure><img data-height="282" data-image-id="image1.png" data-width="739" src="https://cdn-images-1.medium.com/max/800/image1.png"/><figcaption>A figure caption.</figcaption></figure><p>A final note: <a data-href="http://stats.stackexchange.com/" href="http://stats.stackexchange.com/" rel="noopener" target="_blank">Cross-Validated</a> has sometimes been helpful.</p><hr/><p><em>Next: </em><a data-href="https://medium.com/@username/post-url" href="https://medium.com/@username/post-url" target="_blank"><em>Next post</em>
</a></p>
<p>By <a href="https://medium.com/@username">User Name</a> on <a href="https://medium.com/p/medium-short-url"><time datetime="2017-04-21T17:11:55.799Z">April 21, 2017</time></a>.</p><p><a href="https://medium.com/@username/this-post-url">Canonical link</a></p><p>Exported from <a href="https://medium.com">Medium</a> on December 1, 2023.</p>

View file

@ -0,0 +1,72 @@
<!DOCTYPE html><html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"><title>A title</title><style>
* {
font-family: Georgia, Cambria, "Times New Roman", Times, serif;
}
html, body {
margin: 0;
padding: 0;
}
h1 {
font-size: 50px;
margin-bottom: 17px;
color: #333;
}
h2 {
font-size: 24px;
line-height: 1.6;
margin: 30px 0 0 0;
margin-bottom: 18px;
margin-top: 33px;
color: #333;
}
h3 {
font-size: 30px;
margin: 10px 0 20px 0;
color: #333;
}
header {
width: 640px;
margin: auto;
}
section {
width: 640px;
margin: auto;
}
section p {
margin-bottom: 27px;
font-size: 20px;
line-height: 1.6;
color: #333;
}
section img {
max-width: 640px;
}
footer {
padding: 0 20px;
margin: 50px 0;
text-align: center;
font-size: 12px;
}
.aspectRatioPlaceholder {
max-width: auto !important;
max-height: auto !important;
}
.aspectRatioPlaceholder-fill {
padding-bottom: 0 !important;
}
header,
section[data-field=subtitle],
section[data-field=description] {
display: none;
}
</style></head><body><article class="h-entry">
<header>
<h1 class="p-name">A name (like title)</h1>
</header>
<section data-field="subtitle" class="p-summary">
Summary (first several words of content)
</section>
<section data-field="body" class="e-content">
<section name="ad15" class="section section--body section--first"><div class="section-divider"><hr class="section-divider"></div><div class="section-content"><div class="section-inner sectionLayout--insetColumn"><h3 name="20a3" id="20a3" class="graf graf--h3 graf--leading graf--title">Title header</h3><p name="e3d6" id="e3d6" class="graf graf--p graf-after--h3">A paragraph of content.</p><p name="c7a8" id="c7a8" class="graf graf--p graf-after--p">Paragraph number two.</p><p name="42aa" id="42aa" class="graf graf--p graf-after--p">A list:</p><ol class="postList"><li name="d65f" id="d65f" class="graf graf--li graf-after--p">One.</li><li name="232b" id="232b" class="graf graf--li graf-after--li">Two.</li><li name="ef87" id="ef87" class="graf graf--li graf-after--li">Three.</li></ol><p name="e743" id="e743" class="graf graf--p graf-after--p">A link: <a href="https://example.com/example" data-href="https://example.com/example" class="markup--anchor markup--p-anchor" target="_blank">link text</a>.</p><h3 name="4cfd" id="4cfd" class="graf graf--h3 graf-after--p">Header 2</h3><p name="433c" id="433c" class="graf graf--p graf-after--p">A block quote:</p><blockquote name="3537" id="3537" class="graf graf--blockquote graf-after--p">quote words <strong class="markup--strong markup--blockquote-strong">strong words</strong></blockquote><p name="00cc" id="00cc" class="graf graf--p graf-after--blockquote">after blockquote</p><figure name="edb0" id="edb0" class="graf graf--figure graf-after--p"><img class="graf-image" data-image-id="image1.png" data-width="739" data-height="282" src="https://cdn-images-1.medium.com/max/800/image1.png"><figcaption class="imageCaption">A figure caption.</figcaption></figure><p name="f401" id="f401" class="graf graf--p graf-after--p graf--trailing">A final note: <a href="http://stats.stackexchange.com/" data-href="http://stats.stackexchange.com/" class="markup--anchor markup--p-anchor" rel="noopener" target="_blank">Cross-Validated</a> has sometimes 
been helpful.</p></div></div></section><section name="09a3" class="section section--body section--last"><div class="section-divider"><hr class="section-divider"></div><div class="section-content"><div class="section-inner sectionLayout--insetColumn"><p name="81e8" id="81e8" class="graf graf--p graf--leading"><em class="markup--em markup--p-em">Next: </em><a href="https://medium.com/@username/post-url" data-href="https://medium.com/@username/post-url" class="markup--anchor markup--p-anchor" target="_blank"><em class="markup--em markup--p-em">Next post</em>
</section>
<footer><p>By <a href="https://medium.com/@username" class="p-author h-card">User Name</a> on <a href="https://medium.com/p/medium-short-url"><time class="dt-published" datetime="2017-04-21T17:11:55.799Z">April 21, 2017</time></a>.</p><p><a href="https://medium.com/@username/this-post-url" class="p-canonical">Canonical link</a></p><p>Exported from <a href="https://medium.com">Medium</a> on December 1, 2023.</p></footer></article></body></html>

View file

@ -264,6 +264,7 @@ class TestArticlesGenerator(unittest.TestCase):
def test_generate_context(self):
articles_expected = [
["A title", "published", "medium_posts", "article"],
["Article title", "published", "Default", "article"],
[
"Article with markdown and summary metadata multi",
@ -391,13 +392,24 @@ class TestArticlesGenerator(unittest.TestCase):
# terms of process order will define the name for that category
categories = [cat.name for cat, _ in self.generator.categories]
categories_alternatives = (
sorted(["Default", "TestCategory", "Yeah", "test", "指導書"]),
sorted(["Default", "TestCategory", "yeah", "test", "指導書"]),
sorted(
["Default", "TestCategory", "medium_posts", "Yeah", "test", "指導書"]
),
sorted(
["Default", "TestCategory", "medium_posts", "yeah", "test", "指導書"]
),
)
self.assertIn(sorted(categories), categories_alternatives)
# test for slug
categories = [cat.slug for cat, _ in self.generator.categories]
categories_expected = ["default", "testcategory", "yeah", "test", "zhi-dao-shu"]
categories_expected = [
"default",
"testcategory",
"medium_posts",
"yeah",
"test",
"zhi-dao-shu",
]
self.assertEqual(sorted(categories), sorted(categories_expected))
def test_do_not_use_folder_as_category(self):
@ -549,7 +561,8 @@ class TestArticlesGenerator(unittest.TestCase):
granularity: {period["period"] for period in periods}
for granularity, periods in period_archives.items()
}
expected = {"year": {(1970,), (2010,), (2012,), (2014,)}}
self.maxDiff = None
expected = {"year": {(1970,), (2010,), (2012,), (2014,), (2017,)}}
self.assertEqual(expected, abbreviated_archives)
# Month archives enabled:
@ -570,7 +583,7 @@ class TestArticlesGenerator(unittest.TestCase):
for granularity, periods in period_archives.items()
}
expected = {
"year": {(1970,), (2010,), (2012,), (2014,)},
"year": {(1970,), (2010,), (2012,), (2014,), (2017,)},
"month": {
(1970, "January"),
(2010, "December"),
@ -578,6 +591,7 @@ class TestArticlesGenerator(unittest.TestCase):
(2012, "November"),
(2012, "October"),
(2014, "February"),
(2017, "April"),
},
}
self.assertEqual(expected, abbreviated_archives)
@ -602,7 +616,7 @@ class TestArticlesGenerator(unittest.TestCase):
for granularity, periods in period_archives.items()
}
expected = {
"year": {(1970,), (2010,), (2012,), (2014,)},
"year": {(1970,), (2010,), (2012,), (2014,), (2017,)},
"month": {
(1970, "January"),
(2010, "December"),
@ -610,6 +624,7 @@ class TestArticlesGenerator(unittest.TestCase):
(2012, "November"),
(2012, "October"),
(2014, "February"),
(2017, "April"),
},
"day": {
(1970, "January", 1),
@ -619,6 +634,7 @@ class TestArticlesGenerator(unittest.TestCase):
(2012, "October", 30),
(2012, "October", 31),
(2014, "February", 9),
(2017, "April", 21),
},
}
self.assertEqual(expected, abbreviated_archives)
@ -836,8 +852,12 @@ class TestArticlesGenerator(unittest.TestCase):
categories = sorted([category.name for category, _ in generator.categories])
categories_expected = [
sorted(["Default", "TestCategory", "yeah", "test", "指導書"]),
sorted(["Default", "TestCategory", "Yeah", "test", "指導書"]),
sorted(
["Default", "TestCategory", "medium_posts", "yeah", "test", "指導書"]
),
sorted(
["Default", "TestCategory", "medium_posts", "Yeah", "test", "指導書"]
),
]
self.assertIn(categories, categories_expected)
@ -864,6 +884,7 @@ class TestArticlesGenerator(unittest.TestCase):
generator.generate_context()
expected = [
"A title",
"An Article With Code Block To Test Typogrify Ignore",
"Article title",
"Article with Nonconformant HTML meta tags",

View file

@ -21,6 +21,10 @@ from pelican.tools.pelican_import import (
get_attachments,
tumblr2fields,
wp2fields,
mediumpost2fields,
mediumposts2fields,
strip_medium_post_content,
medium_slug,
)
from pelican.utils import path_to_file_url, slugify
@ -708,3 +712,82 @@ class TestTumblrImporter(TestCaseWithCLocale):
posts,
posts,
)
class TestMediumImporter(TestCaseWithCLocale):
    """Tests for the Medium export importer helpers."""

    def setUp(self):
        super().setUp()
        self.test_content_root = "pelican/tests/content"
        # Parsed content is close to, but not identical with, the raw export
        # (Beautiful Soup reorders attributes, for instance), so the expected
        # content is stored in its own fixture file.
        content_filename = f"{self.test_content_root}/medium_post_content.txt"
        with open(content_filename, encoding="utf-8") as the_content_file:
            raw_content = the_content_file.read()
        # Editors and scripts commonly append a final newline; require it in
        # the fixture and drop it before comparing.
        assert raw_content[-1] == "\n"
        expected_content = raw_content[:-1]
        self.post_tuple = (
            "A title",
            expected_content,
            # slug:
            "2017-04-21-medium-post",
            "2017-04-21 17:11",
            "User Name",
            None,
            (),
            "published",
            "article",
            "html",
        )

    def test_mediumpost2field(self):
        """Parse one post"""
        post_filename = f"{self.test_content_root}/medium_posts/2017-04-21_-medium-post--d1bf01d62ba3.html"
        val = mediumpost2fields(post_filename)
        self.assertEqual(self.post_tuple, val, val)

    def test_mediumposts2field(self):
        """Parse all posts in an export directory"""
        export_dir = f"{self.test_content_root}/medium_posts"
        posts = list(mediumposts2fields(export_dir))
        self.assertEqual(1, len(posts))
        self.assertEqual(self.post_tuple, posts[0])

    def test_strip_content(self):
        """Strip out unhelpful tags"""
        html_doc = (
            "<section>This keeps <i>lots</i> of <b>tags</b>, but not "
            "the <section>section</section> tags</section>"
        )
        expected = "This keeps <i>lots</i> of <b>tags</b>, but not the section tags"
        soup = BeautifulSoup(html_doc, "html.parser")
        self.assertEqual(expected, strip_medium_post_content(soup))

    def test_medium_slug(self):
        """Trailing hex ids and DRAFT markers are removed from slugs"""
        cases = (
            # Remove hex stuff at the end
            (
                "medium-export/posts/2017-04-27_A-long-title--2971442227dd.html",
                "2017-04-27_A-long-title",
            ),
            # Remove "--DRAFT" at the end
            (
                "medium-export/posts/2017-04-27_A-long-title--DRAFT.html",
                "2017-04-27_A-long-title",
            ),
            # Remove both (which happens)
            (
                "draft_How-to-do--DRAFT--87225c81dddd.html",
                "draft_How-to-do",
            ),
            # If no hex stuff, leave it alone
            (
                "medium-export/posts/2017-04-27_A-long-title.html",
                "2017-04-27_A-long-title",
            ),
        )
        for exported_path, expected_slug in cases:
            self.assertEqual(expected_slug, medium_slug(exported_path))

View file

@ -15,6 +15,8 @@ from urllib.error import URLError
from urllib.parse import quote, urlparse, urlsplit, urlunsplit
from urllib.request import urlretrieve
import dateutil.parser
# because logging.setLoggerClass has to be called before logging.getLogger
from pelican.log import init
from pelican.settings import DEFAULT_CONFIG
@ -114,19 +116,25 @@ def decode_wp_content(content, br=True):
return content
def xml_to_soup(xml):
"""Opens an xml file"""
def _import_bs4():
"""Import and return bs4, otherwise sys.exit."""
try:
from bs4 import BeautifulSoup
import bs4
except ImportError:
error = (
'Missing dependency "BeautifulSoup4" and "lxml" required to '
"import XML files."
)
sys.exit(error)
return bs4
def file_to_soup(xml, features="xml"):
"""Reads a file, returns soup."""
bs4 = _import_bs4()
with open(xml, encoding="utf-8") as infile:
xmlfile = infile.read()
soup = BeautifulSoup(xmlfile, "xml")
soup = bs4.BeautifulSoup(xmlfile, features)
return soup
@ -140,7 +148,7 @@ def get_filename(post_name, post_id):
def wp2fields(xml, wp_custpost=False):
"""Opens a wordpress XML file, and yield Pelican fields"""
soup = xml_to_soup(xml)
soup = file_to_soup(xml)
items = soup.rss.channel.findAll("item")
for item in items:
if item.find("status").string in ["publish", "draft"]:
@ -210,7 +218,7 @@ def wp2fields(xml, wp_custpost=False):
def blogger2fields(xml):
"""Opens a blogger XML file, and yield Pelican fields"""
soup = xml_to_soup(xml)
soup = file_to_soup(xml)
entries = soup.feed.findAll("entry")
for entry in entries:
raw_kind = entry.find(
@ -536,6 +544,133 @@ def tumblr2fields(api_key, blogname):
posts = _get_tumblr_posts(api_key, blogname, offset)
def strip_medium_post_content(soup) -> str:
    """Strip some tags and attributes from medium post content.

    For example, the 'section' and 'div' tags cause trouble while rendering.

    The problem with these tags is you can get a section divider (--------------)
    that is not between two pieces of content.  For example:

      Some text.

      .. container:: section-divider

         --------------

      .. container:: section-content

      More content.

    In this case, pandoc complains: "Unexpected section title or transition."

    Also, the "id" and "name" attributes in tags cause similar problems.  They show
    up in .rst as extra junk that separates transitions.

    ``soup`` is a parsed Beautiful Soup node; it is modified in place.  The
    return value is the concatenated markup of the node's children, so the
    outermost node itself is not part of the result.
    """
    bs4 = _import_bs4()

    # Remove tags
    # section and div cause problems
    # footer also can cause problems, and has nothing we want to keep
    # See https://stackoverflow.com/a/8439761
    # ``find_all`` / ``unwrap`` are the current names of the deprecated
    # ``findAll`` / ``replaceWithChildren`` aliases.  The matches are collected
    # before any unwrapping happens, so modifying the tree in the loop is safe.
    invalid_tags = ["section", "div", "footer"]
    for match in soup.find_all(invalid_tags):
        match.unwrap()

    # Remove attributes
    # See https://stackoverflow.com/a/9045719
    invalid_attributes = ["name", "id", "class"]
    for tag in soup.descendants:
        # Descendants also include text nodes, which carry no attributes.
        if isinstance(tag, bs4.element.Tag):
            tag.attrs = {
                key: value
                for key, value in tag.attrs.items()
                if key not in invalid_attributes
            }

    # Get the string of all content, keeping other tags
    return "".join(str(element) for element in soup.contents)
def mediumpost2fields(filepath: str) -> tuple:
    """Take an HTML post from a medium export, return Pelican fields.

    The returned tuple is ``(title, content, slug, date, author, None, tags,
    status, kind, "html")``.  ``date`` is formatted as ``%Y-%m-%d %H:%M`` and
    ``status`` is ``"published"`` when the post carries a ``dt-published``
    time element; otherwise ``date`` is ``None`` and ``status`` is ``"draft"``.
    The sixth field is always ``None`` (presumably the categories slot --
    confirm against ``fields2pelican``).

    Raises ValueError if the file cannot be parsed or has no ``e-content``
    section.
    """
    soup = file_to_soup(filepath, "html.parser")
    if not soup:
        raise ValueError(f"{filepath} could not be parsed by beautifulsoup")
    kind = "article"

    content = soup.find("section", class_="e-content")
    if not content:
        raise ValueError(f"{filepath}: Post has no content")

    # A document without a <title> element (or with an empty one) gets an
    # empty title instead of failing with AttributeError on ``None.string``.
    title_tag = soup.find("title")
    title = (title_tag.string if title_tag is not None else None) or ""

    raw_date = soup.find("time", class_="dt-published")
    date = None
    if raw_date:
        # This datetime can include timezone, e.g., "2017-04-21T17:11:55.799Z"
        # python before 3.11 can't parse the timezone using datetime.fromisoformat
        # See also https://docs.python.org/3.10/library/datetime.html#datetime.datetime.fromisoformat
        # "This does not support parsing arbitrary ISO 8601 strings"
        # So, we use dateutil.parser, which can handle it.
        date_object = dateutil.parser.parse(raw_date.attrs["datetime"])
        date = date_object.strftime("%Y-%m-%d %H:%M")
        status = "published"
    else:
        status = "draft"

    author = soup.find("a", class_="p-author h-card")
    if author:
        author = author.string

    # Now that we're done with classes, we can strip the content
    content = strip_medium_post_content(content)

    # medium HTML export doesn't have tag or category
    # RSS feed has tags, but it doesn't have all the posts.
    tags = ()

    slug = medium_slug(filepath)

    # TODO: make the fields a python dataclass
    return (
        title,
        content,
        slug,
        date,
        author,
        None,
        tags,
        status,
        kind,
        "html",
    )
def medium_slug(filepath: str) -> str:
    """Make the filepath of a medium exported file into a slug.

    The slug is the file's base name without its extension, with "_-"
    collapsed to "-" and any trailing run of "-<hex>" / "-DRAFT" suffixes
    removed.
    """
    stem, _extension = os.path.splitext(os.path.basename(filepath))

    # A medium export filename looks like date_-title-...html
    # But, RST doesn't like "_-" (see https://github.com/sphinx-doc/sphinx/issues/4350)
    # so get rid of it
    stem = stem.replace("_-", "-")

    # Medium appends a hex id (e.g., "-a8a8a8a8" or "---a9a9a9a9") and drafts
    # get "--DRAFT"; neither is worth keeping, and both may be present.
    # NOTE: a final title word made up only of hex characters (e.g. "-cafe")
    # matches the same pattern and is removed as well.
    trailing_noise = re.compile(r"((-)+([0-9a-f]+|DRAFT))+$")
    return trailing_noise.sub("", stem)
def mediumposts2fields(medium_export_dir: str):
    """Take HTML posts in a medium export directory, and yield Pelican fields.

    Entries are processed in sorted name order so that the import order does
    not depend on the file system (``os.listdir`` returns entries in arbitrary
    order).  Anything that is not a regular file, such as a sub-directory, is
    skipped rather than handed to the parser.

    Yields one field tuple per post, as returned by ``mediumpost2fields``.
    """
    for file in sorted(os.listdir(medium_export_dir)):
        filename = os.fsdecode(file)
        filepath = os.path.join(medium_export_dir, filename)
        if not os.path.isfile(filepath):
            continue
        yield mediumpost2fields(filepath)
def feed2fields(file):
"""Read a feed and yield pelican fields"""
import feedparser
@ -711,7 +846,7 @@ def get_attachments(xml):
"""returns a dictionary of posts that have attachments with a list
of the attachment_urls
"""
soup = xml_to_soup(xml)
soup = file_to_soup(xml)
items = soup.rss.channel.findAll("item")
names = {}
attachments = []
@ -837,6 +972,9 @@ def fields2pelican(
posts_require_pandoc.append(filename)
slug = not disable_slugs and filename or None
assert slug is None or filename == os.path.basename(
filename
), f"filename is not a basename: {filename}"
if wp_attach and attachments:
try:
@ -984,6 +1122,9 @@ def main():
parser.add_argument(
"--dotclear", action="store_true", dest="dotclear", help="Dotclear export"
)
parser.add_argument(
"--medium", action="store_true", dest="medium", help="Medium export"
)
parser.add_argument(
"--tumblr", action="store_true", dest="tumblr", help="Tumblr export"
)
@ -1069,6 +1210,8 @@ def main():
input_type = "blogger"
elif args.dotclear:
input_type = "dotclear"
elif args.medium:
input_type = "medium"
elif args.tumblr:
input_type = "tumblr"
elif args.wpfile:
@ -1077,8 +1220,8 @@ def main():
input_type = "feed"
else:
error = (
"You must provide either --blogger, --dotclear, "
"--tumblr, --wpfile or --feed options"
"You must provide one of --blogger, --dotclear, "
"--medium, --tumblr, --wpfile or --feed options"
)
exit(error)
@ -1097,12 +1240,16 @@ def main():
fields = blogger2fields(args.input)
elif input_type == "dotclear":
fields = dc2fields(args.input)
elif input_type == "medium":
fields = mediumposts2fields(args.input)
elif input_type == "tumblr":
fields = tumblr2fields(args.input, args.blogname)
elif input_type == "wordpress":
fields = wp2fields(args.input, args.wp_custpost or False)
elif input_type == "feed":
fields = feed2fields(args.input)
else:
raise ValueError(f"Unhandled input_type {input_type}")
if args.wp_attach:
attachments = get_attachments(args.input)

View file

@ -44,6 +44,7 @@ _TEMPLATES_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "templ
_jinja_env = Environment(
loader=FileSystemLoader(_TEMPLATES_DIR),
trim_blocks=True,
keep_trailing_newline=True,
)

View file

@ -37,6 +37,7 @@ DROPBOX_DIR={{dropbox_dir}}
{% endif %}
{% if github %}
GITHUB_PAGES_BRANCH={{github_pages_branch}}
GITHUB_PAGES_COMMIT_MESSAGE=Generate Pelican site
{% endif %}
@ -161,7 +162,7 @@ cf_upload: publish
{% if github %}
{% set upload = upload + ["github"] %}
github: publish
ghp-import -m "Generate Pelican site" -b $(GITHUB_PAGES_BRANCH) "$(OUTPUTDIR)"
ghp-import -m "$(GITHUB_PAGES_COMMIT_MESSAGE)" -b $(GITHUB_PAGES_BRANCH) "$(OUTPUTDIR)" --no-jekyll
git push origin $(GITHUB_PAGES_BRANCH)
{% endif %}

View file

@ -1,3 +1,5 @@
from __future__ import annotations
import datetime
import fnmatch
import locale
@ -16,6 +18,21 @@ from html import entities
from html.parser import HTMLParser
from itertools import groupby
from operator import attrgetter
from typing import (
TYPE_CHECKING,
Any,
Callable,
Collection,
Dict,
Generator,
Iterable,
List,
Optional,
Sequence,
Tuple,
Type,
Union,
)
import dateutil.parser
@ -27,11 +44,15 @@ from markupsafe import Markup
import watchfiles
if TYPE_CHECKING:
from pelican.contents import Content
from pelican.readers import Readers
from pelican.settings import Settings
logger = logging.getLogger(__name__)
def sanitised_join(base_directory, *parts):
def sanitised_join(base_directory: str, *parts: str) -> str:
joined = posixize_path(os.path.abspath(os.path.join(base_directory, *parts)))
base = posixize_path(os.path.abspath(base_directory))
if not joined.startswith(base):
@ -40,7 +61,7 @@ def sanitised_join(base_directory, *parts):
return joined
def strftime(date, date_format):
def strftime(date: datetime.datetime, date_format: str) -> str:
"""
Enhanced replacement for built-in strftime with zero stripping
@ -109,10 +130,10 @@ class DateFormatter:
defined in LOCALE setting
"""
def __init__(self):
def __init__(self) -> None:
self.locale = locale.setlocale(locale.LC_TIME)
def __call__(self, date, date_format):
def __call__(self, date: datetime.datetime, date_format: str) -> str:
# on OSX, encoding from LC_CTYPE determines the unicode output in PY3
# make sure it's same as LC_TIME
with temporary_locale(self.locale, locale.LC_TIME), temporary_locale(
@ -131,11 +152,11 @@ class memoized:
"""
def __init__(self, func):
def __init__(self, func: Callable) -> None:
self.func = func
self.cache = {}
self.cache: Dict[Any, Any] = {}
def __call__(self, *args):
def __call__(self, *args) -> Any:
if not isinstance(args, Hashable):
# uncacheable. a list, for instance.
# better to not cache than blow up.
@ -147,17 +168,23 @@ class memoized:
self.cache[args] = value
return value
def __repr__(self):
def __repr__(self) -> Optional[str]:
return self.func.__doc__
def __get__(self, obj, objtype):
def __get__(self, obj: Any, objtype):
"""Support instance methods."""
fn = partial(self.__call__, obj)
fn.cache = self.cache
return fn
def deprecated_attribute(old, new, since=None, remove=None, doc=None):
def deprecated_attribute(
old: str,
new: str,
since: Tuple[int, ...],
remove: Optional[Tuple[int, ...]] = None,
doc: Optional[str] = None,
):
"""Attribute deprecation decorator for gentle upgrades
For example:
@ -198,7 +225,7 @@ def deprecated_attribute(old, new, since=None, remove=None, doc=None):
return decorator
def get_date(string):
def get_date(string: str) -> datetime.datetime:
"""Return a datetime object from a string.
If no format matches the given date, raise a ValueError.
@ -212,7 +239,9 @@ def get_date(string):
@contextmanager
def pelican_open(filename, mode="r", strip_crs=(sys.platform == "win32")):
def pelican_open(
filename: str, mode: str = "r", strip_crs: bool = (sys.platform == "win32")
) -> Generator[str, None, None]:
"""Open a file and return its content"""
# utf-8-sig will clear any BOM if present
@ -221,7 +250,12 @@ def pelican_open(filename, mode="r", strip_crs=(sys.platform == "win32")):
yield content
def slugify(value, regex_subs=(), preserve_case=False, use_unicode=False):
def slugify(
value: str,
regex_subs: Iterable[Tuple[str, str]] = (),
preserve_case: bool = False,
use_unicode: bool = False,
) -> str:
"""
Normalizes string, converts to lowercase, removes non-alpha characters,
and converts spaces to hyphens.
@ -233,9 +267,10 @@ def slugify(value, regex_subs=(), preserve_case=False, use_unicode=False):
"""
import unicodedata
import unidecode
def normalize_unicode(text):
def normalize_unicode(text: str) -> str:
# normalize text by compatibility composition
# see: https://en.wikipedia.org/wiki/Unicode_equivalence
return unicodedata.normalize("NFKC", text)
@ -262,7 +297,9 @@ def slugify(value, regex_subs=(), preserve_case=False, use_unicode=False):
return value.strip()
def copy(source, destination, ignores=None):
def copy(
source: str, destination: str, ignores: Optional[Iterable[str]] = None
) -> None:
"""Recursively copy source into destination.
If source is a file, destination has to be a file as well.
@ -334,7 +371,7 @@ def copy(source, destination, ignores=None):
)
def copy_file(source, destination):
def copy_file(source: str, destination: str) -> None:
"""Copy a file"""
try:
shutil.copyfile(source, destination)
@ -344,7 +381,7 @@ def copy_file(source, destination):
)
def clean_output_dir(path, retention):
def clean_output_dir(path: str, retention: Iterable[str]) -> None:
"""Remove all files from output directory except those in retention list"""
if not os.path.exists(path):
@ -381,24 +418,24 @@ def clean_output_dir(path, retention):
logger.error("Unable to delete %s, file type unknown", file)
def get_relative_path(path):
def get_relative_path(path: str) -> str:
"""Return the relative path from the given path to the root path."""
components = split_all(path)
if len(components) <= 1:
if components is None or len(components) <= 1:
return os.curdir
else:
parents = [os.pardir] * (len(components) - 1)
return os.path.join(*parents)
def path_to_url(path):
def path_to_url(path: str) -> str:
"""Return the URL corresponding to a given path."""
if path is not None:
path = posixize_path(path)
return path
def posixize_path(rel_path):
def posixize_path(rel_path: str) -> str:
"""Use '/' as path separator, so that source references,
like '{static}/foo/bar.jpg' or 'extras/favicon.ico',
will work on Windows as well as on Mac and Linux."""
@ -427,20 +464,20 @@ class _HTMLWordTruncator(HTMLParser):
_singlets = ("br", "col", "link", "base", "img", "param", "area", "hr", "input")
class TruncationCompleted(Exception):
def __init__(self, truncate_at):
def __init__(self, truncate_at: int) -> None:
super().__init__(truncate_at)
self.truncate_at = truncate_at
def __init__(self, max_words):
def __init__(self, max_words: int) -> None:
super().__init__(convert_charrefs=False)
self.max_words = max_words
self.words_found = 0
self.open_tags = []
self.last_word_end = None
self.truncate_at = None
self.truncate_at: Optional[int] = None
def feed(self, *args, **kwargs):
def feed(self, *args, **kwargs) -> None:
try:
super().feed(*args, **kwargs)
except self.TruncationCompleted as exc:
@ -448,29 +485,29 @@ class _HTMLWordTruncator(HTMLParser):
else:
self.truncate_at = None
def getoffset(self):
def getoffset(self) -> int:
line_start = 0
lineno, line_offset = self.getpos()
for i in range(lineno - 1):
line_start = self.rawdata.index("\n", line_start) + 1
return line_start + line_offset
def add_word(self, word_end):
def add_word(self, word_end: int) -> None:
self.words_found += 1
self.last_word_end = None
if self.words_found == self.max_words:
raise self.TruncationCompleted(word_end)
def add_last_word(self):
def add_last_word(self) -> None:
if self.last_word_end is not None:
self.add_word(self.last_word_end)
def handle_starttag(self, tag, attrs):
def handle_starttag(self, tag: str, attrs: Any) -> None:
self.add_last_word()
if tag not in self._singlets:
self.open_tags.insert(0, tag)
def handle_endtag(self, tag):
def handle_endtag(self, tag: str) -> None:
self.add_last_word()
try:
i = self.open_tags.index(tag)
@ -481,7 +518,7 @@ class _HTMLWordTruncator(HTMLParser):
# all unclosed intervening start tags with omitted end tags
del self.open_tags[: i + 1]
def handle_data(self, data):
def handle_data(self, data: str) -> None:
word_end = 0
offset = self.getoffset()
@ -499,7 +536,7 @@ class _HTMLWordTruncator(HTMLParser):
if word_end < len(data):
self.add_last_word()
def _handle_ref(self, name, char):
def _handle_ref(self, name: str, char: str) -> None:
"""
Called by handle_entityref() or handle_charref() when a ref like
`&mdash;`, `&#8212;`, or `&#x2014` is found.
@ -543,7 +580,7 @@ class _HTMLWordTruncator(HTMLParser):
else:
self.add_last_word()
def handle_entityref(self, name):
def handle_entityref(self, name: str) -> None:
"""
Called when an entity ref like '&mdash;' is found
@ -556,7 +593,7 @@ class _HTMLWordTruncator(HTMLParser):
char = ""
self._handle_ref(name, char)
def handle_charref(self, name):
def handle_charref(self, name: str) -> None:
"""
Called when a char ref like '&#8212;' or '&#x2014' is found
@ -574,7 +611,7 @@ class _HTMLWordTruncator(HTMLParser):
self._handle_ref("#" + name, char)
def truncate_html_words(s, num, end_text=""):
def truncate_html_words(s: str, num: int, end_text: str = "") -> str:
"""Truncates HTML to a certain number of words.
(not counting tags and comments). Closes opened tags if they were correctly
@ -600,7 +637,10 @@ def truncate_html_words(s, num, end_text="…"):
return out
def process_translations(content_list, translation_id=None):
def process_translations(
content_list: List[Content],
translation_id: Optional[Union[str, Collection[str]]] = None,
) -> Tuple[List[Content], List[Content]]:
"""Finds translations and returns them.
For each content_list item, populates the 'translations' attribute, and
@ -658,7 +698,7 @@ def process_translations(content_list, translation_id=None):
return index, translations
def get_original_items(items, with_str):
def get_original_items(items: List[Content], with_str: str) -> List[Content]:
def _warn_source_paths(msg, items, *extra):
args = [len(items)]
args.extend(extra)
@ -698,7 +738,10 @@ def get_original_items(items, with_str):
return original_items
def order_content(content_list, order_by="slug"):
def order_content(
content_list: List[Content],
order_by: Union[str, Callable[[Content], Any], None] = "slug",
) -> List[Content]:
"""Sorts content.
order_by can be a string of an attribute or sorting function. If order_by
@ -758,7 +801,11 @@ def order_content(content_list, order_by="slug"):
return content_list
def wait_for_changes(settings_file, reader_class, settings):
def wait_for_changes(
settings_file: str,
reader_class: Type["Readers"],
settings: "Settings",
):
content_path = settings.get("PATH", "")
theme_path = settings.get("THEME", "")
ignore_files = {
@ -788,13 +835,15 @@ def wait_for_changes(settings_file, reader_class, settings):
return next(
watchfiles.watch(
*watching_paths,
watch_filter=watchfiles.DefaultFilter(ignore_entity_patterns=ignore_files),
watch_filter=watchfiles.DefaultFilter(ignore_entity_patterns=ignore_files), # type: ignore
rust_timeout=0,
)
)
def set_date_tzinfo(d, tz_name=None):
def set_date_tzinfo(
d: datetime.datetime, tz_name: Optional[str] = None
) -> datetime.datetime:
"""Set the timezone for dates that don't have tzinfo"""
if tz_name and not d.tzinfo:
timezone = ZoneInfo(tz_name)
@ -805,11 +854,11 @@ def set_date_tzinfo(d, tz_name=None):
return d
def mkdir_p(path):
def mkdir_p(path: str) -> None:
os.makedirs(path, exist_ok=True)
def split_all(path):
def split_all(path: Union[str, pathlib.Path, None]) -> Optional[Sequence[str]]:
"""Split a path into a list of components
While os.path.split() splits a single component off the back of
@ -840,12 +889,12 @@ def split_all(path):
)
def path_to_file_url(path):
def path_to_file_url(path: str) -> str:
"""Convert file-system path to file:// URL"""
return urllib.parse.urljoin("file://", urllib.request.pathname2url(path))
def maybe_pluralize(count, singular, plural):
def maybe_pluralize(count: int, singular: str, plural: str) -> str:
"""
Returns a formatted string containing count and plural if count is not 1
Returns count and singular if count is 1
@ -862,7 +911,9 @@ def maybe_pluralize(count, singular, plural):
@contextmanager
def temporary_locale(temp_locale=None, lc_category=locale.LC_ALL):
def temporary_locale(
temp_locale: Optional[str] = None, lc_category: int = locale.LC_ALL
) -> Generator[None, None, None]:
"""
Enable code to run in a context with a temporary locale
Resets the locale back when exiting context.

View file

@ -95,7 +95,7 @@ dev = [
"pytest-xdist>=3.4.0",
"tox>=4.11.3",
"invoke>=2.2.0",
"ruff>=0.1.5",
"ruff>=0.1.5,<0.2.0",
"tomli>=2.0.1; python_version < \"3.11\"",
]

View file

@ -1,5 +1,5 @@
[tox]
envlist = py{3.8,3.9,3.10,3.11.3.12},docs
envlist = py{3.8,3.9,3.10,3.11,3.12},docs
[testenv]
basepython =
@ -18,7 +18,7 @@ commands =
pytest -s --cov=pelican pelican
[testenv:docs]
basepython = python3.9
basepython = python3.11
deps =
-rrequirements/docs.pip
changedir = docs