mirror of
https://github.com/getpelican/pelican.git
synced 2025-10-15 20:28:56 +02:00
Merge branch 'master' into rss_rel
This commit is contained in:
commit
c80baf5776
20 changed files with 637 additions and 170 deletions
2
.github/workflows/github_pages.yml
vendored
2
.github/workflows/github_pages.yml
vendored
|
|
@ -28,7 +28,7 @@ jobs:
|
|||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
|
|
|
|||
32
.github/workflows/main.yml
vendored
32
.github/workflows/main.yml
vendored
|
|
@ -23,9 +23,9 @@ jobs:
|
|||
python: "3.9"
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
- name: Set up Python ${{ matrix.python }}
|
||||
uses: actions/setup-python@v4
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python }}
|
||||
cache: "pip"
|
||||
|
|
@ -52,10 +52,10 @@ jobs:
|
|||
name: Lint
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: pdm-project/setup-pdm@v3
|
||||
- uses: actions/checkout@v4
|
||||
- uses: pdm-project/setup-pdm@v4
|
||||
with:
|
||||
python-version: 3.9
|
||||
python-version: "3.11"
|
||||
cache: true
|
||||
cache-dependency-path: ./pyproject.toml
|
||||
- name: Install dependencies
|
||||
|
|
@ -64,16 +64,16 @@ jobs:
|
|||
- name: Run linters
|
||||
run: pdm lint --diff
|
||||
- name: Run pre-commit checks on all files
|
||||
uses: pre-commit/action@v3.0.0
|
||||
uses: pre-commit/action@v3.0.1
|
||||
|
||||
build:
|
||||
name: Test build
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: pdm-project/setup-pdm@v3
|
||||
- uses: actions/checkout@v4
|
||||
- uses: pdm-project/setup-pdm@v4
|
||||
with:
|
||||
python-version: 3.9
|
||||
python-version: "3.11"
|
||||
cache: true
|
||||
cache-dependency-path: ./pyproject.toml
|
||||
- name: Install dependencies
|
||||
|
|
@ -88,11 +88,11 @@ jobs:
|
|||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.9"
|
||||
python-version: "3.11"
|
||||
cache: "pip"
|
||||
cache-dependency-path: "**/requirements/*"
|
||||
- name: Install tox
|
||||
|
|
@ -100,7 +100,7 @@ jobs:
|
|||
- name: Check
|
||||
run: tox -e docs
|
||||
- name: cache the docs for inspection
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: docs
|
||||
path: docs/_build/html/
|
||||
|
|
@ -117,14 +117,14 @@ jobs:
|
|||
id-token: write
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
token: ${{ secrets.GH_TOKEN }}
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.9"
|
||||
python-version: "3.11"
|
||||
|
||||
- name: Check release
|
||||
id: check_release
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ repos:
|
|||
- id: forbid-new-submodules
|
||||
- id: trailing-whitespace
|
||||
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||
rev: v0.1.5
|
||||
rev: v0.1.15
|
||||
hooks:
|
||||
- id: ruff
|
||||
- id: ruff-format
|
||||
|
|
|
|||
|
|
@ -439,8 +439,8 @@ For **Markdown**, one must rely on an extension. For example, using the `mdx_inc
|
|||
Importing an existing site
|
||||
==========================
|
||||
|
||||
It is possible to import your site from WordPress, Tumblr, Dotclear, and RSS
|
||||
feeds using a simple script. See :ref:`import`.
|
||||
It is possible to import your site from several other blogging sites
|
||||
(such as WordPress and Tumblr) using a simple script. See :ref:`import`.
|
||||
|
||||
Translations
|
||||
============
|
||||
|
|
@ -631,7 +631,7 @@ are not included by default in tag, category, and author indexes, nor in the
|
|||
main article feed. This has the effect of creating an "unlisted" post.
|
||||
|
||||
.. _W3C ISO 8601: https://www.w3.org/TR/NOTE-datetime
|
||||
.. _AsciiDoc: https://www.methods.co.nz/asciidoc/
|
||||
.. _AsciiDoc: https://asciidoc.org
|
||||
.. _Pelican Plugins: https://github.com/pelican-plugins
|
||||
.. _pelican-plugins: https://github.com/getpelican/pelican-plugins
|
||||
.. _Python-Markdown: https://github.com/Python-Markdown/markdown
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ software to reStructuredText or Markdown. The supported import formats are:
|
|||
|
||||
- Blogger XML export
|
||||
- Dotclear export
|
||||
- Medium export
|
||||
- Tumblr API
|
||||
- WordPress XML export
|
||||
- RSS/Atom feed
|
||||
|
|
@ -26,6 +27,12 @@ not be converted (as Pelican also supports Markdown).
|
|||
manually, or use a plugin such as `More Categories`_ that enables multiple
|
||||
categories per article.
|
||||
|
||||
.. note::
|
||||
|
||||
Imported pages may contain links to images that still point to the original site.
|
||||
So you might want to download those images into your local content and manually
|
||||
re-link them from the relevant pages of your site.
|
||||
|
||||
Dependencies
|
||||
============
|
||||
|
||||
|
|
@ -65,6 +72,7 @@ Optional arguments
|
|||
-h, --help Show this help message and exit
|
||||
--blogger Blogger XML export (default: False)
|
||||
--dotclear Dotclear export (default: False)
|
||||
--medium Medium export (default: False)
|
||||
--tumblr Tumblr API (default: False)
|
||||
--wpfile WordPress XML export (default: False)
|
||||
--feed Feed to parse (default: False)
|
||||
|
|
@ -80,8 +88,7 @@ Optional arguments
|
|||
(default: False)
|
||||
--filter-author Import only post from the specified author
|
||||
--strip-raw Strip raw HTML code that can't be converted to markup
|
||||
such as flash embeds or iframes (wordpress import
|
||||
only) (default: False)
|
||||
such as flash embeds or iframes (default: False)
|
||||
--wp-custpost Put wordpress custom post types in directories. If
|
||||
used with --dir-cat option directories will be created
|
||||
as "/post_type/category/" (wordpress import only)
|
||||
|
|
@ -113,6 +120,14 @@ For Dotclear::
|
|||
|
||||
$ pelican-import --dotclear -o ~/output ~/backup.txt
|
||||
|
||||
For Medium::
|
||||
|
||||
$ pelican-import --medium -o ~/output ~/medium-export/posts/
|
||||
|
||||
The Medium export is a zip file. Unzip it, and point this tool to the
|
||||
"posts" subdirectory. For more information on how to export, see
|
||||
https://help.medium.com/hc/en-us/articles/115004745787-Export-your-account-data.
|
||||
|
||||
For Tumblr::
|
||||
|
||||
$ pelican-import --tumblr -o ~/output --blogname=<blogname> <api_key>
|
||||
|
|
@ -121,6 +136,15 @@ For WordPress::
|
|||
|
||||
$ pelican-import --wpfile -o ~/output ~/posts.xml
|
||||
|
||||
For Medium (an example of using an RSS feed)::
|
||||
|
||||
$ python -m pip install feedparser
|
||||
$ pelican-import --feed https://medium.com/feed/@username
|
||||
|
||||
.. note::
|
||||
|
||||
The RSS feed may only return the most recent posts — not all of them.
|
||||
|
||||
Tests
|
||||
=====
|
||||
|
||||
|
|
|
|||
|
|
@ -80,7 +80,14 @@ class Pelican:
|
|||
plugin.register()
|
||||
self.plugins.append(plugin)
|
||||
except Exception as e:
|
||||
logger.error("Cannot register plugin `%s`\n%s", name, e)
|
||||
logger.error(
|
||||
"Cannot register plugin `%s`\n%s",
|
||||
name,
|
||||
e,
|
||||
stacklevel=2,
|
||||
)
|
||||
if self.settings.get("DEBUG", False):
|
||||
console.print_exception()
|
||||
|
||||
self.settings["PLUGINS"] = [get_plugin_name(p) for p in self.plugins]
|
||||
|
||||
|
|
@ -120,12 +127,15 @@ class Pelican:
|
|||
if hasattr(p, "generate_context"):
|
||||
p.generate_context()
|
||||
|
||||
# for plugins that create/edit the summary
|
||||
logger.debug("Signal all_generators_finalized.send(<generators>)")
|
||||
signals.all_generators_finalized.send(generators)
|
||||
|
||||
# update links in the summary, etc
|
||||
for p in generators:
|
||||
if hasattr(p, "refresh_metadata_intersite_links"):
|
||||
p.refresh_metadata_intersite_links()
|
||||
|
||||
signals.all_generators_finalized.send(generators)
|
||||
|
||||
writer = self._get_writer()
|
||||
|
||||
for p in generators:
|
||||
|
|
|
|||
|
|
@ -6,7 +6,8 @@ import os
|
|||
import re
|
||||
from datetime import timezone
|
||||
from html import unescape
|
||||
from urllib.parse import unquote, urljoin, urlparse, urlunparse
|
||||
from typing import Any, Dict, Optional, Set, Tuple
|
||||
from urllib.parse import ParseResult, unquote, urljoin, urlparse, urlunparse
|
||||
|
||||
try:
|
||||
from zoneinfo import ZoneInfo
|
||||
|
|
@ -15,7 +16,7 @@ except ModuleNotFoundError:
|
|||
|
||||
|
||||
from pelican.plugins import signals
|
||||
from pelican.settings import DEFAULT_CONFIG
|
||||
from pelican.settings import DEFAULT_CONFIG, Settings
|
||||
from pelican.utils import (
|
||||
deprecated_attribute,
|
||||
memoized,
|
||||
|
|
@ -44,12 +45,20 @@ class Content:
|
|||
|
||||
"""
|
||||
|
||||
default_template: Optional[str] = None
|
||||
mandatory_properties: Tuple[str, ...] = ()
|
||||
|
||||
@deprecated_attribute(old="filename", new="source_path", since=(3, 2, 0))
|
||||
def filename():
|
||||
return None
|
||||
|
||||
def __init__(
|
||||
self, content, metadata=None, settings=None, source_path=None, context=None
|
||||
self,
|
||||
content: str,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
settings: Optional[Settings] = None,
|
||||
source_path: Optional[str] = None,
|
||||
context: Optional[Dict[Any, Any]] = None,
|
||||
):
|
||||
if metadata is None:
|
||||
metadata = {}
|
||||
|
|
@ -156,10 +165,10 @@ class Content:
|
|||
|
||||
signals.content_object_init.send(self)
|
||||
|
||||
def __str__(self):
|
||||
def __str__(self) -> str:
|
||||
return self.source_path or repr(self)
|
||||
|
||||
def _has_valid_mandatory_properties(self):
|
||||
def _has_valid_mandatory_properties(self) -> bool:
|
||||
"""Test mandatory properties are set."""
|
||||
for prop in self.mandatory_properties:
|
||||
if not hasattr(self, prop):
|
||||
|
|
@ -169,7 +178,7 @@ class Content:
|
|||
return False
|
||||
return True
|
||||
|
||||
def _has_valid_save_as(self):
|
||||
def _has_valid_save_as(self) -> bool:
|
||||
"""Return true if save_as doesn't write outside output path, false
|
||||
otherwise."""
|
||||
try:
|
||||
|
|
@ -190,7 +199,7 @@ class Content:
|
|||
|
||||
return True
|
||||
|
||||
def _has_valid_status(self):
|
||||
def _has_valid_status(self) -> bool:
|
||||
if hasattr(self, "allowed_statuses"):
|
||||
if self.status not in self.allowed_statuses:
|
||||
logger.error(
|
||||
|
|
@ -204,7 +213,7 @@ class Content:
|
|||
# if undefined we allow all
|
||||
return True
|
||||
|
||||
def is_valid(self):
|
||||
def is_valid(self) -> bool:
|
||||
"""Validate Content"""
|
||||
# Use all() to not short circuit and get results of all validations
|
||||
return all(
|
||||
|
|
@ -216,7 +225,7 @@ class Content:
|
|||
)
|
||||
|
||||
@property
|
||||
def url_format(self):
|
||||
def url_format(self) -> Dict[str, Any]:
|
||||
"""Returns the URL, formatted with the proper values"""
|
||||
metadata = copy.copy(self.metadata)
|
||||
path = self.metadata.get("path", self.get_relative_source_path())
|
||||
|
|
@ -232,19 +241,19 @@ class Content:
|
|||
)
|
||||
return metadata
|
||||
|
||||
def _expand_settings(self, key, klass=None):
|
||||
def _expand_settings(self, key: str, klass: Optional[str] = None) -> str:
|
||||
if not klass:
|
||||
klass = self.__class__.__name__
|
||||
fq_key = (f"{klass}_{key}").upper()
|
||||
return str(self.settings[fq_key]).format(**self.url_format)
|
||||
|
||||
def get_url_setting(self, key):
|
||||
def get_url_setting(self, key: str) -> str:
|
||||
if hasattr(self, "override_" + key):
|
||||
return getattr(self, "override_" + key)
|
||||
key = key if self.in_default_lang else "lang_%s" % key
|
||||
return self._expand_settings(key)
|
||||
|
||||
def _link_replacer(self, siteurl, m):
|
||||
def _link_replacer(self, siteurl: str, m: re.Match) -> str:
|
||||
what = m.group("what")
|
||||
value = urlparse(m.group("value"))
|
||||
path = value.path
|
||||
|
|
@ -272,15 +281,15 @@ class Content:
|
|||
# XXX Put this in a different location.
|
||||
if what in {"filename", "static", "attach"}:
|
||||
|
||||
def _get_linked_content(key, url):
|
||||
def _get_linked_content(key: str, url: ParseResult) -> Optional[Content]:
|
||||
nonlocal value
|
||||
|
||||
def _find_path(path):
|
||||
def _find_path(path: str) -> Optional[Content]:
|
||||
if path.startswith("/"):
|
||||
path = path[1:]
|
||||
else:
|
||||
# relative to the source path of this content
|
||||
path = self.get_relative_source_path(
|
||||
path = self.get_relative_source_path( # type: ignore
|
||||
os.path.join(self.relative_dir, path)
|
||||
)
|
||||
return self._context[key].get(path, None)
|
||||
|
|
@ -324,7 +333,7 @@ class Content:
|
|||
linked_content = _get_linked_content(key, value)
|
||||
if linked_content:
|
||||
if what == "attach":
|
||||
linked_content.attach_to(self)
|
||||
linked_content.attach_to(self) # type: ignore
|
||||
origin = joiner(siteurl, linked_content.url)
|
||||
origin = origin.replace("\\", "/") # for Windows paths.
|
||||
else:
|
||||
|
|
@ -359,7 +368,7 @@ class Content:
|
|||
|
||||
return "".join((m.group("markup"), m.group("quote"), origin, m.group("quote")))
|
||||
|
||||
def _get_intrasite_link_regex(self):
|
||||
def _get_intrasite_link_regex(self) -> re.Pattern:
|
||||
intrasite_link_regex = self.settings["INTRASITE_LINK_REGEX"]
|
||||
regex = r"""
|
||||
(?P<markup><[^\>]+ # match tag with all url-value attributes
|
||||
|
|
@ -370,7 +379,7 @@ class Content:
|
|||
(?P=quote)""".format(intrasite_link_regex)
|
||||
return re.compile(regex, re.X)
|
||||
|
||||
def _update_content(self, content, siteurl):
|
||||
def _update_content(self, content: str, siteurl: str) -> str:
|
||||
"""Update the content attribute.
|
||||
|
||||
Change all the relative paths of the content to relative paths
|
||||
|
|
@ -386,7 +395,7 @@ class Content:
|
|||
hrefs = self._get_intrasite_link_regex()
|
||||
return hrefs.sub(lambda m: self._link_replacer(siteurl, m), content)
|
||||
|
||||
def get_static_links(self):
|
||||
def get_static_links(self) -> Set[str]:
|
||||
static_links = set()
|
||||
hrefs = self._get_intrasite_link_regex()
|
||||
for m in hrefs.finditer(self._content):
|
||||
|
|
@ -402,15 +411,15 @@ class Content:
|
|||
path = self.get_relative_source_path(
|
||||
os.path.join(self.relative_dir, path)
|
||||
)
|
||||
path = path.replace("%20", " ")
|
||||
path = path.replace("%20", " ") # type: ignore
|
||||
static_links.add(path)
|
||||
return static_links
|
||||
|
||||
def get_siteurl(self):
|
||||
def get_siteurl(self) -> str:
|
||||
return self._context.get("localsiteurl", "")
|
||||
|
||||
@memoized
|
||||
def get_content(self, siteurl):
|
||||
def get_content(self, siteurl: str) -> str:
|
||||
if hasattr(self, "_get_content"):
|
||||
content = self._get_content()
|
||||
else:
|
||||
|
|
@ -418,11 +427,11 @@ class Content:
|
|||
return self._update_content(content, siteurl)
|
||||
|
||||
@property
|
||||
def content(self):
|
||||
def content(self) -> str:
|
||||
return self.get_content(self.get_siteurl())
|
||||
|
||||
@memoized
|
||||
def get_summary(self, siteurl):
|
||||
def get_summary(self, siteurl: str) -> str:
|
||||
"""Returns the summary of an article.
|
||||
|
||||
This is based on the summary metadata if set, otherwise truncate the
|
||||
|
|
@ -441,10 +450,10 @@ class Content:
|
|||
)
|
||||
|
||||
@property
|
||||
def summary(self):
|
||||
def summary(self) -> str:
|
||||
return self.get_summary(self.get_siteurl())
|
||||
|
||||
def _get_summary(self):
|
||||
def _get_summary(self) -> str:
|
||||
"""deprecated function to access summary"""
|
||||
|
||||
logger.warning(
|
||||
|
|
@ -454,34 +463,36 @@ class Content:
|
|||
return self.summary
|
||||
|
||||
@summary.setter
|
||||
def summary(self, value):
|
||||
def summary(self, value: str):
|
||||
"""Dummy function"""
|
||||
pass
|
||||
|
||||
@property
|
||||
def status(self):
|
||||
def status(self) -> str:
|
||||
return self._status
|
||||
|
||||
@status.setter
|
||||
def status(self, value):
|
||||
def status(self, value: str) -> None:
|
||||
# TODO maybe typecheck
|
||||
self._status = value.lower()
|
||||
|
||||
@property
|
||||
def url(self):
|
||||
def url(self) -> str:
|
||||
return self.get_url_setting("url")
|
||||
|
||||
@property
|
||||
def save_as(self):
|
||||
def save_as(self) -> str:
|
||||
return self.get_url_setting("save_as")
|
||||
|
||||
def _get_template(self):
|
||||
def _get_template(self) -> str:
|
||||
if hasattr(self, "template") and self.template is not None:
|
||||
return self.template
|
||||
else:
|
||||
return self.default_template
|
||||
|
||||
def get_relative_source_path(self, source_path=None):
|
||||
def get_relative_source_path(
|
||||
self, source_path: Optional[str] = None
|
||||
) -> Optional[str]:
|
||||
"""Return the relative path (from the content path) to the given
|
||||
source_path.
|
||||
|
||||
|
|
@ -501,7 +512,7 @@ class Content:
|
|||
)
|
||||
|
||||
@property
|
||||
def relative_dir(self):
|
||||
def relative_dir(self) -> str:
|
||||
return posixize_path(
|
||||
os.path.dirname(
|
||||
os.path.relpath(
|
||||
|
|
@ -511,7 +522,7 @@ class Content:
|
|||
)
|
||||
)
|
||||
|
||||
def refresh_metadata_intersite_links(self):
|
||||
def refresh_metadata_intersite_links(self) -> None:
|
||||
for key in self.settings["FORMATTED_FIELDS"]:
|
||||
if key in self.metadata and key != "summary":
|
||||
value = self._update_content(self.metadata[key], self.get_siteurl())
|
||||
|
|
@ -519,13 +530,16 @@ class Content:
|
|||
setattr(self, key.lower(), value)
|
||||
|
||||
# _summary is an internal variable that some plugins may be writing to,
|
||||
# so ensure changes to it are picked up
|
||||
if (
|
||||
"summary" in self.settings["FORMATTED_FIELDS"]
|
||||
and "summary" in self.metadata
|
||||
):
|
||||
self._summary = self._update_content(self._summary, self.get_siteurl())
|
||||
self.metadata["summary"] = self._summary
|
||||
# so ensure changes to it are picked up, and write summary back to it
|
||||
if "summary" in self.settings["FORMATTED_FIELDS"]:
|
||||
if hasattr(self, "_summary"):
|
||||
self.metadata["summary"] = self._summary
|
||||
|
||||
if "summary" in self.metadata:
|
||||
self.metadata["summary"] = self._update_content(
|
||||
self.metadata["summary"], self.get_siteurl()
|
||||
)
|
||||
self._summary = self.metadata["summary"]
|
||||
|
||||
|
||||
class Page(Content):
|
||||
|
|
@ -534,7 +548,7 @@ class Page(Content):
|
|||
default_status = "published"
|
||||
default_template = "page"
|
||||
|
||||
def _expand_settings(self, key):
|
||||
def _expand_settings(self, key: str) -> str:
|
||||
klass = "draft_page" if self.status == "draft" else None
|
||||
return super()._expand_settings(key, klass)
|
||||
|
||||
|
|
@ -561,7 +575,7 @@ class Article(Content):
|
|||
if not hasattr(self, "date") and self.status == "draft":
|
||||
self.date = datetime.datetime.max.replace(tzinfo=self.timezone)
|
||||
|
||||
def _expand_settings(self, key):
|
||||
def _expand_settings(self, key: str) -> str:
|
||||
klass = "draft" if self.status == "draft" else "article"
|
||||
return super()._expand_settings(key, klass)
|
||||
|
||||
|
|
@ -571,7 +585,7 @@ class Static(Content):
|
|||
default_status = "published"
|
||||
default_template = None
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
def __init__(self, *args, **kwargs) -> None:
|
||||
super().__init__(*args, **kwargs)
|
||||
self._output_location_referenced = False
|
||||
|
||||
|
|
@ -588,18 +602,18 @@ class Static(Content):
|
|||
return None
|
||||
|
||||
@property
|
||||
def url(self):
|
||||
def url(self) -> str:
|
||||
# Note when url has been referenced, so we can avoid overriding it.
|
||||
self._output_location_referenced = True
|
||||
return super().url
|
||||
|
||||
@property
|
||||
def save_as(self):
|
||||
def save_as(self) -> str:
|
||||
# Note when save_as has been referenced, so we can avoid overriding it.
|
||||
self._output_location_referenced = True
|
||||
return super().save_as
|
||||
|
||||
def attach_to(self, content):
|
||||
def attach_to(self, content: Content) -> None:
|
||||
"""Override our output directory with that of the given content object."""
|
||||
|
||||
# Determine our file's new output path relative to the linking
|
||||
|
|
@ -624,7 +638,7 @@ class Static(Content):
|
|||
|
||||
new_url = path_to_url(new_save_as)
|
||||
|
||||
def _log_reason(reason):
|
||||
def _log_reason(reason: str) -> None:
|
||||
logger.warning(
|
||||
"The {attach} link in %s cannot relocate "
|
||||
"%s because %s. Falling back to "
|
||||
|
|
|
|||
|
|
@ -384,8 +384,8 @@ class ArticlesGenerator(CachingGenerator):
|
|||
str(self.settings["CATEGORY_FEED_ATOM"]).format(slug=cat.slug),
|
||||
self.settings.get(
|
||||
"CATEGORY_FEED_ATOM_URL",
|
||||
str(self.settings["CATEGORY_FEED_ATOM"]).format(slug=cat.slug),
|
||||
),
|
||||
str(self.settings["CATEGORY_FEED_ATOM"]),
|
||||
).format(slug=cat.slug),
|
||||
feed_title=cat.name,
|
||||
)
|
||||
|
||||
|
|
@ -396,8 +396,8 @@ class ArticlesGenerator(CachingGenerator):
|
|||
str(self.settings["CATEGORY_FEED_RSS"]).format(slug=cat.slug),
|
||||
self.settings.get(
|
||||
"CATEGORY_FEED_RSS_URL",
|
||||
str(self.settings["CATEGORY_FEED_RSS"]).format(slug=cat.slug),
|
||||
),
|
||||
str(self.settings["CATEGORY_FEED_RSS"]),
|
||||
).format(slug=cat.slug),
|
||||
feed_title=cat.name,
|
||||
feed_type="rss",
|
||||
)
|
||||
|
|
@ -410,8 +410,8 @@ class ArticlesGenerator(CachingGenerator):
|
|||
str(self.settings["AUTHOR_FEED_ATOM"]).format(slug=auth.slug),
|
||||
self.settings.get(
|
||||
"AUTHOR_FEED_ATOM_URL",
|
||||
str(self.settings["AUTHOR_FEED_ATOM"]).format(slug=auth.slug),
|
||||
),
|
||||
str(self.settings["AUTHOR_FEED_ATOM"]),
|
||||
).format(slug=auth.slug),
|
||||
feed_title=auth.name,
|
||||
)
|
||||
|
||||
|
|
@ -422,8 +422,8 @@ class ArticlesGenerator(CachingGenerator):
|
|||
str(self.settings["AUTHOR_FEED_RSS"]).format(slug=auth.slug),
|
||||
self.settings.get(
|
||||
"AUTHOR_FEED_RSS_URL",
|
||||
str(self.settings["AUTHOR_FEED_RSS"]).format(slug=auth.slug),
|
||||
),
|
||||
str(self.settings["AUTHOR_FEED_RSS"]),
|
||||
).format(slug=auth.slug),
|
||||
feed_title=auth.name,
|
||||
feed_type="rss",
|
||||
)
|
||||
|
|
@ -437,8 +437,8 @@ class ArticlesGenerator(CachingGenerator):
|
|||
str(self.settings["TAG_FEED_ATOM"]).format(slug=tag.slug),
|
||||
self.settings.get(
|
||||
"TAG_FEED_ATOM_URL",
|
||||
str(self.settings["TAG_FEED_ATOM"]).format(slug=tag.slug),
|
||||
),
|
||||
str(self.settings["TAG_FEED_ATOM"]),
|
||||
).format(slug=tag.slug),
|
||||
feed_title=tag.name,
|
||||
)
|
||||
|
||||
|
|
@ -449,8 +449,8 @@ class ArticlesGenerator(CachingGenerator):
|
|||
str(self.settings["TAG_FEED_RSS"]).format(slug=tag.slug),
|
||||
self.settings.get(
|
||||
"TAG_FEED_RSS_URL",
|
||||
str(self.settings["TAG_FEED_RSS"]).format(slug=tag.slug),
|
||||
),
|
||||
str(self.settings["TAG_FEED_RSS"]),
|
||||
).format(slug=tag.slug),
|
||||
feed_title=tag.name,
|
||||
feed_type="rss",
|
||||
)
|
||||
|
|
@ -471,10 +471,8 @@ class ArticlesGenerator(CachingGenerator):
|
|||
str(self.settings["TRANSLATION_FEED_ATOM"]).format(lang=lang),
|
||||
self.settings.get(
|
||||
"TRANSLATION_FEED_ATOM_URL",
|
||||
str(self.settings["TRANSLATION_FEED_ATOM"]).format(
|
||||
lang=lang
|
||||
),
|
||||
),
|
||||
str(self.settings["TRANSLATION_FEED_ATOM"]),
|
||||
).format(lang=lang),
|
||||
)
|
||||
if self.settings.get("TRANSLATION_FEED_RSS"):
|
||||
writer.write_feed(
|
||||
|
|
|
|||
|
|
@ -85,13 +85,39 @@ class FatalLogger(LimitLogger):
|
|||
warnings_fatal = False
|
||||
errors_fatal = False
|
||||
|
||||
def warning(self, *args, **kwargs):
|
||||
super().warning(*args, **kwargs)
|
||||
def warning(self, *args, stacklevel=1, **kwargs):
|
||||
"""
|
||||
Displays a logging warning.
|
||||
|
||||
Wrapping it here allows Pelican to filter warnings, and conditionally
|
||||
make warnings fatal.
|
||||
|
||||
Args:
|
||||
stacklevel (int): the stacklevel that would be used to display the
|
||||
calling location, except for this function. Adjusting the
|
||||
stacklevel allows you to see the "true" calling location of the
|
||||
warning, rather than this wrapper location.
|
||||
"""
|
||||
stacklevel += 1
|
||||
super().warning(*args, stacklevel=stacklevel, **kwargs)
|
||||
if FatalLogger.warnings_fatal:
|
||||
raise RuntimeError("Warning encountered")
|
||||
|
||||
def error(self, *args, **kwargs):
|
||||
super().error(*args, **kwargs)
|
||||
def error(self, *args, stacklevel=1, **kwargs):
|
||||
"""
|
||||
Displays a logging error.
|
||||
|
||||
Wrapping it here allows Pelican to filter errors, and conditionally
|
||||
make errors fatal.
|
||||
|
||||
Args:
|
||||
stacklevel (int): the stacklevel that would be used to display the
|
||||
calling location, except for this function. Adjusting the
|
||||
stacklevel allows you to see the "true" calling location of the
|
||||
error, rather than this wrapper location.
|
||||
"""
|
||||
stacklevel += 1
|
||||
super().error(*args, stacklevel=stacklevel, **kwargs)
|
||||
if FatalLogger.errors_fatal:
|
||||
raise RuntimeError("Error encountered")
|
||||
|
||||
|
|
|
|||
|
|
@ -8,11 +8,13 @@ import re
|
|||
import sys
|
||||
from os.path import isabs
|
||||
from pathlib import Path
|
||||
from types import ModuleType
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from pelican.log import LimitFilter
|
||||
|
||||
|
||||
def load_source(name, path):
|
||||
def load_source(name: str, path: str) -> ModuleType:
|
||||
spec = importlib.util.spec_from_file_location(name, path)
|
||||
mod = importlib.util.module_from_spec(spec)
|
||||
sys.modules[name] = mod
|
||||
|
|
@ -22,6 +24,8 @@ def load_source(name, path):
|
|||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
Settings = Dict[str, Any]
|
||||
|
||||
DEFAULT_THEME = os.path.join(
|
||||
os.path.dirname(os.path.abspath(__file__)), "themes", "notmyidea"
|
||||
)
|
||||
|
|
@ -178,7 +182,9 @@ DEFAULT_CONFIG = {
|
|||
PYGMENTS_RST_OPTIONS = None
|
||||
|
||||
|
||||
def read_settings(path=None, override=None):
|
||||
def read_settings(
|
||||
path: Optional[str] = None, override: Optional[Settings] = None
|
||||
) -> Settings:
|
||||
settings = override or {}
|
||||
|
||||
if path:
|
||||
|
|
@ -222,7 +228,7 @@ def read_settings(path=None, override=None):
|
|||
return settings
|
||||
|
||||
|
||||
def get_settings_from_module(module=None):
|
||||
def get_settings_from_module(module: Optional[ModuleType] = None) -> Settings:
|
||||
"""Loads settings from a module, returns a dictionary."""
|
||||
|
||||
context = {}
|
||||
|
|
@ -231,7 +237,7 @@ def get_settings_from_module(module=None):
|
|||
return context
|
||||
|
||||
|
||||
def get_settings_from_file(path):
|
||||
def get_settings_from_file(path: str) -> Settings:
|
||||
"""Loads settings from a file path, returning a dict."""
|
||||
|
||||
name, ext = os.path.splitext(os.path.basename(path))
|
||||
|
|
@ -239,7 +245,7 @@ def get_settings_from_file(path):
|
|||
return get_settings_from_module(module)
|
||||
|
||||
|
||||
def get_jinja_environment(settings):
|
||||
def get_jinja_environment(settings: Settings) -> Settings:
|
||||
"""Sets the environment for Jinja"""
|
||||
|
||||
jinja_env = settings.setdefault(
|
||||
|
|
@ -254,7 +260,7 @@ def get_jinja_environment(settings):
|
|||
return settings
|
||||
|
||||
|
||||
def _printf_s_to_format_field(printf_string, format_field):
|
||||
def _printf_s_to_format_field(printf_string: str, format_field: str) -> str:
|
||||
"""Tries to replace %s with {format_field} in the provided printf_string.
|
||||
Raises ValueError in case of failure.
|
||||
"""
|
||||
|
|
@ -270,7 +276,7 @@ def _printf_s_to_format_field(printf_string, format_field):
|
|||
return result
|
||||
|
||||
|
||||
def handle_deprecated_settings(settings):
|
||||
def handle_deprecated_settings(settings: Settings) -> Settings:
|
||||
"""Converts deprecated settings and issues warnings. Issues an exception
|
||||
if both old and new setting is specified.
|
||||
"""
|
||||
|
|
@ -567,7 +573,7 @@ def handle_deprecated_settings(settings):
|
|||
return settings
|
||||
|
||||
|
||||
def configure_settings(settings):
|
||||
def configure_settings(settings: Settings) -> Settings:
|
||||
"""Provide optimizations, error checking, and warnings for the given
|
||||
settings.
|
||||
Also, specify the log messages to be ignored.
|
||||
|
|
|
|||
4
pelican/tests/content/medium_post_content.txt
vendored
Normal file
4
pelican/tests/content/medium_post_content.txt
vendored
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
|
||||
<hr/><h3>Title header</h3><p>A paragraph of content.</p><p>Paragraph number two.</p><p>A list:</p><ol><li>One.</li><li>Two.</li><li>Three.</li></ol><p>A link: <a data-href="https://example.com/example" href="https://example.com/example" target="_blank">link text</a>.</p><h3>Header 2</h3><p>A block quote:</p><blockquote>quote words <strong>strong words</strong></blockquote><p>after blockquote</p><figure><img data-height="282" data-image-id="image1.png" data-width="739" src="https://cdn-images-1.medium.com/max/800/image1.png"/><figcaption>A figure caption.</figcaption></figure><p>A final note: <a data-href="http://stats.stackexchange.com/" href="http://stats.stackexchange.com/" rel="noopener" target="_blank">Cross-Validated</a> has sometimes been helpful.</p><hr/><p><em>Next: </em><a data-href="https://medium.com/@username/post-url" href="https://medium.com/@username/post-url" target="_blank"><em>Next post</em>
|
||||
</a></p>
|
||||
<p>By <a href="https://medium.com/@username">User Name</a> on <a href="https://medium.com/p/medium-short-url"><time datetime="2017-04-21T17:11:55.799Z">April 21, 2017</time></a>.</p><p><a href="https://medium.com/@username/this-post-url">Canonical link</a></p><p>Exported from <a href="https://medium.com">Medium</a> on December 1, 2023.</p>
|
||||
72
pelican/tests/content/medium_posts/2017-04-21_-medium-post--d1bf01d62ba3.html
vendored
Normal file
72
pelican/tests/content/medium_posts/2017-04-21_-medium-post--d1bf01d62ba3.html
vendored
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
<!DOCTYPE html><html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"><title>A title</title><style>
|
||||
* {
|
||||
font-family: Georgia, Cambria, "Times New Roman", Times, serif;
|
||||
}
|
||||
html, body {
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
}
|
||||
h1 {
|
||||
font-size: 50px;
|
||||
margin-bottom: 17px;
|
||||
color: #333;
|
||||
}
|
||||
h2 {
|
||||
font-size: 24px;
|
||||
line-height: 1.6;
|
||||
margin: 30px 0 0 0;
|
||||
margin-bottom: 18px;
|
||||
margin-top: 33px;
|
||||
color: #333;
|
||||
}
|
||||
h3 {
|
||||
font-size: 30px;
|
||||
margin: 10px 0 20px 0;
|
||||
color: #333;
|
||||
}
|
||||
header {
|
||||
width: 640px;
|
||||
margin: auto;
|
||||
}
|
||||
section {
|
||||
width: 640px;
|
||||
margin: auto;
|
||||
}
|
||||
section p {
|
||||
margin-bottom: 27px;
|
||||
font-size: 20px;
|
||||
line-height: 1.6;
|
||||
color: #333;
|
||||
}
|
||||
section img {
|
||||
max-width: 640px;
|
||||
}
|
||||
footer {
|
||||
padding: 0 20px;
|
||||
margin: 50px 0;
|
||||
text-align: center;
|
||||
font-size: 12px;
|
||||
}
|
||||
.aspectRatioPlaceholder {
|
||||
max-width: auto !important;
|
||||
max-height: auto !important;
|
||||
}
|
||||
.aspectRatioPlaceholder-fill {
|
||||
padding-bottom: 0 !important;
|
||||
}
|
||||
header,
|
||||
section[data-field=subtitle],
|
||||
section[data-field=description] {
|
||||
display: none;
|
||||
}
|
||||
</style></head><body><article class="h-entry">
|
||||
<header>
|
||||
<h1 class="p-name">A name (like title)</h1>
|
||||
</header>
|
||||
<section data-field="subtitle" class="p-summary">
|
||||
Summary (first several words of content)
|
||||
</section>
|
||||
<section data-field="body" class="e-content">
|
||||
<section name="ad15" class="section section--body section--first"><div class="section-divider"><hr class="section-divider"></div><div class="section-content"><div class="section-inner sectionLayout--insetColumn"><h3 name="20a3" id="20a3" class="graf graf--h3 graf--leading graf--title">Title header</h3><p name="e3d6" id="e3d6" class="graf graf--p graf-after--h3">A paragraph of content.</p><p name="c7a8" id="c7a8" class="graf graf--p graf-after--p">Paragraph number two.</p><p name="42aa" id="42aa" class="graf graf--p graf-after--p">A list:</p><ol class="postList"><li name="d65f" id="d65f" class="graf graf--li graf-after--p">One.</li><li name="232b" id="232b" class="graf graf--li graf-after--li">Two.</li><li name="ef87" id="ef87" class="graf graf--li graf-after--li">Three.</li></ol><p name="e743" id="e743" class="graf graf--p graf-after--p">A link: <a href="https://example.com/example" data-href="https://example.com/example" class="markup--anchor markup--p-anchor" target="_blank">link text</a>.</p><h3 name="4cfd" id="4cfd" class="graf graf--h3 graf-after--p">Header 2</h3><p name="433c" id="433c" class="graf graf--p graf-after--p">A block quote:</p><blockquote name="3537" id="3537" class="graf graf--blockquote graf-after--p">quote words <strong class="markup--strong markup--blockquote-strong">strong words</strong></blockquote><p name="00cc" id="00cc" class="graf graf--p graf-after--blockquote">after blockquote</p><figure name="edb0" id="edb0" class="graf graf--figure graf-after--p"><img class="graf-image" data-image-id="image1.png" data-width="739" data-height="282" src="https://cdn-images-1.medium.com/max/800/image1.png"><figcaption class="imageCaption">A figure caption.</figcaption></figure><p name="f401" id="f401" class="graf graf--p graf-after--p graf--trailing">A final note: <a href="http://stats.stackexchange.com/" data-href="http://stats.stackexchange.com/" class="markup--anchor markup--p-anchor" rel="noopener" target="_blank">Cross-Validated</a> has sometimes 
been helpful.</p></div></div></section><section name="09a3" class="section section--body section--last"><div class="section-divider"><hr class="section-divider"></div><div class="section-content"><div class="section-inner sectionLayout--insetColumn"><p name="81e8" id="81e8" class="graf graf--p graf--leading"><em class="markup--em markup--p-em">Next: </em><a href="https://medium.com/@username/post-url" data-href="https://medium.com/@username/post-url" class="markup--anchor markup--p-anchor" target="_blank"><em class="markup--em markup--p-em">Next post</em>
|
||||
</section>
|
||||
<footer><p>By <a href="https://medium.com/@username" class="p-author h-card">User Name</a> on <a href="https://medium.com/p/medium-short-url"><time class="dt-published" datetime="2017-04-21T17:11:55.799Z">April 21, 2017</time></a>.</p><p><a href="https://medium.com/@username/this-post-url" class="p-canonical">Canonical link</a></p><p>Exported from <a href="https://medium.com">Medium</a> on December 1, 2023.</p></footer></article></body></html>
|
||||
|
|
@ -264,6 +264,7 @@ class TestArticlesGenerator(unittest.TestCase):
|
|||
|
||||
def test_generate_context(self):
|
||||
articles_expected = [
|
||||
["A title", "published", "medium_posts", "article"],
|
||||
["Article title", "published", "Default", "article"],
|
||||
[
|
||||
"Article with markdown and summary metadata multi",
|
||||
|
|
@ -391,13 +392,24 @@ class TestArticlesGenerator(unittest.TestCase):
|
|||
# terms of process order will define the name for that category
|
||||
categories = [cat.name for cat, _ in self.generator.categories]
|
||||
categories_alternatives = (
|
||||
sorted(["Default", "TestCategory", "Yeah", "test", "指導書"]),
|
||||
sorted(["Default", "TestCategory", "yeah", "test", "指導書"]),
|
||||
sorted(
|
||||
["Default", "TestCategory", "medium_posts", "Yeah", "test", "指導書"]
|
||||
),
|
||||
sorted(
|
||||
["Default", "TestCategory", "medium_posts", "yeah", "test", "指導書"]
|
||||
),
|
||||
)
|
||||
self.assertIn(sorted(categories), categories_alternatives)
|
||||
# test for slug
|
||||
categories = [cat.slug for cat, _ in self.generator.categories]
|
||||
categories_expected = ["default", "testcategory", "yeah", "test", "zhi-dao-shu"]
|
||||
categories_expected = [
|
||||
"default",
|
||||
"testcategory",
|
||||
"medium_posts",
|
||||
"yeah",
|
||||
"test",
|
||||
"zhi-dao-shu",
|
||||
]
|
||||
self.assertEqual(sorted(categories), sorted(categories_expected))
|
||||
|
||||
def test_do_not_use_folder_as_category(self):
|
||||
|
|
@ -549,7 +561,8 @@ class TestArticlesGenerator(unittest.TestCase):
|
|||
granularity: {period["period"] for period in periods}
|
||||
for granularity, periods in period_archives.items()
|
||||
}
|
||||
expected = {"year": {(1970,), (2010,), (2012,), (2014,)}}
|
||||
self.maxDiff = None
|
||||
expected = {"year": {(1970,), (2010,), (2012,), (2014,), (2017,)}}
|
||||
self.assertEqual(expected, abbreviated_archives)
|
||||
|
||||
# Month archives enabled:
|
||||
|
|
@ -570,7 +583,7 @@ class TestArticlesGenerator(unittest.TestCase):
|
|||
for granularity, periods in period_archives.items()
|
||||
}
|
||||
expected = {
|
||||
"year": {(1970,), (2010,), (2012,), (2014,)},
|
||||
"year": {(1970,), (2010,), (2012,), (2014,), (2017,)},
|
||||
"month": {
|
||||
(1970, "January"),
|
||||
(2010, "December"),
|
||||
|
|
@ -578,6 +591,7 @@ class TestArticlesGenerator(unittest.TestCase):
|
|||
(2012, "November"),
|
||||
(2012, "October"),
|
||||
(2014, "February"),
|
||||
(2017, "April"),
|
||||
},
|
||||
}
|
||||
self.assertEqual(expected, abbreviated_archives)
|
||||
|
|
@ -602,7 +616,7 @@ class TestArticlesGenerator(unittest.TestCase):
|
|||
for granularity, periods in period_archives.items()
|
||||
}
|
||||
expected = {
|
||||
"year": {(1970,), (2010,), (2012,), (2014,)},
|
||||
"year": {(1970,), (2010,), (2012,), (2014,), (2017,)},
|
||||
"month": {
|
||||
(1970, "January"),
|
||||
(2010, "December"),
|
||||
|
|
@ -610,6 +624,7 @@ class TestArticlesGenerator(unittest.TestCase):
|
|||
(2012, "November"),
|
||||
(2012, "October"),
|
||||
(2014, "February"),
|
||||
(2017, "April"),
|
||||
},
|
||||
"day": {
|
||||
(1970, "January", 1),
|
||||
|
|
@ -619,6 +634,7 @@ class TestArticlesGenerator(unittest.TestCase):
|
|||
(2012, "October", 30),
|
||||
(2012, "October", 31),
|
||||
(2014, "February", 9),
|
||||
(2017, "April", 21),
|
||||
},
|
||||
}
|
||||
self.assertEqual(expected, abbreviated_archives)
|
||||
|
|
@ -836,8 +852,12 @@ class TestArticlesGenerator(unittest.TestCase):
|
|||
|
||||
categories = sorted([category.name for category, _ in generator.categories])
|
||||
categories_expected = [
|
||||
sorted(["Default", "TestCategory", "yeah", "test", "指導書"]),
|
||||
sorted(["Default", "TestCategory", "Yeah", "test", "指導書"]),
|
||||
sorted(
|
||||
["Default", "TestCategory", "medium_posts", "yeah", "test", "指導書"]
|
||||
),
|
||||
sorted(
|
||||
["Default", "TestCategory", "medium_posts", "Yeah", "test", "指導書"]
|
||||
),
|
||||
]
|
||||
self.assertIn(categories, categories_expected)
|
||||
|
||||
|
|
@ -864,6 +884,7 @@ class TestArticlesGenerator(unittest.TestCase):
|
|||
generator.generate_context()
|
||||
|
||||
expected = [
|
||||
"A title",
|
||||
"An Article With Code Block To Test Typogrify Ignore",
|
||||
"Article title",
|
||||
"Article with Nonconformant HTML meta tags",
|
||||
|
|
|
|||
|
|
@ -21,6 +21,10 @@ from pelican.tools.pelican_import import (
|
|||
get_attachments,
|
||||
tumblr2fields,
|
||||
wp2fields,
|
||||
mediumpost2fields,
|
||||
mediumposts2fields,
|
||||
strip_medium_post_content,
|
||||
medium_slug,
|
||||
)
|
||||
from pelican.utils import path_to_file_url, slugify
|
||||
|
||||
|
|
@ -708,3 +712,82 @@ class TestTumblrImporter(TestCaseWithCLocale):
|
|||
posts,
|
||||
posts,
|
||||
)
|
||||
|
||||
|
||||
class TestMediumImporter(TestCaseWithCLocale):
    """Tests for importing posts from a medium HTML export."""

    def setUp(self):
        """Load the expected post content and build the expected fields tuple."""
        super().setUp()
        self.test_content_root = "pelican/tests/content"
        # Parsed content is close to, but not exactly, the exported HTML
        # (Beautiful Soup reorders attributes, for instance), so the expected
        # content lives in its own fixture file.
        expected_path = f"{self.test_content_root}/medium_post_content.txt"
        with open(expected_path, encoding="utf-8") as expected_file:
            raw_expected = expected_file.read()
        # Editors and scripts commonly append a final newline to the fixture:
        # require it, then drop it before comparing.
        assert raw_expected[-1] == "\n"
        expected_content = raw_expected[:-1]
        self.post_tuple = (
            "A title",
            expected_content,
            # slug:
            "2017-04-21-medium-post",
            "2017-04-21 17:11",
            "User Name",
            None,
            (),
            "published",
            "article",
            "html",
        )

    def test_mediumpost2field(self):
        """Parse one post"""
        exported_post = (
            f"{self.test_content_root}"
            "/medium_posts/2017-04-21_-medium-post--d1bf01d62ba3.html"
        )
        parsed = mediumpost2fields(exported_post)
        self.assertEqual(self.post_tuple, parsed, parsed)

    def test_mediumposts2field(self):
        """Parse all posts in an export directory"""
        export_dir = f"{self.test_content_root}/medium_posts"
        posts = list(mediumposts2fields(export_dir))
        self.assertEqual(1, len(posts))
        self.assertEqual(self.post_tuple, posts[0])

    def test_strip_content(self):
        """Strip out unhelpful tags"""
        markup = (
            "<section>This keeps <i>lots</i> of <b>tags</b>, but not "
            "the <section>section</section> tags</section>"
        )
        stripped = strip_medium_post_content(BeautifulSoup(markup, "html.parser"))
        self.assertEqual(
            "This keeps <i>lots</i> of <b>tags</b>, but not the section tags",
            stripped,
        )

    def test_medium_slug(self):
        """Trailing hex ids and DRAFT markers are removed from the slug"""
        cases = (
            # Remove hex stuff at the end
            (
                "2017-04-27_A-long-title",
                "medium-export/posts/2017-04-27_A-long-title--2971442227dd.html",
            ),
            # Remove "--DRAFT" at the end
            (
                "2017-04-27_A-long-title",
                "medium-export/posts/2017-04-27_A-long-title--DRAFT.html",
            ),
            # Remove both (which happens)
            ("draft_How-to-do", "draft_How-to-do--DRAFT--87225c81dddd.html"),
            # If no hex stuff, leave it alone
            (
                "2017-04-27_A-long-title",
                "medium-export/posts/2017-04-27_A-long-title.html",
            ),
        )
        for expected_slug, exported_path in cases:
            self.assertEqual(expected_slug, medium_slug(exported_path))
|
||||
|
|
|
|||
|
|
@ -15,6 +15,8 @@ from urllib.error import URLError
|
|||
from urllib.parse import quote, urlparse, urlsplit, urlunsplit
|
||||
from urllib.request import urlretrieve
|
||||
|
||||
import dateutil.parser
|
||||
|
||||
# because logging.setLoggerClass has to be called before logging.getLogger
|
||||
from pelican.log import init
|
||||
from pelican.settings import DEFAULT_CONFIG
|
||||
|
|
@ -114,19 +116,25 @@ def decode_wp_content(content, br=True):
|
|||
return content
|
||||
|
||||
|
||||
def xml_to_soup(xml):
|
||||
"""Opens an xml file"""
|
||||
def _import_bs4():
|
||||
"""Import and return bs4, otherwise sys.exit."""
|
||||
try:
|
||||
from bs4 import BeautifulSoup
|
||||
import bs4
|
||||
except ImportError:
|
||||
error = (
|
||||
'Missing dependency "BeautifulSoup4" and "lxml" required to '
|
||||
"import XML files."
|
||||
)
|
||||
sys.exit(error)
|
||||
return bs4
|
||||
|
||||
|
||||
def file_to_soup(xml, features="xml"):
|
||||
"""Reads a file, returns soup."""
|
||||
bs4 = _import_bs4()
|
||||
with open(xml, encoding="utf-8") as infile:
|
||||
xmlfile = infile.read()
|
||||
soup = BeautifulSoup(xmlfile, "xml")
|
||||
soup = bs4.BeautifulSoup(xmlfile, features)
|
||||
return soup
|
||||
|
||||
|
||||
|
|
@ -140,7 +148,7 @@ def get_filename(post_name, post_id):
|
|||
def wp2fields(xml, wp_custpost=False):
|
||||
"""Opens a wordpress XML file, and yield Pelican fields"""
|
||||
|
||||
soup = xml_to_soup(xml)
|
||||
soup = file_to_soup(xml)
|
||||
items = soup.rss.channel.findAll("item")
|
||||
for item in items:
|
||||
if item.find("status").string in ["publish", "draft"]:
|
||||
|
|
@ -210,7 +218,7 @@ def wp2fields(xml, wp_custpost=False):
|
|||
def blogger2fields(xml):
|
||||
"""Opens a blogger XML file, and yield Pelican fields"""
|
||||
|
||||
soup = xml_to_soup(xml)
|
||||
soup = file_to_soup(xml)
|
||||
entries = soup.feed.findAll("entry")
|
||||
for entry in entries:
|
||||
raw_kind = entry.find(
|
||||
|
|
@ -536,6 +544,133 @@ def tumblr2fields(api_key, blogname):
|
|||
posts = _get_tumblr_posts(api_key, blogname, offset)
|
||||
|
||||
|
||||
def strip_medium_post_content(soup) -> str:
    """Return medium post content as a string, minus troublesome markup.

    The 'section', 'div' and 'footer' tags are unwrapped (their children are
    kept), and the "name", "id" and "class" attributes are removed from every
    remaining tag.

    Why: 'section' and 'div' wrappers can leave a section divider
    (--------------) that is not between two pieces of content, e.g.:

        Some text.

        .. container:: section-divider

           --------------

        .. container:: section-content

        More content.

    for which pandoc complains: "Unexpected section title or transition."
    The "id" and "name" attributes cause similar problems: they show up in
    .rst as extra junk that separates transitions.

    ``soup`` is modified in place; the return value is the concatenation of
    the string form of each of its direct children.
    """
    # Unwrap the containers. 'footer' can also cause problems and holds
    # nothing worth keeping as a wrapper.
    # See https://stackoverflow.com/a/8439761
    for unwanted_name in ("section", "div", "footer"):
        for wrapper in soup.findAll(unwanted_name):
            wrapper.replaceWithChildren()

    # Drop the noisy attributes from every tag that is left.
    # See https://stackoverflow.com/a/9045719
    dropped_attributes = ("name", "id", "class")
    tag_type = _import_bs4().element.Tag
    for node in soup.descendants:
        if not isinstance(node, tag_type):
            # Text nodes and the like carry no attributes.
            continue
        node.attrs = {
            attribute: setting
            for attribute, setting in node.attrs.items()
            if attribute not in dropped_attributes
        }

    # Serialise the direct children, which keeps all the other tags.
    return "".join(map(str, soup.contents))
|
||||
|
||||
|
||||
def mediumpost2fields(filepath: str) -> tuple:
    """Take an HTML post from a medium export, return Pelican fields.

    Returns a 10-tuple:
    ``(title, content, slug, date, author, None, tags, status, kind, "html")``

    - ``title``: text of the ``<title>`` element, or "" when the element is
      missing or has no single string.
    - ``content``: the ``section.e-content`` element after
      ``strip_medium_post_content``.
    - ``slug``: ``medium_slug(filepath)``.
    - ``date``: "%Y-%m-%d %H:%M" taken from ``time.dt-published``, or None.
    - ``author``: string of the ``a.p-author.h-card`` link, or None.
    - sixth field is always None (presumably categories, which the medium
      HTML export does not provide -- confirm against fields2pelican).
    - ``tags``: always an empty tuple.
    - ``status``: "published" when a publication time is present, else "draft".
    - ``kind``: always "article".

    Raises ValueError when the file yields no soup or has no
    ``section.e-content`` element.
    """

    soup = file_to_soup(filepath, "html.parser")
    if not soup:
        raise ValueError(f"{filepath} could not be parsed by beautifulsoup")
    kind = "article"

    content = soup.find("section", class_="e-content")
    if not content:
        raise ValueError(f"{filepath}: Post has no content")

    # find() returns None when there is no <title> element; treat that the
    # same as an empty title instead of failing on None.string.
    title_tag = soup.find("title")
    title = (title_tag.string if title_tag is not None else None) or ""

    raw_date = soup.find("time", class_="dt-published")
    date = None
    if raw_date:
        # This datetime can include timezone, e.g., "2017-04-21T17:11:55.799Z"
        # python before 3.11 can't parse the timezone using datetime.fromisoformat
        # See also https://docs.python.org/3.10/library/datetime.html#datetime.datetime.fromisoformat
        # "This does not support parsing arbitrary ISO 8601 strings"
        # So, we use dateutil.parser, which can handle it.
        date_object = dateutil.parser.parse(raw_date.attrs["datetime"])
        date = date_object.strftime("%Y-%m-%d %H:%M")
        status = "published"
    else:
        # No publication time in the export: treat the post as a draft.
        status = "draft"
    author = soup.find("a", class_="p-author h-card")
    if author:
        author = author.string

    # Now that we're done with classes, we can strip the content
    content = strip_medium_post_content(content)

    # medium HTML export doesn't have tag or category
    # RSS feed has tags, but it doesn't have all the posts.
    tags = ()

    slug = medium_slug(filepath)

    # TODO: make the fields a python dataclass
    return (
        title,
        content,
        slug,
        date,
        author,
        None,
        tags,
        status,
        kind,
        "html",
    )
|
||||
|
||||
|
||||
def medium_slug(filepath: str) -> str:
    """Derive a slug from the path of a file in a medium export.

    The slug is the file name without directory or extension, with "_-"
    collapsed to "-" and any trailing run of "-<hex>" / "-DRAFT" suffixes
    (one or more dashes each) removed.

    NOTE(review): the suffix pattern accepts any run of [0-9a-f] characters,
    so trailing title words spelled only with those characters
    (e.g. "-bad-cafe") are removed together with the id -- confirm this is
    acceptable for real exports.
    """
    # slug: filename without extension
    stem, _extension = os.path.splitext(os.path.basename(filepath))
    # A medium export filename looks like date_-title-...html, but RST doesn't
    # like "_-" (see https://github.com/sphinx-doc/sphinx/issues/4350).
    rst_safe = stem.replace("_-", "-")
    # Medium appends a hex string to the filename (e.g., "-a8a8a8a8" or
    # "---a9a9a9a9") and marks drafts with "--DRAFT"; neither is worth keeping.
    trailing_suffixes = re.compile(r"((-)+([0-9a-f]+|DRAFT))+$")
    return trailing_suffixes.sub("", rst_safe)
|
||||
|
||||
|
||||
def mediumposts2fields(medium_export_dir: str):
    """Yield Pelican fields for each entry of a medium export directory.

    Every name returned by ``os.listdir(medium_export_dir)`` is joined to the
    directory and handed to ``mediumpost2fields``; entries are produced
    lazily, in the order ``os.listdir`` reports them.
    """
    entry_names = map(os.fsdecode, os.listdir(medium_export_dir))
    for entry_name in entry_names:
        yield mediumpost2fields(os.path.join(medium_export_dir, entry_name))
|
||||
|
||||
|
||||
def feed2fields(file):
|
||||
"""Read a feed and yield pelican fields"""
|
||||
import feedparser
|
||||
|
|
@ -711,7 +846,7 @@ def get_attachments(xml):
|
|||
"""returns a dictionary of posts that have attachments with a list
|
||||
of the attachment_urls
|
||||
"""
|
||||
soup = xml_to_soup(xml)
|
||||
soup = file_to_soup(xml)
|
||||
items = soup.rss.channel.findAll("item")
|
||||
names = {}
|
||||
attachments = []
|
||||
|
|
@ -837,6 +972,9 @@ def fields2pelican(
|
|||
posts_require_pandoc.append(filename)
|
||||
|
||||
slug = not disable_slugs and filename or None
|
||||
assert slug is None or filename == os.path.basename(
|
||||
filename
|
||||
), f"filename is not a basename: {filename}"
|
||||
|
||||
if wp_attach and attachments:
|
||||
try:
|
||||
|
|
@ -984,6 +1122,9 @@ def main():
|
|||
parser.add_argument(
|
||||
"--dotclear", action="store_true", dest="dotclear", help="Dotclear export"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--medium", action="store_true", dest="medium", help="Medium export"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--tumblr", action="store_true", dest="tumblr", help="Tumblr export"
|
||||
)
|
||||
|
|
@ -1069,6 +1210,8 @@ def main():
|
|||
input_type = "blogger"
|
||||
elif args.dotclear:
|
||||
input_type = "dotclear"
|
||||
elif args.medium:
|
||||
input_type = "medium"
|
||||
elif args.tumblr:
|
||||
input_type = "tumblr"
|
||||
elif args.wpfile:
|
||||
|
|
@ -1077,8 +1220,8 @@ def main():
|
|||
input_type = "feed"
|
||||
else:
|
||||
error = (
|
||||
"You must provide either --blogger, --dotclear, "
|
||||
"--tumblr, --wpfile or --feed options"
|
||||
"You must provide one of --blogger, --dotclear, "
|
||||
"--medium, --tumblr, --wpfile or --feed options"
|
||||
)
|
||||
exit(error)
|
||||
|
||||
|
|
@ -1097,12 +1240,16 @@ def main():
|
|||
fields = blogger2fields(args.input)
|
||||
elif input_type == "dotclear":
|
||||
fields = dc2fields(args.input)
|
||||
elif input_type == "medium":
|
||||
fields = mediumposts2fields(args.input)
|
||||
elif input_type == "tumblr":
|
||||
fields = tumblr2fields(args.input, args.blogname)
|
||||
elif input_type == "wordpress":
|
||||
fields = wp2fields(args.input, args.wp_custpost or False)
|
||||
elif input_type == "feed":
|
||||
fields = feed2fields(args.input)
|
||||
else:
|
||||
raise ValueError(f"Unhandled input_type {input_type}")
|
||||
|
||||
if args.wp_attach:
|
||||
attachments = get_attachments(args.input)
|
||||
|
|
|
|||
|
|
@ -44,6 +44,7 @@ _TEMPLATES_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "templ
|
|||
_jinja_env = Environment(
|
||||
loader=FileSystemLoader(_TEMPLATES_DIR),
|
||||
trim_blocks=True,
|
||||
keep_trailing_newline=True,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -37,6 +37,7 @@ DROPBOX_DIR={{dropbox_dir}}
|
|||
{% endif %}
|
||||
{% if github %}
|
||||
GITHUB_PAGES_BRANCH={{github_pages_branch}}
|
||||
GITHUB_PAGES_COMMIT_MESSAGE=Generate Pelican site
|
||||
|
||||
{% endif %}
|
||||
|
||||
|
|
@ -161,7 +162,7 @@ cf_upload: publish
|
|||
{% if github %}
|
||||
{% set upload = upload + ["github"] %}
|
||||
github: publish
|
||||
ghp-import -m "Generate Pelican site" -b $(GITHUB_PAGES_BRANCH) "$(OUTPUTDIR)"
|
||||
ghp-import -m "$(GITHUB_PAGES_COMMIT_MESSAGE)" -b $(GITHUB_PAGES_BRANCH) "$(OUTPUTDIR)" --no-jekyll
|
||||
git push origin $(GITHUB_PAGES_BRANCH)
|
||||
|
||||
{% endif %}
|
||||
|
|
|
|||
151
pelican/utils.py
151
pelican/utils.py
|
|
@ -1,3 +1,5 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import datetime
|
||||
import fnmatch
|
||||
import locale
|
||||
|
|
@ -16,6 +18,21 @@ from html import entities
|
|||
from html.parser import HTMLParser
|
||||
from itertools import groupby
|
||||
from operator import attrgetter
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
Callable,
|
||||
Collection,
|
||||
Dict,
|
||||
Generator,
|
||||
Iterable,
|
||||
List,
|
||||
Optional,
|
||||
Sequence,
|
||||
Tuple,
|
||||
Type,
|
||||
Union,
|
||||
)
|
||||
|
||||
import dateutil.parser
|
||||
|
||||
|
|
@ -27,11 +44,15 @@ from markupsafe import Markup
|
|||
|
||||
import watchfiles
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pelican.contents import Content
|
||||
from pelican.readers import Readers
|
||||
from pelican.settings import Settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def sanitised_join(base_directory, *parts):
|
||||
def sanitised_join(base_directory: str, *parts: str) -> str:
|
||||
joined = posixize_path(os.path.abspath(os.path.join(base_directory, *parts)))
|
||||
base = posixize_path(os.path.abspath(base_directory))
|
||||
if not joined.startswith(base):
|
||||
|
|
@ -40,7 +61,7 @@ def sanitised_join(base_directory, *parts):
|
|||
return joined
|
||||
|
||||
|
||||
def strftime(date, date_format):
|
||||
def strftime(date: datetime.datetime, date_format: str) -> str:
|
||||
"""
|
||||
Enhanced replacement for built-in strftime with zero stripping
|
||||
|
||||
|
|
@ -109,10 +130,14 @@ class DateFormatter:
|
|||
defined in LOCALE setting
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
def __init__(self) -> None:
|
||||
self.locale = locale.setlocale(locale.LC_TIME)
|
||||
# python has issue with Turkish_Türkiye.1254 locale, replace it to
|
||||
# something accepted: Turkish
|
||||
if self.locale == "Turkish_Türkiye.1254":
|
||||
self.locale = "Turkish"
|
||||
|
||||
def __call__(self, date, date_format):
|
||||
def __call__(self, date: datetime.datetime, date_format: str) -> str:
|
||||
# on OSX, encoding from LC_CTYPE determines the unicode output in PY3
|
||||
# make sure it's same as LC_TIME
|
||||
with temporary_locale(self.locale, locale.LC_TIME), temporary_locale(
|
||||
|
|
@ -131,11 +156,11 @@ class memoized:
|
|||
|
||||
"""
|
||||
|
||||
def __init__(self, func):
|
||||
def __init__(self, func: Callable) -> None:
|
||||
self.func = func
|
||||
self.cache = {}
|
||||
self.cache: Dict[Any, Any] = {}
|
||||
|
||||
def __call__(self, *args):
|
||||
def __call__(self, *args) -> Any:
|
||||
if not isinstance(args, Hashable):
|
||||
# uncacheable. a list, for instance.
|
||||
# better to not cache than blow up.
|
||||
|
|
@ -147,17 +172,23 @@ class memoized:
|
|||
self.cache[args] = value
|
||||
return value
|
||||
|
||||
def __repr__(self):
|
||||
def __repr__(self) -> Optional[str]:
|
||||
return self.func.__doc__
|
||||
|
||||
def __get__(self, obj, objtype):
|
||||
def __get__(self, obj: Any, objtype):
|
||||
"""Support instance methods."""
|
||||
fn = partial(self.__call__, obj)
|
||||
fn.cache = self.cache
|
||||
return fn
|
||||
|
||||
|
||||
def deprecated_attribute(old, new, since=None, remove=None, doc=None):
|
||||
def deprecated_attribute(
|
||||
old: str,
|
||||
new: str,
|
||||
since: Tuple[int, ...],
|
||||
remove: Optional[Tuple[int, ...]] = None,
|
||||
doc: Optional[str] = None,
|
||||
):
|
||||
"""Attribute deprecation decorator for gentle upgrades
|
||||
|
||||
For example:
|
||||
|
|
@ -198,7 +229,7 @@ def deprecated_attribute(old, new, since=None, remove=None, doc=None):
|
|||
return decorator
|
||||
|
||||
|
||||
def get_date(string):
|
||||
def get_date(string: str) -> datetime.datetime:
|
||||
"""Return a datetime object from a string.
|
||||
|
||||
If no format matches the given date, raise a ValueError.
|
||||
|
|
@ -212,7 +243,9 @@ def get_date(string):
|
|||
|
||||
|
||||
@contextmanager
|
||||
def pelican_open(filename, mode="r", strip_crs=(sys.platform == "win32")):
|
||||
def pelican_open(
|
||||
filename: str, mode: str = "r", strip_crs: bool = (sys.platform == "win32")
|
||||
) -> Generator[str, None, None]:
|
||||
"""Open a file and return its content"""
|
||||
|
||||
# utf-8-sig will clear any BOM if present
|
||||
|
|
@ -221,7 +254,12 @@ def pelican_open(filename, mode="r", strip_crs=(sys.platform == "win32")):
|
|||
yield content
|
||||
|
||||
|
||||
def slugify(value, regex_subs=(), preserve_case=False, use_unicode=False):
|
||||
def slugify(
|
||||
value: str,
|
||||
regex_subs: Iterable[Tuple[str, str]] = (),
|
||||
preserve_case: bool = False,
|
||||
use_unicode: bool = False,
|
||||
) -> str:
|
||||
"""
|
||||
Normalizes string, converts to lowercase, removes non-alpha characters,
|
||||
and converts spaces to hyphens.
|
||||
|
|
@ -233,9 +271,10 @@ def slugify(value, regex_subs=(), preserve_case=False, use_unicode=False):
|
|||
"""
|
||||
|
||||
import unicodedata
|
||||
|
||||
import unidecode
|
||||
|
||||
def normalize_unicode(text):
|
||||
def normalize_unicode(text: str) -> str:
|
||||
# normalize text by compatibility composition
|
||||
# see: https://en.wikipedia.org/wiki/Unicode_equivalence
|
||||
return unicodedata.normalize("NFKC", text)
|
||||
|
|
@ -262,7 +301,9 @@ def slugify(value, regex_subs=(), preserve_case=False, use_unicode=False):
|
|||
return value.strip()
|
||||
|
||||
|
||||
def copy(source, destination, ignores=None):
|
||||
def copy(
|
||||
source: str, destination: str, ignores: Optional[Iterable[str]] = None
|
||||
) -> None:
|
||||
"""Recursively copy source into destination.
|
||||
|
||||
If source is a file, destination has to be a file as well.
|
||||
|
|
@ -334,7 +375,7 @@ def copy(source, destination, ignores=None):
|
|||
)
|
||||
|
||||
|
||||
def copy_file(source, destination):
|
||||
def copy_file(source: str, destination: str) -> None:
|
||||
"""Copy a file"""
|
||||
try:
|
||||
shutil.copyfile(source, destination)
|
||||
|
|
@ -344,7 +385,7 @@ def copy_file(source, destination):
|
|||
)
|
||||
|
||||
|
||||
def clean_output_dir(path, retention):
|
||||
def clean_output_dir(path: str, retention: Iterable[str]) -> None:
|
||||
"""Remove all files from output directory except those in retention list"""
|
||||
|
||||
if not os.path.exists(path):
|
||||
|
|
@ -381,24 +422,24 @@ def clean_output_dir(path, retention):
|
|||
logger.error("Unable to delete %s, file type unknown", file)
|
||||
|
||||
|
||||
def get_relative_path(path):
|
||||
def get_relative_path(path: str) -> str:
|
||||
"""Return the relative path from the given path to the root path."""
|
||||
components = split_all(path)
|
||||
if len(components) <= 1:
|
||||
if components is None or len(components) <= 1:
|
||||
return os.curdir
|
||||
else:
|
||||
parents = [os.pardir] * (len(components) - 1)
|
||||
return os.path.join(*parents)
|
||||
|
||||
|
||||
def path_to_url(path):
|
||||
def path_to_url(path: str) -> str:
|
||||
"""Return the URL corresponding to a given path."""
|
||||
if path is not None:
|
||||
path = posixize_path(path)
|
||||
return path
|
||||
|
||||
|
||||
def posixize_path(rel_path):
|
||||
def posixize_path(rel_path: str) -> str:
|
||||
"""Use '/' as path separator, so that source references,
|
||||
like '{static}/foo/bar.jpg' or 'extras/favicon.ico',
|
||||
will work on Windows as well as on Mac and Linux."""
|
||||
|
|
@ -427,20 +468,20 @@ class _HTMLWordTruncator(HTMLParser):
|
|||
_singlets = ("br", "col", "link", "base", "img", "param", "area", "hr", "input")
|
||||
|
||||
class TruncationCompleted(Exception):
|
||||
def __init__(self, truncate_at):
|
||||
def __init__(self, truncate_at: int) -> None:
|
||||
super().__init__(truncate_at)
|
||||
self.truncate_at = truncate_at
|
||||
|
||||
def __init__(self, max_words):
|
||||
def __init__(self, max_words: int) -> None:
|
||||
super().__init__(convert_charrefs=False)
|
||||
|
||||
self.max_words = max_words
|
||||
self.words_found = 0
|
||||
self.open_tags = []
|
||||
self.last_word_end = None
|
||||
self.truncate_at = None
|
||||
self.truncate_at: Optional[int] = None
|
||||
|
||||
def feed(self, *args, **kwargs):
|
||||
def feed(self, *args, **kwargs) -> None:
|
||||
try:
|
||||
super().feed(*args, **kwargs)
|
||||
except self.TruncationCompleted as exc:
|
||||
|
|
@ -448,29 +489,29 @@ class _HTMLWordTruncator(HTMLParser):
|
|||
else:
|
||||
self.truncate_at = None
|
||||
|
||||
def getoffset(self):
|
||||
def getoffset(self) -> int:
|
||||
line_start = 0
|
||||
lineno, line_offset = self.getpos()
|
||||
for i in range(lineno - 1):
|
||||
line_start = self.rawdata.index("\n", line_start) + 1
|
||||
return line_start + line_offset
|
||||
|
||||
def add_word(self, word_end):
|
||||
def add_word(self, word_end: int) -> None:
|
||||
self.words_found += 1
|
||||
self.last_word_end = None
|
||||
if self.words_found == self.max_words:
|
||||
raise self.TruncationCompleted(word_end)
|
||||
|
||||
def add_last_word(self):
|
||||
def add_last_word(self) -> None:
|
||||
if self.last_word_end is not None:
|
||||
self.add_word(self.last_word_end)
|
||||
|
||||
def handle_starttag(self, tag, attrs):
|
||||
def handle_starttag(self, tag: str, attrs: Any) -> None:
|
||||
self.add_last_word()
|
||||
if tag not in self._singlets:
|
||||
self.open_tags.insert(0, tag)
|
||||
|
||||
def handle_endtag(self, tag):
|
||||
def handle_endtag(self, tag: str) -> None:
|
||||
self.add_last_word()
|
||||
try:
|
||||
i = self.open_tags.index(tag)
|
||||
|
|
@ -481,7 +522,7 @@ class _HTMLWordTruncator(HTMLParser):
|
|||
# all unclosed intervening start tags with omitted end tags
|
||||
del self.open_tags[: i + 1]
|
||||
|
||||
def handle_data(self, data):
|
||||
def handle_data(self, data: str) -> None:
|
||||
word_end = 0
|
||||
offset = self.getoffset()
|
||||
|
||||
|
|
@ -499,7 +540,7 @@ class _HTMLWordTruncator(HTMLParser):
|
|||
if word_end < len(data):
|
||||
self.add_last_word()
|
||||
|
||||
def _handle_ref(self, name, char):
|
||||
def _handle_ref(self, name: str, char: str) -> None:
|
||||
"""
|
||||
Called by handle_entityref() or handle_charref() when a ref like
|
||||
`—`, `—`, or `—` is found.
|
||||
|
|
@ -543,7 +584,7 @@ class _HTMLWordTruncator(HTMLParser):
|
|||
else:
|
||||
self.add_last_word()
|
||||
|
||||
def handle_entityref(self, name):
|
||||
def handle_entityref(self, name: str) -> None:
|
||||
"""
|
||||
Called when an entity ref like '—' is found
|
||||
|
||||
|
|
@ -556,7 +597,7 @@ class _HTMLWordTruncator(HTMLParser):
|
|||
char = ""
|
||||
self._handle_ref(name, char)
|
||||
|
||||
def handle_charref(self, name):
|
||||
def handle_charref(self, name: str) -> None:
|
||||
"""
|
||||
Called when a char ref like '—' or '—' is found
|
||||
|
||||
|
|
@ -574,7 +615,7 @@ class _HTMLWordTruncator(HTMLParser):
|
|||
self._handle_ref("#" + name, char)
|
||||
|
||||
|
||||
def truncate_html_words(s, num, end_text="…"):
|
||||
def truncate_html_words(s: str, num: int, end_text: str = "…") -> str:
|
||||
"""Truncates HTML to a certain number of words.
|
||||
|
||||
(not counting tags and comments). Closes opened tags if they were correctly
|
||||
|
|
@ -600,7 +641,10 @@ def truncate_html_words(s, num, end_text="…"):
|
|||
return out
|
||||
|
||||
|
||||
def process_translations(content_list, translation_id=None):
|
||||
def process_translations(
|
||||
content_list: List[Content],
|
||||
translation_id: Optional[Union[str, Collection[str]]] = None,
|
||||
) -> Tuple[List[Content], List[Content]]:
|
||||
"""Finds translations and returns them.
|
||||
|
||||
For each content_list item, populates the 'translations' attribute, and
|
||||
|
|
@ -658,7 +702,7 @@ def process_translations(content_list, translation_id=None):
|
|||
return index, translations
|
||||
|
||||
|
||||
def get_original_items(items, with_str):
|
||||
def get_original_items(items: List[Content], with_str: str) -> List[Content]:
|
||||
def _warn_source_paths(msg, items, *extra):
|
||||
args = [len(items)]
|
||||
args.extend(extra)
|
||||
|
|
@ -698,7 +742,10 @@ def get_original_items(items, with_str):
|
|||
return original_items
|
||||
|
||||
|
||||
def order_content(content_list, order_by="slug"):
|
||||
def order_content(
|
||||
content_list: List[Content],
|
||||
order_by: Union[str, Callable[[Content], Any], None] = "slug",
|
||||
) -> List[Content]:
|
||||
"""Sorts content.
|
||||
|
||||
order_by can be a string of an attribute or sorting function. If order_by
|
||||
|
|
@ -758,7 +805,11 @@ def order_content(content_list, order_by="slug"):
|
|||
return content_list
|
||||
|
||||
|
||||
def wait_for_changes(settings_file, reader_class, settings):
|
||||
def wait_for_changes(
|
||||
settings_file: str,
|
||||
reader_class: Type["Readers"],
|
||||
settings: "Settings",
|
||||
):
|
||||
content_path = settings.get("PATH", "")
|
||||
theme_path = settings.get("THEME", "")
|
||||
ignore_files = {
|
||||
|
|
@ -788,13 +839,15 @@ def wait_for_changes(settings_file, reader_class, settings):
|
|||
return next(
|
||||
watchfiles.watch(
|
||||
*watching_paths,
|
||||
watch_filter=watchfiles.DefaultFilter(ignore_entity_patterns=ignore_files),
|
||||
watch_filter=watchfiles.DefaultFilter(ignore_entity_patterns=ignore_files), # type: ignore
|
||||
rust_timeout=0,
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def set_date_tzinfo(d, tz_name=None):
|
||||
def set_date_tzinfo(
|
||||
d: datetime.datetime, tz_name: Optional[str] = None
|
||||
) -> datetime.datetime:
|
||||
"""Set the timezone for dates that don't have tzinfo"""
|
||||
if tz_name and not d.tzinfo:
|
||||
timezone = ZoneInfo(tz_name)
|
||||
|
|
@ -805,11 +858,11 @@ def set_date_tzinfo(d, tz_name=None):
|
|||
return d
|
||||
|
||||
|
||||
def mkdir_p(path):
|
||||
def mkdir_p(path: str) -> None:
|
||||
os.makedirs(path, exist_ok=True)
|
||||
|
||||
|
||||
def split_all(path):
|
||||
def split_all(path: Union[str, pathlib.Path, None]) -> Optional[Sequence[str]]:
|
||||
"""Split a path into a list of components
|
||||
|
||||
While os.path.split() splits a single component off the back of
|
||||
|
|
@ -840,12 +893,12 @@ def split_all(path):
|
|||
)
|
||||
|
||||
|
||||
def path_to_file_url(path):
|
||||
def path_to_file_url(path: str) -> str:
|
||||
"""Convert file-system path to file:// URL"""
|
||||
return urllib.parse.urljoin("file://", urllib.request.pathname2url(path))
|
||||
|
||||
|
||||
def maybe_pluralize(count, singular, plural):
|
||||
def maybe_pluralize(count: int, singular: str, plural: str) -> str:
|
||||
"""
|
||||
Returns a formatted string containing count and plural if count is not 1
|
||||
Returns count and singular if count is 1
|
||||
|
|
@ -862,7 +915,9 @@ def maybe_pluralize(count, singular, plural):
|
|||
|
||||
|
||||
@contextmanager
|
||||
def temporary_locale(temp_locale=None, lc_category=locale.LC_ALL):
|
||||
def temporary_locale(
|
||||
temp_locale: Optional[str] = None, lc_category: int = locale.LC_ALL
|
||||
) -> Generator[None, None, None]:
|
||||
"""
|
||||
Enable code to run in a context with a temporary locale
|
||||
Resets the locale back when exiting context.
|
||||
|
|
@ -871,6 +926,10 @@ def temporary_locale(temp_locale=None, lc_category=locale.LC_ALL):
|
|||
class to use the C locale.
|
||||
"""
|
||||
orig_locale = locale.setlocale(lc_category)
|
||||
# python has issue with Turkish_Türkiye.1254 locale, replace it to
|
||||
# something accepted: Turkish
|
||||
if orig_locale == "Turkish_Türkiye.1254":
|
||||
orig_locale = "Turkish"
|
||||
if temp_locale:
|
||||
locale.setlocale(lc_category, temp_locale)
|
||||
yield
|
||||
|
|
|
|||
|
|
@ -69,6 +69,7 @@ changelog-header = "###############"
|
|||
version-header = "="
|
||||
|
||||
[tool.pdm]
|
||||
ignore_package_warnings = ["sphinx"]
|
||||
|
||||
[tool.pdm.scripts]
|
||||
docbuild = "invoke docbuild"
|
||||
|
|
@ -95,7 +96,7 @@ dev = [
|
|||
"pytest-xdist>=3.4.0",
|
||||
"tox>=4.11.3",
|
||||
"invoke>=2.2.0",
|
||||
"ruff>=0.1.5",
|
||||
"ruff>=0.1.15,<0.2.0",
|
||||
"tomli>=2.0.1; python_version < \"3.11\"",
|
||||
]
|
||||
|
||||
|
|
|
|||
4
tox.ini
4
tox.ini
|
|
@ -1,5 +1,5 @@
|
|||
[tox]
|
||||
envlist = py{3.8,3.9,3.10,3.11.3.12},docs
|
||||
envlist = py{3.8,3.9,3.10,3.11,3.12},docs
|
||||
|
||||
[testenv]
|
||||
basepython =
|
||||
|
|
@ -18,7 +18,7 @@ commands =
|
|||
pytest -s --cov=pelican pelican
|
||||
|
||||
[testenv:docs]
|
||||
basepython = python3.9
|
||||
basepython = python3.11
|
||||
deps =
|
||||
-rrequirements/docs.pip
|
||||
changedir = docs
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue