forked from github/pelican
653 lines
23 KiB
Python
653 lines
23 KiB
Python
import copy
|
|
import datetime
|
|
import locale
|
|
import logging
|
|
import os
|
|
import re
|
|
from datetime import timezone
|
|
from html import unescape
|
|
from urllib.parse import unquote, urljoin, urlparse, urlunparse
|
|
|
|
try:
|
|
from zoneinfo import ZoneInfo
|
|
except ModuleNotFoundError:
|
|
from backports.zoneinfo import ZoneInfo
|
|
|
|
|
|
from pelican.plugins import signals
|
|
from pelican.settings import DEFAULT_CONFIG
|
|
from pelican.utils import (
|
|
deprecated_attribute,
|
|
memoized,
|
|
path_to_url,
|
|
posixize_path,
|
|
sanitised_join,
|
|
set_date_tzinfo,
|
|
slugify,
|
|
truncate_html_words,
|
|
)
|
|
|
|
# Import these so that they're available when you import from pelican.contents.
|
|
from pelican.urlwrappers import Author, Category, Tag, URLWrapper # NOQA
|
|
|
|
# Module-level logger, named after this module, used by the content classes
# below to report validation and link-replacement problems.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class Content:
    """Represents a content.

    Base class for ``Page``, ``Article`` and ``Static``. Every metadata entry
    is exposed as a lower-cased instance attribute, and several derived
    attributes (author(s), language, slug, date format, timezone, status) are
    filled in during construction.

    The class itself reads ``self.default_template`` and
    ``self.mandatory_properties`` but does not define them; they are supplied
    by subclasses. ``allowed_statuses`` and ``default_status`` are optional.

    :param content: the string to parse, containing the original content.
    :param metadata: the metadata associated to this page (optional).
    :param settings: the settings dictionary (optional).
    :param source_path: The location of the source of this content (if any).
    :param context: The shared context between generators.

    """

    @deprecated_attribute(old="filename", new="source_path", since=(3, 2, 0))
    def filename():
        return None

    def __init__(
        self, content, metadata=None, settings=None, source_path=None, context=None
    ):
        if metadata is None:
            metadata = {}
        if settings is None:
            # Work on a private copy so the shared defaults are never mutated.
            settings = copy.deepcopy(DEFAULT_CONFIG)
        if context is None:
            context = {}

        self.settings = settings
        self._content = content
        self._context = context
        self.translations = []

        # Shallow copy: the caller's mapping is left untouched.
        local_metadata = dict(metadata)

        # set metadata as attributes
        for key, value in local_metadata.items():
            # "save_as" and "url" are read-only properties on this class, so
            # user-provided values are stored under an "override_" prefix and
            # picked up by get_url_setting().
            if key in ("save_as", "url"):
                key = "override_" + key
            setattr(self, key.lower(), value)

        # also keep track of the metadata attributes available
        self.metadata = local_metadata

        # default template if it's not defined in page
        self.template = self._get_template()

        # First, read the authors from "authors", if not, fallback to "author"
        # and if not use the settings defined one, if any.
        if not hasattr(self, "author"):
            if hasattr(self, "authors"):
                self.author = self.authors[0]
            elif "AUTHOR" in settings:
                self.author = Author(settings["AUTHOR"], settings)

        if not hasattr(self, "authors") and hasattr(self, "author"):
            self.authors = [self.author]

        # XXX Split all the following code into pieces, there is too much here.

        # manage languages
        self.in_default_lang = True
        if "DEFAULT_LANG" in settings:
            default_lang = settings["DEFAULT_LANG"].lower()
            if not hasattr(self, "lang"):
                self.lang = default_lang

            self.in_default_lang = self.lang == default_lang

        # create the slug if not existing, generate slug according to
        # setting of SLUG_ATTRIBUTE
        if not hasattr(self, "slug"):
            if settings["SLUGIFY_SOURCE"] == "title" and hasattr(self, "title"):
                value = self.title
            elif settings["SLUGIFY_SOURCE"] == "basename" and source_path is not None:
                value = os.path.basename(os.path.splitext(source_path)[0])
            else:
                value = None
            if value is not None:
                self.slug = slugify(
                    value,
                    regex_subs=settings.get("SLUG_REGEX_SUBSTITUTIONS", []),
                    preserve_case=settings.get("SLUGIFY_PRESERVE_CASE", False),
                    use_unicode=settings.get("SLUGIFY_USE_UNICODE", False),
                )

        self.source_path = source_path
        self.relative_source_path = self.get_relative_source_path()

        # manage the date format
        if not hasattr(self, "date_format"):
            if hasattr(self, "lang") and self.lang in settings["DATE_FORMATS"]:
                self.date_format = settings["DATE_FORMATS"][self.lang]
            else:
                self.date_format = settings["DEFAULT_DATE_FORMAT"]

        # A tuple date format is (locale string, strftime format); the locale
        # is applied process-wide before the format string is kept.
        if isinstance(self.date_format, tuple):
            locale_string = self.date_format[0]
            locale.setlocale(locale.LC_ALL, locale_string)
            self.date_format = self.date_format[1]

        # manage timezone
        # (named ``tz_name`` so the ``datetime.timezone`` import at module
        # level is not shadowed inside this method)
        default_timezone = settings.get("TIMEZONE", "UTC")
        tz_name = getattr(self, "timezone", default_timezone)
        self.timezone = ZoneInfo(tz_name)

        if hasattr(self, "date"):
            self.date = set_date_tzinfo(self.date, tz_name)
            self.locale_date = self.date.strftime(self.date_format)

        if hasattr(self, "modified"):
            self.modified = set_date_tzinfo(self.modified, tz_name)
            self.locale_modified = self.modified.strftime(self.date_format)

        # manage status
        if not hasattr(self, "status"):
            # Previous default of None broke comment plugins and perhaps others
            self.status = getattr(self, "default_status", "")

        # store the summary metadata if it is set
        if "summary" in metadata:
            self._summary = metadata["summary"]

        signals.content_object_init.send(self)

    def __str__(self):
        return self.source_path or repr(self)

    def _has_valid_mandatory_properties(self):
        """Test mandatory properties are set."""
        for prop in self.mandatory_properties:
            if not hasattr(self, prop):
                logger.error(
                    "Skipping %s: could not find information about '%s'", self, prop
                )
                return False
        return True

    def _has_valid_save_as(self):
        """Return true if save_as doesn't write outside output path, false
        otherwise."""
        try:
            output_path = self.settings["OUTPUT_PATH"]
        except KeyError:
            # we cannot check
            return True

        try:
            sanitised_join(output_path, self.save_as)
        except RuntimeError:  # outside output_dir
            logger.error(
                "Skipping %s: file %r would be written outside output path",
                self,
                self.save_as,
            )
            return False

        return True

    def _has_valid_status(self):
        """Return False (and log an error) when ``allowed_statuses`` is
        defined and the current status is not one of them."""
        if hasattr(self, "allowed_statuses"):
            if self.status not in self.allowed_statuses:
                logger.error(
                    "Unknown status '%s' for file %s, skipping it. (Not in %s)",
                    self.status,
                    self,
                    self.allowed_statuses,
                )
                return False

        # if undefined we allow all
        return True

    def is_valid(self):
        """Validate Content"""
        # Use all() to not short circuit and get results of all validations
        return all(
            [
                self._has_valid_mandatory_properties(),
                self._has_valid_save_as(),
                self._has_valid_status(),
            ]
        )

    @property
    def url_format(self):
        """Returns the URL, formatted with the proper values"""
        metadata = copy.copy(self.metadata)
        path = self.metadata.get("path", self.get_relative_source_path())
        metadata.update(
            {
                "path": path_to_url(path),
                "slug": getattr(self, "slug", ""),
                "lang": getattr(self, "lang", "en"),
                "date": getattr(self, "date", datetime.datetime.now()),
                "author": self.author.slug if hasattr(self, "author") else "",
                "category": self.category.slug if hasattr(self, "category") else "",
            }
        )
        return metadata

    def _expand_settings(self, key, klass=None):
        """Look up the ``<KLASS>_<KEY>`` setting and format it with the
        values from :attr:`url_format`.

        :param key: setting suffix, e.g. ``"url"`` or ``"save_as"``.
        :param klass: setting prefix; defaults to this object's class name.
        """
        if not klass:
            klass = self.__class__.__name__
        fq_key = (f"{klass}_{key}").upper()
        return str(self.settings[fq_key]).format(**self.url_format)

    def get_url_setting(self, key):
        """Return the ``override_<key>`` attribute if one was set, otherwise
        the expanded setting (``lang_<key>`` when not in the default
        language)."""
        if hasattr(self, "override_" + key):
            return getattr(self, "override_" + key)
        key = key if self.in_default_lang else "lang_%s" % key
        return self._expand_settings(key)

    def _link_replacer(self, siteurl, m):
        """Build the replacement text for one intra-site link match.

        :param siteurl: base URL that resolved targets are joined onto.
        :param m: match produced by the regex from
            :meth:`_get_intrasite_link_regex`; the groups ``markup``,
            ``quote``, ``path``, ``value`` and ``what`` are read (``what`` is
            expected to come from the ``INTRASITE_LINK_REGEX`` setting).
        :return: the markup, quote and (possibly rewritten) URL, re-quoted.

        When no target can be resolved (linked content not found, or unknown
        replacement indicator) the original attribute value is emitted
        unchanged.
        """
        what = m.group("what")
        value = urlparse(m.group("value"))
        path = value.path
        # Stays None until a replacement target has actually been resolved.
        origin = None

        # urllib.parse.urljoin() produces `a.html` for urljoin("..", "a.html")
        # so if RELATIVE_URLS are enabled, we fall back to os.path.join() to
        # properly get `../a.html`. However, os.path.join() produces
        # `baz/http://foo/bar.html` for join("baz", "http://foo/bar.html")
        # instead of correct "http://foo/bar.html", so one has to pick a side
        # as there is no silver bullet.
        if self.settings["RELATIVE_URLS"]:
            joiner = os.path.join
        else:
            joiner = urljoin

        # However, it's not *that* simple: urljoin("blog", "index.html")
        # produces just `index.html` instead of `blog/index.html` (unlike
        # os.path.join()), so in order to get a correct answer one needs to
        # append a trailing slash to siteurl in that case. This also makes
        # the new behavior fully compatible with Pelican 3.7.1.
        if not siteurl.endswith("/"):
            siteurl += "/"

        # XXX Put this in a different location.
        if what in {"filename", "static", "attach"}:

            def _get_linked_content(key, url):
                nonlocal value

                def _find_path(path):
                    if path.startswith("/"):
                        path = path[1:]
                    else:
                        # relative to the source path of this content
                        path = self.get_relative_source_path(
                            os.path.join(self.relative_dir, path)
                        )
                    return self._context[key].get(path, None)

                # try path
                result = _find_path(url.path)
                if result is not None:
                    return result

                # try unquoted path
                result = _find_path(unquote(url.path))
                if result is not None:
                    return result

                # try html unescaped url
                unescaped_url = urlparse(unescape(url.geturl()))
                result = _find_path(unescaped_url.path)
                if result is not None:
                    # The remaining URL parts (query, fragment, ...) must come
                    # from the unescaped form as well.
                    value = unescaped_url
                    return result

                # check if a static file is linked with {filename}
                if what == "filename" and key == "generated_content":
                    linked_content = _get_linked_content("static_content", value)
                    if linked_content:
                        logger.warning(
                            "{filename} used for linking to static"
                            " content %s in %s. Use {static} instead",
                            value.path,
                            self.get_relative_source_path(),
                        )
                        return linked_content

                return None

            if what == "filename":
                key = "generated_content"
            else:
                key = "static_content"

            linked_content = _get_linked_content(key, value)
            if linked_content:
                if what == "attach":
                    linked_content.attach_to(self)
                origin = joiner(siteurl, linked_content.url)
                origin = origin.replace("\\", "/")  # for Windows paths.
            else:
                logger.warning(
                    "Unable to find '%s', skipping url replacement.",
                    value.geturl(),
                    extra={
                        "limit_msg": (
                            "Other resources were not found "
                            "and their urls not replaced"
                        )
                    },
                )
        elif what == "category":
            origin = joiner(siteurl, Category(path, self.settings).url)
        elif what == "tag":
            origin = joiner(siteurl, Tag(path, self.settings).url)
        elif what == "index":
            origin = joiner(siteurl, self.settings["INDEX_SAVE_AS"])
        elif what == "author":
            origin = joiner(siteurl, Author(path, self.settings).url)
        else:
            logger.warning(
                "Replacement Indicator '%s' not recognized, skipping replacement",
                what,
            )

        if origin is None:
            # Replacement was skipped: keep the attribute value exactly as it
            # was written. Re-assembling it through urlunparse() would append
            # the query/fragment (already part of the matched text) a second
            # time.
            origin = m.group("path")
        else:
            # keep all other parts, such as query, fragment, etc.
            parts = list(value)
            parts[2] = origin
            origin = urlunparse(parts)

        return "".join((m.group("markup"), m.group("quote"), origin, m.group("quote")))

    def _get_intrasite_link_regex(self):
        """Compile the verbose regex that finds quoted URL attributes whose
        value starts with the ``INTRASITE_LINK_REGEX`` setting."""
        intrasite_link_regex = self.settings["INTRASITE_LINK_REGEX"]
        regex = r"""
            (?P<markup><[^\>]+  # match tag with all url-value attributes
                (?:href|src|poster|data|cite|formaction|action|content)\s*=\s*)

            (?P<quote>["\'])      # require value to be quoted
            (?P<path>{}(?P<value>.*?))  # the url value
            (?P=quote)""".format(intrasite_link_regex)
        return re.compile(regex, re.X)

    def _update_content(self, content, siteurl):
        """Update the content attribute.

        Change all the relative paths of the content to relative paths
        suitable for the output content.

        :param content: content resource that will be passed to the templates.
        :param siteurl: siteurl which is locally generated by the writer in
                        case of RELATIVE_URLS.
        """
        if not content:
            return content

        hrefs = self._get_intrasite_link_regex()
        return hrefs.sub(lambda m: self._link_replacer(siteurl, m), content)

    def get_static_links(self):
        """Return the set of source-relative paths referenced through
        ``static`` or ``attach`` intra-site links in the raw content."""
        static_links = set()
        hrefs = self._get_intrasite_link_regex()
        for m in hrefs.finditer(self._content):
            what = m.group("what")
            value = urlparse(m.group("value"))
            path = value.path
            if what not in {"static", "attach"}:
                continue
            if path.startswith("/"):
                path = path[1:]
            else:
                # relative to the source path of this content
                path = self.get_relative_source_path(
                    os.path.join(self.relative_dir, path)
                )
            path = path.replace("%20", " ")
            static_links.add(path)
        return static_links

    def get_siteurl(self):
        """Return ``localsiteurl`` from the shared context, or ``""``."""
        return self._context.get("localsiteurl", "")

    @memoized
    def get_content(self, siteurl):
        """Return the content with intra-site links rewritten for *siteurl*.

        A ``_get_content`` method, when present on the object, supplies the
        text instead of the stored ``_content``.
        """
        if hasattr(self, "_get_content"):
            content = self._get_content()
        else:
            content = self._content
        return self._update_content(content, siteurl)

    @property
    def content(self):
        return self.get_content(self.get_siteurl())

    @memoized
    def get_summary(self, siteurl):
        """Returns the summary of an article.

        This is based on the summary metadata if set, otherwise truncate the
        content.
        """
        if "summary" in self.metadata:
            return self.metadata["summary"]

        if self.settings["SUMMARY_MAX_LENGTH"] is None:
            return self.content

        return truncate_html_words(
            self.content,
            self.settings["SUMMARY_MAX_LENGTH"],
            self.settings["SUMMARY_END_SUFFIX"],
        )

    @property
    def summary(self):
        return self.get_summary(self.get_siteurl())

    @summary.setter
    def summary(self, value):
        """Dummy function"""
        # Intentionally ignores the value: metadata is pushed onto the
        # instance with setattr() in __init__, and "summary" must not fail
        # there even though the property is computed.
        pass

    def _get_summary(self):
        """deprecated function to access summary"""

        logger.warning(
            "_get_summary() has been deprecated since 3.6.4. "
            "Use the summary decorator instead"
        )
        return self.summary

    @property
    def status(self):
        return self._status

    @status.setter
    def status(self, value):
        # TODO maybe typecheck
        self._status = value.lower()

    @property
    def url(self):
        return self.get_url_setting("url")

    @property
    def save_as(self):
        return self.get_url_setting("save_as")

    def _get_template(self):
        """Return the template set on the object (e.g. via metadata), or the
        class-level ``default_template`` when none is set."""
        if hasattr(self, "template") and self.template is not None:
            return self.template
        else:
            return self.default_template

    def get_relative_source_path(self, source_path=None):
        """Return the relative path (from the content path) to the given
        source_path.

        If no source path is specified, use the source path of this
        content object.
        """
        if not source_path:
            source_path = self.source_path
        if source_path is None:
            return None

        return posixize_path(
            os.path.relpath(
                os.path.abspath(os.path.join(self.settings["PATH"], source_path)),
                os.path.abspath(self.settings["PATH"]),
            )
        )

    @property
    def relative_dir(self):
        """Directory of the source file relative to the ``PATH`` setting, in
        posixized form."""
        return posixize_path(
            os.path.dirname(
                os.path.relpath(
                    os.path.abspath(self.source_path),
                    os.path.abspath(self.settings["PATH"]),
                )
            )
        )

    def refresh_metadata_intersite_links(self):
        """Re-run intra-site link replacement on every ``FORMATTED_FIELDS``
        metadata entry, updating both ``self.metadata`` and the matching
        attribute."""
        for key in self.settings["FORMATTED_FIELDS"]:
            if key in self.metadata and key != "summary":
                value = self._update_content(self.metadata[key], self.get_siteurl())
                self.metadata[key] = value
                setattr(self, key.lower(), value)

        # _summary is an internal variable that some plugins may be writing to,
        # so ensure changes to it are picked up
        if (
            "summary" in self.settings["FORMATTED_FIELDS"]
            and "summary" in self.metadata
        ):
            self._summary = self._update_content(self._summary, self.get_siteurl())
            self.metadata["summary"] = self._summary
|
|
|
|
|
|
class Page(Content):
    """Content type for pages; requires a ``title``."""

    mandatory_properties = ("title",)
    allowed_statuses = ("published", "hidden", "draft")
    default_status = "published"
    default_template = "page"

    def _expand_settings(self, key):
        # Draft pages read the DRAFT_PAGE_* settings; any other status lets
        # the base class derive the prefix from the class name.
        if self.status == "draft":
            prefix = "draft_page"
        else:
            prefix = None
        return super()._expand_settings(key, prefix)
|
|
|
|
|
|
class Article(Content):
    """Content type for articles; requires ``title``, ``date`` and
    ``category``.

    Unless the ``WITH_FUTURE_DATES`` setting is truthy, an article dated in
    the future is switched to ``draft`` status. A draft without any date gets
    the maximum representable datetime in the article's timezone.
    """

    mandatory_properties = ("title", "date", "category")
    allowed_statuses = ("published", "hidden", "draft")
    default_status = "published"
    default_template = "article"

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # handle WITH_FUTURE_DATES (designate article to draft based on date)
        if not self.settings["WITH_FUTURE_DATES"] and hasattr(self, "date"):
            # Naive and aware datetimes cannot be compared with each other,
            # so "now" is built to match the kind of ``self.date``.
            if self.date.tzinfo is None:
                now = datetime.datetime.now()
            else:
                # Aware "now" in UTC. datetime.utcnow() is deprecated since
                # Python 3.12; this is the documented replacement.
                now = datetime.datetime.now(timezone.utc)
            if self.date > now:
                self.status = "draft"

        # if we are a draft and there is no date provided, set max datetime
        if not hasattr(self, "date") and self.status == "draft":
            self.date = datetime.datetime.max.replace(tzinfo=self.timezone)

    def _expand_settings(self, key):
        # Drafts read the DRAFT_* settings, everything else ARTICLE_*.
        klass = "draft" if self.status == "draft" else "article"
        return super()._expand_settings(key, klass)
|
|
|
|
|
|
class Static(Content):
    """Content type for static files.

    Remembers whether its ``url`` or ``save_as`` has been read, so that
    :meth:`attach_to` can refuse to relocate a file whose output location
    has already been referenced.
    """

    mandatory_properties = ("title",)
    default_status = "published"
    default_template = None

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Set to True by the ``url`` / ``save_as`` properties on first read.
        self._output_location_referenced = False

    @deprecated_attribute(old="filepath", new="source_path", since=(3, 2, 0))
    def filepath():
        return None

    @deprecated_attribute(old="src", new="source_path", since=(3, 2, 0))
    def src():
        return None

    @deprecated_attribute(old="dst", new="save_as", since=(3, 2, 0))
    def dst():
        return None

    @property
    def url(self):
        # Record the read, so a later attach_to() will not override the url.
        self._output_location_referenced = True
        return super().url

    @property
    def save_as(self):
        # Record the read, so a later attach_to() will not override save_as.
        self._output_location_referenced = True
        return super().save_as

    def attach_to(self, content):
        """Override our output directory with that of the given content object."""

        # Work out where this file should land relative to the linking
        # document: a file living below the linking document's source
        # directory keeps its relative position, anything else becomes a
        # sibling of the linking document.
        anchor_dir = os.path.dirname(content.source_path)
        relative_tail = os.path.relpath(self.source_path, anchor_dir)
        outside_prefix = os.pardir + os.sep
        if relative_tail.startswith(outside_prefix):
            relative_tail = os.path.basename(relative_tail)
        target_dir = os.path.dirname(content.save_as)
        new_save_as = os.path.join(target_dir, relative_tail)

        # The new url is derived from the new save_as path rather than from
        # the linking document's url: an url such as 'some/content' does not
        # reveal whether it names a file 'content' inside 'some', or a
        # directory 'some/content' served through 'index.html'.
        new_url = path_to_url(new_save_as)

        def _warn_fallback(reason):
            logger.warning(
                "The {attach} link in %s cannot relocate "
                "%s because %s. Falling back to "
                "{filename} link behavior instead.",
                content.get_relative_source_path(),
                self.get_relative_source_path(),
                reason,
                extra={"limit_msg": "More {attach} warnings silenced."},
            )

        def _location_would_change():
            # save_as is compared first; url is only read when save_as matches.
            return new_save_as != self.save_as or new_url != self.url

        # User-defined overrides (e.g. from EXTRA_PATH_METADATA) always win.
        already_overridden = hasattr(self, "override_save_as") or hasattr(
            self, "override_url"
        )
        if already_overridden:
            if _location_would_change():
                _warn_fallback("its output location was already overridden")
            return

        # An output path that something already linked to must stay put,
        # otherwise those links would break.
        if self._output_location_referenced:
            if _location_would_change():
                _warn_fallback("another link already referenced its location")
            return

        self.override_save_as = new_save_as
        self.override_url = new_url
|