2012-08-29 12:17:59 -07:00
|
|
|
import copy
|
2019-11-05 23:17:19 -08:00
|
|
|
import datetime
|
2012-03-20 13:01:21 +00:00
|
|
|
import locale
|
|
|
|
|
import logging
|
2012-11-30 10:46:32 +01:00
|
|
|
import os
|
|
|
|
|
import re
|
2023-07-26 16:29:43 +01:00
|
|
|
from datetime import timezone
|
2020-10-04 19:29:32 +03:00
|
|
|
from html import unescape
|
|
|
|
|
from urllib.parse import unquote, urljoin, urlparse, urlunparse
|
2012-03-20 13:01:21 +00:00
|
|
|
|
2023-07-26 16:29:43 +01:00
|
|
|
try:
|
2023-08-15 19:07:39 +01:00
|
|
|
from zoneinfo import ZoneInfo
|
2023-07-26 16:29:43 +01:00
|
|
|
except ModuleNotFoundError:
|
2023-08-15 19:07:39 +01:00
|
|
|
from backports.zoneinfo import ZoneInfo
|
2023-07-26 16:29:43 +01:00
|
|
|
|
2010-10-30 00:56:40 +01:00
|
|
|
|
2019-12-01 18:14:13 +03:00
|
|
|
from pelican.plugins import signals
|
2013-03-24 08:38:19 -04:00
|
|
|
from pelican.settings import DEFAULT_CONFIG
|
2019-11-05 23:17:19 -08:00
|
|
|
from pelican.utils import (
|
|
|
|
|
deprecated_attribute,
|
|
|
|
|
memoized,
|
|
|
|
|
path_to_url,
|
|
|
|
|
posixize_path,
|
|
|
|
|
sanitised_join,
|
|
|
|
|
set_date_tzinfo,
|
|
|
|
|
slugify,
|
|
|
|
|
truncate_html_words,
|
|
|
|
|
)
|
2013-03-10 20:11:36 -07:00
|
|
|
|
2022-04-28 19:28:26 -07:00
|
|
|
# Import these so that they're available when you import from pelican.contents.
|
2015-09-15 02:24:21 +03:00
|
|
|
from pelican.urlwrappers import Author, Category, Tag, URLWrapper # NOQA
|
2012-03-09 16:21:38 +01:00
|
|
|
|
2012-03-20 13:01:21 +00:00
|
|
|
# Module-level logger; handlers and levels are configured by Pelican's CLI setup.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
2012-11-21 14:24:40 +01:00
|
|
|
|
2020-04-26 09:55:08 +02:00
|
|
|
class Content:
    """Represents a content.

    :param content: the string to parse, containing the original content.
    :param metadata: the metadata associated to this page (optional).
    :param settings: the settings dictionary (optional).
    :param source_path: The location of the source of this content (if any).
    :param context: The shared context between generators.
    """

    # Deprecated alias: accessing `filename` warns and proxies to `source_path`.
    @deprecated_attribute(old="filename", new="source_path", since=(3, 2, 0))
    def filename():
        return None

    def __init__(
        self, content, metadata=None, settings=None, source_path=None, context=None
    ):
        if metadata is None:
            metadata = {}
        if settings is None:
            # copy so mutations on this instance don't leak into the defaults
            settings = copy.deepcopy(DEFAULT_CONFIG)

        self.settings = settings
        self._content = content
        if context is None:
            context = {}
        self._context = context
        self.translations = []

        local_metadata = dict()
        local_metadata.update(metadata)

        # set metadata as attributes
        for key, value in local_metadata.items():
            if key in ("save_as", "url"):
                # prefixed so the `url`/`save_as` properties below keep working
                key = "override_" + key
            setattr(self, key.lower(), value)

        # also keep track of the metadata attributes available
        self.metadata = local_metadata

        # default template if it's not defined in page
        self.template = self._get_template()

        # First, read the authors from "authors", if not, fallback to "author"
        # and if not use the settings defined one, if any.
        if not hasattr(self, "author"):
            if hasattr(self, "authors"):
                self.author = self.authors[0]
            elif "AUTHOR" in settings:
                self.author = Author(settings["AUTHOR"], settings)

        if not hasattr(self, "authors") and hasattr(self, "author"):
            self.authors = [self.author]

        # XXX Split all the following code into pieces, there is too much here.

        # manage languages
        self.in_default_lang = True
        if "DEFAULT_LANG" in settings:
            default_lang = settings["DEFAULT_LANG"].lower()
            if not hasattr(self, "lang"):
                self.lang = default_lang

            self.in_default_lang = self.lang == default_lang

        # create the slug if not existing, generate slug according to
        # setting of SLUG_ATTRIBUTE
        if not hasattr(self, "slug"):
            if settings["SLUGIFY_SOURCE"] == "title" and hasattr(self, "title"):
                value = self.title
            elif settings["SLUGIFY_SOURCE"] == "basename" and source_path is not None:
                value = os.path.basename(os.path.splitext(source_path)[0])
            else:
                value = None
            if value is not None:
                self.slug = slugify(
                    value,
                    regex_subs=settings.get("SLUG_REGEX_SUBSTITUTIONS", []),
                    preserve_case=settings.get("SLUGIFY_PRESERVE_CASE", False),
                    use_unicode=settings.get("SLUGIFY_USE_UNICODE", False),
                )

        self.source_path = source_path
        self.relative_source_path = self.get_relative_source_path()

        # manage the date format
        if not hasattr(self, "date_format"):
            if hasattr(self, "lang") and self.lang in settings["DATE_FORMATS"]:
                self.date_format = settings["DATE_FORMATS"][self.lang]
            else:
                self.date_format = settings["DEFAULT_DATE_FORMAT"]

        # a (locale, format) tuple switches the process locale before formatting
        if isinstance(self.date_format, tuple):
            locale_string = self.date_format[0]
            locale.setlocale(locale.LC_ALL, locale_string)
            self.date_format = self.date_format[1]

        # manage timezone
        default_timezone = settings.get("TIMEZONE", "UTC")
        # NOTE: this local name shadows the imported `timezone` module for the
        # rest of __init__; it holds the tz *name* string, not a tzinfo.
        timezone = getattr(self, "timezone", default_timezone)
        self.timezone = ZoneInfo(timezone)

        if hasattr(self, "date"):
            self.date = set_date_tzinfo(self.date, timezone)
            self.locale_date = self.date.strftime(self.date_format)

        if hasattr(self, "modified"):
            self.modified = set_date_tzinfo(self.modified, timezone)
            self.locale_modified = self.modified.strftime(self.date_format)

        # manage status
        if not hasattr(self, "status"):
            # Previous default of None broke comment plugins and perhaps others
            self.status = getattr(self, "default_status", "")

        # store the summary metadata if it is set
        if "summary" in metadata:
            self._summary = metadata["summary"]

        # let plugins post-process this object once fully initialized
        signals.content_object_init.send(self)

    def __str__(self):
        """Use the source path when known, repr() otherwise."""
        return self.source_path or repr(self)

    def _has_valid_mandatory_properties(self):
        """Test mandatory properties are set."""
        for prop in self.mandatory_properties:
            if not hasattr(self, prop):
                logger.error(
                    "Skipping %s: could not find information about '%s'", self, prop
                )
                return False
        return True

    def _has_valid_save_as(self):
        """Return true if save_as doesn't write outside output path, false
        otherwise."""
        try:
            output_path = self.settings["OUTPUT_PATH"]
        except KeyError:
            # we cannot check
            return True

        try:
            sanitised_join(output_path, self.save_as)
        except RuntimeError:  # outside output_dir
            logger.error(
                "Skipping %s: file %r would be written outside output path",
                self,
                self.save_as,
            )
            return False

        return True

    def _has_valid_status(self):
        """Check the status against the class's allowed_statuses, if any."""
        if hasattr(self, "allowed_statuses"):
            if self.status not in self.allowed_statuses:
                logger.error(
                    "Unknown status '%s' for file %s, skipping it. (Not in %s)",
                    self.status,
                    self,
                    self.allowed_statuses,
                )
                return False

        # if undefined we allow all
        return True

    def is_valid(self):
        """Validate Content"""
        # Use all() to not short circuit and get results of all validations
        return all(
            [
                self._has_valid_mandatory_properties(),
                self._has_valid_save_as(),
                self._has_valid_status(),
            ]
        )

    @property
    def url_format(self):
        """Returns the URL, formatted with the proper values"""
        metadata = copy.copy(self.metadata)
        path = self.metadata.get("path", self.get_relative_source_path())
        metadata.update(
            {
                "path": path_to_url(path),
                "slug": getattr(self, "slug", ""),
                "lang": getattr(self, "lang", "en"),
                "date": getattr(self, "date", datetime.datetime.now()),
                "author": self.author.slug if hasattr(self, "author") else "",
                "category": self.category.slug if hasattr(self, "category") else "",
            }
        )
        return metadata

    def _expand_settings(self, key, klass=None):
        """Format the <KLASS>_<KEY> setting with this content's url_format."""
        if not klass:
            klass = self.__class__.__name__
        fq_key = ("{}_{}".format(klass, key)).upper()
        return str(self.settings[fq_key]).format(**self.url_format)

    def get_url_setting(self, key):
        """Resolve `url`/`save_as`, honoring metadata overrides and language."""
        if hasattr(self, "override_" + key):
            return getattr(self, "override_" + key)
        key = key if self.in_default_lang else "lang_%s" % key
        return self._expand_settings(key)

    def _link_replacer(self, siteurl, m):
        """re.sub callback: rewrite one intrasite link match to its final URL."""
        what = m.group("what")
        value = urlparse(m.group("value"))
        path = value.path
        origin = m.group("path")

        # urllib.parse.urljoin() produces `a.html` for urljoin("..", "a.html")
        # so if RELATIVE_URLS are enabled, we fall back to os.path.join() to
        # properly get `../a.html`. However, os.path.join() produces
        # `baz/http://foo/bar.html` for join("baz", "http://foo/bar.html")
        # instead of correct "http://foo/bar.html", so one has to pick a side
        # as there is no silver bullet.
        if self.settings["RELATIVE_URLS"]:
            joiner = os.path.join
        else:
            joiner = urljoin

        # However, it's not *that* simple: urljoin("blog", "index.html")
        # produces just `index.html` instead of `blog/index.html` (unlike
        # os.path.join()), so in order to get a correct answer one needs to
        # append a trailing slash to siteurl in that case. This also makes
        # the new behavior fully compatible with Pelican 3.7.1.
        if not siteurl.endswith("/"):
            siteurl += "/"

        # XXX Put this in a different location.
        if what in {"filename", "static", "attach"}:

            def _get_linked_content(key, url):
                # `nonlocal value` lets the HTML-unescape fallback rewrite the
                # parsed URL used by the caller below.
                nonlocal value

                def _find_path(path):
                    if path.startswith("/"):
                        path = path[1:]
                    else:
                        # relative to the source path of this content
                        path = self.get_relative_source_path(
                            os.path.join(self.relative_dir, path)
                        )
                    return self._context[key].get(path, None)

                # try path
                result = _find_path(url.path)
                if result is not None:
                    return result

                # try unquoted path
                result = _find_path(unquote(url.path))
                if result is not None:
                    return result

                # try html unescaped url
                unescaped_url = urlparse(unescape(url.geturl()))
                result = _find_path(unescaped_url.path)
                if result is not None:
                    value = unescaped_url
                    return result

                # check if a static file is linked with {filename}
                if what == "filename" and key == "generated_content":
                    linked_content = _get_linked_content("static_content", value)
                    if linked_content:
                        # NOTE(review): "(unknown)" below looks like an
                        # extraction artifact; upstream text reads
                        # "{filename}" — confirm against the repository.
                        logger.warning(
                            "(unknown) used for linking to static"
                            " content %s in %s. Use {static} instead",
                            value.path,
                            self.get_relative_source_path(),
                        )
                        return linked_content

                return None

            if what == "filename":
                key = "generated_content"
            else:
                key = "static_content"

            linked_content = _get_linked_content(key, value)
            if linked_content:
                if what == "attach":
                    # attaching relocates the static file next to this content
                    linked_content.attach_to(self)
                origin = joiner(siteurl, linked_content.url)
                origin = origin.replace("\\", "/")  # for Windows paths.
            else:
                logger.warning(
                    "Unable to find '%s', skipping url replacement.",
                    value.geturl(),
                    extra={
                        "limit_msg": (
                            "Other resources were not found "
                            "and their urls not replaced"
                        )
                    },
                )
        elif what == "category":
            origin = joiner(siteurl, Category(path, self.settings).url)
        elif what == "tag":
            origin = joiner(siteurl, Tag(path, self.settings).url)
        elif what == "index":
            origin = joiner(siteurl, self.settings["INDEX_SAVE_AS"])
        elif what == "author":
            origin = joiner(siteurl, Author(path, self.settings).url)
        else:
            logger.warning(
                "Replacement Indicator '%s' not recognized, " "skipping replacement",
                what,
            )

        # keep all other parts, such as query, fragment, etc.
        parts = list(value)
        parts[2] = origin
        origin = urlunparse(parts)

        return "".join((m.group("markup"), m.group("quote"), origin, m.group("quote")))

    def _get_intrasite_link_regex(self):
        """Compile the regex matching quoted intrasite links in url attributes."""
        intrasite_link_regex = self.settings["INTRASITE_LINK_REGEX"]
        regex = r"""
            (?P<markup><[^\>]+  # match tag with all url-value attributes
            (?:href|src|poster|data|cite|formaction|action|content)\s*=\s*)

            (?P<quote>["\'])  # require value to be quoted
            (?P<path>{}(?P<value>.*?))  # the url value
            (?P=quote)""".format(intrasite_link_regex)
        return re.compile(regex, re.X)

    def _update_content(self, content, siteurl):
        """Update the content attribute.

        Change all the relative paths of the content to relative paths
        suitable for the output content.

        :param content: content resource that will be passed to the templates.
        :param siteurl: siteurl which is locally generated by the writer in
                        case of RELATIVE_URLS.
        """
        if not content:
            return content

        hrefs = self._get_intrasite_link_regex()
        return hrefs.sub(lambda m: self._link_replacer(siteurl, m), content)

    def get_static_links(self):
        """Return the set of {static}/{attach} source paths linked from here."""
        static_links = set()
        hrefs = self._get_intrasite_link_regex()
        for m in hrefs.finditer(self._content):
            what = m.group("what")
            value = urlparse(m.group("value"))
            path = value.path
            if what not in {"static", "attach"}:
                continue
            if path.startswith("/"):
                path = path[1:]
            else:
                # relative to the source path of this content
                path = self.get_relative_source_path(
                    os.path.join(self.relative_dir, path)
                )
            path = path.replace("%20", " ")
            static_links.add(path)
        return static_links

    def get_siteurl(self):
        """Return the site URL currently stored in the shared context."""
        return self._context.get("localsiteurl", "")

    @memoized
    def get_content(self, siteurl):
        """Return the rendered content with intrasite links resolved."""
        if hasattr(self, "_get_content"):
            content = self._get_content()
        else:
            content = self._content
        return self._update_content(content, siteurl)

    @property
    def content(self):
        """Rendered content for the current site URL (memoized by URL)."""
        return self.get_content(self.get_siteurl())

    @memoized
    def get_summary(self, siteurl):
        """Returns the summary of an article.

        This is based on the summary metadata if set, otherwise truncate the
        content.
        """
        if "summary" in self.metadata:
            return self.metadata["summary"]

        if self.settings["SUMMARY_MAX_LENGTH"] is None:
            return self.content

        return truncate_html_words(
            self.content,
            self.settings["SUMMARY_MAX_LENGTH"],
            self.settings["SUMMARY_END_SUFFIX"],
        )

    @property
    def summary(self):
        """Summary for the current site URL (memoized by URL)."""
        return self.get_summary(self.get_siteurl())

    def _get_summary(self):
        """deprecated function to access summary"""

        logger.warning(
            "_get_summary() has been deprecated since 3.6.4. "
            "Use the summary decorator instead"
        )
        return self.summary

    @summary.setter
    def summary(self, value):
        """Dummy function"""
        # intentionally a no-op: summary is derived from metadata/content
        pass

    @property
    def status(self):
        """Publication status, always stored lowercase."""
        return self._status

    @status.setter
    def status(self, value):
        # TODO maybe typecheck
        self._status = value.lower()

    @property
    def url(self):
        """URL of this content, from settings/metadata overrides."""
        return self.get_url_setting("url")

    @property
    def save_as(self):
        """Output location of this content, from settings/metadata overrides."""
        return self.get_url_setting("save_as")

    def _get_template(self):
        """Return the per-content template name, or the class default."""
        if hasattr(self, "template") and self.template is not None:
            return self.template
        else:
            return self.default_template

    def get_relative_source_path(self, source_path=None):
        """Return the relative path (from the content path) to the given
        source_path.

        If no source path is specified, use the source path of this
        content object.
        """
        if not source_path:
            source_path = self.source_path
        if source_path is None:
            return None

        return posixize_path(
            os.path.relpath(
                os.path.abspath(os.path.join(self.settings["PATH"], source_path)),
                os.path.abspath(self.settings["PATH"]),
            )
        )

    @property
    def relative_dir(self):
        """POSIX-style directory of the source, relative to the content PATH."""
        return posixize_path(
            os.path.dirname(
                os.path.relpath(
                    os.path.abspath(self.source_path),
                    os.path.abspath(self.settings["PATH"]),
                )
            )
        )

    def refresh_metadata_intersite_links(self):
        """Re-run intrasite link substitution on all formatted metadata fields."""
        for key in self.settings["FORMATTED_FIELDS"]:
            if key in self.metadata and key != "summary":
                value = self._update_content(self.metadata[key], self.get_siteurl())
                self.metadata[key] = value
                setattr(self, key.lower(), value)

        # _summary is an internal variable that some plugins may be writing to,
        # so ensure changes to it are picked up
        if (
            "summary" in self.settings["FORMATTED_FIELDS"]
            and "summary" in self.metadata
        ):
            self._summary = self._update_content(self._summary, self.get_siteurl())
            self.metadata["summary"] = self._summary
|
|
|
|
|
|
2011-10-29 12:57:15 +02:00
|
|
|
|
2013-03-10 20:11:36 -07:00
|
|
|
class Page(Content):
    """A page: undated content whose URLs expand the PAGE_* (or
    DRAFT_PAGE_* for drafts) settings."""

    mandatory_properties = ("title",)
    allowed_statuses = ("published", "hidden", "draft")
    default_status = "published"
    default_template = "page"

    def _expand_settings(self, key):
        """Expand DRAFT_PAGE_* settings for drafts, PAGE_* otherwise."""
        if self.status == "draft":
            prefix = "draft_page"
        else:
            # None makes the parent fall back to the class name ("Page")
            prefix = None
        return super()._expand_settings(key, prefix)
|
2018-07-03 12:08:27 +02:00
|
|
|
|
2013-03-10 20:11:36 -07:00
|
|
|
|
2017-07-24 19:01:14 +02:00
|
|
|
class Article(Content):
    """An article: dated content whose URLs expand the ARTICLE_* (or
    DRAFT_* for drafts) settings.

    Unless WITH_FUTURE_DATES is enabled, articles dated in the future are
    demoted to draft status at construction time.
    """

    mandatory_properties = ("title", "date", "category")
    allowed_statuses = ("published", "hidden", "draft")
    default_status = "published"
    default_template = "article"

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # handle WITH_FUTURE_DATES (designate article to draft based on date)
        if not self.settings["WITH_FUTURE_DATES"] and hasattr(self, "date"):
            if self.date.tzinfo is None:
                # naive article date: compare against naive local time
                now = datetime.datetime.now()
            else:
                # aware article date: compare against aware UTC time.
                # datetime.utcnow() is deprecated since Python 3.12;
                # now(timezone.utc) yields the equivalent aware datetime.
                now = datetime.datetime.now(timezone.utc)
            if self.date > now:
                self.status = "draft"

        # if we are a draft and there is no date provided, set max datetime
        # so undated drafts sort last in chronological listings
        if not hasattr(self, "date") and self.status == "draft":
            self.date = datetime.datetime.max.replace(tzinfo=self.timezone)

    def _expand_settings(self, key):
        """Expand DRAFT_* settings for drafts, ARTICLE_* otherwise."""
        klass = "draft" if self.status == "draft" else "article"
        return super()._expand_settings(key, klass)
|
2010-11-05 00:22:03 +00:00
|
|
|
|
2013-01-03 13:54:56 -05:00
|
|
|
|
2018-11-16 18:09:22 +01:00
|
|
|
class Static(Content):
    """Static content: a file copied into the output without templating."""

    mandatory_properties = ("title",)
    default_status = "published"
    default_template = None

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Becomes True once url/save_as has been handed out; attach_to()
        # then refuses to relocate the file so existing links stay valid.
        self._output_location_referenced = False

    @deprecated_attribute(old="filepath", new="source_path", since=(3, 2, 0))
    def filepath():
        return None

    @deprecated_attribute(old="src", new="source_path", since=(3, 2, 0))
    def src():
        return None

    @deprecated_attribute(old="dst", new="save_as", since=(3, 2, 0))
    def dst():
        return None

    @property
    def url(self):
        # Note when url has been referenced, so we can avoid overriding it.
        self._output_location_referenced = True
        return super().url

    @property
    def save_as(self):
        # Note when save_as has been referenced, so we can avoid overriding it.
        self._output_location_referenced = True
        return super().save_as

    def attach_to(self, content):
        """Override our output directory with that of the given content object."""
        # Work out this file's new output path relative to the linking
        # document: keep the relationship if the file lives beneath the
        # linking document's source directory, otherwise place it as a
        # sibling of the linking document's output.
        link_dir = os.path.dirname(content.source_path)
        tail = os.path.relpath(self.source_path, link_dir)
        if tail.startswith(os.pardir + os.sep):
            tail = os.path.basename(tail)
        proposed_save_as = os.path.join(os.path.dirname(content.save_as), tail)

        # The new url is derived from the new save_as path, not by joining
        # tail onto the linking document's url: a url like 'some/content'
        # is ambiguous (a file 'content' inside directory 'some', or a
        # directory 'some/content' holding 'index.html'), so joining
        # against it cannot be done reliably.
        proposed_url = path_to_url(proposed_save_as)

        def _warn(reason):
            logger.warning(
                "The {attach} link in %s cannot relocate "
                "%s because %s. Falling back to "
                "(unknown) link behavior instead.",
                content.get_relative_source_path(),
                self.get_relative_source_path(),
                reason,
                extra={"limit_msg": "More {attach} warnings silenced."},
            )

        # Never clobber an existing override — it may be a user-defined
        # one coming from EXTRA_PATH_METADATA.
        if hasattr(self, "override_save_as") or hasattr(self, "override_url"):
            if proposed_save_as != self.save_as or proposed_url != self.url:
                _warn("its output location was already overridden")
            return

        # Never move an output path that something already linked to.
        if self._output_location_referenced:
            if proposed_save_as != self.save_as or proposed_url != self.url:
                _warn("another link already referenced its location")
            return

        self.override_save_as = proposed_save_as
        self.override_url = proposed_url