Apply code style to project via: ruff format .

This commit is contained in:
Chris Rose 2023-10-29 22:18:29 +01:00 committed by Justin Mayer
commit cabdb26cee
41 changed files with 6505 additions and 5163 deletions

View file

@ -31,33 +31,29 @@ except ImportError:
_DISCARD = object()
DUPLICATES_DEFINITIONS_ALLOWED = {
'tags': False,
'date': False,
'modified': False,
'status': False,
'category': False,
'author': False,
'save_as': False,
'url': False,
'authors': False,
'slug': False
"tags": False,
"date": False,
"modified": False,
"status": False,
"category": False,
"author": False,
"save_as": False,
"url": False,
"authors": False,
"slug": False,
}
METADATA_PROCESSORS = {
'tags': lambda x, y: ([
Tag(tag, y)
for tag in ensure_metadata_list(x)
] or _DISCARD),
'date': lambda x, y: get_date(x.replace('_', ' ')),
'modified': lambda x, y: get_date(x),
'status': lambda x, y: x.strip() or _DISCARD,
'category': lambda x, y: _process_if_nonempty(Category, x, y),
'author': lambda x, y: _process_if_nonempty(Author, x, y),
'authors': lambda x, y: ([
Author(author, y)
for author in ensure_metadata_list(x)
] or _DISCARD),
'slug': lambda x, y: x.strip() or _DISCARD,
"tags": lambda x, y: ([Tag(tag, y) for tag in ensure_metadata_list(x)] or _DISCARD),
"date": lambda x, y: get_date(x.replace("_", " ")),
"modified": lambda x, y: get_date(x),
"status": lambda x, y: x.strip() or _DISCARD,
"category": lambda x, y: _process_if_nonempty(Category, x, y),
"author": lambda x, y: _process_if_nonempty(Author, x, y),
"authors": lambda x, y: (
[Author(author, y) for author in ensure_metadata_list(x)] or _DISCARD
),
"slug": lambda x, y: x.strip() or _DISCARD,
}
logger = logging.getLogger(__name__)
@ -65,25 +61,23 @@ logger = logging.getLogger(__name__)
def ensure_metadata_list(text):
"""Canonicalize the format of a list of authors or tags. This works
the same way as Docutils' "authors" field: if it's already a list,
those boundaries are preserved; otherwise, it must be a string;
if the string contains semicolons, it is split on semicolons;
otherwise, it is split on commas. This allows you to write
author lists in either "Jane Doe, John Doe" or "Doe, Jane; Doe, John"
format.
the same way as Docutils' "authors" field: if it's already a list,
those boundaries are preserved; otherwise, it must be a string;
if the string contains semicolons, it is split on semicolons;
otherwise, it is split on commas. This allows you to write
author lists in either "Jane Doe, John Doe" or "Doe, Jane; Doe, John"
format.
Regardless, all list items undergo .strip() before returning, and
empty items are discarded.
Regardless, all list items undergo .strip() before returning, and
empty items are discarded.
"""
if isinstance(text, str):
if ';' in text:
text = text.split(';')
if ";" in text:
text = text.split(";")
else:
text = text.split(',')
text = text.split(",")
return list(OrderedDict.fromkeys(
[v for v in (w.strip() for w in text) if v]
))
return list(OrderedDict.fromkeys([v for v in (w.strip() for w in text) if v]))
def _process_if_nonempty(processor, name, settings):
@ -112,8 +106,9 @@ class BaseReader:
Markdown).
"""
enabled = True
file_extensions = ['static']
file_extensions = ["static"]
extensions = None
def __init__(self, settings):
@ -132,13 +127,12 @@ class BaseReader:
class _FieldBodyTranslator(HTMLTranslator):
def __init__(self, document):
super().__init__(document)
self.compact_p = None
def astext(self):
return ''.join(self.body)
return "".join(self.body)
def visit_field_body(self, node):
pass
@ -154,27 +148,25 @@ def render_node_to_html(document, node, field_body_translator_class):
class PelicanHTMLWriter(Writer):
def __init__(self):
super().__init__()
self.translator_class = PelicanHTMLTranslator
class PelicanHTMLTranslator(HTMLTranslator):
def visit_abbreviation(self, node):
attrs = {}
if node.hasattr('explanation'):
attrs['title'] = node['explanation']
self.body.append(self.starttag(node, 'abbr', '', **attrs))
if node.hasattr("explanation"):
attrs["title"] = node["explanation"]
self.body.append(self.starttag(node, "abbr", "", **attrs))
def depart_abbreviation(self, node):
self.body.append('</abbr>')
self.body.append("</abbr>")
def visit_image(self, node):
# set an empty alt if alt is not specified
# avoids that alt is taken from src
node['alt'] = node.get('alt', '')
node["alt"] = node.get("alt", "")
return HTMLTranslator.visit_image(self, node)
@ -194,7 +186,7 @@ class RstReader(BaseReader):
"""
enabled = bool(docutils)
file_extensions = ['rst']
file_extensions = ["rst"]
writer_class = PelicanHTMLWriter
field_body_translator_class = _FieldBodyTranslator
@ -202,25 +194,28 @@ class RstReader(BaseReader):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
lang_code = self.settings.get('DEFAULT_LANG', 'en')
lang_code = self.settings.get("DEFAULT_LANG", "en")
if get_docutils_lang(lang_code):
self._language_code = lang_code
else:
logger.warning("Docutils has no localization for '%s'."
" Using 'en' instead.", lang_code)
self._language_code = 'en'
logger.warning(
"Docutils has no localization for '%s'." " Using 'en' instead.",
lang_code,
)
self._language_code = "en"
def _parse_metadata(self, document, source_path):
"""Return the dict containing document metadata"""
formatted_fields = self.settings['FORMATTED_FIELDS']
formatted_fields = self.settings["FORMATTED_FIELDS"]
output = {}
if document.first_child_matching_class(docutils.nodes.title) is None:
logger.warning(
'Document title missing in file %s: '
'Ensure exactly one top level section',
source_path)
"Document title missing in file %s: "
"Ensure exactly one top level section",
source_path,
)
try:
# docutils 0.18.1+
@ -231,16 +226,16 @@ class RstReader(BaseReader):
for docinfo in nodes:
for element in docinfo.children:
if element.tagname == 'field': # custom fields (e.g. summary)
if element.tagname == "field": # custom fields (e.g. summary)
name_elem, body_elem = element.children
name = name_elem.astext()
if name.lower() in formatted_fields:
value = render_node_to_html(
document, body_elem,
self.field_body_translator_class)
document, body_elem, self.field_body_translator_class
)
else:
value = body_elem.astext()
elif element.tagname == 'authors': # author list
elif element.tagname == "authors": # author list
name = element.tagname
value = [element.astext() for element in element.children]
else: # standard fields (e.g. address)
@ -252,22 +247,24 @@ class RstReader(BaseReader):
return output
def _get_publisher(self, source_path):
extra_params = {'initial_header_level': '2',
'syntax_highlight': 'short',
'input_encoding': 'utf-8',
'language_code': self._language_code,
'halt_level': 2,
'traceback': True,
'warning_stream': StringIO(),
'embed_stylesheet': False}
user_params = self.settings.get('DOCUTILS_SETTINGS')
extra_params = {
"initial_header_level": "2",
"syntax_highlight": "short",
"input_encoding": "utf-8",
"language_code": self._language_code,
"halt_level": 2,
"traceback": True,
"warning_stream": StringIO(),
"embed_stylesheet": False,
}
user_params = self.settings.get("DOCUTILS_SETTINGS")
if user_params:
extra_params.update(user_params)
pub = docutils.core.Publisher(
writer=self.writer_class(),
destination_class=docutils.io.StringOutput)
pub.set_components('standalone', 'restructuredtext', 'html')
writer=self.writer_class(), destination_class=docutils.io.StringOutput
)
pub.set_components("standalone", "restructuredtext", "html")
pub.process_programmatic_settings(None, extra_params, None)
pub.set_source(source_path=source_path)
pub.publish()
@ -277,10 +274,10 @@ class RstReader(BaseReader):
"""Parses restructured text"""
pub = self._get_publisher(source_path)
parts = pub.writer.parts
content = parts.get('body')
content = parts.get("body")
metadata = self._parse_metadata(pub.document, source_path)
metadata.setdefault('title', parts.get('title'))
metadata.setdefault("title", parts.get("title"))
return content, metadata
@ -289,26 +286,26 @@ class MarkdownReader(BaseReader):
"""Reader for Markdown files"""
enabled = bool(Markdown)
file_extensions = ['md', 'markdown', 'mkd', 'mdown']
file_extensions = ["md", "markdown", "mkd", "mdown"]
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
settings = self.settings['MARKDOWN']
settings.setdefault('extension_configs', {})
settings.setdefault('extensions', [])
for extension in settings['extension_configs'].keys():
if extension not in settings['extensions']:
settings['extensions'].append(extension)
if 'markdown.extensions.meta' not in settings['extensions']:
settings['extensions'].append('markdown.extensions.meta')
settings = self.settings["MARKDOWN"]
settings.setdefault("extension_configs", {})
settings.setdefault("extensions", [])
for extension in settings["extension_configs"].keys():
if extension not in settings["extensions"]:
settings["extensions"].append(extension)
if "markdown.extensions.meta" not in settings["extensions"]:
settings["extensions"].append("markdown.extensions.meta")
self._source_path = None
def _parse_metadata(self, meta):
"""Return the dict containing document metadata"""
formatted_fields = self.settings['FORMATTED_FIELDS']
formatted_fields = self.settings["FORMATTED_FIELDS"]
# prevent metadata extraction in fields
self._md.preprocessors.deregister('meta')
self._md.preprocessors.deregister("meta")
output = {}
for name, value in meta.items():
@ -323,9 +320,10 @@ class MarkdownReader(BaseReader):
elif not DUPLICATES_DEFINITIONS_ALLOWED.get(name, True):
if len(value) > 1:
logger.warning(
'Duplicate definition of `%s` '
'for %s. Using first one.',
name, self._source_path)
"Duplicate definition of `%s` " "for %s. Using first one.",
name,
self._source_path,
)
output[name] = self.process_metadata(name, value[0])
elif len(value) > 1:
# handle list metadata as list of string
@ -339,11 +337,11 @@ class MarkdownReader(BaseReader):
"""Parse content and metadata of markdown files"""
self._source_path = source_path
self._md = Markdown(**self.settings['MARKDOWN'])
self._md = Markdown(**self.settings["MARKDOWN"])
with pelican_open(source_path) as text:
content = self._md.convert(text)
if hasattr(self._md, 'Meta'):
if hasattr(self._md, "Meta"):
metadata = self._parse_metadata(self._md.Meta)
else:
metadata = {}
@ -353,17 +351,17 @@ class MarkdownReader(BaseReader):
class HTMLReader(BaseReader):
"""Parses HTML files as input, looking for meta, title, and body tags"""
file_extensions = ['htm', 'html']
file_extensions = ["htm", "html"]
enabled = True
class _HTMLParser(HTMLParser):
def __init__(self, settings, filename):
super().__init__(convert_charrefs=False)
self.body = ''
self.body = ""
self.metadata = {}
self.settings = settings
self._data_buffer = ''
self._data_buffer = ""
self._filename = filename
@ -374,59 +372,59 @@ class HTMLReader(BaseReader):
self._in_tags = False
def handle_starttag(self, tag, attrs):
if tag == 'head' and self._in_top_level:
if tag == "head" and self._in_top_level:
self._in_top_level = False
self._in_head = True
elif tag == 'title' and self._in_head:
elif tag == "title" and self._in_head:
self._in_title = True
self._data_buffer = ''
elif tag == 'body' and self._in_top_level:
self._data_buffer = ""
elif tag == "body" and self._in_top_level:
self._in_top_level = False
self._in_body = True
self._data_buffer = ''
elif tag == 'meta' and self._in_head:
self._data_buffer = ""
elif tag == "meta" and self._in_head:
self._handle_meta_tag(attrs)
elif self._in_body:
self._data_buffer += self.build_tag(tag, attrs, False)
def handle_endtag(self, tag):
if tag == 'head':
if tag == "head":
if self._in_head:
self._in_head = False
self._in_top_level = True
elif self._in_head and tag == 'title':
elif self._in_head and tag == "title":
self._in_title = False
self.metadata['title'] = self._data_buffer
elif tag == 'body':
self.metadata["title"] = self._data_buffer
elif tag == "body":
self.body = self._data_buffer
self._in_body = False
self._in_top_level = True
elif self._in_body:
self._data_buffer += '</{}>'.format(escape(tag))
self._data_buffer += "</{}>".format(escape(tag))
def handle_startendtag(self, tag, attrs):
if tag == 'meta' and self._in_head:
if tag == "meta" and self._in_head:
self._handle_meta_tag(attrs)
if self._in_body:
self._data_buffer += self.build_tag(tag, attrs, True)
def handle_comment(self, data):
self._data_buffer += '<!--{}-->'.format(data)
self._data_buffer += "<!--{}-->".format(data)
def handle_data(self, data):
self._data_buffer += data
def handle_entityref(self, data):
self._data_buffer += '&{};'.format(data)
self._data_buffer += "&{};".format(data)
def handle_charref(self, data):
self._data_buffer += '&#{};'.format(data)
self._data_buffer += "&#{};".format(data)
def build_tag(self, tag, attrs, close_tag):
result = '<{}'.format(escape(tag))
result = "<{}".format(escape(tag))
for k, v in attrs:
result += ' ' + escape(k)
result += " " + escape(k)
if v is not None:
# If the attribute value contains a double quote, surround
# with single quotes, otherwise use double quotes.
@ -435,33 +433,39 @@ class HTMLReader(BaseReader):
else:
result += '="{}"'.format(escape(v, quote=False))
if close_tag:
return result + ' />'
return result + '>'
return result + " />"
return result + ">"
def _handle_meta_tag(self, attrs):
name = self._attr_value(attrs, 'name')
name = self._attr_value(attrs, "name")
if name is None:
attr_list = ['{}="{}"'.format(k, v) for k, v in attrs]
attr_serialized = ', '.join(attr_list)
logger.warning("Meta tag in file %s does not have a 'name' "
"attribute, skipping. Attributes: %s",
self._filename, attr_serialized)
attr_serialized = ", ".join(attr_list)
logger.warning(
"Meta tag in file %s does not have a 'name' "
"attribute, skipping. Attributes: %s",
self._filename,
attr_serialized,
)
return
name = name.lower()
contents = self._attr_value(attrs, 'content', '')
contents = self._attr_value(attrs, "content", "")
if not contents:
contents = self._attr_value(attrs, 'contents', '')
contents = self._attr_value(attrs, "contents", "")
if contents:
logger.warning(
"Meta tag attribute 'contents' used in file %s, should"
" be changed to 'content'",
self._filename,
extra={'limit_msg': "Other files have meta tag "
"attribute 'contents' that should "
"be changed to 'content'"})
extra={
"limit_msg": "Other files have meta tag "
"attribute 'contents' that should "
"be changed to 'content'"
},
)
if name == 'keywords':
name = 'tags'
if name == "keywords":
name = "tags"
if name in self.metadata:
# if this metadata already exists (i.e. a previous tag with the
@ -501,22 +505,23 @@ class Readers(FileStampDataCacher):
"""
def __init__(self, settings=None, cache_name=''):
def __init__(self, settings=None, cache_name=""):
self.settings = settings or {}
self.readers = {}
self.reader_classes = {}
for cls in [BaseReader] + BaseReader.__subclasses__():
if not cls.enabled:
logger.debug('Missing dependencies for %s',
', '.join(cls.file_extensions))
logger.debug(
"Missing dependencies for %s", ", ".join(cls.file_extensions)
)
continue
for ext in cls.file_extensions:
self.reader_classes[ext] = cls
if self.settings['READERS']:
self.reader_classes.update(self.settings['READERS'])
if self.settings["READERS"]:
self.reader_classes.update(self.settings["READERS"])
signals.readers_init.send(self)
@ -527,53 +532,67 @@ class Readers(FileStampDataCacher):
self.readers[fmt] = reader_class(self.settings)
# set up caching
cache_this_level = (cache_name != '' and
self.settings['CONTENT_CACHING_LAYER'] == 'reader')
caching_policy = cache_this_level and self.settings['CACHE_CONTENT']
load_policy = cache_this_level and self.settings['LOAD_CONTENT_CACHE']
cache_this_level = (
cache_name != "" and self.settings["CONTENT_CACHING_LAYER"] == "reader"
)
caching_policy = cache_this_level and self.settings["CACHE_CONTENT"]
load_policy = cache_this_level and self.settings["LOAD_CONTENT_CACHE"]
super().__init__(settings, cache_name, caching_policy, load_policy)
@property
def extensions(self):
return self.readers.keys()
def read_file(self, base_path, path, content_class=Page, fmt=None,
context=None, preread_signal=None, preread_sender=None,
context_signal=None, context_sender=None):
def read_file(
self,
base_path,
path,
content_class=Page,
fmt=None,
context=None,
preread_signal=None,
preread_sender=None,
context_signal=None,
context_sender=None,
):
"""Return a content object parsed with the given format."""
path = os.path.abspath(os.path.join(base_path, path))
source_path = posixize_path(os.path.relpath(path, base_path))
logger.debug(
'Read file %s -> %s',
source_path, content_class.__name__)
logger.debug("Read file %s -> %s", source_path, content_class.__name__)
if not fmt:
_, ext = os.path.splitext(os.path.basename(path))
fmt = ext[1:]
if fmt not in self.readers:
raise TypeError(
'Pelican does not know how to parse %s', path)
raise TypeError("Pelican does not know how to parse %s", path)
if preread_signal:
logger.debug(
'Signal %s.send(%s)',
preread_signal.name, preread_sender)
logger.debug("Signal %s.send(%s)", preread_signal.name, preread_sender)
preread_signal.send(preread_sender)
reader = self.readers[fmt]
metadata = _filter_discardable_metadata(default_metadata(
settings=self.settings, process=reader.process_metadata))
metadata.update(path_metadata(
full_path=path, source_path=source_path,
settings=self.settings))
metadata.update(_filter_discardable_metadata(parse_path_metadata(
source_path=source_path, settings=self.settings,
process=reader.process_metadata)))
metadata = _filter_discardable_metadata(
default_metadata(settings=self.settings, process=reader.process_metadata)
)
metadata.update(
path_metadata(
full_path=path, source_path=source_path, settings=self.settings
)
)
metadata.update(
_filter_discardable_metadata(
parse_path_metadata(
source_path=source_path,
settings=self.settings,
process=reader.process_metadata,
)
)
)
reader_name = reader.__class__.__name__
metadata['reader'] = reader_name.replace('Reader', '').lower()
metadata["reader"] = reader_name.replace("Reader", "").lower()
content, reader_metadata = self.get_cached_data(path, (None, None))
if content is None:
@ -587,14 +606,14 @@ class Readers(FileStampDataCacher):
find_empty_alt(content, path)
# eventually filter the content with typogrify if asked so
if self.settings['TYPOGRIFY']:
if self.settings["TYPOGRIFY"]:
from typogrify.filters import typogrify
import smartypants
typogrify_dashes = self.settings['TYPOGRIFY_DASHES']
if typogrify_dashes == 'oldschool':
typogrify_dashes = self.settings["TYPOGRIFY_DASHES"]
if typogrify_dashes == "oldschool":
smartypants.Attr.default = smartypants.Attr.set2
elif typogrify_dashes == 'oldschool_inverted':
elif typogrify_dashes == "oldschool_inverted":
smartypants.Attr.default = smartypants.Attr.set3
else:
smartypants.Attr.default = smartypants.Attr.set1
@ -608,31 +627,32 @@ class Readers(FileStampDataCacher):
def typogrify_wrapper(text):
"""Ensures ignore_tags feature is backward compatible"""
try:
return typogrify(
text,
self.settings['TYPOGRIFY_IGNORE_TAGS'])
return typogrify(text, self.settings["TYPOGRIFY_IGNORE_TAGS"])
except TypeError:
return typogrify(text)
if content:
content = typogrify_wrapper(content)
if 'title' in metadata:
metadata['title'] = typogrify_wrapper(metadata['title'])
if "title" in metadata:
metadata["title"] = typogrify_wrapper(metadata["title"])
if 'summary' in metadata:
metadata['summary'] = typogrify_wrapper(metadata['summary'])
if "summary" in metadata:
metadata["summary"] = typogrify_wrapper(metadata["summary"])
if context_signal:
logger.debug(
'Signal %s.send(%s, <metadata>)',
context_signal.name,
context_sender)
"Signal %s.send(%s, <metadata>)", context_signal.name, context_sender
)
context_signal.send(context_sender, metadata=metadata)
return content_class(content=content, metadata=metadata,
settings=self.settings, source_path=path,
context=context)
return content_class(
content=content,
metadata=metadata,
settings=self.settings,
source_path=path,
context=context,
)
def find_empty_alt(content, path):
@ -642,7 +662,8 @@ def find_empty_alt(content, path):
as they are really likely to be accessibility flaws.
"""
imgs = re.compile(r"""
imgs = re.compile(
r"""
(?:
# src before alt
<img
@ -658,53 +679,57 @@ def find_empty_alt(content, path):
[^\>]*
src=(['"])(.*?)\5
)
""", re.X)
""",
re.X,
)
for match in re.findall(imgs, content):
logger.warning(
'Empty alt attribute for image %s in %s',
os.path.basename(match[1] + match[5]), path,
extra={'limit_msg': 'Other images have empty alt attributes'})
"Empty alt attribute for image %s in %s",
os.path.basename(match[1] + match[5]),
path,
extra={"limit_msg": "Other images have empty alt attributes"},
)
def default_metadata(settings=None, process=None):
metadata = {}
if settings:
for name, value in dict(settings.get('DEFAULT_METADATA', {})).items():
for name, value in dict(settings.get("DEFAULT_METADATA", {})).items():
if process:
value = process(name, value)
metadata[name] = value
if 'DEFAULT_CATEGORY' in settings:
value = settings['DEFAULT_CATEGORY']
if "DEFAULT_CATEGORY" in settings:
value = settings["DEFAULT_CATEGORY"]
if process:
value = process('category', value)
metadata['category'] = value
if settings.get('DEFAULT_DATE', None) and \
settings['DEFAULT_DATE'] != 'fs':
if isinstance(settings['DEFAULT_DATE'], str):
metadata['date'] = get_date(settings['DEFAULT_DATE'])
value = process("category", value)
metadata["category"] = value
if settings.get("DEFAULT_DATE", None) and settings["DEFAULT_DATE"] != "fs":
if isinstance(settings["DEFAULT_DATE"], str):
metadata["date"] = get_date(settings["DEFAULT_DATE"])
else:
metadata['date'] = datetime.datetime(*settings['DEFAULT_DATE'])
metadata["date"] = datetime.datetime(*settings["DEFAULT_DATE"])
return metadata
def path_metadata(full_path, source_path, settings=None):
metadata = {}
if settings:
if settings.get('DEFAULT_DATE', None) == 'fs':
metadata['date'] = datetime.datetime.fromtimestamp(
os.stat(full_path).st_mtime)
metadata['modified'] = metadata['date']
if settings.get("DEFAULT_DATE", None) == "fs":
metadata["date"] = datetime.datetime.fromtimestamp(
os.stat(full_path).st_mtime
)
metadata["modified"] = metadata["date"]
# Apply EXTRA_PATH_METADATA for the source path and the paths of any
# parent directories. Sorting EPM first ensures that the most specific
# path wins conflicts.
epm = settings.get('EXTRA_PATH_METADATA', {})
epm = settings.get("EXTRA_PATH_METADATA", {})
for path, meta in sorted(epm.items()):
# Enforce a trailing slash when checking for parent directories.
# This prevents false positives when one file or directory's name
# is a prefix of another's.
dirpath = posixize_path(os.path.join(path, ''))
dirpath = posixize_path(os.path.join(path, ""))
if source_path == path or source_path.startswith(dirpath):
metadata.update(meta)
@ -736,11 +761,10 @@ def parse_path_metadata(source_path, settings=None, process=None):
subdir = os.path.basename(dirname)
if settings:
checks = []
for key, data in [('FILENAME_METADATA', base),
('PATH_METADATA', source_path)]:
for key, data in [("FILENAME_METADATA", base), ("PATH_METADATA", source_path)]:
checks.append((settings.get(key, None), data))
if settings.get('USE_FOLDER_AS_CATEGORY', None):
checks.append(('(?P<category>.*)', subdir))
if settings.get("USE_FOLDER_AS_CATEGORY", None):
checks.append(("(?P<category>.*)", subdir))
for regexp, data in checks:
if regexp and data:
match = re.match(regexp, data)