Mirror of https://github.com/getpelican/pelican.git (synced 2025-10-15 20:28:56 +02:00)
Commit cabdb26cee (parent 8ea27b82f6)

Apply code style to project via: ruff format .

41 changed files with 6505 additions and 5163 deletions
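
The formatting itself comes from running `ruff format .` at the repository root, as the commit message says. As a hedged illustration (not part of this commit), a CI step could keep the tree formatted afterwards; the sketch below assumes Ruff's `ruff format --check` invocation, which exits non-zero when files would be reformatted.

    # Hypothetical CI helper, not part of this commit: fail the job when the
    # tree is not ruff-formatted. Assumes the `ruff format --check` flag.
    import subprocess
    import sys

    result = subprocess.run(["ruff", "format", "--check", "."])
    sys.exit(result.returncode)  # non-zero means some files would be reformatted
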
@@ -31,33 +31,29 @@ except ImportError:
 _DISCARD = object()

 DUPLICATES_DEFINITIONS_ALLOWED = {
-    'tags': False,
-    'date': False,
-    'modified': False,
-    'status': False,
-    'category': False,
-    'author': False,
-    'save_as': False,
-    'url': False,
-    'authors': False,
-    'slug': False
+    "tags": False,
+    "date": False,
+    "modified": False,
+    "status": False,
+    "category": False,
+    "author": False,
+    "save_as": False,
+    "url": False,
+    "authors": False,
+    "slug": False,
 }

 METADATA_PROCESSORS = {
-    'tags': lambda x, y: ([
-        Tag(tag, y)
-        for tag in ensure_metadata_list(x)
-    ] or _DISCARD),
-    'date': lambda x, y: get_date(x.replace('_', ' ')),
-    'modified': lambda x, y: get_date(x),
-    'status': lambda x, y: x.strip() or _DISCARD,
-    'category': lambda x, y: _process_if_nonempty(Category, x, y),
-    'author': lambda x, y: _process_if_nonempty(Author, x, y),
-    'authors': lambda x, y: ([
-        Author(author, y)
-        for author in ensure_metadata_list(x)
-    ] or _DISCARD),
-    'slug': lambda x, y: x.strip() or _DISCARD,
+    "tags": lambda x, y: ([Tag(tag, y) for tag in ensure_metadata_list(x)] or _DISCARD),
+    "date": lambda x, y: get_date(x.replace("_", " ")),
+    "modified": lambda x, y: get_date(x),
+    "status": lambda x, y: x.strip() or _DISCARD,
+    "category": lambda x, y: _process_if_nonempty(Category, x, y),
+    "author": lambda x, y: _process_if_nonempty(Author, x, y),
+    "authors": lambda x, y: (
+        [Author(author, y) for author in ensure_metadata_list(x)] or _DISCARD
+    ),
+    "slug": lambda x, y: x.strip() or _DISCARD,
 }

 logger = logging.getLogger(__name__)
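
Both sides of this hunk keep the `... or _DISCARD` idiom: a processor that produces an empty value returns the module-level `_DISCARD` sentinel, and `_filter_discardable_metadata` (used further down in `read_file`) drops those keys. The following is a minimal standalone sketch of that idea; the sentinel, processors, and filter here are local stand-ins, not Pelican's own objects.

    # Simplified sketch of the sentinel pattern above, with local stand-ins.
    _DISCARD = object()

    processors = {
        "slug": lambda x: x.strip() or _DISCARD,  # empty string -> sentinel
        "tags": lambda x: [t.strip() for t in x.split(",") if t.strip()] or _DISCARD,
    }

    raw = {"slug": "   ", "tags": "python, pelican"}
    parsed = {k: processors[k](v) for k, v in raw.items()}
    metadata = {k: v for k, v in parsed.items() if v is not _DISCARD}
    print(metadata)  # {'tags': ['python', 'pelican']} -- the blank slug is dropped
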
@@ -65,25 +61,23 @@ logger = logging.getLogger(__name__)

 def ensure_metadata_list(text):
     """Canonicalize the format of a list of authors or tags. This works
-       the same way as Docutils' "authors" field: if it's already a list,
-       those boundaries are preserved; otherwise, it must be a string;
-       if the string contains semicolons, it is split on semicolons;
-       otherwise, it is split on commas. This allows you to write
-       author lists in either "Jane Doe, John Doe" or "Doe, Jane; Doe, John"
-       format.
+    the same way as Docutils' "authors" field: if it's already a list,
+    those boundaries are preserved; otherwise, it must be a string;
+    if the string contains semicolons, it is split on semicolons;
+    otherwise, it is split on commas. This allows you to write
+    author lists in either "Jane Doe, John Doe" or "Doe, Jane; Doe, John"
+    format.

-       Regardless, all list items undergo .strip() before returning, and
-       empty items are discarded.
+    Regardless, all list items undergo .strip() before returning, and
+    empty items are discarded.
     """
     if isinstance(text, str):
-        if ';' in text:
-            text = text.split(';')
+        if ";" in text:
+            text = text.split(";")
         else:
-            text = text.split(',')
+            text = text.split(",")

-    return list(OrderedDict.fromkeys(
-        [v for v in (w.strip() for w in text) if v]
-    ))
+    return list(OrderedDict.fromkeys([v for v in (w.strip() for w in text) if v]))


 def _process_if_nonempty(processor, name, settings):
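
The docstring above fully specifies the splitting rules, so a usage sketch follows directly from it. The copy of the function below is standalone and for illustration only; the real one lives in the module shown in this diff.

    from collections import OrderedDict

    def ensure_metadata_list(text):
        # standalone copy of the one-line implementation shown above
        if isinstance(text, str):
            text = text.split(";") if ";" in text else text.split(",")
        return list(OrderedDict.fromkeys([v for v in (w.strip() for w in text) if v]))

    print(ensure_metadata_list("Jane Doe, John Doe"))    # ['Jane Doe', 'John Doe']
    print(ensure_metadata_list("Doe, Jane; Doe, John"))  # ['Doe, Jane', 'Doe, John']
    print(ensure_metadata_list(["a", " a ", "", "b"]))   # ['a', 'b'] (stripped, deduplicated)
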
@@ -112,8 +106,9 @@ class BaseReader:
     Markdown).

     """
+
     enabled = True
-    file_extensions = ['static']
+    file_extensions = ["static"]
     extensions = None

     def __init__(self, settings):
@@ -132,13 +127,12 @@ class BaseReader:


 class _FieldBodyTranslator(HTMLTranslator):
-
     def __init__(self, document):
         super().__init__(document)
         self.compact_p = None

     def astext(self):
-        return ''.join(self.body)
+        return "".join(self.body)

     def visit_field_body(self, node):
         pass
@@ -154,27 +148,25 @@ def render_node_to_html(document, node, field_body_translator_class):


 class PelicanHTMLWriter(Writer):
-
     def __init__(self):
         super().__init__()
         self.translator_class = PelicanHTMLTranslator


 class PelicanHTMLTranslator(HTMLTranslator):
-
     def visit_abbreviation(self, node):
         attrs = {}
-        if node.hasattr('explanation'):
-            attrs['title'] = node['explanation']
-        self.body.append(self.starttag(node, 'abbr', '', **attrs))
+        if node.hasattr("explanation"):
+            attrs["title"] = node["explanation"]
+        self.body.append(self.starttag(node, "abbr", "", **attrs))

     def depart_abbreviation(self, node):
-        self.body.append('</abbr>')
+        self.body.append("</abbr>")

     def visit_image(self, node):
         # set an empty alt if alt is not specified
         # avoids that alt is taken from src
-        node['alt'] = node.get('alt', '')
+        node["alt"] = node.get("alt", "")
         return HTMLTranslator.visit_image(self, node)


@@ -194,7 +186,7 @@ class RstReader(BaseReader):
     """

     enabled = bool(docutils)
-    file_extensions = ['rst']
+    file_extensions = ["rst"]

     writer_class = PelicanHTMLWriter
     field_body_translator_class = _FieldBodyTranslator
@@ -202,25 +194,28 @@ class RstReader(BaseReader):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)

-        lang_code = self.settings.get('DEFAULT_LANG', 'en')
+        lang_code = self.settings.get("DEFAULT_LANG", "en")
         if get_docutils_lang(lang_code):
             self._language_code = lang_code
         else:
-            logger.warning("Docutils has no localization for '%s'."
-                           " Using 'en' instead.", lang_code)
-            self._language_code = 'en'
+            logger.warning(
+                "Docutils has no localization for '%s'." " Using 'en' instead.",
+                lang_code,
+            )
+            self._language_code = "en"

     def _parse_metadata(self, document, source_path):
         """Return the dict containing document metadata"""
-        formatted_fields = self.settings['FORMATTED_FIELDS']
+        formatted_fields = self.settings["FORMATTED_FIELDS"]

         output = {}

         if document.first_child_matching_class(docutils.nodes.title) is None:
             logger.warning(
-                'Document title missing in file %s: '
-                'Ensure exactly one top level section',
-                source_path)
+                "Document title missing in file %s: "
+                "Ensure exactly one top level section",
+                source_path,
+            )

         try:
             # docutils 0.18.1+
@@ -231,16 +226,16 @@ class RstReader(BaseReader):

         for docinfo in nodes:
             for element in docinfo.children:
-                if element.tagname == 'field':  # custom fields (e.g. summary)
+                if element.tagname == "field":  # custom fields (e.g. summary)
                     name_elem, body_elem = element.children
                     name = name_elem.astext()
                     if name.lower() in formatted_fields:
                         value = render_node_to_html(
-                            document, body_elem,
-                            self.field_body_translator_class)
+                            document, body_elem, self.field_body_translator_class
+                        )
                     else:
                         value = body_elem.astext()
-                elif element.tagname == 'authors':  # author list
+                elif element.tagname == "authors":  # author list
                     name = element.tagname
                     value = [element.astext() for element in element.children]
                 else:  # standard fields (e.g. address)
@@ -252,22 +247,24 @@ class RstReader(BaseReader):
         return output

     def _get_publisher(self, source_path):
-        extra_params = {'initial_header_level': '2',
-                        'syntax_highlight': 'short',
-                        'input_encoding': 'utf-8',
-                        'language_code': self._language_code,
-                        'halt_level': 2,
-                        'traceback': True,
-                        'warning_stream': StringIO(),
-                        'embed_stylesheet': False}
-        user_params = self.settings.get('DOCUTILS_SETTINGS')
+        extra_params = {
+            "initial_header_level": "2",
+            "syntax_highlight": "short",
+            "input_encoding": "utf-8",
+            "language_code": self._language_code,
+            "halt_level": 2,
+            "traceback": True,
+            "warning_stream": StringIO(),
+            "embed_stylesheet": False,
+        }
+        user_params = self.settings.get("DOCUTILS_SETTINGS")
         if user_params:
             extra_params.update(user_params)

         pub = docutils.core.Publisher(
-            writer=self.writer_class(),
-            destination_class=docutils.io.StringOutput)
-        pub.set_components('standalone', 'restructuredtext', 'html')
+            writer=self.writer_class(), destination_class=docutils.io.StringOutput
+        )
+        pub.set_components("standalone", "restructuredtext", "html")
         pub.process_programmatic_settings(None, extra_params, None)
         pub.set_source(source_path=source_path)
         pub.publish()
@@ -277,10 +274,10 @@ class RstReader(BaseReader):
         """Parses restructured text"""
         pub = self._get_publisher(source_path)
         parts = pub.writer.parts
-        content = parts.get('body')
+        content = parts.get("body")

         metadata = self._parse_metadata(pub.document, source_path)
-        metadata.setdefault('title', parts.get('title'))
+        metadata.setdefault("title", parts.get("title"))

         return content, metadata

@@ -289,26 +286,26 @@ class MarkdownReader(BaseReader):
     """Reader for Markdown files"""

     enabled = bool(Markdown)
-    file_extensions = ['md', 'markdown', 'mkd', 'mdown']
+    file_extensions = ["md", "markdown", "mkd", "mdown"]

     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        settings = self.settings['MARKDOWN']
-        settings.setdefault('extension_configs', {})
-        settings.setdefault('extensions', [])
-        for extension in settings['extension_configs'].keys():
-            if extension not in settings['extensions']:
-                settings['extensions'].append(extension)
-        if 'markdown.extensions.meta' not in settings['extensions']:
-            settings['extensions'].append('markdown.extensions.meta')
+        settings = self.settings["MARKDOWN"]
+        settings.setdefault("extension_configs", {})
+        settings.setdefault("extensions", [])
+        for extension in settings["extension_configs"].keys():
+            if extension not in settings["extensions"]:
+                settings["extensions"].append(extension)
+        if "markdown.extensions.meta" not in settings["extensions"]:
+            settings["extensions"].append("markdown.extensions.meta")
         self._source_path = None

     def _parse_metadata(self, meta):
         """Return the dict containing document metadata"""
-        formatted_fields = self.settings['FORMATTED_FIELDS']
+        formatted_fields = self.settings["FORMATTED_FIELDS"]

         # prevent metadata extraction in fields
-        self._md.preprocessors.deregister('meta')
+        self._md.preprocessors.deregister("meta")

         output = {}
         for name, value in meta.items():
@@ -323,9 +320,10 @@ class MarkdownReader(BaseReader):
             elif not DUPLICATES_DEFINITIONS_ALLOWED.get(name, True):
                 if len(value) > 1:
                     logger.warning(
-                        'Duplicate definition of `%s` '
-                        'for %s. Using first one.',
-                        name, self._source_path)
+                        "Duplicate definition of `%s` " "for %s. Using first one.",
+                        name,
+                        self._source_path,
+                    )
                 output[name] = self.process_metadata(name, value[0])
             elif len(value) > 1:
                 # handle list metadata as list of string
@@ -339,11 +337,11 @@ class MarkdownReader(BaseReader):
         """Parse content and metadata of markdown files"""

         self._source_path = source_path
-        self._md = Markdown(**self.settings['MARKDOWN'])
+        self._md = Markdown(**self.settings["MARKDOWN"])
         with pelican_open(source_path) as text:
             content = self._md.convert(text)

-        if hasattr(self._md, 'Meta'):
+        if hasattr(self._md, "Meta"):
             metadata = self._parse_metadata(self._md.Meta)
         else:
             metadata = {}
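
The reader's `__init__` (earlier in this diff) forces `markdown.extensions.meta` into the extension list precisely so that `self._md.Meta` exists after `convert()`. A small standalone sketch of that python-markdown behaviour follows; the sample document text is made up for illustration.

    from markdown import Markdown

    text = """Title: A sample post
    Authors: Doe, Jane; Doe, John

    The body starts after the first blank line.
    """

    md = Markdown(extensions=["markdown.extensions.meta"])
    html = md.convert(text)
    print(md.Meta)  # {'title': ['A sample post'], 'authors': ['Doe, Jane; Doe, John']}
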
@@ -353,17 +351,17 @@
 class HTMLReader(BaseReader):
     """Parses HTML files as input, looking for meta, title, and body tags"""

-    file_extensions = ['htm', 'html']
+    file_extensions = ["htm", "html"]
     enabled = True

     class _HTMLParser(HTMLParser):
         def __init__(self, settings, filename):
             super().__init__(convert_charrefs=False)
-            self.body = ''
+            self.body = ""
             self.metadata = {}
             self.settings = settings

-            self._data_buffer = ''
+            self._data_buffer = ""

             self._filename = filename

@@ -374,59 +372,59 @@ class HTMLReader(BaseReader):
             self._in_tags = False

         def handle_starttag(self, tag, attrs):
-            if tag == 'head' and self._in_top_level:
+            if tag == "head" and self._in_top_level:
                 self._in_top_level = False
                 self._in_head = True
-            elif tag == 'title' and self._in_head:
+            elif tag == "title" and self._in_head:
                 self._in_title = True
-                self._data_buffer = ''
-            elif tag == 'body' and self._in_top_level:
+                self._data_buffer = ""
+            elif tag == "body" and self._in_top_level:
                 self._in_top_level = False
                 self._in_body = True
-                self._data_buffer = ''
-            elif tag == 'meta' and self._in_head:
+                self._data_buffer = ""
+            elif tag == "meta" and self._in_head:
                 self._handle_meta_tag(attrs)

             elif self._in_body:
                 self._data_buffer += self.build_tag(tag, attrs, False)

         def handle_endtag(self, tag):
-            if tag == 'head':
+            if tag == "head":
                 if self._in_head:
                     self._in_head = False
                     self._in_top_level = True
-            elif self._in_head and tag == 'title':
+            elif self._in_head and tag == "title":
                 self._in_title = False
-                self.metadata['title'] = self._data_buffer
-            elif tag == 'body':
+                self.metadata["title"] = self._data_buffer
+            elif tag == "body":
                 self.body = self._data_buffer
                 self._in_body = False
                 self._in_top_level = True
             elif self._in_body:
-                self._data_buffer += '</{}>'.format(escape(tag))
+                self._data_buffer += "</{}>".format(escape(tag))

         def handle_startendtag(self, tag, attrs):
-            if tag == 'meta' and self._in_head:
+            if tag == "meta" and self._in_head:
                 self._handle_meta_tag(attrs)
             if self._in_body:
                 self._data_buffer += self.build_tag(tag, attrs, True)

         def handle_comment(self, data):
-            self._data_buffer += '<!--{}-->'.format(data)
+            self._data_buffer += "<!--{}-->".format(data)

         def handle_data(self, data):
             self._data_buffer += data

         def handle_entityref(self, data):
-            self._data_buffer += '&{};'.format(data)
+            self._data_buffer += "&{};".format(data)

         def handle_charref(self, data):
-            self._data_buffer += '&#{};'.format(data)
+            self._data_buffer += "&#{};".format(data)

         def build_tag(self, tag, attrs, close_tag):
-            result = '<{}'.format(escape(tag))
+            result = "<{}".format(escape(tag))
             for k, v in attrs:
-                result += ' ' + escape(k)
+                result += " " + escape(k)
                 if v is not None:
                     # If the attribute value contains a double quote, surround
                     # with single quotes, otherwise use double quotes.
@@ -435,33 +433,39 @@ class HTMLReader(BaseReader):
                 else:
                     result += '="{}"'.format(escape(v, quote=False))
             if close_tag:
-                return result + ' />'
-            return result + '>'
+                return result + " />"
+            return result + ">"

         def _handle_meta_tag(self, attrs):
-            name = self._attr_value(attrs, 'name')
+            name = self._attr_value(attrs, "name")
             if name is None:
                 attr_list = ['{}="{}"'.format(k, v) for k, v in attrs]
-                attr_serialized = ', '.join(attr_list)
-                logger.warning("Meta tag in file %s does not have a 'name' "
-                               "attribute, skipping. Attributes: %s",
-                               self._filename, attr_serialized)
+                attr_serialized = ", ".join(attr_list)
+                logger.warning(
+                    "Meta tag in file %s does not have a 'name' "
+                    "attribute, skipping. Attributes: %s",
+                    self._filename,
+                    attr_serialized,
+                )
                 return
             name = name.lower()
-            contents = self._attr_value(attrs, 'content', '')
+            contents = self._attr_value(attrs, "content", "")
             if not contents:
-                contents = self._attr_value(attrs, 'contents', '')
+                contents = self._attr_value(attrs, "contents", "")
                 if contents:
                     logger.warning(
                         "Meta tag attribute 'contents' used in file %s, should"
                         " be changed to 'content'",
                         self._filename,
-                        extra={'limit_msg': "Other files have meta tag "
-                               "attribute 'contents' that should "
-                               "be changed to 'content'"})
+                        extra={
+                            "limit_msg": "Other files have meta tag "
+                            "attribute 'contents' that should "
+                            "be changed to 'content'"
+                        },
+                    )

-            if name == 'keywords':
-                name = 'tags'
+            if name == "keywords":
+                name = "tags"

             if name in self.metadata:
                 # if this metadata already exists (i.e. a previous tag with the
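
The comment at the end of the previous hunk and the `else:` branch at the top of this one describe `build_tag`'s attribute-quoting rule, but the opposite branch sits outside the hunk. The sketch below therefore reconstructs the rule from the comment alone; `quote_attr` is a hypothetical helper, not a function in this module.

    from html import escape

    def quote_attr(value):
        # assumption, based on the comment above: values containing a double
        # quote are wrapped in single quotes, everything else in double quotes
        if '"' in value:
            return "='{}'".format(escape(value, quote=False))
        return '="{}"'.format(escape(value, quote=False))

    print("<a href" + quote_attr("https://example.org") + ">")  # <a href="https://example.org">
    print("<p title" + quote_attr('say "hi"') + ">")            # <p title='say "hi"'>
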
@@ -501,22 +505,23 @@ class Readers(FileStampDataCacher):

     """

-    def __init__(self, settings=None, cache_name=''):
+    def __init__(self, settings=None, cache_name=""):
         self.settings = settings or {}
         self.readers = {}
         self.reader_classes = {}

         for cls in [BaseReader] + BaseReader.__subclasses__():
             if not cls.enabled:
-                logger.debug('Missing dependencies for %s',
-                             ', '.join(cls.file_extensions))
+                logger.debug(
+                    "Missing dependencies for %s", ", ".join(cls.file_extensions)
+                )
                 continue

             for ext in cls.file_extensions:
                 self.reader_classes[ext] = cls

-        if self.settings['READERS']:
-            self.reader_classes.update(self.settings['READERS'])
+        if self.settings["READERS"]:
+            self.reader_classes.update(self.settings["READERS"])

         signals.readers_init.send(self)

@@ -527,53 +532,67 @@ class Readers(FileStampDataCacher):
             self.readers[fmt] = reader_class(self.settings)

         # set up caching
-        cache_this_level = (cache_name != '' and
-                            self.settings['CONTENT_CACHING_LAYER'] == 'reader')
-        caching_policy = cache_this_level and self.settings['CACHE_CONTENT']
-        load_policy = cache_this_level and self.settings['LOAD_CONTENT_CACHE']
+        cache_this_level = (
+            cache_name != "" and self.settings["CONTENT_CACHING_LAYER"] == "reader"
+        )
+        caching_policy = cache_this_level and self.settings["CACHE_CONTENT"]
+        load_policy = cache_this_level and self.settings["LOAD_CONTENT_CACHE"]
         super().__init__(settings, cache_name, caching_policy, load_policy)

     @property
     def extensions(self):
         return self.readers.keys()

-    def read_file(self, base_path, path, content_class=Page, fmt=None,
-                  context=None, preread_signal=None, preread_sender=None,
-                  context_signal=None, context_sender=None):
+    def read_file(
+        self,
+        base_path,
+        path,
+        content_class=Page,
+        fmt=None,
+        context=None,
+        preread_signal=None,
+        preread_sender=None,
+        context_signal=None,
+        context_sender=None,
+    ):
         """Return a content object parsed with the given format."""

         path = os.path.abspath(os.path.join(base_path, path))
         source_path = posixize_path(os.path.relpath(path, base_path))
-        logger.debug(
-            'Read file %s -> %s',
-            source_path, content_class.__name__)
+        logger.debug("Read file %s -> %s", source_path, content_class.__name__)

         if not fmt:
             _, ext = os.path.splitext(os.path.basename(path))
             fmt = ext[1:]

         if fmt not in self.readers:
-            raise TypeError(
-                'Pelican does not know how to parse %s', path)
+            raise TypeError("Pelican does not know how to parse %s", path)

         if preread_signal:
-            logger.debug(
-                'Signal %s.send(%s)',
-                preread_signal.name, preread_sender)
+            logger.debug("Signal %s.send(%s)", preread_signal.name, preread_sender)
             preread_signal.send(preread_sender)

         reader = self.readers[fmt]

-        metadata = _filter_discardable_metadata(default_metadata(
-            settings=self.settings, process=reader.process_metadata))
-        metadata.update(path_metadata(
-            full_path=path, source_path=source_path,
-            settings=self.settings))
-        metadata.update(_filter_discardable_metadata(parse_path_metadata(
-            source_path=source_path, settings=self.settings,
-            process=reader.process_metadata)))
+        metadata = _filter_discardable_metadata(
+            default_metadata(settings=self.settings, process=reader.process_metadata)
+        )
+        metadata.update(
+            path_metadata(
+                full_path=path, source_path=source_path, settings=self.settings
+            )
+        )
+        metadata.update(
+            _filter_discardable_metadata(
+                parse_path_metadata(
+                    source_path=source_path,
+                    settings=self.settings,
+                    process=reader.process_metadata,
+                )
+            )
+        )
         reader_name = reader.__class__.__name__
-        metadata['reader'] = reader_name.replace('Reader', '').lower()
+        metadata["reader"] = reader_name.replace("Reader", "").lower()

         content, reader_metadata = self.get_cached_data(path, (None, None))
         if content is None:
@@ -587,14 +606,14 @@ class Readers(FileStampDataCacher):
             find_empty_alt(content, path)

         # eventually filter the content with typogrify if asked so
-        if self.settings['TYPOGRIFY']:
+        if self.settings["TYPOGRIFY"]:
             from typogrify.filters import typogrify
             import smartypants

-            typogrify_dashes = self.settings['TYPOGRIFY_DASHES']
-            if typogrify_dashes == 'oldschool':
+            typogrify_dashes = self.settings["TYPOGRIFY_DASHES"]
+            if typogrify_dashes == "oldschool":
                 smartypants.Attr.default = smartypants.Attr.set2
-            elif typogrify_dashes == 'oldschool_inverted':
+            elif typogrify_dashes == "oldschool_inverted":
                 smartypants.Attr.default = smartypants.Attr.set3
             else:
                 smartypants.Attr.default = smartypants.Attr.set1
@@ -608,31 +627,32 @@ class Readers(FileStampDataCacher):
             def typogrify_wrapper(text):
                 """Ensures ignore_tags feature is backward compatible"""
                 try:
-                    return typogrify(
-                        text,
-                        self.settings['TYPOGRIFY_IGNORE_TAGS'])
+                    return typogrify(text, self.settings["TYPOGRIFY_IGNORE_TAGS"])
                 except TypeError:
                     return typogrify(text)

             if content:
                 content = typogrify_wrapper(content)

-            if 'title' in metadata:
-                metadata['title'] = typogrify_wrapper(metadata['title'])
+            if "title" in metadata:
+                metadata["title"] = typogrify_wrapper(metadata["title"])

-            if 'summary' in metadata:
-                metadata['summary'] = typogrify_wrapper(metadata['summary'])
+            if "summary" in metadata:
+                metadata["summary"] = typogrify_wrapper(metadata["summary"])

         if context_signal:
             logger.debug(
-                'Signal %s.send(%s, <metadata>)',
-                context_signal.name,
-                context_sender)
+                "Signal %s.send(%s, <metadata>)", context_signal.name, context_sender
+            )
             context_signal.send(context_sender, metadata=metadata)

-        return content_class(content=content, metadata=metadata,
-                             settings=self.settings, source_path=path,
-                             context=context)
+        return content_class(
+            content=content,
+            metadata=metadata,
+            settings=self.settings,
+            source_path=path,
+            context=context,
+        )


 def find_empty_alt(content, path):
@@ -642,7 +662,8 @@ def find_empty_alt(content, path):
     as they are really likely to be accessibility flaws.

     """
-    imgs = re.compile(r"""
+    imgs = re.compile(
+        r"""
         (?:
             # src before alt
             <img
@@ -658,53 +679,57 @@ def find_empty_alt(content, path):
             [^\>]*
             src=(['"])(.*?)\5
         )
-        """, re.X)
+        """,
+        re.X,
+    )
     for match in re.findall(imgs, content):
         logger.warning(
-            'Empty alt attribute for image %s in %s',
-            os.path.basename(match[1] + match[5]), path,
-            extra={'limit_msg': 'Other images have empty alt attributes'})
+            "Empty alt attribute for image %s in %s",
+            os.path.basename(match[1] + match[5]),
+            path,
+            extra={"limit_msg": "Other images have empty alt attributes"},
+        )


 def default_metadata(settings=None, process=None):
     metadata = {}
     if settings:
-        for name, value in dict(settings.get('DEFAULT_METADATA', {})).items():
+        for name, value in dict(settings.get("DEFAULT_METADATA", {})).items():
             if process:
                 value = process(name, value)
             metadata[name] = value
-        if 'DEFAULT_CATEGORY' in settings:
-            value = settings['DEFAULT_CATEGORY']
+        if "DEFAULT_CATEGORY" in settings:
+            value = settings["DEFAULT_CATEGORY"]
             if process:
-                value = process('category', value)
-            metadata['category'] = value
-        if settings.get('DEFAULT_DATE', None) and \
-                settings['DEFAULT_DATE'] != 'fs':
-            if isinstance(settings['DEFAULT_DATE'], str):
-                metadata['date'] = get_date(settings['DEFAULT_DATE'])
+                value = process("category", value)
+            metadata["category"] = value
+        if settings.get("DEFAULT_DATE", None) and settings["DEFAULT_DATE"] != "fs":
+            if isinstance(settings["DEFAULT_DATE"], str):
+                metadata["date"] = get_date(settings["DEFAULT_DATE"])
             else:
-                metadata['date'] = datetime.datetime(*settings['DEFAULT_DATE'])
+                metadata["date"] = datetime.datetime(*settings["DEFAULT_DATE"])
     return metadata


 def path_metadata(full_path, source_path, settings=None):
     metadata = {}
     if settings:
-        if settings.get('DEFAULT_DATE', None) == 'fs':
-            metadata['date'] = datetime.datetime.fromtimestamp(
-                os.stat(full_path).st_mtime)
-            metadata['modified'] = metadata['date']
+        if settings.get("DEFAULT_DATE", None) == "fs":
+            metadata["date"] = datetime.datetime.fromtimestamp(
+                os.stat(full_path).st_mtime
+            )
+            metadata["modified"] = metadata["date"]

         # Apply EXTRA_PATH_METADATA for the source path and the paths of any
         # parent directories. Sorting EPM first ensures that the most specific
         # path wins conflicts.

-        epm = settings.get('EXTRA_PATH_METADATA', {})
+        epm = settings.get("EXTRA_PATH_METADATA", {})
         for path, meta in sorted(epm.items()):
             # Enforce a trailing slash when checking for parent directories.
             # This prevents false positives when one file or directory's name
             # is a prefix of another's.
-            dirpath = posixize_path(os.path.join(path, ''))
+            dirpath = posixize_path(os.path.join(path, ""))
             if source_path == path or source_path.startswith(dirpath):
                 metadata.update(meta)

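
The two comment blocks in the hunk above explain the EXTRA_PATH_METADATA matching rules. The sketch below only illustrates why the trailing slash matters; the paths are hypothetical, and `posixpath.join` stands in for the `os.path.join` plus `posixize_path` combination used in the real code.

    import posixpath

    source_path = "blog-drafts/post.rst"
    for prefix in ("blog", "blog-drafts"):
        dirpath = posixpath.join(prefix, "")  # "blog/" -- note the trailing slash
        matches = source_path == prefix or source_path.startswith(dirpath)
        print(prefix, matches)
    # blog False         (a bare startswith("blog") would wrongly be True here)
    # blog-drafts True
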
@@ -736,11 +761,10 @@ def parse_path_metadata(source_path, settings=None, process=None):
     subdir = os.path.basename(dirname)
     if settings:
         checks = []
-        for key, data in [('FILENAME_METADATA', base),
-                          ('PATH_METADATA', source_path)]:
+        for key, data in [("FILENAME_METADATA", base), ("PATH_METADATA", source_path)]:
             checks.append((settings.get(key, None), data))
-        if settings.get('USE_FOLDER_AS_CATEGORY', None):
-            checks.append(('(?P<category>.*)', subdir))
+        if settings.get("USE_FOLDER_AS_CATEGORY", None):
+            checks.append(("(?P<category>.*)", subdir))
         for regexp, data in checks:
             if regexp and data:
                 match = re.match(regexp, data)