diff --git a/docs/plugins.rst b/docs/plugins.rst index 58eee16b..3da0c078 100644 --- a/docs/plugins.rst +++ b/docs/plugins.rst @@ -66,6 +66,11 @@ Please note that while we do our best to review and maintain these plugins, they are submitted by the Pelican community and thus may have varying levels of support and interoperability. +Community plugins can also be found on PyPI tagged with "`Framework :: +Pelican :: Plugins`_". + +.. _Framework :: Pelican :: Plugins: https://pypi.org/search/?q=&o=-created&c=Framework+%3A%3A+Pelican+%3A%3A+Plugins + How to create plugins ===================== @@ -112,10 +117,10 @@ and have a folder structure as follows:: myplugin ├── pelican - │   └── plugins - │   └── myplugin - │   ├── __init__.py - │   └── ... + │   └── plugins + │       └── myplugin + │           ├── __init__.py + │           └── ... ├── ... └── setup.py @@ -146,7 +151,9 @@ finalized pelican object invoked after generator_init generator invoked in the Generator.__init__ all_generators_finalized generators invoked after all the generators are executed and before writing output readers_init readers invoked in the Readers.__init__ -article_generator_context article_generator, metadata +article_generator_context article_generator, metadata invoked after the content and metadata for the article has been generated; + use if you need to adjust the article metadata before it gets used by + Pelican. article_generator_preread article_generator invoked before a article is read in ArticlesGenerator.generate_context; use if code needs to do something before every article is parsed article_generator_init article_generator invoked in the ArticlesGenerator.__init__ diff --git a/docs/settings.rst b/docs/settings.rst index 67d1080f..3fac5f18 100644 --- a/docs/settings.rst +++ b/docs/settings.rst @@ -63,7 +63,7 @@ Basic settings .. data:: DEFAULT_CATEGORY - The default category to fall back on. The default is ``'misc'``. + The default category to fall back on. 
The default is ``"misc"``. .. data:: DISPLAY_PAGES_ON_MENU @@ -104,7 +104,7 @@ Basic settings A dictionary of custom Jinja2 environment variables you want to use. This also includes a list of extensions you may want to include. See `Jinja Environment documentation`_. The default is - ``{'extensions': [], 'trim_blocks': True, 'lstrip_blocks': True}``. + ``{"extensions": [], "trim_blocks": True, "lstrip_blocks": True}``. .. data:: JINJA_FILTERS @@ -114,10 +114,10 @@ Basic settings Example:: import sys - sys.path.append('to/your/path') + sys.path.append("to/your/path") from custom_filter import urlencode_filter - JINJA_FILTERS = {'urlencode': urlencode_filter} + JINJA_FILTERS = {"urlencode": urlencode_filter} See: `Jinja custom filters documentation`_. The default is ``{}``. @@ -141,7 +141,7 @@ Basic settings Example:: - LOG_FILTER = [(logging.WARN, 'TAG_SAVE_AS is set to False')] + LOG_FILTER = [(logging.WARN, "TAG_SAVE_AS is set to False")] The default is ``[]``. @@ -152,11 +152,11 @@ Basic settings For example, to avoid processing .html files, set:: - READERS = {'html': None} + READERS = {"html": None} To add a custom reader for the ``foo`` extension, set:: - READERS = {'foo': FooReader} + READERS = {"foo": FooReader} The default is ``{}``. @@ -164,14 +164,14 @@ Basic settings A list of Unix glob patterns. Files and directories matching any of these patterns or any of the commonly hidden files and directories set by ``watchfiles.DefaultFilter`` - will be ignored by the processor. For example, the default ``['**/.*']`` will - ignore "hidden" files and directories, and ``['__pycache__']`` would ignore + will be ignored by the processor. For example, the default ``["**/.*"]`` will + ignore "hidden" files and directories, and ``["__pycache__"]`` would ignore Python 3's bytecode caches. For a full list of the commonly hidden files set by ``watchfiles.DefaultFilter``, please refer to the `watchfiles documentation`_. - The default is ``['**/.*']``. 
+ The default is ``["**/.*"]``. .. data:: MARKDOWN @@ -184,12 +184,12 @@ Basic settings The default is:: MARKDOWN = { - 'extension_configs': { - 'markdown.extensions.codehilite': {'css_class': 'highlight'}, - 'markdown.extensions.extra': {}, - 'markdown.extensions.meta': {}, + "extension_configs": { + "markdown.extensions.codehilite": {"css_class": "highlight"}, + "markdown.extensions.extra": {}, + "markdown.extensions.meta": {}, }, - 'output_format': 'html5', + "output_format": "html5", } .. Note:: @@ -201,18 +201,18 @@ Basic settings Where to output the generated files. This should correspond to your web server's virtual host root directory. - The default is ``'output'``. + The default is ``"output"``. .. data:: PATH Path to content directory to be processed by Pelican. If undefined, and content path is not specified via an argument to the ``pelican`` command, - Pelican will default to ``'.'``, the current working directory. + Pelican will default to ``"."``, the current working directory. .. data:: PAGE_PATHS A list of directories and files to look at for pages, relative to ``PATH``. - The default is ``['pages']``. + The default is ``["pages"]``. .. data:: PAGE_EXCLUDES @@ -222,7 +222,7 @@ Basic settings .. data:: ARTICLE_PATHS A list of directories and files to look at for articles, relative to - ``PATH``. The default is ``['']``. + ``PATH``. The default is ``[""]``. .. data:: ARTICLE_EXCLUDES @@ -239,7 +239,7 @@ Basic settings Controls the extension that will be used by the SourcesGenerator. Defaults to ``.text``. If not a valid string the default value will be used. The - default is ``'.text'``. + default is ``".text"``. .. data:: PLUGINS @@ -252,7 +252,7 @@ Basic settings .. data:: SITENAME - Your site's name. The default is ``'A Pelican Blog'``. + Your site's name. The default is ``"A Pelican Blog"``. .. data:: SITEURL @@ -261,9 +261,9 @@ Basic settings properly-formed URLs. 
If your site is available via HTTPS, this setting should begin with ``https://`` — otherwise use ``http://``. Then append your domain, with no trailing slash at the end. Example: ``SITEURL = - 'https://example.com'`` + "https://example.com"`` - The default is ``''``, the blank string. + The default is ``""``, the blank string. .. data:: STATIC_PATHS @@ -272,7 +272,7 @@ Basic settings modification. Articles, pages, and other content source files will normally be skipped, so it is safe for a directory to appear both here and in ``PAGE_PATHS`` or ``ARTICLE_PATHS``. Pelican's default settings include the - "images" directory here. The default is ``['images']``. + "images" directory here. The default is ``["images"]``. .. data:: STATIC_EXCLUDES @@ -318,8 +318,8 @@ Basic settings .. data:: TYPOGRIFY_OMIT_FILTERS - A list of Typogrify filters to skip. Allowed values are: ``'amp'``, - ``'smartypants'``, ``'caps'``, ``'initial_quotes'``, ``'widont'``. By + A list of Typogrify filters to skip. Allowed values are: ``"amp"``, + ``"smartypants"``, ``"caps"``, ``"initial_quotes"``, ``"widont"``. By default, no filter is omitted (in other words, all filters get applied). This setting requires that Typogrify version 2.1.0 or later is installed. The default is ``[]``. @@ -333,7 +333,7 @@ Basic settings ``oldschool`` setting renders both en-dashes and em-dashes when it sees two (``--``) and three (``---``) hyphen characters, respectively. The ``oldschool_inverted`` setting turns two hyphens into an em-dash and three - hyphens into an en-dash. The default is ``'default'``. + hyphens into an en-dash. The default is ``"default"``. .. data:: SUMMARY_MAX_LENGTH @@ -354,7 +354,7 @@ Basic settings When creating a short summary of an article and the result was truncated to match the required word length, this will be used as the truncation suffix. - The default is ``'…'``. + The default is ``"…"``. .. 
data:: WITH_FUTURE_DATES @@ -369,7 +369,7 @@ Basic settings ``filename``, in ``{}`` or ``||``. Identifier between ``{`` and ``}`` goes into the ``what`` capturing group. For details see :ref:`ref-linking-to-internal-content`. The default is - ``'[{|](?P<what>.*?)[|}]'``. + ``"[{|](?P<what>.*?)[|}]"``. .. data:: PYGMENTS_RST_OPTIONS @@ -385,13 +385,13 @@ Basic settings .. data:: CONTENT_CACHING_LAYER - If set to ``'reader'``, save only the raw content and metadata returned by - readers. If set to ``'generator'``, save processed content objects. The - default is ``'reader'``. + If set to ``"reader"``, save only the raw content and metadata returned by + readers. If set to ``"generator"``, save processed content objects. The + default is ``"reader"``. .. data:: CACHE_PATH - Directory in which to store cache files. The default is ``'cache'``. + Directory in which to store cache files. The default is ``"cache"``. .. data:: GZIP_CACHE @@ -402,12 +402,12 @@ Basic settings Controls how files are checked for modifications. - - If set to ``'mtime'``, the modification time of the file is + - If set to ``"mtime"``, the modification time of the file is checked. - If set to a name of a function provided by the ``hashlib`` - module, e.g. ``'md5'``, the file hash is checked. + module, e.g. ``"md5"``, the file hash is checked. - The default is ``'mtime'``. + The default is ``"mtime"``. .. data:: LOAD_CONTENT_CACHE @@ -416,7 +416,7 @@ Basic settings .. data:: FORMATTED_FIELDS A list of metadata fields containing reST/Markdown content to be parsed and - translated to HTML. The default is ``['summary']``. + translated to HTML. The default is ``["summary"]``. .. data:: PORT @@ -425,7 +425,7 @@ Basic settings .. data:: BIND - The IP to which to bind the HTTP server. The default is ``'127.0.0.1'``. + The IP to which to bind the HTTP server. The default is ``"127.0.0.1"``. .. 
_url-settings: @@ -458,8 +458,8 @@ If you don't want that flexibility and instead prefer that your generated output paths mirror your source content's filesystem path hierarchy, try the following settings:: - PATH_METADATA = r'(?P<path_no_ext>.*)\..*' - ARTICLE_URL = ARTICLE_SAVE_AS = PAGE_URL = PAGE_SAVE_AS = '{path_no_ext}.html' + PATH_METADATA = r"(?P<path_no_ext>.*)\..*" + ARTICLE_URL = ARTICLE_SAVE_AS = PAGE_URL = PAGE_SAVE_AS = "{path_no_ext}.html" Otherwise, you can use a variety of file metadata attributes within URL-related settings: @@ -472,10 +472,10 @@ settings: Example usage:: - ARTICLE_URL = 'posts/{date:%Y}/{date:%b}/{date:%d}/{slug}/' - ARTICLE_SAVE_AS = 'posts/{date:%Y}/{date:%b}/{date:%d}/{slug}/index.html' - PAGE_URL = 'pages/{slug}/' - PAGE_SAVE_AS = 'pages/{slug}/index.html' + ARTICLE_URL = "posts/{date:%Y}/{date:%b}/{date:%d}/{slug}/" + ARTICLE_SAVE_AS = "posts/{date:%Y}/{date:%b}/{date:%d}/{slug}/index.html" + PAGE_URL = "pages/{slug}/" + PAGE_SAVE_AS = "pages/{slug}/index.html" This would save your articles into something like ``/posts/2011/Aug/07/sample-post/index.html``, save your pages into @@ -499,111 +499,111 @@ This would save your articles into something like .. data:: ARTICLE_URL - The URL to refer to an article. The default is ``'{slug}.html'``. + The URL to refer to an article. The default is ``"{slug}.html"``. .. data:: ARTICLE_SAVE_AS - The place where we will save an article. The default is ``'{slug}.html'``. + The place where we will save an article. The default is ``"{slug}.html"``. .. data:: ARTICLE_LANG_URL The URL to refer to an article which doesn't use the default language. - The default is ``'{slug}-{lang}.html``. + The default is ``"{slug}-{lang}.html"``. .. data:: ARTICLE_LANG_SAVE_AS The place where we will save an article which doesn't use the default - language. The default is ``'{slug}-{lang}.html'``. + language. The default is ``"{slug}-{lang}.html"``. .. data:: DRAFT_URL The URL to refer to an article draft. 
The default is - ``'drafts/{slug}.html'``. + ``"drafts/{slug}.html"``. .. data:: DRAFT_SAVE_AS - The place where we will save an article draft. The default is ``'drafts/{slug}.html'``. + The place where we will save an article draft. The default is ``"drafts/{slug}.html"``. .. data:: DRAFT_LANG_URL The URL to refer to an article draft which doesn't use the default language. - The default is ``'drafts/{slug}-{lang}.html'``. + The default is ``"drafts/{slug}-{lang}.html"``. .. data:: DRAFT_LANG_SAVE_AS The place where we will save an article draft which doesn't use the default - language. The default is ``'drafts/{slug}-{lang}.html'``. + language. The default is ``"drafts/{slug}-{lang}.html"``. .. data:: PAGE_URL The URL we will use to link to a page. The default is - ``'pages/{slug}.html'``. + ``"pages/{slug}.html"``. .. data:: PAGE_SAVE_AS The location we will save the page. This value has to be the same as PAGE_URL or you need to use a rewrite in your server config. The default - is ``'pages/{slug}.html'``. + is ``"pages/{slug}.html"``. .. data:: PAGE_LANG_URL The URL we will use to link to a page which doesn't use the default - language. The default is ``'pages/{slug}-{lang}.html'``. + language. The default is ``"pages/{slug}-{lang}.html"``. .. data:: PAGE_LANG_SAVE_AS The location we will save the page which doesn't use the default language. - The default is ``'pages/{slug}-{lang}.html'``. + The default is ``"pages/{slug}-{lang}.html"``. .. data:: DRAFT_PAGE_URL The URL used to link to a page draft. The default is - ``'drafts/pages/{slug}.html'``. + ``"drafts/pages/{slug}.html"``. .. data:: DRAFT_PAGE_SAVE_AS The actual location a page draft is saved at. The default is - ``'drafts/pages/{slug}.html'``. + ``"drafts/pages/{slug}.html"``. .. data:: DRAFT_PAGE_LANG_URL The URL used to link to a page draft which doesn't use the default - language. The default is ``'drafts/pages/{slug}-{lang}.html'``. + language. The default is ``"drafts/pages/{slug}-{lang}.html"``. 
.. data:: DRAFT_PAGE_LANG_SAVE_AS The actual location a page draft which doesn't use the default language is - saved at. The default is ``'drafts/pages/{slug}-{lang}.html'``. + saved at. The default is ``"drafts/pages/{slug}-{lang}.html"``. .. data:: AUTHOR_URL - The URL to use for an author. The default is ``'author/{slug}.html'``. + The URL to use for an author. The default is ``"author/{slug}.html"``. .. data:: AUTHOR_SAVE_AS - The location to save an author. The default is ``'author/{slug}.html'``. + The location to save an author. The default is ``"author/{slug}.html"``. .. data:: CATEGORY_URL - The URL to use for a category. The default is ``'category/{slug}.html'``. + The URL to use for a category. The default is ``"category/{slug}.html"``. .. data:: CATEGORY_SAVE_AS - The location to save a category. The default is ``'category/{slug}.html'``. + The location to save a category. The default is ``"category/{slug}.html"``. .. data:: TAG_URL - The URL to use for a tag. The default is ``'tag/{slug}.html'``. + The URL to use for a tag. The default is ``"tag/{slug}.html"``. .. data:: TAG_SAVE_AS - The location to save the tag page. The default is ``'tag/{slug}.html'``. + The location to save the tag page. The default is ``"tag/{slug}.html"``. .. note:: If you do not want one or more of the default pages to be created (e.g., you are the only author on your site and thus do not need an Authors page), - set the corresponding ``*_SAVE_AS`` setting to ``''`` to prevent the + set the corresponding ``*_SAVE_AS`` setting to ``""`` to prevent the relevant page from being generated. Pelican can optionally create per-year, per-month, and per-day archives of your @@ -615,10 +615,10 @@ written over time. 
Example usage:: - YEAR_ARCHIVE_SAVE_AS = 'posts/{date:%Y}/index.html' - YEAR_ARCHIVE_URL = 'posts/{date:%Y}/' - MONTH_ARCHIVE_SAVE_AS = 'posts/{date:%Y}/{date:%b}/index.html' - MONTH_ARCHIVE_URL = 'posts/{date:%Y}/{date:%b}/' + YEAR_ARCHIVE_SAVE_AS = "posts/{date:%Y}/index.html" + YEAR_ARCHIVE_URL = "posts/{date:%Y}/" + MONTH_ARCHIVE_SAVE_AS = "posts/{date:%Y}/{date:%b}/index.html" + MONTH_ARCHIVE_URL = "posts/{date:%Y}/{date:%b}/" With these settings, Pelican will create an archive of all your posts for the year at (for instance) ``posts/2011/index.html`` and an archive of all your @@ -632,31 +632,36 @@ through the URLs ``posts/2011/`` and ``posts/2011/Aug/``, respectively. .. data:: YEAR_ARCHIVE_SAVE_AS - The location to save per-year archives of your posts. The default is ``''``. + The location to save per-year archives of your posts. The default is ``""``, + i.e. this is disabled by default. .. data:: YEAR_ARCHIVE_URL The URL to use for per-year archives of your posts. You should set this if - you enable per-year archives. The default is ``''``. + you enable per-year archives. The default is ``""``, i.e. this is disabled + by default. .. data:: MONTH_ARCHIVE_SAVE_AS The location to save per-month archives of your posts. The default is - ``''``. + ``""``, i.e. this is disabled by default. .. data:: MONTH_ARCHIVE_URL The URL to use for per-month archives of your posts. You should set this if - you enable per-month archives. The default is ``''``. + you enable per-month archives. The default is ``""``, i.e. this is disabled + by default. .. data:: DAY_ARCHIVE_SAVE_AS - The location to save per-day archives of your posts. The default is ``''``. + The location to save per-day archives of your posts. The default is ``""``, + i.e. this is disabled by default. .. data:: DAY_ARCHIVE_URL The URL to use for per-day archives of your posts. You should set this if - you enable per-day archives. The default is ``''``. + you enable per-day archives. The default is ``""``, i.e. 
this is disabled by + default. ``DIRECT_TEMPLATES`` work a bit differently than noted above. Only the ``_SAVE_AS`` settings are available, but it is available for any direct @@ -664,34 +669,34 @@ template. .. data:: ARCHIVES_SAVE_AS - The location to save the article archives page. The default is ``'archives.html'``. + The location to save the article archives page. The default is ``"archives.html"``. .. data:: AUTHORS_SAVE_AS - The location to save the author list. The default is ``'authors.html'``. + The location to save the author list. The default is ``"authors.html"``. .. data:: CATEGORIES_SAVE_AS - The location to save the category list. The default is ``'categories.html'``. + The location to save the category list. The default is ``"categories.html"``. .. data:: TAGS_SAVE_AS - The location to save the tag list. The default is ``'tags.html'``. + The location to save the tag list. The default is ``"tags.html"``. .. data:: INDEX_SAVE_AS - The location to save the list of all articles. The default is ``'index.html'``. + The location to save the list of all articles. The default is ``"index.html"``. URLs for direct template pages are theme-dependent. Some themes use corresponding ``*_URL`` setting as string, while others hard-code them: -``'archives.html'``, ``'authors.html'``, ``'categories.html'``, -``'tags.html'``. +``"archives.html"``, ``"authors.html"``, ``"categories.html"``, +``"tags.html"``. .. data:: SLUGIFY_SOURCE Specifies from where you want the slug to be automatically generated. Can be set to ``title`` to use the "Title:" metadata tag or ``basename`` to use the - article's file name when creating the slug. The default is ``'title'``. + article's file name when creating the slug. The default is ``"title"``. .. data:: SLUGIFY_USE_UNICODE @@ -715,10 +720,10 @@ corresponding ``*_URL`` setting as string, while others hard-code them: backward compatibility with existing URLs. 
The default is:: [ - (r'[^\w\s-]', ''), # remove non-alphabetical/whitespace/'-' chars - (r'(?u)\A\s*', ''), # strip leading whitespace - (r'(?u)\s*\Z', ''), # strip trailing whitespace - (r'[-\s]+', '-'), # reduce multiple whitespace or '-' to single '-' + (r"[^\w\s-]", ""), # remove non-alphabetical/whitespace/"-" chars + (r"(?u)\A\s*", ""), # strip leading whitespace + (r"(?u)\s*\Z", ""), # strip trailing whitespace + (r"[-\s]+", "-"), # reduce multiple whitespace or "-" to single "-" ] .. data:: AUTHOR_REGEX_SUBSTITUTIONS @@ -756,7 +761,7 @@ Time and Date .. data:: DEFAULT_DATE - The default date you want to use. If ``'fs'``, Pelican will use the file + The default date you want to use. If ``"fs"``, Pelican will use the file system timestamp information (mtime) if it can't get date information from the metadata. If given any other string, it will be parsed by the same method as article metadata. If set to a tuple object, the default datetime @@ -765,7 +770,8 @@ Time and Date .. data:: DEFAULT_DATE_FORMAT - The default date format you want to use. The default is ``'%a %d %B %Y'``. + The default date format you want to use. The default is ``"%a %d %B %Y"``, + e.g. "Mon 06 April 2026". .. data:: DATE_FORMATS @@ -785,8 +791,8 @@ Time and Date .. parsed-literal:: DATE_FORMATS = { - 'en': '%a, %d %b %Y', - 'jp': '%Y-%m-%d(%a)', + "en": "%a, %d %b %Y", + "jp": "%Y-%m-%d(%a)", } It is also possible to set different locale settings for each language by @@ -797,14 +803,14 @@ Time and Date # On Unix/Linux DATE_FORMATS = { - 'en': ('en_US','%a, %d %b %Y'), - 'jp': ('ja_JP','%Y-%m-%d(%a)'), + "en": ("en_US", "%a, %d %b %Y"), + "jp": ("ja_JP", "%Y-%m-%d(%a)"), } # On Windows DATE_FORMATS = { - 'en': ('usa','%a, %d %b %Y'), - 'jp': ('jpn','%Y-%m-%d(%a)'), + "en": ("usa", "%a, %d %b %Y"), + "jp": ("jpn", "%Y-%m-%d(%a)"), } The default is ``{}``. @@ -819,8 +825,9 @@ Time and Date .. 
parsed-literal:: - LOCALE = ['usa', 'jpn', # On Windows - 'en_US', 'ja_JP' # On Unix/Linux + LOCALE = [ + "usa", "jpn", # On Windows + "en_US", "ja_JP" # On Unix/Linux ] For a list of available locales refer to `locales on Windows`_ or on @@ -854,28 +861,30 @@ Template pages For instance, if you have a blog with three static pages — a list of books, your resume, and a contact page — you could have:: - TEMPLATE_PAGES = {'src/books.html': 'dest/books.html', - 'src/resume.html': 'dest/resume.html', - 'src/contact.html': 'dest/contact.html'} + TEMPLATE_PAGES = { + "src/books.html": "dest/books.html", + "src/resume.html": "dest/resume.html", + "src/contact.html": "dest/contact.html", + } The default is ``{}``. .. data:: TEMPLATE_EXTENSIONS The extensions to use when looking up template files from template names. - The default is ``['.html']``. + The default is ``[".html"]``. .. data:: DIRECT_TEMPLATES List of templates that are used directly to render content. Typically direct templates are used to generate index pages for collections of content (e.g., category and tag index pages). If the author, category and tag collections are not - needed, set ``DIRECT_TEMPLATES = ['index', 'archives']`` + needed, set ``DIRECT_TEMPLATES = ["index", "archives"]`` ``DIRECT_TEMPLATES`` are searched for over paths maintained in ``THEME_TEMPLATES_OVERRIDES``. - The default is ``['index', 'tags', 'categories', 'authors', 'archives']``. + The default is ``["index", "tags", "categories", "authors", "archives"]``. Metadata ======== @@ -893,18 +902,37 @@ Metadata The regexp that will be used to extract any metadata from the filename. All named groups that are matched will be set in the metadata object. The - default value will only extract the date from the filename. + default value is ``r"(?P<date>\d{4}-\d{2}-\d{2}).*"`` and will only extract + the date from the filename. 
- For example, to extract both the date and the slug:: + For example, if your source file were titled ``2026-04-30_blog-article.md``, + you could extract both the date and the slug:: - FILENAME_METADATA = r'(?P<date>\d{4}-\d{2}-\d{2})_(?P<slug>.*)' + FILENAME_METADATA = r"(?P<date>\d{4}-\d{2}-\d{2})_(?P<slug>.*)" - See also ``SLUGIFY_SOURCE``. The default is ``r'(?P<date>\d{4}-\d{2}-\d{2}).*'``. + giving you a date of *April 30, 2026* and a slug of *blog-article*. + + See also ``SLUGIFY_SOURCE``. The default is + ``r"(?P<date>\d{4}-\d{2}-\d{2}).*"``, i.e. it assumes your filenames start + with an ISO-style date, e.g. ``2026-04-30``. + + See also ``PATH_METADATA``. .. data:: PATH_METADATA Like ``FILENAME_METADATA``, but parsed from a page's full path relative to - the content source directory. The default is ``''``. + the content source directory, including the source filename. The default + value is ``""``. + + For example, if your source files were stored in folders by year and then by + month, with the filename being the day of the month (e.g. + ``2026/04/30.rst``), you could extract that with:: + + PATH_METADATA = r"(?P<date>\d{4}/\d{2}/\d{2}).*" + + (The above works on Windows as well.) + + See also ``FILENAME_METADATA``. .. data:: EXTRA_PATH_METADATA @@ -913,44 +941,44 @@ Metadata unlike some other Pelican file settings. Paths to a directory apply to all files under it. The most-specific path wins conflicts. -Not all metadata needs to be :ref:`embedded in source file itself -<internal_metadata>`. For example, blog posts are often named following a -``YYYY-MM-DD-SLUG.rst`` pattern, or nested into ``YYYY/MM/DD-SLUG`` -directories. To extract metadata from the filename or path, set -``FILENAME_METADATA`` or ``PATH_METADATA`` to regular expressions that use -Python's `group name notation`_ ``(?P<name>…)``. If you want to attach -additional metadata but don't want to encode it in the path, you can set -``EXTRA_PATH_METADATA``: + Not all metadata needs to be :ref:`embedded in source file itself + <internal_metadata>`. 
For example, blog posts are often named following a + ``YYYY-MM-DD-SLUG.rst`` pattern, or nested into ``YYYY/MM/DD-SLUG`` + directories. To extract metadata from the filename or path, set + ``FILENAME_METADATA`` or ``PATH_METADATA`` to regular expressions that use + Python's `group name notation`_ ``(?P<name>…)``. If you want to attach + additional metadata but don't want to encode it in the path, you can set + ``EXTRA_PATH_METADATA``: -.. parsed-literal:: + .. parsed-literal:: - EXTRA_PATH_METADATA = { - 'relative/path/to/file-1': { - 'key-1a': 'value-1a', - 'key-1b': 'value-1b', - }, - 'relative/path/to/file-2': { - 'key-2': 'value-2', - }, - } + EXTRA_PATH_METADATA = { + "relative/path/to/file-1": { + "key-1a": "value-1a", + "key-1b": "value-1b", + }, + "relative/path/to/file-2": { + "key-2": "value-2", + }, + } -This can be a convenient way to shift the installed location of a particular -file: + This can be a convenient way to shift the output location of a particular + file: -.. parsed-literal:: + .. parsed-literal:: - # Take advantage of the following defaults - # STATIC_SAVE_AS = '{path}' - # STATIC_URL = '{path}' - STATIC_PATHS = [ - 'static/robots.txt', - ] - EXTRA_PATH_METADATA = { - 'static/robots.txt': {'path': 'robots.txt'}, - } + # Take advantage of the following defaults: + # STATIC_SAVE_AS = "{path}" + # STATIC_URL = "{path}" + STATIC_PATHS = [ + "static/robots.txt", + ] + EXTRA_PATH_METADATA = { + "static/robots.txt": {"path": "robots.txt"}, + } -.. _group name notation: - https://docs.python.org/3/library/re.html#regular-expression-syntax + .. _group name notation: + https://docs.python.org/3/library/re.html#regular-expression-syntax The default is ``{}``. @@ -993,7 +1021,7 @@ the ``TAG_FEED_ATOM`` and ``TAG_FEED_RSS`` settings: .. data:: FEED_ALL_ATOM The location to save the all-posts Atom feed: this feed will contain all - posts regardless of their language. The default is ``'feeds/all.atom.xml'``. + posts regardless of their language. 
The default is ``"feeds/all.atom.xml"``. .. data:: FEED_ALL_ATOM_URL @@ -1014,7 +1042,7 @@ the ``TAG_FEED_ATOM`` and ``TAG_FEED_RSS`` settings: .. data:: CATEGORY_FEED_ATOM The location to save the category Atom feeds. [2]_ The default is - ``'feeds/{slug}.atom.xml'``. + ``"feeds/{slug}.atom.xml"``. .. data:: CATEGORY_FEED_ATOM_URL @@ -1036,7 +1064,7 @@ the ``TAG_FEED_ATOM`` and ``TAG_FEED_RSS`` settings: .. data:: AUTHOR_FEED_ATOM The location to save the author Atom feeds. [2]_ The default is - ``'feeds/{slug}.atom.xml'``. + ``"feeds/{slug}.atom.xml"``. .. data:: AUTHOR_FEED_ATOM_URL @@ -1047,7 +1075,7 @@ the ``TAG_FEED_ATOM`` and ``TAG_FEED_RSS`` settings: .. data:: AUTHOR_FEED_RSS The location to save the author RSS feeds. [2]_ The default is - ``'feeds/{slug}.rss.xml'``. + ``"feeds/{slug}.rss.xml"``. .. data:: AUTHOR_FEED_RSS_URL @@ -1074,7 +1102,7 @@ the ``TAG_FEED_ATOM`` and ``TAG_FEED_RSS`` settings: .. data:: FEED_MAX_ITEMS Maximum number of items allowed in a feed. Setting to ``None`` will cause the - feed to contains every article. 100 if not specified. The default is ``100``. + feed to contain every article. The default is ``100``. .. data:: RSS_FEED_SUMMARY_ONLY @@ -1118,7 +1146,7 @@ You can use the following settings to configure the pagination. The templates to use pagination with, and the number of articles to include on a page. If this value is ``None``, it defaults to ``DEFAULT_PAGINATION``. - The default is ``{'index': None, 'tag': None, 'category': None, 'author': None}``. + The default is ``{"index": None, "tag": None, "category": None, "author": None}``. .. data:: PAGINATION_PATTERNS @@ -1126,8 +1154,8 @@ You can use the following settings to configure the pagination. 
default is:: ( - (1, '{name}{extension}', '{name}{extension}'), - (2, '{name}{number}{extension}', '{name}{number}{extension}'), + (1, "{name}{extension}", "{name}{extension}"), + (2, "{name}{number}{extension}", "{name}{number}{extension}"), ) @@ -1152,15 +1180,15 @@ subsequent pages at ``.../page/2/`` etc, you could set ``PAGINATION_PATTERNS`` as follows:: PAGINATION_PATTERNS = ( - (1, '{url}', '{save_as}'), - (2, '{base_name}/page/{number}/', '{base_name}/page/{number}/index.html'), + (1, "{url}", "{save_as}"), + (2, "{base_name}/page/{number}/", "{base_name}/page/{number}/index.html"), ) If you want a pattern to apply to the last page in the list, use ``-1`` as the ``minimum_page`` value:: - (-1, '{base_name}/last/', '{base_name}/last/index.html'), + (-1, "{base_name}/last/", "{base_name}/last/index.html"), Translations ============ @@ -1170,26 +1198,26 @@ section for more information. .. data:: DEFAULT_LANG - The default language to use. The default is ``'en'``. + The default language to use. The default is ``"en"``. .. data:: ARTICLE_TRANSLATION_ID The metadata attribute(s) used to identify which articles are translations of one another. May be a string or a collection of strings. Set to ``None`` or ``False`` to disable the identification of translations. The default is - ``'slug'``. + ``"slug"``. .. data:: PAGE_TRANSLATION_ID The metadata attribute(s) used to identify which pages are translations of one another. May be a string or a collection of strings. Set to ``None`` or ``False`` to disable the identification of translations. The default is - ``'slug'``. + ``"slug"``. .. data:: TRANSLATION_FEED_ATOM The location to save the Atom feed for translations. [3]_ The default is - ``'feeds/all-{lang}.atom.xml'``. + ``"feeds/all-{lang}.atom.xml"``. .. data:: TRANSLATION_FEED_ATOM_URL @@ -1227,18 +1255,18 @@ Ordering content Defines how the articles (``articles_page.object_list`` in the template) are sorted. 
Valid options are: metadata as a string (use ``reversed-`` prefix - to reverse the sort order), special option ``'basename'`` which will use + to reverse the sort order), special option ``"basename"`` which will use the basename of the file (without path), or a custom function to extract the - sorting key from articles. Using a value of ``'date'`` will sort articles in - chronological order, while the default value, ``'reversed-date'``, will sort + sorting key from articles. Using a value of ``"date"`` will sort articles in + chronological order, while the default value, ``"reversed-date"``, will sort articles by date in reverse order (i.e., newest article comes first). The - default is ``'reversed-date'``. + default is ``"reversed-date"``. .. data:: PAGE_ORDER_BY Defines how the pages (``pages`` variable in the template) are sorted. - Options are same as ``ARTICLE_ORDER_BY``. The default value, ``'basename'`` - will sort pages by their basename. The default is ``'basename'``. + Options are same as ``ARTICLE_ORDER_BY``. The default value, ``"basename"`` + will sort pages by their basename. The default is ``"basename"``. .. _settings/themes: @@ -1260,14 +1288,14 @@ themes. Destination directory in the output path where Pelican will place the files collected from `THEME_STATIC_PATHS`. Default is `theme`. The default is - ``'theme'``. + ``"theme"``. .. data:: THEME_STATIC_PATHS Static theme paths you want to copy. Default value is `static`, but if your theme has other static paths, you can put them here. If files or directories with the same names are included in the paths defined in this settings, they - will be progressively overwritten. The default is ``['static']``. + will be progressively overwritten. The default is ``["static"]``. .. data:: THEME_TEMPLATES_OVERRIDES @@ -1286,7 +1314,7 @@ themes. .. data:: CSS_FILE - Specify the CSS file you want to load. The default is ``'main.css'``. + Specify the CSS file you want to load. The default is ``"main.css"``. 
By default, two themes are available. You can specify them using the ``THEME`` setting or by passing the ``-t`` option to the ``pelican`` command: @@ -1313,7 +1341,10 @@ Following are example ways to specify your preferred theme:: # Specify a customized theme, via absolute path THEME = "/home/myuser/projects/mysite/themes/mycustomtheme" -The built-in ``simple`` theme can be customized using the following settings. +Simple Theme +------------ + +The built-in ``simple`` theme can be customized using the following settings: .. data:: STYLESHEET_URL @@ -1383,6 +1414,9 @@ Feel free to use them in your themes as well. Allows override of the name of the "social" widget. If not specified, defaults to "social". The default is ``None``. +Notmyidea Theme +--------------- + In addition, you can use the "wide" version of the ``notmyidea`` theme by adding the following to your configuration:: @@ -1405,7 +1439,7 @@ they will be filtered out. For example:: import logging - LOG_FILTER = [(logging.WARN, 'TAG_SAVE_AS is set to False')] + LOG_FILTER = [(logging.WARN, "TAG_SAVE_AS is set to False")] It is possible to filter out messages by a template. Check out source code to obtain a template. @@ -1413,7 +1447,7 @@ obtain a template. For example:: import logging - LOG_FILTER = [(logging.WARN, 'Empty alt attribute for image %s in %s')] + LOG_FILTER = [(logging.WARN, "Empty alt attribute for image %s in %s")] .. Warning:: @@ -1428,7 +1462,7 @@ For example:: .. _reading_only_modified_content: -Reading only modified content +Reading Only Modified Content ============================= To speed up the build process, Pelican can optionally read only articles and @@ -1442,12 +1476,12 @@ When Pelican is about to read some content source file: file has no record in the cache file, it is read as usual. 2. The file is checked according to ``CHECK_MODIFIED_METHOD``: - - If set to ``'mtime'``, the modification time of the file is - checked. 
- - If set to a name of a function provided by the ``hashlib`` - module, e.g. ``'md5'``, the file hash is checked. - - If set to anything else or the necessary information about the - file cannot be found in the cache file, the content is read as usual. + - If set to ``"mtime"``, the modification time of the file is + checked. + - If set to a name of a function provided by the ``hashlib`` + module, e.g. ``"md5"``, the file hash is checked. + - If set to anything else or the necessary information about the + file cannot be found in the cache file, the content is read as usual. 3. If the file is considered unchanged, the content data saved in a previous build corresponding to the file is loaded from the cache, and the @@ -1456,9 +1490,9 @@ When Pelican is about to read some content source file: modification information and the content data are saved to the cache if ``CACHE_CONTENT`` is ``True``. -If ``CONTENT_CACHING_LAYER`` is set to ``'reader'`` (the default), the raw +If ``CONTENT_CACHING_LAYER`` is set to ``"reader"`` (the default), the raw content and metadata returned by a reader are cached. If this setting is -instead set to ``'generator'``, the processed content object is cached. Caching +instead set to ``"generator"``, the processed content object is cached. Caching the processed content object may conflict with plugins (as some reading related signals may be skipped) and the ``WITH_FUTURE_DATES`` functionality (as the ``draft`` status of the cached content objects would not change automatically diff --git a/docs/themes.rst b/docs/themes.rst index ace5dcb9..fa4c9d37 100644 --- a/docs/themes.rst +++ b/docs/themes.rst @@ -10,6 +10,12 @@ Please note that while we do our best to review and merge theme contributions, they are submitted by the Pelican community and thus may have varying levels of support and interoperability. +Community themes can also be found on PyPI tagged with "`Framework :: +Pelican :: Themes`_". + +.. 
_Framework :: Pelican :: Themes: https://pypi.org/search/?q=&o=-created&c=Framework+%3A%3A+Pelican+%3A%3A+Themes + + Creating Themes ~~~~~~~~~~~~~~~ diff --git a/pelican/contents.py b/pelican/contents.py index 4025bda5..11459f77 100644 --- a/pelican/contents.py +++ b/pelican/contents.py @@ -27,6 +27,7 @@ from pelican.utils import ( sanitised_join, set_date_tzinfo, slugify, + strip_toc_elements_from_html, truncate_html_paragraphs, truncate_html_words, ) @@ -446,13 +447,19 @@ class Content: content = truncate_html_paragraphs(self.content, max_paragraphs) if self.settings["SUMMARY_MAX_LENGTH"] is None: - return content + summary = content + else: + summary = truncate_html_words( + content, + self.settings["SUMMARY_MAX_LENGTH"], + self.settings["SUMMARY_END_SUFFIX"], + ) - return truncate_html_words( - content, - self.settings["SUMMARY_MAX_LENGTH"], - self.settings["SUMMARY_END_SUFFIX"], - ) + # Strip TOC elements that would contain broken links in summary context + # TOC anchors only work in full article view, not in summaries/excerpts + summary = strip_toc_elements_from_html(summary) + + return summary @property def summary(self) -> str: diff --git a/pelican/generators.py b/pelican/generators.py index f663e019..515257c4 100644 --- a/pelican/generators.py +++ b/pelican/generators.py @@ -30,6 +30,7 @@ from pelican.utils import ( posixize_path, process_translations, ) +from pelican.writers import FileOverwriteFailedError logger = logging.getLogger(__name__) @@ -570,57 +571,93 @@ class ArticlesGenerator(CachingGenerator): tag_template = self.get_template("tag") for tag, articles in self.tags.items(): dates = [article for article in self.dates if article in articles] - write( - tag.save_as, - tag_template, - self.context, - tag=tag, - url=tag.url, - articles=articles, - dates=dates, - template_name="tag", - blog=True, - page_name=tag.page_name, - all_articles=self.articles, - ) + try: + write( + tag.save_as, + tag_template, + self.context, + tag=tag, + url=tag.url, + 
articles=articles, + dates=dates, + template_name="tag", + blog=True, + page_name=tag.page_name, + all_articles=self.articles, + ) + except FileOverwriteFailedError: + if not tag.slug: + logger.info( + 'Tag "%s" has an invalid slug; skipping writing tag page...', + tag, + extra={"limit_msg": "Further tags with invalid slugs."}, + ) + continue + else: + logger.error('Failed to write Tag page for "%s".', tag) + raise def generate_categories(self, write): """Generate category pages.""" category_template = self.get_template("category") for cat, articles in self.categories: dates = [article for article in self.dates if article in articles] - write( - cat.save_as, - category_template, - self.context, - url=cat.url, - category=cat, - articles=articles, - dates=dates, - template_name="category", - blog=True, - page_name=cat.page_name, - all_articles=self.articles, - ) + try: + write( + cat.save_as, + category_template, + self.context, + url=cat.url, + category=cat, + articles=articles, + dates=dates, + template_name="category", + blog=True, + page_name=cat.page_name, + all_articles=self.articles, + ) + except FileOverwriteFailedError: + if not cat.slug: + logger.info( + 'Category "%s" has an invalid slug; skipping writing category page...', + cat, + extra={"limit_msg": "Further categories with invalid slugs."}, + ) + continue + else: + logger.error('Failed to write Category page for "%s".', cat) + raise def generate_authors(self, write): """Generate Author pages.""" author_template = self.get_template("author") for aut, articles in self.authors: dates = [article for article in self.dates if article in articles] - write( - aut.save_as, - author_template, - self.context, - url=aut.url, - author=aut, - articles=articles, - dates=dates, - template_name="author", - blog=True, - page_name=aut.page_name, - all_articles=self.articles, - ) + try: + write( + aut.save_as, + author_template, + self.context, + url=aut.url, + author=aut, + articles=articles, + dates=dates, + 
template_name="author", + blog=True, + page_name=aut.page_name, + all_articles=self.articles, + ) + except FileOverwriteFailedError: + if not aut.slug: + logger.info( + 'Author "%s" has an invalid slug; skipping writing author page...', + aut, + extra={"limit_msg": "Further authors with invalid slugs."}, + ) + continue + else: + logger.error('Failed to write Author page for "%s".', aut) + raise def generate_drafts(self, write): """Generate drafts pages.""" diff --git a/pelican/settings.py b/pelican/settings.py index 98b1357e..dbe903f1 100644 --- a/pelican/settings.py +++ b/pelican/settings.py @@ -673,9 +673,10 @@ def configure_settings(settings: Settings) -> Settings: ] if any(settings.get(k) for k in feed_keys): - if not settings.get("SITEURL"): + if not (settings.get("SITEURL") or settings.get("FEED_DOMAIN")): logger.warning( - "Feeds generated without SITEURL set properly may not be valid" + "Feeds generated without SITEURL or FEED_DOMAIN set properly" + " may not be valid" ) if "TIMEZONE" not in settings: diff --git a/pelican/tests/test_contents.py b/pelican/tests/test_contents.py index 4b1effa2..4e48e594 100644 --- a/pelican/tests/test_contents.py +++ b/pelican/tests/test_contents.py @@ -172,6 +172,28 @@ class TestPage(TestBase): ) self.assertIn("test_marker", page.summary) + def test_summary_strips_toc_elements(self): + """Auto-generated summary should strip TOC divs and toc-backref anchors.""" + page_kwargs = self._copy_page_kwargs() + settings = get_settings() + page_kwargs["settings"] = settings + del page_kwargs["metadata"]["summary"] + toc_content = ( + '
' + "

Table of contents

" + '' + "
" + '

My Section

' + "

First paragraph of real content.

" + ) + page_kwargs["content"] = toc_content + settings["SUMMARY_MAX_LENGTH"] = None + page = Page(**page_kwargs) + self.assertNotIn('
My Section", page.summary) + self.assertIn("

First paragraph of real content.

", page.summary) + def test_summary_get_summary_warning(self): """calling ._get_summary() should issue a warning""" page_kwargs = self._copy_page_kwargs() diff --git a/pelican/tests/test_settings.py b/pelican/tests/test_settings.py index 84f7a5c9..59eebd53 100644 --- a/pelican/tests/test_settings.py +++ b/pelican/tests/test_settings.py @@ -1,8 +1,10 @@ import copy import locale +import logging import os from os.path import abspath, dirname, join +from pelican import log from pelican.settings import ( DEFAULT_CONFIG, DEFAULT_THEME, @@ -11,7 +13,7 @@ from pelican.settings import ( handle_deprecated_settings, read_settings, ) -from pelican.tests.support import unittest +from pelican.tests.support import LogCountHandler, unittest class TestSettingsConfiguration(unittest.TestCase): @@ -108,6 +110,39 @@ class TestSettingsConfiguration(unittest.TestCase): configure_settings(settings) self.assertEqual(settings["FEED_DOMAIN"], "http://feeds.example.com") + def _feeds_warning_settings(self, **overrides): + base = { + "LOCALE": "", + "PATH": os.curdir, + "THEME": DEFAULT_THEME, + "FEED_RSS": "feeds/all.rss.xml", + } + base.update(overrides) + handler = LogCountHandler() + logger = logging.getLogger() + logger.addHandler(handler) + saved = log.LimitFilter._raised_messages.copy() + log.LimitFilter._raised_messages = set() + try: + configure_settings(base) + return handler.count_logs( + "Feeds generated without SITEURL", logging.WARNING + ) + finally: + log.LimitFilter._raised_messages = saved + logger.removeHandler(handler) + + def test_feeds_warning_with_siteurl(self): + self.assertEqual(self._feeds_warning_settings(SITEURL="http://example.com"), 0) + + def test_feeds_warning_with_feed_domain(self): + self.assertEqual( + self._feeds_warning_settings(FEED_DOMAIN="http://feeds.example.com"), 0 + ) + + def test_feeds_warning_without_siteurl_or_feed_domain(self): + self.assertEqual(self._feeds_warning_settings(), 1) + def test_theme_settings_exceptions(self): settings = 
self.settings diff --git a/pelican/tests/test_theme.py b/pelican/tests/test_theme.py index 28100893..aa996081 100644 --- a/pelican/tests/test_theme.py +++ b/pelican/tests/test_theme.py @@ -171,6 +171,36 @@ class TestTemplateInheritance(LoggedTestCase): finally: rmtree(content_dir) + def test_category_and_tag_feed_titles_use_slug(self): + """Feed link titles on category/tag pages should have unique titles.""" + + settings = read_settings( + path=None, + override={ + "THEME": "simple", + "PATH": CONTENT_DIR, + "OUTPUT_PATH": self.temp_output, + "CACHE_PATH": self.temp_cache, + "SITEURL": "http://example.com", + "SITENAME": "My Site", + "CATEGORY_FEED_ATOM": "feeds/{slug}.atom.xml", + "TAG_FEED_ATOM": "feeds/tag-{slug}.atom.xml", + }, + ) + + pelican = Pelican(settings=settings) + mute(True)(pelican.run)() + + cat_file = os.path.join(self.temp_output, "category", "test.html") + with open(cat_file) as f: + cat_content = f.read() + self.assertIn('title="Test Category Atom Feed"', cat_content) + + tag_file = os.path.join(self.temp_output, "tag", "foo.html") + with open(tag_file) as f: + tag_content = f.read() + self.assertIn('title="Foo Tag Atom Feed"', tag_content) + if __name__ == "__main__": unittest.main() diff --git a/pelican/tests/test_utils.py b/pelican/tests/test_utils.py index b5a53eac..aad07555 100644 --- a/pelican/tests/test_utils.py +++ b/pelican/tests/test_utils.py @@ -425,6 +425,60 @@ class TestUtils(LoggedTestCase): self.assertEqual(utils.truncate_html_paragraphs(three, 3), three) self.assertEqual(utils.truncate_html_paragraphs(three, 4), three) + def test_strip_toc_elements_from_html(self): + # Test removing TOC div with various class names + html_with_toc = ( + '
' + '

Table of Contents

' + '' + "
" + "

Some content here

" + ) + result = utils.strip_toc_elements_from_html(html_with_toc) + self.assertNotIn('
Some content here

", result) + + # Test removing toc-backref anchors while preserving heading text + html_with_backref = ( + '

Section Heading

' + "

Some content

" + ) + result = utils.strip_toc_elements_from_html(html_with_backref) + self.assertNotIn("toc-backref", result) + self.assertIn("Section Heading", result) + + # Test combined - remove both TOC div and backrefs + html_combined = ( + '
' + "

TOC here

" + "
" + '

the design

' + "

Article content

" + '

key features

' + "

More content

" + ) + result = utils.strip_toc_elements_from_html(html_combined) + self.assertNotIn('
Article content

", result) + + # Test empty input + self.assertEqual(utils.strip_toc_elements_from_html(""), "") + + # Test HTML without TOC elements (should be unchanged) + plain_html = "

Just some plain content

" + self.assertEqual(utils.strip_toc_elements_from_html(plain_html), plain_html) + + # Test case-insensitive matching + html_mixed_case = '

TOC

Content

' + result = utils.strip_toc_elements_from_html(html_mixed_case) + self.assertNotIn("CONTENTS", result) + self.assertIn("

Content

", result) + def test_process_translations(self): fr_articles = [] en_articles = [] diff --git a/pelican/themes/simple/templates/base.html b/pelican/themes/simple/templates/base.html index 491a5903..57d2bafd 100644 --- a/pelican/themes/simple/templates/base.html +++ b/pelican/themes/simple/templates/base.html @@ -25,16 +25,16 @@ {% endif %} {% if CATEGORY_FEED_ATOM and category %} - + {% endif %} {% if CATEGORY_FEED_RSS and category %} - + {% endif %} {% if TAG_FEED_ATOM and tag %} - + {% endif %} {% if TAG_FEED_RSS and tag %} - + {% endif %} {% endblock head %} diff --git a/pelican/urlwrappers.py b/pelican/urlwrappers.py index 8023613c..6eb952db 100644 --- a/pelican/urlwrappers.py +++ b/pelican/urlwrappers.py @@ -42,11 +42,18 @@ class URLWrapper: preserve_case=preserve_case, use_unicode=self.settings.get("SLUGIFY_USE_UNICODE", False), ) + if not self._slug: + logger.warning( + 'Unable to generate valid slug for %s "%s".', + self.__class__.__name__, + self.name, + extra={"limit_msg": "Other invalid slugs."}, + ) return self._slug @slug.setter def slug(self, slug): - # if slug is expliticly set, changing name won't alter slug + # if slug is explicitly set, changing name won't alter slug self._slug_from_name = False self._slug = slug @@ -95,7 +102,7 @@ class URLWrapper: return False def __str__(self): - return self.name + return self.name or "" def __repr__(self): return f"<{type(self).__name__} {self._name!r}>" diff --git a/pelican/utils.py b/pelican/utils.py index a5e08538..ee14a031 100644 --- a/pelican/utils.py +++ b/pelican/utils.py @@ -259,7 +259,7 @@ def slugify( Normalizes string, converts to lowercase, removes non-alpha characters, and converts spaces to hyphens. - Took from Django sources. + Taken from Django sources. For a set of sensible default regex substitutions to pass to regex_subs look into pelican.settings.DEFAULT_CONFIG['SLUG_REGEX_SUBSTITUTIONS']. 
@@ -649,6 +649,37 @@ def truncate_html_paragraphs(s, count): return "".join(paragraphs) +def strip_toc_elements_from_html(html: str) -> str: + """Strip table of contents elements from HTML summaries. + + Removes TOC divs (with broken navigation links) and toc-backref anchor + links from headings. Both are necessary since TOC anchor targets don't + exist when summaries are displayed outside full article context + (e.g., homepage, RSS feeds). + + :param html: HTML content to process + :return: Cleaned HTML with TOC elements removed + """ + # Remove the entire
...
block + html = re.sub( + r']*>.*?
', + "", + html, + flags=re.DOTALL | re.IGNORECASE, + ) + + # Remove anchor links from headings (e.g., text) + # These links point to anchors that don't exist in summary context + html = re.sub( + r']*class="[^"]*toc-backref[^"]*"[^>]*>(.*?)', + r"\1", + html, + flags=re.DOTALL | re.IGNORECASE, + ) + + return html + + def process_translations( content_list: list[Content], translation_id: str | Collection[str] | None = None, diff --git a/pelican/writers.py b/pelican/writers.py index 009d761a..67d52ecd 100644 --- a/pelican/writers.py +++ b/pelican/writers.py @@ -18,6 +18,10 @@ from pelican.utils import ( logger = logging.getLogger(__name__) +class FileOverwriteFailedError(RuntimeError): + """Failed to overwrite an existing file.""" + + class Writer: def __init__(self, output_path, settings=None): self.output_path = output_path @@ -107,14 +111,20 @@ class Writer: """ if filename in self._overridden_files: if override: - raise RuntimeError(f"File {filename} is set to be overridden twice") - logger.info("Skipping %s", filename) + raise FileOverwriteFailedError( + f'Failed to overwrite "{filename}" a second time ' + "(was previously overwritten)" + ) + logger.info('Skipping "%s", not overwriting', filename) filename = os.devnull elif filename in self._written_files: if override: - logger.info("Overwriting %s", filename) + logger.info('Overwriting "%s"', filename) else: - raise RuntimeError(f"File {filename} is to be overwritten") + raise FileOverwriteFailedError( + f'Failed to overwrite "{filename}" as Pelican has already ' + "written to it previously (set `override=True` if intended)" + ) if override: self._overridden_files.add(filename) self._written_files.add(filename) @@ -165,7 +175,7 @@ class Writer: with self._open_w(complete_path, "utf-8", override_output) as fp: feed.write(fp, "utf-8") - logger.info("Writing %s", complete_path) + logger.info('Writing "%s"', complete_path) signals.feed_written.send(complete_path, context=context, feed=feed) return feed @@ -216,7 
+226,7 @@ class Writer: with self._open_w(path, "utf-8", override=override) as f: f.write(output) - logger.info("Writing %s", path) + logger.info('Writing "%s"', path) # Send a signal to say we're writing a file with some specific # local context.