diff --git a/MYST_README.myst b/MYST_README.myst new file mode 100644 index 00000000..4035bafc --- /dev/null +++ b/MYST_README.myst @@ -0,0 +1,204 @@ +--- +title: MyST Markdown Support in Pelican +date: 2024-01-15 +--- + +# MyST Markdown Support in Pelican + +## What is MyST? + +- MyST (Markedly Structured Text) is an extended Markdown syntax with rich features +- Built on top of Markdown and CommonMark +- Supports directives, roles, and cross-references from reStructuredText +- Provides YAML front-matter for metadata +- More information: [MyST Parser Documentation](https://myst-parser.readthedocs.io/) + +## How to Enable MyST + +### Install Dependencies + +```bash +pip install myst-parser pyyaml +``` + +### File Extensions + +By default, MyST processes files with the `.myst` extension: + +- `article.myst` - Processed as MyST content +- `page.myst` - Processed as MyST content + +### Override .md Files to Use MyST + +To parse all `.md` files as MyST instead of standard Markdown, add to your `pelicanconf.py`: + +```python +READERS = {"md": MystReader} +``` + +Note: This requires importing the reader: + +```python +from pelican.readers import MystReader +``` + +## Configuration + +### Default Extensions + +The following MyST extensions are enabled by default: + +- `smartquotes` - Smart quotes and apostrophes +- `replacements` - Text replacements (e.g., `(c)` → ©) +- `html_admonition` - HTML admonitions (`linkify` is not enabled by default because it requires `linkify-it-py`) +- `colon_fence` - Colon fence code blocks (`::: lang`) +- `deflist` - Definition lists +- `html_image` - HTML image support + +### Custom Configuration + +Add to your `pelicanconf.py`: + +```python +MYST = { + "enable_extensions": [ + "smartquotes", + "replacements", + "linkify", # Requires the linkify-it-py package + "colon_fence", + "deflist", + "html_image", + "tasklist", # Add task list support + ], + "disable_syntax": [], # Disable specific syntax elements + "all_links_external": False, # Mark all links as external + "url_schemes": ["http", "https", "mailto", "ftp"], # Allowed URL schemes +} +``` + +## 
Front-Matter Metadata + +MyST uses YAML front-matter for metadata: + +```yaml +--- +title: My Article Title +date: 2024-01-15 10:30 +modified: 2024-01-16 +category: Technology +tags: [python, pelican, myst] +author: Your Name +summary: A **brief** summary with *inline* markup. +--- +``` + +Supported metadata fields: + +- `title` - Article title +- `date` - Publication date (ISO 8601 format) +- `modified` - Last modified date +- `category` - Article category +- `tags` - List of tags +- `author` - Author name +- `authors` - List of multiple authors +- `summary` - Article summary (supports inline markup) +- `slug` - URL slug +- `status` - Article status (draft, published, hidden) + +## MyST Syntax Examples + +### Directives + +``` +:::{note} +This is a note directive. +::: + +:::{warning} +This is a warning directive. +::: +``` + +### Roles + +``` +This is {sub}`subscript` and {sup}`superscript` text. +``` + +### Code Blocks + +```python +def hello(): + print("Hello, MyST!") +``` + +Or using a colon fence: + +``` +::: python +def hello(): + print("Hello, MyST!") +::: +``` + +### Definition Lists + +``` +Term 1 +: Definition for term 1 + +Term 2 +: Definition for term 2 +``` + +### Task Lists + +(Requires the `tasklist` extension) + +``` +- [ ] Incomplete task +- [x] Completed task +``` + +## Differences from Markdown + +- Uses `---`-delimited YAML front-matter (not Markdown's bare `Key: value` header lines) +- Supports reStructuredText directives and roles +- More extensible syntax +- Better cross-reference support + +## Troubleshooting + +### MyST Reader Not Available + +If you see a "myst-parser isn't installed" error: + +```bash +pip install myst-parser pyyaml +``` + +### YAML Front-Matter Not Parsing + +- Ensure front-matter is at the top of the file +- Check that `---` delimiters are on their own lines +- Verify YAML syntax is valid +- Ensure PyYAML is installed + +### Extensions Not Working + +- Check that extensions are spelled correctly in `MYST["enable_extensions"]` +- Some extensions may 
require specific MyST parser versions +- Refer to [MyST documentation](https://myst-parser.readthedocs.io/) for extension details + +## Resources + +- [MyST Parser Documentation](https://myst-parser.readthedocs.io/) +- [MyST Syntax Guide](https://myst-parser.readthedocs.io/en/latest/syntax/syntax.html) +- [Pelican Documentation](https://docs.getpelican.com/) + +## Notes + +- MyST support is optional and requires `myst-parser` and `pyyaml` packages +- MyST uses Docutils under the hood (like reStructuredText) +- Formatted fields (like summary) use Markdown converter for inline markup +- MyST is ideal for technical documentation with need for directives/roles diff --git a/pelican/readers.py b/pelican/readers.py index dfa76638..564be193 100644 --- a/pelican/readers.py +++ b/pelican/readers.py @@ -24,6 +24,11 @@ try: except ImportError: Markdown = False +try: + from myst_parser.docutils_ import Parser as MystParser +except ImportError: + MystParser = False + # Metadata processors have no way to discard an unwanted value, so we have # them return this value instead to signal that it should be discarded later. 
# This means that _filter_discardable_metadata() must be called on processed @@ -45,8 +50,10 @@ DUPLICATES_DEFINITIONS_ALLOWED = { METADATA_PROCESSORS = { "tags": lambda x, y: ([Tag(tag, y) for tag in ensure_metadata_list(x)] or _DISCARD), - "date": lambda x, _y: get_date(x.replace("_", " ")), - "modified": lambda x, _y: get_date(x), + "date": lambda x, _y: x + if isinstance(x, datetime.datetime) + else get_date(x.replace("_", " ")), + "modified": lambda x, _y: x if isinstance(x, datetime.datetime) else get_date(x), "status": lambda x, _y: x.strip() or _DISCARD, "category": lambda x, y: _process_if_nonempty(Category, x, y), "author": lambda x, y: _process_if_nonempty(Author, x, y), @@ -267,9 +274,12 @@ class RstReader(BaseReader): if user_params: extra_params.update(user_params) + from docutils.readers import standalone + from docutils.parsers.rst import Parser + pub = docutils.core.Publisher( - reader="standalone", - parser="restructuredtext", + reader=standalone.Reader(), + parser=Parser(), writer=self.writer_class(), destination_class=docutils.io.StringOutput, ) @@ -362,6 +372,122 @@ class MarkdownReader(BaseReader): ) +class MystReader(BaseReader): + enabled = bool(MystParser) + file_extensions = ["myst"] + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def _parse_metadata(self, text): + formatted_fields = self.settings["FORMATTED_FIELDS"] + output = {} + + if not text.strip().startswith("---"): + return output, text + + parts = text.split("---", 2) + if len(parts) < 3: + return output, text + + try: + import yaml + except ImportError: + logger.warning( + "PyYAML is required to parse MyST front-matter metadata. 
" + "Install it with: pip install PyYAML" + ) + return output, text + + try: + front_matter = yaml.safe_load(parts[1]) + if not isinstance(front_matter, dict): + return output, text + except yaml.YAMLError as e: + logger.warning("Failed to parse MyST front-matter: %s", e) + return output, text + + for name, value in front_matter.items(): + name = name.lower() + + if name in formatted_fields and isinstance(value, str): + if Markdown: + md = Markdown(**self.settings.get("MARKDOWN", {})) + value = md.convert(value) + else: + logger.warning( + "Markdown is required for formatted MyST metadata fields. " + "Install it with: pip install markdown" + ) + + output[name] = self.process_metadata(name, value) + + return output, parts[2] if len(parts) >= 3 else text + + def _get_publisher_settings(self): + myst_config = self.settings.get("MYST", {}) + myst_settings = { + "myst_enable_extensions": myst_config.get("enable_extensions", []), + "myst_disable_syntax": myst_config.get("disable_syntax", []), + "myst_all_links_external": myst_config.get("all_links_external", False), + "myst_url_schemes": myst_config.get("url_schemes", None), + } + + extra_params = { + "initial_header_level": "2", + "syntax_highlight": "short", + "input_encoding": "utf-8", + "halt_level": 4, # Use SEVERE level for MyST to avoid halting on warnings + "traceback": True, + "warning_stream": StringIO(), + "embed_stylesheet": False, + } + + user_params = self.settings.get("DOCUTILS_SETTINGS") + if user_params: + extra_params.update(user_params) + + extra_params.update(myst_settings) + return extra_params + + def read(self, source_path): + with pelican_open(source_path) as text: + metadata, content_text = self._parse_metadata(text) + + from docutils.readers import standalone + + parser = MystParser() + settings_overrides = self._get_publisher_settings() + + # Create StringInput with the content (without frontmatter) + source = docutils.io.StringInput( + source=content_text, source_path=source_path + ) + + pub = 
docutils.core.Publisher( + reader=standalone.Reader(), + parser=parser, + writer=PelicanHTMLWriter(), + destination_class=docutils.io.StringOutput, + ) + pub.process_programmatic_settings(None, settings_overrides, None) + pub.source = source + pub.publish() + + parts = pub.writer.parts + content = parts.get("body") + + metadata.setdefault("title", parts.get("title")) + + return content, metadata + + def disabled_message(self) -> str: + return ( + "Could not import 'myst_parser.docutils_.Parser'. " + "Have you installed the 'myst-parser' package?" + ) + + class HTMLReader(BaseReader): """Parses HTML files as input, looking for meta, title, and body tags""" diff --git a/pelican/settings.py b/pelican/settings.py index 98b1357e..2bb2983b 100644 --- a/pelican/settings.py +++ b/pelican/settings.py @@ -131,6 +131,20 @@ DEFAULT_CONFIG = { }, "output_format": "html5", }, + "MYST": { + "enable_extensions": [ + "colon_fence", + "deflist", + "html_admonition", + "html_image", + # "linkify" requires linkify-it-py to be installed + "replacements", + "smartquotes", + ], + "disable_syntax": [], + "all_links_external": False, + "url_schemes": ["http", "https", "mailto", "ftp"], + }, "JINJA_FILTERS": {}, "JINJA_GLOBALS": {}, "JINJA_TESTS": {}, diff --git a/pelican/tests/content/article_myst_basic.myst b/pelican/tests/content/article_myst_basic.myst new file mode 100644 index 00000000..880dbe6a --- /dev/null +++ b/pelican/tests/content/article_myst_basic.myst @@ -0,0 +1,24 @@ +--- +title: Test MyST Article +date: 2024-01-15 +author: Test Author +category: Test Category +tags: myst, test +--- + +# MyST Test Article + +This is a basic MyST article with **bold** and *italic* text. + +## Features + +- List item 1 +- List item 2 +- List item 3 + +Here's a [link](https://example.com) and some `inline code`. 
+ +```python +def hello(): + print("Hello, MyST!") +``` diff --git a/pelican/tests/content/article_myst_directives.myst b/pelican/tests/content/article_myst_directives.myst new file mode 100644 index 00000000..5dea0673 --- /dev/null +++ b/pelican/tests/content/article_myst_directives.myst @@ -0,0 +1,30 @@ +--- +title: MyST with Directives +date: 2024-01-16 +author: Test Author +summary: Article demonstrating MyST **directives** and roles +--- + +# MyST Directives Test + +This article tests MyST directives and roles. + +:::{note} +This is a note directive. +::: + +:::{warning} +This is a warning! +::: + +## Roles + +Here's some {sub}`subscript` and {sup}`superscript` text. + +## Definition List + +term 1 +: Definition for term 1 + +term 2 +: Definition for term 2 diff --git a/pelican/tests/content/article_myst_no_metadata.myst b/pelican/tests/content/article_myst_no_metadata.myst new file mode 100644 index 00000000..0d770238 --- /dev/null +++ b/pelican/tests/content/article_myst_no_metadata.myst @@ -0,0 +1,7 @@ +--- +title: MyST Without Frontmatter +--- + +This article has no date or other metadata. + +Just a title and content. 
diff --git a/pelican/tests/test_readers.py b/pelican/tests/test_readers.py index 751088f7..2eaab527 100644 --- a/pelican/tests/test_readers.py +++ b/pelican/tests/test_readers.py @@ -33,10 +33,16 @@ class ReaderTest(unittest.TestCase): self.fail(f"Expected {key} to have value {value}, but was not in Dict") def test_markdown_disabled(self): - with patch.object( - readers.MarkdownReader, "enabled", new_callable=PropertyMock - ) as attr_mock: - attr_mock.return_value = False + with ( + patch.object( + readers.MarkdownReader, "enabled", new_callable=PropertyMock + ) as md_attr_mock, + patch.object( + readers.MystReader, "enabled", new_callable=PropertyMock + ) as myst_attr_mock, + ): + md_attr_mock.return_value = False + myst_attr_mock.return_value = True readrs = readers.Readers(settings=get_settings()) self.assertEqual( set(readers.MarkdownReader.file_extensions), @@ -45,6 +51,25 @@ class ReaderTest(unittest.TestCase): for val in readrs.disabled_readers.values(): self.assertEqual(readers.MarkdownReader, val.__class__) + def test_myst_disabled(self): + with ( + patch.object( + readers.MystReader, "enabled", new_callable=PropertyMock + ) as myst_attr_mock, + patch.object( + readers.MarkdownReader, "enabled", new_callable=PropertyMock + ) as md_attr_mock, + ): + myst_attr_mock.return_value = False + md_attr_mock.return_value = True + readrs = readers.Readers(settings=get_settings()) + self.assertEqual( + set(readers.MystReader.file_extensions), + readrs.disabled_readers.keys(), + ) + for val in readrs.disabled_readers.values(): + self.assertEqual(readers.MystReader, val.__class__) + class TestAssertDictHasSubset(ReaderTest): def setUp(self): @@ -1025,3 +1050,61 @@ class HTMLReaderTest(ReaderTest): "title": "Article with an inline SVG", } self.assertDictHasSubset(page.metadata, expected) + + +@unittest.skipUnless(readers.MystParser, "myst-parser isn't installed") +class MystReaderTest(ReaderTest): + def test_article_with_yaml_metadata(self): + reader = 
readers.MystReader(settings=get_settings()) + content, metadata = reader.read(_path("article_myst_basic.myst")) + expected = { + "title": "Test MyST Article", + "date": SafeDatetime(2024, 1, 15, 10, 30), + "modified": SafeDatetime(2024, 1, 16, 14, 45), + "category": "test", + "tags": ["myst", "markdown", "test"], + "author": "Test Author", + "summary": "
This is a summary with inline markup.
", + } + self.assertDictHasSubset(metadata, expected) + self.assertIn("