import os
from unittest.mock import patch
from pelican import readers
from pelican.tests.support import get_settings, unittest
from pelican.utils import SafeDatetime
# Directory that contains this test module.
CUR_DIR = os.path.dirname(__file__)
# "content" directory next to this module; used below as the base path for
# read_file() and as the root that _path() joins onto.
CONTENT_PATH = os.path.join(CUR_DIR, "content")
def _path(*args):
return os.path.join(CONTENT_PATH, *args)
class ReaderTest(unittest.TestCase):
    """Base test case providing helpers shared by the reader tests."""

    def read_file(self, path, **kwargs):
        """Read ``path`` relative to ``CONTENT_PATH`` and return the result.

        Keyword arguments are forwarded to ``get_settings`` to build the
        settings handed to ``readers.Readers``.
        """
        # Isolate from future API changes to readers.read_file
        settings = get_settings(**kwargs)
        reader_set = readers.Readers(settings=settings)
        return reader_set.read_file(base_path=CONTENT_PATH, path=path)

    def assertDictHasSubset(self, dictionary, subset):
        """Fail unless every key/value pair of ``subset`` is in ``dictionary``.

        A missing key fails immediately; a present key is compared with
        ``assertEqual`` so a mismatch reports both values.
        """
        for key, value in subset.items():
            # Guard clause: self.fail() raises, so nothing below runs for a
            # missing key.
            if key not in dictionary:
                self.fail(
                    "Expected %s to have value %s, but was not in Dict" % (key, value)
                )
            real_value = dictionary.get(key)
            message = "Expected %s to have value %s, but was %s" % (
                key,
                value,
                real_value,
            )
            self.assertEqual(value, real_value, message)
class TestAssertDictHasSubset(ReaderTest):
    """Checks for the ``assertDictHasSubset`` helper itself."""

    def setUp(self):
        """Create the two-entry dictionary every test compares against."""
        self.dictionary = {"key-a": "val-a", "key-b": "val-b"}

    def tearDown(self):
        """Drop the reference to the fixture dictionary."""
        self.dictionary = None

    def test_subset(self):
        """A strict subset of the dictionary is accepted."""
        subset = {"key-a": "val-a"}
        self.assertDictHasSubset(self.dictionary, subset)

    def test_equal(self):
        """The dictionary is accepted as a subset of itself."""
        self.assertDictHasSubset(self.dictionary, self.dictionary)

    def test_fail_not_set(self):
        """A key absent from the dictionary fails with the 'not in Dict' text."""
        pattern = r"Expected.*key-c.*to have value.*val-c.*but was not in Dict"
        with self.assertRaisesRegex(AssertionError, pattern):
            self.assertDictHasSubset(self.dictionary, {"key-c": "val-c"})

    def test_fail_wrong_val(self):
        """A present key with a different value fails and reports both values."""
        pattern = r"Expected .*key-a.* to have value .*val-b.* but was .*val-a.*"
        with self.assertRaisesRegex(AssertionError, pattern):
            self.assertDictHasSubset(self.dictionary, {"key-a": "val-b"})
class DefaultReaderTest(ReaderTest):
    """Tests for ``read_file`` dispatch, date metadata and ``find_empty_alt``.

    The date tests read fixtures with ``DEFAULT_DATE="fs"`` and compare the
    resulting metadata either with fixed datetimes or with the fixture file's
    modification time.
    """

    @staticmethod
    def _mtime_of(test_file):
        """Return the modification time of fixture ``test_file`` as SafeDatetime."""
        return SafeDatetime.fromtimestamp(os.stat(_path(test_file)).st_mtime)

    def test_readfile_unknown_extension(self):
        """Reading a file with an unknown extension raises ``TypeError``."""
        with self.assertRaises(TypeError):
            self.read_file(path="article_with_metadata.unknownextension")

    def test_readfile_path_metadata_implicit_dates(self):
        """Both ``date`` and ``modified`` equal the file's modification time."""
        test_file = "article_with_metadata_implicit_dates.html"
        page = self.read_file(path=test_file, DEFAULT_DATE="fs")
        expected = {
            "date": self._mtime_of(test_file),
            "modified": self._mtime_of(test_file),
        }
        self.assertDictHasSubset(page.metadata, expected)

    def test_readfile_path_metadata_explicit_dates(self):
        """Both ``date`` and ``modified`` equal fixed datetimes.

        NOTE(review): presumably these values are declared in the fixture
        itself; the fixture is not visible here.
        """
        test_file = "article_with_metadata_explicit_dates.html"
        page = self.read_file(path=test_file, DEFAULT_DATE="fs")
        expected = {
            "date": SafeDatetime(2010, 12, 2, 10, 14),
            "modified": SafeDatetime(2010, 12, 31, 23, 59),
        }
        self.assertDictHasSubset(page.metadata, expected)

    def test_readfile_path_metadata_implicit_date_explicit_modified(self):
        """``date`` equals the file mtime while ``modified`` is a fixed value."""
        test_file = "article_with_metadata_implicit_date_explicit_modified.html"
        page = self.read_file(path=test_file, DEFAULT_DATE="fs")
        expected = {
            "date": self._mtime_of(test_file),
            "modified": SafeDatetime(2010, 12, 2, 10, 14),
        }
        self.assertDictHasSubset(page.metadata, expected)

    def test_readfile_path_metadata_explicit_date_implicit_modified(self):
        """``date`` is a fixed value while ``modified`` equals the file mtime."""
        test_file = "article_with_metadata_explicit_date_implicit_modified.html"
        page = self.read_file(path=test_file, DEFAULT_DATE="fs")
        expected = {
            "date": SafeDatetime(2010, 12, 2, 10, 14),
            "modified": self._mtime_of(test_file),
        }
        self.assertDictHasSubset(page.metadata, expected)

    def test_find_empty_alt(self):
        """``find_empty_alt`` warns about each image tag with an empty alt."""
        # NOTE(review): the two tag literals were unreadable in the reviewed
        # text (the markup inside the quotes was lost, leaving unterminated
        # strings). They are restored here as <img> tags with an empty alt and
        # src="test-image.png", one per attribute order, which is what the
        # asserted warning arguments below require. Confirm the exact
        # attributes against version control.
        content = [
            '<img alt="" src="test-image.png" width="300px" />',
            '<img src="test-image.png"  width="300px" alt="" />',
        ]
        with patch("pelican.readers.logger") as log_mock:
            for tag in content:
                # assert_called_with only inspects the most recent call, so
                # clear the history first; otherwise a later tag could pass on
                # the warning recorded for an earlier one.
                log_mock.warning.reset_mock()
                readers.find_empty_alt(tag, "/test/path")
                log_mock.warning.assert_called_with(
                    "Empty alt attribute for image %s in %s",
                    "test-image.png",
                    "/test/path",
                    extra={"limit_msg": "Other images have empty alt attributes"},
                )
# Reader tests for reStructuredText (RstReaderTest), Markdown (MdReaderTest,
# skipped unless readers.Markdown is truthy) and HTML (HTMLReaderTest)
# fixtures. The tests read fixtures through self.read_file() or, for several
# Markdown tests, through readers.MarkdownReader(...).read(_path(...)), and
# compare the resulting content/metadata with expected values.
#
# NOTE(review): this section looks damaged. Many expected-value string
# literals have unbalanced quotes (the markup they contained appears to have
# been stripped), several statements and whole method bodies are fused onto
# single lines, and some methods break off mid-expression (for example the
# FILENAME_METADATA pattern). Restore this region from version control before
# editing; the intended literals cannot be recovered from this text.
#
# NOTE(review): the typogrify tests use `return unittest.skip(...)` inside an
# `except` clause. unittest.skip() only builds a decorator, so returning it
# does not mark the test as skipped; self.skipTest(...) is the usual way to
# skip from inside a test body.
class RstReaderTest(ReaderTest):
def test_article_with_metadata(self):
page = self.read_file(path="article_with_metadata.rst")
expected = {
"category": "yeah",
"author": "Alexis Métaireau",
"title": "This is a super article !",
"summary": '
Multi-line metadata should be' " supported\nas well as inline" " markup and stuff to "typogrify" ""...
\n", "date": SafeDatetime(2010, 12, 2, 10, 14), "modified": SafeDatetime(2010, 12, 2, 10, 20), "tags": ["foo", "bar", "foobar"], "custom_field": "http://notmyidea.org", } self.assertDictHasSubset(page.metadata, expected) def test_article_with_capitalized_metadata(self): page = self.read_file(path="article_with_capitalized_metadata.rst") expected = { "category": "yeah", "author": "Alexis Métaireau", "title": "This is a super article !", "summary": 'Multi-line metadata should be' " supported\nas well as inline" " markup and stuff to "typogrify" ""...
\n", "date": SafeDatetime(2010, 12, 2, 10, 14), "modified": SafeDatetime(2010, 12, 2, 10, 20), "tags": ["foo", "bar", "foobar"], "custom_field": "http://notmyidea.org", } self.assertDictHasSubset(page.metadata, expected) def test_article_with_filename_metadata(self): page = self.read_file( path="2012-11-29_rst_w_filename_meta#foo-bar.rst", FILENAME_METADATA=None ) expected = { "category": "yeah", "author": "Alexis Métaireau", "title": "Rst with filename metadata", "reader": "rst", } self.assertDictHasSubset(page.metadata, expected) page = self.read_file( path="2012-11-29_rst_w_filename_meta#foo-bar.rst", FILENAME_METADATA=r"(?PTHIS is some content. With some stuff to " ""typogrify"...
\nNow with added " 'support for ' "TLA.
\n" ) self.assertEqual(page.content, expected) try: # otherwise, typogrify should be applied page = self.read_file(path="article.rst", TYPOGRIFY=True) expected = ( 'THIS is some content. ' "With some stuff to “typogrify”…
\n" 'Now with added support for TLA.
\n' ) self.assertEqual(page.content, expected) except ImportError: return unittest.skip("need the typogrify distribution") def test_typogrify_summary(self): # if nothing is specified in the settings, the summary should be # unmodified page = self.read_file(path="article_with_metadata.rst") expected = ( 'Multi-line metadata should be' " supported\nas well as inline" " markup and stuff to "typogrify" ""...
\n" ) self.assertEqual(page.metadata["summary"], expected) try: # otherwise, typogrify should be applied page = self.read_file(path="article_with_metadata.rst", TYPOGRIFY=True) expected = ( 'Multi-line metadata should be' " supported\nas well as inline" " markup and stuff to “typogrify" "”…
\n" ) self.assertEqual(page.metadata["summary"], expected) except ImportError: return unittest.skip("need the typogrify distribution") def test_typogrify_ignore_tags(self): try: # typogrify should be able to ignore user specified tags, # but tries to be clever with widont extension page = self.read_file( path="article.rst", TYPOGRIFY=True, TYPOGRIFY_IGNORE_TAGS=["p"] ) expected = ( "THIS is some content. With some stuff to " ""typogrify"...
\nNow with added " 'support for ' "TLA.
\n" ) self.assertEqual(page.content, expected) # typogrify should ignore code blocks by default because # code blocks are composed inside the pre tag page = self.read_file(path="article_with_code_block.rst", TYPOGRIFY=True) expected = ( "An article with some code
\n" ''
'x'
' &'
' y\nA block quote:
\n\nx " '& y\n' "
Normal:\nx" ' &' " y" "
\n" ) self.assertEqual(page.content, expected) # instruct typogrify to also ignore blockquotes page = self.read_file( path="article_with_code_block.rst", TYPOGRIFY=True, TYPOGRIFY_IGNORE_TAGS=["blockquote"], ) expected = ( "An article with some code
\n" ''
'x'
' &'
' y\nA block quote:
\n\nx " "& y\n" "
Normal:\nx" ' &' " y" "
\n" ) self.assertEqual(page.content, expected) except ImportError: return unittest.skip("need the typogrify distribution") except TypeError: return unittest.skip("need typogrify version 2.0.4 or later") def test_article_with_multiple_authors(self): page = self.read_file(path="article_with_multiple_authors.rst") expected = {"authors": ["First Author", "Second Author"]} self.assertDictHasSubset(page.metadata, expected) def test_article_with_multiple_authors_semicolon(self): page = self.read_file(path="article_with_multiple_authors_semicolon.rst") expected = {"authors": ["Author, First", "Author, Second"]} self.assertDictHasSubset(page.metadata, expected) def test_article_with_multiple_authors_list(self): page = self.read_file(path="article_with_multiple_authors_list.rst") expected = {"authors": ["Author, First", "Author, Second"]} self.assertDictHasSubset(page.metadata, expected) def test_default_date_formats(self): tuple_date = self.read_file(path="article.rst", DEFAULT_DATE=(2012, 5, 1)) string_date = self.read_file(path="article.rst", DEFAULT_DATE="2012-05-01") self.assertEqual(tuple_date.metadata["date"], string_date.metadata["date"]) def test_parse_error(self): # Verify that it raises an Exception, not nothing and not SystemExit or # some such with self.assertRaisesRegex(Exception, "underline too short"): self.read_file(path="../parse_error/parse_error.rst") def test_typogrify_dashes_config(self): # Test default config page = self.read_file( path="article_with_typogrify_dashes.rst", TYPOGRIFY=True, TYPOGRIFY_DASHES="default", ) expected = "One: -; Two: —; Three: —-
\n" expected_title = "One -, two —, three —- dashes!" self.assertEqual(page.content, expected) self.assertEqual(page.title, expected_title) # Test 'oldschool' variant page = self.read_file( path="article_with_typogrify_dashes.rst", TYPOGRIFY=True, TYPOGRIFY_DASHES="oldschool", ) expected = "One: -; Two: –; Three: —
\n" expected_title = "One -, two –, three — dashes!" self.assertEqual(page.content, expected) self.assertEqual(page.title, expected_title) # Test 'oldschool_inverted' variant page = self.read_file( path="article_with_typogrify_dashes.rst", TYPOGRIFY=True, TYPOGRIFY_DASHES="oldschool_inverted", ) expected = "One: -; Two: —; Three: –
\n" expected_title = "One -, two —, three – dashes!" self.assertEqual(page.content, expected) self.assertEqual(page.title, expected_title) @unittest.skipUnless(readers.Markdown, "markdown isn't installed") class MdReaderTest(ReaderTest): def test_article_with_metadata(self): reader = readers.MarkdownReader(settings=get_settings()) content, metadata = reader.read(_path("article_with_md_extension.md")) expected = { "category": "test", "title": "Test md File", "summary": "I have a lot to test
", "date": SafeDatetime(2010, 12, 2, 10, 14), "modified": SafeDatetime(2010, 12, 2, 10, 20), "tags": ["foo", "bar", "foobar"], } self.assertDictHasSubset(metadata, expected) content, metadata = reader.read( _path("article_with_markdown_and_nonascii_summary.md") ) expected = { "title": "マックOS X 10.8でパイソンとVirtualenvをインストールと設定", "summary": "パイソンとVirtualenvをまっくでインストールする方法について明確に説明します。
", "category": "指導書", "date": SafeDatetime(2012, 12, 20), "modified": SafeDatetime(2012, 12, 22), "tags": ["パイソン", "マック"], "slug": "python-virtualenv-on-mac-osx-mountain-lion-10.8", } self.assertDictHasSubset(metadata, expected) def test_article_with_footnote(self): settings = get_settings() ec = settings["MARKDOWN"]["extension_configs"] ec["markdown.extensions.footnotes"] = {"SEPARATOR": "-"} reader = readers.MarkdownReader(settings) content, metadata = reader.read(_path("article_with_markdown_and_footnote.md")) expected_content = ( "This is some content" '1" " with some footnotes" '2
\n' '" ) expected_metadata = { "title": "Article with markdown containing footnotes", "summary": ( "Summary with inline markup " "should be supported.
" ), "date": SafeDatetime(2012, 10, 31), "modified": SafeDatetime(2012, 11, 1), "multiline": [ "Line Metadata should be handle properly.", "See syntax of Meta-Data extension of " "Python Markdown package:", "If a line is indented by 4 or more spaces,", "that line is assumed to be an additional line of the value", "for the previous keyword.", "A keyword may have as many lines as desired.", ], } self.assertEqual(content, expected_content) self.assertDictHasSubset(metadata, expected_metadata) def test_article_with_file_extensions(self): reader = readers.MarkdownReader(settings=get_settings()) # test to ensure the md file extension is being processed by the # correct reader content, metadata = reader.read(_path("article_with_md_extension.md")) expected = ( "The quick brown fox jumped over the lazy dog's back.
" ) self.assertEqual(content, expected) # test to ensure the mkd file extension is being processed by the # correct reader content, metadata = reader.read(_path("article_with_mkd_extension.mkd")) expected = ( "This is another markdown test file. Uses" " the mkd extension.
" ) self.assertEqual(content, expected) # test to ensure the markdown file extension is being processed by the # correct reader content, metadata = reader.read( _path("article_with_markdown_extension.markdown") ) expected = ( "This is another markdown test file. Uses" " the markdown extension.
" ) self.assertEqual(content, expected) # test to ensure the mdown file extension is being processed by the # correct reader content, metadata = reader.read(_path("article_with_mdown_extension.mdown")) expected = ( "This is another markdown test file. Uses" " the mdown extension.
" ) self.assertEqual(content, expected) def test_article_with_markdown_markup_extension(self): # test to ensure the markdown markup extension is being processed as # expected page = self.read_file( path="article_with_markdown_markup_extensions.md", MARKDOWN={ "extension_configs": { "markdown.extensions.toc": {}, "markdown.extensions.codehilite": {}, "markdown.extensions.extra": {}, } }, ) expected = ( '\n" 'Test: This metadata value looks like metadata
", } self.assertDictHasSubset(metadata, expected) def test_empty_file(self): reader = readers.MarkdownReader(settings=get_settings()) content, metadata = reader.read(_path("empty.md")) self.assertEqual(metadata, {}) self.assertEqual(content, "") def test_empty_file_with_bom(self): reader = readers.MarkdownReader(settings=get_settings()) content, metadata = reader.read(_path("empty_with_bom.md")) self.assertEqual(metadata, {}) self.assertEqual(content, "") def test_typogrify_dashes_config(self): # Test default config page = self.read_file( path="article_with_typogrify_dashes.md", TYPOGRIFY=True, TYPOGRIFY_DASHES="default", ) expected = "One: -; Two: —; Three: —-
" expected_title = "One -, two —, three —- dashes!" self.assertEqual(page.content, expected) self.assertEqual(page.title, expected_title) # Test 'oldschool' variant page = self.read_file( path="article_with_typogrify_dashes.md", TYPOGRIFY=True, TYPOGRIFY_DASHES="oldschool", ) expected = "One: -; Two: –; Three: —
" expected_title = "One -, two –, three — dashes!" self.assertEqual(page.content, expected) self.assertEqual(page.title, expected_title) # Test 'oldschool_inverted' variant page = self.read_file( path="article_with_typogrify_dashes.md", TYPOGRIFY=True, TYPOGRIFY_DASHES="oldschool_inverted", ) expected = "One: -; Two: —; Three: –
" expected_title = "One -, two —, three – dashes!" self.assertEqual(page.content, expected) self.assertEqual(page.title, expected_title) def test_metadata_has_no_discarded_data(self): md_filename = "article_with_markdown_and_empty_tags.md" r = readers.Readers( cache_name="cache", settings=get_settings(CACHE_CONTENT=True) ) page = r.read_file(base_path=CONTENT_PATH, path=md_filename) __, cached_metadata = r.get_cached_data(_path(md_filename), (None, None)) expected = {"title": "Article with markdown and empty tags"} self.assertEqual(cached_metadata, expected) self.assertNotIn("tags", page.metadata) self.assertDictHasSubset(page.metadata, expected) class HTMLReaderTest(ReaderTest): def test_article_with_comments(self): page = self.read_file(path="article_with_comments.html") self.assertEqual( """ Body content """, page.content, ) def test_article_with_keywords(self): page = self.read_file(path="article_with_keywords.html") expected = { "tags": ["foo", "bar", "foobar"], } self.assertDictHasSubset(page.metadata, expected) def test_article_with_metadata(self): page = self.read_file(path="article_with_metadata.html") expected = { "category": "yeah", "author": "Alexis Métaireau", "title": "This is a super article !", "summary": "Summary and stuff", "date": SafeDatetime(2010, 12, 2, 10, 14), "tags": ["foo", "bar", "foobar"], "custom_field": "http://notmyidea.org", } self.assertDictHasSubset(page.metadata, expected) def test_article_with_multiple_similar_metadata_tags(self): page = self.read_file(path="article_with_multiple_metadata_tags.html") expected = { "custom_field": ["https://getpelican.com", "https://www.eff.org"], } self.assertDictHasSubset(page.metadata, expected) def test_article_with_multiple_authors(self): page = self.read_file(path="article_with_multiple_authors.html") expected = {"authors": ["First Author", "Second Author"]} self.assertDictHasSubset(page.metadata, expected) def test_article_with_metadata_and_contents_attrib(self): page = 
self.read_file(path="article_with_metadata_and_contents.html") expected = { "category": "yeah", "author": "Alexis Métaireau", "title": "This is a super article !", "summary": "Summary and stuff", "date": SafeDatetime(2010, 12, 2, 10, 14), "tags": ["foo", "bar", "foobar"], "custom_field": "http://notmyidea.org", } self.assertDictHasSubset(page.metadata, expected) def test_article_with_null_attributes(self): page = self.read_file(path="article_with_null_attributes.html") self.assertEqual( """ Ensure that empty attributes are copied properly. """, page.content, ) def test_article_with_attributes_containing_double_quotes(self): page = self.read_file( path="article_with_attributes_containing_" + "double_quotes.html" ) self.assertEqual( """ Ensure that if an attribute value contains a double quote, it is surrounded with single quotes, otherwise with double quotes. Span content Span content Span content """, page.content, ) def test_article_metadata_key_lowercase(self): # Keys of metadata should be lowercase. page = self.read_file(path="article_with_uppercase_metadata.html") # Key should be lowercase self.assertIn("category", page.metadata, "Key should be lowercase.") # Value should keep cases self.assertEqual("Yeah", page.metadata.get("category")) def test_article_with_nonconformant_meta_tags(self): page = self.read_file(path="article_with_nonconformant_meta_tags.html") expected = { "summary": "Summary and stuff", "title": "Article with Nonconformant HTML meta tags", } self.assertDictHasSubset(page.metadata, expected) def test_article_with_inline_svg(self): page = self.read_file(path="article_with_inline_svg.html") expected = { "title": "Article with an inline SVG", } self.assertDictHasSubset(page.metadata, expected)