import os from unittest.mock import patch from pelican import readers from pelican.tests.support import get_settings, unittest from pelican.utils import SafeDatetime CUR_DIR = os.path.dirname(__file__) CONTENT_PATH = os.path.join(CUR_DIR, "content") def _path(*args): return os.path.join(CONTENT_PATH, *args) class ReaderTest(unittest.TestCase): def read_file(self, path, **kwargs): # Isolate from future API changes to readers.read_file r = readers.Readers(settings=get_settings(**kwargs)) return r.read_file(base_path=CONTENT_PATH, path=path) def assertDictHasSubset(self, dictionary, subset): for key, value in subset.items(): if key in dictionary: real_value = dictionary.get(key) self.assertEqual( value, real_value, "Expected %s to have value %s, but was %s" % (key, value, real_value), ) else: self.fail( "Expected %s to have value %s, but was not in Dict" % (key, value) ) class TestAssertDictHasSubset(ReaderTest): def setUp(self): self.dictionary = {"key-a": "val-a", "key-b": "val-b"} def tearDown(self): self.dictionary = None def test_subset(self): self.assertDictHasSubset(self.dictionary, {"key-a": "val-a"}) def test_equal(self): self.assertDictHasSubset(self.dictionary, self.dictionary) def test_fail_not_set(self): self.assertRaisesRegex( AssertionError, r"Expected.*key-c.*to have value.*val-c.*but was not in Dict", self.assertDictHasSubset, self.dictionary, {"key-c": "val-c"}, ) def test_fail_wrong_val(self): self.assertRaisesRegex( AssertionError, r"Expected .*key-a.* to have value .*val-b.* but was .*val-a.*", self.assertDictHasSubset, self.dictionary, {"key-a": "val-b"}, ) class DefaultReaderTest(ReaderTest): def test_readfile_unknown_extension(self): with self.assertRaises(TypeError): self.read_file(path="article_with_metadata.unknownextension") def test_readfile_path_metadata_implicit_dates(self): test_file = "article_with_metadata_implicit_dates.html" page = self.read_file(path=test_file, DEFAULT_DATE="fs") expected = { "date": SafeDatetime.fromtimestamp(os.stat(_path(test_file)).st_mtime), "modified": SafeDatetime.fromtimestamp(os.stat(_path(test_file)).st_mtime), } self.assertDictHasSubset(page.metadata, expected) def test_readfile_path_metadata_explicit_dates(self): test_file = "article_with_metadata_explicit_dates.html" page = self.read_file(path=test_file, DEFAULT_DATE="fs") expected = { "date": SafeDatetime(2010, 12, 2, 10, 14), "modified": SafeDatetime(2010, 12, 31, 23, 59), } self.assertDictHasSubset(page.metadata, expected) def test_readfile_path_metadata_implicit_date_explicit_modified(self): test_file = "article_with_metadata_implicit_date_explicit_modified.html" page = self.read_file(path=test_file, DEFAULT_DATE="fs") expected = { "date": SafeDatetime.fromtimestamp(os.stat(_path(test_file)).st_mtime), "modified": SafeDatetime(2010, 12, 2, 10, 14), } self.assertDictHasSubset(page.metadata, expected) def test_readfile_path_metadata_explicit_date_implicit_modified(self): test_file = "article_with_metadata_explicit_date_implicit_modified.html" page = self.read_file(path=test_file, DEFAULT_DATE="fs") expected = { "date": SafeDatetime(2010, 12, 2, 10, 14), "modified": SafeDatetime.fromtimestamp(os.stat(_path(test_file)).st_mtime), } self.assertDictHasSubset(page.metadata, expected) def test_find_empty_alt(self): with patch("pelican.readers.logger") as log_mock: content = [ '', '', ] for tag in content: readers.find_empty_alt(tag, "/test/path") log_mock.warning.assert_called_with( "Empty alt attribute for image %s in %s", "test-image.png", "/test/path", extra={"limit_msg": "Other images have empty alt attributes"}, ) class RstReaderTest(ReaderTest): def test_article_with_metadata(self): page = self.read_file(path="article_with_metadata.rst") expected = { "category": "yeah", "author": "Alexis Métaireau", "title": "This is a super article !", "summary": '

Multi-line metadata should be' " supported\nas well as inline" " markup and stuff to "typogrify" ""...

\n", "date": SafeDatetime(2010, 12, 2, 10, 14), "modified": SafeDatetime(2010, 12, 2, 10, 20), "tags": ["foo", "bar", "foobar"], "custom_field": "http://notmyidea.org", } self.assertDictHasSubset(page.metadata, expected) def test_article_with_capitalized_metadata(self): page = self.read_file(path="article_with_capitalized_metadata.rst") expected = { "category": "yeah", "author": "Alexis Métaireau", "title": "This is a super article !", "summary": '

Multi-line metadata should be' " supported\nas well as inline" " markup and stuff to "typogrify" ""...

\n", "date": SafeDatetime(2010, 12, 2, 10, 14), "modified": SafeDatetime(2010, 12, 2, 10, 20), "tags": ["foo", "bar", "foobar"], "custom_field": "http://notmyidea.org", } self.assertDictHasSubset(page.metadata, expected) def test_article_with_filename_metadata(self): page = self.read_file( path="2012-11-29_rst_w_filename_meta#foo-bar.rst", FILENAME_METADATA=None ) expected = { "category": "yeah", "author": "Alexis Métaireau", "title": "Rst with filename metadata", "reader": "rst", } self.assertDictHasSubset(page.metadata, expected) page = self.read_file( path="2012-11-29_rst_w_filename_meta#foo-bar.rst", FILENAME_METADATA=r"(?P\d{4}-\d{2}-\d{2}).*", ) expected = { "category": "yeah", "author": "Alexis Métaireau", "title": "Rst with filename metadata", "date": SafeDatetime(2012, 11, 29), "reader": "rst", } self.assertDictHasSubset(page.metadata, expected) page = self.read_file( path="2012-11-29_rst_w_filename_meta#foo-bar.rst", FILENAME_METADATA=( r"(?P\d{4}-\d{2}-\d{2})" r"_(?P.*)" r"#(?P.*)-(?P.*)" ), ) expected = { "category": "yeah", "author": "Alexis Métaireau", "title": "Rst with filename metadata", "date": SafeDatetime(2012, 11, 29), "slug": "rst_w_filename_meta", "mymeta": "foo", "reader": "rst", } self.assertDictHasSubset(page.metadata, expected) def test_article_with_optional_filename_metadata(self): page = self.read_file( path="2012-11-29_rst_w_filename_meta#foo-bar.rst", FILENAME_METADATA=r"(?P\d{4}-\d{2}-\d{2})?", ) expected = { "date": SafeDatetime(2012, 11, 29), "reader": "rst", } self.assertDictHasSubset(page.metadata, expected) page = self.read_file( path="article.rst", FILENAME_METADATA=r"(?P\d{4}-\d{2}-\d{2})?" ) expected = { "reader": "rst", } self.assertDictHasSubset(page.metadata, expected) self.assertNotIn("date", page.metadata, "Date should not be set.") def test_article_metadata_key_lowercase(self): # Keys of metadata should be lowercase. reader = readers.RstReader(settings=get_settings()) content, metadata = reader.read(_path("article_with_uppercase_metadata.rst")) self.assertIn("category", metadata, "Key should be lowercase.") self.assertEqual("Yeah", metadata.get("category"), "Value keeps case.") def test_article_extra_path_metadata(self): input_with_metadata = "2012-11-29_rst_w_filename_meta#foo-bar.rst" page_metadata = self.read_file( path=input_with_metadata, FILENAME_METADATA=( r"(?P\d{4}-\d{2}-\d{2})" r"_(?P.*)" r"#(?P.*)-(?P.*)" ), EXTRA_PATH_METADATA={ input_with_metadata: {"key-1a": "value-1a", "key-1b": "value-1b"} }, ) expected_metadata = { "category": "yeah", "author": "Alexis Métaireau", "title": "Rst with filename metadata", "date": SafeDatetime(2012, 11, 29), "slug": "rst_w_filename_meta", "mymeta": "foo", "reader": "rst", "key-1a": "value-1a", "key-1b": "value-1b", } self.assertDictHasSubset(page_metadata.metadata, expected_metadata) input_file_path_without_metadata = "article.rst" page_without_metadata = self.read_file( path=input_file_path_without_metadata, EXTRA_PATH_METADATA={ input_file_path_without_metadata: {"author": "Charlès Overwrite"} }, ) expected_without_metadata = { "category": "misc", "author": "Charlès Overwrite", "title": "Article title", "reader": "rst", } self.assertDictHasSubset( page_without_metadata.metadata, expected_without_metadata ) def test_article_extra_path_metadata_dont_overwrite(self): # EXTRA_PATH_METADATA['author'] should get ignored # since we don't overwrite already set values input_file_path = "2012-11-29_rst_w_filename_meta#foo-bar.rst" page = self.read_file( path=input_file_path, FILENAME_METADATA=( r"(?P\d{4}-\d{2}-\d{2})" r"_(?P.*)" r"#(?P.*)-(?P.*)" ), EXTRA_PATH_METADATA={ input_file_path: {"author": "Charlès Overwrite", "key-1b": "value-1b"} }, ) expected = { "category": "yeah", "author": "Alexis Métaireau", "title": "Rst with filename metadata", "date": SafeDatetime(2012, 11, 29), "slug": "rst_w_filename_meta", "mymeta": "foo", "reader": "rst", "key-1b": "value-1b", } self.assertDictHasSubset(page.metadata, expected) def test_article_extra_path_metadata_recurse(self): parent = "TestCategory" notparent = "TestCategory/article" path = "TestCategory/article_without_category.rst" epm = { parent: { "epmr_inherit": parent, "epmr_override": parent, }, notparent: {"epmr_bogus": notparent}, path: { "epmr_override": path, }, } expected_metadata = { "epmr_inherit": parent, "epmr_override": path, } page = self.read_file(path=path, EXTRA_PATH_METADATA=epm) self.assertDictHasSubset(page.metadata, expected_metadata) # Make sure vars aren't getting "inherited" by mistake... path = "article.rst" page = self.read_file(path=path, EXTRA_PATH_METADATA=epm) for k in expected_metadata.keys(): self.assertNotIn(k, page.metadata) # Same, but for edge cases where one file's name is a prefix of # another. path = "TestCategory/article_without_category.rst" page = self.read_file(path=path, EXTRA_PATH_METADATA=epm) for k in epm[notparent].keys(): self.assertNotIn(k, page.metadata) def test_typogrify(self): # if nothing is specified in the settings, the content should be # unmodified page = self.read_file(path="article.rst") expected = ( "

THIS is some content. With some stuff to " ""typogrify"...

\n

Now with added " 'support for ' "TLA.

\n" ) self.assertEqual(page.content, expected) try: # otherwise, typogrify should be applied page = self.read_file(path="article.rst", TYPOGRIFY=True) expected = ( '

THIS is some content. ' "With some stuff to “typogrify”…

\n" '

Now with added support for TLA.

\n' ) self.assertEqual(page.content, expected) except ImportError: return unittest.skip("need the typogrify distribution") def test_typogrify_summary(self): # if nothing is specified in the settings, the summary should be # unmodified page = self.read_file(path="article_with_metadata.rst") expected = ( '

Multi-line metadata should be' " supported\nas well as inline" " markup and stuff to "typogrify" ""...

\n" ) self.assertEqual(page.metadata["summary"], expected) try: # otherwise, typogrify should be applied page = self.read_file(path="article_with_metadata.rst", TYPOGRIFY=True) expected = ( '

Multi-line metadata should be' " supported\nas well as inline" " markup and stuff to “typogrify" "”…

\n" ) self.assertEqual(page.metadata["summary"], expected) except ImportError: return unittest.skip("need the typogrify distribution") def test_typogrify_ignore_tags(self): try: # typogrify should be able to ignore user specified tags, # but tries to be clever with widont extension page = self.read_file( path="article.rst", TYPOGRIFY=True, TYPOGRIFY_IGNORE_TAGS=["p"] ) expected = ( "

THIS is some content. With some stuff to " ""typogrify"...

\n

Now with added " 'support for ' "TLA.

\n" ) self.assertEqual(page.content, expected) # typogrify should ignore code blocks by default because # code blocks are composed inside the pre tag page = self.read_file(path="article_with_code_block.rst", TYPOGRIFY=True) expected = ( "

An article with some code

\n" '
'
                'x'
                ' &'
                ' y\n
\n' "

A block quote:

\n
\nx " '& y
\n' "

Normal:\nx" ' &' " y" "

\n" ) self.assertEqual(page.content, expected) # instruct typogrify to also ignore blockquotes page = self.read_file( path="article_with_code_block.rst", TYPOGRIFY=True, TYPOGRIFY_IGNORE_TAGS=["blockquote"], ) expected = ( "

An article with some code

\n" '
'
                'x'
                ' &'
                ' y\n
\n' "

A block quote:

\n
\nx " "& y
\n" "

Normal:\nx" ' &' " y" "

\n" ) self.assertEqual(page.content, expected) except ImportError: return unittest.skip("need the typogrify distribution") except TypeError: return unittest.skip("need typogrify version 2.0.4 or later") def test_article_with_multiple_authors(self): page = self.read_file(path="article_with_multiple_authors.rst") expected = {"authors": ["First Author", "Second Author"]} self.assertDictHasSubset(page.metadata, expected) def test_article_with_multiple_authors_semicolon(self): page = self.read_file(path="article_with_multiple_authors_semicolon.rst") expected = {"authors": ["Author, First", "Author, Second"]} self.assertDictHasSubset(page.metadata, expected) def test_article_with_multiple_authors_list(self): page = self.read_file(path="article_with_multiple_authors_list.rst") expected = {"authors": ["Author, First", "Author, Second"]} self.assertDictHasSubset(page.metadata, expected) def test_default_date_formats(self): tuple_date = self.read_file(path="article.rst", DEFAULT_DATE=(2012, 5, 1)) string_date = self.read_file(path="article.rst", DEFAULT_DATE="2012-05-01") self.assertEqual(tuple_date.metadata["date"], string_date.metadata["date"]) def test_parse_error(self): # Verify that it raises an Exception, not nothing and not SystemExit or # some such with self.assertRaisesRegex(Exception, "underline too short"): self.read_file(path="../parse_error/parse_error.rst") def test_typogrify_dashes_config(self): # Test default config page = self.read_file( path="article_with_typogrify_dashes.rst", TYPOGRIFY=True, TYPOGRIFY_DASHES="default", ) expected = "

One: -; Two: —; Three: —-

\n" expected_title = "One -, two —, three —- dashes!" self.assertEqual(page.content, expected) self.assertEqual(page.title, expected_title) # Test 'oldschool' variant page = self.read_file( path="article_with_typogrify_dashes.rst", TYPOGRIFY=True, TYPOGRIFY_DASHES="oldschool", ) expected = "

One: -; Two: –; Three: —

\n" expected_title = "One -, two –, three — dashes!" self.assertEqual(page.content, expected) self.assertEqual(page.title, expected_title) # Test 'oldschool_inverted' variant page = self.read_file( path="article_with_typogrify_dashes.rst", TYPOGRIFY=True, TYPOGRIFY_DASHES="oldschool_inverted", ) expected = "

One: -; Two: —; Three: –

\n" expected_title = "One -, two —, three – dashes!" self.assertEqual(page.content, expected) self.assertEqual(page.title, expected_title) @unittest.skipUnless(readers.Markdown, "markdown isn't installed") class MdReaderTest(ReaderTest): def test_article_with_metadata(self): reader = readers.MarkdownReader(settings=get_settings()) content, metadata = reader.read(_path("article_with_md_extension.md")) expected = { "category": "test", "title": "Test md File", "summary": "

I have a lot to test

", "date": SafeDatetime(2010, 12, 2, 10, 14), "modified": SafeDatetime(2010, 12, 2, 10, 20), "tags": ["foo", "bar", "foobar"], } self.assertDictHasSubset(metadata, expected) content, metadata = reader.read( _path("article_with_markdown_and_nonascii_summary.md") ) expected = { "title": "マックOS X 10.8でパイソンとVirtualenvをインストールと設定", "summary": "

パイソンとVirtualenvをまっくでインストールする方法について明確に説明します。

", "category": "指導書", "date": SafeDatetime(2012, 12, 20), "modified": SafeDatetime(2012, 12, 22), "tags": ["パイソン", "マック"], "slug": "python-virtualenv-on-mac-osx-mountain-lion-10.8", } self.assertDictHasSubset(metadata, expected) def test_article_with_footnote(self): settings = get_settings() ec = settings["MARKDOWN"]["extension_configs"] ec["markdown.extensions.footnotes"] = {"SEPARATOR": "-"} reader = readers.MarkdownReader(settings) content, metadata = reader.read(_path("article_with_markdown_and_footnote.md")) expected_content = ( "

This is some content" '1" " with some footnotes" '2

\n' '
\n' '
\n
    \n
  1. \n' "

    Numbered footnote " '

    \n' '
  2. \n
  3. \n' "

    Named footnote " '

    \n' "
  4. \n
\n
" ) expected_metadata = { "title": "Article with markdown containing footnotes", "summary": ( "

Summary with inline markup " "should be supported.

" ), "date": SafeDatetime(2012, 10, 31), "modified": SafeDatetime(2012, 11, 1), "multiline": [ "Line Metadata should be handle properly.", "See syntax of Meta-Data extension of " "Python Markdown package:", "If a line is indented by 4 or more spaces,", "that line is assumed to be an additional line of the value", "for the previous keyword.", "A keyword may have as many lines as desired.", ], } self.assertEqual(content, expected_content) self.assertDictHasSubset(metadata, expected_metadata) def test_article_with_file_extensions(self): reader = readers.MarkdownReader(settings=get_settings()) # test to ensure the md file extension is being processed by the # correct reader content, metadata = reader.read(_path("article_with_md_extension.md")) expected = ( "

Test Markdown File Header

\n" "

Used for pelican test

\n" "

The quick brown fox jumped over the lazy dog's back.

" ) self.assertEqual(content, expected) # test to ensure the mkd file extension is being processed by the # correct reader content, metadata = reader.read(_path("article_with_mkd_extension.mkd")) expected = ( "

Test Markdown File Header

\n

Used for pelican" " test

\n

This is another markdown test file. Uses" " the mkd extension.

" ) self.assertEqual(content, expected) # test to ensure the markdown file extension is being processed by the # correct reader content, metadata = reader.read( _path("article_with_markdown_extension.markdown") ) expected = ( "

Test Markdown File Header

\n

Used for pelican" " test

\n

This is another markdown test file. Uses" " the markdown extension.

" ) self.assertEqual(content, expected) # test to ensure the mdown file extension is being processed by the # correct reader content, metadata = reader.read(_path("article_with_mdown_extension.mdown")) expected = ( "

Test Markdown File Header

\n

Used for pelican" " test

\n

This is another markdown test file. Uses" " the mdown extension.

" ) self.assertEqual(content, expected) def test_article_with_markdown_markup_extension(self): # test to ensure the markdown markup extension is being processed as # expected page = self.read_file( path="article_with_markdown_markup_extensions.md", MARKDOWN={ "extension_configs": { "markdown.extensions.toc": {}, "markdown.extensions.codehilite": {}, "markdown.extensions.extra": {}, } }, ) expected = ( '
\n' "\n" "
\n" '

Level1

\n' '

Level2

' ) self.assertEqual(page.content, expected) def test_article_with_filename_metadata(self): page = self.read_file( path="2012-11-30_md_w_filename_meta#foo-bar.md", FILENAME_METADATA=None ) expected = { "category": "yeah", "author": "Alexis Métaireau", } self.assertDictHasSubset(page.metadata, expected) page = self.read_file( path="2012-11-30_md_w_filename_meta#foo-bar.md", FILENAME_METADATA=r"(?P\d{4}-\d{2}-\d{2}).*", ) expected = { "category": "yeah", "author": "Alexis Métaireau", "date": SafeDatetime(2012, 11, 30), } self.assertDictHasSubset(page.metadata, expected) page = self.read_file( path="2012-11-30_md_w_filename_meta#foo-bar.md", FILENAME_METADATA=( r"(?P\d{4}-\d{2}-\d{2})" r"_(?P.*)" r"#(?P.*)-(?P.*)" ), ) expected = { "category": "yeah", "author": "Alexis Métaireau", "date": SafeDatetime(2012, 11, 30), "slug": "md_w_filename_meta", "mymeta": "foo", } self.assertDictHasSubset(page.metadata, expected) def test_article_with_optional_filename_metadata(self): page = self.read_file( path="2012-11-30_md_w_filename_meta#foo-bar.md", FILENAME_METADATA=r"(?P\d{4}-\d{2}-\d{2})?", ) expected = { "date": SafeDatetime(2012, 11, 30), "reader": "markdown", } self.assertDictHasSubset(page.metadata, expected) page = self.read_file( path="empty.md", FILENAME_METADATA=r"(?P\d{4}-\d{2}-\d{2})?" ) expected = { "reader": "markdown", } self.assertDictHasSubset(page.metadata, expected) self.assertNotIn("date", page.metadata, "Date should not be set.") def test_duplicate_tags_or_authors_are_removed(self): reader = readers.MarkdownReader(settings=get_settings()) content, metadata = reader.read(_path("article_with_duplicate_tags_authors.md")) expected = { "tags": ["foo", "bar", "foobar"], "authors": ["Author, First", "Author, Second"], } self.assertDictHasSubset(metadata, expected) def test_metadata_not_parsed_for_metadata(self): settings = get_settings() settings["FORMATTED_FIELDS"] = ["summary"] reader = readers.MarkdownReader(settings=settings) content, metadata = reader.read( _path("article_with_markdown_and_nested_metadata.md") ) expected = { "title": "Article with markdown and nested summary metadata", "summary": "

Test: This metadata value looks like metadata

", } self.assertDictHasSubset(metadata, expected) def test_empty_file(self): reader = readers.MarkdownReader(settings=get_settings()) content, metadata = reader.read(_path("empty.md")) self.assertEqual(metadata, {}) self.assertEqual(content, "") def test_empty_file_with_bom(self): reader = readers.MarkdownReader(settings=get_settings()) content, metadata = reader.read(_path("empty_with_bom.md")) self.assertEqual(metadata, {}) self.assertEqual(content, "") def test_typogrify_dashes_config(self): # Test default config page = self.read_file( path="article_with_typogrify_dashes.md", TYPOGRIFY=True, TYPOGRIFY_DASHES="default", ) expected = "

One: -; Two: —; Three: —-

" expected_title = "One -, two —, three —- dashes!" self.assertEqual(page.content, expected) self.assertEqual(page.title, expected_title) # Test 'oldschool' variant page = self.read_file( path="article_with_typogrify_dashes.md", TYPOGRIFY=True, TYPOGRIFY_DASHES="oldschool", ) expected = "

One: -; Two: –; Three: —

" expected_title = "One -, two –, three — dashes!" self.assertEqual(page.content, expected) self.assertEqual(page.title, expected_title) # Test 'oldschool_inverted' variant page = self.read_file( path="article_with_typogrify_dashes.md", TYPOGRIFY=True, TYPOGRIFY_DASHES="oldschool_inverted", ) expected = "

One: -; Two: —; Three: –

" expected_title = "One -, two —, three – dashes!" self.assertEqual(page.content, expected) self.assertEqual(page.title, expected_title) def test_metadata_has_no_discarded_data(self): md_filename = "article_with_markdown_and_empty_tags.md" r = readers.Readers( cache_name="cache", settings=get_settings(CACHE_CONTENT=True) ) page = r.read_file(base_path=CONTENT_PATH, path=md_filename) __, cached_metadata = r.get_cached_data(_path(md_filename), (None, None)) expected = {"title": "Article with markdown and empty tags"} self.assertEqual(cached_metadata, expected) self.assertNotIn("tags", page.metadata) self.assertDictHasSubset(page.metadata, expected) class HTMLReaderTest(ReaderTest): def test_article_with_comments(self): page = self.read_file(path="article_with_comments.html") self.assertEqual( """ Body content """, page.content, ) def test_article_with_keywords(self): page = self.read_file(path="article_with_keywords.html") expected = { "tags": ["foo", "bar", "foobar"], } self.assertDictHasSubset(page.metadata, expected) def test_article_with_metadata(self): page = self.read_file(path="article_with_metadata.html") expected = { "category": "yeah", "author": "Alexis Métaireau", "title": "This is a super article !", "summary": "Summary and stuff", "date": SafeDatetime(2010, 12, 2, 10, 14), "tags": ["foo", "bar", "foobar"], "custom_field": "http://notmyidea.org", } self.assertDictHasSubset(page.metadata, expected) def test_article_with_multiple_similar_metadata_tags(self): page = self.read_file(path="article_with_multiple_metadata_tags.html") expected = { "custom_field": ["https://getpelican.com", "https://www.eff.org"], } self.assertDictHasSubset(page.metadata, expected) def test_article_with_multiple_authors(self): page = self.read_file(path="article_with_multiple_authors.html") expected = {"authors": ["First Author", "Second Author"]} self.assertDictHasSubset(page.metadata, expected) def test_article_with_metadata_and_contents_attrib(self): page = self.read_file(path="article_with_metadata_and_contents.html") expected = { "category": "yeah", "author": "Alexis Métaireau", "title": "This is a super article !", "summary": "Summary and stuff", "date": SafeDatetime(2010, 12, 2, 10, 14), "tags": ["foo", "bar", "foobar"], "custom_field": "http://notmyidea.org", } self.assertDictHasSubset(page.metadata, expected) def test_article_with_null_attributes(self): page = self.read_file(path="article_with_null_attributes.html") self.assertEqual( """ Ensure that empty attributes are copied properly. """, page.content, ) def test_article_with_attributes_containing_double_quotes(self): page = self.read_file( path="article_with_attributes_containing_" + "double_quotes.html" ) self.assertEqual( """ Ensure that if an attribute value contains a double quote, it is surrounded with single quotes, otherwise with double quotes. Span content Span content Span content """, page.content, ) def test_article_metadata_key_lowercase(self): # Keys of metadata should be lowercase. page = self.read_file(path="article_with_uppercase_metadata.html") # Key should be lowercase self.assertIn("category", page.metadata, "Key should be lowercase.") # Value should keep cases self.assertEqual("Yeah", page.metadata.get("category")) def test_article_with_nonconformant_meta_tags(self): page = self.read_file(path="article_with_nonconformant_meta_tags.html") expected = { "summary": "Summary and stuff", "title": "Article with Nonconformant HTML meta tags", } self.assertDictHasSubset(page.metadata, expected) def test_article_with_inline_svg(self): page = self.read_file(path="article_with_inline_svg.html") expected = { "title": "Article with an inline SVG", } self.assertDictHasSubset(page.metadata, expected)