Ensure _DISCARD is not being cached. Fix #2825 (#2926)

Filtering is now applied before the metadata is cached, fixing the issue where _DISCARD objects from previous runs were being retrieved from the cache.
2021-10-06 09:19:17 +01:00 · 2021-10-06 09:19:17 +01:00 · 8849721913
commit 8849721913
parent 0da8659d0e
4 changed files with 27 additions and 1 deletions
--- a/pelican/readers.py
+++ b/pelican/readers.py
@@ -571,8 +571,9 @@ class Readers(FileStampDataCacher):
        content, reader_metadata = self.get_cached_data(path, (None, None))
        if content is None:
            content, reader_metadata = reader.read(path)
+            reader_metadata = _filter_discardable_metadata(reader_metadata)
            self.cache_data(path, (content, reader_metadata))
-        metadata.update(_filter_discardable_metadata(reader_metadata))
+        metadata.update(reader_metadata)

        if content:
            # find images with empty alt
--- a/pelican/tests/content/article_with_markdown_and_empty_tags.md
+++ b/pelican/tests/content/article_with_markdown_and_empty_tags.md
@@ -0,0 +1,4 @@
+Title: Article with markdown and empty tags
+Tags:
+
+This is some content.
--- a/pelican/tests/test_generators.py
+++ b/pelican/tests/test_generators.py
@@ -265,6 +265,8 @@ class TestArticlesGenerator(unittest.TestCase):
            ['This is a super article !', 'published', 'yeah', 'article'],
            ['This is a super article !', 'published', 'Default', 'article'],
            ['Article with an inline SVG', 'published', 'Default', 'article'],
+            ['Article with markdown and empty tags', 'published', 'Default',
+             'article'],
            ['This is an article with category !', 'published', 'yeah',
             'article'],
            ['This is an article with multiple authors!', 'published',
@@ -569,6 +571,7 @@ class TestArticlesGenerator(unittest.TestCase):
            'Article title',
            'Article with Nonconformant HTML meta tags',
            'Article with an inline SVG',
+            'Article with markdown and empty tags',
            'Article with markdown and nested summary metadata',
            'Article with markdown and summary metadata multi',
            'Article with markdown and summary metadata single',
--- a/pelican/tests/test_readers.py
+++ b/pelican/tests/test_readers.py
@@ -18,6 +18,7 @@ class ReaderTest(unittest.TestCase):

    def read_file(self, path, **kwargs):
        # Isolate from future API changes to readers.read_file
+
        r = readers.Readers(settings=get_settings(**kwargs))
        return r.read_file(base_path=CONTENT_PATH, path=path)

@@ -795,6 +796,23 @@ class MdReaderTest(ReaderTest):
        self.assertEqual(page.content, expected)
        self.assertEqual(page.title, expected_title)

+    def test_metadata_has_no_discarded_data(self):
+        md_filename = 'article_with_markdown_and_empty_tags.md'
+
+        r = readers.Readers(cache_name='cache', settings=get_settings(
+            CACHE_CONTENT=True))
+        page = r.read_file(base_path=CONTENT_PATH, path=md_filename)
+
+        __, cached_metadata = r.get_cached_data(
+            _path(md_filename), (None, None))
+
+        expected = {
+            'title': 'Article with markdown and empty tags'
+        }
+        self.assertEqual(cached_metadata, expected)
+        self.assertNotIn('tags', page.metadata)
+        self.assertDictHasSubset(page.metadata, expected)
+

 class HTMLReaderTest(ReaderTest):
    def test_article_with_comments(self):