Ensure _DISCARD is not being cached. Fix #2825

Discardable metadata is now filtered out before the metadata is cached, which fixes the issue where _DISCARD objects from previous runs were being retrieved from the cache.
2025-10-15 20:28:56 +02:00 · 2021-10-02 00:17:43 +01:00 · 2021-10-02 00:17:43 +01:00 · f714f27c78
commit f714f27c78
parent f862d64b7a
5 changed files with 41 additions and 4 deletions
--- a/RELEASE.md
+++ b/RELEASE.md
@ -0,0 +1,3 @@
+Release type: patch
+
+Address an issue where metadata flagged to be discarded was being cached.
--- a/pelican/readers.py
+++ b/pelican/readers.py
@ -571,8 +571,9 @@ class Readers(FileStampDataCacher):
        content, reader_metadata = self.get_cached_data(path, (None, None))
        if content is None:
            content, reader_metadata = reader.read(path)
+            reader_metadata = _filter_discardable_metadata(reader_metadata)
            self.cache_data(path, (content, reader_metadata))
-        metadata.update(_filter_discardable_metadata(reader_metadata))
+        metadata.update(reader_metadata)

        if content:
            # find images with empty alt
--- a/pelican/tests/content/article_with_markdown_and_empty_tags.md
+++ b/pelican/tests/content/article_with_markdown_and_empty_tags.md
@ -0,0 +1,4 @@
+Title: Article with markdown and empty tags
+Tags:
+
+This is some content.
--- a/pelican/tests/test_generators.py
+++ b/pelican/tests/test_generators.py
@ -265,6 +265,8 @@ class TestArticlesGenerator(unittest.TestCase):
            ['This is a super article !', 'published', 'yeah', 'article'],
            ['This is a super article !', 'published', 'Default', 'article'],
            ['Article with an inline SVG', 'published', 'Default', 'article'],
+            ['Article with markdown and empty tags', 'published', 'Default',
+             'article'],
            ['This is an article with category !', 'published', 'yeah',
             'article'],
            ['This is an article with multiple authors!', 'published',
@ -569,6 +571,7 @@ class TestArticlesGenerator(unittest.TestCase):
            'Article title',
            'Article with Nonconformant HTML meta tags',
            'Article with an inline SVG',
+            'Article with markdown and empty tags',
            'Article with markdown and nested summary metadata',
            'Article with markdown and summary metadata multi',
            'Article with markdown and summary metadata single',
--- a/pelican/tests/test_readers.py
+++ b/pelican/tests/test_readers.py
@ -16,10 +16,18 @@ def _path(*args):

 class ReaderTest(unittest.TestCase):

-    def read_file(self, path, **kwargs):
+    def setUp(self):
+        self._reader = None
+
+    def tearDown(self):
+        self._reader = None
+
+    def read_file(self, path, cache_name='', **kwargs):
        # Isolate from future API changes to readers.read_file
-        r = readers.Readers(settings=get_settings(**kwargs))
-        return r.read_file(base_path=CONTENT_PATH, path=path)
+
+        self._reader = readers.Readers(
+            cache_name=cache_name, settings=get_settings(**kwargs))
+        return self._reader.read_file(base_path=CONTENT_PATH, path=path)

    def assertDictHasSubset(self, dictionary, subset):
        for key, value in subset.items():
@ -795,6 +803,24 @@ class MdReaderTest(ReaderTest):
        self.assertEqual(page.content, expected)
        self.assertEqual(page.title, expected_title)

+    def test_metadata_has_no_discarded_data(self):
+        md_filename = 'article_with_markdown_and_empty_tags.md'
+        page = self.read_file(
+            path=md_filename,
+            cache_name='cache',
+            CACHE_CONTENT=True,
+            LOAD_CONTENT_CACHE=True)
+
+        file_path = _path(md_filename)
+        cached_metadata = self._reader._cache[file_path][1][1]
+
+        expected = {
+            'title': 'Article with markdown and empty tags'
+        }
+        self.assertEqual(cached_metadata, expected)
+        self.assertNotIn('tags', page.metadata)
+        self.assertDictHasSubset(page.metadata, expected)
+

 class HTMLReaderTest(ReaderTest):
    def test_article_with_comments(self):