Ensure _DISCARD is not cached. Fixes #2825

Filtering is now applied before the metadata is cached, which fixes the issue where _DISCARD objects from previous runs were retrieved from the cache.
This commit is contained in:
jonasborges 2021-10-02 00:17:43 +01:00
commit f714f27c78
5 changed files with 41 additions and 4 deletions

3
RELEASE.md Normal file
View file

@ -0,0 +1,3 @@
Release type: patch
Address an issue where metadata flagged to be discarded was being cached.

View file

@ -571,8 +571,9 @@ class Readers(FileStampDataCacher):
content, reader_metadata = self.get_cached_data(path, (None, None))
if content is None:
content, reader_metadata = reader.read(path)
reader_metadata = _filter_discardable_metadata(reader_metadata)
self.cache_data(path, (content, reader_metadata))
metadata.update(_filter_discardable_metadata(reader_metadata))
metadata.update(reader_metadata)
if content:
# find images with empty alt

View file

@ -0,0 +1,4 @@
Title: Article with markdown and empty tags
Tags:
This is some content.

View file

@ -265,6 +265,8 @@ class TestArticlesGenerator(unittest.TestCase):
['This is a super article !', 'published', 'yeah', 'article'],
['This is a super article !', 'published', 'Default', 'article'],
['Article with an inline SVG', 'published', 'Default', 'article'],
['Article with markdown and empty tags', 'published', 'Default',
'article'],
['This is an article with category !', 'published', 'yeah',
'article'],
['This is an article with multiple authors!', 'published',
@ -569,6 +571,7 @@ class TestArticlesGenerator(unittest.TestCase):
'Article title',
'Article with Nonconformant HTML meta tags',
'Article with an inline SVG',
'Article with markdown and empty tags',
'Article with markdown and nested summary metadata',
'Article with markdown and summary metadata multi',
'Article with markdown and summary metadata single',

View file

@ -16,10 +16,18 @@ def _path(*args):
class ReaderTest(unittest.TestCase):
def read_file(self, path, **kwargs):
def setUp(self):
self._reader = None
def tearDown(self):
self._reader = None
def read_file(self, path, cache_name='', **kwargs):
# Isolate from future API changes to readers.read_file
r = readers.Readers(settings=get_settings(**kwargs))
return r.read_file(base_path=CONTENT_PATH, path=path)
self._reader = readers.Readers(
cache_name=cache_name, settings=get_settings(**kwargs))
return self._reader.read_file(base_path=CONTENT_PATH, path=path)
def assertDictHasSubset(self, dictionary, subset):
for key, value in subset.items():
@ -795,6 +803,24 @@ class MdReaderTest(ReaderTest):
self.assertEqual(page.content, expected)
self.assertEqual(page.title, expected_title)
def test_metadata_has_no_discarded_data(self):
# Regression test: metadata that is flagged to be discarded must appear
# neither in the reader's cache nor on the page that is returned.
# The fixture used here has a title and an empty `Tags:` field.
md_filename = 'article_with_markdown_and_empty_tags.md'
# Read with content caching enabled so the reader stores what it parsed.
page = self.read_file(
path=md_filename,
cache_name='cache',
CACHE_CONTENT=True,
LOAD_CONTENT_CACHE=True)
file_path = _path(md_filename)
# The reader caches a (content, reader_metadata) tuple per path, so the
# trailing [1] selects the metadata. The first [1] presumably skips the
# cache's own bookkeeping value stored next to the data -- TODO confirm.
cached_metadata = self._reader._cache[file_path][1][1]
# Only the title is expected to survive; the empty tags must be gone.
expected = {
'title': 'Article with markdown and empty tags'
}
# The cached metadata must match exactly, i.e. contain no discarded keys.
self.assertEqual(cached_metadata, expected)
# The page metadata may carry extra keys, so only check that 'tags' is
# absent and that the expected subset is present.
self.assertNotIn('tags', page.metadata)
self.assertDictHasSubset(page.metadata, expected)
class HTMLReaderTest(ReaderTest):
def test_article_with_comments(self):