Make HTMLReader parse multiple occurrences of metadata tags as a list

this means you can now specify:
<meta name="custom_field" content="value_1" />
<meta name="custom_field" content="value_2" />

and the resulting object.custom_field will be ['value_1', 'value_2']
This commit is contained in:
Mr. Senko 2017-12-02 13:10:43 +02:00
commit f62217f38e
5 changed files with 33 additions and 1 deletions

View file

@ -5,6 +5,7 @@ Next release
============
* New signal: ``feed_generated``
* Make the HTML reader parse multiple occurrences of metadata tags as a list
3.7.1 (2017-01-10)
==================

View file

@ -440,7 +440,17 @@ class HTMLReader(BaseReader):
if name == 'keywords':
name = 'tags'
self.metadata[name] = contents
if name in self.metadata:
# if this metadata already exists (i.e. a previous tag with the
# same name has already been specified), then either convert the
# existing value to a list or append to the existing list
if isinstance(self.metadata[name], list):
self.metadata[name].append(contents)
else:
self.metadata[name] = [self.metadata[name], contents]
else:
self.metadata[name] = contents
@classmethod
def _attr_value(cls, attrs, name, default=None):

View file

@ -0,0 +1,11 @@
<html>
<head>
<title>Metadata tags as list!</title>
<meta name="custom_field" content="http://notmyidea.org" />
<meta name="custom_field" content="http://mrsenko.com" />
</head>
<body>
When custom metadata tags are specified more than once
they are collected into a list!
</body>
</html>

View file

@ -173,6 +173,7 @@ class TestArticlesGenerator(unittest.TestCase):
['Article with markdown containing footnotes', 'published',
'Default', 'article'],
['Article with template', 'published', 'Default', 'custom'],
['Metadata tags as list!', 'published', 'Default', 'article'],
['Rst with filename metadata', 'published', 'yeah', 'article'],
['Test Markdown extensions', 'published', 'Default', 'article'],
['Test markdown File', 'published', 'test', 'article'],
@ -452,6 +453,7 @@ class TestArticlesGenerator(unittest.TestCase):
'Article with markdown and summary metadata single',
'Article with markdown containing footnotes',
'Article with template',
'Metadata tags as list!',
'Rst with filename metadata',
'Test Markdown extensions',
'Test markdown File',

View file

@ -657,6 +657,14 @@ class HTMLReaderTest(ReaderTest):
self.assertDictHasSubset(page.metadata, expected)
def test_article_with_multiple_similar_metadata_tags(self):
# Checks that a metadata name given more than once ends up as a list
# of values under that name in page.metadata.
# NOTE(review): the fixture is presumably the HTML file containing two
# <meta name="custom_field"> tags -- confirm against the test content dir.
page = self.read_file(path='article_with_multiple_metadata_tags.html')
# Both values are expected under the single 'custom_field' key.
expected = {
'custom_field': ['http://notmyidea.org', 'http://mrsenko.com'],
}
# assertDictHasSubset is defined outside this view; presumably it only
# checks the keys present in `expected` -- verify against the helper.
self.assertDictHasSubset(page.metadata, expected)
def test_article_with_multiple_authors(self):
page = self.read_file(path='article_with_multiple_authors.html')
expected = {