Make HTMLReader parse multiple occurrences of metadata tags as list

this means you can now specify:
<meta name="custom_field" content="value_1" />
<meta name="custom_field" content="value_2" />

and the resulting object.custom_field will be ['value_1', 'value_2']
This commit is contained in:
Mr. Senko 2017-12-02 13:10:43 +02:00
commit f62217f38e
5 changed files with 33 additions and 1 deletions

View file

@ -657,6 +657,14 @@ class HTMLReaderTest(ReaderTest):
self.assertDictHasSubset(page.metadata, expected)
def test_article_with_multiple_similar_metadata_tags(self):
    """Check that ``custom_field`` is read as a list of both values.

    Reads ``article_with_multiple_metadata_tags.html`` and asserts that
    the parsed metadata contains ``custom_field`` mapped to the list of
    the two expected URLs, in order.
    """
    parsed = self.read_file(path='article_with_multiple_metadata_tags.html')
    self.assertDictHasSubset(
        parsed.metadata,
        {'custom_field': ['http://notmyidea.org', 'http://mrsenko.com']},
    )
def test_article_with_multiple_authors(self):
page = self.read_file(path='article_with_multiple_authors.html')
expected = {