mirror of
https://github.com/getpelican/pelican.git
synced 2025-10-15 20:28:56 +02:00
Merge pull request #2452 from stuaxo/patch-6
Importer: Avoid downloading duplicate post attachments
This commit is contained in:
commit
3596e04639
2 changed files with 11 additions and 9 deletions
|
|
@@ -417,20 +417,22 @@ class TestWordpressXMLAttachements(unittest.TestCase):
|
||||||
self.assertTrue(self.attachments)
|
self.assertTrue(self.attachments)
|
||||||
for post in self.attachments.keys():
|
for post in self.attachments.keys():
|
||||||
if post is None:
|
if post is None:
|
||||||
expected = ('https://upload.wikimedia.org/wikipedia/commons/'
|
expected = {
|
||||||
'thumb/2/2c/Pelican_lakes_entrance02.jpg/'
|
('https://upload.wikimedia.org/wikipedia/commons/'
|
||||||
'240px-Pelican_lakes_entrance02.jpg')
|
'thumb/2/2c/Pelican_lakes_entrance02.jpg/'
|
||||||
self.assertEqual(self.attachments[post][0], expected)
|
'240px-Pelican_lakes_entrance02.jpg')
|
||||||
|
}
|
||||||
|
self.assertEqual(self.attachments[post], expected)
|
||||||
elif post == 'with-excerpt':
|
elif post == 'with-excerpt':
|
||||||
expected_invalid = ('http://thisurlisinvalid.notarealdomain/'
|
expected_invalid = ('http://thisurlisinvalid.notarealdomain/'
|
||||||
'not_an_image.jpg')
|
'not_an_image.jpg')
|
||||||
expected_pelikan = ('http://en.wikipedia.org/wiki/'
|
expected_pelikan = ('http://en.wikipedia.org/wiki/'
|
||||||
'File:Pelikan_Walvis_Bay.jpg')
|
'File:Pelikan_Walvis_Bay.jpg')
|
||||||
self.assertEqual(self.attachments[post][0], expected_invalid)
|
self.assertEqual(self.attachments[post],
|
||||||
self.assertEqual(self.attachments[post][1], expected_pelikan)
|
{expected_invalid, expected_pelikan})
|
||||||
elif post == 'with-tags':
|
elif post == 'with-tags':
|
||||||
expected_invalid = ('http://thisurlisinvalid.notarealdomain')
|
expected_invalid = ('http://thisurlisinvalid.notarealdomain')
|
||||||
self.assertEqual(self.attachments[post][0], expected_invalid)
|
self.assertEqual(self.attachments[post], {expected_invalid})
|
||||||
else:
|
else:
|
||||||
self.fail('all attachments should match to a '
|
self.fail('all attachments should match to a '
|
||||||
'filename or None, {}'
|
'filename or None, {}'
|
||||||
|
|
|
||||||
|
|
@@ -699,7 +699,7 @@ def get_attachments(xml):
|
||||||
else:
|
else:
|
||||||
filename = get_filename(post_name, post_id)
|
filename = get_filename(post_name, post_id)
|
||||||
names[post_id] = filename
|
names[post_id] = filename
|
||||||
attachedposts = defaultdict(list)
|
attachedposts = defaultdict(set)
|
||||||
for parent, url in attachments:
|
for parent, url in attachments:
|
||||||
try:
|
try:
|
||||||
parent_name = names[parent]
|
parent_name = names[parent]
|
||||||
|
|
@@ -707,7 +707,7 @@ def get_attachments(xml):
|
||||||
# attachment's parent is not a valid post
|
# attachment's parent is not a valid post
|
||||||
parent_name = None
|
parent_name = None
|
||||||
|
|
||||||
attachedposts[parent_name].append(url)
|
attachedposts[parent_name].add(url)
|
||||||
return attachedposts
|
return attachedposts
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue