Merge pull request #2449 from stuaxo/patch-5

Make importer work again. Fixes #2448
This commit is contained in:
Justin Mayer 2018-11-28 21:01:16 -08:00 committed by GitHub
commit 6aa2ad1808
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 29 additions and 22 deletions

View file

@ -44,7 +44,7 @@ class TestBloggerXmlImporter(unittest.TestCase):
def setUp(self):
self.old_locale = locale.setlocale(locale.LC_ALL)
locale.setlocale(locale.LC_ALL, str('C'))
self.posts = list(blogger2fields(BLOGGER_XML_SAMPLE))
self.posts = blogger2fields(BLOGGER_XML_SAMPLE)
def tearDown(self):
locale.setlocale(locale.LC_ALL, self.old_locale)
@ -53,14 +53,15 @@ class TestBloggerXmlImporter(unittest.TestCase):
"""Check that importer only outputs pages, articles and comments,
that these are correctly identified and that titles are correct.
"""
kinds = {x[8] for x in self.posts}
test_posts = list(self.posts)
kinds = {x[8] for x in test_posts}
self.assertEqual({'page', 'article', 'comment'}, kinds)
page_titles = {x[0] for x in self.posts if x[8] == 'page'}
page_titles = {x[0] for x in test_posts if x[8] == 'page'}
self.assertEqual({'Test page', 'Test page 2'}, page_titles)
article_titles = {x[0] for x in self.posts if x[8] == 'article'}
article_titles = {x[0] for x in test_posts if x[8] == 'article'}
self.assertEqual({'Black as Egypt\'s Night', 'The Steel Windpipe'},
article_titles)
comment_titles = {x[0] for x in self.posts if x[8] == 'comment'}
comment_titles = {x[0] for x in test_posts if x[8] == 'comment'}
self.assertEqual({'Mishka, always a pleasure to read your '
'adventures!...'},
comment_titles)
@ -69,15 +70,16 @@ class TestBloggerXmlImporter(unittest.TestCase):
"""Check that importer outputs only statuses 'published' and 'draft',
that these are correctly identified and that filenames are correct.
"""
statuses = {x[7] for x in self.posts}
test_posts = list(self.posts)
statuses = {x[7] for x in test_posts}
self.assertEqual({'published', 'draft'}, statuses)
draft_filenames = {x[2] for x in self.posts if x[7] == 'draft'}
draft_filenames = {x[2] for x in test_posts if x[7] == 'draft'}
# draft filenames are id-based
self.assertEqual({'page-4386962582497458967',
'post-1276418104709695660'}, draft_filenames)
published_filenames = {x[2] for x in self.posts if x[7] == 'published'}
published_filenames = {x[2] for x in test_posts if x[7] == 'published'}
# published filenames are url-based, except comments
self.assertEqual({'the-steel-windpipe',
'test-page',
@ -91,8 +93,8 @@ class TestWordpressXmlImporter(unittest.TestCase):
def setUp(self):
self.old_locale = locale.setlocale(locale.LC_ALL)
locale.setlocale(locale.LC_ALL, str('C'))
self.posts = list(wp2fields(WORDPRESS_XML_SAMPLE))
self.custposts = list(wp2fields(WORDPRESS_XML_SAMPLE, True))
self.posts = wp2fields(WORDPRESS_XML_SAMPLE)
self.custposts = wp2fields(WORDPRESS_XML_SAMPLE, True)
def tearDown(self):
locale.setlocale(locale.LC_ALL, self.old_locale)
@ -242,6 +244,8 @@ class TestWordpressXmlImporter(unittest.TestCase):
self.assertFalse(out_name.endswith(filename))
def test_can_toggle_raw_html_code_parsing(self):
test_posts = list(self.posts)
def r(f):
with open(f, encoding='utf-8') as infile:
return infile.read()
@ -250,16 +254,16 @@ class TestWordpressXmlImporter(unittest.TestCase):
with temporary_folder() as temp:
rst_files = (r(f) for f
in silent_f2p(self.posts, 'markdown', temp))
in silent_f2p(test_posts, 'markdown', temp))
self.assertTrue(any('<iframe' in rst for rst in rst_files))
rst_files = (r(f) for f
in silent_f2p(self.posts, 'markdown',
in silent_f2p(test_posts, 'markdown',
temp, strip_raw=True))
self.assertFalse(any('<iframe' in rst for rst in rst_files))
# no effect in rst
rst_files = (r(f) for f in silent_f2p(self.posts, 'rst', temp))
rst_files = (r(f) for f in silent_f2p(test_posts, 'rst', temp))
self.assertFalse(any('<iframe' in rst for rst in rst_files))
rst_files = (r(f) for f in silent_f2p(self.posts, 'rst', temp,
rst_files = (r(f) for f in silent_f2p(test_posts, 'rst', temp,
strip_raw=True))
self.assertFalse(any('<iframe' in rst for rst in rst_files))

View file

@ -739,9 +739,8 @@ def download_attachments(output_path, urls):
return locations
def is_pandoc_needed(fields):
in_markup_idx = 9
return filter(lambda f: f[in_markup_idx] in ('html', 'wp-html'), fields)
def is_pandoc_needed(in_markup):
    """Tell whether a post's input markup calls for Pandoc.

    :param in_markup: markup identifier of a single imported post.
    :return: ``True`` when the markup is ``'html'`` or ``'wp-html'``,
        ``False`` for any other value.
    """
    # Only these two markups are treated as needing a Pandoc conversion.
    pandoc_markups = ('html', 'wp-html')
    return in_markup in pandoc_markups
def get_pandoc_version():
@ -772,11 +771,7 @@ def fields2pelican(
wp_custpost=False, wp_attach=False, attachments=None):
pandoc_version = get_pandoc_version()
if is_pandoc_needed(fields) and not pandoc_version:
error = ('Pandoc must be installed to complete the '
'requested import action.')
exit(error)
posts_require_pandoc = []
settings = read_settings()
slug_subs = settings['SLUG_REGEX_SUBSTITUTIONS']
@ -785,6 +780,9 @@ def fields2pelican(
kind, in_markup) in fields:
if filter_author and filter_author != author:
continue
if is_pandoc_needed(in_markup) and not pandoc_version:
posts_require_pandoc.append(filename)
slug = not disable_slugs and filename or None
if wp_attach and attachments:
@ -869,6 +867,11 @@ def fields2pelican(
with open(out_filename, 'w', encoding='utf-8') as fs:
fs.write(header + content)
if posts_require_pandoc:
logger.error("Pandoc must be installed to import the following posts:"
"\n {}".format("\n ".join(posts_require_pandoc)))
if wp_attach and attachments and None in attachments:
print("downloading attachments that don't have a parent post")
urls = attachments[None]