From ded234467d0f148a72008c08d4eb30c8e1b9228c Mon Sep 17 00:00:00 2001 From: Stuart Axon Date: Thu, 15 Nov 2018 15:12:20 +0000 Subject: [PATCH 1/3] Update pelican_import.py pelican-import: Move pandoc check inside loop, fixing #2448 --- pelican/tools/pelican_import.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/pelican/tools/pelican_import.py b/pelican/tools/pelican_import.py index 97df58ca..da636df5 100755 --- a/pelican/tools/pelican_import.py +++ b/pelican/tools/pelican_import.py @@ -739,9 +739,8 @@ def download_attachments(output_path, urls): return locations -def is_pandoc_needed(fields): - in_markup_idx = 9 - return filter(lambda f: f[in_markup_idx] in ('html', 'wp-html'), fields) +def is_pandoc_needed(in_markup): + return in_markup in ('html', 'wp-html') def get_pandoc_version(): @@ -773,11 +772,6 @@ def fields2pelican( pandoc_version = get_pandoc_version() - if is_pandoc_needed(fields) and not pandoc_version: - error = ('Pandoc must be installed to complete the ' - 'requested import action.') - exit(error) - settings = read_settings() slug_subs = settings['SLUG_REGEX_SUBSTITUTIONS'] @@ -785,6 +779,11 @@ def fields2pelican( kind, in_markup) in fields: if filter_author and filter_author != author: continue + if is_pandoc_needed(in_markup) and not pandoc_version: + error = ('Pandoc must be installed to complete the ' + 'requested import action.') + exit(error) + slug = not disable_slugs and filename or None if wp_attach and attachments: From 942e4622413a5098793a4201c076ec1366a2386e Mon Sep 17 00:00:00 2001 From: Stuart Axon Date: Thu, 15 Nov 2018 21:32:16 +0000 Subject: [PATCH 2/3] Don't convert posts to lists for wp importer. 
--- pelican/tests/test_importer.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pelican/tests/test_importer.py b/pelican/tests/test_importer.py index 75358e84..7afaa479 100644 --- a/pelican/tests/test_importer.py +++ b/pelican/tests/test_importer.py @@ -91,8 +91,8 @@ class TestWordpressXmlImporter(unittest.TestCase): def setUp(self): self.old_locale = locale.setlocale(locale.LC_ALL) locale.setlocale(locale.LC_ALL, str('C')) - self.posts = list(wp2fields(WORDPRESS_XML_SAMPLE)) - self.custposts = list(wp2fields(WORDPRESS_XML_SAMPLE, True)) + self.posts = wp2fields(WORDPRESS_XML_SAMPLE) + self.custposts = wp2fields(WORDPRESS_XML_SAMPLE, True) def tearDown(self): locale.setlocale(locale.LC_ALL, self.old_locale) @@ -242,6 +242,8 @@ class TestWordpressXmlImporter(unittest.TestCase): self.assertFalse(out_name.endswith(filename)) def test_can_toggle_raw_html_code_parsing(self): + test_posts = list(self.posts) + def r(f): with open(f, encoding='utf-8') as infile: return infile.read() @@ -250,16 +252,16 @@ class TestWordpressXmlImporter(unittest.TestCase): with temporary_folder() as temp: rst_files = (r(f) for f - in silent_f2p(self.posts, 'markdown', temp)) + in silent_f2p(test_posts, 'markdown', temp)) self.assertTrue(any(' Date: Thu, 15 Nov 2018 21:37:17 +0000 Subject: [PATCH 3/3] Make the blogger tests consistent with the wp ones - cast to list in test if needed. 
--- pelican/tests/test_importer.py | 20 +++++++++++--------- pelican/tools/pelican_import.py | 10 +++++++--- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/pelican/tests/test_importer.py b/pelican/tests/test_importer.py index 7afaa479..75310a80 100644 --- a/pelican/tests/test_importer.py +++ b/pelican/tests/test_importer.py @@ -44,7 +44,7 @@ class TestBloggerXmlImporter(unittest.TestCase): def setUp(self): self.old_locale = locale.setlocale(locale.LC_ALL) locale.setlocale(locale.LC_ALL, str('C')) - self.posts = list(blogger2fields(BLOGGER_XML_SAMPLE)) + self.posts = blogger2fields(BLOGGER_XML_SAMPLE) def tearDown(self): locale.setlocale(locale.LC_ALL, self.old_locale) @@ -53,14 +53,15 @@ class TestBloggerXmlImporter(unittest.TestCase): """Check that importer only outputs pages, articles and comments, that these are correctly identified and that titles are correct. """ - kinds = {x[8] for x in self.posts} + test_posts = list(self.posts) + kinds = {x[8] for x in test_posts} self.assertEqual({'page', 'article', 'comment'}, kinds) - page_titles = {x[0] for x in self.posts if x[8] == 'page'} + page_titles = {x[0] for x in test_posts if x[8] == 'page'} self.assertEqual({'Test page', 'Test page 2'}, page_titles) - article_titles = {x[0] for x in self.posts if x[8] == 'article'} + article_titles = {x[0] for x in test_posts if x[8] == 'article'} self.assertEqual({'Black as Egypt\'s Night', 'The Steel Windpipe'}, article_titles) - comment_titles = {x[0] for x in self.posts if x[8] == 'comment'} + comment_titles = {x[0] for x in test_posts if x[8] == 'comment'} self.assertEqual({'Mishka, always a pleasure to read your ' 'adventures!...'}, comment_titles) @@ -69,15 +70,16 @@ class TestBloggerXmlImporter(unittest.TestCase): """Check that importerer outputs only statuses 'published' and 'draft', that these are correctly identified and that filenames are correct. 
""" - statuses = {x[7] for x in self.posts} + test_posts = list(self.posts) + statuses = {x[7] for x in test_posts} self.assertEqual({'published', 'draft'}, statuses) - draft_filenames = {x[2] for x in self.posts if x[7] == 'draft'} + draft_filenames = {x[2] for x in test_posts if x[7] == 'draft'} # draft filenames are id-based self.assertEqual({'page-4386962582497458967', 'post-1276418104709695660'}, draft_filenames) - published_filenames = {x[2] for x in self.posts if x[7] == 'published'} + published_filenames = {x[2] for x in test_posts if x[7] == 'published'} # published filenames are url-based, except comments self.assertEqual({'the-steel-windpipe', 'test-page', @@ -243,7 +245,7 @@ class TestWordpressXmlImporter(unittest.TestCase): def test_can_toggle_raw_html_code_parsing(self): test_posts = list(self.posts) - + def r(f): with open(f, encoding='utf-8') as infile: return infile.read() diff --git a/pelican/tools/pelican_import.py b/pelican/tools/pelican_import.py index da636df5..346fca16 100755 --- a/pelican/tools/pelican_import.py +++ b/pelican/tools/pelican_import.py @@ -771,6 +771,7 @@ def fields2pelican( wp_custpost=False, wp_attach=False, attachments=None): pandoc_version = get_pandoc_version() + posts_require_pandoc = [] settings = read_settings() slug_subs = settings['SLUG_REGEX_SUBSTITUTIONS'] @@ -780,9 +781,7 @@ def fields2pelican( if filter_author and filter_author != author: continue if is_pandoc_needed(in_markup) and not pandoc_version: - error = ('Pandoc must be installed to complete the ' - 'requested import action.') - exit(error) + posts_require_pandoc.append(filename) slug = not disable_slugs and filename or None @@ -868,6 +867,11 @@ def fields2pelican( with open(out_filename, 'w', encoding='utf-8') as fs: fs.write(header + content) + + if posts_require_pandoc: + logger.error("Pandoc must be installed to import the following posts:" + "\n {}".format("\n ".join(posts_require_pandoc))) + if wp_attach and attachments and None in attachments: 
print("downloading attachments that don't have a parent post") urls = attachments[None]