From ded234467d0f148a72008c08d4eb30c8e1b9228c Mon Sep 17 00:00:00 2001
From: Stuart Axon <stuaxo2@yahoo.com>
Date: Thu, 15 Nov 2018 15:12:20 +0000
Subject: [PATCH 1/3] Update pelican_import.py

pelican-import: Move pandoc check inside loop, fixing #2448
---
 pelican/tools/pelican_import.py | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/pelican/tools/pelican_import.py b/pelican/tools/pelican_import.py
index 97df58ca..da636df5 100755
--- a/pelican/tools/pelican_import.py
+++ b/pelican/tools/pelican_import.py
@@ -739,9 +739,8 @@ def download_attachments(output_path, urls):
     return locations
 
 
-def is_pandoc_needed(fields):
-    in_markup_idx = 9
-    return filter(lambda f: f[in_markup_idx] in ('html', 'wp-html'), fields)
+def is_pandoc_needed(in_markup):
+    return in_markup in ('html', 'wp-html')
 
 
 def get_pandoc_version():
@@ -773,11 +772,6 @@ def fields2pelican(
 
     pandoc_version = get_pandoc_version()
 
-    if is_pandoc_needed(fields) and not pandoc_version:
-        error = ('Pandoc must be installed to complete the '
-                 'requested import action.')
-        exit(error)
-
     settings = read_settings()
     slug_subs = settings['SLUG_REGEX_SUBSTITUTIONS']
 
@@ -785,6 +779,11 @@ def fields2pelican(
             kind, in_markup) in fields:
         if filter_author and filter_author != author:
             continue
+        if is_pandoc_needed(in_markup) and not pandoc_version:
+            error = ('Pandoc must be installed to complete the '
+                     'requested import action.')
+            exit(error)
+
         slug = not disable_slugs and filename or None
 
         if wp_attach and attachments:

From 942e4622413a5098793a4201c076ec1366a2386e Mon Sep 17 00:00:00 2001
From: Stuart Axon <stu.axon@gmail.com>
Date: Thu, 15 Nov 2018 21:32:16 +0000
Subject: [PATCH 2/3] Don't convert posts to lists for wp importer.

---
 pelican/tests/test_importer.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/pelican/tests/test_importer.py b/pelican/tests/test_importer.py
index 75358e84..7afaa479 100644
--- a/pelican/tests/test_importer.py
+++ b/pelican/tests/test_importer.py
@@ -91,8 +91,8 @@ class TestWordpressXmlImporter(unittest.TestCase):
     def setUp(self):
         self.old_locale = locale.setlocale(locale.LC_ALL)
         locale.setlocale(locale.LC_ALL, str('C'))
-        self.posts = list(wp2fields(WORDPRESS_XML_SAMPLE))
-        self.custposts = list(wp2fields(WORDPRESS_XML_SAMPLE, True))
+        self.posts = wp2fields(WORDPRESS_XML_SAMPLE)
+        self.custposts = wp2fields(WORDPRESS_XML_SAMPLE, True)
 
     def tearDown(self):
         locale.setlocale(locale.LC_ALL, self.old_locale)
@@ -242,6 +242,8 @@ class TestWordpressXmlImporter(unittest.TestCase):
             self.assertFalse(out_name.endswith(filename))
 
     def test_can_toggle_raw_html_code_parsing(self):
+        test_posts = list(self.posts)
+        
         def r(f):
             with open(f, encoding='utf-8') as infile:
                 return infile.read()
@@ -250,16 +252,16 @@ class TestWordpressXmlImporter(unittest.TestCase):
         with temporary_folder() as temp:
 
             rst_files = (r(f) for f
-                         in silent_f2p(self.posts, 'markdown', temp))
+                         in silent_f2p(test_posts, 'markdown', temp))
             self.assertTrue(any('<iframe' in rst for rst in rst_files))
             rst_files = (r(f) for f
-                         in silent_f2p(self.posts, 'markdown',
+                         in silent_f2p(test_posts, 'markdown',
                                        temp, strip_raw=True))
             self.assertFalse(any('<iframe' in rst for rst in rst_files))
             # no effect in rst
-            rst_files = (r(f) for f in silent_f2p(self.posts, 'rst', temp))
+            rst_files = (r(f) for f in silent_f2p(test_posts, 'rst', temp))
             self.assertFalse(any('<iframe' in rst for rst in rst_files))
-            rst_files = (r(f) for f in silent_f2p(self.posts, 'rst', temp,
+            rst_files = (r(f) for f in silent_f2p(test_posts, 'rst', temp,
                          strip_raw=True))
             self.assertFalse(any('<iframe' in rst for rst in rst_files))
 

From a597a31dad07e3d0775d5d36cf0272901c28aab3 Mon Sep 17 00:00:00 2001
From: Stuart Axon <stu.axon@gmail.com>
Date: Thu, 15 Nov 2018 21:37:17 +0000
Subject: [PATCH 3/3] Make the blogger tests consistent with the wp ones - cast
 to list in test if needed.

---
 pelican/tests/test_importer.py  | 20 +++++++++++---------
 pelican/tools/pelican_import.py | 10 +++++++---
 2 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/pelican/tests/test_importer.py b/pelican/tests/test_importer.py
index 7afaa479..75310a80 100644
--- a/pelican/tests/test_importer.py
+++ b/pelican/tests/test_importer.py
@@ -44,7 +44,7 @@ class TestBloggerXmlImporter(unittest.TestCase):
     def setUp(self):
         self.old_locale = locale.setlocale(locale.LC_ALL)
         locale.setlocale(locale.LC_ALL, str('C'))
-        self.posts = list(blogger2fields(BLOGGER_XML_SAMPLE))
+        self.posts = blogger2fields(BLOGGER_XML_SAMPLE)
 
     def tearDown(self):
         locale.setlocale(locale.LC_ALL, self.old_locale)
@@ -53,14 +53,15 @@ class TestBloggerXmlImporter(unittest.TestCase):
         """Check that importer only outputs pages, articles and comments,
         that these are correctly identified and that titles are correct.
         """
-        kinds = {x[8] for x in self.posts}
+        test_posts = list(self.posts)
+        kinds = {x[8] for x in test_posts}
         self.assertEqual({'page', 'article', 'comment'}, kinds)
-        page_titles = {x[0] for x in self.posts if x[8] == 'page'}
+        page_titles = {x[0] for x in test_posts if x[8] == 'page'}
         self.assertEqual({'Test page', 'Test page 2'}, page_titles)
-        article_titles = {x[0] for x in self.posts if x[8] == 'article'}
+        article_titles = {x[0] for x in test_posts if x[8] == 'article'}
         self.assertEqual({'Black as Egypt\'s Night', 'The Steel Windpipe'},
                          article_titles)
-        comment_titles = {x[0] for x in self.posts if x[8] == 'comment'}
+        comment_titles = {x[0] for x in test_posts if x[8] == 'comment'}
         self.assertEqual({'Mishka, always a pleasure to read your '
                           'adventures!...'},
                          comment_titles)
@@ -69,15 +70,16 @@ class TestBloggerXmlImporter(unittest.TestCase):
         """Check that importerer outputs only statuses 'published' and 'draft',
         that these are correctly identified and that filenames are correct.
         """
-        statuses = {x[7] for x in self.posts}
+        test_posts = list(self.posts)
+        statuses = {x[7] for x in test_posts}
         self.assertEqual({'published', 'draft'}, statuses)
 
-        draft_filenames = {x[2] for x in self.posts if x[7] == 'draft'}
+        draft_filenames = {x[2] for x in test_posts if x[7] == 'draft'}
         # draft filenames are id-based
         self.assertEqual({'page-4386962582497458967',
                           'post-1276418104709695660'}, draft_filenames)
 
-        published_filenames = {x[2] for x in self.posts if x[7] == 'published'}
+        published_filenames = {x[2] for x in test_posts if x[7] == 'published'}
         # published filenames are url-based, except comments
         self.assertEqual({'the-steel-windpipe',
                           'test-page',
@@ -243,7 +245,7 @@ class TestWordpressXmlImporter(unittest.TestCase):
 
     def test_can_toggle_raw_html_code_parsing(self):
         test_posts = list(self.posts)
-        
+
         def r(f):
             with open(f, encoding='utf-8') as infile:
                 return infile.read()
diff --git a/pelican/tools/pelican_import.py b/pelican/tools/pelican_import.py
index da636df5..346fca16 100755
--- a/pelican/tools/pelican_import.py
+++ b/pelican/tools/pelican_import.py
@@ -771,6 +771,7 @@ def fields2pelican(
         wp_custpost=False, wp_attach=False, attachments=None):
 
     pandoc_version = get_pandoc_version()
+    posts_require_pandoc = []
 
     settings = read_settings()
     slug_subs = settings['SLUG_REGEX_SUBSTITUTIONS']
@@ -780,9 +781,7 @@ def fields2pelican(
         if filter_author and filter_author != author:
             continue
         if is_pandoc_needed(in_markup) and not pandoc_version:
-            error = ('Pandoc must be installed to complete the '
-                     'requested import action.')
-            exit(error)
+            posts_require_pandoc.append(filename)
 
         slug = not disable_slugs and filename or None
 
@@ -868,6 +867,11 @@ def fields2pelican(
 
         with open(out_filename, 'w', encoding='utf-8') as fs:
             fs.write(header + content)
+
+    if posts_require_pandoc:
+        logger.error("Pandoc must be installed to import the following posts:"
+                     "\n  {}".format("\n  ".join(posts_require_pandoc)))
+
     if wp_attach and attachments and None in attachments:
         print("downloading attachments that don't have a parent post")
         urls = attachments[None]