Merge pull request #2449 from stuaxo/patch-5

Make importer work again. Fixes #2448
This commit is contained in:
Justin Mayer 2018-11-28 21:01:16 -08:00 committed by GitHub
commit 6aa2ad1808
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 29 additions and 22 deletions

View file

@ -44,7 +44,7 @@ class TestBloggerXmlImporter(unittest.TestCase):
def setUp(self): def setUp(self):
self.old_locale = locale.setlocale(locale.LC_ALL) self.old_locale = locale.setlocale(locale.LC_ALL)
locale.setlocale(locale.LC_ALL, str('C')) locale.setlocale(locale.LC_ALL, str('C'))
self.posts = list(blogger2fields(BLOGGER_XML_SAMPLE)) self.posts = blogger2fields(BLOGGER_XML_SAMPLE)
def tearDown(self): def tearDown(self):
locale.setlocale(locale.LC_ALL, self.old_locale) locale.setlocale(locale.LC_ALL, self.old_locale)
@ -53,14 +53,15 @@ class TestBloggerXmlImporter(unittest.TestCase):
"""Check that importer only outputs pages, articles and comments, """Check that importer only outputs pages, articles and comments,
that these are correctly identified and that titles are correct. that these are correctly identified and that titles are correct.
""" """
kinds = {x[8] for x in self.posts} test_posts = list(self.posts)
kinds = {x[8] for x in test_posts}
self.assertEqual({'page', 'article', 'comment'}, kinds) self.assertEqual({'page', 'article', 'comment'}, kinds)
page_titles = {x[0] for x in self.posts if x[8] == 'page'} page_titles = {x[0] for x in test_posts if x[8] == 'page'}
self.assertEqual({'Test page', 'Test page 2'}, page_titles) self.assertEqual({'Test page', 'Test page 2'}, page_titles)
article_titles = {x[0] for x in self.posts if x[8] == 'article'} article_titles = {x[0] for x in test_posts if x[8] == 'article'}
self.assertEqual({'Black as Egypt\'s Night', 'The Steel Windpipe'}, self.assertEqual({'Black as Egypt\'s Night', 'The Steel Windpipe'},
article_titles) article_titles)
comment_titles = {x[0] for x in self.posts if x[8] == 'comment'} comment_titles = {x[0] for x in test_posts if x[8] == 'comment'}
self.assertEqual({'Mishka, always a pleasure to read your ' self.assertEqual({'Mishka, always a pleasure to read your '
'adventures!...'}, 'adventures!...'},
comment_titles) comment_titles)
@ -69,15 +70,16 @@ class TestBloggerXmlImporter(unittest.TestCase):
"""Check that importer outputs only statuses 'published' and 'draft', """Check that importer outputs only statuses 'published' and 'draft',
that these are correctly identified and that filenames are correct. that these are correctly identified and that filenames are correct.
""" """
statuses = {x[7] for x in self.posts} test_posts = list(self.posts)
statuses = {x[7] for x in test_posts}
self.assertEqual({'published', 'draft'}, statuses) self.assertEqual({'published', 'draft'}, statuses)
draft_filenames = {x[2] for x in self.posts if x[7] == 'draft'} draft_filenames = {x[2] for x in test_posts if x[7] == 'draft'}
# draft filenames are id-based # draft filenames are id-based
self.assertEqual({'page-4386962582497458967', self.assertEqual({'page-4386962582497458967',
'post-1276418104709695660'}, draft_filenames) 'post-1276418104709695660'}, draft_filenames)
published_filenames = {x[2] for x in self.posts if x[7] == 'published'} published_filenames = {x[2] for x in test_posts if x[7] == 'published'}
# published filenames are url-based, except comments # published filenames are url-based, except comments
self.assertEqual({'the-steel-windpipe', self.assertEqual({'the-steel-windpipe',
'test-page', 'test-page',
@ -91,8 +93,8 @@ class TestWordpressXmlImporter(unittest.TestCase):
def setUp(self): def setUp(self):
self.old_locale = locale.setlocale(locale.LC_ALL) self.old_locale = locale.setlocale(locale.LC_ALL)
locale.setlocale(locale.LC_ALL, str('C')) locale.setlocale(locale.LC_ALL, str('C'))
self.posts = list(wp2fields(WORDPRESS_XML_SAMPLE)) self.posts = wp2fields(WORDPRESS_XML_SAMPLE)
self.custposts = list(wp2fields(WORDPRESS_XML_SAMPLE, True)) self.custposts = wp2fields(WORDPRESS_XML_SAMPLE, True)
def tearDown(self): def tearDown(self):
locale.setlocale(locale.LC_ALL, self.old_locale) locale.setlocale(locale.LC_ALL, self.old_locale)
@ -242,6 +244,8 @@ class TestWordpressXmlImporter(unittest.TestCase):
self.assertFalse(out_name.endswith(filename)) self.assertFalse(out_name.endswith(filename))
def test_can_toggle_raw_html_code_parsing(self): def test_can_toggle_raw_html_code_parsing(self):
test_posts = list(self.posts)
def r(f): def r(f):
with open(f, encoding='utf-8') as infile: with open(f, encoding='utf-8') as infile:
return infile.read() return infile.read()
@ -250,16 +254,16 @@ class TestWordpressXmlImporter(unittest.TestCase):
with temporary_folder() as temp: with temporary_folder() as temp:
rst_files = (r(f) for f rst_files = (r(f) for f
in silent_f2p(self.posts, 'markdown', temp)) in silent_f2p(test_posts, 'markdown', temp))
self.assertTrue(any('<iframe' in rst for rst in rst_files)) self.assertTrue(any('<iframe' in rst for rst in rst_files))
rst_files = (r(f) for f rst_files = (r(f) for f
in silent_f2p(self.posts, 'markdown', in silent_f2p(test_posts, 'markdown',
temp, strip_raw=True)) temp, strip_raw=True))
self.assertFalse(any('<iframe' in rst for rst in rst_files)) self.assertFalse(any('<iframe' in rst for rst in rst_files))
# no effect in rst # no effect in rst
rst_files = (r(f) for f in silent_f2p(self.posts, 'rst', temp)) rst_files = (r(f) for f in silent_f2p(test_posts, 'rst', temp))
self.assertFalse(any('<iframe' in rst for rst in rst_files)) self.assertFalse(any('<iframe' in rst for rst in rst_files))
rst_files = (r(f) for f in silent_f2p(self.posts, 'rst', temp, rst_files = (r(f) for f in silent_f2p(test_posts, 'rst', temp,
strip_raw=True)) strip_raw=True))
self.assertFalse(any('<iframe' in rst for rst in rst_files)) self.assertFalse(any('<iframe' in rst for rst in rst_files))

View file

@ -739,9 +739,8 @@ def download_attachments(output_path, urls):
return locations return locations
def is_pandoc_needed(fields): def is_pandoc_needed(in_markup):
in_markup_idx = 9 return in_markup in ('html', 'wp-html')
return filter(lambda f: f[in_markup_idx] in ('html', 'wp-html'), fields)
def get_pandoc_version(): def get_pandoc_version():
@ -772,11 +771,7 @@ def fields2pelican(
wp_custpost=False, wp_attach=False, attachments=None): wp_custpost=False, wp_attach=False, attachments=None):
pandoc_version = get_pandoc_version() pandoc_version = get_pandoc_version()
posts_require_pandoc = []
if is_pandoc_needed(fields) and not pandoc_version:
error = ('Pandoc must be installed to complete the '
'requested import action.')
exit(error)
settings = read_settings() settings = read_settings()
slug_subs = settings['SLUG_REGEX_SUBSTITUTIONS'] slug_subs = settings['SLUG_REGEX_SUBSTITUTIONS']
@ -785,6 +780,9 @@ def fields2pelican(
kind, in_markup) in fields: kind, in_markup) in fields:
if filter_author and filter_author != author: if filter_author and filter_author != author:
continue continue
if is_pandoc_needed(in_markup) and not pandoc_version:
posts_require_pandoc.append(filename)
slug = not disable_slugs and filename or None slug = not disable_slugs and filename or None
if wp_attach and attachments: if wp_attach and attachments:
@ -869,6 +867,11 @@ def fields2pelican(
with open(out_filename, 'w', encoding='utf-8') as fs: with open(out_filename, 'w', encoding='utf-8') as fs:
fs.write(header + content) fs.write(header + content)
if posts_require_pandoc:
logger.error("Pandoc must be installed to import the following posts:"
"\n {}".format("\n ".join(posts_require_pandoc)))
if wp_attach and attachments and None in attachments: if wp_attach and attachments and None in attachments:
print("downloading attachments that don't have a parent post") print("downloading attachments that don't have a parent post")
urls = attachments[None] urls = attachments[None]