diff --git a/pelican/tests/content/wordpressexport.xml b/pelican/tests/content/wordpressexport.xml
index 9b194e8f..4f5b3651 100644
--- a/pelican/tests/content/wordpressexport.xml
+++ b/pelican/tests/content/wordpressexport.xml
@@ -685,7 +685,52 @@ proident, sunt in culpa qui officia deserunt mollit anim id est laborum.]]>_edit_last
-
+
+ -
+ Caption on image
+ http://thisisa.test/?p=176
+ Thu, 01 Jan 1970 00:00:00 +0000
+ bob
+ http://thisisa.test/?p=176
+
+ [/caption]
+
+[caption attachment_id="43" align="aligncenter" width="300"]
This also a pelican[/caption]
+
+[caption attachment_id="44" align="aligncenter" width="300"]
Yet another pelican[/caption]
+
+Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
+tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
+quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
+consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
+cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
+proident, sunt in culpa qui officia deserunt mollit anim id est laborum.]]>
+
+ 176
+ 2012-02-16 15:52:55
+ 0000-00-00 00:00:00
+ open
+ open
+ caption-on-image
+ publish
+ 0
+ 0
+ post
+
+ 0
+
+
+ _edit_last
+
+
+
-
A custom post in category 4
http://thisisa.test/?p=175
diff --git a/pelican/tests/test_importer.py b/pelican/tests/test_importer.py
index 198ee0fe..743cea8c 100644
--- a/pelican/tests/test_importer.py
+++ b/pelican/tests/test_importer.py
@@ -334,6 +334,32 @@ class TestWordpressXmlImporter(unittest.TestCase):
escaped_quotes = re.search(r'\\[\'"“”‘’]', md)
self.assertFalse(escaped_quotes)
+ def test_convert_caption_to_figure(self):
+ def r(f):
+ with open(f, encoding='utf-8') as infile:
+ return infile.read()
+ silent_f2p = mute(True)(fields2pelican)
+ test_post = filter(
+ lambda p: p[0].startswith("Caption on image"),
+ self.posts)
+ with temporary_folder() as temp:
+ md = [r(f) for f in silent_f2p(test_post, 'markdown', temp)][0]
+
+ caption = re.search(r'\[caption', md)
+ self.assertFalse(caption)
+
+ for occurence in [
+ '/theme/img/xpelican.png.pagespeed.ic.Rjep0025-y.png',
+ '/theme/img/xpelican-3.png.pagespeed.ic.m-NAIdRCOM.png',
+ '/theme/img/xpelican.png.pagespeed.ic.Rjep0025-y.png',
+ 'This is a pelican',
+ 'This also a pelican',
+ 'Yet another pelican',
+ ]:
+ # pandoc 2.x converts <figure> into ![text](src)
+ # pandoc 3.x converts into <figure>src<figcaption>text</figcaption></figure>
+ self.assertIn(occurence, md)
+
class TestBuildHeader(unittest.TestCase):
def test_build_header(self):
diff --git a/pelican/tools/pelican_import.py b/pelican/tools/pelican_import.py
index 7833ebbe..b426de9c 100755
--- a/pelican/tools/pelican_import.py
+++ b/pelican/tools/pelican_import.py
@@ -107,6 +107,13 @@ def decode_wp_content(content, br=True):
return re.sub(pattern, lambda m: dic[m.group()], string)
content = _multi_replace(pre_tags, content)
+ # convert [caption] tags into <figure>
+ content = re.sub(
+ r'\[caption(?:.*?)(?:caption=\"(.*?)\")?\]'
+ r'((?:\)?(?:\)(?:\<\/a\>)?)\s?(.*?)\[\/caption\]',
+ r'\n\2\n\1\3\n',
+ content)
+
return content