1
0
Fork 0
forked from github/pelican

Convert Wordpress caption to figure

In WordPress, inserting an image with a caption can look like:

[caption id="attachment_42" caption="Image Description"]<a ...><img ... /></a>[/caption]
[caption id="attachment_42"]<a ...><img ... /></a> Image Description[/caption]
[caption id="attachment_42"]<img ... > Image Description[/caption]

Replace these with an HTML figure tag.
This commit is contained in:
Martin (mart-e) 2023-06-04 12:34:53 +02:00 committed by Martin Trigaux
commit 48166bd687
3 changed files with 79 additions and 1 deletions

View file

@ -685,7 +685,52 @@ proident, sunt in culpa qui officia deserunt mollit anim id est laborum.]]></con
<wp:meta_key>_edit_last</wp:meta_key> <wp:meta_key>_edit_last</wp:meta_key>
<wp:meta_value><![CDATA[3]]></wp:meta_value> <wp:meta_value><![CDATA[3]]></wp:meta_value>
</wp:postmeta> </wp:postmeta>
</item> </item>
<item>
<title>Caption on image</title>
<link>http://thisisa.test/?p=176</link>
<pubDate>Thu, 01 Jan 1970 00:00:00 +0000</pubDate>
<dc:creator>bob</dc:creator>
<guid isPermaLink="false">http://thisisa.test/?p=176</guid>
<description></description>
<content:encoded><![CDATA[Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
[caption attachment_id="42" align="aligncenter" width="300" caption="This is a pelican"]<img src="/theme/img/xpelican.png.pagespeed.ic.Rjep0025-y.png"/>[/caption]
[caption attachment_id="43" align="aligncenter" width="300"]<img src="/theme/img/xpelican-3.png.pagespeed.ic.m-NAIdRCOM.png" width="300" height="216" class="size-medium wp-image-1055" /> This also a pelican[/caption]
[caption attachment_id="44" align="aligncenter" width="300"]<a href="https://getpelican.com/"><img src="/theme/img/xpelican.png.pagespeed.ic.Rjep0025-y.png" alt=""/> Yet another pelican[/caption]
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.]]></content:encoded>
<excerpt:encoded><![CDATA[]]></excerpt:encoded>
<wp:post_id>176</wp:post_id>
<wp:post_date>2012-02-16 15:52:55</wp:post_date>
<wp:post_date_gmt>0000-00-00 00:00:00</wp:post_date_gmt>
<wp:comment_status>open</wp:comment_status>
<wp:ping_status>open</wp:ping_status>
<wp:post_name>caption-on-image</wp:post_name>
<wp:status>publish</wp:status>
<wp:post_parent>0</wp:post_parent>
<wp:menu_order>0</wp:menu_order>
<wp:post_type>post</wp:post_type>
<wp:post_password></wp:post_password>
<wp:is_sticky>0</wp:is_sticky>
<category domain="category" nicename="category-2"><![CDATA[Category 2]]></category>
<wp:postmeta>
<wp:meta_key>_edit_last</wp:meta_key>
<wp:meta_value><![CDATA[3]]></wp:meta_value>
</wp:postmeta>
</item>
<item> <item>
<title>A custom post in category 4</title> <title>A custom post in category 4</title>
<link>http://thisisa.test/?p=175</link> <link>http://thisisa.test/?p=175</link>

View file

@ -334,6 +334,32 @@ class TestWordpressXmlImporter(unittest.TestCase):
escaped_quotes = re.search(r'\\[\'"“”‘’]', md) escaped_quotes = re.search(r'\\[\'"“”‘’]', md)
self.assertFalse(escaped_quotes) self.assertFalse(escaped_quotes)
def test_convert_caption_to_figure(self):
    """Check the Markdown produced for the "Caption on image" post.

    The converted text must no longer contain a ``[caption`` marker,
    while every image source and caption text listed below must still
    be present in it.
    """
    def read_text(path):
        with open(path, encoding='utf-8') as handle:
            return handle.read()

    quiet_fields2pelican = mute(True)(fields2pelican)
    caption_posts = filter(
        lambda post: post[0].startswith("Caption on image"),
        self.posts)
    with temporary_folder() as out_dir:
        # NOTE(review): presumably the muted call yields the paths of the
        # files it wrote -- confirm against `mute` / `fields2pelican`.
        written = quiet_fields2pelican(caption_posts, 'markdown', out_dir)
        md = [read_text(path) for path in written][0]

    leftover_shortcode = re.search(r'\[caption', md)
    self.assertFalse(leftover_shortcode)

    # Only the presence of each fragment is checked, not the markup
    # around it, because that markup depends on the pandoc version:
    #   pandoc 2.x converts into ![text](src)
    #   pandoc 3.x converts into
    #   <figure>src<figcaption>text</figcaption></figure>
    expected_fragments = (
        '/theme/img/xpelican.png.pagespeed.ic.Rjep0025-y.png',
        '/theme/img/xpelican-3.png.pagespeed.ic.m-NAIdRCOM.png',
        '/theme/img/xpelican.png.pagespeed.ic.Rjep0025-y.png',
        'This is a pelican',
        'This also a pelican',
        'Yet another pelican',
    )
    for fragment in expected_fragments:
        self.assertIn(fragment, md)
class TestBuildHeader(unittest.TestCase): class TestBuildHeader(unittest.TestCase):
def test_build_header(self): def test_build_header(self):

View file

@ -107,6 +107,13 @@ def decode_wp_content(content, br=True):
return re.sub(pattern, lambda m: dic[m.group()], string) return re.sub(pattern, lambda m: dic[m.group()], string)
content = _multi_replace(pre_tags, content) content = _multi_replace(pre_tags, content)
# convert [caption] tags into <figure>
content = re.sub(
r'\[caption(?:.*?)(?:caption=\"(.*?)\")?\]'
r'((?:\<a(?:.*?)\>)?(?:\<img.*?\>)(?:\<\/a\>)?)\s?(.*?)\[\/caption\]',
r'<figure>\n\2\n<figcaption>\1\3</figcaption>\n</figure>',
content)
return content return content