forked from github/pelican
Convert Wordpress caption to figure
In WordPress, inserting an image with a caption can produce any of the following: [caption id="attachment_42" caption="Image Description"]<a ...><img ... /></a>[/caption], [caption id="attachment_42"]<a ...><img ... /></a> Image Description[/caption], or [caption id="attachment_42"]<img ... > Image Description[/caption]. Replace each of these with an HTML figure tag.
This commit is contained in:
parent
3be0703b14
commit
48166bd687
3 changed files with 79 additions and 1 deletions
47
pelican/tests/content/wordpressexport.xml
vendored
47
pelican/tests/content/wordpressexport.xml
vendored
|
|
@ -685,7 +685,52 @@ proident, sunt in culpa qui officia deserunt mollit anim id est laborum.]]></con
|
||||||
<wp:meta_key>_edit_last</wp:meta_key>
|
<wp:meta_key>_edit_last</wp:meta_key>
|
||||||
<wp:meta_value><![CDATA[3]]></wp:meta_value>
|
<wp:meta_value><![CDATA[3]]></wp:meta_value>
|
||||||
</wp:postmeta>
|
</wp:postmeta>
|
||||||
</item>
|
</item>
|
||||||
|
<item>
|
||||||
|
<title>Caption on image</title>
|
||||||
|
<link>http://thisisa.test/?p=176</link>
|
||||||
|
<pubDate>Thu, 01 Jan 1970 00:00:00 +0000</pubDate>
|
||||||
|
<dc:creator>bob</dc:creator>
|
||||||
|
<guid isPermaLink="false">http://thisisa.test/?p=176</guid>
|
||||||
|
<description></description>
|
||||||
|
<content:encoded><![CDATA[Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||||
|
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
||||||
|
|
||||||
|
[caption attachment_id="42" align="aligncenter" width="300" caption="This is a pelican"]<img src="/theme/img/xpelican.png.pagespeed.ic.Rjep0025-y.png"/>[/caption]
|
||||||
|
|
||||||
|
[caption attachment_id="43" align="aligncenter" width="300"]<img src="/theme/img/xpelican-3.png.pagespeed.ic.m-NAIdRCOM.png" width="300" height="216" class="size-medium wp-image-1055" /> This also a pelican[/caption]
|
||||||
|
|
||||||
|
[caption attachment_id="44" align="aligncenter" width="300"]<a href="https://getpelican.com/"><img src="/theme/img/xpelican.png.pagespeed.ic.Rjep0025-y.png" alt=""/> Yet another pelican[/caption]
|
||||||
|
|
||||||
|
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||||
|
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.]]></content:encoded>
|
||||||
|
<excerpt:encoded><![CDATA[]]></excerpt:encoded>
|
||||||
|
<wp:post_id>176</wp:post_id>
|
||||||
|
<wp:post_date>2012-02-16 15:52:55</wp:post_date>
|
||||||
|
<wp:post_date_gmt>0000-00-00 00:00:00</wp:post_date_gmt>
|
||||||
|
<wp:comment_status>open</wp:comment_status>
|
||||||
|
<wp:ping_status>open</wp:ping_status>
|
||||||
|
<wp:post_name>caption-on-image</wp:post_name>
|
||||||
|
<wp:status>publish</wp:status>
|
||||||
|
<wp:post_parent>0</wp:post_parent>
|
||||||
|
<wp:menu_order>0</wp:menu_order>
|
||||||
|
<wp:post_type>post</wp:post_type>
|
||||||
|
<wp:post_password></wp:post_password>
|
||||||
|
<wp:is_sticky>0</wp:is_sticky>
|
||||||
|
<category domain="category" nicename="category-2"><![CDATA[Category 2]]></category>
|
||||||
|
<wp:postmeta>
|
||||||
|
<wp:meta_key>_edit_last</wp:meta_key>
|
||||||
|
<wp:meta_value><![CDATA[3]]></wp:meta_value>
|
||||||
|
</wp:postmeta>
|
||||||
|
</item>
|
||||||
<item>
|
<item>
|
||||||
<title>A custom post in category 4</title>
|
<title>A custom post in category 4</title>
|
||||||
<link>http://thisisa.test/?p=175</link>
|
<link>http://thisisa.test/?p=175</link>
|
||||||
|
|
|
||||||
|
|
@ -334,6 +334,32 @@ class TestWordpressXmlImporter(unittest.TestCase):
|
||||||
escaped_quotes = re.search(r'\\[\'"“”‘’]', md)
|
escaped_quotes = re.search(r'\\[\'"“”‘’]', md)
|
||||||
self.assertFalse(escaped_quotes)
|
self.assertFalse(escaped_quotes)
|
||||||
|
|
||||||
|
def test_convert_caption_to_figure(self):
|
||||||
|
def r(f):
|
||||||
|
with open(f, encoding='utf-8') as infile:
|
||||||
|
return infile.read()
|
||||||
|
silent_f2p = mute(True)(fields2pelican)
|
||||||
|
test_post = filter(
|
||||||
|
lambda p: p[0].startswith("Caption on image"),
|
||||||
|
self.posts)
|
||||||
|
with temporary_folder() as temp:
|
||||||
|
md = [r(f) for f in silent_f2p(test_post, 'markdown', temp)][0]
|
||||||
|
|
||||||
|
caption = re.search(r'\[caption', md)
|
||||||
|
self.assertFalse(caption)
|
||||||
|
|
||||||
|
for occurence in [
|
||||||
|
'/theme/img/xpelican.png.pagespeed.ic.Rjep0025-y.png',
|
||||||
|
'/theme/img/xpelican-3.png.pagespeed.ic.m-NAIdRCOM.png',
|
||||||
|
'/theme/img/xpelican.png.pagespeed.ic.Rjep0025-y.png',
|
||||||
|
'This is a pelican',
|
||||||
|
'This also a pelican',
|
||||||
|
'Yet another pelican',
|
||||||
|
]:
|
||||||
|
# pandoc 2.x converts into ![text](src)
|
||||||
|
# pandoc 3.x converts into <figure>src<figcaption>text</figcaption></figure>
|
||||||
|
self.assertIn(occurence, md)
|
||||||
|
|
||||||
|
|
||||||
class TestBuildHeader(unittest.TestCase):
|
class TestBuildHeader(unittest.TestCase):
|
||||||
def test_build_header(self):
|
def test_build_header(self):
|
||||||
|
|
|
||||||
|
|
@ -107,6 +107,13 @@ def decode_wp_content(content, br=True):
|
||||||
return re.sub(pattern, lambda m: dic[m.group()], string)
|
return re.sub(pattern, lambda m: dic[m.group()], string)
|
||||||
content = _multi_replace(pre_tags, content)
|
content = _multi_replace(pre_tags, content)
|
||||||
|
|
||||||
|
# convert [caption] tags into <figure>
|
||||||
|
content = re.sub(
|
||||||
|
r'\[caption(?:.*?)(?:caption=\"(.*?)\")?\]'
|
||||||
|
r'((?:\<a(?:.*?)\>)?(?:\<img.*?\>)(?:\<\/a\>)?)\s?(.*?)\[\/caption\]',
|
||||||
|
r'<figure>\n\2\n<figcaption>\1\3</figcaption>\n</figure>',
|
||||||
|
content)
|
||||||
|
|
||||||
return content
|
return content
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue