forked from github/pelican
[IMP] pelican_import with gfm instead of markdown
The markdown that pandoc produces on import is its own flavour of markdown. For instance, it uses fenced divs[1], which are not supported by python-markdown. When importing content from Wordpress, there are several issues, as explained in discussion 3113[2]. This change follows a discussion with a pandoc developer[3]. [1] https://pandoc.org/MANUAL.html#divs-and-spans [2] https://github.com/getpelican/pelican/discussions/3113 [3] https://fosstodon.org/@pandoc/110105559949588768 Take the following Wordpress blog post sample: ```html <p><!-- wp:paragraph --></p> <p>Paragraph content</p> <p><!-- /wp:paragraph --></p> <p><!-- wp:image {"align":"center","id":3747,"sizeSlug":"full"} --></p> <div class="wp-block-image"> <figure class="aligncenter size-full"><img src="https://test.com/test.jpg" alt="" class="wp-image-3747" title="Some title"/><br /> <figcaption><em>Some caption</em></figcaption> </figure> </div> <p><!-- /wp:image --></p> ``` Before this commit, it was imported as: ```md `<!-- wp:paragraph -->`{=html} Paragraph content `<!-- /wp:paragraph -->`{=html} `<!-- wp:image {"align":"center","id":3747,"sizeSlug":"full"} -->`{=html} ::: wp-block-image <figure class="aligncenter size-full"> <img src="https://test.com/test.jpg" title="Some title" class="wp-image-3747" /><br /> <figcaption><em>Some caption</em></figcaption> </figure> ::: `<!-- /wp:image -->`{=html} ``` After this change, it is imported as: ```md <!-- wp:paragraph --> Paragraph content <!-- /wp:paragraph --> <!-- wp:image {"align":"center","id":3747,"sizeSlug":"full"} --> <div class="wp-block-image"> <figure class="aligncenter size-full"> <img src="https://test.com/test.jpg" title="Some title" class="wp-image-3747" /><br /> <figcaption><em>Some caption</em></figcaption> </figure> </div> <!-- /wp:image --> ``` Fixes #3113
This commit is contained in:
parent
06c9e0fb80
commit
219c01afb0
2 changed files with 6 additions and 3 deletions
|
|
@ -317,7 +317,7 @@ class TestWordpressXmlImporter(unittest.TestCase):
|
|||
self.posts)
|
||||
with temporary_folder() as temp:
|
||||
md = [r(f) for f in silent_f2p(test_post, 'markdown', temp)][0]
|
||||
sample_line = re.search(r'- This is a code sample', md).group(0)
|
||||
sample_line = re.search(r'- This is a code sample', md).group(0)
|
||||
code_line = re.search(r'\s+a = \[1, 2, 3\]', md).group(0)
|
||||
self.assertTrue(sample_line.rindex('This') < code_line.rindex('a'))
|
||||
|
||||
|
|
|
|||
|
|
@ -839,12 +839,15 @@ def fields2pelican(
|
|||
if pandoc_version >= (1, 16) else '--no-wrap'
|
||||
cmd = ('pandoc --normalize {0} --from=html'
|
||||
' --to={1} {2} -o "{3}" "{4}"')
|
||||
cmd = cmd.format(parse_raw, out_markup, wrap_none,
|
||||
cmd = cmd.format(parse_raw,
|
||||
out_markup if out_markup != 'markdown' else "gfm",
|
||||
wrap_none,
|
||||
out_filename, html_filename)
|
||||
else:
|
||||
from_arg = '-f html+raw_html' if not strip_raw else '-f html'
|
||||
cmd = ('pandoc {0} --to={1}-smart --wrap=none -o "{2}" "{3}"')
|
||||
cmd = cmd.format(from_arg, out_markup,
|
||||
cmd = cmd.format(from_arg,
|
||||
out_markup if out_markup != 'markdown' else "gfm",
|
||||
out_filename, html_filename)
|
||||
|
||||
try:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue