Add pandoc2 support to pelican-import. Fix #2255

Specific options are passed to pandoc2 in order to get results similar to those produced by pandoc1:
- Disable smart quotes in the markdown output.
- Enable raw HTML parsing of the input.
2025-10-15 20:28:56 +02:00 · 2018-06-26 18:47:42 +02:00 · 2018-06-26 18:47:42 +02:00 · 150d1f05d0
commit 150d1f05d0
parent d2eb32c910
3 changed files with 53 additions and 6 deletions
--- a/pelican/tests/content/wordpressexport.xml
+++ b/pelican/tests/content/wordpressexport.xml
@ -554,7 +554,11 @@ Pelicans are supposed to eat fish, damn it!

 <iframe width="420" height="315" src="http://www.youtube.com/embed/QNNl_uWmQXE" frameborder="0" allowfullscreen></iframe>

-Bottom line: don't mess up with birds]]></content:encoded>
+Bottom line: don't mess up with birds
+
+"That's a 'wonderful' shoe."
+
+“That’s a ‘magic’ sock.”]]></content:encoded>
        <excerpt:encoded><![CDATA[]]></excerpt:encoded>
        <wp:post_id>173</wp:post_id>
        <wp:post_date>2012-02-16 15:52:55</wp:post_date>
--- a/pelican/tests/test_importer.py
+++ b/pelican/tests/test_importer.py
@ -268,6 +268,19 @@ class TestWordpressXmlImporter(unittest.TestCase):
            code_line = re.search(r'\s+a = \[1, 2, 3\]', md).group(0)
            self.assertTrue(sample_line.rindex('This') < code_line.rindex('a'))

+    def test_dont_use_smart_quotes(self):
+        def r(f):
+            with open(f, encoding='utf-8') as infile:
+                return infile.read()
+        silent_f2p = mute(True)(fields2pelican)
+        test_post = filter(
+            lambda p: p[0].startswith("Post with raw data"),
+            self.posts)
+        with temporary_folder() as temp:
+            md = [r(f) for f in silent_f2p(test_post, 'markdown', temp)][0]
+            escaped_quotes = re.search(r'\\[\'"“”‘’]', md)
+            self.assertFalse(escaped_quotes)
+

 class TestBuildHeader(unittest.TestCase):
    def test_build_header(self):