1
0
Fork 0
forked from github/pelican

Add pandoc2 support to pelican-import. Fix #2255

Specific options are passed to pandoc2 in order to get results similar to
those obtained with pandoc1:

- Disable smart quotes from the markdown output.

- Enable raw parsing from html.
This commit is contained in:
David Alfonso 2018-06-26 18:47:42 +02:00
commit 150d1f05d0
3 changed files with 53 additions and 6 deletions

View file

@ -554,7 +554,11 @@ Pelicans are supposed to eat fish, damn it!
<iframe width="420" height="315" src="http://www.youtube.com/embed/QNNl_uWmQXE" frameborder="0" allowfullscreen></iframe>
Bottom line: don't mess up with birds]]></content:encoded>
Bottom line: don't mess up with birds
"That's a 'wonderful' shoe."
“Thats a magic sock.”]]></content:encoded>
<excerpt:encoded><![CDATA[]]></excerpt:encoded>
<wp:post_id>173</wp:post_id>
<wp:post_date>2012-02-16 15:52:55</wp:post_date>

View file

@ -268,6 +268,19 @@ class TestWordpressXmlImporter(unittest.TestCase):
code_line = re.search(r'\s+a = \[1, 2, 3\]', md).group(0)
self.assertTrue(sample_line.rindex('This') < code_line.rindex('a'))
def test_dont_use_smart_quotes(self):
    """Check that the generated markdown has no backslash-escaped quotes.

    Only posts whose title starts with "Post with raw data" are run
    through ``fields2pelican`` (wrapped with ``mute(True)``); the first
    resulting file is searched for a backslash followed by a straight or
    curly quote character.
    """
    def read_text(path):
        # Read the whole generated file as UTF-8 text.
        with open(path, encoding='utf-8') as handle:
            return handle.read()

    quiet_convert = mute(True)(fields2pelican)
    raw_data_posts = filter(
        lambda post: post[0].startswith("Post with raw data"),
        self.posts)
    with temporary_folder() as out_dir:
        produced = quiet_convert(raw_data_posts, 'markdown', out_dir)
        # Every produced file is read; only the first one is inspected.
        contents = [read_text(name) for name in produced]
        md = contents[0]
        match = re.search(r'\\[\'"“”‘’]', md)
        self.assertFalse(match)
class TestBuildHeader(unittest.TestCase):
def test_build_header(self):

View file

@ -674,6 +674,22 @@ def download_attachments(output_path, urls):
return locations
def is_pandoc_needed(fields):
    """Tell whether any entry in *fields* has to go through pandoc.

    An entry needs pandoc when its input markup (the value at index 9 of
    the entry) is ``'html'`` or ``'wp-html'``.

    A real boolean is returned.  Returning ``filter(...)`` directly is not
    usable as a condition on Python 3: the lazy filter object is truthy
    even when it would yield nothing.

    :param fields: iterable of indexable entries with at least 10 items.
    :return: ``True`` if at least one entry needs pandoc, else ``False``.
        For one-shot iterators (generators, ``filter`` objects, ...) the
        answer is always ``True``: inspecting them would consume entries
        that the caller still has to process, so pandoc is conservatively
        assumed to be required.
    """
    in_markup_idx = 9
    # An iterator returns itself from iter(); containers return a new one.
    if iter(fields) is fields:
        return True
    return any(f[in_markup_idx] in ('html', 'wp-html') for f in fields)
def get_pandoc_version():
    """Return the version token reported by ``pandoc --version``.

    The command output is split on whitespace and the second token is
    returned (presumably the version number that follows the program
    name).  If the command cannot be run or exits with an error, a
    warning is logged and an empty string is returned instead.
    """
    try:
        version_text = subprocess.check_output(
            ['pandoc', '--version'], universal_newlines=True)
    except (subprocess.CalledProcessError, OSError) as e:
        logger.warning("Pandoc version unknown: %s", e)
        return ''
    else:
        tokens = version_text.split()
        return tokens[1]
def update_links_to_attached_files(content, attachments):
for old_url, new_path in attachments.items():
# url may occur both with http:// and https://
@ -689,6 +705,14 @@ def fields2pelican(
dircat=False, strip_raw=False, disable_slugs=False,
dirpage=False, filename_template=None, filter_author=None,
wp_custpost=False, wp_attach=False, attachments=None):
pandoc_version = get_pandoc_version()
if is_pandoc_needed(fields) and not pandoc_version:
error = ('Pandoc must be installed to complete the '
'requested import action.')
exit(error)
for (title, content, filename, date, author, categories, tags, status,
kind, in_markup) in fields:
if filter_author and filter_author != author:
@ -735,11 +759,17 @@ def fields2pelican(
fp.write(new_content)
if pandoc_version[0] == '1':
parse_raw = '--parse-raw' if not strip_raw else ''
cmd = ('pandoc --normalize {0} --from=html'
' --to={1} -o "{2}" "{3}"')
cmd = cmd.format(parse_raw, out_markup,
out_filename, html_filename)
else:
from_arg = '-f html+raw_html' if not strip_raw else '-f html'
cmd = ('pandoc {0} --to={1}-smart -o "{2}" "{3}"')
cmd = cmd.format(from_arg, out_markup,
out_filename, html_filename)
try:
rc = subprocess.call(cmd, shell=True)