use a tempfile for intermediate html file for pandoc in importer

2025-10-15 20:28:56 +02:00 · 2023-10-28 16:31:05 +03:00 · 2023-10-28 16:31:05 +03:00 · 11c13ceae1
commit 11c13ceae1
parent 83a8059d02
1 changed file with 34 additions and 36 deletions
--- a/pelican/tools/pelican_import.py
+++ b/pelican/tools/pelican_import.py
@@ -7,6 +7,7 @@ import os
 import re
 import subprocess
 import sys
+import tempfile
 import time
 from collections import defaultdict
 from html import unescape
@@ -785,9 +786,8 @@ def fields2pelican(
        print(out_filename)

        if in_markup in ('html', 'wp-html'):
-            html_filename = os.path.join(output_path, filename + '.html')
-
-            with open(html_filename, 'w', encoding='utf-8') as fp:
+            with tempfile.TemporaryDirectory() as tmpdir:
+                html_filename = os.path.join(tmpdir, 'pandoc-input.html')
                # Replace newlines with paragraphs wrapped with <p> so
                # HTML is valid before conversion
                if in_markup == 'wp-html':
@@ -796,41 +796,39 @@ def fields2pelican(
                    paragraphs = content.splitlines()
                    paragraphs = ['<p>{}</p>'.format(p) for p in paragraphs]
                    new_content = ''.join(paragraphs)
+                with open(html_filename, 'w', encoding='utf-8') as fp:
+                    fp.write(new_content)

-                fp.write(new_content)
+                if pandoc_version < (2,):
+                    parse_raw = '--parse-raw' if not strip_raw else ''
+                    wrap_none = '--wrap=none' \
+                        if pandoc_version >= (1, 16) else '--no-wrap'
+                    cmd = ('pandoc --normalize {0} --from=html'
+                           ' --to={1} {2} -o "{3}" "{4}"')
+                    cmd = cmd.format(parse_raw,
+                                     out_markup if out_markup != 'markdown' else "gfm",
+                                     wrap_none,
+                                     out_filename, html_filename)
+                else:
+                    from_arg = '-f html+raw_html' if not strip_raw else '-f html'
+                    cmd = ('pandoc {0} --to={1}-smart --wrap=none -o "{2}" "{3}"')
+                    cmd = cmd.format(from_arg,
+                                     out_markup if out_markup != 'markdown' else "gfm",
+                                     out_filename, html_filename)

-            if pandoc_version < (2,):
-                parse_raw = '--parse-raw' if not strip_raw else ''
-                wrap_none = '--wrap=none' \
-                    if pandoc_version >= (1, 16) else '--no-wrap'
-                cmd = ('pandoc --normalize {0} --from=html'
-                       ' --to={1} {2} -o "{3}" "{4}"')
-                cmd = cmd.format(parse_raw,
-                                 out_markup if out_markup != 'markdown' else "gfm",
-                                 wrap_none,
-                                 out_filename, html_filename)
-            else:
-                from_arg = '-f html+raw_html' if not strip_raw else '-f html'
-                cmd = ('pandoc {0} --to={1}-smart --wrap=none -o "{2}" "{3}"')
-                cmd = cmd.format(from_arg,
-                                 out_markup if out_markup != 'markdown' else "gfm",
-                                 out_filename, html_filename)
+                try:
+                    rc = subprocess.call(cmd, shell=True)
+                    if rc < 0:
+                        error = 'Child was terminated by signal %d' % -rc
+                        exit(error)

-            try:
-                rc = subprocess.call(cmd, shell=True)
-                if rc < 0:
-                    error = 'Child was terminated by signal %d' % -rc
+                    elif rc > 0:
+                        error = 'Please, check your Pandoc installation.'
+                        exit(error)
+                except OSError as e:
+                    error = 'Pandoc execution failed: %s' % e
                    exit(error)

-                elif rc > 0:
-                    error = 'Please, check your Pandoc installation.'
-                    exit(error)
-            except OSError as e:
-                error = 'Pandoc execution failed: %s' % e
-                exit(error)
-
-            os.remove(html_filename)
-
            with open(out_filename, encoding='utf-8') as fs:
                content = fs.read()
                if out_markup == 'markdown':