diff --git a/pelican/tests/test_importer.py b/pelican/tests/test_importer.py
index f45f885c..0d9586f0 100644
--- a/pelican/tests/test_importer.py
+++ b/pelican/tests/test_importer.py
@@ -1,12 +1,9 @@
-import datetime
import locale
import os
import re
from posixpath import join as posix_join
from unittest.mock import patch
-import dateutil.tz
-
from pelican.settings import DEFAULT_CONFIG
from pelican.tests.support import (mute, skipIfNoExecutable, temporary_folder,
unittest)
@@ -46,12 +43,9 @@ class TestWithOsDefaults(unittest.TestCase):
def setUp(self):
self.old_locale = locale.setlocale(locale.LC_ALL)
locale.setlocale(locale.LC_ALL, 'C')
- self.old_timezone = datetime.datetime.now(dateutil.tz.tzlocal()).tzname()
- os.environ['TZ'] = 'UTC'
def tearDown(self):
locale.setlocale(locale.LC_ALL, self.old_locale)
- os.environ['TZ'] = self.old_timezone
@skipIfNoExecutable(['pandoc', '--version'])
@@ -502,7 +496,7 @@ class TestTumblrImporter(TestWithOsDefaults):
{
"type": "photo",
"blog_name": "testy",
- "date": "2019-11-07 21:26:40 GMT",
+ "date": "2019-11-07 21:26:40 UTC",
"timestamp": 1573162000,
"format": "html",
"slug": "a-slug",
@@ -528,7 +522,7 @@ class TestTumblrImporter(TestWithOsDefaults):
self.assertEqual(
[('Photo',
'
\n',
- '2019-11-07-a-slug', '2019-11-07 21:26:40', 'testy', ['photo'],
+ '2019-11-07-a-slug', '2019-11-07 21:26:40+0000', 'testy', ['photo'],
['economics'], 'published', 'article', 'html')],
posts,
posts)
@@ -544,7 +538,7 @@ class TestTumblrImporter(TestWithOsDefaults):
"type": "video",
"blog_name": "testy",
"slug": "the-slug",
- "date": "2017-07-07 20:31:41 GMT",
+ "date": "2017-07-07 20:31:41 UTC",
"timestamp": 1499459501,
"state": "published",
"format": "html",
@@ -583,7 +577,7 @@ class TestTumblrImporter(TestWithOsDefaults):
'\n'
'\n',
'2017-07-07-the-slug',
- '2017-07-07 20:31:41', 'testy', ['video'], [], 'published',
+ '2017-07-07 20:31:41+0000', 'testy', ['video'], [], 'published',
'article', 'html')],
posts,
posts)
@@ -599,7 +593,7 @@ class TestTumblrImporter(TestWithOsDefaults):
"type": "video",
"blog_name": "testy",
"slug": "the-slug",
- "date": "2016-08-14 16:37:35 GMT",
+ "date": "2016-08-14 16:37:35 UTC",
"timestamp": 1471192655,
"state": "published",
"format": "html",
@@ -638,7 +632,7 @@ class TestTumblrImporter(TestWithOsDefaults):
'v=b">via
Caption
' '(This video isn\'t available anymore.)
\n', '2016-08-14-the-slug', - '2016-08-14 16:37:35', 'testy', ['video'], ['interviews'], + '2016-08-14 16:37:35+0000', 'testy', ['video'], ['interviews'], 'published', 'article', 'html')], posts, posts) diff --git a/pelican/tools/pelican_import.py b/pelican/tools/pelican_import.py index 16ce6305..95e196ba 100755 --- a/pelican/tools/pelican_import.py +++ b/pelican/tools/pelican_import.py @@ -1,11 +1,13 @@ #!/usr/bin/env python import argparse +import datetime import logging import os import re import subprocess import sys +import tempfile import time from collections import defaultdict from html import unescape @@ -416,10 +418,12 @@ def tumblr2fields(api_key, blogname): slug = post.get('slug') or slugify(title, regex_subs=subs) tags = post.get('tags') timestamp = post.get('timestamp') - date = SafeDatetime.fromtimestamp(int(timestamp)).strftime( - "%Y-%m-%d %H:%M:%S") - slug = SafeDatetime.fromtimestamp(int(timestamp)).strftime( - "%Y-%m-%d-") + slug + date = SafeDatetime.fromtimestamp( + int(timestamp), tz=datetime.timezone.utc + ).strftime("%Y-%m-%d %H:%M:%S%z") + slug = SafeDatetime.fromtimestamp( + int(timestamp), tz=datetime.timezone.utc + ).strftime("%Y-%m-%d-") + slug format = post.get('format') content = post.get('body') type = post.get('type') @@ -782,9 +786,8 @@ def fields2pelican( print(out_filename) if in_markup in ('html', 'wp-html'): - html_filename = os.path.join(output_path, filename + '.html') - - with open(html_filename, 'w', encoding='utf-8') as fp: + with tempfile.TemporaryDirectory() as tmpdir: + html_filename = os.path.join(tmpdir, 'pandoc-input.html') # Replace newlines with paragraphs wrapped with <p> so # HTML is valid before conversion if in_markup == 'wp-html': @@ -793,41 +796,39 @@ def fields2pelican( paragraphs = content.splitlines() paragraphs = ['
{}
'.format(p) for p in paragraphs] new_content = ''.join(paragraphs) + with open(html_filename, 'w', encoding='utf-8') as fp: + fp.write(new_content) - fp.write(new_content) + if pandoc_version < (2,): + parse_raw = '--parse-raw' if not strip_raw else '' + wrap_none = '--wrap=none' \ + if pandoc_version >= (1, 16) else '--no-wrap' + cmd = ('pandoc --normalize {0} --from=html' + ' --to={1} {2} -o "{3}" "{4}"') + cmd = cmd.format(parse_raw, + out_markup if out_markup != 'markdown' else "gfm", + wrap_none, + out_filename, html_filename) + else: + from_arg = '-f html+raw_html' if not strip_raw else '-f html' + cmd = ('pandoc {0} --to={1}-smart --wrap=none -o "{2}" "{3}"') + cmd = cmd.format(from_arg, + out_markup if out_markup != 'markdown' else "gfm", + out_filename, html_filename) - if pandoc_version < (2,): - parse_raw = '--parse-raw' if not strip_raw else '' - wrap_none = '--wrap=none' \ - if pandoc_version >= (1, 16) else '--no-wrap' - cmd = ('pandoc --normalize {0} --from=html' - ' --to={1} {2} -o "{3}" "{4}"') - cmd = cmd.format(parse_raw, - out_markup if out_markup != 'markdown' else "gfm", - wrap_none, - out_filename, html_filename) - else: - from_arg = '-f html+raw_html' if not strip_raw else '-f html' - cmd = ('pandoc {0} --to={1}-smart --wrap=none -o "{2}" "{3}"') - cmd = cmd.format(from_arg, - out_markup if out_markup != 'markdown' else "gfm", - out_filename, html_filename) + try: + rc = subprocess.call(cmd, shell=True) + if rc < 0: + error = 'Child was terminated by signal %d' % -rc + exit(error) - try: - rc = subprocess.call(cmd, shell=True) - if rc < 0: - error = 'Child was terminated by signal %d' % -rc + elif rc > 0: + error = 'Please, check your Pandoc installation.' + exit(error) + except OSError as e: + error = 'Pandoc execution failed: %s' % e exit(error) - elif rc > 0: - error = 'Please, check your Pandoc installation.' 
- exit(error) - except OSError as e: - error = 'Pandoc execution failed: %s' % e - exit(error) - - os.remove(html_filename) - with open(out_filename, encoding='utf-8') as fs: content = fs.read() if out_markup == 'markdown':