diff --git a/pelican/tests/test_importer.py b/pelican/tests/test_importer.py index f45f885c..0d9586f0 100644 --- a/pelican/tests/test_importer.py +++ b/pelican/tests/test_importer.py @@ -1,12 +1,9 @@ -import datetime import locale import os import re from posixpath import join as posix_join from unittest.mock import patch -import dateutil.tz - from pelican.settings import DEFAULT_CONFIG from pelican.tests.support import (mute, skipIfNoExecutable, temporary_folder, unittest) @@ -46,12 +43,9 @@ class TestWithOsDefaults(unittest.TestCase): def setUp(self): self.old_locale = locale.setlocale(locale.LC_ALL) locale.setlocale(locale.LC_ALL, 'C') - self.old_timezone = datetime.datetime.now(dateutil.tz.tzlocal()).tzname() - os.environ['TZ'] = 'UTC' def tearDown(self): locale.setlocale(locale.LC_ALL, self.old_locale) - os.environ['TZ'] = self.old_timezone @skipIfNoExecutable(['pandoc', '--version']) @@ -502,7 +496,7 @@ class TestTumblrImporter(TestWithOsDefaults): { "type": "photo", "blog_name": "testy", - "date": "2019-11-07 21:26:40 GMT", + "date": "2019-11-07 21:26:40 UTC", "timestamp": 1573162000, "format": "html", "slug": "a-slug", @@ -528,7 +522,7 @@ class TestTumblrImporter(TestWithOsDefaults): self.assertEqual( [('Photo', '\n', - '2019-11-07-a-slug', '2019-11-07 21:26:40', 'testy', ['photo'], + '2019-11-07-a-slug', '2019-11-07 21:26:40+0000', 'testy', ['photo'], ['economics'], 'published', 'article', 'html')], posts, posts) @@ -544,7 +538,7 @@ class TestTumblrImporter(TestWithOsDefaults): "type": "video", "blog_name": "testy", "slug": "the-slug", - "date": "2017-07-07 20:31:41 GMT", + "date": "2017-07-07 20:31:41 UTC", "timestamp": 1499459501, "state": "published", "format": "html", @@ -583,7 +577,7 @@ class TestTumblrImporter(TestWithOsDefaults): '\n' '\n', '2017-07-07-the-slug', - '2017-07-07 20:31:41', 'testy', ['video'], [], 'published', + '2017-07-07 20:31:41+0000', 'testy', ['video'], [], 'published', 'article', 'html')], posts, posts) @@ -599,7 +593,7 @@ class 
TestTumblrImporter(TestWithOsDefaults): "type": "video", "blog_name": "testy", "slug": "the-slug", - "date": "2016-08-14 16:37:35 GMT", + "date": "2016-08-14 16:37:35 UTC", "timestamp": 1471192655, "state": "published", "format": "html", @@ -638,7 +632,7 @@ class TestTumblrImporter(TestWithOsDefaults): 'v=b">via

\n

Caption

' '

(This video isn\'t available anymore.)

\n', '2016-08-14-the-slug', - '2016-08-14 16:37:35', 'testy', ['video'], ['interviews'], + '2016-08-14 16:37:35+0000', 'testy', ['video'], ['interviews'], 'published', 'article', 'html')], posts, posts) diff --git a/pelican/tools/pelican_import.py b/pelican/tools/pelican_import.py index 16ce6305..95e196ba 100755 --- a/pelican/tools/pelican_import.py +++ b/pelican/tools/pelican_import.py @@ -1,11 +1,13 @@ #!/usr/bin/env python import argparse +import datetime import logging import os import re import subprocess import sys +import tempfile import time from collections import defaultdict from html import unescape @@ -416,10 +418,12 @@ def tumblr2fields(api_key, blogname): slug = post.get('slug') or slugify(title, regex_subs=subs) tags = post.get('tags') timestamp = post.get('timestamp') - date = SafeDatetime.fromtimestamp(int(timestamp)).strftime( - "%Y-%m-%d %H:%M:%S") - slug = SafeDatetime.fromtimestamp(int(timestamp)).strftime( - "%Y-%m-%d-") + slug + date = SafeDatetime.fromtimestamp( + int(timestamp), tz=datetime.timezone.utc + ).strftime("%Y-%m-%d %H:%M:%S%z") + slug = SafeDatetime.fromtimestamp( + int(timestamp), tz=datetime.timezone.utc + ).strftime("%Y-%m-%d-") + slug format = post.get('format') content = post.get('body') type = post.get('type') @@ -782,9 +786,8 @@ def fields2pelican( print(out_filename) if in_markup in ('html', 'wp-html'): - html_filename = os.path.join(output_path, filename + '.html') - - with open(html_filename, 'w', encoding='utf-8') as fp: + with tempfile.TemporaryDirectory() as tmpdir: + html_filename = os.path.join(tmpdir, 'pandoc-input.html') # Replace newlines with paragraphs wrapped with
<p>
so # HTML is valid before conversion if in_markup == 'wp-html': @@ -793,41 +796,39 @@ def fields2pelican( paragraphs = content.splitlines() paragraphs = ['

<p>{}</p>

'.format(p) for p in paragraphs] new_content = ''.join(paragraphs) + with open(html_filename, 'w', encoding='utf-8') as fp: + fp.write(new_content) - fp.write(new_content) + if pandoc_version < (2,): + parse_raw = '--parse-raw' if not strip_raw else '' + wrap_none = '--wrap=none' \ + if pandoc_version >= (1, 16) else '--no-wrap' + cmd = ('pandoc --normalize {0} --from=html' + ' --to={1} {2} -o "{3}" "{4}"') + cmd = cmd.format(parse_raw, + out_markup if out_markup != 'markdown' else "gfm", + wrap_none, + out_filename, html_filename) + else: + from_arg = '-f html+raw_html' if not strip_raw else '-f html' + cmd = ('pandoc {0} --to={1}-smart --wrap=none -o "{2}" "{3}"') + cmd = cmd.format(from_arg, + out_markup if out_markup != 'markdown' else "gfm", + out_filename, html_filename) - if pandoc_version < (2,): - parse_raw = '--parse-raw' if not strip_raw else '' - wrap_none = '--wrap=none' \ - if pandoc_version >= (1, 16) else '--no-wrap' - cmd = ('pandoc --normalize {0} --from=html' - ' --to={1} {2} -o "{3}" "{4}"') - cmd = cmd.format(parse_raw, - out_markup if out_markup != 'markdown' else "gfm", - wrap_none, - out_filename, html_filename) - else: - from_arg = '-f html+raw_html' if not strip_raw else '-f html' - cmd = ('pandoc {0} --to={1}-smart --wrap=none -o "{2}" "{3}"') - cmd = cmd.format(from_arg, - out_markup if out_markup != 'markdown' else "gfm", - out_filename, html_filename) + try: + rc = subprocess.call(cmd, shell=True) + if rc < 0: + error = 'Child was terminated by signal %d' % -rc + exit(error) - try: - rc = subprocess.call(cmd, shell=True) - if rc < 0: - error = 'Child was terminated by signal %d' % -rc + elif rc > 0: + error = 'Please, check your Pandoc installation.' + exit(error) + except OSError as e: + error = 'Pandoc execution failed: %s' % e exit(error) - elif rc > 0: - error = 'Please, check your Pandoc installation.' 
- exit(error) - except OSError as e: - error = 'Pandoc execution failed: %s' % e - exit(error) - - os.remove(html_filename) - with open(out_filename, encoding='utf-8') as fs: content = fs.read() if out_markup == 'markdown':