From 1404a2dbc32296b6f2d8ceb46287a63e5bb37724 Mon Sep 17 00:00:00 2001 From: boxydog <93335439+boxydog@users.noreply.github.com> Date: Fri, 27 Oct 2023 14:56:34 -0500 Subject: [PATCH] Remove newline when importing Tumblr post photos (#3215) Co-authored-by: Dan Frankowski --- pelican/tests/test_importer.py | 79 ++++++++++++++++++++++++++++----- pelican/tools/pelican_import.py | 34 +++++++------- 2 files changed, 83 insertions(+), 30 deletions(-) diff --git a/pelican/tests/test_importer.py b/pelican/tests/test_importer.py index 743cea8c..3855e382 100644 --- a/pelican/tests/test_importer.py +++ b/pelican/tests/test_importer.py @@ -1,7 +1,11 @@ +import datetime import locale import os import re from posixpath import join as posix_join +from unittest.mock import patch + +import dateutil.tz from pelican.settings import DEFAULT_CONFIG from pelican.tests.support import (mute, skipIfNoExecutable, temporary_folder, @@ -10,9 +14,12 @@ from pelican.tools.pelican_import import (blogger2fields, build_header, build_markdown_header, decode_wp_content, download_attachments, fields2pelican, - get_attachments, wp2fields) + get_attachments, tumblr2fields, + wp2fields, + ) from pelican.utils import path_to_file_url, slugify + CUR_DIR = os.path.abspath(os.path.dirname(__file__)) BLOGGER_XML_SAMPLE = os.path.join(CUR_DIR, 'content', 'bloggerexport.xml') WORDPRESS_XML_SAMPLE = os.path.join(CUR_DIR, 'content', 'wordpressexport.xml') @@ -34,17 +41,26 @@ except ImportError: LXML = False -@skipIfNoExecutable(['pandoc', '--version']) -@unittest.skipUnless(BeautifulSoup, 'Needs BeautifulSoup module') -class TestBloggerXmlImporter(unittest.TestCase): - +class TestWithOsDefaults(unittest.TestCase): + """Set locale to C and timezone to UTC for tests, then restore.""" def setUp(self): self.old_locale = locale.setlocale(locale.LC_ALL) locale.setlocale(locale.LC_ALL, 'C') - self.posts = blogger2fields(BLOGGER_XML_SAMPLE) + self.old_timezone = datetime.datetime.now(dateutil.tz.tzlocal()).tzname() + os.environ['TZ'] = 'UTC' def tearDown(self): locale.setlocale(locale.LC_ALL, self.old_locale) + os.environ['TZ'] = self.old_timezone + + +@skipIfNoExecutable(['pandoc', '--version']) +@unittest.skipUnless(BeautifulSoup, 'Needs BeautifulSoup module') +class TestBloggerXmlImporter(TestWithOsDefaults): + + def setUp(self): + super().setUp() + self.posts = blogger2fields(BLOGGER_XML_SAMPLE) def test_recognise_kind_and_title(self): """Check that importer only outputs pages, articles and comments, @@ -85,17 +101,13 @@ class TestBloggerXmlImporter(unittest.TestCase): @skipIfNoExecutable(['pandoc', '--version']) @unittest.skipUnless(BeautifulSoup, 'Needs BeautifulSoup module') -class TestWordpressXmlImporter(unittest.TestCase): +class TestWordpressXmlImporter(TestWithOsDefaults): def setUp(self): - self.old_locale = locale.setlocale(locale.LC_ALL) - locale.setlocale(locale.LC_ALL, 'C') + super().setUp() self.posts = wp2fields(WORDPRESS_XML_SAMPLE) self.custposts = wp2fields(WORDPRESS_XML_SAMPLE, True) - def tearDown(self): - locale.setlocale(locale.LC_ALL, self.old_locale) - def test_ignore_empty_posts(self): self.assertTrue(self.posts) for (title, content, fname, date, author, @@ -477,3 +489,46 @@ class TestWordpressXMLAttachements(unittest.TestCase): self.assertTrue( directory.endswith(posix_join('content', 'article.rst')), directory) + + +class TestTumblrImporter(TestWithOsDefaults): + @patch("pelican.tools.pelican_import._get_tumblr_posts") + def test_posts(self, get): + def get_posts(api_key, blogname, offset=0): + if offset > 0: + return [] + + return [ + { + "type": "photo", + "blog_name": "testy", + "date": "2019-11-07 21:26:40 GMT", + "timestamp": 1573162000, + "format": "html", + "slug": "a-slug", + "tags": [ + "economics" + ], + "state": "published", + + "photos": [ + { + "caption": "", + "original_size": { + "url": "https://..fccdc2360ba7182a.jpg", + "width": 634, + "height": 789 + }, + }] + } + ] + get.side_effect = get_posts + + posts = list(tumblr2fields("api_key", "blogname")) + self.assertEqual( + [('Photo', + '\n', + '2019-11-07-a-slug', '2019-11-07 21:26:40', 'testy', ['photo'], + ['economics'], 'published', 'article', 'html')], + posts, + posts) diff --git a/pelican/tools/pelican_import.py b/pelican/tools/pelican_import.py index cd643ec6..474b5cba 100755 --- a/pelican/tools/pelican_import.py +++ b/pelican/tools/pelican_import.py @@ -390,22 +390,22 @@ def dc2fields(file): post_format) -def tumblr2fields(api_key, blogname): - """ Imports Tumblr posts (API v2)""" +def _get_tumblr_posts(api_key, blogname, offset=0): import json import urllib.request as urllib_request + url = ("https://api.tumblr.com/v2/blog/%s.tumblr.com/" + "posts?api_key=%s&offset=%d&filter=raw") % ( + blogname, api_key, offset) + request = urllib_request.Request(url) + handle = urllib_request.urlopen(request) + posts = json.loads(handle.read().decode('utf-8')) + return posts.get('response').get('posts') - def get_tumblr_posts(api_key, blogname, offset=0): - url = ("https://api.tumblr.com/v2/blog/%s.tumblr.com/" - "posts?api_key=%s&offset=%d&filter=raw") % ( - blogname, api_key, offset) - request = urllib_request.Request(url) - handle = urllib_request.urlopen(request) - posts = json.loads(handle.read().decode('utf-8')) - return posts.get('response').get('posts') +def tumblr2fields(api_key, blogname): + """ Imports Tumblr posts (API v2)""" offset = 0 - posts = get_tumblr_posts(api_key, blogname, offset) + posts = _get_tumblr_posts(api_key, blogname, offset) subs = DEFAULT_CONFIG['SLUG_REGEX_SUBSTITUTIONS'] while len(posts) > 0: for post in posts: @@ -428,12 +428,10 @@ def tumblr2fields(api_key, blogname): fmtstr = '![%s](%s)' else: fmtstr = '%s' - content = '' - for photo in post.get('photos'): - content += '\n'.join( - fmtstr % (photo.get('caption'), - photo.get('original_size').get('url'))) - content += '\n\n' + post.get('caption') + content = '\n'.join( + fmtstr % (photo.get('caption'), + photo.get('original_size').get('url')) + for photo in post.get('photos')) elif type == 'quote': if format == 'markdown': fmtstr = '\n\n— %s' @@ -483,7 +481,7 @@ def tumblr2fields(api_key, blogname): tags, status, kind, format) offset += len(posts) - posts = get_tumblr_posts(api_key, blogname, offset) + posts = _get_tumblr_posts(api_key, blogname, offset) def feed2fields(file):