From 1404a2dbc32296b6f2d8ceb46287a63e5bb37724 Mon Sep 17 00:00:00 2001
From: boxydog <93335439+boxydog@users.noreply.github.com>
Date: Fri, 27 Oct 2023 14:56:34 -0500
Subject: [PATCH] Remove newline when importing Tumblr post photos (#3215)

Co-authored-by: Dan Frankowski <dfrankow@gmail.com>
---
 pelican/tests/test_importer.py  | 79 ++++++++++++++++++++++++++++-----
 pelican/tools/pelican_import.py | 34 +++++++-------
 2 files changed, 83 insertions(+), 30 deletions(-)

diff --git a/pelican/tests/test_importer.py b/pelican/tests/test_importer.py
index 743cea8c..3855e382 100644
--- a/pelican/tests/test_importer.py
+++ b/pelican/tests/test_importer.py
@@ -1,7 +1,11 @@
+import datetime
 import locale
 import os
 import re
 from posixpath import join as posix_join
+from unittest.mock import patch
+
+import dateutil.tz
 
 from pelican.settings import DEFAULT_CONFIG
 from pelican.tests.support import (mute, skipIfNoExecutable, temporary_folder,
@@ -10,9 +14,12 @@ from pelican.tools.pelican_import import (blogger2fields, build_header,
                                           build_markdown_header,
                                           decode_wp_content,
                                           download_attachments, fields2pelican,
-                                          get_attachments, wp2fields)
+                                          get_attachments, tumblr2fields,
+                                          wp2fields,
+                                          )
 from pelican.utils import path_to_file_url, slugify
 
+
 CUR_DIR = os.path.abspath(os.path.dirname(__file__))
 BLOGGER_XML_SAMPLE = os.path.join(CUR_DIR, 'content', 'bloggerexport.xml')
 WORDPRESS_XML_SAMPLE = os.path.join(CUR_DIR, 'content', 'wordpressexport.xml')
@@ -34,17 +41,26 @@ except ImportError:
     LXML = False
 
 
-@skipIfNoExecutable(['pandoc', '--version'])
-@unittest.skipUnless(BeautifulSoup, 'Needs BeautifulSoup module')
-class TestBloggerXmlImporter(unittest.TestCase):
-
+class TestWithOsDefaults(unittest.TestCase):
+    """Set locale to C and timezone to UTC for tests, then restore."""
     def setUp(self):
         self.old_locale = locale.setlocale(locale.LC_ALL)
         locale.setlocale(locale.LC_ALL, 'C')
-        self.posts = blogger2fields(BLOGGER_XML_SAMPLE)
+        self.old_timezone = datetime.datetime.now(dateutil.tz.tzlocal()).tzname()
+        os.environ['TZ'] = 'UTC'  # NOTE: time.tzset() is not called here
 
     def tearDown(self):
         locale.setlocale(locale.LC_ALL, self.old_locale)
+        os.environ['TZ'] = self.old_timezone  # NOTE: a tzname, not the old $TZ
+
+
+@skipIfNoExecutable(['pandoc', '--version'])
+@unittest.skipUnless(BeautifulSoup, 'Needs BeautifulSoup module')
+class TestBloggerXmlImporter(TestWithOsDefaults):
+
+    def setUp(self):
+        super().setUp()
+        self.posts = blogger2fields(BLOGGER_XML_SAMPLE)
 
     def test_recognise_kind_and_title(self):
         """Check that importer only outputs pages, articles and comments,
@@ -85,17 +101,13 @@ class TestBloggerXmlImporter(unittest.TestCase):
 
 @skipIfNoExecutable(['pandoc', '--version'])
 @unittest.skipUnless(BeautifulSoup, 'Needs BeautifulSoup module')
-class TestWordpressXmlImporter(unittest.TestCase):
+class TestWordpressXmlImporter(TestWithOsDefaults):
 
     def setUp(self):
-        self.old_locale = locale.setlocale(locale.LC_ALL)
-        locale.setlocale(locale.LC_ALL, 'C')
+        super().setUp()
         self.posts = wp2fields(WORDPRESS_XML_SAMPLE)
         self.custposts = wp2fields(WORDPRESS_XML_SAMPLE, True)
 
-    def tearDown(self):
-        locale.setlocale(locale.LC_ALL, self.old_locale)
-
     def test_ignore_empty_posts(self):
         self.assertTrue(self.posts)
         for (title, content, fname, date, author,
@@ -477,3 +489,46 @@ class TestWordpressXMLAttachements(unittest.TestCase):
             self.assertTrue(
                 directory.endswith(posix_join('content', 'article.rst')),
                 directory)
+
+
+class TestTumblrImporter(TestWithOsDefaults):
+    @patch("pelican.tools.pelican_import._get_tumblr_posts")
+    def test_posts(self, get):
+        """Check that a captionless photo post is imported as an <img> tag."""
+        def get_posts(api_key, blogname, offset=0):
+            if offset > 0:
+                return []
+
+            return [
+                {
+                    "type": "photo",
+                    "blog_name": "testy",
+                    "date": "2019-11-07 21:26:40 GMT",
+                    "timestamp": 1573162000,
+                    "format": "html",
+                    "slug": "a-slug",
+                    "tags": [
+                        "economics"
+                    ],
+                    "state": "published",
+                    "photos": [
+                        {
+                            "caption": "",
+                            "original_size": {
+                                "url": "https://..fccdc2360ba7182a.jpg",
+                                "width": 634,
+                                "height": 789
+                            },
+                        }]
+                }
+            ]
+        get.side_effect = get_posts
+
+        posts = list(tumblr2fields("api_key", "blogname"))
+        self.assertEqual(
+            [('Photo',
+              '<img alt="" src="https://..fccdc2360ba7182a.jpg" />\n',
+              '2019-11-07-a-slug', '2019-11-07 21:26:40', 'testy', ['photo'],
+              ['economics'], 'published', 'article', 'html')],
+            posts,
+            posts)
diff --git a/pelican/tools/pelican_import.py b/pelican/tools/pelican_import.py
index cd643ec6..474b5cba 100755
--- a/pelican/tools/pelican_import.py
+++ b/pelican/tools/pelican_import.py
@@ -390,22 +390,22 @@ def dc2fields(file):
                post_format)
 
 
-def tumblr2fields(api_key, blogname):
-    """ Imports Tumblr posts (API v2)"""
+def _get_tumblr_posts(api_key, blogname, offset=0):
+    """Return one page of raw Tumblr API v2 posts, starting at ``offset``."""
     import json
     import urllib.request as urllib_request
+    url = ("https://api.tumblr.com/v2/blog/%s.tumblr.com/"
+           "posts?api_key=%s&offset=%d&filter=raw") % (
+        blogname, api_key, offset)
+    with urllib_request.urlopen(url) as handle:
+        posts = json.loads(handle.read().decode('utf-8'))
+    return posts.get('response').get('posts')
 
-    def get_tumblr_posts(api_key, blogname, offset=0):
-        url = ("https://api.tumblr.com/v2/blog/%s.tumblr.com/"
-               "posts?api_key=%s&offset=%d&filter=raw") % (
-            blogname, api_key, offset)
-        request = urllib_request.Request(url)
-        handle = urllib_request.urlopen(request)
-        posts = json.loads(handle.read().decode('utf-8'))
-        return posts.get('response').get('posts')
 
+def tumblr2fields(api_key, blogname):
+    """ Imports Tumblr posts (API v2)"""
     offset = 0
-    posts = get_tumblr_posts(api_key, blogname, offset)
+    posts = _get_tumblr_posts(api_key, blogname, offset)
     subs = DEFAULT_CONFIG['SLUG_REGEX_SUBSTITUTIONS']
     while len(posts) > 0:
         for post in posts:
@@ -428,12 +428,10 @@ def tumblr2fields(api_key, blogname):
                     fmtstr = '![%s](%s)'
                 else:
                     fmtstr = '<img alt="%s" src="%s" />'
-                content = ''
-                for photo in post.get('photos'):
-                    content += '\n'.join(
-                        fmtstr % (photo.get('caption'),
-                                  photo.get('original_size').get('url')))
-                content += '\n\n' + post.get('caption')
+                content = '\n'.join(
+                    fmtstr % (photo.get('caption'),
+                              photo.get('original_size').get('url'))
+                    for photo in post.get('photos'))
             elif type == 'quote':
                 if format == 'markdown':
                     fmtstr = '\n\n&mdash; %s'
@@ -483,7 +481,7 @@ def tumblr2fields(api_key, blogname):
                    tags, status, kind, format)
 
         offset += len(posts)
-        posts = get_tumblr_posts(api_key, blogname, offset)
+        posts = _get_tumblr_posts(api_key, blogname, offset)
 
 
 def feed2fields(file):