forked from github/pelican
Remove newline when importing Tumblr post photos (#3215)
Co-authored-by: Dan Frankowski <dfrankow@gmail.com>
This commit is contained in:
parent
fab6e1a2c5
commit
1404a2dbc3
2 changed files with 83 additions and 30 deletions
|
|
@ -1,7 +1,11 @@
|
||||||
|
import datetime
|
||||||
import locale
|
import locale
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
from posixpath import join as posix_join
|
from posixpath import join as posix_join
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
|
import dateutil.tz
|
||||||
|
|
||||||
from pelican.settings import DEFAULT_CONFIG
|
from pelican.settings import DEFAULT_CONFIG
|
||||||
from pelican.tests.support import (mute, skipIfNoExecutable, temporary_folder,
|
from pelican.tests.support import (mute, skipIfNoExecutable, temporary_folder,
|
||||||
|
|
@ -10,9 +14,12 @@ from pelican.tools.pelican_import import (blogger2fields, build_header,
|
||||||
build_markdown_header,
|
build_markdown_header,
|
||||||
decode_wp_content,
|
decode_wp_content,
|
||||||
download_attachments, fields2pelican,
|
download_attachments, fields2pelican,
|
||||||
get_attachments, wp2fields)
|
get_attachments, tumblr2fields,
|
||||||
|
wp2fields,
|
||||||
|
)
|
||||||
from pelican.utils import path_to_file_url, slugify
|
from pelican.utils import path_to_file_url, slugify
|
||||||
|
|
||||||
|
|
||||||
CUR_DIR = os.path.abspath(os.path.dirname(__file__))
|
CUR_DIR = os.path.abspath(os.path.dirname(__file__))
|
||||||
BLOGGER_XML_SAMPLE = os.path.join(CUR_DIR, 'content', 'bloggerexport.xml')
|
BLOGGER_XML_SAMPLE = os.path.join(CUR_DIR, 'content', 'bloggerexport.xml')
|
||||||
WORDPRESS_XML_SAMPLE = os.path.join(CUR_DIR, 'content', 'wordpressexport.xml')
|
WORDPRESS_XML_SAMPLE = os.path.join(CUR_DIR, 'content', 'wordpressexport.xml')
|
||||||
|
|
@ -34,17 +41,26 @@ except ImportError:
|
||||||
LXML = False
|
LXML = False
|
||||||
|
|
||||||
|
|
||||||
@skipIfNoExecutable(['pandoc', '--version'])
|
class TestWithOsDefaults(unittest.TestCase):
|
||||||
@unittest.skipUnless(BeautifulSoup, 'Needs BeautifulSoup module')
|
"""Set locale to C and timezone to UTC for tests, then restore."""
|
||||||
class TestBloggerXmlImporter(unittest.TestCase):
|
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.old_locale = locale.setlocale(locale.LC_ALL)
|
self.old_locale = locale.setlocale(locale.LC_ALL)
|
||||||
locale.setlocale(locale.LC_ALL, 'C')
|
locale.setlocale(locale.LC_ALL, 'C')
|
||||||
self.posts = blogger2fields(BLOGGER_XML_SAMPLE)
|
self.old_timezone = datetime.datetime.now(dateutil.tz.tzlocal()).tzname()
|
||||||
|
os.environ['TZ'] = 'UTC'
|
||||||
|
|
||||||
def tearDown(self):
|
def tearDown(self):
|
||||||
locale.setlocale(locale.LC_ALL, self.old_locale)
|
locale.setlocale(locale.LC_ALL, self.old_locale)
|
||||||
|
os.environ['TZ'] = self.old_timezone
|
||||||
|
|
||||||
|
|
||||||
|
@skipIfNoExecutable(['pandoc', '--version'])
|
||||||
|
@unittest.skipUnless(BeautifulSoup, 'Needs BeautifulSoup module')
|
||||||
|
class TestBloggerXmlImporter(TestWithOsDefaults):
|
||||||
|
|
||||||
|
def setUp(self):
|
||||||
|
super().setUp()
|
||||||
|
self.posts = blogger2fields(BLOGGER_XML_SAMPLE)
|
||||||
|
|
||||||
def test_recognise_kind_and_title(self):
|
def test_recognise_kind_and_title(self):
|
||||||
"""Check that importer only outputs pages, articles and comments,
|
"""Check that importer only outputs pages, articles and comments,
|
||||||
|
|
@ -85,17 +101,13 @@ class TestBloggerXmlImporter(unittest.TestCase):
|
||||||
|
|
||||||
@skipIfNoExecutable(['pandoc', '--version'])
|
@skipIfNoExecutable(['pandoc', '--version'])
|
||||||
@unittest.skipUnless(BeautifulSoup, 'Needs BeautifulSoup module')
|
@unittest.skipUnless(BeautifulSoup, 'Needs BeautifulSoup module')
|
||||||
class TestWordpressXmlImporter(unittest.TestCase):
|
class TestWordpressXmlImporter(TestWithOsDefaults):
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.old_locale = locale.setlocale(locale.LC_ALL)
|
super().setUp()
|
||||||
locale.setlocale(locale.LC_ALL, 'C')
|
|
||||||
self.posts = wp2fields(WORDPRESS_XML_SAMPLE)
|
self.posts = wp2fields(WORDPRESS_XML_SAMPLE)
|
||||||
self.custposts = wp2fields(WORDPRESS_XML_SAMPLE, True)
|
self.custposts = wp2fields(WORDPRESS_XML_SAMPLE, True)
|
||||||
|
|
||||||
def tearDown(self):
|
|
||||||
locale.setlocale(locale.LC_ALL, self.old_locale)
|
|
||||||
|
|
||||||
def test_ignore_empty_posts(self):
|
def test_ignore_empty_posts(self):
|
||||||
self.assertTrue(self.posts)
|
self.assertTrue(self.posts)
|
||||||
for (title, content, fname, date, author,
|
for (title, content, fname, date, author,
|
||||||
|
|
@ -477,3 +489,46 @@ class TestWordpressXMLAttachements(unittest.TestCase):
|
||||||
self.assertTrue(
|
self.assertTrue(
|
||||||
directory.endswith(posix_join('content', 'article.rst')),
|
directory.endswith(posix_join('content', 'article.rst')),
|
||||||
directory)
|
directory)
|
||||||
|
|
||||||
|
|
||||||
|
class TestTumblrImporter(TestWithOsDefaults):
|
||||||
|
@patch("pelican.tools.pelican_import._get_tumblr_posts")
|
||||||
|
def test_posts(self, get):
|
||||||
|
def get_posts(api_key, blogname, offset=0):
|
||||||
|
if offset > 0:
|
||||||
|
return []
|
||||||
|
|
||||||
|
return [
|
||||||
|
{
|
||||||
|
"type": "photo",
|
||||||
|
"blog_name": "testy",
|
||||||
|
"date": "2019-11-07 21:26:40 GMT",
|
||||||
|
"timestamp": 1573162000,
|
||||||
|
"format": "html",
|
||||||
|
"slug": "a-slug",
|
||||||
|
"tags": [
|
||||||
|
"economics"
|
||||||
|
],
|
||||||
|
"state": "published",
|
||||||
|
|
||||||
|
"photos": [
|
||||||
|
{
|
||||||
|
"caption": "",
|
||||||
|
"original_size": {
|
||||||
|
"url": "https://..fccdc2360ba7182a.jpg",
|
||||||
|
"width": 634,
|
||||||
|
"height": 789
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
get.side_effect = get_posts
|
||||||
|
|
||||||
|
posts = list(tumblr2fields("api_key", "blogname"))
|
||||||
|
self.assertEqual(
|
||||||
|
[('Photo',
|
||||||
|
'<img alt="" src="https://..fccdc2360ba7182a.jpg" />\n',
|
||||||
|
'2019-11-07-a-slug', '2019-11-07 21:26:40', 'testy', ['photo'],
|
||||||
|
['economics'], 'published', 'article', 'html')],
|
||||||
|
posts,
|
||||||
|
posts)
|
||||||
|
|
|
||||||
|
|
@ -390,22 +390,22 @@ def dc2fields(file):
|
||||||
post_format)
|
post_format)
|
||||||
|
|
||||||
|
|
||||||
def tumblr2fields(api_key, blogname):
|
def _get_tumblr_posts(api_key, blogname, offset=0):
|
||||||
""" Imports Tumblr posts (API v2)"""
|
|
||||||
import json
|
import json
|
||||||
import urllib.request as urllib_request
|
import urllib.request as urllib_request
|
||||||
|
url = ("https://api.tumblr.com/v2/blog/%s.tumblr.com/"
|
||||||
|
"posts?api_key=%s&offset=%d&filter=raw") % (
|
||||||
|
blogname, api_key, offset)
|
||||||
|
request = urllib_request.Request(url)
|
||||||
|
handle = urllib_request.urlopen(request)
|
||||||
|
posts = json.loads(handle.read().decode('utf-8'))
|
||||||
|
return posts.get('response').get('posts')
|
||||||
|
|
||||||
def get_tumblr_posts(api_key, blogname, offset=0):
|
|
||||||
url = ("https://api.tumblr.com/v2/blog/%s.tumblr.com/"
|
|
||||||
"posts?api_key=%s&offset=%d&filter=raw") % (
|
|
||||||
blogname, api_key, offset)
|
|
||||||
request = urllib_request.Request(url)
|
|
||||||
handle = urllib_request.urlopen(request)
|
|
||||||
posts = json.loads(handle.read().decode('utf-8'))
|
|
||||||
return posts.get('response').get('posts')
|
|
||||||
|
|
||||||
|
def tumblr2fields(api_key, blogname):
|
||||||
|
""" Imports Tumblr posts (API v2)"""
|
||||||
offset = 0
|
offset = 0
|
||||||
posts = get_tumblr_posts(api_key, blogname, offset)
|
posts = _get_tumblr_posts(api_key, blogname, offset)
|
||||||
subs = DEFAULT_CONFIG['SLUG_REGEX_SUBSTITUTIONS']
|
subs = DEFAULT_CONFIG['SLUG_REGEX_SUBSTITUTIONS']
|
||||||
while len(posts) > 0:
|
while len(posts) > 0:
|
||||||
for post in posts:
|
for post in posts:
|
||||||
|
|
@ -428,12 +428,10 @@ def tumblr2fields(api_key, blogname):
|
||||||
fmtstr = ''
|
fmtstr = ''
|
||||||
else:
|
else:
|
||||||
fmtstr = '<img alt="%s" src="%s" />'
|
fmtstr = '<img alt="%s" src="%s" />'
|
||||||
content = ''
|
content = '\n'.join(
|
||||||
for photo in post.get('photos'):
|
fmtstr % (photo.get('caption'),
|
||||||
content += '\n'.join(
|
photo.get('original_size').get('url'))
|
||||||
fmtstr % (photo.get('caption'),
|
for photo in post.get('photos'))
|
||||||
photo.get('original_size').get('url')))
|
|
||||||
content += '\n\n' + post.get('caption')
|
|
||||||
elif type == 'quote':
|
elif type == 'quote':
|
||||||
if format == 'markdown':
|
if format == 'markdown':
|
||||||
fmtstr = '\n\n— %s'
|
fmtstr = '\n\n— %s'
|
||||||
|
|
@ -483,7 +481,7 @@ def tumblr2fields(api_key, blogname):
|
||||||
tags, status, kind, format)
|
tags, status, kind, format)
|
||||||
|
|
||||||
offset += len(posts)
|
offset += len(posts)
|
||||||
posts = get_tumblr_posts(api_key, blogname, offset)
|
posts = _get_tumblr_posts(api_key, blogname, offset)
|
||||||
|
|
||||||
|
|
||||||
def feed2fields(file):
|
def feed2fields(file):
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue