# Forked from github/pelican -- 788 lines, 27 KiB, Python.
import os
|
|
import re
|
|
from posixpath import join as posix_join
|
|
from unittest.mock import patch
|
|
|
|
from pelican.settings import DEFAULT_CONFIG
|
|
from pelican.tests.support import (
|
|
TestCaseWithCLocale,
|
|
mute,
|
|
skipIfNoExecutable,
|
|
temporary_folder,
|
|
unittest,
|
|
)
|
|
from pelican.tools.pelican_import import (
|
|
blogger2fields,
|
|
build_header,
|
|
build_markdown_header,
|
|
decode_wp_content,
|
|
download_attachments,
|
|
fields2pelican,
|
|
get_attachments,
|
|
medium_slug,
|
|
mediumpost2fields,
|
|
mediumposts2fields,
|
|
strip_medium_post_content,
|
|
tumblr2fields,
|
|
wp2fields,
|
|
)
|
|
from pelican.utils import path_to_file_url, slugify
|
|
|
|
# Absolute directory that contains this test module; every fixture path
# below is built from it so the tests do not depend on the working directory.
CUR_DIR = os.path.dirname(os.path.abspath(__file__))

# Sample export files shipped in the "content" directory next to this module.
BLOGGER_XML_SAMPLE = os.path.join(CUR_DIR, "content", "bloggerexport.xml")
WORDPRESS_XML_SAMPLE = os.path.join(CUR_DIR, "content", "wordpressexport.xml")

# Encoded WordPress content and the decoded text it is compared against.
WORDPRESS_ENCODED_CONTENT_SAMPLE = os.path.join(
    CUR_DIR, "content", "wordpress_content_encoded"
)
WORDPRESS_DECODED_CONTENT_SAMPLE = os.path.join(
    CUR_DIR, "content", "wordpress_content_decoded"
)
|
|
|
|
# Optional dependency: when bs4 is missing, ``BeautifulSoup`` is bound to
# False so it can be used directly as a ``skipUnless`` condition.
try:
    from bs4 import BeautifulSoup
except ImportError:
    BeautifulSoup = False

# Optional dependency: bs4's lxml builder module, probed separately because
# bs4 may be importable while this builder module is not.
try:
    import bs4.builder._lxml as LXML
except ImportError:
    LXML = False
|
|
|
|
|
|
@skipIfNoExecutable(["pandoc", "--version"])
@unittest.skipUnless(BeautifulSoup, "Needs BeautifulSoup module")
class TestBloggerXmlImporter(TestCaseWithCLocale):
    """Run ``blogger2fields`` over the sample Blogger export.

    The tests index each produced item positionally: 0 is the title,
    2 the filename, 7 the status and 8 the kind.
    """

    def setUp(self):
        super().setUp()
        self.posts = blogger2fields(BLOGGER_XML_SAMPLE)

    def test_recognise_kind_and_title(self):
        """Check that importer only outputs pages, articles and comments,
        that these are correctly identified and that titles are correct.
        """
        # Group every title under its kind in a single pass.
        titles_by_kind = {}
        for entry in self.posts:
            titles_by_kind.setdefault(entry[8], set()).add(entry[0])

        self.assertEqual({"page", "article", "comment"}, set(titles_by_kind))
        self.assertEqual({"Test page", "Test page 2"}, titles_by_kind["page"])
        self.assertEqual(
            {"Black as Egypt's Night", "The Steel Windpipe"},
            titles_by_kind["article"],
        )
        self.assertEqual(
            {"Mishka, always a pleasure to read your adventures!..."},
            titles_by_kind["comment"],
        )

    def test_recognise_status_with_correct_filename(self):
        """Check that importer outputs only statuses 'published' and 'draft',
        that these are correctly identified and that filenames are correct.
        """
        # Group every filename under its status in a single pass.
        filenames_by_status = {}
        for entry in self.posts:
            filenames_by_status.setdefault(entry[7], set()).add(entry[2])

        self.assertEqual({"published", "draft"}, set(filenames_by_status))

        # draft filenames are id-based
        self.assertEqual(
            {"page-4386962582497458967", "post-1276418104709695660"},
            filenames_by_status["draft"],
        )

        # published filenames are url-based, except comments
        self.assertEqual(
            {"the-steel-windpipe", "test-page", "post-5590533389087749201"},
            filenames_by_status["published"],
        )
|
|
|
|
|
|
@skipIfNoExecutable(["pandoc", "--version"])
@unittest.skipUnless(BeautifulSoup, "Needs BeautifulSoup module")
class TestWordpressXmlImporter(TestCaseWithCLocale):
    """Tests for importing the sample WordPress XML export.

    Items produced by ``wp2fields`` are handled here as 10-field sequences:
    ``(title, content, filename, date, author, categories, tags, status,
    kind, format)``.  The constants below name the positions these tests use.
    """

    _TITLE = 0
    _FILENAME = 2
    _CATEGORIES = 5
    _KIND = 8

    def setUp(self):
        super().setUp()
        self.posts = wp2fields(WORDPRESS_XML_SAMPLE)
        # Second positional argument enables custom post types.
        self.custposts = wp2fields(WORDPRESS_XML_SAMPLE, True)
        # fields2pelican wrapped with the project's ``mute(True)`` decorator.
        self.silent_f2p = mute(True)(fields2pelican)

    # -- helpers ----------------------------------------------------------

    @staticmethod
    def _read_text(path):
        """Return the whole content of ``path`` decoded as UTF-8."""
        with open(path, encoding="utf-8") as infile:
            return infile.read()

    def _markdown_for(self, title_prefix):
        """Convert posts whose title starts with ``title_prefix`` to Markdown
        and return the text of the first file produced.
        """
        matching = (
            post
            for post in self.posts
            if post[self._TITLE].startswith(title_prefix)
        )
        with temporary_folder() as temp:
            # The file must be read before the temporary folder is removed.
            return next(
                self._read_text(path)
                for path in self.silent_f2p(matching, "markdown", temp)
            )

    def _custom_type_posts(self):
        """Return the custom-post items that are neither articles nor pages."""
        return [
            post
            for post in self.custposts
            if post[self._KIND] not in {"article", "page"}
        ]

    def _assert_output_suffixes(self, fnames, expected_suffixes):
        """Assert that the i-th output path ends with the i-th expected suffix."""
        self.assertGreaterEqual(len(fnames), len(expected_suffixes))
        for out_name, suffix in zip(fnames, expected_suffixes):
            self.assertTrue(
                out_name.endswith(suffix),
                f"{out_name!r} does not end with {suffix!r}",
            )

    # -- tests ------------------------------------------------------------

    def test_ignore_empty_posts(self):
        """Every imported item must have a non-blank title."""
        self.assertTrue(self.posts)
        for post in self.posts:
            self.assertTrue(post[self._TITLE].strip())

    def test_recognise_page_kind(self):
        """Check that we recognise pages in wordpress, as opposed to posts"""
        self.assertTrue(self.posts)
        # Collect (title, filename) of non-empty posts recognised as page
        pages_data = [
            (post[self._TITLE], post[self._FILENAME])
            for post in self.posts
            if post[self._KIND] == "page"
        ]
        self.assertEqual([("Page", "contact"), ("Empty Page", "empty")], pages_data)

    def test_dirpage_directive_for_page_kind(self):
        """With ``dirpage=True`` a page is written under a ``pages`` directory."""
        test_post = filter(
            lambda p: p[self._TITLE].startswith("Empty Page"), self.posts
        )
        with temporary_folder() as temp:
            fname = next(
                iter(self.silent_f2p(test_post, "markdown", temp, dirpage=True))
            )
        self.assertTrue(fname.endswith(os.path.join("pages", "empty.md")), fname)

    def test_dircat(self):
        """With ``dircat=True`` output goes under the slugified first category."""
        # Only posts that have at least one category.
        test_posts = [
            post for post in self.posts if len(post[self._CATEGORIES]) > 0
        ]
        with temporary_folder() as temp:
            fnames = list(self.silent_f2p(test_posts, "markdown", temp, dircat=True))

        subs = DEFAULT_CONFIG["SLUG_REGEX_SUBSTITUTIONS"]
        expected = [
            os.path.join(
                slugify(
                    post[self._CATEGORIES][0], regex_subs=subs, preserve_case=True
                ),
                post[self._FILENAME] + ".md",
            )
            for post in test_posts
        ]
        self._assert_output_suffixes(fnames, expected)

    def test_unless_custom_post_all_items_should_be_pages_or_posts(self):
        """Without custom post support only pages and articles are produced."""
        self.assertTrue(self.posts)
        unexpected = [
            (post[self._TITLE], post[self._FILENAME])
            for post in self.posts
            if post[self._KIND] not in {"page", "article"}
        ]
        self.assertEqual([], unexpected)

    def test_recognise_custom_post_type(self):
        """With custom post support the custom kinds are reported as-is."""
        self.assertTrue(self.custposts)
        cust_data = [
            (post[self._TITLE], post[self._KIND])
            for post in self._custom_type_posts()
        ]
        self.assertEqual(
            [
                ("A custom post in category 4", "custom1"),
                ("A custom post in category 5", "custom1"),
                ("A 2nd custom post type also in category 5", "custom2"),
            ],
            cust_data,
        )

    def test_custom_posts_put_in_own_dir(self):
        """With ``wp_custpost=True`` output goes under a directory named by kind."""
        test_posts = self._custom_type_posts()
        with temporary_folder() as temp:
            fnames = list(
                self.silent_f2p(test_posts, "markdown", temp, wp_custpost=True)
            )

        expected = [
            os.path.join(post[self._KIND], post[self._FILENAME] + ".md")
            for post in test_posts
        ]
        self._assert_output_suffixes(fnames, expected)

    def test_custom_posts_put_in_own_dir_and_catagory_sub_dir(self):
        """``wp_custpost`` plus ``dircat`` nests the category under the kind."""
        test_posts = self._custom_type_posts()
        with temporary_folder() as temp:
            fnames = list(
                self.silent_f2p(
                    test_posts, "markdown", temp, wp_custpost=True, dircat=True
                )
            )

        subs = DEFAULT_CONFIG["SLUG_REGEX_SUBSTITUTIONS"]
        expected = [
            os.path.join(
                post[self._KIND],
                slugify(
                    post[self._CATEGORIES][0], regex_subs=subs, preserve_case=True
                ),
                post[self._FILENAME] + ".md",
            )
            for post in test_posts
        ]
        self._assert_output_suffixes(fnames, expected)

    def test_wp_custpost_true_dirpage_false(self):
        """Pages should only be put in their own directory when dirpage = True."""
        test_posts = [
            post for post in self.custposts if post[self._KIND] == "page"
        ]
        with temporary_folder() as temp:
            fnames = list(
                self.silent_f2p(
                    test_posts, "markdown", temp, wp_custpost=True, dirpage=False
                )
            )

        self.assertGreaterEqual(len(fnames), len(test_posts))
        # Pair every page with its own output file; the previous loop never
        # advanced its index and therefore only ever inspected fnames[0].
        for post, out_name in zip(test_posts, fnames):
            unwanted = os.path.join("pages", post[self._FILENAME] + ".md")
            self.assertFalse(out_name.endswith(unwanted), out_name)

    def test_can_toggle_raw_html_code_parsing(self):
        """``strip_raw`` removes raw HTML from Markdown; reST never has it."""
        test_posts = list(self.posts)

        with temporary_folder() as temp:

            def outputs(markup, **options):
                # Lazily read each generated file; must be consumed while the
                # temporary folder still exists.
                return (
                    self._read_text(path)
                    for path in self.silent_f2p(test_posts, markup, temp, **options)
                )

            self.assertTrue(any("<iframe" in text for text in outputs("markdown")))
            self.assertFalse(
                any("<iframe" in text for text in outputs("markdown", strip_raw=True))
            )
            # no effect in rst
            self.assertFalse(any("<iframe" in text for text in outputs("rst")))
            self.assertFalse(
                any("<iframe" in text for text in outputs("rst", strip_raw=True))
            )

    def test_decode_html_entities_in_titles(self):
        """HTML entities in a title are decoded to plain characters."""
        test_posts = [
            post for post in self.posts if post[self._FILENAME] == "html-entity-test"
        ]
        self.assertEqual(len(test_posts), 1)

        title = test_posts[0][self._TITLE]
        # assertEqual, not assertTrue: with assertTrue the expected text was
        # only used as the failure message and nothing was compared.
        self.assertEqual(
            title,
            "A normal post with some <html> entities in "
            "the title. You can't miss them.",
        )
        self.assertNotIn("&", title)

    def test_decode_wp_content_returns_empty(self):
        """Check that given an empty string we return an empty string."""
        self.assertEqual(decode_wp_content(""), "")

    def test_decode_wp_content(self):
        """Check that we can decode a wordpress content string."""
        # Read both fixtures as UTF-8, like every other file read in this
        # module, instead of relying on the locale's default encoding.
        encoded_content = self._read_text(WORDPRESS_ENCODED_CONTENT_SAMPLE)
        decoded_content = self._read_text(WORDPRESS_DECODED_CONTENT_SAMPLE)
        self.assertEqual(
            decode_wp_content(encoded_content, br=False), decoded_content
        )

    def test_preserve_verbatim_formatting(self):
        """Code lines keep leading whitespace and nested code stays more indented."""
        md = self._markdown_for("Code in List")
        self.assertTrue(re.search(r"\s+a = \[1, 2, 3\]", md))
        self.assertTrue(re.search(r"\s+b = \[4, 5, 6\]", md))

        for_match = re.search(r"\s+for i in zip\(a, b\):", md)
        print_match = re.search(r"\s+print i", md)
        self.assertIsNotNone(for_match)
        self.assertIsNotNone(print_match)
        # The matched text starts with the leading whitespace, so the offset
        # of the keyword inside the match reflects how far it is indented.
        self.assertLess(
            for_match.group(0).rindex("for"), print_match.group(0).rindex("print")
        )

    def test_code_in_list(self):
        """Code inside a list item is indented further than the item text."""
        md = self._markdown_for("Code in List")
        sample_match = re.search(r"- This is a code sample", md)
        code_match = re.search(r"\s+a = \[1, 2, 3\]", md)
        self.assertIsNotNone(sample_match)
        self.assertIsNotNone(code_match)
        self.assertLess(
            sample_match.group(0).rindex("This"), code_match.group(0).rindex("a")
        )

    def test_dont_use_smart_quotes(self):
        """The Markdown output contains no backslash-escaped quote characters."""
        md = self._markdown_for("Post with raw data")
        self.assertIsNone(re.search(r'\\[\'"“”‘’]', md))

    def test_convert_caption_to_figure(self):
        """``[caption]`` shortcodes are converted; images and texts survive."""
        md = self._markdown_for("Caption on image")

        self.assertIsNone(re.search(r"\[caption", md))

        for occurrence in [
            "/theme/img/xpelican.png.pagespeed.ic.Rjep0025-y.png",
            "/theme/img/xpelican-3.png.pagespeed.ic.m-NAIdRCOM.png",
            "/theme/img/xpelican.png.pagespeed.ic.Rjep0025-y.png",
            "This is a pelican",
            "This also a pelican",
            "Yet another pelican",
        ]:
            # pandoc 2.x converts into ![text](src)
            # pandoc 3.x converts into <figure>src<figcaption>text</figcaption></figure>
            self.assertIn(occurrence, md)
|
|
|
|
|
|
class TestBuildHeader(unittest.TestCase):
    """Checks on ``build_header`` and ``build_markdown_header`` output."""

    def test_build_header(self):
        """A title with no metadata gives the title and a ``#`` underline."""
        no_metadata = [None] * 5
        self.assertEqual(build_header("test", *no_metadata), "test\n####\n\n")

    def test_build_header_with_fields(self):
        """All six positional fields appear in both header flavours."""
        args = (
            "Test Post",
            "2014-11-04",
            "Alexis Métaireau",
            ["Programming"],
            ["Pelican", "Python"],
            "test-post",
        )

        docutils_header = (
            "Test Post\n"
            "#########\n"
            ":date: 2014-11-04\n"
            ":author: Alexis Métaireau\n"
            ":category: Programming\n"
            ":tags: Pelican, Python\n"
            ":slug: test-post\n"
            "\n"
        )
        markdown_header = (
            "Title: Test Post\n"
            "Date: 2014-11-04\n"
            "Author: Alexis Métaireau\n"
            "Category: Programming\n"
            "Tags: Pelican, Python\n"
            "Slug: test-post\n"
            "\n"
        )

        self.assertEqual(build_header(*args), docutils_header)
        self.assertEqual(build_markdown_header(*args), markdown_header)

    def test_build_header_with_east_asian_characters(self):
        """The underline of a wide-character title matches the expected length."""
        wide_title = "これは広い幅の文字だけで構成されたタイトルです"
        underline = "##############################################"

        result = build_header(wide_title, None, None, None, None, None)

        self.assertEqual(result, wide_title + "\n" + underline + "\n\n")

    def test_galleries_added_to_header(self):
        """Attachments are listed in an ``:attachments:`` field."""
        result = build_header(
            "test",
            *[None] * 5,
            attachments=["output/test1", "output/test2"],
        )
        self.assertEqual(
            result, "test\n####\n:attachments: output/test1, output/test2\n\n"
        )

    def test_galleries_added_to_markdown_header(self):
        """Attachments are listed in an ``Attachments:`` Markdown field."""
        result = build_markdown_header(
            "test",
            *[None] * 5,
            attachments=["output/test1", "output/test2"],
        )
        self.assertEqual(
            result, "Title: test\nAttachments: output/test1, output/test2\n\n"
        )
|
|
|
|
|
|
@unittest.skipUnless(BeautifulSoup, "Needs BeautifulSoup module")
@unittest.skipUnless(LXML, "Needs lxml module")
class TestWordpressXMLAttachements(TestCaseWithCLocale):
    """Tests for attachment discovery and download from the WordPress sample."""

    def setUp(self):
        super().setUp()
        self.attachments = get_attachments(WORDPRESS_XML_SAMPLE)

    def test_recognise_attachments(self):
        """The sample export yields attachments under exactly three keys."""
        self.assertTrue(self.attachments)
        # assertEqual reports the actual count on failure, unlike
        # assertTrue(len(...) == 3).
        self.assertEqual(3, len(self.attachments))

    def test_attachments_associated_with_correct_post(self):
        """Each attachment set is filed under the expected post (or None)."""
        self.assertTrue(self.attachments)

        # Expected attachment URLs per key; None is one of the keys the
        # importer is expected to use.
        expected_by_post = {
            None: {
                "https://upload.wikimedia.org/wikipedia/commons/"
                "thumb/2/2c/Pelican_lakes_entrance02.jpg/"
                "240px-Pelican_lakes_entrance02.jpg"
            },
            "with-excerpt": {
                "http://thisurlisinvalid.notarealdomain/not_an_image.jpg",
                "http://en.wikipedia.org/wiki/File:Pelikan_Walvis_Bay.jpg",
            },
            "with-tags": {"http://thisurlisinvalid.notarealdomain"},
        }

        for post in self.attachments:
            if post not in expected_by_post:
                self.fail(f"all attachments should match to a filename or None, {post}")
            self.assertEqual(self.attachments[post], expected_by_post[post])

    def test_download_attachments(self):
        """Only the reachable URL is downloaded; the bad one is dropped."""
        real_file = os.path.join(CUR_DIR, "content", "article.rst")
        good_url = path_to_file_url(real_file)
        bad_url = "http://localhost:1/not_a_file.txt"
        silent_da = mute()(download_attachments)
        with temporary_folder() as temp:
            locations = list(silent_da(temp, [good_url, bad_url]))
            self.assertEqual(1, len(locations))
            directory = locations[0]
            # The result is compared against a "/"-joined suffix.
            self.assertTrue(
                directory.endswith(posix_join("content", "article.rst")), directory
            )
|
|
|
|
|
|
class TestTumblrImporter(TestCaseWithCLocale):
    """Tests for ``tumblr2fields`` with the post-fetching helper patched out.

    Each test installs a fake fetcher that serves one page of posts at
    offset 0 and an empty list for any later offset.
    """

    @patch("pelican.tools.pelican_import._get_tumblr_posts")
    def test_posts(self, get):
        """A photo post becomes an ``<img>`` article."""

        def fake_fetch(api_key, blogname, offset=0):
            if offset <= 0:
                return [
                    {
                        "type": "photo",
                        "blog_name": "testy",
                        "date": "2019-11-07 21:26:40 UTC",
                        "timestamp": 1573162000,
                        "format": "html",
                        "slug": "a-slug",
                        "tags": ["economics"],
                        "state": "published",
                        "photos": [
                            {
                                "caption": "",
                                "original_size": {
                                    "url": "https://..fccdc2360ba7182a.jpg",
                                    "width": 634,
                                    "height": 789,
                                },
                            }
                        ],
                    }
                ]
            return []

        get.side_effect = fake_fetch

        expected = [
            (
                "Photo",
                '<img alt="" src="https://..fccdc2360ba7182a.jpg" />\n',
                "2019-11-07-a-slug",
                "2019-11-07 21:26:40+0000",
                "testy",
                ["photo"],
                ["economics"],
                "published",
                "article",
                "html",
            )
        ]
        posts = list(tumblr2fields("api_key", "blogname"))
        # The actual result doubles as the failure message.
        self.assertEqual(expected, posts, posts)

    @patch("pelican.tools.pelican_import._get_tumblr_posts")
    def test_video_embed(self, get):
        """A video post includes the source link, caption and embed codes."""

        def fake_fetch(api_key, blogname, offset=0):
            if offset <= 0:
                return [
                    {
                        "type": "video",
                        "blog_name": "testy",
                        "slug": "the-slug",
                        "date": "2017-07-07 20:31:41 UTC",
                        "timestamp": 1499459501,
                        "state": "published",
                        "format": "html",
                        "tags": [],
                        "source_url": "https://href.li/?https://www.youtube.com/a",
                        "source_title": "youtube.com",
                        "caption": "<p>Caption</p>",
                        "player": [
                            {"width": 250, "embed_code": "<iframe>1</iframe>"},
                            {"width": 400, "embed_code": "<iframe>2</iframe>"},
                            {"width": 500, "embed_code": "<iframe>3</iframe>"},
                        ],
                        "video_type": "youtube",
                    }
                ]
            return []

        get.side_effect = fake_fetch

        expected_content = (
            '<p><a href="https://href.li/?'
            'https://www.youtube.com/a">via</a></p>\n<p>Caption</p>'
            "<iframe>1</iframe>\n"
            "<iframe>2</iframe>\n"
            "<iframe>3</iframe>\n"
        )
        expected = [
            (
                "youtube.com",
                expected_content,
                "2017-07-07-the-slug",
                "2017-07-07 20:31:41+0000",
                "testy",
                ["video"],
                [],
                "published",
                "article",
                "html",
            )
        ]
        posts = list(tumblr2fields("api_key", "blogname"))
        # The actual result doubles as the failure message.
        self.assertEqual(expected, posts, posts)

    @patch("pelican.tools.pelican_import._get_tumblr_posts")
    def test_broken_video_embed(self, get):
        """A video whose embed codes are all False gets a placeholder text."""

        def fake_fetch(api_key, blogname, offset=0):
            if offset <= 0:
                return [
                    {
                        "type": "video",
                        "blog_name": "testy",
                        "slug": "the-slug",
                        "date": "2016-08-14 16:37:35 UTC",
                        "timestamp": 1471192655,
                        "state": "published",
                        "format": "html",
                        "tags": ["interviews"],
                        "source_url": "https://href.li/?https://www.youtube.com/watch?v=b",
                        "source_title": "youtube.com",
                        "caption": "<p>Caption</p>",
                        "player": [
                            {
                                "width": 250,
                                # If video is gone, embed_code is False
                                "embed_code": False,
                            },
                            {"width": 400, "embed_code": False},
                            {"width": 500, "embed_code": False},
                        ],
                        "video_type": "youtube",
                    }
                ]
            return []

        get.side_effect = fake_fetch

        expected_content = (
            '<p><a href="https://href.li/?https://www.youtube.com/watch?'
            'v=b">via</a></p>\n<p>Caption</p>'
            "<p>(This video isn't available anymore.)</p>\n"
        )
        expected = [
            (
                "youtube.com",
                expected_content,
                "2016-08-14-the-slug",
                "2016-08-14 16:37:35+0000",
                "testy",
                ["video"],
                ["interviews"],
                "published",
                "article",
                "html",
            )
        ]
        posts = list(tumblr2fields("api_key", "blogname"))
        # The actual result doubles as the failure message.
        self.assertEqual(expected, posts, posts)
|
|
|
|
|
|
class TestMediumImporter(TestCaseWithCLocale):
    """Tests for the Medium export importer helpers."""

    def setUp(self):
        super().setUp()
        # Anchor fixtures on this module's directory (like the other fixture
        # paths in this file) so the tests pass from any working directory.
        self.test_content_root = os.path.join(CUR_DIR, "content")
        # The content coming out of parsing is similar, but not the same.
        # Beautiful soup rearranges the order of attributes, for example.
        # So, we keep a copy of the content for the test.
        content_filename = os.path.join(
            self.test_content_root, "medium_post_content.txt"
        )
        with open(content_filename, encoding="utf-8") as the_content_file:
            the_content = the_content_file.read()
        # Many editors and scripts add a final newline, so live with that
        # in our test.  A unittest assertion is used rather than a bare
        # ``assert`` so the check survives ``python -O`` and an empty file
        # fails cleanly instead of raising IndexError.
        self.assertTrue(
            the_content.endswith("\n"),
            "medium_post_content.txt must end with a newline",
        )
        the_content = the_content[:-1]
        self.post_tuple = (
            "A title",
            the_content,
            # slug:
            "2017-04-21-medium-post",
            "2017-04-21 17:11",
            "User Name",
            None,
            (),
            "published",
            "article",
            "html",
        )

    def test_mediumpost2field(self):
        """Parse one post"""
        post_filename = os.path.join(
            self.test_content_root,
            "medium_posts",
            "2017-04-21_-medium-post--d1bf01d62ba3.html",
        )
        val = mediumpost2fields(post_filename)
        # The parsed value doubles as the failure message.
        self.assertEqual(self.post_tuple, val, val)

    def test_mediumposts2field(self):
        """Parse all posts in an export directory"""
        export_dir = os.path.join(self.test_content_root, "medium_posts")
        posts = list(mediumposts2fields(export_dir))
        self.assertEqual(1, len(posts))
        self.assertEqual(self.post_tuple, posts[0])

    def test_strip_content(self):
        """Strip out unhelpful tags"""
        html_doc = (
            "<section>This keeps <i>lots</i> of <b>tags</b>, but not "
            "the <section>section</section> tags</section>"
        )
        soup = BeautifulSoup(html_doc, "html.parser")
        self.assertEqual(
            "This keeps <i>lots</i> of <b>tags</b>, but not the section tags",
            strip_medium_post_content(soup),
        )

    def test_medium_slug(self):
        """Slugs drop the directory, extension, hex id and ``--DRAFT`` marker."""
        cases = [
            # Remove hex stuff at the end
            (
                "medium-export/posts/2017-04-27_A-long-title--2971442227dd.html",
                "2017-04-27_A-long-title",
            ),
            # Remove "--DRAFT" at the end
            (
                "medium-export/posts/2017-04-27_A-long-title--DRAFT.html",
                "2017-04-27_A-long-title",
            ),
            # Remove both (which happens)
            ("draft_How-to-do--DRAFT--87225c81dddd.html", "draft_How-to-do"),
            # If no hex stuff, leave it alone
            (
                "medium-export/posts/2017-04-27_A-long-title.html",
                "2017-04-27_A-long-title",
            ),
        ]
        for filepath, expected_slug in cases:
            with self.subTest(filepath=filepath):
                self.assertEqual(expected_slug, medium_slug(filepath))
|