forked from github/pelican
Medium post importer (from medium export)
This commit is contained in:
parent
2d75ca8391
commit
d6a33f1d21
7 changed files with 357 additions and 21 deletions
4
pelican/tests/content/medium_post_content.txt
vendored
Normal file
4
pelican/tests/content/medium_post_content.txt
vendored
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
|
||||
<hr/><h3>Title header</h3><p>A paragraph of content.</p><p>Paragraph number two.</p><p>A list:</p><ol><li>One.</li><li>Two.</li><li>Three.</li></ol><p>A link: <a data-href="https://example.com/example" href="https://example.com/example" target="_blank">link text</a>.</p><h3>Header 2</h3><p>A block quote:</p><blockquote>quote words <strong>strong words</strong></blockquote><p>after blockquote</p><figure><img data-height="282" data-image-id="image1.png" data-width="739" src="https://cdn-images-1.medium.com/max/800/image1.png"/><figcaption>A figure caption.</figcaption></figure><p>A final note: <a data-href="http://stats.stackexchange.com/" href="http://stats.stackexchange.com/" rel="noopener" target="_blank">Cross-Validated</a> has sometimes been helpful.</p><hr/><p><em>Next: </em><a data-href="https://medium.com/@username/post-url" href="https://medium.com/@username/post-url" target="_blank"><em>Next post</em>
|
||||
</a></p>
|
||||
<p>By <a href="https://medium.com/@username">User Name</a> on <a href="https://medium.com/p/medium-short-url"><time datetime="2017-04-21T17:11:55.799Z">April 21, 2017</time></a>.</p><p><a href="https://medium.com/@username/this-post-url">Canonical link</a></p><p>Exported from <a href="https://medium.com">Medium</a> on December 1, 2023.</p>
|
||||
72
pelican/tests/content/medium_posts/2017-04-21_-medium-post--d1bf01d62ba3.html
vendored
Normal file
72
pelican/tests/content/medium_posts/2017-04-21_-medium-post--d1bf01d62ba3.html
vendored
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
<!DOCTYPE html><html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"><title>A title</title><style>
|
||||
* {
|
||||
font-family: Georgia, Cambria, "Times New Roman", Times, serif;
|
||||
}
|
||||
html, body {
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
}
|
||||
h1 {
|
||||
font-size: 50px;
|
||||
margin-bottom: 17px;
|
||||
color: #333;
|
||||
}
|
||||
h2 {
|
||||
font-size: 24px;
|
||||
line-height: 1.6;
|
||||
margin: 30px 0 0 0;
|
||||
margin-bottom: 18px;
|
||||
margin-top: 33px;
|
||||
color: #333;
|
||||
}
|
||||
h3 {
|
||||
font-size: 30px;
|
||||
margin: 10px 0 20px 0;
|
||||
color: #333;
|
||||
}
|
||||
header {
|
||||
width: 640px;
|
||||
margin: auto;
|
||||
}
|
||||
section {
|
||||
width: 640px;
|
||||
margin: auto;
|
||||
}
|
||||
section p {
|
||||
margin-bottom: 27px;
|
||||
font-size: 20px;
|
||||
line-height: 1.6;
|
||||
color: #333;
|
||||
}
|
||||
section img {
|
||||
max-width: 640px;
|
||||
}
|
||||
footer {
|
||||
padding: 0 20px;
|
||||
margin: 50px 0;
|
||||
text-align: center;
|
||||
font-size: 12px;
|
||||
}
|
||||
.aspectRatioPlaceholder {
|
||||
max-width: auto !important;
|
||||
max-height: auto !important;
|
||||
}
|
||||
.aspectRatioPlaceholder-fill {
|
||||
padding-bottom: 0 !important;
|
||||
}
|
||||
header,
|
||||
section[data-field=subtitle],
|
||||
section[data-field=description] {
|
||||
display: none;
|
||||
}
|
||||
</style></head><body><article class="h-entry">
|
||||
<header>
|
||||
<h1 class="p-name">A name (like title)</h1>
|
||||
</header>
|
||||
<section data-field="subtitle" class="p-summary">
|
||||
Summary (first several words of content)
|
||||
</section>
|
||||
<section data-field="body" class="e-content">
|
||||
<section name="ad15" class="section section--body section--first"><div class="section-divider"><hr class="section-divider"></div><div class="section-content"><div class="section-inner sectionLayout--insetColumn"><h3 name="20a3" id="20a3" class="graf graf--h3 graf--leading graf--title">Title header</h3><p name="e3d6" id="e3d6" class="graf graf--p graf-after--h3">A paragraph of content.</p><p name="c7a8" id="c7a8" class="graf graf--p graf-after--p">Paragraph number two.</p><p name="42aa" id="42aa" class="graf graf--p graf-after--p">A list:</p><ol class="postList"><li name="d65f" id="d65f" class="graf graf--li graf-after--p">One.</li><li name="232b" id="232b" class="graf graf--li graf-after--li">Two.</li><li name="ef87" id="ef87" class="graf graf--li graf-after--li">Three.</li></ol><p name="e743" id="e743" class="graf graf--p graf-after--p">A link: <a href="https://example.com/example" data-href="https://example.com/example" class="markup--anchor markup--p-anchor" target="_blank">link text</a>.</p><h3 name="4cfd" id="4cfd" class="graf graf--h3 graf-after--p">Header 2</h3><p name="433c" id="433c" class="graf graf--p graf-after--p">A block quote:</p><blockquote name="3537" id="3537" class="graf graf--blockquote graf-after--p">quote words <strong class="markup--strong markup--blockquote-strong">strong words</strong></blockquote><p name="00cc" id="00cc" class="graf graf--p graf-after--blockquote">after blockquote</p><figure name="edb0" id="edb0" class="graf graf--figure graf-after--p"><img class="graf-image" data-image-id="image1.png" data-width="739" data-height="282" src="https://cdn-images-1.medium.com/max/800/image1.png"><figcaption class="imageCaption">A figure caption.</figcaption></figure><p name="f401" id="f401" class="graf graf--p graf-after--p graf--trailing">A final note: <a href="http://stats.stackexchange.com/" data-href="http://stats.stackexchange.com/" class="markup--anchor markup--p-anchor" rel="noopener" target="_blank">Cross-Validated</a> has sometimes 
been helpful.</p></div></div></section><section name="09a3" class="section section--body section--last"><div class="section-divider"><hr class="section-divider"></div><div class="section-content"><div class="section-inner sectionLayout--insetColumn"><p name="81e8" id="81e8" class="graf graf--p graf--leading"><em class="markup--em markup--p-em">Next: </em><a href="https://medium.com/@username/post-url" data-href="https://medium.com/@username/post-url" class="markup--anchor markup--p-anchor" target="_blank"><em class="markup--em markup--p-em">Next post</em>
|
||||
</section>
|
||||
<footer><p>By <a href="https://medium.com/@username" class="p-author h-card">User Name</a> on <a href="https://medium.com/p/medium-short-url"><time class="dt-published" datetime="2017-04-21T17:11:55.799Z">April 21, 2017</time></a>.</p><p><a href="https://medium.com/@username/this-post-url" class="p-canonical">Canonical link</a></p><p>Exported from <a href="https://medium.com">Medium</a> on December 1, 2023.</p></footer></article></body></html>
|
||||
|
|
@ -264,6 +264,7 @@ class TestArticlesGenerator(unittest.TestCase):
|
|||
|
||||
def test_generate_context(self):
|
||||
articles_expected = [
|
||||
["A title", "published", "medium_posts", "article"],
|
||||
["Article title", "published", "Default", "article"],
|
||||
[
|
||||
"Article with markdown and summary metadata multi",
|
||||
|
|
@ -391,13 +392,24 @@ class TestArticlesGenerator(unittest.TestCase):
|
|||
# terms of process order will define the name for that category
|
||||
categories = [cat.name for cat, _ in self.generator.categories]
|
||||
categories_alternatives = (
|
||||
sorted(["Default", "TestCategory", "Yeah", "test", "指導書"]),
|
||||
sorted(["Default", "TestCategory", "yeah", "test", "指導書"]),
|
||||
sorted(
|
||||
["Default", "TestCategory", "medium_posts", "Yeah", "test", "指導書"]
|
||||
),
|
||||
sorted(
|
||||
["Default", "TestCategory", "medium_posts", "yeah", "test", "指導書"]
|
||||
),
|
||||
)
|
||||
self.assertIn(sorted(categories), categories_alternatives)
|
||||
# test for slug
|
||||
categories = [cat.slug for cat, _ in self.generator.categories]
|
||||
categories_expected = ["default", "testcategory", "yeah", "test", "zhi-dao-shu"]
|
||||
categories_expected = [
|
||||
"default",
|
||||
"testcategory",
|
||||
"medium_posts",
|
||||
"yeah",
|
||||
"test",
|
||||
"zhi-dao-shu",
|
||||
]
|
||||
self.assertEqual(sorted(categories), sorted(categories_expected))
|
||||
|
||||
def test_do_not_use_folder_as_category(self):
|
||||
|
|
@ -549,7 +561,8 @@ class TestArticlesGenerator(unittest.TestCase):
|
|||
granularity: {period["period"] for period in periods}
|
||||
for granularity, periods in period_archives.items()
|
||||
}
|
||||
expected = {"year": {(1970,), (2010,), (2012,), (2014,)}}
|
||||
self.maxDiff = None
|
||||
expected = {"year": {(1970,), (2010,), (2012,), (2014,), (2017,)}}
|
||||
self.assertEqual(expected, abbreviated_archives)
|
||||
|
||||
# Month archives enabled:
|
||||
|
|
@ -570,7 +583,7 @@ class TestArticlesGenerator(unittest.TestCase):
|
|||
for granularity, periods in period_archives.items()
|
||||
}
|
||||
expected = {
|
||||
"year": {(1970,), (2010,), (2012,), (2014,)},
|
||||
"year": {(1970,), (2010,), (2012,), (2014,), (2017,)},
|
||||
"month": {
|
||||
(1970, "January"),
|
||||
(2010, "December"),
|
||||
|
|
@ -578,6 +591,7 @@ class TestArticlesGenerator(unittest.TestCase):
|
|||
(2012, "November"),
|
||||
(2012, "October"),
|
||||
(2014, "February"),
|
||||
(2017, "April"),
|
||||
},
|
||||
}
|
||||
self.assertEqual(expected, abbreviated_archives)
|
||||
|
|
@ -602,7 +616,7 @@ class TestArticlesGenerator(unittest.TestCase):
|
|||
for granularity, periods in period_archives.items()
|
||||
}
|
||||
expected = {
|
||||
"year": {(1970,), (2010,), (2012,), (2014,)},
|
||||
"year": {(1970,), (2010,), (2012,), (2014,), (2017,)},
|
||||
"month": {
|
||||
(1970, "January"),
|
||||
(2010, "December"),
|
||||
|
|
@ -610,6 +624,7 @@ class TestArticlesGenerator(unittest.TestCase):
|
|||
(2012, "November"),
|
||||
(2012, "October"),
|
||||
(2014, "February"),
|
||||
(2017, "April"),
|
||||
},
|
||||
"day": {
|
||||
(1970, "January", 1),
|
||||
|
|
@ -619,6 +634,7 @@ class TestArticlesGenerator(unittest.TestCase):
|
|||
(2012, "October", 30),
|
||||
(2012, "October", 31),
|
||||
(2014, "February", 9),
|
||||
(2017, "April", 21),
|
||||
},
|
||||
}
|
||||
self.assertEqual(expected, abbreviated_archives)
|
||||
|
|
@ -836,8 +852,12 @@ class TestArticlesGenerator(unittest.TestCase):
|
|||
|
||||
categories = sorted([category.name for category, _ in generator.categories])
|
||||
categories_expected = [
|
||||
sorted(["Default", "TestCategory", "yeah", "test", "指導書"]),
|
||||
sorted(["Default", "TestCategory", "Yeah", "test", "指導書"]),
|
||||
sorted(
|
||||
["Default", "TestCategory", "medium_posts", "yeah", "test", "指導書"]
|
||||
),
|
||||
sorted(
|
||||
["Default", "TestCategory", "medium_posts", "Yeah", "test", "指導書"]
|
||||
),
|
||||
]
|
||||
self.assertIn(categories, categories_expected)
|
||||
|
||||
|
|
@ -864,6 +884,7 @@ class TestArticlesGenerator(unittest.TestCase):
|
|||
generator.generate_context()
|
||||
|
||||
expected = [
|
||||
"A title",
|
||||
"An Article With Code Block To Test Typogrify Ignore",
|
||||
"Article title",
|
||||
"Article with Nonconformant HTML meta tags",
|
||||
|
|
|
|||
|
|
@ -21,6 +21,10 @@ from pelican.tools.pelican_import import (
|
|||
get_attachments,
|
||||
tumblr2fields,
|
||||
wp2fields,
|
||||
mediumpost2fields,
|
||||
mediumposts2fields,
|
||||
strip_medium_post_content,
|
||||
medium_slug,
|
||||
)
|
||||
from pelican.utils import path_to_file_url, slugify
|
||||
|
||||
|
|
@ -708,3 +712,82 @@ class TestTumblrImporter(TestCaseWithCLocale):
|
|||
posts,
|
||||
posts,
|
||||
)
|
||||
|
||||
|
||||
class TestMediumImporter(TestCaseWithCLocale):
    """Tests for the Medium export importer helpers.

    Exercises ``mediumpost2fields``, ``mediumposts2fields``,
    ``strip_medium_post_content`` and ``medium_slug`` against the fixture
    files stored under ``pelican/tests/content``.
    """

    def setUp(self):
        """Read the expected post body and build the expected field tuple."""
        super().setUp()
        # NOTE(review): this path is relative, so the tests presumably have to
        # be run from the repository root -- confirm against the test runner.
        self.test_content_root = "pelican/tests/content"
        # The content coming out of parsing is similar, but not the same.
        # Beautiful soup rearranges the order of attributes, for example.
        # So, we keep a copy of the content for the test.
        content_filename = f"{self.test_content_root}/medium_post_content.txt"
        with open(content_filename, encoding="utf-8") as the_content_file:
            the_content = the_content_file.read()
        # Many editors and scripts add a final newline, so live with that
        # in our test: require it, then drop exactly that one character.
        # endswith() also copes with an empty fixture file, where indexing
        # the last character would raise IndexError instead of failing cleanly.
        self.assertTrue(
            the_content.endswith("\n"),
            f"{content_filename} is expected to end with a newline",
        )
        the_content = the_content[:-1]
        self.post_tuple = (
            "A title",
            the_content,
            # slug:
            "2017-04-21-medium-post",
            "2017-04-21 17:11",
            "User Name",
            None,
            (),
            "published",
            "article",
            "html",
        )

    def test_mediumpost2field(self):
        """Parse one post"""
        post_filename = (
            f"{self.test_content_root}/medium_posts/"
            "2017-04-21_-medium-post--d1bf01d62ba3.html"
        )
        val = mediumpost2fields(post_filename)
        # No custom msg: unittest already reports both values on mismatch.
        self.assertEqual(self.post_tuple, val)

    def test_mediumposts2field(self):
        """Parse all posts in an export directory"""
        posts = list(mediumposts2fields(f"{self.test_content_root}/medium_posts"))
        self.assertEqual(1, len(posts))
        self.assertEqual(self.post_tuple, posts[0])

    def test_strip_content(self):
        """Strip out unhelpful tags"""
        html_doc = (
            "<section>This keeps <i>lots</i> of <b>tags</b>, but not "
            "the <section>section</section> tags</section>"
        )
        soup = BeautifulSoup(html_doc, "html.parser")
        self.assertEqual(
            "This keeps <i>lots</i> of <b>tags</b>, but not the section tags",
            strip_medium_post_content(soup),
        )

    def test_medium_slug(self):
        """Derive a slug from an exported Medium file name"""
        # Remove hex stuff at the end
        self.assertEqual(
            "2017-04-27_A-long-title",
            medium_slug(
                "medium-export/posts/2017-04-27_A-long-title--2971442227dd.html"
            ),
        )
        # Remove "--DRAFT" at the end
        self.assertEqual(
            "2017-04-27_A-long-title",
            medium_slug("medium-export/posts/2017-04-27_A-long-title--DRAFT.html"),
        )
        # Remove both (which happens)
        self.assertEqual(
            "draft_How-to-do", medium_slug("draft_How-to-do--DRAFT--87225c81dddd.html")
        )
        # If no hex stuff, leave it alone
        self.assertEqual(
            "2017-04-27_A-long-title",
            medium_slug("medium-export/posts/2017-04-27_A-long-title.html"),
        )
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue