Merge pull request #3264 from boxydog/medium_importer

This commit is contained in:
Justin Mayer 2024-01-26 10:02:54 +01:00 committed by GitHub
commit ff35d26cbc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 357 additions and 21 deletions

View file

@ -439,8 +439,8 @@ For **Markdown**, one must rely on an extension. For example, using the `mdx_inc
Importing an existing site Importing an existing site
========================== ==========================
It is possible to import your site from WordPress, Tumblr, Dotclear, and RSS It is possible to import your site from several other blogging sites
feeds using a simple script. See :ref:`import`. (like WordPress, Tumblr, etc.) using a simple script. See :ref:`import`.
Translations Translations
============ ============

View file

@ -11,6 +11,7 @@ software to reStructuredText or Markdown. The supported import formats are:
- Blogger XML export - Blogger XML export
- Dotclear export - Dotclear export
- Medium export
- Tumblr API - Tumblr API
- WordPress XML export - WordPress XML export
- RSS/Atom feed - RSS/Atom feed
@ -71,6 +72,7 @@ Optional arguments
-h, --help Show this help message and exit -h, --help Show this help message and exit
--blogger Blogger XML export (default: False) --blogger Blogger XML export (default: False)
--dotclear Dotclear export (default: False) --dotclear Dotclear export (default: False)
--medium Medium export (default: False)
--tumblr Tumblr API (default: False) --tumblr Tumblr API (default: False)
--wpfile WordPress XML export (default: False) --wpfile WordPress XML export (default: False)
--feed Feed to parse (default: False) --feed Feed to parse (default: False)
@ -86,8 +88,7 @@ Optional arguments
(default: False) (default: False)
--filter-author Import only post from the specified author --filter-author Import only post from the specified author
--strip-raw Strip raw HTML code that can't be converted to markup --strip-raw Strip raw HTML code that can't be converted to markup
such as flash embeds or iframes (wordpress import such as flash embeds or iframes (default: False)
only) (default: False)
--wp-custpost Put wordpress custom post types in directories. If --wp-custpost Put wordpress custom post types in directories. If
used with --dir-cat option directories will be created used with --dir-cat option directories will be created
as "/post_type/category/" (wordpress import only) as "/post_type/category/" (wordpress import only)
@ -119,6 +120,14 @@ For Dotclear::
$ pelican-import --dotclear -o ~/output ~/backup.txt $ pelican-import --dotclear -o ~/output ~/backup.txt
For Medium::
$ pelican-import --medium -o ~/output ~/medium-export/posts/
The Medium export is a zip file. Unzip it, and point this tool to the
"posts" subdirectory. For more information on how to export, see
https://help.medium.com/hc/en-us/articles/115004745787-Export-your-account-data.
For Tumblr:: For Tumblr::
$ pelican-import --tumblr -o ~/output --blogname=<blogname> <api_key> $ pelican-import --tumblr -o ~/output --blogname=<blogname> <api_key>

View file

@ -0,0 +1,4 @@
<hr/><h3>Title header</h3><p>A paragraph of content.</p><p>Paragraph number two.</p><p>A list:</p><ol><li>One.</li><li>Two.</li><li>Three.</li></ol><p>A link: <a data-href="https://example.com/example" href="https://example.com/example" target="_blank">link text</a>.</p><h3>Header 2</h3><p>A block quote:</p><blockquote>quote words <strong>strong words</strong></blockquote><p>after blockquote</p><figure><img data-height="282" data-image-id="image1.png" data-width="739" src="https://cdn-images-1.medium.com/max/800/image1.png"/><figcaption>A figure caption.</figcaption></figure><p>A final note: <a data-href="http://stats.stackexchange.com/" href="http://stats.stackexchange.com/" rel="noopener" target="_blank">Cross-Validated</a> has sometimes been helpful.</p><hr/><p><em>Next: </em><a data-href="https://medium.com/@username/post-url" href="https://medium.com/@username/post-url" target="_blank"><em>Next post</em>
</a></p>
<p>By <a href="https://medium.com/@username">User Name</a> on <a href="https://medium.com/p/medium-short-url"><time datetime="2017-04-21T17:11:55.799Z">April 21, 2017</time></a>.</p><p><a href="https://medium.com/@username/this-post-url">Canonical link</a></p><p>Exported from <a href="https://medium.com">Medium</a> on December 1, 2023.</p>

View file

@ -0,0 +1,72 @@
<!DOCTYPE html><html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"><title>A title</title><style>
* {
font-family: Georgia, Cambria, "Times New Roman", Times, serif;
}
html, body {
margin: 0;
padding: 0;
}
h1 {
font-size: 50px;
margin-bottom: 17px;
color: #333;
}
h2 {
font-size: 24px;
line-height: 1.6;
margin: 30px 0 0 0;
margin-bottom: 18px;
margin-top: 33px;
color: #333;
}
h3 {
font-size: 30px;
margin: 10px 0 20px 0;
color: #333;
}
header {
width: 640px;
margin: auto;
}
section {
width: 640px;
margin: auto;
}
section p {
margin-bottom: 27px;
font-size: 20px;
line-height: 1.6;
color: #333;
}
section img {
max-width: 640px;
}
footer {
padding: 0 20px;
margin: 50px 0;
text-align: center;
font-size: 12px;
}
.aspectRatioPlaceholder {
max-width: auto !important;
max-height: auto !important;
}
.aspectRatioPlaceholder-fill {
padding-bottom: 0 !important;
}
header,
section[data-field=subtitle],
section[data-field=description] {
display: none;
}
</style></head><body><article class="h-entry">
<header>
<h1 class="p-name">A name (like title)</h1>
</header>
<section data-field="subtitle" class="p-summary">
Summary (first several words of content)
</section>
<section data-field="body" class="e-content">
<section name="ad15" class="section section--body section--first"><div class="section-divider"><hr class="section-divider"></div><div class="section-content"><div class="section-inner sectionLayout--insetColumn"><h3 name="20a3" id="20a3" class="graf graf--h3 graf--leading graf--title">Title header</h3><p name="e3d6" id="e3d6" class="graf graf--p graf-after--h3">A paragraph of content.</p><p name="c7a8" id="c7a8" class="graf graf--p graf-after--p">Paragraph number two.</p><p name="42aa" id="42aa" class="graf graf--p graf-after--p">A list:</p><ol class="postList"><li name="d65f" id="d65f" class="graf graf--li graf-after--p">One.</li><li name="232b" id="232b" class="graf graf--li graf-after--li">Two.</li><li name="ef87" id="ef87" class="graf graf--li graf-after--li">Three.</li></ol><p name="e743" id="e743" class="graf graf--p graf-after--p">A link: <a href="https://example.com/example" data-href="https://example.com/example" class="markup--anchor markup--p-anchor" target="_blank">link text</a>.</p><h3 name="4cfd" id="4cfd" class="graf graf--h3 graf-after--p">Header 2</h3><p name="433c" id="433c" class="graf graf--p graf-after--p">A block quote:</p><blockquote name="3537" id="3537" class="graf graf--blockquote graf-after--p">quote words <strong class="markup--strong markup--blockquote-strong">strong words</strong></blockquote><p name="00cc" id="00cc" class="graf graf--p graf-after--blockquote">after blockquote</p><figure name="edb0" id="edb0" class="graf graf--figure graf-after--p"><img class="graf-image" data-image-id="image1.png" data-width="739" data-height="282" src="https://cdn-images-1.medium.com/max/800/image1.png"><figcaption class="imageCaption">A figure caption.</figcaption></figure><p name="f401" id="f401" class="graf graf--p graf-after--p graf--trailing">A final note: <a href="http://stats.stackexchange.com/" data-href="http://stats.stackexchange.com/" class="markup--anchor markup--p-anchor" rel="noopener" target="_blank">Cross-Validated</a> has sometimes 
been helpful.</p></div></div></section><section name="09a3" class="section section--body section--last"><div class="section-divider"><hr class="section-divider"></div><div class="section-content"><div class="section-inner sectionLayout--insetColumn"><p name="81e8" id="81e8" class="graf graf--p graf--leading"><em class="markup--em markup--p-em">Next: </em><a href="https://medium.com/@username/post-url" data-href="https://medium.com/@username/post-url" class="markup--anchor markup--p-anchor" target="_blank"><em class="markup--em markup--p-em">Next post</em>
</section>
<footer><p>By <a href="https://medium.com/@username" class="p-author h-card">User Name</a> on <a href="https://medium.com/p/medium-short-url"><time class="dt-published" datetime="2017-04-21T17:11:55.799Z">April 21, 2017</time></a>.</p><p><a href="https://medium.com/@username/this-post-url" class="p-canonical">Canonical link</a></p><p>Exported from <a href="https://medium.com">Medium</a> on December 1, 2023.</p></footer></article></body></html>

View file

@ -264,6 +264,7 @@ class TestArticlesGenerator(unittest.TestCase):
def test_generate_context(self): def test_generate_context(self):
articles_expected = [ articles_expected = [
["A title", "published", "medium_posts", "article"],
["Article title", "published", "Default", "article"], ["Article title", "published", "Default", "article"],
[ [
"Article with markdown and summary metadata multi", "Article with markdown and summary metadata multi",
@ -391,13 +392,24 @@ class TestArticlesGenerator(unittest.TestCase):
# terms of process order will define the name for that category # terms of process order will define the name for that category
categories = [cat.name for cat, _ in self.generator.categories] categories = [cat.name for cat, _ in self.generator.categories]
categories_alternatives = ( categories_alternatives = (
sorted(["Default", "TestCategory", "Yeah", "test", "指導書"]), sorted(
sorted(["Default", "TestCategory", "yeah", "test", "指導書"]), ["Default", "TestCategory", "medium_posts", "Yeah", "test", "指導書"]
),
sorted(
["Default", "TestCategory", "medium_posts", "yeah", "test", "指導書"]
),
) )
self.assertIn(sorted(categories), categories_alternatives) self.assertIn(sorted(categories), categories_alternatives)
# test for slug # test for slug
categories = [cat.slug for cat, _ in self.generator.categories] categories = [cat.slug for cat, _ in self.generator.categories]
categories_expected = ["default", "testcategory", "yeah", "test", "zhi-dao-shu"] categories_expected = [
"default",
"testcategory",
"medium_posts",
"yeah",
"test",
"zhi-dao-shu",
]
self.assertEqual(sorted(categories), sorted(categories_expected)) self.assertEqual(sorted(categories), sorted(categories_expected))
def test_do_not_use_folder_as_category(self): def test_do_not_use_folder_as_category(self):
@ -549,7 +561,8 @@ class TestArticlesGenerator(unittest.TestCase):
granularity: {period["period"] for period in periods} granularity: {period["period"] for period in periods}
for granularity, periods in period_archives.items() for granularity, periods in period_archives.items()
} }
expected = {"year": {(1970,), (2010,), (2012,), (2014,)}} self.maxDiff = None
expected = {"year": {(1970,), (2010,), (2012,), (2014,), (2017,)}}
self.assertEqual(expected, abbreviated_archives) self.assertEqual(expected, abbreviated_archives)
# Month archives enabled: # Month archives enabled:
@ -570,7 +583,7 @@ class TestArticlesGenerator(unittest.TestCase):
for granularity, periods in period_archives.items() for granularity, periods in period_archives.items()
} }
expected = { expected = {
"year": {(1970,), (2010,), (2012,), (2014,)}, "year": {(1970,), (2010,), (2012,), (2014,), (2017,)},
"month": { "month": {
(1970, "January"), (1970, "January"),
(2010, "December"), (2010, "December"),
@ -578,6 +591,7 @@ class TestArticlesGenerator(unittest.TestCase):
(2012, "November"), (2012, "November"),
(2012, "October"), (2012, "October"),
(2014, "February"), (2014, "February"),
(2017, "April"),
}, },
} }
self.assertEqual(expected, abbreviated_archives) self.assertEqual(expected, abbreviated_archives)
@ -602,7 +616,7 @@ class TestArticlesGenerator(unittest.TestCase):
for granularity, periods in period_archives.items() for granularity, periods in period_archives.items()
} }
expected = { expected = {
"year": {(1970,), (2010,), (2012,), (2014,)}, "year": {(1970,), (2010,), (2012,), (2014,), (2017,)},
"month": { "month": {
(1970, "January"), (1970, "January"),
(2010, "December"), (2010, "December"),
@ -610,6 +624,7 @@ class TestArticlesGenerator(unittest.TestCase):
(2012, "November"), (2012, "November"),
(2012, "October"), (2012, "October"),
(2014, "February"), (2014, "February"),
(2017, "April"),
}, },
"day": { "day": {
(1970, "January", 1), (1970, "January", 1),
@ -619,6 +634,7 @@ class TestArticlesGenerator(unittest.TestCase):
(2012, "October", 30), (2012, "October", 30),
(2012, "October", 31), (2012, "October", 31),
(2014, "February", 9), (2014, "February", 9),
(2017, "April", 21),
}, },
} }
self.assertEqual(expected, abbreviated_archives) self.assertEqual(expected, abbreviated_archives)
@ -836,8 +852,12 @@ class TestArticlesGenerator(unittest.TestCase):
categories = sorted([category.name for category, _ in generator.categories]) categories = sorted([category.name for category, _ in generator.categories])
categories_expected = [ categories_expected = [
sorted(["Default", "TestCategory", "yeah", "test", "指導書"]), sorted(
sorted(["Default", "TestCategory", "Yeah", "test", "指導書"]), ["Default", "TestCategory", "medium_posts", "yeah", "test", "指導書"]
),
sorted(
["Default", "TestCategory", "medium_posts", "Yeah", "test", "指導書"]
),
] ]
self.assertIn(categories, categories_expected) self.assertIn(categories, categories_expected)
@ -864,6 +884,7 @@ class TestArticlesGenerator(unittest.TestCase):
generator.generate_context() generator.generate_context()
expected = [ expected = [
"A title",
"An Article With Code Block To Test Typogrify Ignore", "An Article With Code Block To Test Typogrify Ignore",
"Article title", "Article title",
"Article with Nonconformant HTML meta tags", "Article with Nonconformant HTML meta tags",

View file

@ -21,6 +21,10 @@ from pelican.tools.pelican_import import (
get_attachments, get_attachments,
tumblr2fields, tumblr2fields,
wp2fields, wp2fields,
mediumpost2fields,
mediumposts2fields,
strip_medium_post_content,
medium_slug,
) )
from pelican.utils import path_to_file_url, slugify from pelican.utils import path_to_file_url, slugify
@ -708,3 +712,82 @@ class TestTumblrImporter(TestCaseWithCLocale):
posts, posts,
posts, posts,
) )
class TestMediumImporter(TestCaseWithCLocale):
    """Checks for the Medium export importer, run against the sample post."""

    def setUp(self):
        super().setUp()
        self.test_content_root = "pelican/tests/content"
        # What the parser emits is close to the source HTML but not equal to
        # it (Beautiful Soup reorders attributes, for example), so the
        # expected content is stored in a separate fixture file.
        content_filename = f"{self.test_content_root}/medium_post_content.txt"
        with open(content_filename, encoding="utf-8") as fixture:
            expected_content = fixture.read()
        # Editors and scripts commonly append a final newline: require it in
        # the fixture, then drop it before comparing.
        assert expected_content[-1] == "\n"
        expected_content = expected_content[:-1]
        self.post_tuple = (
            "A title",
            expected_content,
            # slug:
            "2017-04-21-medium-post",
            "2017-04-21 17:11",
            "User Name",
            None,
            (),
            "published",
            "article",
            "html",
        )

    def test_mediumpost2field(self):
        """A single exported post parses into the expected fields."""
        post_filename = f"{self.test_content_root}/medium_posts/2017-04-21_-medium-post--d1bf01d62ba3.html"
        parsed = mediumpost2fields(post_filename)
        self.assertEqual(self.post_tuple, parsed, parsed)

    def test_mediumposts2field(self):
        """Every post in an export directory is parsed."""
        posts = list(mediumposts2fields(f"{self.test_content_root}/medium_posts"))
        self.assertEqual(1, len(posts))
        self.assertEqual(self.post_tuple, posts[0])

    def test_strip_content(self):
        """Section tags are removed while inline markup survives."""
        html_doc = (
            "<section>This keeps <i>lots</i> of <b>tags</b>, but not "
            "the <section>section</section> tags</section>"
        )
        soup = BeautifulSoup(html_doc, "html.parser")
        self.assertEqual(
            "This keeps <i>lots</i> of <b>tags</b>, but not the section tags",
            strip_medium_post_content(soup),
        )

    def test_medium_slug(self):
        """Export filenames are reduced to clean slugs."""
        cases = (
            # Remove hex stuff at the end
            (
                "2017-04-27_A-long-title",
                "medium-export/posts/2017-04-27_A-long-title--2971442227dd.html",
            ),
            # Remove "--DRAFT" at the end
            (
                "2017-04-27_A-long-title",
                "medium-export/posts/2017-04-27_A-long-title--DRAFT.html",
            ),
            # Remove both (which happens)
            ("draft_How-to-do", "draft_How-to-do--DRAFT--87225c81dddd.html"),
            # If no hex stuff, leave it alone
            (
                "2017-04-27_A-long-title",
                "medium-export/posts/2017-04-27_A-long-title.html",
            ),
        )
        for expected_slug, export_path in cases:
            self.assertEqual(expected_slug, medium_slug(export_path))

View file

@ -15,6 +15,8 @@ from urllib.error import URLError
from urllib.parse import quote, urlparse, urlsplit, urlunsplit from urllib.parse import quote, urlparse, urlsplit, urlunsplit
from urllib.request import urlretrieve from urllib.request import urlretrieve
import dateutil.parser
# because logging.setLoggerClass has to be called before logging.getLogger # because logging.setLoggerClass has to be called before logging.getLogger
from pelican.log import init from pelican.log import init
from pelican.settings import DEFAULT_CONFIG from pelican.settings import DEFAULT_CONFIG
@ -114,19 +116,25 @@ def decode_wp_content(content, br=True):
return content return content
def xml_to_soup(xml): def _import_bs4():
"""Opens an xml file""" """Import and return bs4, otherwise sys.exit."""
try: try:
from bs4 import BeautifulSoup import bs4
except ImportError: except ImportError:
error = ( error = (
'Missing dependency "BeautifulSoup4" and "lxml" required to ' 'Missing dependency "BeautifulSoup4" and "lxml" required to '
"import XML files." "import XML files."
) )
sys.exit(error) sys.exit(error)
return bs4
def file_to_soup(xml, features="xml"):
"""Reads a file, returns soup."""
bs4 = _import_bs4()
with open(xml, encoding="utf-8") as infile: with open(xml, encoding="utf-8") as infile:
xmlfile = infile.read() xmlfile = infile.read()
soup = BeautifulSoup(xmlfile, "xml") soup = bs4.BeautifulSoup(xmlfile, features)
return soup return soup
@ -140,7 +148,7 @@ def get_filename(post_name, post_id):
def wp2fields(xml, wp_custpost=False): def wp2fields(xml, wp_custpost=False):
"""Opens a wordpress XML file, and yield Pelican fields""" """Opens a wordpress XML file, and yield Pelican fields"""
soup = xml_to_soup(xml) soup = file_to_soup(xml)
items = soup.rss.channel.findAll("item") items = soup.rss.channel.findAll("item")
for item in items: for item in items:
if item.find("status").string in ["publish", "draft"]: if item.find("status").string in ["publish", "draft"]:
@ -210,7 +218,7 @@ def wp2fields(xml, wp_custpost=False):
def blogger2fields(xml): def blogger2fields(xml):
"""Opens a blogger XML file, and yield Pelican fields""" """Opens a blogger XML file, and yield Pelican fields"""
soup = xml_to_soup(xml) soup = file_to_soup(xml)
entries = soup.feed.findAll("entry") entries = soup.feed.findAll("entry")
for entry in entries: for entry in entries:
raw_kind = entry.find( raw_kind = entry.find(
@ -536,6 +544,133 @@ def tumblr2fields(api_key, blogname):
posts = _get_tumblr_posts(api_key, blogname, offset) posts = _get_tumblr_posts(api_key, blogname, offset)
def strip_medium_post_content(soup) -> str:
    """Return Medium post content as an HTML string, minus troublesome markup.

    ``section``, ``div`` and ``footer`` tags found below ``soup`` are
    unwrapped (the tag goes away, its children stay where they were), and the
    ``name``, ``id`` and ``class`` attributes are dropped from every remaining
    tag. ``soup`` itself is modified in place; the result is the concatenation
    of its direct children rendered as strings.

    The wrappers are removed because they can leave a section divider
    (--------------) that is not between two pieces of content, e.g.:

        Some text.

        .. container:: section-divider

           --------------

        .. container:: section-content

        More content.

    In this case, pandoc complains: "Unexpected section title or transition."

    The "id" and "name" attributes cause similar trouble: they show up in
    .rst as extra junk that separates transitions.
    """
    # Unwrap the container tags, keeping whatever they contain.
    # See https://stackoverflow.com/a/8439761
    unwanted_tags = ("section", "div", "footer")
    for tag_name in unwanted_tags:
        for found in soup.findAll(tag_name):
            found.replaceWithChildren()

    # Drop the noisy attributes from every tag that is left.
    # See https://stackoverflow.com/a/9045719
    unwanted_attributes = ("name", "id", "class")
    bs4 = _import_bs4()
    for node in soup.descendants:
        # Text nodes and comments carry no attributes; only tags do.
        if not isinstance(node, bs4.element.Tag):
            continue
        kept = {}
        for attr_name, attr_value in node.attrs.items():
            if attr_name not in unwanted_attributes:
                kept[attr_name] = attr_value
        node.attrs = kept

    # Serialize the remaining children, keeping all other tags intact.
    return "".join(map(str, soup.contents))
def mediumpost2fields(filepath: str) -> tuple:
    """Take an HTML post from a medium export, return Pelican fields.

    The post body is the ``<section class="e-content">`` element, the title
    comes from ``<title>``, the date from ``<time class="dt-published">`` and
    the author from ``<a class="p-author h-card">``. A post without a
    publication time is reported as a draft with no date.

    Returns a 10-tuple:
    ``(title, content, slug, date, author, None, tags, status, kind, "html")``
    where ``date`` is a ``"%Y-%m-%d %H:%M"`` string or ``None``, ``tags`` is
    always empty, ``status`` is ``"published"`` or ``"draft"`` and ``kind`` is
    ``"article"``.
    NOTE(review): the sixth slot is always ``None`` here -- presumably the
    category field; confirm against the consumer of these tuples.

    Raises:
        ValueError: if the file yields no soup or has no content section.
    """
    soup = file_to_soup(filepath, "html.parser")
    if not soup:
        raise ValueError(f"{filepath} could not be parsed by beautifulsoup")
    kind = "article"

    content = soup.find("section", class_="e-content")
    if not content:
        raise ValueError(f"{filepath}: Post has no content")

    # find() returns None when there is no <title> tag, and .string is None
    # when the tag is empty or has nested markup; fall back to "" in both
    # cases instead of failing with AttributeError on the missing tag.
    title_tag = soup.find("title")
    title = (title_tag.string if title_tag is not None else None) or ""

    raw_date = soup.find("time", class_="dt-published")
    date = None
    if raw_date:
        # This datetime can include timezone, e.g., "2017-04-21T17:11:55.799Z"
        # python before 3.11 can't parse the timezone using datetime.fromisoformat
        # See also https://docs.python.org/3.10/library/datetime.html#datetime.datetime.fromisoformat
        # "This does not support parsing arbitrary ISO 8601 strings"
        # So, we use dateutil.parser, which can handle it.
        date_object = dateutil.parser.parse(raw_date.attrs["datetime"])
        date = date_object.strftime("%Y-%m-%d %H:%M")
        status = "published"
    else:
        status = "draft"

    author = soup.find("a", class_="p-author h-card")
    if author:
        author = author.string

    # The lookups above rely on class attributes, which stripping removes
    # from the content subtree, so strip only once they are all done.
    content = strip_medium_post_content(content)

    # medium HTML export doesn't have tag or category
    # RSS feed has tags, but it doesn't have all the posts.
    tags = ()

    slug = medium_slug(filepath)

    # TODO: make the fields a python dataclass
    return (
        title,
        content,
        slug,
        date,
        author,
        None,
        tags,
        status,
        kind,
        "html",
    )
def medium_slug(filepath: str) -> str:
    """Make the filepath of a medium exported file into a slug.

    The slug is the file's base name without its extension, with "_-"
    collapsed to "-" and any trailing Medium post ID and/or "DRAFT" marker
    (each introduced by one or more dashes) removed.

    For example ``posts/2017-04-27_A-long-title--2971442227dd.html`` becomes
    ``2017-04-27_A-long-title``.
    """
    # slug: filename without extension
    slug = os.path.splitext(os.path.basename(filepath))[0]
    # A medium export filename looks like date_-title-...html
    # But, RST doesn't like "_-" (see https://github.com/sphinx-doc/sphinx/issues/4350)
    # so get rid of it
    slug = slug.replace("_-", "-")
    # Drop the hex ID medium appends to the filename (e.g. "-a8a8a8a8" or
    # "---a9a9a9a9"); drafts also carry a "--DRAFT" marker that is not needed.
    # Only runs of 8 or more hex characters are treated as an ID: with no
    # minimum length, ordinary trailing words or numbers made of hex
    # characters ("decade", "10", the day of a date) would be stripped too.
    return re.sub(r"(?:-+(?:[0-9a-f]{8,}|DRAFT))+$", "", slug)
def mediumposts2fields(medium_export_dir: str):
    """Take HTML posts in a medium export directory, and yield Pelican fields.

    Yields one field tuple per ``.html`` file directly inside
    ``medium_export_dir``, in sorted filename order. Subdirectories and files
    with any other extension are skipped.
    """
    # os.listdir returns entries in arbitrary order; sort so repeated imports
    # process posts in the same sequence.
    for filename in sorted(os.listdir(medium_export_dir)):
        filepath = os.path.join(medium_export_dir, filename)
        # Stray entries (editor backups, ".DS_Store", nested folders) are not
        # posts; trying to parse them would abort the whole import.
        if not filename.lower().endswith(".html"):
            continue
        if not os.path.isfile(filepath):
            continue
        yield mediumpost2fields(filepath)
def feed2fields(file): def feed2fields(file):
"""Read a feed and yield pelican fields""" """Read a feed and yield pelican fields"""
import feedparser import feedparser
@ -711,7 +846,7 @@ def get_attachments(xml):
"""returns a dictionary of posts that have attachments with a list """returns a dictionary of posts that have attachments with a list
of the attachment_urls of the attachment_urls
""" """
soup = xml_to_soup(xml) soup = file_to_soup(xml)
items = soup.rss.channel.findAll("item") items = soup.rss.channel.findAll("item")
names = {} names = {}
attachments = [] attachments = []
@ -837,6 +972,9 @@ def fields2pelican(
posts_require_pandoc.append(filename) posts_require_pandoc.append(filename)
slug = not disable_slugs and filename or None slug = not disable_slugs and filename or None
assert slug is None or filename == os.path.basename(
filename
), f"filename is not a basename: {filename}"
if wp_attach and attachments: if wp_attach and attachments:
try: try:
@ -984,6 +1122,9 @@ def main():
parser.add_argument( parser.add_argument(
"--dotclear", action="store_true", dest="dotclear", help="Dotclear export" "--dotclear", action="store_true", dest="dotclear", help="Dotclear export"
) )
parser.add_argument(
"--medium", action="store_true", dest="medium", help="Medium export"
)
parser.add_argument( parser.add_argument(
"--tumblr", action="store_true", dest="tumblr", help="Tumblr export" "--tumblr", action="store_true", dest="tumblr", help="Tumblr export"
) )
@ -1069,6 +1210,8 @@ def main():
input_type = "blogger" input_type = "blogger"
elif args.dotclear: elif args.dotclear:
input_type = "dotclear" input_type = "dotclear"
elif args.medium:
input_type = "medium"
elif args.tumblr: elif args.tumblr:
input_type = "tumblr" input_type = "tumblr"
elif args.wpfile: elif args.wpfile:
@ -1077,8 +1220,8 @@ def main():
input_type = "feed" input_type = "feed"
else: else:
error = ( error = (
"You must provide either --blogger, --dotclear, " "You must provide one of --blogger, --dotclear, "
"--tumblr, --wpfile or --feed options" "--medium, --tumblr, --wpfile or --feed options"
) )
exit(error) exit(error)
@ -1097,12 +1240,16 @@ def main():
fields = blogger2fields(args.input) fields = blogger2fields(args.input)
elif input_type == "dotclear": elif input_type == "dotclear":
fields = dc2fields(args.input) fields = dc2fields(args.input)
elif input_type == "medium":
fields = mediumposts2fields(args.input)
elif input_type == "tumblr": elif input_type == "tumblr":
fields = tumblr2fields(args.input, args.blogname) fields = tumblr2fields(args.input, args.blogname)
elif input_type == "wordpress": elif input_type == "wordpress":
fields = wp2fields(args.input, args.wp_custpost or False) fields = wp2fields(args.input, args.wp_custpost or False)
elif input_type == "feed": elif input_type == "feed":
fields = feed2fields(args.input) fields = feed2fields(args.input)
else:
raise ValueError(f"Unhandled input_type {input_type}")
if args.wp_attach: if args.wp_attach:
attachments = get_attachments(args.input) attachments = get_attachments(args.input)