1
0
Fork 0
forked from github/pelican

Merge pull request #2380 from oulenz/attachment_links

importer: update links to attachments if --wp-attach
This commit is contained in:
Justin Mayer 2018-07-09 12:54:59 +02:00 committed by GitHub
commit 76eebfe539
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 27 additions and 15 deletions

View file

@ -80,10 +80,11 @@ Optional arguments
as "/post_type/category/" (wordpress import only)
--wp-attach Download files uploaded to wordpress as attachments.
Files will be added to posts as a list in the post
header. All files will be downloaded, even if they
header and links to the files within the post will be
updated. All files will be downloaded, even if they
aren't associated with a post. Files will be downloaded
with their original path inside the output directory,
e.g. "output/wp-uploads/date/postname/file.jpg"
e.g. "output/wp-uploads/date/postname/file.jpg".
(wordpress import only) (requires an internet
connection)
--disable-slugs Disable storing slugs from imported posts within

View file

@ -10,6 +10,7 @@ import subprocess
import sys
import time
from codecs import open
from collections import defaultdict
from six.moves.urllib.error import URLError
from six.moves.urllib.parse import urlparse
@ -633,7 +634,7 @@ def get_attachments(xml):
else:
filename = get_filename(filename, post_id)
names[post_id] = filename
attachedposts = {}
attachedposts = defaultdict(list)
for parent, url in attachments:
try:
parent_name = names[parent]
@ -641,11 +642,7 @@ def get_attachments(xml):
# attachment's parent is not a valid post
parent_name = None
try:
attachedposts[parent_name].append(url)
except KeyError:
attachedposts[parent_name] = []
attachedposts[parent_name].append(url)
attachedposts[parent_name].append(url)
return attachedposts
@ -653,7 +650,7 @@ def download_attachments(output_path, urls):
"""Downloads WordPress attachments and returns a list of paths to
attachments that can be associated with a post (relative path to output
directory). Files that fail to download, will not be added to posts"""
locations = []
locations = {}
for url in urls:
path = urlparse(url).path
# teardown path and rebuild to negate any errors with
@ -670,13 +667,23 @@ def download_attachments(output_path, urls):
print('downloading {}'.format(filename))
try:
urlretrieve(url, os.path.join(full_path, filename))
locations.append(os.path.join(localpath, filename))
locations[url] = os.path.join(localpath, filename)
except (URLError, IOError) as e:
# Python 2.7 throws an IOError rather Than URLError
logger.warning("No file could be downloaded from %s\n%s", url, e)
return locations
def update_links_to_attached_files(content, attachments):
for old_url, new_path in attachments.items():
# url may occur both with http:// and https://
http_url = old_url.replace('https://', 'http://')
https_url = old_url.replace('http://', 'https://')
for url in [http_url, https_url]:
content = content.replace(url, '{filename}' + new_path)
return content
def fields2pelican(
fields, out_markup, output_path,
dircat=False, strip_raw=False, disable_slugs=False,
@ -691,21 +698,22 @@ def fields2pelican(
if wp_attach and attachments:
try:
urls = attachments[filename]
attached_files = download_attachments(output_path, urls)
links = download_attachments(output_path, urls)
except KeyError:
attached_files = None
links = None
else:
attached_files = None
links = None
ext = get_ext(out_markup, in_markup)
if ext == '.md':
header = build_markdown_header(
title, date, author, categories, tags, slug,
status, attached_files)
status, links.values() if links else None)
else:
out_markup = 'rst'
header = build_header(title, date, author, categories,
tags, slug, status, attached_files)
tags, slug, status, links.values()
if links else None)
out_filename = get_out_filename(
output_path, filename, ext, kind, dirpage, dircat,
@ -756,6 +764,9 @@ def fields2pelican(
content = content.replace('\\\n ', ' \n')
content = content.replace('\\\n', ' \n')
if wp_attach and links:
content = update_links_to_attached_files(content, links)
with open(out_filename, 'w', encoding='utf-8') as fs:
fs.write(header + content)
if wp_attach and attachments and None in attachments: