forked from github/pelican
Merge pull request #2380 from oulenz/attachment_links
importer: update links to attachments if --wp-attach
This commit is contained in:
commit
76eebfe539
2 changed files with 27 additions and 15 deletions
|
|
@ -80,10 +80,11 @@ Optional arguments
|
|||
as "/post_type/category/" (wordpress import only)
|
||||
--wp-attach Download files uploaded to wordpress as attachments.
|
||||
Files will be added to posts as a list in the post
|
||||
header. All files will be downloaded, even if they
|
||||
header and links to the files within the post will be
|
||||
updated. All files will be downloaded, even if they
|
||||
aren't associated with a post. Files will be downloaded
|
||||
with their original path inside the output directory,
|
||||
e.g. "output/wp-uploads/date/postname/file.jpg"
|
||||
e.g. "output/wp-uploads/date/postname/file.jpg".
|
||||
(wordpress import only) (requires an internet
|
||||
connection)
|
||||
--disable-slugs Disable storing slugs from imported posts within
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ import subprocess
|
|||
import sys
|
||||
import time
|
||||
from codecs import open
|
||||
from collections import defaultdict
|
||||
|
||||
from six.moves.urllib.error import URLError
|
||||
from six.moves.urllib.parse import urlparse
|
||||
|
|
@ -633,7 +634,7 @@ def get_attachments(xml):
|
|||
else:
|
||||
filename = get_filename(filename, post_id)
|
||||
names[post_id] = filename
|
||||
attachedposts = {}
|
||||
attachedposts = defaultdict(list)
|
||||
for parent, url in attachments:
|
||||
try:
|
||||
parent_name = names[parent]
|
||||
|
|
@ -641,11 +642,7 @@ def get_attachments(xml):
|
|||
# attachment's parent is not a valid post
|
||||
parent_name = None
|
||||
|
||||
try:
|
||||
attachedposts[parent_name].append(url)
|
||||
except KeyError:
|
||||
attachedposts[parent_name] = []
|
||||
attachedposts[parent_name].append(url)
|
||||
attachedposts[parent_name].append(url)
|
||||
return attachedposts
|
||||
|
||||
|
||||
|
|
@ -653,7 +650,7 @@ def download_attachments(output_path, urls):
|
|||
"""Downloads WordPress attachments and returns a list of paths to
|
||||
attachments that can be associated with a post (relative path to output
|
||||
directory). Files that fail to download will not be added to posts"""
|
||||
locations = []
|
||||
locations = {}
|
||||
for url in urls:
|
||||
path = urlparse(url).path
|
||||
# teardown path and rebuild to negate any errors with
|
||||
|
|
@ -670,13 +667,23 @@ def download_attachments(output_path, urls):
|
|||
print('downloading {}'.format(filename))
|
||||
try:
|
||||
urlretrieve(url, os.path.join(full_path, filename))
|
||||
locations.append(os.path.join(localpath, filename))
|
||||
locations[url] = os.path.join(localpath, filename)
|
||||
except (URLError, IOError) as e:
|
||||
# Python 2.7 throws an IOError rather than a URLError
|
||||
logger.warning("No file could be downloaded from %s\n%s", url, e)
|
||||
return locations
|
||||
|
||||
|
||||
def update_links_to_attached_files(content, attachments):
    """Rewrite links to downloaded attachments so they use the local copy.

    Each occurrence in ``content`` of an attachment's original URL is
    replaced by ``'{filename}'`` followed by the attachment's local path.
    A post may reference the same file through ``http://`` or ``https://``,
    so both forms of every URL are rewritten.

    :param content: text of the post body.
    :param attachments: mapping of original attachment URL to local path.
    :return: ``content`` with all matching URLs replaced.
    """
    schemes = ('http://', 'https://')
    for old_url, new_path in attachments.items():
        new_link = '{filename}' + new_path
        # Always try the URL exactly as given, then add the variant with the
        # other scheme.  Only the *leading* scheme is swapped: replacing
        # every 'http://' inside the URL would also alter a URL embedded in
        # e.g. a query string, and then neither variant would match the
        # link as it actually appears in the post.
        variants = [old_url]
        for scheme in schemes:
            if old_url.startswith(scheme):
                rest = old_url[len(scheme):]
                variants.extend(
                    other + rest for other in schemes if other != scheme)
        for url in variants:
            content = content.replace(url, new_link)
    return content
|
||||
|
||||
|
||||
def fields2pelican(
|
||||
fields, out_markup, output_path,
|
||||
dircat=False, strip_raw=False, disable_slugs=False,
|
||||
|
|
@ -691,21 +698,22 @@ def fields2pelican(
|
|||
if wp_attach and attachments:
|
||||
try:
|
||||
urls = attachments[filename]
|
||||
attached_files = download_attachments(output_path, urls)
|
||||
links = download_attachments(output_path, urls)
|
||||
except KeyError:
|
||||
attached_files = None
|
||||
links = None
|
||||
else:
|
||||
attached_files = None
|
||||
links = None
|
||||
|
||||
ext = get_ext(out_markup, in_markup)
|
||||
if ext == '.md':
|
||||
header = build_markdown_header(
|
||||
title, date, author, categories, tags, slug,
|
||||
status, attached_files)
|
||||
status, links.values() if links else None)
|
||||
else:
|
||||
out_markup = 'rst'
|
||||
header = build_header(title, date, author, categories,
|
||||
tags, slug, status, attached_files)
|
||||
tags, slug, status, links.values()
|
||||
if links else None)
|
||||
|
||||
out_filename = get_out_filename(
|
||||
output_path, filename, ext, kind, dirpage, dircat,
|
||||
|
|
@ -756,6 +764,9 @@ def fields2pelican(
|
|||
content = content.replace('\\\n ', ' \n')
|
||||
content = content.replace('\\\n', ' \n')
|
||||
|
||||
if wp_attach and links:
|
||||
content = update_links_to_attached_files(content, links)
|
||||
|
||||
with open(out_filename, 'w', encoding='utf-8') as fs:
|
||||
fs.write(header + content)
|
||||
if wp_attach and attachments and None in attachments:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue