mirror of
https://github.com/getpelican/pelican.git
synced 2025-10-15 20:28:56 +02:00
importer: update links to attachments if --wp-attach
This commit is contained in:
parent
a6600e2fca
commit
a5571ba1d5
2 changed files with 27 additions and 15 deletions
|
|
@ -80,10 +80,11 @@ Optional arguments
|
||||||
as "/post_type/category/" (wordpress import only)
|
as "/post_type/category/" (wordpress import only)
|
||||||
--wp-attach Download files uploaded to wordpress as attachments.
|
--wp-attach Download files uploaded to wordpress as attachments.
|
||||||
Files will be added to posts as a list in the post
|
Files will be added to posts as a list in the post
|
||||||
header. All files will be downloaded, even if they
|
header and links to the files within the post will be
|
||||||
|
updated. All files will be downloaded, even if they
|
||||||
aren't associated with a post. Files will be downloaded
|
aren't associated with a post. Files will be downloaded
|
||||||
with their original path inside the output directory,
|
with their original path inside the output directory,
|
||||||
e.g. "output/wp-uploads/date/postname/file.jpg"
|
e.g. "output/wp-uploads/date/postname/file.jpg".
|
||||||
(wordpress import only) (requires an internet
|
(wordpress import only) (requires an internet
|
||||||
connection)
|
connection)
|
||||||
--disable-slugs Disable storing slugs from imported posts within
|
--disable-slugs Disable storing slugs from imported posts within
|
||||||
|
|
|
||||||
|
|
@ -10,6 +10,7 @@ import subprocess
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
from codecs import open
|
from codecs import open
|
||||||
|
from collections import defaultdict
|
||||||
|
|
||||||
from six.moves.urllib.error import URLError
|
from six.moves.urllib.error import URLError
|
||||||
from six.moves.urllib.parse import urlparse
|
from six.moves.urllib.parse import urlparse
|
||||||
|
|
@ -633,7 +634,7 @@ def get_attachments(xml):
|
||||||
else:
|
else:
|
||||||
filename = get_filename(filename, post_id)
|
filename = get_filename(filename, post_id)
|
||||||
names[post_id] = filename
|
names[post_id] = filename
|
||||||
attachedposts = {}
|
attachedposts = defaultdict(list)
|
||||||
for parent, url in attachments:
|
for parent, url in attachments:
|
||||||
try:
|
try:
|
||||||
parent_name = names[parent]
|
parent_name = names[parent]
|
||||||
|
|
@ -641,11 +642,7 @@ def get_attachments(xml):
|
||||||
# attachment's parent is not a valid post
|
# attachment's parent is not a valid post
|
||||||
parent_name = None
|
parent_name = None
|
||||||
|
|
||||||
try:
|
attachedposts[parent_name].append(url)
|
||||||
attachedposts[parent_name].append(url)
|
|
||||||
except KeyError:
|
|
||||||
attachedposts[parent_name] = []
|
|
||||||
attachedposts[parent_name].append(url)
|
|
||||||
return attachedposts
|
return attachedposts
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -653,7 +650,7 @@ def download_attachments(output_path, urls):
|
||||||
"""Downloads WordPress attachments and returns a list of paths to
|
"""Downloads WordPress attachments and returns a dict (url -> path) of
|
||||||
attachments that can be associated with a post (relative path to output
|
attachments that can be associated with a post (relative path to output
|
||||||
directory). Files that fail to download, will not be added to posts"""
|
directory). Files that fail to download, will not be added to posts"""
|
||||||
locations = []
|
locations = {}
|
||||||
for url in urls:
|
for url in urls:
|
||||||
path = urlparse(url).path
|
path = urlparse(url).path
|
||||||
# teardown path and rebuild to negate any errors with
|
# teardown path and rebuild to negate any errors with
|
||||||
|
|
@ -670,13 +667,23 @@ def download_attachments(output_path, urls):
|
||||||
print('downloading {}'.format(filename))
|
print('downloading {}'.format(filename))
|
||||||
try:
|
try:
|
||||||
urlretrieve(url, os.path.join(full_path, filename))
|
urlretrieve(url, os.path.join(full_path, filename))
|
||||||
locations.append(os.path.join(localpath, filename))
|
locations[url] = os.path.join(localpath, filename)
|
||||||
except (URLError, IOError) as e:
|
except (URLError, IOError) as e:
|
||||||
# Python 2.7 throws an IOError rather than URLError
|
# Python 2.7 throws an IOError rather than URLError
|
||||||
logger.warning("No file could be downloaded from %s\n%s", url, e)
|
logger.warning("No file could be downloaded from %s\n%s", url, e)
|
||||||
return locations
|
return locations
|
||||||
|
|
||||||
|
|
||||||
|
def update_links_to_attached_files(content, attachments):
    """Point links to downloaded attachments at their local copies.

    Every occurrence of an attachment's original URL in ``content`` is
    replaced by ``'{filename}'`` followed by the attachment's local path.
    Both the ``http://`` and the ``https://`` spelling of each URL are
    replaced, whichever of the two the mapping happens to contain.

    :param content: text of the post whose links should be rewritten
    :param attachments: dict mapping an original attachment URL to the
        path of its downloaded copy; may be empty or ``None``
    :return: ``content`` with all matching URLs replaced
    """
    if not attachments:
        # nothing was downloaded, so there is nothing to rewrite
        return content

    # Replace the longest URLs first. If one URL is a prefix of another
    # (e.g. ".../a.jpg" and ".../a.jpg.bak"), substituting the shorter one
    # first would rewrite part of the longer link and leave it pointing at
    # the wrong file. Sorting also makes the result independent of dict
    # iteration order.
    ordered = sorted(
        attachments.items(), key=lambda pair: len(pair[0]), reverse=True)

    for old_url, new_path in ordered:
        # url may occur both with http:// and https://
        http_url = old_url.replace('https://', 'http://')
        https_url = old_url.replace('http://', 'https://')
        for url in (http_url, https_url):
            content = content.replace(url, '{filename}' + new_path)
    return content
|
||||||
|
|
||||||
|
|
||||||
def fields2pelican(
|
def fields2pelican(
|
||||||
fields, out_markup, output_path,
|
fields, out_markup, output_path,
|
||||||
dircat=False, strip_raw=False, disable_slugs=False,
|
dircat=False, strip_raw=False, disable_slugs=False,
|
||||||
|
|
@ -691,21 +698,22 @@ def fields2pelican(
|
||||||
if wp_attach and attachments:
|
if wp_attach and attachments:
|
||||||
try:
|
try:
|
||||||
urls = attachments[filename]
|
urls = attachments[filename]
|
||||||
attached_files = download_attachments(output_path, urls)
|
links = download_attachments(output_path, urls)
|
||||||
except KeyError:
|
except KeyError:
|
||||||
attached_files = None
|
links = None
|
||||||
else:
|
else:
|
||||||
attached_files = None
|
links = None
|
||||||
|
|
||||||
ext = get_ext(out_markup, in_markup)
|
ext = get_ext(out_markup, in_markup)
|
||||||
if ext == '.md':
|
if ext == '.md':
|
||||||
header = build_markdown_header(
|
header = build_markdown_header(
|
||||||
title, date, author, categories, tags, slug,
|
title, date, author, categories, tags, slug,
|
||||||
status, attached_files)
|
status, links.values() if links else None)
|
||||||
else:
|
else:
|
||||||
out_markup = 'rst'
|
out_markup = 'rst'
|
||||||
header = build_header(title, date, author, categories,
|
header = build_header(title, date, author, categories,
|
||||||
tags, slug, status, attached_files)
|
tags, slug, status, links.values()
|
||||||
|
if links else None)
|
||||||
|
|
||||||
out_filename = get_out_filename(
|
out_filename = get_out_filename(
|
||||||
output_path, filename, ext, kind, dirpage, dircat,
|
output_path, filename, ext, kind, dirpage, dircat,
|
||||||
|
|
@ -756,6 +764,9 @@ def fields2pelican(
|
||||||
content = content.replace('\\\n ', ' \n')
|
content = content.replace('\\\n ', ' \n')
|
||||||
content = content.replace('\\\n', ' \n')
|
content = content.replace('\\\n', ' \n')
|
||||||
|
|
||||||
|
if wp_attach and links:
|
||||||
|
content = update_links_to_attached_files(content, links)
|
||||||
|
|
||||||
with open(out_filename, 'w', encoding='utf-8') as fs:
|
with open(out_filename, 'w', encoding='utf-8') as fs:
|
||||||
fs.write(header + content)
|
fs.write(header + content)
|
||||||
if wp_attach and attachments and None in attachments:
|
if wp_attach and attachments and None in attachments:
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue