From ff7433f1614818d035afcd235266d68830fd73db Mon Sep 17 00:00:00 2001 From: Roland Askew Date: Fri, 11 Jan 2019 19:20:14 +1300 Subject: [PATCH] Fix #2499: pelican-import assert when downloading a WordPress attachment whose URL contains non-ASCII chars. --- pelican/tools/pelican_import.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pelican/tools/pelican_import.py b/pelican/tools/pelican_import.py index 346fca16..84dead13 100755 --- a/pelican/tools/pelican_import.py +++ b/pelican/tools/pelican_import.py @@ -12,7 +12,7 @@ from codecs import open from collections import defaultdict from six.moves.urllib.error import URLError -from six.moves.urllib.parse import urlparse +from six.moves.urllib.parse import urlparse, urlsplit, urlunsplit, quote from six.moves.urllib.request import urlretrieve # because logging.setLoggerClass has to be called before logging.getLogger @@ -729,6 +729,12 @@ def download_attachments(output_path, urls): full_path = os.path.join(output_path, localpath) if not os.path.exists(full_path): os.makedirs(full_path) + + # Generate percent-encoded URL + scheme, netloc, path, query, fragment = urlsplit(url) + path = quote(path) + url = urlunsplit((scheme, netloc, path, query, fragment)) + print('downloading {}'.format(filename)) try: urlretrieve(url, os.path.join(full_path, filename))