mirror of
https://github.com/getpelican/pelican.git
synced 2026-05-27 17:36:12 +02:00
Add wp-resolve option to resolve links in content to attachments of the form
http://servername?id=1 In theory these could link to anything in wordpress, I only have one example and it is to an attachment.
This commit is contained in:
parent
b7cfe0c4f9
commit
23b5f4448e
1 changed files with 60 additions and 16 deletions
|
|
@ -678,35 +678,57 @@ def get_out_filename(output_path, filename, ext, kind,
|
||||||
|
|
||||||
|
|
||||||
def get_attachments(xml):
|
def get_attachments(xml):
|
||||||
|
"""
|
||||||
|
|
||||||
|
:param xml:
|
||||||
|
:return: attachment_urls
|
||||||
|
"""
|
||||||
"""returns a dictionary of posts that have attachments with a list
|
"""returns a dictionary of posts that have attachments with a list
|
||||||
of the attachment_urls
|
of the attachment_urls
|
||||||
"""
|
"""
|
||||||
soup = xml_to_soup(xml)
|
soup = xml_to_soup(xml)
|
||||||
|
server = soup.rss.channel.link.string
|
||||||
items = soup.rss.channel.findAll('item')
|
items = soup.rss.channel.findAll('item')
|
||||||
names = {}
|
names = {}
|
||||||
attachments = []
|
attachments = []
|
||||||
|
attachments_by_id = defaultdict(set)
|
||||||
|
attachment_ids = {}
|
||||||
|
|
||||||
for item in items:
|
for item in items:
|
||||||
kind = item.find('post_type').string
|
kind = item.find('post_type').string
|
||||||
post_name = item.find('post_name').string
|
post_name = item.find('post_name').string
|
||||||
post_id = item.find('post_id').string
|
post_id = item.find('post_id').string
|
||||||
|
|
||||||
|
if kind not in ['attachment', 'post']:
|
||||||
|
continue
|
||||||
|
|
||||||
|
filename = get_filename(post_name, post_id)
|
||||||
|
names[post_id] = filename
|
||||||
|
|
||||||
if kind == 'attachment':
|
if kind == 'attachment':
|
||||||
attachments.append((item.find('post_parent').string,
|
attachments.append((item.find('post_parent').string,
|
||||||
item.find('attachment_url').string))
|
item.find('attachment_url').string))
|
||||||
else:
|
attachment_ids[post_id] = item.find('attachment_url').string
|
||||||
filename = get_filename(post_name, post_id)
|
elif kind == 'post':
|
||||||
names[post_id] = filename
|
content = item.find('encoded').string
|
||||||
|
find_attachment = re.compile(r'({}/\?attachment_id=(\d+))'.format(server))
|
||||||
|
for url, attachment_id in find_attachment.findall(content):
|
||||||
|
attachments_by_id[filename].add((url, attachment_id))
|
||||||
|
|
||||||
attachedposts = defaultdict(set)
|
attachedposts = defaultdict(set)
|
||||||
for parent, url in attachments:
|
for parent, url in attachments:
|
||||||
try:
|
if parent in names: # check parent post is valid
|
||||||
parent_name = names[parent]
|
parent_name = names[parent]
|
||||||
except KeyError:
|
attachedposts[parent_name].add(url)
|
||||||
# attachment's parent is not a valid post
|
|
||||||
parent_name = None
|
|
||||||
|
|
||||||
attachedposts[parent_name].add(url)
|
attachment_links = defaultdict(set)
|
||||||
return attachedposts
|
if attachments_by_id:
|
||||||
|
for filename, links in attachments_by_id.items():
|
||||||
|
for url, attachment_id in links:
|
||||||
|
destination = attachment_ids[attachment_id]
|
||||||
|
attachment_links[filename].add((url, destination))
|
||||||
|
|
||||||
|
return dict(attachedposts), dict(attachment_links)
|
||||||
|
|
||||||
|
|
||||||
def download_attachments(output_path, urls):
|
def download_attachments(output_path, urls):
|
||||||
|
|
@ -744,6 +766,15 @@ def download_attachments(output_path, urls):
|
||||||
return locations
|
return locations
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_attachments(urls):
|
||||||
|
attachment_links = {}
|
||||||
|
for src, dst in urls:
|
||||||
|
dst = urlparse(dst).path
|
||||||
|
attachment_links[src] = dst
|
||||||
|
|
||||||
|
return attachment_links
|
||||||
|
|
||||||
|
|
||||||
def is_pandoc_needed(in_markup):
|
def is_pandoc_needed(in_markup):
|
||||||
return in_markup in ('html', 'wp-html')
|
return in_markup in ('html', 'wp-html')
|
||||||
|
|
||||||
|
|
@ -773,7 +804,8 @@ def fields2pelican(
|
||||||
fields, out_markup, output_path,
|
fields, out_markup, output_path,
|
||||||
dircat=False, strip_raw=False, disable_slugs=False,
|
dircat=False, strip_raw=False, disable_slugs=False,
|
||||||
dirpage=False, filename_template=None, filter_author=None,
|
dirpage=False, filename_template=None, filter_author=None,
|
||||||
wp_custpost=False, wp_attach=False, attachments=None):
|
wp_custpost=False, wp_attach=False, wp_resolve=False,
|
||||||
|
attachments=None, attachment_links=None):
|
||||||
|
|
||||||
pandoc_version = get_pandoc_version()
|
pandoc_version = get_pandoc_version()
|
||||||
posts_require_pandoc = []
|
posts_require_pandoc = []
|
||||||
|
|
@ -790,14 +822,18 @@ def fields2pelican(
|
||||||
|
|
||||||
slug = not disable_slugs and filename or None
|
slug = not disable_slugs and filename or None
|
||||||
|
|
||||||
|
links = {}
|
||||||
if wp_attach and attachments:
|
if wp_attach and attachments:
|
||||||
try:
|
try:
|
||||||
urls = attachments[filename]
|
urls = attachments[filename]
|
||||||
links = download_attachments(output_path, urls)
|
links = download_attachments(output_path, urls)
|
||||||
except KeyError:
|
except KeyError:
|
||||||
links = None
|
pass
|
||||||
else:
|
|
||||||
links = None
|
if wp_resolve and attachment_links:
|
||||||
|
urls = attachment_links.get(filename)
|
||||||
|
if urls:
|
||||||
|
links.update(resolve_attachments(urls))
|
||||||
|
|
||||||
ext = get_ext(out_markup, in_markup)
|
ext = get_ext(out_markup, in_markup)
|
||||||
if ext == '.adoc':
|
if ext == '.adoc':
|
||||||
|
|
@ -948,6 +984,12 @@ def main():
|
||||||
'with their original path inside the output directory. '
|
'with their original path inside the output directory. '
|
||||||
'e.g. output/wp-uploads/date/postname/file.jpg '
|
'e.g. output/wp-uploads/date/postname/file.jpg '
|
||||||
'-- Requires an internet connection --')
|
'-- Requires an internet connection --')
|
||||||
|
parser.add_argument(
|
||||||
|
'--wp-resolve', action='store_true', dest='wp_resolve',
|
||||||
|
help='(wordpress import only) Attempt to resolve links in content that '
|
||||||
|
'refer to attachments ids. '
|
||||||
|
'Replace links in content of the form https://servername?id=23 '
|
||||||
|
'with links of the form https://servername/post-slug-name ')
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'--disable-slugs', action='store_true',
|
'--disable-slugs', action='store_true',
|
||||||
dest='disable_slugs',
|
dest='disable_slugs',
|
||||||
|
|
@ -1010,9 +1052,9 @@ def main():
|
||||||
fields = feed2fields(args.input)
|
fields = feed2fields(args.input)
|
||||||
|
|
||||||
if args.wp_attach:
|
if args.wp_attach:
|
||||||
attachments = get_attachments(args.input)
|
attachments, attachment_links = get_attachments(args.input)
|
||||||
else:
|
else:
|
||||||
attachments = None
|
attachments, attachment_links = None, None
|
||||||
|
|
||||||
# init logging
|
# init logging
|
||||||
init()
|
init()
|
||||||
|
|
@ -1024,4 +1066,6 @@ def main():
|
||||||
filter_author=args.author,
|
filter_author=args.author,
|
||||||
wp_custpost=args.wp_custpost or False,
|
wp_custpost=args.wp_custpost or False,
|
||||||
wp_attach=args.wp_attach or False,
|
wp_attach=args.wp_attach or False,
|
||||||
attachments=attachments or None)
|
wp_resolve=args.wp_resolve or False,
|
||||||
|
attachments=attachments or None,
|
||||||
|
attachment_links=attachment_links or None)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue