mirror of
https://github.com/getpelican/pelican.git
synced 2025-10-15 20:28:56 +02:00
Add a wp-resolve option to resolve links in content that point to attachments and have the form
http://servername?id=1. In theory these could link to anything in WordPress, but I only have one example, and it links to an attachment.
This commit is contained in:
parent
b7cfe0c4f9
commit
23b5f4448e
1 changed files with 60 additions and 16 deletions
|
|
@ -678,35 +678,57 @@ def get_out_filename(output_path, filename, ext, kind,
|
|||
|
||||
|
||||
def get_attachments(xml):
    """Collect attachment information from a WordPress export.

    Only items whose ``post_type`` is ``'attachment'`` or ``'post'`` are
    considered; every other item is ignored.

    :param xml: the export, passed unchanged to ``xml_to_soup``.
    :return: a ``(attachedposts, attachment_links)`` tuple of plain dicts.
        ``attachedposts`` maps the filename of a parent item to the set of
        attachment URLs whose ``post_parent`` is that item; attachments
        whose parent is not a known item are left out.
        ``attachment_links`` maps the filename of a post to a set of
        ``(url, destination)`` pairs, where ``url`` is a
        ``<server>/?attachment_id=N`` link found in the post content and
        ``destination`` is the ``attachment_url`` of attachment ``N``.
        Links to ids that are not in the export, or that have no
        ``attachment_url``, are left out.
    """
    soup = xml_to_soup(xml)
    server = soup.rss.channel.link.string
    items = soup.rss.channel.findAll('item')
    names = {}
    attachments = []
    attachments_by_id = defaultdict(set)
    attachment_ids = {}

    # Compiled once for all posts. The server URL is escaped so that its
    # dots (and any other regex metacharacters) only match literally.
    find_attachment = re.compile(
        r'({}/\?attachment_id=(\d+))'.format(re.escape(str(server))))

    for item in items:
        kind = item.find('post_type').string
        post_name = item.find('post_name').string
        post_id = item.find('post_id').string

        if kind not in ('attachment', 'post'):
            continue

        filename = get_filename(post_name, post_id)
        names[post_id] = filename

        if kind == 'attachment':
            parent_id = item.find('post_parent').string
            attachment_url = item.find('attachment_url').string
            attachments.append((parent_id, attachment_url))
            attachment_ids[post_id] = attachment_url
        else:
            content = item.find('encoded').string
            if not content:
                # no content to scan for links
                continue
            for url, attachment_id in find_attachment.findall(content):
                attachments_by_id[filename].add((url, attachment_id))

    attachedposts = defaultdict(set)
    for parent, url in attachments:
        if parent in names:  # check parent post is valid
            attachedposts[names[parent]].add(url)

    attachment_links = defaultdict(set)
    for filename, links in attachments_by_id.items():
        for url, attachment_id in links:
            destination = attachment_ids.get(attachment_id)
            if destination is None:
                # The content links to an id the export has no usable
                # attachment for; leave that link unresolved.
                continue
            attachment_links[filename].add((url, destination))

    return dict(attachedposts), dict(attachment_links)
|
||||
|
||||
|
||||
def download_attachments(output_path, urls):
|
||||
|
|
@ -744,6 +766,15 @@ def download_attachments(output_path, urls):
|
|||
return locations
|
||||
|
||||
|
||||
def resolve_attachments(urls):
    """Map each source link to the path component of its destination URL.

    :param urls: iterable of ``(src, dst)`` pairs, where ``src`` is the link
        to replace and ``dst`` is the URL it resolves to.
    :return: dict mapping every ``src`` to ``urlparse(dst).path``. Pairs
        whose ``dst`` is ``None`` or empty are left out, so such a link is
        never mapped to an empty or non-string path. When the same ``src``
        occurs more than once, the last usable pair wins.
    """
    attachment_links = {}
    for src, dst in urls:
        if not dst:
            # nothing to resolve this link to
            continue
        attachment_links[src] = urlparse(dst).path

    return attachment_links
|
||||
|
||||
|
||||
def is_pandoc_needed(in_markup):
    """Return whether ``in_markup`` is one of the HTML input markups.

    :param in_markup: name of the input markup.
    :return: ``True`` for ``'html'`` and ``'wp-html'``, ``False`` for
        anything else.
    """
    html_markups = ('html', 'wp-html')
    return in_markup in html_markups
|
||||
|
||||
|
|
@ -773,7 +804,8 @@ def fields2pelican(
|
|||
fields, out_markup, output_path,
|
||||
dircat=False, strip_raw=False, disable_slugs=False,
|
||||
dirpage=False, filename_template=None, filter_author=None,
|
||||
wp_custpost=False, wp_attach=False, attachments=None):
|
||||
wp_custpost=False, wp_attach=False, wp_resolve=False,
|
||||
attachments=None, attachment_links=None):
|
||||
|
||||
pandoc_version = get_pandoc_version()
|
||||
posts_require_pandoc = []
|
||||
|
|
@ -790,14 +822,18 @@ def fields2pelican(
|
|||
|
||||
slug = not disable_slugs and filename or None
|
||||
|
||||
links = {}
|
||||
if wp_attach and attachments:
|
||||
try:
|
||||
urls = attachments[filename]
|
||||
links = download_attachments(output_path, urls)
|
||||
except KeyError:
|
||||
links = None
|
||||
else:
|
||||
links = None
|
||||
pass
|
||||
|
||||
if wp_resolve and attachment_links:
|
||||
urls = attachment_links.get(filename)
|
||||
if urls:
|
||||
links.update(resolve_attachments(urls))
|
||||
|
||||
ext = get_ext(out_markup, in_markup)
|
||||
if ext == '.adoc':
|
||||
|
|
@ -948,6 +984,12 @@ def main():
|
|||
'with their original path inside the output directory. '
|
||||
'e.g. output/wp-uploads/date/postname/file.jpg '
|
||||
'-- Requires an internet connection --')
|
||||
parser.add_argument(
|
||||
'--wp-resolve', action='store_true', dest='wp_resolve',
|
||||
help='(wordpress import only) Attempt to resolve links in content that '
|
||||
'refer to attachments ids. '
|
||||
'Replace links in content of the form https://servername?id=23 '
|
||||
'with links of the form https://servername/post-slug-name ')
|
||||
parser.add_argument(
|
||||
'--disable-slugs', action='store_true',
|
||||
dest='disable_slugs',
|
||||
|
|
@ -1010,9 +1052,9 @@ def main():
|
|||
fields = feed2fields(args.input)
|
||||
|
||||
if args.wp_attach:
|
||||
attachments = get_attachments(args.input)
|
||||
attachments, attachment_links = get_attachments(args.input)
|
||||
else:
|
||||
attachments = None
|
||||
attachments, attachment_links = None, None
|
||||
|
||||
# init logging
|
||||
init()
|
||||
|
|
@ -1024,4 +1066,6 @@ def main():
|
|||
filter_author=args.author,
|
||||
wp_custpost=args.wp_custpost or False,
|
||||
wp_attach=args.wp_attach or False,
|
||||
attachments=attachments or None)
|
||||
wp_resolve=args.wp_resolve or False,
|
||||
attachments=attachments or None,
|
||||
attachment_links=attachment_links or None)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue