mirror of
https://github.com/getpelican/pelican.git
synced 2025-10-15 20:28:56 +02:00
Import wordpress pages to pages/ subdir with --dir-page option
When importing from WordPress, the --dir-page option (disabled by default) automatically places files in the pages/ subdirectory when they are recognised as pages, as opposed to posts.
This commit is contained in:
parent
870ec0efe8
commit
8c7ea8df98
3 changed files with 54 additions and 9 deletions
|
|
@ -42,7 +42,7 @@ Usage
|
||||||
::
|
::
|
||||||
|
|
||||||
pelican-import [-h] [--wpfile] [--dotclear] [--posterous] [--feed] [-o OUTPUT]
|
pelican-import [-h] [--wpfile] [--dotclear] [--posterous] [--feed] [-o OUTPUT]
|
||||||
[-m MARKUP] [--dir-cat] [--strip-raw] [--disable-slugs]
|
[-m MARKUP] [--dir-cat] [--dir-page] [--strip-raw] [--disable-slugs]
|
||||||
[-e EMAIL] [-p PASSWORD]
|
[-e EMAIL] [-p PASSWORD]
|
||||||
input|api_token
|
input|api_token
|
||||||
|
|
||||||
|
|
@ -67,6 +67,8 @@ Optional arguments
|
||||||
(default: rst)
|
(default: rst)
|
||||||
--dir-cat Put files in directories with categories name
|
--dir-cat Put files in directories with categories name
|
||||||
(default: False)
|
(default: False)
|
||||||
|
--dir-page Put files recognised as pages in "pages/" sub-
|
||||||
|
directory (wordpress import only) (default: False)
|
||||||
--strip-raw Strip raw HTML code that can't be converted to markup
|
--strip-raw Strip raw HTML code that can't be converted to markup
|
||||||
such as flash embeds or iframes (wordpress import
|
such as flash embeds or iframes (wordpress import
|
||||||
only) (default: False)
|
only) (default: False)
|
||||||
|
|
|
||||||
|
|
@ -32,9 +32,28 @@ class TestWordpressXmlImporter(unittest.TestCase):
|
||||||
|
|
||||||
def test_ignore_empty_posts(self):
|
def test_ignore_empty_posts(self):
|
||||||
self.assertTrue(self.posts)
|
self.assertTrue(self.posts)
|
||||||
for title, content, fname, date, author, categ, tags, format in self.posts:
|
for title, content, fname, date, author, categ, tags, kind, format in self.posts:
|
||||||
self.assertTrue(title.strip())
|
self.assertTrue(title.strip())
|
||||||
|
|
||||||
|
def test_recognise_page_kind(self):
|
||||||
|
""" Check that we recognise pages in wordpress, as opposed to posts """
|
||||||
|
self.assertTrue(self.posts)
|
||||||
|
# Collect (title, filename) of non-empty posts recognised as pages
|
||||||
|
pages_data = []
|
||||||
|
for title, content, fname, date, author, categ, tags, kind, format in self.posts:
|
||||||
|
if kind == 'page':
|
||||||
|
pages_data.append((title, fname))
|
||||||
|
self.assertEqual(2, len(pages_data))
|
||||||
|
self.assertEqual(('Page', 'contact'), pages_data[0])
|
||||||
|
self.assertEqual(('Empty Page', 'empty'), pages_data[1])
|
||||||
|
|
||||||
|
def test_dirpage_directive_for_page_kind(self):
|
||||||
|
silent_f2p = mute(True)(fields2pelican)
|
||||||
|
test_post = filter(lambda p: p[0].startswith("Empty Page"), self.posts)
|
||||||
|
with temporary_folder() as temp:
|
||||||
|
fname = list(silent_f2p(test_post, 'markdown', temp, dirpage=True))[0]
|
||||||
|
self.assertTrue(fname.endswith('pages%sempty.md' % os.path.sep))
|
||||||
|
|
||||||
def test_can_toggle_raw_html_code_parsing(self):
|
def test_can_toggle_raw_html_code_parsing(self):
|
||||||
def r(f):
|
def r(f):
|
||||||
with open(f) as infile:
|
with open(f) as infile:
|
||||||
|
|
|
||||||
|
|
@ -136,7 +136,12 @@ def wp2fields(xml):
|
||||||
|
|
||||||
tags = [tag.string for tag in item.findAll('category', {'domain' : 'post_tag'})]
|
tags = [tag.string for tag in item.findAll('category', {'domain' : 'post_tag'})]
|
||||||
|
|
||||||
yield (title, content, filename, date, author, categories, tags, "wp-html")
|
kind = 'article'
|
||||||
|
if item.find('post_type').string == 'page':
|
||||||
|
kind = 'page'
|
||||||
|
|
||||||
|
yield (title, content, filename, date, author, categories, tags,
|
||||||
|
kind, "wp-html")
|
||||||
|
|
||||||
def dc2fields(file):
|
def dc2fields(file):
|
||||||
"""Opens a Dotclear export file, and yield pelican fields"""
|
"""Opens a Dotclear export file, and yield pelican fields"""
|
||||||
|
|
@ -265,7 +270,10 @@ def dc2fields(file):
|
||||||
content = content.replace('\\n', '')
|
content = content.replace('\\n', '')
|
||||||
post_format = "html"
|
post_format = "html"
|
||||||
|
|
||||||
yield (post_title, content, slugify(post_title), post_creadt, author, categories, tags, post_format)
|
kind = 'article' # TODO: Recognise pages
|
||||||
|
|
||||||
|
yield (post_title, content, slugify(post_title), post_creadt, author,
|
||||||
|
categories, tags, kind, post_format)
|
||||||
|
|
||||||
|
|
||||||
def posterous2fields(api_token, email, password):
|
def posterous2fields(api_token, email, password):
|
||||||
|
|
@ -313,9 +321,10 @@ def posterous2fields(api_token, email, password):
|
||||||
delta = timedelta(hours = offset / 100)
|
delta = timedelta(hours = offset / 100)
|
||||||
date_object -= delta
|
date_object -= delta
|
||||||
date = date_object.strftime("%Y-%m-%d %H:%M")
|
date = date_object.strftime("%Y-%m-%d %H:%M")
|
||||||
|
kind = 'article' # TODO: Recognise pages
|
||||||
|
|
||||||
yield (post.get('title'), post.get('body_cleaned'), slug, date,
|
yield (post.get('title'), post.get('body_cleaned'), slug, date,
|
||||||
post.get('user').get('display_name'), [], tags, "html")
|
post.get('user').get('display_name'), [], tags, kind, "html")
|
||||||
|
|
||||||
def feed2fields(file):
|
def feed2fields(file):
|
||||||
"""Read a feed and yield pelican fields"""
|
"""Read a feed and yield pelican fields"""
|
||||||
|
|
@ -328,7 +337,9 @@ def feed2fields(file):
|
||||||
tags = [e['term'] for e in entry.tags] if hasattr(entry, "tags") else None
|
tags = [e['term'] for e in entry.tags] if hasattr(entry, "tags") else None
|
||||||
|
|
||||||
slug = slugify(entry.title)
|
slug = slugify(entry.title)
|
||||||
yield (entry.title, entry.description, slug, date, author, [], tags, "html")
|
kind = 'article'
|
||||||
|
yield (entry.title, entry.description, slug, date, author, [], tags,
|
||||||
|
kind, "html")
|
||||||
|
|
||||||
|
|
||||||
def build_header(title, date, author, categories, tags, slug):
|
def build_header(title, date, author, categories, tags, slug):
|
||||||
|
|
@ -363,8 +374,11 @@ def build_markdown_header(title, date, author, categories, tags, slug):
|
||||||
header += '\n'
|
header += '\n'
|
||||||
return header
|
return header
|
||||||
|
|
||||||
def fields2pelican(fields, out_markup, output_path, dircat=False, strip_raw=False, disable_slugs=False):
|
def fields2pelican(fields, out_markup, output_path,
|
||||||
for title, content, filename, date, author, categories, tags, in_markup in fields:
|
dircat=False, strip_raw=False, disable_slugs=False,
|
||||||
|
dirpage=False, filename_template=None):
|
||||||
|
for (title, content, filename, date, author, categories, tags,
|
||||||
|
kind, in_markup) in fields:
|
||||||
slug = not disable_slugs and filename or None
|
slug = not disable_slugs and filename or None
|
||||||
if (in_markup == "markdown") or (out_markup == "markdown") :
|
if (in_markup == "markdown") or (out_markup == "markdown") :
|
||||||
ext = '.md'
|
ext = '.md'
|
||||||
|
|
@ -385,8 +399,14 @@ def fields2pelican(fields, out_markup, output_path, dircat=False, strip_raw=Fals
|
||||||
filename = '_'
|
filename = '_'
|
||||||
filename = filename[:249] # allow for 5 extra characters
|
filename = filename[:249] # allow for 5 extra characters
|
||||||
|
|
||||||
|
# option to put page posts in pages/ subdirectory
|
||||||
|
if dirpage and kind == 'page':
|
||||||
|
pages_dir = os.path.join(output_path, 'pages')
|
||||||
|
if not os.path.isdir(pages_dir):
|
||||||
|
os.mkdir(pages_dir)
|
||||||
|
out_filename = os.path.join(pages_dir, filename+ext)
|
||||||
# option to put files in directories with categories names
|
# option to put files in directories with categories names
|
||||||
if dircat and (len(categories) > 0):
|
elif dircat and (len(categories) > 0):
|
||||||
catname = slugify(categories[0])
|
catname = slugify(categories[0])
|
||||||
out_filename = os.path.join(output_path, catname, filename+ext)
|
out_filename = os.path.join(output_path, catname, filename+ext)
|
||||||
if not os.path.isdir(os.path.join(output_path, catname)):
|
if not os.path.isdir(os.path.join(output_path, catname)):
|
||||||
|
|
@ -464,6 +484,9 @@ def main():
|
||||||
help='Output markup format (supports rst & markdown)')
|
help='Output markup format (supports rst & markdown)')
|
||||||
parser.add_argument('--dir-cat', action='store_true', dest='dircat',
|
parser.add_argument('--dir-cat', action='store_true', dest='dircat',
|
||||||
help='Put files in directories with categories name')
|
help='Put files in directories with categories name')
|
||||||
|
parser.add_argument('--dir-page', action='store_true', dest='dirpage',
|
||||||
|
help=('Put files recognised as pages in "pages/" sub-directory'
|
||||||
|
' (wordpress import only)'))
|
||||||
parser.add_argument('--strip-raw', action='store_true', dest='strip_raw',
|
parser.add_argument('--strip-raw', action='store_true', dest='strip_raw',
|
||||||
help="Strip raw HTML code that can't be converted to "
|
help="Strip raw HTML code that can't be converted to "
|
||||||
"markup such as flash embeds or iframes (wordpress import only)")
|
"markup such as flash embeds or iframes (wordpress import only)")
|
||||||
|
|
@ -512,5 +535,6 @@ def main():
|
||||||
|
|
||||||
fields2pelican(fields, args.markup, args.output,
|
fields2pelican(fields, args.markup, args.output,
|
||||||
dircat=args.dircat or False,
|
dircat=args.dircat or False,
|
||||||
|
dirpage=args.dirpage or False,
|
||||||
strip_raw=args.strip_raw or False,
|
strip_raw=args.strip_raw or False,
|
||||||
disable_slugs=args.disable_slugs or False)
|
disable_slugs=args.disable_slugs or False)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue