mirror of
https://github.com/getpelican/pelican.git
synced 2025-10-15 20:28:56 +02:00
Merge pull request #577 from davidjb/import-improvements-slug
Provide slug storage option for posts during Pelican import
This commit is contained in:
commit
98c8db568b
4 changed files with 64 additions and 16 deletions
|
|
@ -9,6 +9,8 @@ Release history
|
||||||
3.1 (2012-12-04)
|
3.1 (2012-12-04)
|
||||||
================
|
================
|
||||||
|
|
||||||
|
* Importer now stores slugs within files by default. This can be disabled with
|
||||||
|
the ``--disable-slugs`` option.
|
||||||
* Improve handling of links to intra-site resources
|
* Improve handling of links to intra-site resources
|
||||||
* Ensure WordPress import adds paragraphs for all types of line endings
|
* Ensure WordPress import adds paragraphs for all types of line endings
|
||||||
in post content
|
in post content
|
||||||
|
|
|
||||||
|
|
@ -39,29 +39,44 @@ Usage
|
||||||
"""""
|
"""""
|
||||||
|
|
||||||
| pelican-import [-h] [--wpfile] [--dotclear] [--feed] [-o OUTPUT]
|
| pelican-import [-h] [--wpfile] [--dotclear] [--feed] [-o OUTPUT]
|
||||||
| [-m MARKUP][--dir-cat]
|
| [-m MARKUP] [--dir-cat] [--strip-raw] [--disable-slugs]
|
||||||
| input
|
| input
|
||||||
|
|
||||||
|
Positional arguments
|
||||||
|
====================
|
||||||
|
|
||||||
|
input The input file to read
|
||||||
|
|
||||||
Optional arguments
|
Optional arguments
|
||||||
""""""""""""""""""
|
""""""""""""""""""
|
||||||
|
|
||||||
-h, --help show this help message and exit
|
-h, --help show this help message and exit
|
||||||
--wpfile Wordpress XML export
|
--wpfile Wordpress XML export (default: False)
|
||||||
--dotclear Dotclear export
|
--dotclear Dotclear export (default: False)
|
||||||
--feed Feed to parse
|
--feed Feed to parse (default: False)
|
||||||
-o OUTPUT, --output OUTPUT
|
-o OUTPUT, --output OUTPUT
|
||||||
Output path
|
Output path (default: output)
|
||||||
-m MARKUP Output markup
|
-m MARKUP, --markup MARKUP
|
||||||
|
Output markup format (supports rst & markdown)
|
||||||
|
(default: rst)
|
||||||
--dir-cat Put files in directories with categories name
|
--dir-cat Put files in directories with categories name
|
||||||
|
(default: False)
|
||||||
|
--strip-raw Strip raw HTML code that can't be converted to markup
|
||||||
|
such as flash embeds or iframes (wordpress import
|
||||||
|
only) (default: False)
|
||||||
|
--disable-slugs Disable storing slugs from imported posts within
|
||||||
|
output. With this disabled, your Pelican URLs may not
|
||||||
|
be consistent with your original posts. (default:
|
||||||
|
False)
|
||||||
|
|
||||||
Examples
|
Examples
|
||||||
========
|
========
|
||||||
|
|
||||||
for WordPress::
|
For WordPress::
|
||||||
|
|
||||||
$ pelican-import --wpfile -o ~/output ~/posts.xml
|
$ pelican-import --wpfile -o ~/output ~/posts.xml
|
||||||
|
|
||||||
for Dotclear::
|
For Dotclear::
|
||||||
|
|
||||||
$ pelican-import --dotclear -o ~/output ~/backup.txt
|
$ pelican-import --dotclear -o ~/output ~/backup.txt
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -181,7 +181,7 @@ def feed2fields(file):
|
||||||
yield (entry.title, entry.description, slug, date, author, [], tags, "html")
|
yield (entry.title, entry.description, slug, date, author, [], tags, "html")
|
||||||
|
|
||||||
|
|
||||||
def build_header(title, date, author, categories, tags):
|
def build_header(title, date, author, categories, tags, slug):
|
||||||
"""Build a header from a list of fields"""
|
"""Build a header from a list of fields"""
|
||||||
header = '%s\n%s\n' % (title, '#' * len(title))
|
header = '%s\n%s\n' % (title, '#' * len(title))
|
||||||
if date:
|
if date:
|
||||||
|
|
@ -192,10 +192,12 @@ def build_header(title, date, author, categories, tags):
|
||||||
header += ':category: %s\n' % ', '.join(categories)
|
header += ':category: %s\n' % ', '.join(categories)
|
||||||
if tags:
|
if tags:
|
||||||
header += ':tags: %s\n' % ', '.join(tags)
|
header += ':tags: %s\n' % ', '.join(tags)
|
||||||
|
if slug:
|
||||||
|
header += ':slug: %s\n' % slug
|
||||||
header += '\n'
|
header += '\n'
|
||||||
return header
|
return header
|
||||||
|
|
||||||
def build_markdown_header(title, date, author, categories, tags):
|
def build_markdown_header(title, date, author, categories, tags, slug):
|
||||||
"""Build a header from a list of fields"""
|
"""Build a header from a list of fields"""
|
||||||
header = 'Title: %s\n' % title
|
header = 'Title: %s\n' % title
|
||||||
if date:
|
if date:
|
||||||
|
|
@ -206,18 +208,21 @@ def build_markdown_header(title, date, author, categories, tags):
|
||||||
header += 'Category: %s\n' % ', '.join(categories)
|
header += 'Category: %s\n' % ', '.join(categories)
|
||||||
if tags:
|
if tags:
|
||||||
header += 'Tags: %s\n' % ', '.join(tags)
|
header += 'Tags: %s\n' % ', '.join(tags)
|
||||||
|
if slug:
|
||||||
|
header += 'Slug: %s\n' % slug
|
||||||
header += '\n'
|
header += '\n'
|
||||||
return header
|
return header
|
||||||
|
|
||||||
def fields2pelican(fields, out_markup, output_path, dircat=False, strip_raw=False):
|
def fields2pelican(fields, out_markup, output_path, dircat=False, strip_raw=False, disable_slugs=False):
|
||||||
for title, content, filename, date, author, categories, tags, in_markup in fields:
|
for title, content, filename, date, author, categories, tags, in_markup in fields:
|
||||||
|
slug = not disable_slugs and filename or None
|
||||||
if (in_markup == "markdown") or (out_markup == "markdown") :
|
if (in_markup == "markdown") or (out_markup == "markdown") :
|
||||||
ext = '.md'
|
ext = '.md'
|
||||||
header = build_markdown_header(title, date, author, categories, tags)
|
header = build_markdown_header(title, date, author, categories, tags, slug)
|
||||||
else:
|
else:
|
||||||
out_markup = "rst"
|
out_markup = "rst"
|
||||||
ext = '.rst'
|
ext = '.rst'
|
||||||
header = build_header(title, date, author, categories, tags)
|
header = build_header(title, date, author, categories, tags, slug)
|
||||||
|
|
||||||
filename = os.path.basename(filename)
|
filename = os.path.basename(filename)
|
||||||
|
|
||||||
|
|
@ -278,8 +283,8 @@ def fields2pelican(fields, out_markup, output_path, dircat=False, strip_raw=Fals
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
description="Transform feed, Wordpress or Dotclear files to rst files."
|
description="Transform feed, Wordpress or Dotclear files to reST (rst) "
|
||||||
"Be sure to have pandoc installed",
|
"or Markdown (md) files. Be sure to have pandoc installed.",
|
||||||
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
||||||
|
|
||||||
parser.add_argument(dest='input', help='The input file to read')
|
parser.add_argument(dest='input', help='The input file to read')
|
||||||
|
|
@ -298,6 +303,11 @@ def main():
|
||||||
parser.add_argument('--strip-raw', action='store_true', dest='strip_raw',
|
parser.add_argument('--strip-raw', action='store_true', dest='strip_raw',
|
||||||
help="Strip raw HTML code that can't be converted to "
|
help="Strip raw HTML code that can't be converted to "
|
||||||
"markup such as flash embeds or iframes (wordpress import only)")
|
"markup such as flash embeds or iframes (wordpress import only)")
|
||||||
|
parser.add_argument('--disable-slugs', action='store_true',
|
||||||
|
dest='disable_slugs',
|
||||||
|
help='Disable storing slugs from imported posts within output. '
|
||||||
|
'With this disabled, your Pelican URLs may not be consistent '
|
||||||
|
'with your original posts.')
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
|
@ -328,4 +338,5 @@ def main():
|
||||||
|
|
||||||
fields2pelican(fields, args.markup, args.output,
|
fields2pelican(fields, args.markup, args.output,
|
||||||
dircat=args.dircat or False,
|
dircat=args.dircat or False,
|
||||||
strip_raw=args.strip_raw or False)
|
strip_raw=args.strip_raw or False,
|
||||||
|
disable_slugs=args.disable_slugs or False)
|
||||||
|
|
|
||||||
|
|
@ -48,6 +48,26 @@ class TestWordpressXmlImporter(unittest.TestCase):
|
||||||
strip_raw=True))
|
strip_raw=True))
|
||||||
self.assertFalse(any('<iframe' in rst for rst in rst_files))
|
self.assertFalse(any('<iframe' in rst for rst in rst_files))
|
||||||
|
|
||||||
|
def test_can_toggle_slug_storage(self):
|
||||||
|
|
||||||
|
posts = list(self.posts)
|
||||||
|
r = lambda f: open(f).read()
|
||||||
|
silent_f2p = mute(True)(fields2pelican)
|
||||||
|
|
||||||
|
with temporary_folder() as temp:
|
||||||
|
|
||||||
|
rst_files = (r(f) for f in silent_f2p(posts, 'markdown', temp))
|
||||||
|
self.assertTrue(all('Slug:' in rst for rst in rst_files))
|
||||||
|
rst_files = (r(f) for f in silent_f2p(posts, 'markdown', temp,
|
||||||
|
disable_slugs=True))
|
||||||
|
self.assertFalse(any('Slug:' in rst for rst in rst_files))
|
||||||
|
|
||||||
|
rst_files = (r(f) for f in silent_f2p(posts, 'rst', temp))
|
||||||
|
self.assertTrue(all(':slug:' in rst for rst in rst_files))
|
||||||
|
rst_files = (r(f) for f in silent_f2p(posts, 'rst', temp,
|
||||||
|
disable_slugs=True))
|
||||||
|
self.assertFalse(any(':slug:' in rst for rst in rst_files))
|
||||||
|
|
||||||
def test_decode_html_entities_in_titles(self):
|
def test_decode_html_entities_in_titles(self):
|
||||||
posts = list(self.posts)
|
posts = list(self.posts)
|
||||||
test_posts = [post for post in posts if post[2] == 'html-entity-test']
|
test_posts = [post for post in posts if post[2] == 'html-entity-test']
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue