From 2d5dd70e857eaa09875103232a12acc369fbe629 Mon Sep 17 00:00:00 2001 From: Ranjhith Kalisamy Date: Mon, 24 Oct 2011 00:19:52 +0530 Subject: [PATCH] In import script, add support to output posts in markdown format --- tools/pelican-import | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/tools/pelican-import b/tools/pelican-import index 4392d9ff..1c84511f 100755 --- a/tools/pelican-import +++ b/tools/pelican-import @@ -183,12 +183,13 @@ def build_markdown_header(title, date, author, categories, tags): header += '\n' return header -def fields2pelican(fields, output_path, dircat=False): - for title, content, filename, date, author, categories, tags, markup in fields: - if (markup == "markdown"): +def fields2pelican(fields, out_markup, output_path, dircat=False): + for title, content, filename, date, author, categories, tags, in_markup in fields: + if (in_markup == "markdown") or (out_markup == "markdown") : ext = '.md' header = build_markdown_header(title, date, author, categories, tags) else: + out_markup = "rst" ext = '.rst' header = build_header(title, date, author, categories, tags) @@ -205,24 +206,31 @@ def fields2pelican(fields, output_path, dircat=False): print out_filename - if markup == "html": + if in_markup == "html": html_filename = os.path.join(output_path, filename+'.html') with open(html_filename, 'w', encoding='utf-8') as fp: + # Replace simple newlines with <br/>+newline so that the HTML file + # represents the original post more accurately + content = content.replace("\n", "<br/>\n") fp.write(content) - os.system('pandoc --normalize --reference-links --from=html --to=rst -o "%s" "%s"' % (out_filename, html_filename)) + os.system('pandoc --normalize --reference-links --from=html --to=%s -o "%s" "%s"' % (out_markup, out_filename, html_filename)) - os.remove(html_filename) + #os.remove(html_filename) with open(out_filename, 'r', encoding='utf-8') as fs: content = fs.read() + if out_markup == "markdown": + # In markdown, to insert a <br />, end a line with two or more spaces & then a end-of-line + content = content.replace("\\\n ", " \n") + content = content.replace("\\\n", " \n") with open(out_filename, 'w', encoding='utf-8') as fs: fs.write(header + content) -def main(input_type, input, output_path, dircat=False): +def main(input_type, input, out_markup, output_path, dircat=False): if input_type == 'wordpress': fields = wp2fields(input) elif input_type == 'dotclear': @@ -230,7 +238,7 @@ def main(input_type, input, output_path, dircat=False): elif input_type == 'feed': fields = feed2fields(input) - fields2pelican(fields, output_path, dircat=dircat) + fields2pelican(fields, out_markup, output_path, dircat=dircat) if __name__ == '__main__': @@ -247,6 +255,8 @@ if __name__ == '__main__': help='Feed to parse') parser.add_argument('-o', '--output', dest='output', default='output', help='Output path') + parser.add_argument('-m', '--markup', dest='markup', default='rst', + help='Output markup format (supports rst & markdown)') parser.add_argument('--dir-cat', action='store_true', dest='dircat', help='Put files in directories with categories name') args = parser.parse_args() @@ -261,4 +271,4 @@ if __name__ == '__main__': else: print "you must provide either --wpfile, --dotclear or --feed options" exit() - main(input_type, args.input, args.output, dircat=args.dircat) + main(input_type, args.input, args.markup, args.output, dircat=args.dircat)