diff --git a/tools/pelican-import b/tools/pelican-import index 4392d9ff..084d966e 100755 --- a/tools/pelican-import +++ b/tools/pelican-import @@ -183,12 +183,13 @@ def build_markdown_header(title, date, author, categories, tags): header += '\n' return header -def fields2pelican(fields, output_path, dircat=False): - for title, content, filename, date, author, categories, tags, markup in fields: - if (markup == "markdown"): +def fields2pelican(fields, out_markup, output_path, dircat=False): + for title, content, filename, date, author, categories, tags, in_markup in fields: + if (in_markup == "markdown") or (out_markup == "markdown") : ext = '.md' header = build_markdown_header(title, date, author, categories, tags) else: + out_markup = "rst" ext = '.rst' header = build_header(title, date, author, categories, tags) @@ -205,24 +206,31 @@ def fields2pelican(fields, output_path, dircat=False): print out_filename - if markup == "html": + if in_markup == "html": html_filename = os.path.join(output_path, filename+'.html') with open(html_filename, 'w', encoding='utf-8') as fp: + # Replace simple newlines with
+newline so that the HTML file + # represents the original post more accurately + content = content.replace("\n", "
\n") fp.write(content) - os.system('pandoc --normalize --reference-links --from=html --to=rst -o "%s" "%s"' % (out_filename, html_filename)) + os.system('pandoc --normalize --reference-links --from=html --to=%s -o "%s" "%s"' % (out_markup, out_filename, html_filename)) - os.remove(html_filename) + #os.remove(html_filename) with open(out_filename, 'r', encoding='utf-8') as fs: content = fs.read() + if out_markup == "markdown": + # In markdown, to insert a
, end a line with two or more spaces & then a end-of-line + content = content.replace("\\\n ", " \n") + content = content.replace("\\\n", " \n") with open(out_filename, 'w', encoding='utf-8') as fs: fs.write(header + content) -def main(input_type, input, output_path, dircat=False): +def main(input_type, input, out_markup, output_path, dircat=False): if input_type == 'wordpress': fields = wp2fields(input) elif input_type == 'dotclear': @@ -230,7 +238,7 @@ def main(input_type, input, output_path, dircat=False): elif input_type == 'feed': fields = feed2fields(input) - fields2pelican(fields, output_path, dircat=dircat) + fields2pelican(fields, out_markup, output_path, dircat=dircat) if __name__ == '__main__': @@ -247,6 +255,8 @@ if __name__ == '__main__': help='Feed to parse') parser.add_argument('-o', '--output', dest='output', default='output', help='Output path') + parser.add_argument('-m', '--markup', dest='markup', default='rst', + help='Output markup format (supports rst & markdown)') parser.add_argument('--dir-cat', action='store_true', dest='dircat', help='Put files in directories with categories name') args = parser.parse_args() @@ -261,4 +271,12 @@ if __name__ == '__main__': else: print "you must provide either --wpfile, --dotclear or --feed options" exit() - main(input_type, args.input, args.output, dircat=args.dircat) + + if not os.path.exists(args.output): + try: + os.mkdir(args.output) + except OSError: + error("Couldn't create the output folder: " + args.output) + exit() + + main(input_type, args.input, args.markup, args.output, dircat=args.dircat)