diff --git a/tools/pelican-import b/tools/pelican-import
index 4392d9ff..084d966e 100755
--- a/tools/pelican-import
+++ b/tools/pelican-import
@@ -183,12 +183,13 @@ def build_markdown_header(title, date, author, categories, tags):
header += '\n'
return header
-def fields2pelican(fields, output_path, dircat=False):
- for title, content, filename, date, author, categories, tags, markup in fields:
- if (markup == "markdown"):
+def fields2pelican(fields, out_markup, output_path, dircat=False):
+ for title, content, filename, date, author, categories, tags, in_markup in fields:
+ if (in_markup == "markdown") or (out_markup == "markdown") :
ext = '.md'
header = build_markdown_header(title, date, author, categories, tags)
else:
+ out_markup = "rst"
ext = '.rst'
header = build_header(title, date, author, categories, tags)
@@ -205,24 +206,31 @@ def fields2pelican(fields, output_path, dircat=False):
print out_filename
- if markup == "html":
+ if in_markup == "html":
html_filename = os.path.join(output_path, filename+'.html')
with open(html_filename, 'w', encoding='utf-8') as fp:
+ # Replace simple newlines with <br/>+newline so that the HTML file
+ # represents the original post more accurately
+ content = content.replace("\n", "<br/>\n")
fp.write(content)
- os.system('pandoc --normalize --reference-links --from=html --to=rst -o "%s" "%s"' % (out_filename, html_filename))
+ os.system('pandoc --normalize --reference-links --from=html --to=%s -o "%s" "%s"' % (out_markup, out_filename, html_filename))
- os.remove(html_filename)
+ #os.remove(html_filename)
with open(out_filename, 'r', encoding='utf-8') as fs:
content = fs.read()
+ if out_markup == "markdown":
+ # In markdown, to insert a <br />, end a line with two or more spaces & then an end-of-line
+ content = content.replace("\\\n ", " \n")
+ content = content.replace("\\\n", " \n")
with open(out_filename, 'w', encoding='utf-8') as fs:
fs.write(header + content)
-def main(input_type, input, output_path, dircat=False):
+def main(input_type, input, out_markup, output_path, dircat=False):
if input_type == 'wordpress':
fields = wp2fields(input)
elif input_type == 'dotclear':
@@ -230,7 +238,7 @@ def main(input_type, input, output_path, dircat=False):
elif input_type == 'feed':
fields = feed2fields(input)
- fields2pelican(fields, output_path, dircat=dircat)
+ fields2pelican(fields, out_markup, output_path, dircat=dircat)
if __name__ == '__main__':
@@ -247,6 +255,8 @@ if __name__ == '__main__':
help='Feed to parse')
parser.add_argument('-o', '--output', dest='output', default='output',
help='Output path')
+ parser.add_argument('-m', '--markup', dest='markup', default='rst',
+ help='Output markup format (supports rst & markdown)')
parser.add_argument('--dir-cat', action='store_true', dest='dircat',
help='Put files in directories with categories name')
args = parser.parse_args()
@@ -261,4 +271,12 @@ if __name__ == '__main__':
else:
print "you must provide either --wpfile, --dotclear or --feed options"
exit()
- main(input_type, args.input, args.output, dircat=args.dircat)
+
+ if not os.path.exists(args.output):
+ try:
+ os.mkdir(args.output)
+ except OSError:
+ error("Couldn't create the output folder: " + args.output)
+ exit()
+
+ main(input_type, args.input, args.markup, args.output, dircat=args.dircat)