From 2d5dd70e857eaa09875103232a12acc369fbe629 Mon Sep 17 00:00:00 2001
From: Ranjhith Kalisamy <ranjhith.kumar@gmail.com>
Date: Mon, 24 Oct 2011 00:19:52 +0530
Subject: [PATCH] In import script, add support to output posts in markdown
 format

---
 tools/pelican-import | 28 +++++++++++++++++++---------
 1 file changed, 19 insertions(+), 9 deletions(-)
diff --git a/tools/pelican-import b/tools/pelican-import
index 4392d9ff..1c84511f 100755
--- a/tools/pelican-import
+++ b/tools/pelican-import
@@ -183,12 +183,13 @@ def build_markdown_header(title, date, author, categories, tags):
     header += '\n'
     return header
 
-def fields2pelican(fields, output_path, dircat=False):
-    for title, content, filename, date, author, categories, tags, markup in fields:
-        if (markup == "markdown"):
+def fields2pelican(fields, out_markup, output_path, dircat=False):
+    for title, content, filename, date, author, categories, tags, in_markup in fields:
+        if (in_markup == "markdown") or (out_markup == "markdown") :
             ext = '.md'
             header = build_markdown_header(title, date, author, categories, tags)
         else:
+            out_markup = "rst"
             ext = '.rst'
             header = build_header(title, date, author, categories, tags)
 
@@ -205,24 +206,31 @@ def fields2pelican(fields, output_path, dircat=False):
 
         print out_filename
 
-        if markup == "html":
+        if in_markup == "html":
             html_filename = os.path.join(output_path, filename+'.html')
 
             with open(html_filename, 'w', encoding='utf-8') as fp:
+                # Replace simple newlines with <br />+newline so that the HTML file
+                # represents the original post more accurately
+                content = content.replace("\n", "<br />\n")
                 fp.write(content)
 
-            os.system('pandoc --normalize --reference-links --from=html --to=rst -o "%s" "%s"' % (out_filename, html_filename))
+            os.system('pandoc --normalize --reference-links --from=html --to=%s -o "%s" "%s"' % (out_markup, out_filename, html_filename))
 
-            os.remove(html_filename)
+            #os.remove(html_filename)
 
             with open(out_filename, 'r', encoding='utf-8') as fs:
                 content = fs.read()
+                if out_markup == "markdown":
+                    # In markdown, a <br /> is written by ending a line with two or more spaces and then an end-of-line
+                    content = content.replace("\\\n ", "  \n")
+                    content = content.replace("\\\n", "  \n")
 
         with open(out_filename, 'w', encoding='utf-8') as fs:
             fs.write(header + content)
 
 
-def main(input_type, input, output_path, dircat=False):
+def main(input_type, input, out_markup, output_path, dircat=False):
     if input_type == 'wordpress':
         fields = wp2fields(input)
     elif input_type == 'dotclear':
@@ -230,7 +238,7 @@ def main(input_type, input, output_path, dircat=False):
     elif input_type == 'feed':
         fields = feed2fields(input)
 
-    fields2pelican(fields, output_path, dircat=dircat)
+    fields2pelican(fields, out_markup, output_path, dircat=dircat)
 
 
 if __name__ == '__main__':
@@ -247,6 +255,8 @@ if __name__ == '__main__':
             help='Feed to parse')
     parser.add_argument('-o', '--output', dest='output', default='output',
             help='Output path')
+    parser.add_argument('-m', '--markup', dest='markup', default='rst',
+            help='Output markup format (supports rst & markdown)')
     parser.add_argument('--dir-cat', action='store_true', dest='dircat',
             help='Put files in directories with categories name')
     args = parser.parse_args()
@@ -261,4 +271,4 @@ if __name__ == '__main__':
     else:
         print "you must provide either --wpfile, --dotclear or --feed options"
         exit()
-    main(input_type, args.input, args.output, dircat=args.dircat)
+    main(input_type, args.input, args.markup, args.output, dircat=args.dircat)