In the import script, add support for outputting posts in Markdown format

2025-10-15 20:28:56 +02:00 · 2011-10-24 00:19:52 +05:30 · 2011-10-24 00:19:52 +05:30 · 2d5dd70e85
commit 2d5dd70e85
parent 3bdb1eae0b
1 changed file with 19 additions and 9 deletions
--- a/tools/pelican-import
+++ b/tools/pelican-import
@@ -183,12 +183,13 @@ def build_markdown_header(title, date, author, categories, tags):
    header += '\n'
    return header

-def fields2pelican(fields, output_path, dircat=False):
-    for title, content, filename, date, author, categories, tags, markup in fields:
-        if (markup == "markdown"):
+def fields2pelican(fields, out_markup, output_path, dircat=False):
+    for title, content, filename, date, author, categories, tags, in_markup in fields:
+        if (in_markup == "markdown") or (out_markup == "markdown") :
            ext = '.md'
            header = build_markdown_header(title, date, author, categories, tags)
        else:
+            out_markup = "rst"
            ext = '.rst'
            header = build_header(title, date, author, categories, tags)

@@ -205,24 +206,31 @@ def fields2pelican(fields, output_path, dircat=False):

        print out_filename

-        if markup == "html":
+        if in_markup == "html":
            html_filename = os.path.join(output_path, filename+'.html')

            with open(html_filename, 'w', encoding='utf-8') as fp:
+                # Replace simple newlines with <br />+newline so that the HTML file
+                # represents the original post more accurately
+                content = content.replace("\n", "<br />\n")
                fp.write(content)

-            os.system('pandoc --normalize --reference-links --from=html --to=rst -o "%s" "%s"' % (out_filename, html_filename))
+            os.system('pandoc --normalize --reference-links --from=html --to=%s -o "%s" "%s"' % (out_markup, out_filename, html_filename))

-            os.remove(html_filename)
+            #os.remove(html_filename)

            with open(out_filename, 'r', encoding='utf-8') as fs:
                content = fs.read()
+                if out_markup == "markdown":
+                    # In Markdown, a hard line break (<br />) is written as two or more trailing spaces followed by an end-of-line
+                    content = content.replace("\\\n ", "  \n")
+                    content = content.replace("\\\n", "  \n")

        with open(out_filename, 'w', encoding='utf-8') as fs:
            fs.write(header + content)


-def main(input_type, input, output_path, dircat=False):
+def main(input_type, input, out_markup, output_path, dircat=False):
    if input_type == 'wordpress':
        fields = wp2fields(input)
    elif input_type == 'dotclear':
@@ -230,7 +238,7 @@ def main(input_type, input, output_path, dircat=False):
    elif input_type == 'feed':
        fields = feed2fields(input)

-    fields2pelican(fields, output_path, dircat=dircat)
+    fields2pelican(fields, out_markup, output_path, dircat=dircat)


 if __name__ == '__main__':
@@ -247,6 +255,8 @@ if __name__ == '__main__':
            help='Feed to parse')
    parser.add_argument('-o', '--output', dest='output', default='output',
            help='Output path')
+    parser.add_argument('-m', '--markup', dest='markup', default='rst',
+            help='Output markup format (supports rst & markdown)')
    parser.add_argument('--dir-cat', action='store_true', dest='dircat',
            help='Put files in directories with categories name')
    args = parser.parse_args()
@@ -261,4 +271,4 @@ if __name__ == '__main__':
    else:
        print "you must provide either --wpfile, --dotclear or --feed options"
        exit()
-    main(input_type, args.input, args.output, dircat=args.dircat)
+    main(input_type, args.input, args.markup, args.output, dircat=args.dircat)