Port pelican to python 3.

Stays compatible with the 2.x series, thanks to a unified codebase.
2013-01-11 02:57:43 +01:00 · 2013-01-11 02:57:43 +01:00 · 71995d5e1b
commit 71995d5e1b
parent 9847394e12
43 changed files with 495 additions and 287 deletions
--- a/pelican/tools/pelican_import.py
+++ b/pelican/tools/pelican_import.py
@ -1,7 +1,12 @@
 #!/usr/bin/env python

+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals, print_function
 import argparse
-from HTMLParser import HTMLParser
+try:
+    from html.parser import HTMLParser
+except ImportError:
+    from HTMLParser import HTMLParser
 import os
 import subprocess
 import sys
@ -15,14 +20,14 @@ from pelican.utils import slugify
 def wp2fields(xml):
    """Opens a wordpress XML file, and yield pelican fields"""
    try:
-        from BeautifulSoup import BeautifulStoneSoup
+        from bs4 import BeautifulSoup
    except ImportError:
        error = ('Missing dependency '
-                 '"BeautifulSoup" required to import Wordpress XML files.')
+                 '"BeautifulSoup4" and "lxml" required to import Wordpress XML files.')
        sys.exit(error)

    xmlfile = open(xml, encoding='utf-8').read()
-    soup = BeautifulStoneSoup(xmlfile)
+    soup = BeautifulSoup(xmlfile, "xml")
    items = soup.rss.channel.findAll('item')

    for item in items:
@ -54,10 +59,10 @@ def wp2fields(xml):
 def dc2fields(file):
    """Opens a Dotclear export file, and yield pelican fields"""
    try:
-        from BeautifulSoup import BeautifulStoneSoup
+        from bs4 import BeautifulSoup
    except ImportError:
        error = ('Missing dependency '
-                 '"BeautifulSoup" required to import Dotclear files.')
+                 '"BeautifulSoup4" and "lxml" required to import Dotclear files.')
        sys.exit(error)


@ -142,13 +147,27 @@ def dc2fields(file):
        if len(tag) > 1:
            if int(tag[:1]) == 1:
                newtag = tag.split('"')[1]
-                tags.append(unicode(BeautifulStoneSoup(newtag,convertEntities=BeautifulStoneSoup.HTML_ENTITIES )))
+                tags.append(
+                    BeautifulSoup(
+                        newtag
+                        , "xml"
+                    )
+                    # bs4 always outputs UTF-8
+                    .decode('utf-8')
+                )
            else:
                i=1
                j=1
                while(i <= int(tag[:1])):
                    newtag = tag.split('"')[j].replace('\\','')
-                    tags.append(unicode(BeautifulStoneSoup(newtag,convertEntities=BeautifulStoneSoup.HTML_ENTITIES )))
+                    tags.append(
+                        BeautifulSoup(
+                            newtag
+                            , "xml"
+                        )
+                        # bs4 always outputs UTF-8
+                        .decode('utf-8')
+                    )
                    i=i+1
                    if j < int(tag[:1])*2:
                        j=j+2
@ -244,7 +263,7 @@ def fields2pelican(fields, out_markup, output_path, dircat=False, strip_raw=Fals
                # Replace newlines with paragraphs wrapped with <p> so
                # HTML is valid before conversion
                paragraphs = content.splitlines()
-                paragraphs = [u'<p>{0}</p>'.format(p) for p in paragraphs]
+                paragraphs = ['<p>{0}</p>'.format(p) for p in paragraphs]
                new_content = ''.join(paragraphs)

                fp.write(new_content)
@ -264,7 +283,7 @@ def fields2pelican(fields, out_markup, output_path, dircat=False, strip_raw=Fals
                elif rc > 0:
                    error = "Please, check your Pandoc installation."
                    exit(error)
-            except OSError, e:
+            except OSError as e:
                error = "Pandoc execution failed: %s" % e
                exit(error)

@ -284,7 +303,7 @@ def fields2pelican(fields, out_markup, output_path, dircat=False, strip_raw=Fals
 def main():
    parser = argparse.ArgumentParser(
        description="Transform feed, Wordpress or Dotclear files to reST (rst) "
-                    "or Markdown (md) files. Be sure to have pandoc installed.",
+            "or Markdown (md) files. Be sure to have pandoc installed",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument(dest='input', help='The input file to read')
@ -304,10 +323,10 @@ def main():
        help="Strip raw HTML code that can't be converted to "
             "markup such as flash embeds or iframes (wordpress import only)")
    parser.add_argument('--disable-slugs', action='store_true',
-        dest='disable_slugs',
-        help='Disable storing slugs from imported posts within output. '
-             'With this disabled, your Pelican URLs may not be consistent '
-             'with your original posts.')
+    dest='disable_slugs',
+    help='Disable storing slugs from imported posts within output. '
+         'With this disabled, your Pelican URLs may not be consistent '
+         'with your original posts.')

    args = parser.parse_args()

@ -339,4 +358,4 @@ def main():
    fields2pelican(fields, args.markup, args.output,
                   dircat=args.dircat or False,
                   strip_raw=args.strip_raw or False,
-                   disable_slugs=args.disable_slugs or False)
+                   strip_slugs=args.disable_slugs or False)