1
0
Fork 0
forked from github/pelican

Port pelican to python 3.

Stays compatible with 2.x series, thanks to an unified codebase.
This commit is contained in:
Dirk Makowski 2013-01-11 02:57:43 +01:00 committed by Alexis Métaireau
commit 71995d5e1b
43 changed files with 495 additions and 287 deletions

View file

@ -1,7 +1,12 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals, print_function
import argparse
from HTMLParser import HTMLParser
try:
from html.parser import HTMLParser
except ImportError:
from HTMLParser import HTMLParser
import os
import subprocess
import sys
@ -15,14 +20,14 @@ from pelican.utils import slugify
def wp2fields(xml):
"""Opens a wordpress XML file, and yield pelican fields"""
try:
from BeautifulSoup import BeautifulStoneSoup
from bs4 import BeautifulSoup
except ImportError:
error = ('Missing dependency '
'"BeautifulSoup" required to import Wordpress XML files.')
'"BeautifulSoup4" and "lxml" required to import Wordpress XML files.')
sys.exit(error)
xmlfile = open(xml, encoding='utf-8').read()
soup = BeautifulStoneSoup(xmlfile)
soup = BeautifulSoup(xmlfile, "xml")
items = soup.rss.channel.findAll('item')
for item in items:
@ -54,10 +59,10 @@ def wp2fields(xml):
def dc2fields(file):
"""Opens a Dotclear export file, and yield pelican fields"""
try:
from BeautifulSoup import BeautifulStoneSoup
from bs4 import BeautifulSoup
except ImportError:
error = ('Missing dependency '
'"BeautifulSoup" required to import Dotclear files.')
'"BeautifulSoup4" and "lxml" required to import Dotclear files.')
sys.exit(error)
@ -142,13 +147,27 @@ def dc2fields(file):
if len(tag) > 1:
if int(tag[:1]) == 1:
newtag = tag.split('"')[1]
tags.append(unicode(BeautifulStoneSoup(newtag,convertEntities=BeautifulStoneSoup.HTML_ENTITIES )))
tags.append(
BeautifulSoup(
newtag
, "xml"
)
# bs4 always outputs UTF-8
.decode('utf-8')
)
else:
i=1
j=1
while(i <= int(tag[:1])):
newtag = tag.split('"')[j].replace('\\','')
tags.append(unicode(BeautifulStoneSoup(newtag,convertEntities=BeautifulStoneSoup.HTML_ENTITIES )))
tags.append(
BeautifulSoup(
newtag
, "xml"
)
# bs4 always outputs UTF-8
.decode('utf-8')
)
i=i+1
if j < int(tag[:1])*2:
j=j+2
@ -244,7 +263,7 @@ def fields2pelican(fields, out_markup, output_path, dircat=False, strip_raw=Fals
# Replace newlines with paragraphs wrapped with <p> so
# HTML is valid before conversion
paragraphs = content.splitlines()
paragraphs = [u'<p>{0}</p>'.format(p) for p in paragraphs]
paragraphs = ['<p>{0}</p>'.format(p) for p in paragraphs]
new_content = ''.join(paragraphs)
fp.write(new_content)
@ -264,7 +283,7 @@ def fields2pelican(fields, out_markup, output_path, dircat=False, strip_raw=Fals
elif rc > 0:
error = "Please, check your Pandoc installation."
exit(error)
except OSError, e:
except OSError as e:
error = "Pandoc execution failed: %s" % e
exit(error)
@ -284,7 +303,7 @@ def fields2pelican(fields, out_markup, output_path, dircat=False, strip_raw=Fals
def main():
parser = argparse.ArgumentParser(
description="Transform feed, Wordpress or Dotclear files to reST (rst) "
"or Markdown (md) files. Be sure to have pandoc installed.",
"or Markdown (md) files. Be sure to have pandoc installed",
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument(dest='input', help='The input file to read')
@ -304,10 +323,10 @@ def main():
help="Strip raw HTML code that can't be converted to "
"markup such as flash embeds or iframes (wordpress import only)")
parser.add_argument('--disable-slugs', action='store_true',
dest='disable_slugs',
help='Disable storing slugs from imported posts within output. '
'With this disabled, your Pelican URLs may not be consistent '
'with your original posts.')
dest='disable_slugs',
help='Disable storing slugs from imported posts within output. '
'With this disabled, your Pelican URLs may not be consistent '
'with your original posts.')
args = parser.parse_args()
@ -339,4 +358,4 @@ def main():
fields2pelican(fields, args.markup, args.output,
dircat=args.dircat or False,
strip_raw=args.strip_raw or False,
disable_slugs=args.disable_slugs or False)
strip_slugs=args.disable_slugs or False)