forked from github/pelican
Port pelican to python 3.
Stays compatible with the 2.x series, thanks to a unified codebase.
This commit is contained in:
parent
9847394e12
commit
71995d5e1b
43 changed files with 495 additions and 287 deletions
|
|
@ -1,7 +1,12 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals, print_function
|
||||
import argparse
|
||||
from HTMLParser import HTMLParser
|
||||
try:
|
||||
from html.parser import HTMLParser
|
||||
except ImportError:
|
||||
from HTMLParser import HTMLParser
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
|
|
@ -15,14 +20,14 @@ from pelican.utils import slugify
|
|||
def wp2fields(xml):
|
||||
"""Opens a wordpress XML file, and yield pelican fields"""
|
||||
try:
|
||||
from BeautifulSoup import BeautifulStoneSoup
|
||||
from bs4 import BeautifulSoup
|
||||
except ImportError:
|
||||
error = ('Missing dependency '
|
||||
'"BeautifulSoup" required to import Wordpress XML files.')
|
||||
'"BeautifulSoup4" and "lxml" required to import Wordpress XML files.')
|
||||
sys.exit(error)
|
||||
|
||||
xmlfile = open(xml, encoding='utf-8').read()
|
||||
soup = BeautifulStoneSoup(xmlfile)
|
||||
soup = BeautifulSoup(xmlfile, "xml")
|
||||
items = soup.rss.channel.findAll('item')
|
||||
|
||||
for item in items:
|
||||
|
|
@ -54,10 +59,10 @@ def wp2fields(xml):
|
|||
def dc2fields(file):
|
||||
"""Opens a Dotclear export file, and yield pelican fields"""
|
||||
try:
|
||||
from BeautifulSoup import BeautifulStoneSoup
|
||||
from bs4 import BeautifulSoup
|
||||
except ImportError:
|
||||
error = ('Missing dependency '
|
||||
'"BeautifulSoup" required to import Dotclear files.')
|
||||
'"BeautifulSoup4" and "lxml" required to import Dotclear files.')
|
||||
sys.exit(error)
|
||||
|
||||
|
||||
|
|
@ -142,13 +147,27 @@ def dc2fields(file):
|
|||
if len(tag) > 1:
|
||||
if int(tag[:1]) == 1:
|
||||
newtag = tag.split('"')[1]
|
||||
tags.append(unicode(BeautifulStoneSoup(newtag,convertEntities=BeautifulStoneSoup.HTML_ENTITIES )))
|
||||
tags.append(
|
||||
BeautifulSoup(
|
||||
newtag
|
||||
, "xml"
|
||||
)
|
||||
# bs4 always outputs UTF-8
|
||||
.decode('utf-8')
|
||||
)
|
||||
else:
|
||||
i=1
|
||||
j=1
|
||||
while(i <= int(tag[:1])):
|
||||
newtag = tag.split('"')[j].replace('\\','')
|
||||
tags.append(unicode(BeautifulStoneSoup(newtag,convertEntities=BeautifulStoneSoup.HTML_ENTITIES )))
|
||||
tags.append(
|
||||
BeautifulSoup(
|
||||
newtag
|
||||
, "xml"
|
||||
)
|
||||
# bs4 always outputs UTF-8
|
||||
.decode('utf-8')
|
||||
)
|
||||
i=i+1
|
||||
if j < int(tag[:1])*2:
|
||||
j=j+2
|
||||
|
|
@ -244,7 +263,7 @@ def fields2pelican(fields, out_markup, output_path, dircat=False, strip_raw=Fals
|
|||
# Replace newlines with paragraphs wrapped with <p> so
|
||||
# HTML is valid before conversion
|
||||
paragraphs = content.splitlines()
|
||||
paragraphs = [u'<p>{0}</p>'.format(p) for p in paragraphs]
|
||||
paragraphs = ['<p>{0}</p>'.format(p) for p in paragraphs]
|
||||
new_content = ''.join(paragraphs)
|
||||
|
||||
fp.write(new_content)
|
||||
|
|
@ -264,7 +283,7 @@ def fields2pelican(fields, out_markup, output_path, dircat=False, strip_raw=Fals
|
|||
elif rc > 0:
|
||||
error = "Please, check your Pandoc installation."
|
||||
exit(error)
|
||||
except OSError, e:
|
||||
except OSError as e:
|
||||
error = "Pandoc execution failed: %s" % e
|
||||
exit(error)
|
||||
|
||||
|
|
@ -284,7 +303,7 @@ def fields2pelican(fields, out_markup, output_path, dircat=False, strip_raw=Fals
|
|||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Transform feed, Wordpress or Dotclear files to reST (rst) "
|
||||
"or Markdown (md) files. Be sure to have pandoc installed.",
|
||||
"or Markdown (md) files. Be sure to have pandoc installed",
|
||||
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
||||
|
||||
parser.add_argument(dest='input', help='The input file to read')
|
||||
|
|
@ -304,10 +323,10 @@ def main():
|
|||
help="Strip raw HTML code that can't be converted to "
|
||||
"markup such as flash embeds or iframes (wordpress import only)")
|
||||
parser.add_argument('--disable-slugs', action='store_true',
|
||||
dest='disable_slugs',
|
||||
help='Disable storing slugs from imported posts within output. '
|
||||
'With this disabled, your Pelican URLs may not be consistent '
|
||||
'with your original posts.')
|
||||
dest='disable_slugs',
|
||||
help='Disable storing slugs from imported posts within output. '
|
||||
'With this disabled, your Pelican URLs may not be consistent '
|
||||
'with your original posts.')
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
|
|
@ -339,4 +358,4 @@ def main():
|
|||
fields2pelican(fields, args.markup, args.output,
|
||||
dircat=args.dircat or False,
|
||||
strip_raw=args.strip_raw or False,
|
||||
disable_slugs=args.disable_slugs or False)
|
||||
strip_slugs=args.disable_slugs or False)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue