Merge pull request #325 from akavlie/issue-314

Better output formatting (see issue #314)
This commit is contained in:
Alexis Metaireau 2012-04-19 01:34:57 -07:00
commit 004ff4e7b6
2 changed files with 37 additions and 7 deletions

View file

@ -19,6 +19,22 @@ The conversion from HTML to reStructuredText relies on `pandoc
written with Markdown syntax, they will not be converted (as Pelican also written with Markdown syntax, they will not be converted (as Pelican also
supports Markdown). supports Markdown).
Dependencies
""""""""""""
``pelican-import`` has two dependencies not required by the rest of Pelican:
- BeautifulSoup
- pandoc
BeautifulSoup can be installed like any other Python package::
$ pip install BeautifulSoup
For pandoc, install a package for your operating system from the
`pandoc site <http://johnmacfarlane.net/pandoc/installing.html>`_.
Usage Usage
""""" """""
@ -26,8 +42,8 @@ Usage
| [--dir-cat] | [--dir-cat]
| input | input
Optional arguments: Optional arguments
""""""""""""""""""" """"""""""""""""""
-h, --help show this help message and exit -h, --help show this help message and exit
--wpfile Wordpress XML export --wpfile Wordpress XML export

View file

@ -13,7 +13,12 @@ from pelican.utils import slugify
def wp2fields(xml): def wp2fields(xml):
"""Opens a wordpress XML file, and yield pelican fields""" """Opens a wordpress XML file, and yield pelican fields"""
from BeautifulSoup import BeautifulStoneSoup try:
from BeautifulSoup import BeautifulStoneSoup
except ImportError:
error = ('Missing dependency '
'"BeautifulSoup" required to import Wordpress XML files.')
sys.exit(error)
xmlfile = open(xml, encoding='utf-8').read() xmlfile = open(xml, encoding='utf-8').read()
soup = BeautifulStoneSoup(xmlfile) soup = BeautifulStoneSoup(xmlfile)
@ -40,7 +45,13 @@ def wp2fields(xml):
def dc2fields(file): def dc2fields(file):
"""Opens a Dotclear export file, and yield pelican fields""" """Opens a Dotclear export file, and yield pelican fields"""
from BeautifulSoup import BeautifulStoneSoup try:
from BeautifulSoup import BeautifulStoneSoup
except ImportError:
error = ('Missing dependency '
'"BeautifulSoup" required to import Dotclear files.')
sys.exit(error)
in_cat = False in_cat = False
in_post = False in_post = False
@ -213,9 +224,12 @@ def fields2pelican(fields, out_markup, output_path, dircat=False):
html_filename = os.path.join(output_path, filename+'.html') html_filename = os.path.join(output_path, filename+'.html')
with open(html_filename, 'w', encoding='utf-8') as fp: with open(html_filename, 'w', encoding='utf-8') as fp:
# Replace simple newlines with <br />+newline so that the HTML file # Replace newlines with paragraphs wrapped with <p> so
# represents the original post more accurately # HTML is valid before conversion
content = content.replace("\n", "<br />\n") paragraphs = content.split('\n\n')
paragraphs = [u'<p>{}</p>'.format(p) for p in paragraphs]
new_content = ''.join(paragraphs)
fp.write(content) fp.write(content)
cmd = 'pandoc --normalize --reference-links --from=html --to={0} -o "{1}" "{2}"'.format( cmd = 'pandoc --normalize --reference-links --from=html --to={0} -o "{1}" "{2}"'.format(