1
0
Fork 0
forked from github/pelican

Merge pull request #325 from akavlie/issue-314

Better output formatting (see issue #314)
This commit is contained in:
Alexis Metaireau 2012-04-19 01:34:57 -07:00
commit 004ff4e7b6
2 changed files with 37 additions and 7 deletions

View file

@ -19,6 +19,22 @@ The conversion from HTML to reStructuredText relies on `pandoc
written with Markdown syntax, they will not be converted (as Pelican also
supports Markdown).
Dependencies
""""""""""""
``pelican-import`` has two dependencies not required by the rest of Pelican:
- BeautifulSoup
- pandoc
BeautifulSoup can be installed like any other Python package::
$ pip install BeautifulSoup
For pandoc, install a package for your operating system from the
`pandoc site <http://johnmacfarlane.net/pandoc/installing.html>`_.
Usage
"""""
@ -26,8 +42,8 @@ Usage
| [--dir-cat]
| input
Optional arguments:
"""""""""""""""""""
Optional arguments
""""""""""""""""""
-h, --help show this help message and exit
--wpfile Wordpress XML export

View file

@ -13,7 +13,12 @@ from pelican.utils import slugify
def wp2fields(xml):
"""Opens a wordpress XML file, and yield pelican fields"""
from BeautifulSoup import BeautifulStoneSoup
try:
from BeautifulSoup import BeautifulStoneSoup
except ImportError:
error = ('Missing dependency '
'"BeautifulSoup" required to import Wordpress XML files.')
sys.exit(error)
xmlfile = open(xml, encoding='utf-8').read()
soup = BeautifulStoneSoup(xmlfile)
@ -40,7 +45,13 @@ def wp2fields(xml):
def dc2fields(file):
"""Opens a Dotclear export file, and yield pelican fields"""
from BeautifulSoup import BeautifulStoneSoup
try:
from BeautifulSoup import BeautifulStoneSoup
except ImportError:
error = ('Missing dependency '
'"BeautifulSoup" required to import Dotclear files.')
sys.exit(error)
in_cat = False
in_post = False
@ -213,9 +224,12 @@ def fields2pelican(fields, out_markup, output_path, dircat=False):
html_filename = os.path.join(output_path, filename+'.html')
with open(html_filename, 'w', encoding='utf-8') as fp:
# Replace simple newlines with <br />+newline so that the HTML file
# represents the original post more accurately
content = content.replace("\n", "<br />\n")
# Replace newlines with paragraphs wrapped with <p> so
# HTML is valid before conversion
paragraphs = content.split('\n\n')
paragraphs = [u'<p>{}</p>'.format(p) for p in paragraphs]
new_content = ''.join(paragraphs)
fp.write(content)
cmd = 'pandoc --normalize --reference-links --from=html --to={0} -o "{1}" "{2}"'.format(