Merge pull request #325 from akavlie/issue-314

Better output formatting (see issue #314)
2025-10-15 20:28:56 +02:00 · 2012-04-19 01:34:57 -07:00 · 2012-04-19 01:34:57 -07:00 · 004ff4e7b6
commit 004ff4e7b6
parent f3188587ce 5710dc771d
2 changed files with 37 additions and 7 deletions
--- a/docs/importer.rst
+++ b/docs/importer.rst
@ -19,6 +19,22 @@ The conversion from HTML to reStructuredText relies on `pandoc
 written with Markdown syntax, they will not be converted (as Pelican also
 supports Markdown).
 Dependencies
 """"""""""""
 ``pelican-import`` has two dependencies not required by the rest of pelican:
 - BeautifulSoup
 - pandoc
 BeautifulSoup can be installed like any other Python package::
    $ pip install BeautifulSoup
 For pandoc, install a package for your operating system from the 
 `pandoc site <http://johnmacfarlane.net/pandoc/installing.html>`_.
 Usage
 """""
@ -26,8 +42,8 @@ Usage
 |                [--dir-cat]
 |                input
-Optional arguments:
+Optional arguments
-"""""""""""""""""""
+""""""""""""""""""
  -h, --help            show this help message and exit
  --wpfile              Wordpress XML export
--- a/pelican/tools/pelican_import.py
+++ b/pelican/tools/pelican_import.py
@ -13,7 +13,12 @@ from pelican.utils import slugify
 def wp2fields(xml):
    """Opens a wordpress XML file, and yield pelican fields"""
-    from BeautifulSoup import BeautifulStoneSoup
+    try:
        from BeautifulSoup import BeautifulStoneSoup
    except ImportError:
        error = ('Missing dependency '
                 '"BeautifulSoup" required to import Wordpress XML files.')
        sys.exit(error)
    xmlfile = open(xml, encoding='utf-8').read()
    soup = BeautifulStoneSoup(xmlfile)
@ -40,7 +45,13 @@ def wp2fields(xml):
 def dc2fields(file):
    """Opens a Dotclear export file, and yield pelican fields"""
-    from BeautifulSoup import BeautifulStoneSoup
+    try:
        from BeautifulSoup import BeautifulStoneSoup
    except ImportError:
        error = ('Missing dependency '
                 '"BeautifulSoup" required to import Dotclear files.')
        sys.exit(error)
    in_cat = False
    in_post = False
@ -213,9 +224,12 @@ def fields2pelican(fields, out_markup, output_path, dircat=False):
            html_filename = os.path.join(output_path, filename+'.html')
            with open(html_filename, 'w', encoding='utf-8') as fp:
-                # Replace simple newlines with <br />+newline so that the HTML file
+                # Replace newlines with paragraphs wrapped with <p> so
-                # represents the original post more accurately
+                # HTML is valid before conversion
-                content = content.replace("\n", "<br />\n")
+                paragraphs = content.split('\n\n')
                paragraphs = [u'<p>{}</p>'.format(p) for p in paragraphs]
                new_content = ''.join(paragraphs)
                fp.write(content)
            cmd = 'pandoc --normalize --reference-links --from=html --to={0} -o "{1}" "{2}"'.format(