Merge pull request #325 from akavlie/issue-314

Better output formatting (see issue #314)
2012-04-19 01:34:57 -07:00 · 2012-04-19 01:34:57 -07:00 · 004ff4e7b6
commit 004ff4e7b6
parent f3188587ce 5710dc771d
2 changed files with 37 additions and 7 deletions
--- a/docs/importer.rst
+++ b/docs/importer.rst
@ -19,6 +19,22 @@ The conversion from HTML to reStructuredText relies on `pandoc
 written with Markdown syntax, they will not be converted (as Pelican also
 supports Markdown).

+Dependencies
+""""""""""""
+
+``pelican-import`` has two dependencies not required by the rest of Pelican:
+
+- BeautifulSoup
+- pandoc
+
+BeautifulSoup can be installed like any other Python package::
+
+    $ pip install BeautifulSoup
+
+For pandoc, install a package for your operating system from the 
+`pandoc site <http://johnmacfarlane.net/pandoc/installing.html>`_.
+
+
 Usage
 """""

@ -26,8 +42,8 @@ Usage
 |                [--dir-cat]
 |                input

-Optional arguments:
-"""""""""""""""""""
+Optional arguments
+""""""""""""""""""

  -h, --help            show this help message and exit
  --wpfile              Wordpress XML export
--- a/pelican/tools/pelican_import.py
+++ b/pelican/tools/pelican_import.py
@ -13,7 +13,12 @@ from pelican.utils import slugify

 def wp2fields(xml):
    """Opens a wordpress XML file, and yield pelican fields"""
-    from BeautifulSoup import BeautifulStoneSoup
+    try:
+        from BeautifulSoup import BeautifulStoneSoup
+    except ImportError:
+        error = ('Missing dependency '
+                 '"BeautifulSoup" required to import Wordpress XML files.')
+        sys.exit(error)

    xmlfile = open(xml, encoding='utf-8').read()
    soup = BeautifulStoneSoup(xmlfile)
@ -40,7 +45,13 @@ def wp2fields(xml):

 def dc2fields(file):
    """Opens a Dotclear export file, and yield pelican fields"""
-    from BeautifulSoup import BeautifulStoneSoup
+    try:
+        from BeautifulSoup import BeautifulStoneSoup
+    except ImportError:
+        error = ('Missing dependency '
+                 '"BeautifulSoup" required to import Dotclear files.')
+        sys.exit(error)
+

    in_cat = False
    in_post = False
@ -213,9 +224,12 @@ def fields2pelican(fields, out_markup, output_path, dircat=False):
            html_filename = os.path.join(output_path, filename+'.html')

            with open(html_filename, 'w', encoding='utf-8') as fp:
-                # Replace simple newlines with <br />+newline so that the HTML file
-                # represents the original post more accurately
-                content = content.replace("\n", "<br />\n")
+                # Replace newlines with paragraphs wrapped with <p> so
+                # HTML is valid before conversion
+                paragraphs = content.split('\n\n')
+                paragraphs = [u'<p>{}</p>'.format(p) for p in paragraphs]
+                new_content = ''.join(paragraphs)
+
                fp.write(content)

            cmd = 'pandoc --normalize --reference-links --from=html --to={0} -o "{1}" "{2}"'.format(