From 6888a046362316f98fb3aaf2982ca246ad724f30 Mon Sep 17 00:00:00 2001
From: Aaron Kavlie <akavlie@gmail.com>
Date: Thu, 12 Apr 2012 19:38:59 -0700
Subject: [PATCH 1/8] Issue #311

Catch BeautifulSoup ImportError.
---
 pelican/tools/pelican_import.py | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)
diff --git a/pelican/tools/pelican_import.py b/pelican/tools/pelican_import.py
index 57c4fc22..a4d64c67 100755
--- a/pelican/tools/pelican_import.py
+++ b/pelican/tools/pelican_import.py
@@ -13,7 +13,12 @@ from pelican.utils import slugify
 
 def wp2fields(xml):
     """Opens a wordpress XML file, and yield pelican fields"""
-    from BeautifulSoup import BeautifulStoneSoup
+    try:
+        from BeautifulSoup import BeautifulStoneSoup
+    except ImportError:
+        error = 'Missing dependency ' + \
+                '"BeautifulSoup" required to import Wordpress files.'
+        sys.exit(error)
 
     xmlfile = open(xml, encoding='utf-8').read()
     soup = BeautifulStoneSoup(xmlfile)
@@ -40,7 +45,13 @@ def wp2fields(xml):
 
 def dc2fields(file):
     """Opens a Dotclear export file, and yield pelican fields"""
-    from BeautifulSoup import BeautifulStoneSoup
+    try:
+        from BeautifulSoup import BeautifulStoneSoup
+    except ImportError:
+        error = 'Missing dependency ' + \
+                '"BeautifulSoup" required to import Dotclear files.'
+        sys.exit(error)
+
 
     in_cat = False
     in_post = False

From 23c05ad7dbd46e61d1cd1cfe193510601d7c2299 Mon Sep 17 00:00:00 2001
From: Aaron Kavlie <akavlie@gmail.com>
Date: Thu, 12 Apr 2012 19:53:03 -0700
Subject: [PATCH 2/8] Issue #311, #312

Document BeautifulSoup & pandoc deps.
---
 docs/importer.rst | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/docs/importer.rst b/docs/importer.rst
index 377820af..96e9e729 100644
--- a/docs/importer.rst
+++ b/docs/importer.rst
@@ -19,6 +19,23 @@ The conversion from HTML to reStructuredText relies on `pandoc
 written with Markdown syntax, they will not be converted (as Pelican also
 supports Markdown).
 
+Dependencies
+""""""""""""
+
+``pelican-import`` has two additional dependencies not included with pelican
+by default:
+
+- BeautifulSoup
+- pandoc
+
+BeautifulSoup can be installed like any other Python package::
+
+    $ pip install BeautifulSoup
+
+For pandoc, install a package for your operating system from the
+`pandoc site <http://johnmacfarlane.net/pandoc/installing.html>`_.
+
+
 Usage
 """""
 
@@ -26,8 +43,8 @@ Usage
 |                [--dir-cat]
 |                input
 
-Optional arguments:
-"""""""""""""""""""
+Optional arguments
+""""""""""""""""""
 
   -h, --help            show this help message and exit
   --wpfile              Wordpress XML export

From 6577efc8f466acf8f6e639528959e2915a7f9413 Mon Sep 17 00:00:00 2001
From: Aaron Kavlie <akavlie@gmail.com>
Date: Wed, 18 Apr 2012 00:20:54 -0700
Subject: [PATCH 3/8] Wrap paragraphs in <p> tags

---
 pelican/tools/pelican_import.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/pelican/tools/pelican_import.py b/pelican/tools/pelican_import.py
index a4d64c67..01253960 100755
--- a/pelican/tools/pelican_import.py
+++ b/pelican/tools/pelican_import.py
@@ -17,7 +17,7 @@ def wp2fields(xml):
         from BeautifulSoup import BeautifulStoneSoup
     except ImportError:
         error = 'Missing dependency ' + \
-                '"BeautifulSoup" required to import Wordpress files.'
+                '"BeautifulSoup" required to import Wordpress XML files.'
         sys.exit(error)
 
     xmlfile = open(xml, encoding='utf-8').read()
@@ -226,7 +226,10 @@ def fields2pelican(fields, out_markup, output_path, dircat=False):
             with open(html_filename, 'w', encoding='utf-8') as fp:
                 # Replace simple newlines with <br />+newline so that the HTML file
                 # represents the original post more accurately
-                content = content.replace("\n", "<br />\n")
+                paragraphs = content.split('\n\n')
+                paragraphs = ['<p>%s</p>' % p for p in paragraphs]
+                new_content = ''.join(paragraphs)
+
                 fp.write(content)
 
             cmd = 'pandoc --normalize --reference-links --from=html --to={0} -o "{1}" "{2}"'.format(

From 9491bb40d4127b29e2dc68d421d96aca3eb32e98 Mon Sep 17 00:00:00 2001
From: Aaron Kavlie <akavlie@gmail.com>
Date: Wed, 18 Apr 2012 00:24:52 -0700
Subject: [PATCH 4/8] Add --no-wrap option to pandoc, fixing issue with long
 link names (another fix for issue #314)

---
 pelican/tools/pelican_import.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pelican/tools/pelican_import.py b/pelican/tools/pelican_import.py
index 01253960..9a19f33c 100755
--- a/pelican/tools/pelican_import.py
+++ b/pelican/tools/pelican_import.py
@@ -232,7 +232,7 @@ def fields2pelican(fields, out_markup, output_path, dircat=False):
 
                 fp.write(content)
 
-            cmd = 'pandoc --normalize --reference-links --from=html --to={0} -o "{1}" "{2}"'.format(
+            cmd = 'pandoc --normalize --no-wrap --reference-links --from=html --to={0} -o "{1}" "{2}"'.format(
                 out_markup, out_filename, html_filename)
 
             try:

From cc30695b72772a5faab8bfccf217e2e1397b4f9f Mon Sep 17 00:00:00 2001
From: Aaron Kavlie <akavlie@gmail.com>
Date: Wed, 18 Apr 2012 09:29:47 -0700
Subject: [PATCH 5/8] Correct comment; switch to new style string formatting.

---
 pelican/tools/pelican_import.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pelican/tools/pelican_import.py b/pelican/tools/pelican_import.py
index 9a19f33c..b45d4fec 100755
--- a/pelican/tools/pelican_import.py
+++ b/pelican/tools/pelican_import.py
@@ -224,10 +224,10 @@ def fields2pelican(fields, out_markup, output_path, dircat=False):
             html_filename = os.path.join(output_path, filename+'.html')
 
             with open(html_filename, 'w', encoding='utf-8') as fp:
-                # Replace simple newlines with <br />+newline so that the HTML file
-                # represents the original post more accurately
+                # Replace newlines with paragraphs wrapped with <p> so
+                # HTML is valid before conversion
                 paragraphs = content.split('\n\n')
-                paragraphs = ['<p>%s</p>' % p for p in paragraphs]
+                paragraphs = ['<p>{}</p>'.format(p) for p in paragraphs]
                 new_content = ''.join(paragraphs)
 
                 fp.write(content)

From 36a53442821fbdf379e45c309906fdf0a6f30193 Mon Sep 17 00:00:00 2001
From: Aaron Kavlie <akavlie@gmail.com>
Date: Wed, 18 Apr 2012 22:14:53 -0700
Subject: [PATCH 6/8] Beautify two-line string concat.

---
 pelican/tools/pelican_import.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pelican/tools/pelican_import.py b/pelican/tools/pelican_import.py
index b45d4fec..fdf28d14 100755
--- a/pelican/tools/pelican_import.py
+++ b/pelican/tools/pelican_import.py
@@ -16,8 +16,8 @@ def wp2fields(xml):
     try:
         from BeautifulSoup import BeautifulStoneSoup
     except ImportError:
-        error = 'Missing dependency ' + \
-                '"BeautifulSoup" required to import Wordpress XML files.'
+        error = ('Missing dependency '
+                 '"BeautifulSoup" required to import Wordpress XML files.')
         sys.exit(error)
 
     xmlfile = open(xml, encoding='utf-8').read()
@@ -48,8 +48,8 @@ def dc2fields(file):
     try:
         from BeautifulSoup import BeautifulStoneSoup
     except ImportError:
-        error = 'Missing dependency ' + \
-                '"BeautifulSoup" required to import Dotclear files.'
+        error = ('Missing dependency '
+                 '"BeautifulSoup" required to import Dotclear files.')
         sys.exit(error)
 
 

From 5cad4c46f06963c58d43dbaf6e2f5addbec663ea Mon Sep 17 00:00:00 2001
From: Aaron Kavlie <akavlie@gmail.com>
Date: Wed, 18 Apr 2012 22:17:43 -0700
Subject: [PATCH 7/8] Improve wording of docs re: pelican-import deps.

---
 docs/importer.rst | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/docs/importer.rst b/docs/importer.rst
index 96e9e729..0147f900 100644
--- a/docs/importer.rst
+++ b/docs/importer.rst
@@ -22,8 +22,7 @@ supports Markdown).
 Dependencies
 """"""""""""
 
-``pelican-import`` has two additional dependencies not included with pelican
-by default:
+``pelican-import`` has two dependencies not required by the rest of pelican:
 
 - BeautifulSoup
 - pandoc

From 5710dc771d6951519eea5209f388fe17b79b973c Mon Sep 17 00:00:00 2001
From: Aaron Kavlie <akavlie@gmail.com>
Date: Wed, 18 Apr 2012 22:28:49 -0700
Subject: [PATCH 8/8] Remove --no-wrap; change para formatting to unicode
 string.

---
 pelican/tools/pelican_import.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pelican/tools/pelican_import.py b/pelican/tools/pelican_import.py
index fdf28d14..050b1010 100755
--- a/pelican/tools/pelican_import.py
+++ b/pelican/tools/pelican_import.py
@@ -227,12 +227,12 @@ def fields2pelican(fields, out_markup, output_path, dircat=False):
                 # Replace newlines with paragraphs wrapped with <p> so
                 # HTML is valid before conversion
                 paragraphs = content.split('\n\n')
-                paragraphs = ['<p>{}</p>'.format(p) for p in paragraphs]
+                paragraphs = [u'<p>{}</p>'.format(p) for p in paragraphs]
                 new_content = ''.join(paragraphs)
 
                 fp.write(content)
 
-            cmd = 'pandoc --normalize --no-wrap --reference-links --from=html --to={0} -o "{1}" "{2}"'.format(
+            cmd = 'pandoc --normalize --reference-links --from=html --to={0} -o "{1}" "{2}"'.format(
                 out_markup, out_filename, html_filename)
 
             try: