diff --git a/docs/changelog.rst b/docs/changelog.rst
index 6cea6d93..5771abda 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -5,6 +5,7 @@ Release history
================
* Improve handling of links to intra-site resources
+* Decode HTML entities within WordPress post titles on import
3.0 (2012-08-08)
==================
diff --git a/pelican/tools/pelican_import.py b/pelican/tools/pelican_import.py
index fc28c6a4..fceac4b5 100755
--- a/pelican/tools/pelican_import.py
+++ b/pelican/tools/pelican_import.py
@@ -1,6 +1,7 @@
#!/usr/bin/env python
import argparse
+from HTMLParser import HTMLParser
import os
import subprocess
import sys
@@ -29,7 +30,8 @@ def wp2fields(xml):
if item.fetch('wp:status')[0].contents[0] == "publish":
try:
- title = item.title.contents[0]
+ # Decode entities via HTMLParser due to BeautifulSoup 3 issues
+ title = HTMLParser().unescape(item.title.contents[0])
except IndexError:
continue
diff --git a/tests/content/wordpressexport.xml b/tests/content/wordpressexport.xml
index d3e86cba..0d68f180 100644
--- a/tests/content/wordpressexport.xml
+++ b/tests/content/wordpressexport.xml
@@ -112,10 +112,10 @@
A normal post
- http://thisisa.test/?p=173
+ http://thisisa.test/?p=174
Thu, 01 Jan 1970 00:00:00 +0000bob
- http://thisisa.test/?p=173
+ http://thisisa.test/?p=174
- 173
+ 1742012-02-16 15:52:550000-00-00 00:00:00open
@@ -574,5 +574,59 @@ Bottom line: don't mess up with birds]]>
+
+ A normal post with some <html> entities in the title. You can't miss them.
+ http://thisisa.test/?p=175
+ Thu, 01 Jan 1970 00:00:00 +0000
+ bob
+ http://thisisa.test/?p=175
+
+
+
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
+tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
+quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
+consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
+cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
+proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
+
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
+tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
+quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
+consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
+cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
+proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
+
+
+Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
+tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
+quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
+consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
+cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
+proident, sunt in culpa qui officia deserunt mollit anim id est laborum.]]>
+
+ 175
+ 2012-02-16 15:52:55
+ 0000-00-00 00:00:00
+ open
+ open
+ html-entity-test
+ publish
+ 0
+ 0
+ post
+
+ 0
+
+
+ _edit_last
+
+
+
diff --git a/tests/test_importer.py b/tests/test_importer.py
index d4ff8205..959a556a 100644
--- a/tests/test_importer.py
+++ b/tests/test_importer.py
@@ -47,3 +47,13 @@ class TestWordpressXmlImporter(unittest.TestCase):
rst_files = (r(f) for f in silent_f2p(posts, 'rst', temp,
strip_raw=True))
self.assertFalse(any('