From 8a6d96b289880b40bf5225f048fbef6b0a80e80b Mon Sep 17 00:00:00 2001
From: Steve Schwarz <steve@agilitynerd.com>
Date: Sun, 24 Feb 2013 17:59:24 -0600
Subject: [PATCH] pelican_import fix for bs4
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Quick fix for the following traceback, which occurs when running pelican-import with BeautifulSoup 4 (bs4):
$ pelican-import --wpfile ~/Downloads/mysite.wordpress.2013-02-24.xml 
Traceback (most recent call last):
  File "/Users/me/.virtualenvs/pelican/bin/pelican-import", line 8, in <module>
    load_entry_point('pelican==3.2', 'console_scripts', 'pelican-import')()
  File "/Users/me/.virtualenvs/pelican/src/pelican/pelican/tools/pelican_import.py", line 363, in main
    disable_slugs=args.disable_slugs or False)
  File "/Users/me/.virtualenvs/pelican/src/pelican/pelican/tools/pelican_import.py", line 238, in fields2pelican
    for title, content, filename, date, author, categories, tags, in_markup in fields:
  File "/Users/me/.virtualenvs/pelican/src/pelican/pelican/tools/pelican_import.py", line 37, in wp2fields
    if item.fetch('wp:status')[0].contents[0] == "publish":
TypeError: 'NoneType' object is not callable

I'm a BeautifulSoup novice, but these changes allowed me to import two of my WordPress XML export files.
---
 pelican/tools/pelican_import.py | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)
diff --git a/pelican/tools/pelican_import.py b/pelican/tools/pelican_import.py
index 33041b0e..04747509 100755
--- a/pelican/tools/pelican_import.py
+++ b/pelican/tools/pelican_import.py
@@ -34,7 +34,7 @@ def wp2fields(xml):
 
     for item in items:
 
-        if item.fetch('wp:status')[0].contents[0] == "publish":
+        if item.find('status').string == "publish":
 
             try:
                 # Use HTMLParser due to issues with BeautifulSoup 3
@@ -42,19 +42,18 @@ def wp2fields(xml):
             except IndexError:
                 continue
 
-            content = item.fetch('content:encoded')[0].contents[0]
-            filename = item.fetch('wp:post_name')[0].contents[0]
+            content = item.find('encoded').string
+            filename = item.find('post_name').string
 
-            raw_date = item.fetch('wp:post_date')[0].contents[0]
+            raw_date = item.find('post_date').string
             date_object = time.strptime(raw_date, "%Y-%m-%d %H:%M:%S")
             date = time.strftime("%Y-%m-%d %H:%M", date_object)
+            author = item.find('creator').string
 
-            author = item.fetch('dc:creator')[0].contents[0].title()
+            categories = [cat.string for cat in item.findAll(name='category')]
+            # caturl = [cat['nicename'] for cat in item.find(domain='category')]
 
-            categories = [cat.contents[0] for cat in item.fetch(domain='category')]
-            # caturl = [cat['nicename'] for cat in item.fetch(domain='category')]
-
-            tags = [tag.contents[0] for tag in item.fetch(domain='post_tag')]
+            tags = [tag.string for tag in item.findAll(name='post_tag')]
 
             yield (title, content, filename, date, author, categories, tags, "html")