From 8a6d96b289880b40bf5225f048fbef6b0a80e80b Mon Sep 17 00:00:00 2001 From: Steve Schwarz Date: Sun, 24 Feb 2013 17:59:24 -0600 Subject: [PATCH] pelican_import fix for bs4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Quick fix for this traceback: $ pelican-import --wpfile ~/Downloads/mysite.wordpress.2013-02-24.xml  Traceback (most recent call last):   File "/Users/me/.virtualenvs/pelican/bin/pelican-import", line 8, in <module>     load_entry_point('pelican==3.2', 'console_scripts', 'pelican-import')()   File "/Users/me/.virtualenvs/pelican/src/pelican/pelican/tools/pelican_import.py", line 363, in main     disable_slugs=args.disable_slugs or False)   File "/Users/me/.virtualenvs/pelican/src/pelican/pelican/tools/pelican_import.py", line 238, in fields2pelican     for title, content, filename, date, author, categories, tags, in_markup in fields:   File "/Users/me/.virtualenvs/pelican/src/pelican/pelican/tools/pelican_import.py", line 37, in wp2fields     if item.fetch('wp:status')[0].contents[0] == "publish": TypeError: 'NoneType' object is not callable I'm a BeautifulSoup novice but these changes allowed me to import two of my wordpress.xml files. 
--- pelican/tools/pelican_import.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/pelican/tools/pelican_import.py b/pelican/tools/pelican_import.py index 33041b0e..04747509 100755 --- a/pelican/tools/pelican_import.py +++ b/pelican/tools/pelican_import.py @@ -34,7 +34,7 @@ def wp2fields(xml): for item in items: - if item.fetch('wp:status')[0].contents[0] == "publish": + if item.find('status').string == "publish": try: # Use HTMLParser due to issues with BeautifulSoup 3 @@ -42,19 +42,18 @@ def wp2fields(xml): except IndexError: continue - content = item.fetch('content:encoded')[0].contents[0] - filename = item.fetch('wp:post_name')[0].contents[0] + content = item.find('encoded').string + filename = item.find('post_name').string - raw_date = item.fetch('wp:post_date')[0].contents[0] + raw_date = item.find('post_date').string date_object = time.strptime(raw_date, "%Y-%m-%d %H:%M:%S") date = time.strftime("%Y-%m-%d %H:%M", date_object) + author = item.find('creator').string - author = item.fetch('dc:creator')[0].contents[0].title() + categories = [cat.string for cat in item.findAll(name='category')] + # caturl = [cat['nicename'] for cat in item.find(domain='category')] - categories = [cat.contents[0] for cat in item.fetch(domain='category')] - # caturl = [cat['nicename'] for cat in item.fetch(domain='category')] - - tags = [tag.contents[0] for tag in item.fetch(domain='post_tag')] + tags = [tag.string for tag in item.findAll(name='post_tag')] yield (title, content, filename, date, author, categories, tags, "html")