1
0
Fork 0
forked from github/pelican

pelican_import fix for bs4

Quick fix for this traceback:
$ pelican-import --wpfile ~/Downloads/mysite.wordpress.2013-02-24.xml 
Traceback (most recent call last):
  File "/Users/me/.virtualenvs/pelican/bin/pelican-import", line 8, in <module>
    load_entry_point('pelican==3.2', 'console_scripts', 'pelican-import')()
  File "/Users/me/.virtualenvs/pelican/src/pelican/pelican/tools/pelican_import.py", line 363, in main
    disable_slugs=args.disable_slugs or False)
  File "/Users/me/.virtualenvs/pelican/src/pelican/pelican/tools/pelican_import.py", line 238, in fields2pelican
    for title, content, filename, date, author, categories, tags, in_markup in fields:
  File "/Users/me/.virtualenvs/pelican/src/pelican/pelican/tools/pelican_import.py", line 37, in wp2fields
    if item.fetch('wp:status')[0].contents[0] == "publish":
TypeError: 'NoneType' object is not callable

I'm a BeautifulSoup novice, but these changes allowed me to import two of my wordpress.xml files.
This commit is contained in:
Steve Schwarz 2013-02-24 17:59:24 -06:00 committed by Alexis Métaireau
commit 8a6d96b289

View file

@@ -34,7 +34,7 @@ def wp2fields(xml):
for item in items:
if item.fetch('wp:status')[0].contents[0] == "publish":
if item.find('status').string == "publish":
try:
# Use HTMLParser due to issues with BeautifulSoup 3
@@ -42,19 +42,18 @@ def wp2fields(xml):
except IndexError:
continue
content = item.fetch('content:encoded')[0].contents[0]
filename = item.fetch('wp:post_name')[0].contents[0]
content = item.find('encoded').string
filename = item.find('post_name').string
raw_date = item.fetch('wp:post_date')[0].contents[0]
raw_date = item.find('post_date').string
date_object = time.strptime(raw_date, "%Y-%m-%d %H:%M:%S")
date = time.strftime("%Y-%m-%d %H:%M", date_object)
author = item.find('creator').string
author = item.fetch('dc:creator')[0].contents[0].title()
categories = [cat.string for cat in item.findAll(name='category')]
# caturl = [cat['nicename'] for cat in item.find(domain='category')]
categories = [cat.contents[0] for cat in item.fetch(domain='category')]
# caturl = [cat['nicename'] for cat in item.fetch(domain='category')]
tags = [tag.contents[0] for tag in item.fetch(domain='post_tag')]
tags = [tag.string for tag in item.findAll(name='post_tag')]
yield (title, content, filename, date, author, categories, tags, "html")