better handle case where there is no title in the wordpress post to import

(expect the exception that is actually getting thrown)
This commit is contained in:
Dominique Plante 2013-05-29 06:23:34 -07:00
commit 7fb88d75ae
3 changed files with 114 additions and 2 deletions

View file

@ -0,0 +1,90 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!-- This is a WordPress eXtended RSS file generated by WordPress as an export of your site. -->
<!-- It contains information about your site's posts, pages, comments, categories, and other content. -->
<!-- You may use this file to transfer that content from one site to another. -->
<!-- This file is not intended to serve as a complete backup of your site. -->
<!-- To import this information into a WordPress site follow these steps: -->
<!-- 1. Log in to that site as an administrator. -->
<!-- 2. Go to Tools: Import in the WordPress admin panel. -->
<!-- 3. Install the "WordPress" importer from the list. -->
<!-- 4. Activate & Run Importer. -->
<!-- 5. Upload this file using the form provided on that page. -->
<!-- 6. You will first be asked to map the authors in this export file to users -->
<!-- on the site. For each author, you may choose to map to an -->
<!-- existing user on the site or to create a new user. -->
<!-- 7. WordPress will then import each of the posts, pages, comments, categories, etc. -->
<!-- contained in this file into your site. -->
<!-- generator="WordPress/3.3.1" created="2012-05-13 01:13" -->
<rss version="2.0"
xmlns:excerpt="http://wordpress.org/export/1.1/excerpt/"
xmlns:content="http://purl.org/rss/1.0/modules/content/"
xmlns:wfw="http://wellformedweb.org/CommentAPI/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:wp="http://wordpress.org/export/1.1/"
>
<channel>
<title>Pelican test channel</title>
<link>http://thisisa.test</link>
<description>Not a real feed, just for test</description>
<pubDate>Sun, 13 May 2012 01:13:52 +0000</pubDate>
<language>en</language>
<wp:wxr_version>1.1</wp:wxr_version>
<wp:base_site_url>http://thisisa.test</wp:base_site_url>
<wp:base_blog_url>http://thisisa.test</wp:base_blog_url>
<wp:author><wp:author_id>2</wp:author_id><wp:author_login>Bob</wp:author_login><wp:author_email>bob@thisisa.test</wp:author_email><wp:author_display_name><![CDATA[Bob]]></wp:author_display_name><wp:author_first_name><![CDATA[Bob]]></wp:author_first_name><wp:author_last_name><![CDATA[]]></wp:author_last_name></wp:author>
<wp:author><wp:author_id>3</wp:author_id><wp:author_login>Jonh</wp:author_login><wp:author_email>jonh@thisisa.test</wp:author_email><wp:author_display_name><![CDATA[Jonh]]></wp:author_display_name><wp:author_first_name><![CDATA[Jonh]]></wp:author_first_name><wp:author_last_name><![CDATA[]]></wp:author_last_name></wp:author>
<wp:category><wp:term_id>7</wp:term_id><wp:category_nicename>categ-1</wp:category_nicename><wp:category_parent></wp:category_parent><wp:cat_name><![CDATA[Category 1]]></wp:cat_name></wp:category>
<wp:category><wp:term_id>11</wp:term_id><wp:category_nicename>categ-2</wp:category_nicename><wp:category_parent></wp:category_parent><wp:cat_name><![CDATA[Category 2]]></wp:cat_name></wp:category>
<wp:category><wp:term_id>1</wp:term_id><wp:category_nicename>uncategorized</wp:category_nicename><wp:category_parent></wp:category_parent><wp:cat_name><![CDATA[Uncategorized]]></wp:cat_name></wp:category>
<wp:category><wp:term_id>15</wp:term_id><wp:category_nicename>categ-3</wp:category_nicename><wp:category_parent></wp:category_parent><wp:cat_name><![CDATA[Category 3]]></wp:cat_name></wp:category>
<wp:tag><wp:term_id>25</wp:term_id><wp:tag_slug>tag-1</wp:tag_slug><wp:tag_name><![CDATA[tag 1]]></wp:tag_name></wp:tag>
<wp:tag><wp:term_id>122</wp:term_id><wp:tag_slug>tag2</wp:tag_slug><wp:tag_name><![CDATA[Tag2]]></wp:tag_name></wp:tag>
<wp:tag><wp:term_id>68</wp:term_id><wp:tag_slug>tag-3</wp:tag_slug><wp:tag_name><![CDATA[Tag 3]]></wp:tag_name></wp:tag>
<generator>http://wordpress.org/?v=3.3.1</generator>
<item>
<link>http://thisisa.test/contact/</link>
<pubDate>Wed, 11 Apr 2012 11:38:08 +0000</pubDate>
<dc:creator>bob</dc:creator>
<guid isPermaLink="false">http://thisisa.test/?page_id=334</guid>
<description></description>
<content:encoded><![CDATA[Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.]]></content:encoded>
<excerpt:encoded><![CDATA[]]></excerpt:encoded>
<wp:post_id>334</wp:post_id>
<wp:post_date>2012-04-11 06:38:08</wp:post_date>
<wp:post_date_gmt>2012-04-11 11:38:08</wp:post_date_gmt>
<wp:comment_status>open</wp:comment_status>
<wp:ping_status>open</wp:ping_status>
<wp:post_name>contact</wp:post_name>
<wp:status>publish</wp:status>
<wp:post_parent>0</wp:post_parent>
<wp:menu_order>0</wp:menu_order>
<wp:post_type>page</wp:post_type>
<wp:post_password></wp:post_password>
<wp:is_sticky>0</wp:is_sticky>
<wp:postmeta>
<wp:meta_key>sharing_disabled</wp:meta_key>
<wp:meta_value><![CDATA[1]]></wp:meta_value>
</wp:postmeta>
<wp:postmeta>
<wp:meta_key>_wp_page_template</wp:meta_key>
<wp:meta_value><![CDATA[default]]></wp:meta_value>
</wp:postmeta>
<wp:postmeta>
<wp:meta_key>_edit_last</wp:meta_key>
<wp:meta_value><![CDATA[3]]></wp:meta_value>
</wp:postmeta>
</item>
</channel>
</rss>

View file

@ -4,12 +4,16 @@ from __future__ import unicode_literals, print_function
import os
import re
from pelican.tools.pelican_import import wp2fields, fields2pelican, decode_wp_content
from pelican.tools.pelican_import import (wp2fields,
fields2pelican, decode_wp_content)
from pelican.tests.support import (unittest, temporary_folder, mute,
skipIfNoExecutable)
CUR_DIR = os.path.dirname(__file__)
WORDPRESS_XML_SAMPLE = os.path.join(CUR_DIR, 'content', 'wordpressexport.xml')
WORDPRESS_XML_SAMPLE_NO_TITLE = os.path.join(CUR_DIR, 'content', 'wordpressexport-notitle.xml')
WORDPRESS_ENCODED_CONTENT_SAMPLE = os.path.join(CUR_DIR,
'content',
'wordpress_content_encoded')
@ -122,3 +126,21 @@ class TestWordpressXmlImporter(unittest.TestCase):
sample_line = re.search(r'- This is a code sample', md).group(0)
code_line = re.search(r'\s+a = \[1, 2, 3\]', md).group(0)
self.assertTrue(sample_line.rindex('This') < code_line.rindex('a'))
@unittest.skipUnless(BeautifulSoup, 'Needs BeautifulSoup module')
class TestWordpressXmlImporter_OtherCases(unittest.TestCase):
def test_no_title(self):
self.posts = list(wp2fields(WORDPRESS_XML_SAMPLE_NO_TITLE))
# format of each tuple in self.posts:
# (title, content, filename, date, author, categories, tags, kind, "wp-html")
first_post = self.posts[0]
self.assertEqual('No title [contact]', first_post[0]) # title
self.assertEqual('contact', first_post[2]) # filename
self.assertEqual('2012-04-11 06:38', first_post[3]) # date - '2012-04-11 06:38'
self.assertEqual('bob', first_post[4]) # author - bob
self.assertEqual([], first_post[5]) # categories []
self.assertEqual([], first_post[6]) # tags []
self.assertEqual('page', first_post[7]) # kind

View file

@ -119,7 +119,7 @@ def wp2fields(xml):
try:
# Use HTMLParser due to issues with BeautifulSoup 3
title = HTMLParser().unescape(item.title.contents[0])
except IndexError:
except (AttributeError, NameError):
title = 'No title [%s]' % item.find('post_name').string
logger.warn('Post "%s" is lacking a proper title' % title)