Implement Posterous import - fixes #608

This commit is contained in:
Irfan Ahmad 2013-03-24 19:19:26 +05:00 committed by Justin Mayer
commit 58faf9462e
2 changed files with 73 additions and 3 deletions

View file

@ -13,6 +13,7 @@ software to ReStructuredText or Markdown. The supported import formats are:
- WordPress XML export
- Dotclear export
- Posterous API
- RSS/Atom feed
The conversion from HTML to reStructuredText or Markdown relies on `Pandoc`_.
@ -40,14 +41,16 @@ Usage
::
pelican-import [-h] [--wpfile] [--dotclear] [--feed] [-o OUTPUT]
pelican-import [-h] [--wpfile] [--dotclear] [--posterous] [--feed] [-o OUTPUT]
[-m MARKUP] [--dir-cat] [--strip-raw] [--disable-slugs]
input
[-e EMAIL] [-p PASSWORD]
input|api_token
Positional arguments
--------------------
input The input file to read
api_token [Posterous only] api_token can be obtained from http://posterous.com/api/
Optional arguments
------------------
@ -55,6 +58,7 @@ Optional arguments
-h, --help Show this help message and exit
--wpfile WordPress XML export (default: False)
--dotclear Dotclear export (default: False)
--posterous Posterous API (default: False)
--feed Feed to parse (default: False)
-o OUTPUT, --output OUTPUT
Output path (default: output)
@ -70,6 +74,10 @@ Optional arguments
output. With this disabled, your Pelican URLs may not
be consistent with your original posts. (default:
False)
-e EMAIL, --email=EMAIL
Email used to authenticate Posterous API
-p PASSWORD, --password=PASSWORD
Password used to authenticate Posterous API
Examples
@ -83,6 +91,9 @@ For Dotclear::
$ pelican-import --dotclear -o ~/output ~/backup.txt
for Posterous::
$ pelican-import --posterous -o ~/output --email=<email_address> --password=<password> <api_token>
Tests
=====

View file

@ -261,6 +261,55 @@ def dc2fields(file):
yield (post_title, content, slugify(post_title), post_creadt, author, categories, tags, post_format)
def posterous2fields(api_token, email, password):
"""Imports posterous posts"""
import base64
from datetime import datetime, timedelta
try:
# py3k import
import json
except ImportError:
# py2 import
import simplejson as json
try:
# py3k import
import urllib.request as urllib_request
except ImportError:
# py2 import
import urllib2 as urllib_request
def get_posterous_posts(api_token, email, password, page = 1):
base64string = base64.encodestring(("%s:%s" % (email, password)).encode('utf-8')).replace(b'\n', b'')
url = "http://posterous.com/api/v2/users/me/sites/primary/posts?api_token=%s&page=%d" % (api_token, page)
request = urllib_request.Request(url)
request.add_header("Authorization", "Basic %s" % base64string.decode())
handle = urllib_request.urlopen(request)
posts = json.loads(handle.read().decode('utf-8'))
return posts
page = 1
posts = get_posterous_posts(api_token, email, password, page)
while len(posts) > 0:
posts = get_posterous_posts(api_token, email, password, page)
page += 1
for post in posts:
slug = post.get('slug')
if not slug:
slug = slugify(post.get('title'))
tags = [tag.get('name') for tag in post.get('tags')]
raw_date = post.get('display_date')
date_object = datetime.strptime(raw_date[:-6], "%Y/%m/%d %H:%M:%S")
offset = int(raw_date[-5:])
delta = timedelta(hours = offset / 100)
date_object -= delta
date = date_object.strftime("%Y-%m-%d %H:%M")
yield (post.get('title'), post.get('body_cleaned'), slug, date,
post.get('user').get('display_name'), [], tags, "html")
def feed2fields(file):
"""Read a feed and yield pelican fields"""
import feedparser
@ -389,6 +438,8 @@ def main():
help='Wordpress XML export')
parser.add_argument('--dotclear', action='store_true', dest='dotclear',
help='Dotclear export')
parser.add_argument('--posterous', action='store_true', dest='posterous',
help='Posterous export')
parser.add_argument('--feed', action='store_true', dest='feed',
help='Feed to parse')
parser.add_argument('-o', '--output', dest='output', default='output',
@ -405,6 +456,10 @@ def main():
help='Disable storing slugs from imported posts within output. '
'With this disabled, your Pelican URLs may not be consistent '
'with your original posts.')
parser.add_argument('-e', '--email', dest='email',
help="Email address (posterous import only)")
parser.add_argument('-p', '--password', dest='password',
help="Password (posterous import only)")
args = parser.parse_args()
@ -413,10 +468,12 @@ def main():
input_type = 'wordpress'
elif args.dotclear:
input_type = 'dotclear'
elif args.posterous:
input_type = 'posterous'
elif args.feed:
input_type = 'feed'
else:
error = "You must provide either --wpfile, --dotclear or --feed options"
error = "You must provide either --wpfile, --dotclear, --posterous or --feed options"
exit(error)
if not os.path.exists(args.output):
@ -430,6 +487,8 @@ def main():
fields = wp2fields(args.input)
elif input_type == 'dotclear':
fields = dc2fields(args.input)
elif input_type == 'posterous':
fields = posterous2fields(args.input, args.email, args.password)
elif input_type == 'feed':
fields = feed2fields(args.input)