diff --git a/docs/importer.rst b/docs/importer.rst index 7ca3142d..ea3335ba 100644 --- a/docs/importer.rst +++ b/docs/importer.rst @@ -13,6 +13,7 @@ software to ReStructuredText or Markdown. The supported import formats are: - WordPress XML export - Dotclear export +- Posterous API - RSS/Atom feed The conversion from HTML to reStructuredText or Markdown relies on `Pandoc`_. @@ -40,14 +41,16 @@ Usage :: - pelican-import [-h] [--wpfile] [--dotclear] [--feed] [-o OUTPUT] + pelican-import [-h] [--wpfile] [--dotclear] [--posterous] [--feed] [-o OUTPUT] [-m MARKUP] [--dir-cat] [--strip-raw] [--disable-slugs] - input + [-e EMAIL] [-p PASSWORD] + input|api_token Positional arguments -------------------- input The input file to read + api_token [Posterous only] api_token can be obtained from http://posterous.com/api/ Optional arguments ------------------ @@ -55,6 +58,7 @@ Optional arguments -h, --help Show this help message and exit --wpfile WordPress XML export (default: False) --dotclear Dotclear export (default: False) + --posterous Posterous API (default: False) --feed Feed to parse (default: False) -o OUTPUT, --output OUTPUT Output path (default: output) @@ -70,6 +74,10 @@ Optional arguments output. With this disabled, your Pelican URLs may not be consistent with your original posts. (default: False) + -e EMAIL, --email=EMAIL + Email used to authenticate Posterous API + -p PASSWORD, --password=PASSWORD + Password used to authenticate Posterous API Examples @@ -83,6 +91,9 @@ For Dotclear:: $ pelican-import --dotclear -o ~/output ~/backup.txt +for Posterous:: + + $ pelican-import --posterous -o ~/output --email= --password= Tests ===== diff --git a/pelican/tools/pelican_import.py b/pelican/tools/pelican_import.py index 53998eff..a3bac84c 100755 --- a/pelican/tools/pelican_import.py +++ b/pelican/tools/pelican_import.py @@ -261,6 +261,55 @@ def dc2fields(file): yield (post_title, content, slugify(post_title), post_creadt, author, categories, tags, post_format) +def posterous2fields(api_token, email, password): + """Imports posterous posts""" + import base64 + from datetime import datetime, timedelta + try: + # py3k import + import json + except ImportError: + # py2 import + import simplejson as json + + try: + # py3k import + import urllib.request as urllib_request + except ImportError: + # py2 import + import urllib2 as urllib_request + + + def get_posterous_posts(api_token, email, password, page = 1): + base64string = base64.encodestring(("%s:%s" % (email, password)).encode('utf-8')).replace(b'\n', b'') + url = "http://posterous.com/api/v2/users/me/sites/primary/posts?api_token=%s&page=%d" % (api_token, page) + request = urllib_request.Request(url) + request.add_header("Authorization", "Basic %s" % base64string.decode()) + handle = urllib_request.urlopen(request) + posts = json.loads(handle.read().decode('utf-8')) + return posts + + page = 1 + posts = get_posterous_posts(api_token, email, password, page) + while len(posts) > 0: + posts = get_posterous_posts(api_token, email, password, page) + page += 1 + + for post in posts: + slug = post.get('slug') + if not slug: + slug = slugify(post.get('title')) + tags = [tag.get('name') for tag in post.get('tags')] + raw_date = post.get('display_date') + date_object = datetime.strptime(raw_date[:-6], "%Y/%m/%d %H:%M:%S") + offset = int(raw_date[-5:]) + delta = timedelta(hours = offset / 100) + date_object -= delta + date = date_object.strftime("%Y-%m-%d %H:%M") + + yield (post.get('title'), post.get('body_cleaned'), slug, date, + post.get('user').get('display_name'), [], tags, "html") + def feed2fields(file): """Read a feed and yield pelican fields""" import feedparser @@ -389,6 +438,8 @@ def main(): help='Wordpress XML export') parser.add_argument('--dotclear', action='store_true', dest='dotclear', help='Dotclear export') + parser.add_argument('--posterous', action='store_true', dest='posterous', + help='Posterous export') parser.add_argument('--feed', action='store_true', dest='feed', help='Feed to parse') parser.add_argument('-o', '--output', dest='output', default='output', @@ -405,6 +456,10 @@ def main(): help='Disable storing slugs from imported posts within output. ' 'With this disabled, your Pelican URLs may not be consistent ' 'with your original posts.') + parser.add_argument('-e', '--email', dest='email', + help="Email address (posterous import only)") + parser.add_argument('-p', '--password', dest='password', + help="Password (posterous import only)") args = parser.parse_args() @@ -413,10 +468,12 @@ def main(): input_type = 'wordpress' elif args.dotclear: input_type = 'dotclear' + elif args.posterous: + input_type = 'posterous' elif args.feed: input_type = 'feed' else: - error = "You must provide either --wpfile, --dotclear or --feed options" + error = "You must provide either --wpfile, --dotclear, --posterous or --feed options" exit(error) if not os.path.exists(args.output): @@ -430,6 +487,8 @@ def main(): fields = wp2fields(args.input) elif input_type == 'dotclear': fields = dc2fields(args.input) + elif input_type == 'posterous': + fields = posterous2fields(args.input, args.email, args.password) elif input_type == 'feed': fields = feed2fields(args.input)