Merge pull request #1247 from paylogic/multiple-authors

Multiple authors implementation for #956
This commit is contained in:
Justin Mayer 2014-02-13 19:08:34 -08:00
commit 826ff4df50
16 changed files with 122 additions and 59 deletions

View file

@ -7,6 +7,7 @@ Next release
* Added the `:modified:` metadata field to complement `:date:`. * Added the `:modified:` metadata field to complement `:date:`.
Used to specify the last date and time an article was updated independently from the date and time it was published. Used to specify the last date and time an article was updated independently from the date and time it was published.
* Produce inline links instead of reference-style links when importing content. * Produce inline links instead of reference-style links when importing content.
* Added support for multiple authors via the new `:authors:` metadata field.
3.3.0 (2013-09-24) 3.3.0 (2013-09-24)
================== ==================

View file

@ -311,7 +311,7 @@ this metadata in text files via the following syntax (give your file the
:tags: thats, awesome :tags: thats, awesome
:category: yeah :category: yeah
:slug: my-super-post :slug: my-super-post
:author: Alexis Metaireau :authors: Alexis Metaireau, Conan Doyle
:summary: Short version for index and feeds :summary: Short version for index and feeds
Pelican implements an extension to reStructuredText to enable support for the Pelican implements an extension to reStructuredText to enable support for the
@ -331,7 +331,7 @@ pattern::
Category: Python Category: Python
Tags: pelican, publishing Tags: pelican, publishing
Slug: my-super-post Slug: my-super-post
Author: Alexis Metaireau Authors: Alexis Metaireau, Conan Doyle
Summary: Short version for index and feeds Summary: Short version for index and feeds
This is the content of my super blog post. This is the content of my super blog post.
@ -351,7 +351,7 @@ interprets the HTML in a very straightforward manner, reading metadata from
<meta name="date" content="2012-07-09 22:28" /> <meta name="date" content="2012-07-09 22:28" />
<meta name="modified" content="2012-07-10 20:14" /> <meta name="modified" content="2012-07-10 20:14" />
<meta name="category" content="yeah" /> <meta name="category" content="yeah" />
<meta name="author" content="Alexis Métaireau" /> <meta name="authors" content="Alexis Métaireau, Conan Doyle" />
<meta name="summary" content="Short version for index and feeds" /> <meta name="summary" content="Short version for index and feeds" />
</head> </head>
<body> <body>
@ -380,6 +380,9 @@ __ `W3C ISO 8601`_
Besides you can show ``modified`` in the templates, feed entries in feed readers will be updated automatically Besides you can show ``modified`` in the templates, feed entries in feed readers will be updated automatically
when you set ``modified`` to the current date after you modified your article. when you set ``modified`` to the current date after you modified your article.
``authors`` is a comma-separated list of article authors. If there's only one author, you
can use the ``author`` field instead.
If you do not explicitly specify summary metadata for a given post, the If you do not explicitly specify summary metadata for a given post, the
``SUMMARY_MAX_LENGTH`` setting can be used to specify how many words from the ``SUMMARY_MAX_LENGTH`` setting can be used to specify how many words from the
beginning of an article are used as the summary. beginning of an article are used as the summary.
@ -587,12 +590,12 @@ classprefix string String to prepend to token class names
hl_lines numbers List of lines to be highlighted. hl_lines numbers List of lines to be highlighted.
lineanchors string Wrap each line in an anchor using this lineanchors string Wrap each line in an anchor using this
string and -linenumber. string and -linenumber.
linenos string If present or set to "table" output line linenos string If present or set to "table" output line
numbers in a table, if set to numbers in a table, if set to
"inline" output them inline. "none" means "inline" output them inline. "none" means
do not output the line numbers for this do not output the line numbers for this
table. table.
linenospecial number If set every nth line will be given the linenospecial number If set every nth line will be given the
'special' css class. 'special' css class.
linenostart number Line number for the first line. linenostart number Line number for the first line.
linenostep number Print every nth line number. linenostep number Print every nth line number.

View file

@ -74,11 +74,17 @@ class Content(object):
#default template if it's not defined in page #default template if it's not defined in page
self.template = self._get_template() self.template = self._get_template()
# default author to the one in settings if not defined # First, read the authors from "authors", if not, fallback to "author"
# and if not use the settings defined one, if any.
if not hasattr(self, 'author'): if not hasattr(self, 'author'):
if 'AUTHOR' in settings: if hasattr(self, 'authors'):
self.author = self.authors[0]
elif 'AUTHOR' in settings:
self.author = Author(settings['AUTHOR'], settings) self.author = Author(settings['AUTHOR'], settings)
if not hasattr(self, 'authors') and hasattr(self, 'author'):
self.authors = [self.author]
# XXX Split all the following code into pieces, there is too much here. # XXX Split all the following code into pieces, there is too much here.
# manage languages # manage languages

View file

@ -434,7 +434,7 @@ class ArticlesGenerator(Generator):
self.articles, self.translations = process_translations(all_articles) self.articles, self.translations = process_translations(all_articles)
signals.article_generator_pretaxonomy.send(self) signals.article_generator_pretaxonomy.send(self)
for article in self.articles: for article in self.articles:
# only main articles are listed in categories and tags # only main articles are listed in categories and tags
@ -444,9 +444,9 @@ class ArticlesGenerator(Generator):
for tag in article.tags: for tag in article.tags:
self.tags[tag].append(article) self.tags[tag].append(article)
# ignore blank authors as well as undefined # ignore blank authors as well as undefined
if hasattr(article, 'author') and article.author.name != '': for author in getattr(article, 'authors', []):
self.authors[article.author].append(article) if author.name != '':
self.authors[author].append(article)
# sort the articles by date # sort the articles by date
self.articles.sort(key=attrgetter('date'), reverse=True) self.articles.sort(key=attrgetter('date'), reverse=True)
self.dates = list(self.articles) self.dates = list(self.articles)

View file

@ -46,6 +46,7 @@ METADATA_PROCESSORS = {
'status': lambda x, y: x.strip(), 'status': lambda x, y: x.strip(),
'category': Category, 'category': Category,
'author': Author, 'author': Author,
'authors': lambda x, y: [Author(author, y) for author in x],
} }
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -144,6 +145,9 @@ class RstReader(BaseReader):
value = render_node_to_html(document, body_elem) value = render_node_to_html(document, body_elem)
else: else:
value = body_elem.astext() value = body_elem.astext()
elif element.tagname == 'authors': # author list
name = element.tagname
value = [element.astext() for element in element.children]
else: # standard fields (e.g. address) else: # standard fields (e.g. address)
name = element.tagname name = element.tagname
value = element.astext() value = element.astext()

View file

@ -0,0 +1,6 @@
This is an article with multiple authors!
#########################################
:date: 2014-02-09 02:20
:modified: 2014-02-09 02:20
:authors: First Author, Second Author

View file

@ -346,6 +346,17 @@ class TestPage(unittest.TestCase):
'<a href="http://notmyidea.org/article-spaces.html">link</a>' '<a href="http://notmyidea.org/article-spaces.html">link</a>'
) )
def test_multiple_authors(self):
    """Check that ``author`` and ``authors`` stay consistent on a page.

    Built from the default page kwargs (which carry an ``author`` entry),
    ``authors`` must be a one-element list holding that author.  Built
    with an ``authors`` list and no ``author`` entry, ``author`` must be
    the first element of ``authors``.
    """
    args = self.page_kwargs.copy()
    # dict.copy() is shallow: give this test its own metadata dict so the
    # pop/assignment below cannot modify the dict held by self.page_kwargs.
    args['metadata'] = dict(args['metadata'])

    content = Page(**args)
    self.assertEqual(content.authors, [content.author])

    args['metadata'].pop('author')
    args['metadata']['authors'] = ['First Author', 'Second Author']
    content = Page(**args)
    self.assertTrue(content.authors)
    self.assertEqual(content.author, content.authors[0])
class TestArticle(TestPage): class TestArticle(TestPage):
def test_template(self): def test_template(self):

View file

@ -93,6 +93,7 @@ class TestArticlesGenerator(unittest.TestCase):
['This is a super article !', 'published', 'Default', 'article'], ['This is a super article !', 'published', 'Default', 'article'],
['This is an article with category !', 'published', 'yeah', ['This is an article with category !', 'published', 'yeah',
'article'], 'article'],
['This is an article with multiple authors!', 'published', 'Default', 'article'],
['This is an article without category !', 'published', 'Default', ['This is an article without category !', 'published', 'Default',
'article'], 'article'],
['This is an article without category !', 'published', ['This is an article without category !', 'published',
@ -257,6 +258,16 @@ class TestArticlesGenerator(unittest.TestCase):
settings, settings,
blog=True, dates=dates) blog=True, dates=dates)
def test_generate_authors(self):
    """Verify the names and slugs of the authors the generator collected."""
    expected_names = ['Alexis Métaireau', 'First Author', 'Second Author']
    found_names = sorted(
        entry.name for entry, _articles in self.generator.authors)
    self.assertEqual(found_names, sorted(expected_names))

    # The slugs of those same authors must match as well.
    expected_slugs = ['alexis-metaireau', 'first-author', 'second-author']
    found_slugs = sorted(
        entry.slug for entry, _articles in self.generator.authors)
    self.assertEqual(found_slugs, sorted(expected_slugs))
class TestPageGenerator(unittest.TestCase): class TestPageGenerator(unittest.TestCase):
# Note: Every time you want to test for a new field; Make sure the test # Note: Every time you want to test for a new field; Make sure the test

View file

@ -10,7 +10,7 @@ from pelican.tests.support import (unittest, temporary_folder, mute,
from pelican.utils import slugify from pelican.utils import slugify
CUR_DIR = os.path.dirname(__file__) CUR_DIR = os.path.abspath(os.path.dirname(__file__))
WORDPRESS_XML_SAMPLE = os.path.join(CUR_DIR, 'content', 'wordpressexport.xml') WORDPRESS_XML_SAMPLE = os.path.join(CUR_DIR, 'content', 'wordpressexport.xml')
WORDPRESS_ENCODED_CONTENT_SAMPLE = os.path.join(CUR_DIR, WORDPRESS_ENCODED_CONTENT_SAMPLE = os.path.join(CUR_DIR,
'content', 'content',
@ -75,7 +75,7 @@ class TestWordpressXmlImporter(unittest.TestCase):
out_name = fnames[index] out_name = fnames[index]
self.assertTrue(out_name.endswith(filename)) self.assertTrue(out_name.endswith(filename))
index += 1 index += 1
def test_unless_custom_post_all_items_should_be_pages_or_posts(self): def test_unless_custom_post_all_items_should_be_pages_or_posts(self):
self.assertTrue(self.posts) self.assertTrue(self.posts)
pages_data = [] pages_data = []
@ -85,7 +85,7 @@ class TestWordpressXmlImporter(unittest.TestCase):
else: else:
pages_data.append((title, fname)) pages_data.append((title, fname))
self.assertEqual(0, len(pages_data)) self.assertEqual(0, len(pages_data))
def test_recognise_custom_post_type(self): def test_recognise_custom_post_type(self):
self.assertTrue(self.custposts) self.assertTrue(self.custposts)
cust_data = [] cust_data = []
@ -98,7 +98,7 @@ class TestWordpressXmlImporter(unittest.TestCase):
self.assertEqual(('A custom post in category 4', 'custom1'), cust_data[0]) self.assertEqual(('A custom post in category 4', 'custom1'), cust_data[0])
self.assertEqual(('A custom post in category 5', 'custom1'), cust_data[1]) self.assertEqual(('A custom post in category 5', 'custom1'), cust_data[1])
self.assertEqual(('A 2nd custom post type also in category 5', 'custom2'), cust_data[2]) self.assertEqual(('A 2nd custom post type also in category 5', 'custom2'), cust_data[2])
def test_custom_posts_put_in_own_dir(self): def test_custom_posts_put_in_own_dir(self):
silent_f2p = mute(True)(fields2pelican) silent_f2p = mute(True)(fields2pelican)
test_posts = [] test_posts = []
@ -130,7 +130,7 @@ class TestWordpressXmlImporter(unittest.TestCase):
else: else:
test_posts.append(post) test_posts.append(post)
with temporary_folder() as temp: with temporary_folder() as temp:
fnames = list(silent_f2p(test_posts, 'markdown', temp, fnames = list(silent_f2p(test_posts, 'markdown', temp,
wp_custpost=True, dircat=True)) wp_custpost=True, dircat=True))
index = 0 index = 0
for post in test_posts: for post in test_posts:
@ -152,7 +152,7 @@ class TestWordpressXmlImporter(unittest.TestCase):
if post[7] == 'page': if post[7] == 'page':
test_posts.append(post) test_posts.append(post)
with temporary_folder() as temp: with temporary_folder() as temp:
fnames = list(silent_f2p(test_posts, 'markdown', temp, fnames = list(silent_f2p(test_posts, 'markdown', temp,
wp_custpost=True, dirpage=False)) wp_custpost=True, dirpage=False))
index = 0 index = 0
for post in test_posts: for post in test_posts:
@ -161,8 +161,8 @@ class TestWordpressXmlImporter(unittest.TestCase):
filename = os.path.join('pages', name) filename = os.path.join('pages', name)
out_name = fnames[index] out_name = fnames[index]
self.assertFalse(out_name.endswith(filename)) self.assertFalse(out_name.endswith(filename))
def test_can_toggle_raw_html_code_parsing(self): def test_can_toggle_raw_html_code_parsing(self):
def r(f): def r(f):
with open(f) as infile: with open(f) as infile:
@ -247,9 +247,9 @@ class TestBuildHeader(unittest.TestCase):
'##############################################\n\n') '##############################################\n\n')
def test_galleries_added_to_header(self): def test_galleries_added_to_header(self):
header = build_header('test', None, None, None, None, header = build_header('test', None, None, None, None,
None, ['output/test1', 'output/test2']) None, ['output/test1', 'output/test2'])
self.assertEqual(header, 'test\n####\n' + ':attachments: output/test1, ' self.assertEqual(header, 'test\n####\n' + ':attachments: output/test1, '
+ 'output/test2\n\n') + 'output/test2\n\n')
def test_galleries_added_to_markdown_header(self): def test_galleries_added_to_markdown_header(self):
@ -258,11 +258,11 @@ class TestBuildHeader(unittest.TestCase):
self.assertEqual(header, 'Title: test\n' + 'Attachments: output/test1, ' self.assertEqual(header, 'Title: test\n' + 'Attachments: output/test1, '
+ 'output/test2\n\n') + 'output/test2\n\n')
@unittest.skipUnless(BeautifulSoup, 'Needs BeautifulSoup module') @unittest.skipUnless(BeautifulSoup, 'Needs BeautifulSoup module')
class TestWordpressXMLAttachements(unittest.TestCase): class TestWordpressXMLAttachements(unittest.TestCase):
def setUp(self): def setUp(self):
self.attachments = get_attachments(WORDPRESS_XML_SAMPLE) self.attachments = get_attachments(WORDPRESS_XML_SAMPLE)
def test_recognise_attachments(self): def test_recognise_attachments(self):
self.assertTrue(self.attachments) self.assertTrue(self.attachments)
self.assertTrue(len(self.attachments.keys()) == 3) self.assertTrue(len(self.attachments.keys()) == 3)
@ -283,7 +283,7 @@ class TestWordpressXMLAttachements(unittest.TestCase):
def test_download_attachments(self): def test_download_attachments(self):
real_file = os.path.join(CUR_DIR, 'content/article.rst') real_file = os.path.join(CUR_DIR, 'content/article.rst')
good_url = 'file://' + real_file good_url = 'file://' + real_file
bad_url = 'http://www.notarealsite.notarealdomain/not_a_file.txt' bad_url = 'http://localhost:1/not_a_file.txt'
silent_da = mute()(download_attachments) silent_da = mute()(download_attachments)
with temporary_folder() as temp: with temporary_folder() as temp:
#locations = download_attachments(temp, [good_url, bad_url]) #locations = download_attachments(temp, [good_url, bad_url])

View file

@ -2,11 +2,11 @@
from __future__ import unicode_literals, print_function from __future__ import unicode_literals, print_function
import os import os
from filecmp import dircmp
from tempfile import mkdtemp from tempfile import mkdtemp
from shutil import rmtree from shutil import rmtree
import locale import locale
import logging import logging
import subprocess
from pelican import Pelican from pelican import Pelican
from pelican.settings import read_settings from pelican.settings import read_settings
@ -64,6 +64,13 @@ class TestPelican(LoggedTestCase):
self.assertEqual(diff['right_only'], [], msg=msg) self.assertEqual(diff['right_only'], [], msg=msg)
self.assertEqual(diff['diff_files'], [], msg=msg) self.assertEqual(diff['diff_files'], [], msg=msg)
def assertDirsEqual(self, left_path, right_path):
    """Fail if ``git diff`` prints anything for the two given paths.

    Runs ``git diff --no-ext-diff --exit-code -w left_path right_path``
    and captures its stdout and stderr.  Any output on either stream
    raises ``AssertionError`` with that output as the message.
    """
    command = ['git', 'diff', '--no-ext-diff', '--exit-code', '-w',
               left_path, right_path]
    # The child process receives only this environment (an empty PAGER).
    process = subprocess.Popen(
        command,
        env={'PAGER': ''},
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE)
    out, err = process.communicate()
    assert not out, out
    assert not err, err
def test_basic_generation_works(self): def test_basic_generation_works(self):
# when running pelican without settings, it should pick up the default # when running pelican without settings, it should pick up the default
# ones and generate correct output without raising any exception # ones and generate correct output without raising any exception
@ -74,8 +81,7 @@ class TestPelican(LoggedTestCase):
}) })
pelican = Pelican(settings=settings) pelican = Pelican(settings=settings)
mute(True)(pelican.run)() mute(True)(pelican.run)()
dcmp = dircmp(self.temp_path, os.path.join(OUTPUT_PATH, 'basic')) self.assertDirsEqual(self.temp_path, os.path.join(OUTPUT_PATH, 'basic'))
self.assertFilesEqual(recursiveDiff(dcmp))
self.assertLogCountEqual( self.assertLogCountEqual(
count=4, count=4,
msg="Unable to find.*skipping url replacement", msg="Unable to find.*skipping url replacement",
@ -90,8 +96,7 @@ class TestPelican(LoggedTestCase):
}) })
pelican = Pelican(settings=settings) pelican = Pelican(settings=settings)
mute(True)(pelican.run)() mute(True)(pelican.run)()
dcmp = dircmp(self.temp_path, os.path.join(OUTPUT_PATH, 'custom')) self.assertDirsEqual(self.temp_path, os.path.join(OUTPUT_PATH, 'custom'))
self.assertFilesEqual(recursiveDiff(dcmp))
def test_theme_static_paths_copy(self): def test_theme_static_paths_copy(self):
# the same thing with a specified set of settings should work # the same thing with a specified set of settings should work

View file

@ -360,6 +360,15 @@ class HTMLReaderTest(ReaderTest):
for key, value in expected.items(): for key, value in expected.items():
self.assertEqual(value, page.metadata[key], key) self.assertEqual(value, page.metadata[key], key)
def test_article_with_multiple_authors(self):
    """The sample article's ``authors`` metadata must list both names."""
    page = self.read_file(path='article_with_multiple_authors.rst')
    expected = {'authors': ['First Author', 'Second Author']}
    for key in expected:
        # The key doubles as the failure message, so a mismatch names the field.
        self.assertEqual(expected[key], page.metadata[key], key)
def test_article_with_metadata_and_contents_attrib(self): def test_article_with_metadata_and_contents_attrib(self):
page = self.read_file(path='article_with_metadata_and_contents.html') page = self.read_file(path='article_with_metadata_and_contents.html')
expected = { expected = {

View file

@ -9,9 +9,11 @@
</abbr> </abbr>
{% endif %} {% endif %}
{% if article.author %} {% if article.authors %}
<address class="vcard author"> <address class="vcard author">
By <a class="url fn" href="{{ SITEURL }}/{{ article.author.url }}">{{ article.author }}</a> By {% for author in article.authors %}
<a class="url fn" href="{{ SITEURL }}/{{ author.url }}">{{ author }}</a>
{% endfor %}
</address> </address>
{% endif %} {% endif %}
<p>In <a href="{{ SITEURL }}/{{ article.category.url }}">{{ article.category }}</a>. {% if PDF_PROCESSOR %}<a href="{{ SITEURL }}/pdf/{{ article.slug }}.pdf">get the pdf</a>{% endif %}</p> <p>In <a href="{{ SITEURL }}/{{ article.category.url }}">{{ article.category }}</a>. {% if PDF_PROCESSOR %}<a href="{{ SITEURL }}/pdf/{{ article.slug }}.pdf">get the pdf</a>{% endif %}</p>

View file

@ -6,7 +6,6 @@
<section id="content" class="body"> <section id="content" class="body">
<h1>Authors on {{ SITENAME }}</h1> <h1>Authors on {{ SITENAME }}</h1>
{%- for author, articles in authors|sort %} {%- for author, articles in authors|sort %}
<li><a href="{{ SITEURL }}/{{ author.url }}">{{ author }}</a> ({{ articles|count }})</li> <li><a href="{{ SITEURL }}/{{ author.url }}">{{ author }}</a> ({{ articles|count }})</li>
{% endfor %} {% endfor %}

View file

@ -33,9 +33,11 @@
{{ article.locale_modified }} {{ article.locale_modified }}
</abbr> </abbr>
{% endif %} {% endif %}
{% if article.author %} {% if article.authors %}
<address class="vcard author"> <address class="vcard author">
By <a class="url fn" href="{{ SITEURL }}/{{ article.author.url }}">{{ article.author }}</a> By {% for author in article.authors %}
<a class="url fn" href="{{ SITEURL }}/{{ author.url }}">{{ author }}</a>
{% endfor %}
</address> </address>
{% endif %} {% endif %}
</footer><!-- /.post-info --> </footer><!-- /.post-info -->

View file

@ -11,7 +11,11 @@
<header> <h2 class="entry-title"><a href="{{ SITEURL }}/{{ article.url }}" rel="bookmark" title="Permalink to {{ article.title|striptags }}">{{ article.title }}</a></h2> </header> <header> <h2 class="entry-title"><a href="{{ SITEURL }}/{{ article.url }}" rel="bookmark" title="Permalink to {{ article.title|striptags }}">{{ article.title }}</a></h2> </header>
<footer class="post-info"> <footer class="post-info">
<abbr class="published" title="{{ article.date.isoformat() }}"> {{ article.locale_date }} </abbr> <abbr class="published" title="{{ article.date.isoformat() }}"> {{ article.locale_date }} </abbr>
{% if article.author %}<address class="vcard author">By <a class="url fn" href="{{ SITEURL }}/{{ article.author.url }}">{{ article.author }}</a></address>{% endif %} <address class="vcard author">By
{% for author in article.authors %}
<a class="url fn" href="{{ SITEURL }}/{{ author.url }}">{{ author }}</a>
{% endfor %}
</address>
</footer><!-- /.post-info --> </footer><!-- /.post-info -->
<div class="entry-content"> {{ article.summary }} </div><!-- /.entry-content --> <div class="entry-content"> {{ article.summary }} </div><!-- /.entry-content -->
</article></li> </article></li>

View file

@ -124,7 +124,7 @@ def get_filename(filename, post_id):
def wp2fields(xml, wp_custpost=False): def wp2fields(xml, wp_custpost=False):
"""Opens a wordpress XML file, and yield Pelican fields""" """Opens a wordpress XML file, and yield Pelican fields"""
items = get_items(xml) items = get_items(xml)
for item in items: for item in items:
@ -140,7 +140,7 @@ def wp2fields(xml, wp_custpost=False):
filename = item.find('post_name').string filename = item.find('post_name').string
post_id = item.find('post_id').string post_id = item.find('post_id').string
filename = get_filename(filename, post_id) filename = get_filename(filename, post_id)
content = item.find('encoded').string content = item.find('encoded').string
raw_date = item.find('post_date').string raw_date = item.find('post_date').string
date_object = time.strptime(raw_date, "%Y-%m-%d %H:%M:%S") date_object = time.strptime(raw_date, "%Y-%m-%d %H:%M:%S")
@ -161,7 +161,7 @@ def wp2fields(xml, wp_custpost=False):
pass pass
# Old behaviour was to name everything not a page as an article. # Old behaviour was to name everything not a page as an article.
# Theoretically all attachments have status == inherit so # Theoretically all attachments have status == inherit so
# no attachments should be here. But this statement is to # no attachments should be here. But this statement is to
# maintain existing behaviour in case that doesn't hold true. # maintain existing behaviour in case that doesn't hold true.
elif post_type == 'attachment': elif post_type == 'attachment':
pass pass
@ -469,7 +469,7 @@ def build_header(title, date, author, categories, tags, slug, attachments=None):
header += '\n' header += '\n'
return header return header
def build_markdown_header(title, date, author, categories, tags, slug, def build_markdown_header(title, date, author, categories, tags, slug,
attachments=None): attachments=None):
"""Build a header from a list of fields""" """Build a header from a list of fields"""
header = 'Title: %s\n' % title header = 'Title: %s\n' % title
@ -494,8 +494,8 @@ def get_ext(out_markup, in_markup='html'):
else: else:
ext = '.rst' ext = '.rst'
return ext return ext
def get_out_filename(output_path, filename, ext, kind, def get_out_filename(output_path, filename, ext, kind,
dirpage, dircat, categories, wp_custpost): dirpage, dircat, categories, wp_custpost):
filename = os.path.basename(filename) filename = os.path.basename(filename)
@ -516,7 +516,7 @@ def get_out_filename(output_path, filename, ext, kind,
os.mkdir(pages_dir) os.mkdir(pages_dir)
out_filename = os.path.join(pages_dir, filename+ext) out_filename = os.path.join(pages_dir, filename+ext)
elif not dirpage and kind == 'page': elif not dirpage and kind == 'page':
pass pass
# option to put wp custom post types in directories with post type # option to put wp custom post types in directories with post type
# names. Custom post types can also have categories so option to # names. Custom post types can also have categories so option to
# create subdirectories with category names # create subdirectories with category names
@ -530,7 +530,7 @@ def get_out_filename(output_path, filename, ext, kind,
catname = slugify(categories[0]) catname = slugify(categories[0])
else: else:
catname = '' catname = ''
out_filename = os.path.join(output_path, typename, out_filename = os.path.join(output_path, typename,
catname, filename+ext) catname, filename+ext)
if not os.path.isdir(os.path.join(output_path, typename, catname)): if not os.path.isdir(os.path.join(output_path, typename, catname)):
os.makedirs(os.path.join(output_path, typename, catname)) os.makedirs(os.path.join(output_path, typename, catname))
@ -544,20 +544,20 @@ def get_out_filename(output_path, filename, ext, kind,
return out_filename return out_filename
def get_attachments(xml): def get_attachments(xml):
"""returns a dictionary of posts that have attachments with a list """returns a dictionary of posts that have attachments with a list
of the attachment_urls of the attachment_urls
""" """
items = get_items(xml) items = get_items(xml)
names = {} names = {}
attachments = [] attachments = []
for item in items: for item in items:
kind = item.find('post_type').string kind = item.find('post_type').string
filename = item.find('post_name').string filename = item.find('post_name').string
post_id = item.find('post_id').string post_id = item.find('post_id').string
if kind == 'attachment': if kind == 'attachment':
attachments.append((item.find('post_parent').string, attachments.append((item.find('post_parent').string,
item.find('attachment_url').string)) item.find('attachment_url').string))
else: else:
filename = get_filename(filename, post_id) filename = get_filename(filename, post_id)
@ -569,7 +569,7 @@ def get_attachments(xml):
except KeyError: except KeyError:
#attachment's parent is not a valid post #attachment's parent is not a valid post
parent_name = None parent_name = None
try: try:
attachedposts[parent_name].append(url) attachedposts[parent_name].append(url)
except KeyError: except KeyError:
@ -578,13 +578,13 @@ def get_attachments(xml):
return attachedposts return attachedposts
def download_attachments(output_path, urls): def download_attachments(output_path, urls):
"""Downloads wordpress attachments and returns a list of paths to """Downloads wordpress attachments and returns a list of paths to
attachments that can be associated with a post (relative path to output attachments that can be associated with a post (relative path to output
directory). Files that fail to download, will not be added to posts""" directory). Files that fail to download, will not be added to posts"""
locations = [] locations = []
for url in urls: for url in urls:
path = urlparse(url).path path = urlparse(url).path
#teardown path and rebuild to negate any errors with #teardown path and rebuild to negate any errors with
#os.path.join and leading /'s #os.path.join and leading /'s
path = path.split('/') path = path.split('/')
filename = path.pop(-1) filename = path.pop(-1)
@ -625,16 +625,16 @@ def fields2pelican(fields, out_markup, output_path,
attached_files = download_attachments(output_path, urls) attached_files = download_attachments(output_path, urls)
except KeyError: except KeyError:
attached_files = None attached_files = None
else: else:
attached_files = None attached_files = None
ext = get_ext(out_markup, in_markup) ext = get_ext(out_markup, in_markup)
if ext == '.md': if ext == '.md':
header = build_markdown_header(title, date, author, categories, header = build_markdown_header(title, date, author, categories,
tags, slug, attached_files) tags, slug, attached_files)
else: else:
out_markup = "rst" out_markup = "rst"
header = build_header(title, date, author, categories, header = build_header(title, date, author, categories,
tags, slug, attached_files) tags, slug, attached_files)
out_filename = get_out_filename(output_path, filename, ext, out_filename = get_out_filename(output_path, filename, ext,
@ -690,7 +690,7 @@ def fields2pelican(fields, out_markup, output_path,
print("downloading attachments that don't have a parent post") print("downloading attachments that don't have a parent post")
urls = attachments[None] urls = attachments[None]
orphan_galleries = download_attachments(output_path, urls) orphan_galleries = download_attachments(output_path, urls)
def main(): def main():
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description="Transform feed, WordPress, Tumblr, Dotclear, or Posterous " description="Transform feed, WordPress, Tumblr, Dotclear, or Posterous "
@ -723,7 +723,7 @@ def main():
parser.add_argument('--strip-raw', action='store_true', dest='strip_raw', parser.add_argument('--strip-raw', action='store_true', dest='strip_raw',
help="Strip raw HTML code that can't be converted to " help="Strip raw HTML code that can't be converted to "
"markup such as flash embeds or iframes (wordpress import only)") "markup such as flash embeds or iframes (wordpress import only)")
parser.add_argument('--wp-custpost', action='store_true', parser.add_argument('--wp-custpost', action='store_true',
dest='wp_custpost', dest='wp_custpost',
help='Put wordpress custom post types in directories. If used with ' help='Put wordpress custom post types in directories. If used with '
'--dir-cat option directories will be created as ' '--dir-cat option directories will be created as '
@ -775,7 +775,7 @@ def main():
if args.wp_attach and input_type != 'wordpress': if args.wp_attach and input_type != 'wordpress':
error = "You must be importing a wordpress xml to use the --wp-attach option" error = "You must be importing a wordpress xml to use the --wp-attach option"
exit(error) exit(error)
if input_type == 'wordpress': if input_type == 'wordpress':
fields = wp2fields(args.input, args.wp_custpost or False) fields = wp2fields(args.input, args.wp_custpost or False)
elif input_type == 'dotclear': elif input_type == 'dotclear':