diff --git a/dev_requirements.txt b/dev_requirements.txt
index e1a15a3f..ef1dbf31 100644
--- a/dev_requirements.txt
+++ b/dev_requirements.txt
@@ -6,3 +6,5 @@ unittest2
pytz
mock
Markdown
+blinker
+BeautifulSoup
diff --git a/docs/index.rst b/docs/index.rst
index 6ad22670..34a1355c 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -61,6 +61,7 @@ A French version of the documentation is available at :doc:`fr/index`.
getting_started
settings
themes
+ plugins
internals
pelican-themes
importer
diff --git a/docs/plugins.rst b/docs/plugins.rst
new file mode 100644
index 00000000..db5a4bfc
--- /dev/null
+++ b/docs/plugins.rst
@@ -0,0 +1,108 @@
+.. _plugins:
+
+Plugins
+#######
+
+Since version 3.0, Pelican supports plugins. Plugins are a way to add features
+to Pelican without having to modify the Pelican code directly.
+
+Pelican is shipped with a set of core plugins, but you can easily implement
+your own (and this page describes how).
+
+How to use plugins?
+====================
+
+To load plugins, you have to specify them in your settings file. There are two
+ways to do so.
+Either by listing the dotted import paths of the plugin modules as strings::
+
+ PLUGINS = ['pelican.plugins.gravatar',]
+
+Or by importing them and adding them to the list::
+
+ from pelican.plugins import gravatar
+ PLUGINS = [gravatar, ]
+
+If your plugins are not in an importable path, you can specify a `PLUGIN_PATH`
+in the settings::
+
+ PLUGIN_PATH = "plugins"
+ PLUGINS = ["list", "of", "plugins"]
+
+How to create plugins?
+======================
+
+Plugins are based on the concept of signals. Pelican sends signals and plugins
+subscribe to those signals. The list of signals is given in a later
+section.
+
+The only rule to follow for plugins is to define a `register` callable, in
+which you map the signals to your plugin logic. Let's take a simple example::
+
+ from pelican import signals
+
+ def test(sender):
+ print "%s initialized !!" % sender
+
+ def register():
+ signals.initialized.connect(test)
+
+
+List of signals
+===============
+
+Here is the list of currently implemented signals:
+
+========================= ============================ =========================================
+Signal Arguments Description
+========================= ============================ =========================================
+initialized pelican object
+article_generate_context article_generator, metadata
+article_generator_init article_generator invoked in the ArticlesGenerator.__init__
+========================= ============================ =========================================
+
+The list is currently small, don't hesitate to add signals and make a pull
+request if you need them!
+
+List of plugins
+===============
+
+Not all plugins are described here, but a few of them have been extracted from
+the Pelican core and are provided in pelican.plugins. They are described here:
+
+Tag cloud
+---------
+
+Translation
+-----------
+
+Github Activity
+---------------
+
+This plugin makes use of the ``feedparser`` library that you'll need to
+install.
+
+Set the GITHUB_ACTIVITY_FEED parameter to your github activity feed.
+For example, my setting would look like::
+
+ GITHUB_ACTIVITY_FEED = 'https://github.com/kpanic.atom'
+
+On the templates side, you just have to iterate over the ``github_activity``
+variable, as in the example::
+
+ {% if GITHUB_ACTIVITY_FEED %}
+
+
Github Activity
+
+
+ {% for entry in github_activity %}
+
{{ entry[0] }} {{ entry[1] }}
+ {% endfor %}
+
+
+ {% endif %}
+
+
+
+``github_activity`` is a list of lists. In each inner list, the first element is
+the title and the second element is the raw HTML from GitHub.
diff --git a/docs/settings.rst b/docs/settings.rst
index a26c37dd..50ed33f6 100644
--- a/docs/settings.rst
+++ b/docs/settings.rst
@@ -61,6 +61,7 @@ Setting name (default value) What doe
`rst2pdf`.
`RELATIVE_URLS` (``True``) Defines whether Pelican should use relative URLs or
not.
+`PLUGINS` (``[]``) The list of plugins to load. See :ref:`plugins`.
`SITENAME` (``'A Pelican Blog'``) Your site name
`SITEURL` Base URL of your website. Not defined by default,
which means the base URL is assumed to be "/" with a
@@ -375,6 +376,7 @@ Setting name (default value) What does it do?
value is `static`, but if your theme has
other static paths, you can put them here.
`CSS_FILE` (``'main.css'``) Specify the CSS file you want to load.
+`WEBASSETS` (``False``) Asset management with `webassets` (see below)
================================================ =====================================================
By default, two themes are available. You can specify them using the `-t` option:
@@ -424,7 +426,58 @@ adding the following to your configuration::
CSS_FILE = "wide.css"
-.. _pelican-themes: :doc:`pelican-themes`
+Asset management
+----------------
+
+The `WEBASSETS` setting allows you to use the `webassets`_ module to manage assets
+(css, js). The module must first be installed::
+
+ pip install webassets
+
+`webassets` allows you to concatenate your assets and to use almost all of the
+popular tools of the moment (see the `documentation`_):
+
+* css minifier (`cssmin`, `yuicompressor`, ...)
+* css compiler (`less`, `sass`, ...)
+* js minifier (`uglifyjs`, `yuicompressor`, `closure`, ...)
+
+Other filters include gzip compression, integration of images in css with
+`datauri` and more. Webassets also appends a version identifier to your asset
+URL to convince browsers to download new versions of your assets when you use
+far-future Expires headers.
+
+When using it with Pelican, `webassets` is configured to process assets in the
+``OUTPUT_PATH/theme`` directory. You can use it in your templates with a
+template tag, for example:
+
+.. code-block:: jinja
+
+ {% assets filters="cssmin", output="css/style.min.css", "css/inuit.css", "css/pygment-monokai.css", "css/main.css" %}
+
+ {% endassets %}
+
+will produce a minified css file with the version identifier:
+
+.. code-block:: html
+
+
+
+Another example for javascript:
+
+.. code-block:: jinja
+
+ {% assets filters="uglifyjs,gzip", output="js/packed.js", "js/jquery.js", "js/base.js", "js/widgets.js" %}
+
+ {% endassets %}
+
+will produce a minified and gzipped js file:
+
+.. code-block:: html
+
+
+
+.. _webassets: https://github.com/miracle2k/webassets
+.. _documentation: http://webassets.readthedocs.org/en/latest/builtin_filters.html
Example settings
================
diff --git a/docs/tips.rst b/docs/tips.rst
index 6ddc3d33..14a79a5e 100644
--- a/docs/tips.rst
+++ b/docs/tips.rst
@@ -10,6 +10,26 @@ GitHub comes with an interesting "pages" feature: you can upload things there
and it will be available directly from their servers. As Pelican is a static
file generator, we can take advantage of this.
+User Pages
+----------
+GitHub allows you to create user pages in the form of ``username.github.com``.
+Whatever is committed to the master branch will be published. For this purpose,
+only the output generated by Pelican needs to be pushed to GitHub.
+
+So given a repository containing your articles, just run pelican over the posts
+and deploy the master branch to GitHub::
+
+ $ pelican -s pelican.conf.py ./path/to/posts -o /path/to/output
+
+Now add all the files in the output directory generated by pelican::
+
+ $ git add /path/to/output/*
+ $ git commit -am "Your Message"
+ $ git push origin master
+
+Project Pages
+-------------
+For creating Project pages, a branch called ``gh-pages`` is used for publishing.
The excellent `ghp-import `_ makes this
really easy. You will have to install it::
@@ -31,3 +51,4 @@ Put the following into `.git/hooks/post-commit`::
pelican -s pelican.conf.py . && ghp-import output && git push origin
gh-pages
+
diff --git a/pelican/__init__.py b/pelican/__init__.py
index 6b3d12fb..6dc7dd36 100644
--- a/pelican/__init__.py
+++ b/pelican/__init__.py
@@ -5,11 +5,13 @@ import time
import logging
import argparse
+from pelican import signals
+
from pelican.generators import (ArticlesGenerator, PagesGenerator,
StaticGenerator, PdfGenerator, LessCSSGenerator)
from pelican.log import init
from pelican.settings import read_settings, _DEFAULT_CONFIG
-from pelican.utils import clean_output_dir, files_changed
+from pelican.utils import clean_output_dir, files_changed, file_changed
from pelican.writers import Writer
__major__ = 3
@@ -22,7 +24,7 @@ logger = logging.getLogger(__name__)
class Pelican(object):
def __init__(self, settings=None, path=None, theme=None, output_path=None,
- markup=None, delete_outputdir=False):
+ markup=None, delete_outputdir=False, plugin_path=None):
"""Read the settings, and performs some checks on the environment
before doing anything else.
"""
@@ -58,6 +60,20 @@ class Pelican(object):
else:
raise Exception("Impossible to find the theme %s" % theme)
+ self.init_plugins()
+ signals.initialized.send(self)
+
+    def init_plugins(self):
+        """Import string-named plugins, then register every plugin."""
+        self.plugins = self.settings['PLUGINS']
+        for plugin in self.plugins:
+            # a string entry is a module path: import it to get the module
+            if isinstance(plugin, basestring):
+                logger.debug("Loading plugin `{0}' ...".format(plugin))
+                plugin = __import__(plugin, globals(), locals(), 'module')
+            logger.debug("Registering plugin `{0}' ...".format(plugin.__name__))
+            plugin.register()
+
def _handle_deprecation(self):
if self.settings.get('CLEAN_URLS', False):
@@ -126,15 +142,20 @@ class Pelican(object):
writer = self.get_writer()
+ # pass the assets environment to the generators
+ if self.settings['WEBASSETS']:
+ generators[1].env.assets_environment = generators[0].assets_env
+ generators[2].env.assets_environment = generators[0].assets_env
+
for p in generators:
if hasattr(p, 'generate_output'):
p.generate_output(writer)
def get_generator_classes(self):
- generators = [ArticlesGenerator, PagesGenerator, StaticGenerator]
+ generators = [StaticGenerator, ArticlesGenerator, PagesGenerator]
if self.settings['PDF_GENERATOR']:
generators.append(PdfGenerator)
- if self.settings['LESS_GENERATOR']: # can be True or PATH to lessc
+ if self.settings['LESS_GENERATOR']: # can be True or PATH to lessc
generators.append(LessCSSGenerator)
return generators
@@ -192,11 +213,7 @@ def parse_arguments():
return parser.parse_args()
-def main():
- args = parse_arguments()
- init(args.verbosity)
- # Split the markup languages only if some have been given. Otherwise,
- # populate the variable with None.
+def get_instance(args):
markup = [a.strip().lower() for a in args.markup.split(',')]\
if args.markup else None
@@ -208,9 +225,18 @@ def main():
module = __import__(module)
cls = getattr(module, cls_name)
+ return cls(settings, args.path, args.theme, args.output, markup,
+ args.delete_outputdir)
+
+
+def main():
+ args = parse_arguments()
+ init(args.verbosity)
+ # Split the markup languages only if some have been given. Otherwise,
+ # populate the variable with None.
+ pelican = get_instance(args)
+
try:
- pelican = cls(settings, args.path, args.theme, args.output, markup,
- args.delete_outputdir)
if args.autoreload:
while True:
try:
@@ -222,6 +248,14 @@ def main():
if files_changed(pelican.path, pelican.markup) or \
files_changed(pelican.theme, ['']):
pelican.run()
+
+ # reload also if settings.py changed
+ if file_changed(args.settings):
+ logger.info('%s changed, re-generating' %
+ args.settings)
+ pelican = get_instance(args)
+ pelican.run()
+
time.sleep(.5) # sleep to avoid cpu load
except KeyboardInterrupt:
break
diff --git a/pelican/generators.py b/pelican/generators.py
index ede948a4..1ddc13c2 100644
--- a/pelican/generators.py
+++ b/pelican/generators.py
@@ -17,6 +17,7 @@ from jinja2.exceptions import TemplateNotFound
from pelican.contents import Article, Page, Category, is_valid_content
from pelican.readers import read_file
from pelican.utils import copy, process_translations, open
+from pelican import signals
logger = logging.getLogger(__name__)
@@ -42,7 +43,7 @@ class Generator(object):
simple_loader = FileSystemLoader(os.path.join(theme_path,
"themes", "simple", "templates"))
- self._env = Environment(
+ self.env = Environment(
loader=ChoiceLoader([
FileSystemLoader(self._templates_path),
simple_loader, # implicit inheritance
@@ -51,11 +52,11 @@ class Generator(object):
extensions=self.settings.get('JINJA_EXTENSIONS', []),
)
- logger.debug('template list: {0}'.format(self._env.list_templates()))
+ logger.debug('template list: {0}'.format(self.env.list_templates()))
# get custom Jinja filters from user settings
custom_filters = self.settings.get('JINJA_FILTERS', {})
- self._env.filters.update(custom_filters)
+ self.env.filters.update(custom_filters)
def get_template(self, name):
"""Return the template by name.
@@ -64,7 +65,7 @@ class Generator(object):
"""
if name not in self._templates:
try:
- self._templates[name] = self._env.get_template(name + '.html')
+ self._templates[name] = self.env.get_template(name + '.html')
except TemplateNotFound:
raise Exception('[templates] unable to load %s.html from %s' \
% (name, self._templates_path))
@@ -118,6 +119,7 @@ class ArticlesGenerator(Generator):
self.authors = defaultdict(list)
super(ArticlesGenerator, self).__init__(*args, **kwargs)
self.drafts = []
+ signals.article_generator_init.send(self)
def generate_feeds(self, writer):
"""Generate the feeds from the current context, and output files."""
@@ -245,7 +247,9 @@ class ArticlesGenerator(Generator):
def generate_context(self):
"""change the context"""
- article_path = os.path.join(self.path, self.settings['ARTICLE_DIR'])
+ article_path = os.path.normpath( # we have to remove trailing slashes
+ os.path.join(self.path, self.settings['ARTICLE_DIR'])
+ )
all_articles = []
for f in self.get_files(
article_path,
@@ -259,8 +263,8 @@ class ArticlesGenerator(Generator):
# if no category is set, use the name of the path as a category
if 'category' not in metadata:
- if os.path.dirname(f) == article_path:
- category = self.settings['DEFAULT_CATEGORY']
+ if os.path.dirname(f) == article_path: # if the article is not in a subdirectory
+ category = self.settings['DEFAULT_CATEGORY']
else:
category = os.path.basename(os.path.dirname(f))\
.decode('utf-8')
@@ -272,6 +276,7 @@ class ArticlesGenerator(Generator):
metadata['date'] = datetime.datetime.fromtimestamp(
os.stat(f).st_ctime)
+ signals.article_generate_context.send(self, metadata=metadata)
article = Article(content, metadata, settings=self.settings,
filename=f)
if not is_valid_content(article, f):
@@ -284,6 +289,10 @@ class ArticlesGenerator(Generator):
all_articles.append(article)
elif article.status == "draft":
self.drafts.append(article)
+ else:
+ logger.warning(u"Unknown status %s for file %s, skipping it." %
+ (repr(unicode.encode(article.status, 'utf-8')),
+ repr(f)))
self.articles, self.translations = process_translations(all_articles)
@@ -389,7 +398,23 @@ class StaticGenerator(Generator):
copy(path, source, os.path.join(output_path, destination),
final_path, overwrite=True)
+    def generate_context(self):
+        """Create the webassets environment when WEBASSETS is enabled."""
+        if not self.settings['WEBASSETS']:
+            return
+        from webassets import Environment as AssetsEnvironment
+        # The environment built here is handed over to the other generators,
+        # which is why the StaticGenerator has to run first: the assets must
+        # be in the output_path before the templates are generated.
+        theme_dir = os.path.join(self.output_path, 'theme')
+        theme_url = self.settings['SITEURL'] + '/theme/'
+        self.assets_env = AssetsEnvironment(theme_dir, theme_url)
+
+        if logging.getLevelName(logger.getEffectiveLevel()) == "DEBUG":
+            self.assets_env.debug = True
+
def generate_output(self, writer):
+
self._copy_paths(self.settings['STATIC_PATHS'], self.path,
'static', self.output_path)
self._copy_paths(self.settings['THEME_STATIC_PATHS'], self.theme,
diff --git a/pelican/plugins/__init__.py b/pelican/plugins/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/pelican/plugins/github_activity.py b/pelican/plugins/github_activity.py
new file mode 100644
index 00000000..f2ba1da7
--- /dev/null
+++ b/pelican/plugins/github_activity.py
@@ -0,0 +1,85 @@
+# -*- coding: utf-8 -*-
+"""
+ Copyright (c) Marco Milanesi
+
+ A plugin to list your Github Activity
+ To enable it set in your pelican config file the GITHUB_ACTIVITY_FEED
+ parameter pointing to your github activity feed.
+
+ for example my personal activity feed is:
+
+ https://github.com/kpanic.atom
+
+ in your template just write a for in jinja2 syntax against the
+ github_activity variable.
+
+ i.e.
+
+
+
Github Activity
+
+
+ {% for entry in github_activity %}
+
{{ entry[0] }} {{ entry[1] }}
+ {% endfor %}
+
+
+
+ github_activity is a list containing a list. The first element is the title
+ and the second element is the raw html from github
+"""
+
+from pelican import signals
+
+
+class GitHubActivity():
+    """
+    Fetch a GitHub activity feed with feedparser and expose its entries.
+    """
+    def __init__(self, generator):
+        """Parse the feed named by the GITHUB_ACTIVITY_FEED setting."""
+        # only the import is guarded, so that an error raised while parsing
+        # is not misreported as a missing dependency
+        try:
+            import feedparser
+        except ImportError:
+            raise Exception("Unable to find feedparser")
+        self.activities = feedparser.parse(
+            generator.settings['GITHUB_ACTIVITY_FEED'])
+
+    def fetch(self):
+        """
+        Return one ``[title, raw html]`` list per entry of the parsed feed,
+        in feed order.
+        """
+        # 'content' is a list; only the value of its first item is used
+        return [
+            [activity['title'], activity['content'][0]['value']]
+            for activity in self.activities['entries']]
+
+
+def fetch_github_activity(gen, metadata):
+    """
+    Handler for the ``article_generate_context`` signal.
+
+    When GITHUB_ACTIVITY_FEED is set, store the fetched feed entries in
+    ``gen.context['github_activity']`` so that templates can display them.
+    """
+    if 'GITHUB_ACTIVITY_FEED' in gen.settings:
+        gen.context['github_activity'] = gen.plugin_instance.fetch()
+
+
+def feed_parser_initialization(generator):
+    """
+    Build the GitHubActivity helper and attach it to the generator.
+    """
+    activity = GitHubActivity(generator)
+    generator.plugin_instance = activity
+
+
+def register():
+    """
+    Connect this plugin's handlers to the Pelican signals they listen to.
+    """
+    signals.article_generate_context.connect(fetch_github_activity)
+    signals.article_generator_init.connect(feed_parser_initialization)
diff --git a/pelican/plugins/global_license.py b/pelican/plugins/global_license.py
new file mode 100644
index 00000000..463a93b3
--- /dev/null
+++ b/pelican/plugins/global_license.py
@@ -0,0 +1,23 @@
+from pelican import signals
+
+"""
+License plugin for Pelican
+==========================
+
+Simply add license variable in article's context, which contain
+the license text.
+
+Settings:
+---------
+
+Add LICENSE to your settings file to define default license.
+
+"""
+
+def add_license(generator, metadata):
+    """Default the article's ``license`` metadata to the LICENSE setting."""
+    if 'LICENSE' in generator.settings and 'license' not in metadata:
+        metadata['license'] = generator.settings['LICENSE']
+
+def register():  # plugin entry point: run add_license on article_generate_context
+ signals.article_generate_context.connect(add_license)
diff --git a/pelican/plugins/gravatar.py b/pelican/plugins/gravatar.py
new file mode 100644
index 00000000..4ab8ea9c
--- /dev/null
+++ b/pelican/plugins/gravatar.py
@@ -0,0 +1,40 @@
+import hashlib
+
+from pelican import signals
+"""
+Gravatar plugin for Pelican
+===========================
+
+Simply add author_gravatar variable in article's context, which contains
+the gravatar url.
+
+Settings:
+---------
+
+Add AUTHOR_EMAIL to your settings file to define default author email.
+
+Article metadata:
+------------------
+
+:email: article's author email
+
+If one of them are defined, the author_gravatar variable is added to
+article's context.
+"""
+
+def add_gravatar(generator, metadata):
+    """Add ``author_gravatar`` (the Gravatar URL) to the article metadata."""
+    # fall back to the site-wide AUTHOR_EMAIL when the article has no email
+    if 'email' not in metadata and 'AUTHOR_EMAIL' in generator.settings:
+        metadata['email'] = generator.settings['AUTHOR_EMAIL']
+    if 'email' in metadata:
+        # Gravatar hashes the trimmed, lower-cased address
+        email = metadata['email'].strip().lower()
+        if not isinstance(email, bytes):  # md5 needs bytes, not unicode text
+            email = email.encode('utf-8')
+        metadata["author_gravatar"] = \
+            "http://www.gravatar.com/avatar/" + hashlib.md5(email).hexdigest()
+
+
+def register():  # plugin entry point: run add_gravatar on article_generate_context
+ signals.article_generate_context.connect(add_gravatar)
diff --git a/pelican/plugins/html_rst_directive.py b/pelican/plugins/html_rst_directive.py
new file mode 100644
index 00000000..d0a656f5
--- /dev/null
+++ b/pelican/plugins/html_rst_directive.py
@@ -0,0 +1,63 @@
+from docutils import nodes
+from docutils.parsers.rst import directives, Directive
+from pelican import log
+
+"""
+HTML tags for reStructuredText
+==============================
+
+Directives
+----------
+
+.. html::
+
+ (HTML code)
+
+
+Example
+-------
+
+A search engine:
+
+.. html::
+
+
+
+A contact form:
+
+.. html::
+
+
+
+"""
+
+
+class RawHtml(Directive):
+    """``html`` directive: emit its content as a raw HTML node."""
+    required_arguments = 0
+    optional_arguments = 0
+    final_argument_whitespace = True
+    has_content = True
+
+    def run(self):
+        # the content lines are joined with single spaces
+        return [nodes.raw('', u' '.join(self.content), format='html')]
+
+
+
+def register():  # plugin entry point: expose RawHtml as the reST `html` directive
+ directives.register_directive('html', RawHtml)
+
diff --git a/pelican/plugins/initialized.py b/pelican/plugins/initialized.py
new file mode 100644
index 00000000..5e4cf174
--- /dev/null
+++ b/pelican/plugins/initialized.py
@@ -0,0 +1,7 @@
+from pelican import signals
+
+def test(sender):  # demo handler: print whichever object sent the signal
+ print "%s initialized !!" % sender
+
+def register():  # plugin entry point: run test() when `initialized` is sent
+ signals.initialized.connect(test)
diff --git a/pelican/settings.py b/pelican/settings.py
index d2a39cd9..17efea58 100644
--- a/pelican/settings.py
+++ b/pelican/settings.py
@@ -69,6 +69,8 @@ _DEFAULT_CONFIG = {'PATH': '.',
'TYPOGRIFY': False,
'LESS_GENERATOR': False,
'SUMMARY_MAX_LENGTH': 50,
+ 'WEBASSETS': False,
+ 'PLUGINS': [],
}
@@ -151,4 +153,12 @@ def configure_settings(settings, default_settings=None, filename=None):
"http://docs.notmyidea.org/alexis/pelican/settings.html#timezone "
"for more information")
+ if 'WEBASSETS' in settings and settings['WEBASSETS'] is not False:
+ try:
+ from webassets.ext.jinja2 import AssetsExtension
+ settings['JINJA_EXTENSIONS'].append(AssetsExtension)
+ except ImportError:
+ logger.warn("You must install the webassets module to use WEBASSETS.")
+ settings['WEBASSETS'] = False
+
return settings
diff --git a/pelican/signals.py b/pelican/signals.py
new file mode 100644
index 00000000..b1c35794
--- /dev/null
+++ b/pelican/signals.py
@@ -0,0 +1,5 @@
+from blinker import signal
+
+initialized = signal('pelican_initialized')  # sent by Pelican.__init__, sender is the Pelican object
+article_generate_context = signal('article_generate_context')  # sent with metadata= before each Article is built
+article_generator_init = signal('article_generator_init')  # sent at the end of ArticlesGenerator.__init__
diff --git a/pelican/tools/pelican_import.py b/pelican/tools/pelican_import.py
index 050b1010..dab3c3a8 100755
--- a/pelican/tools/pelican_import.py
+++ b/pelican/tools/pelican_import.py
@@ -25,8 +25,14 @@ def wp2fields(xml):
items = soup.rss.channel.findAll('item')
for item in items:
+
if item.fetch('wp:status')[0].contents[0] == "publish":
- title = item.title.contents[0]
+
+ try:
+ title = item.title.contents[0]
+ except IndexError:
+ continue
+
content = item.fetch('content:encoded')[0].contents[0]
filename = item.fetch('wp:post_name')[0].contents[0]
@@ -197,7 +203,7 @@ def build_markdown_header(title, date, author, categories, tags):
header += '\n'
return header
-def fields2pelican(fields, out_markup, output_path, dircat=False):
+def fields2pelican(fields, out_markup, output_path, dircat=False, strip_raw=False):
for title, content, filename, date, author, categories, tags, in_markup in fields:
if (in_markup == "markdown") or (out_markup == "markdown") :
ext = '.md'
@@ -230,22 +236,26 @@ def fields2pelican(fields, out_markup, output_path, dircat=False):
paragraphs = [u'
{}
'.format(p) for p in paragraphs]
new_content = ''.join(paragraphs)
- fp.write(content)
+ fp.write(new_content)
- cmd = 'pandoc --normalize --reference-links --from=html --to={0} -o "{1}" "{2}"'.format(
- out_markup, out_filename, html_filename)
+
+ parse_raw = '--parse-raw' if not strip_raw else ''
+ cmd = ('pandoc --normalize --reference-links {0} --from=html'
+ ' --to={1} -o "{2}" "{3}"').format(
+ parse_raw, out_markup, out_filename, html_filename)
try:
rc = subprocess.call(cmd, shell=True)
if rc < 0:
- print("Child was terminated by signal %d" % -rc)
- exit()
+ error = "Child was terminated by signal %d" % -rc
+ exit(error)
+
elif rc > 0:
- print("Please, check your Pandoc installation.")
- exit()
+ error = "Please, check your Pandoc installation."
+ exit(error)
except OSError, e:
- print("Pandoc execution failed: %s" % e)
- exit()
+ error = "Pandoc execution failed: %s" % e
+ exit(error)
os.remove(html_filename)
@@ -279,6 +289,10 @@ def main():
help='Output markup format (supports rst & markdown)')
parser.add_argument('--dir-cat', action='store_true', dest='dircat',
help='Put files in directories with categories name')
+ parser.add_argument('--strip-raw', action='store_true', dest='strip_raw',
+ help="Strip raw HTML code that can't be converted to "
+ "markup such as flash embeds or iframes (wordpress import only)")
+
args = parser.parse_args()
input_type = None
@@ -289,15 +303,15 @@ def main():
elif args.feed:
input_type = 'feed'
else:
- print("You must provide either --wpfile, --dotclear or --feed options")
- exit()
+ error = "You must provide either --wpfile, --dotclear or --feed options"
+ exit(error)
if not os.path.exists(args.output):
try:
os.mkdir(args.output)
except OSError:
- print("Unable to create the output folder: " + args.output)
- exit()
+ error = "Unable to create the output folder: " + args.output
+ exit(error)
if input_type == 'wordpress':
fields = wp2fields(args.input)
@@ -306,4 +320,6 @@ def main():
elif input_type == 'feed':
fields = feed2fields(args.input)
- fields2pelican(fields, args.markup, args.output, dircat=args.dircat or False)
+ fields2pelican(fields, args.markup, args.output,
+ dircat=args.dircat or False,
+ strip_raw=args.strip_raw or False)
diff --git a/pelican/utils.py b/pelican/utils.py
index d4e34842..0940bf72 100644
--- a/pelican/utils.py
+++ b/pelican/utils.py
@@ -4,6 +4,7 @@ import re
import pytz
import shutil
import logging
+from collections import defaultdict
from codecs import open as _open
from datetime import datetime
@@ -221,9 +222,9 @@ def files_changed(path, extensions):
"""Return the last time files have been modified"""
for root, dirs, files in os.walk(path):
dirs[:] = [x for x in dirs if x[0] != '.']
- for file in files:
- if any(file.endswith(ext) for ext in extensions):
- yield os.stat(os.path.join(root, file)).st_mtime
+ for f in files:
+ if any(f.endswith(ext) for ext in extensions):
+ yield os.stat(os.path.join(root, f)).st_mtime
global LAST_MTIME
mtime = max(file_times(path))
@@ -233,6 +234,21 @@ def files_changed(path, extensions):
return False
+FILENAMES_MTIMES = defaultdict(int)
+
+
+def file_changed(filename):
+    """Return True if `filename` was modified since the previous call.
+
+    The first call for a given file only records its mtime, returning False.
+    """
+    mtime = os.stat(filename).st_mtime
+    previous = FILENAMES_MTIMES[filename]
+    if previous == 0 or mtime > previous:
+        FILENAMES_MTIMES[filename] = mtime
+    return previous != 0 and mtime > previous
+
+
def set_date_tzinfo(d, tz_name=None):
""" Date without tzinfo shoudbe utc.
This function set the right tz to date that aren't utc and don't have
diff --git a/setup.py b/setup.py
index 0e57c83b..a8a8fbd9 100755
--- a/setup.py
+++ b/setup.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python
from setuptools import setup
-requires = ['feedgenerator', 'jinja2', 'pygments', 'docutils', 'pytz']
+requires = ['feedgenerator', 'jinja2', 'pygments', 'docutils', 'pytz', 'blinker']
try:
import argparse
@@ -25,7 +25,7 @@ setup(
author_email = 'alexis@notmyidea.org',
description = "A tool to generate a static blog from reStructuredText or Markdown input files.",
long_description=open('README.rst').read(),
- packages = ['pelican', 'pelican.tools'],
+ packages = ['pelican', 'pelican.tools', 'pelican.plugins'],
include_package_data = True,
install_requires = requires,
entry_points = entry_points,
diff --git a/tests/content/wordpressexport.xml b/tests/content/wordpressexport.xml
new file mode 100644
index 00000000..d3e86cba
--- /dev/null
+++ b/tests/content/wordpressexport.xml
@@ -0,0 +1,578 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Pelican test channel
+ http://thisisa.test
+ Not a real feed, just for test
+ Sun, 13 May 2012 01:13:52 +0000
+ en
+ 1.1
+ http://thisisa.test
+ http://thisisa.test
+
+ 2Bobbob@thisisa.test
+ 3Jonhjonh@thisisa.test
+
+ 7categ-1
+ 11categ-2
+ 1uncategorized
+ 15categ-3
+ 25tag-1
+ 122tag2
+ 68tag-3
+
+ http://wordpress.org/?v=3.3.1
+
+
+ Empty post
+ http://thisisa.test/?attachment_id=24
+ Sat, 04 Feb 2012 03:17:33 +0000
+ bob
+ https://upload.wikimedia.org/wikipedia/commons/thumb/2/2c/Pelican_lakes_entrance02.jpg/240px-Pelican_lakes_entrance02.jpg
+
+
+
+ 24
+ 2012-02-04 03:17:33
+ 2012-02-04 03:17:33
+ open
+ open
+ empty-post
+ inherit
+ 0
+ 0
+ attachment
+
+ 0
+ https://upload.wikimedia.org/wikipedia/commons/thumb/2/2c/Pelican_lakes_entrance02.jpg/240px-Pelican_lakes_entrance02.jpg
+
+ _wp_attachment_metadata
+
+
+
+ _wp_attached_file
+
+
+
+ _wp_attachment_image_alt
+
+
+
+
+
+ http://thisisa.test/?p=168
+ Thu, 01 Jan 1970 00:00:00 +0000
+ bob
+ http://thisisa.test/?p=168
+
+
+
+ 168
+ 2012-02-15 21:23:57
+ 0000-00-00 00:00:00
+ open
+ open
+
+ draft
+ 0
+ 0
+ post
+
+ 0
+
+
+ _edit_last
+
+
+
+
+ A normal post
+ http://thisisa.test/?p=173
+ Thu, 01 Jan 1970 00:00:00 +0000
+ bob
+ http://thisisa.test/?p=173
+
+
+
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
+tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
+quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
+consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
+cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
+proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
+
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
+tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
+quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
+consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
+cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
+proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
+
+
+Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
+tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
+quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
+consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
+cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
+proident, sunt in culpa qui officia deserunt mollit anim id est laborum.]]>
+
+ 173
+ 2012-02-16 15:52:55
+ 0000-00-00 00:00:00
+ open
+ open
+
+ draft
+ 0
+ 0
+ post
+
+ 0
+
+
+ _edit_last
+
+
+
+
+ Complete draft
+ http://thisisa.test/?p=176
+ Thu, 01 Jan 1970 00:00:00 +0000
+ bob
+ http://thisisa.test/?p=176
+
+
+
+ 176
+ 2012-02-17 15:11:55
+ 0000-00-00 00:00:00
+ open
+ open
+
+ draft
+ 0
+ 0
+ post
+
+ 0
+
+
+ _edit_last
+
+
+
+
+ Page
+ http://thisisa.test/contact/
+ Wed, 11 Apr 2012 11:38:08 +0000
+ bob
+ http://thisisa.test/?page_id=334
+
+
+
+ 334
+ 2012-04-11 06:38:08
+ 2012-04-11 11:38:08
+ open
+ open
+ contact
+ publish
+ 0
+ 0
+ page
+
+ 0
+
+ sharing_disabled
+
+
+
+ _wp_page_template
+
+
+
+ _edit_last
+
+
+
+
+ Empty Page
+ http://thisisa.test/empty/
+ Wed, 11 Apr 2012 11:38:08 +0000
+ bob
+ http://thisisa.test/?page_id=334
+
+
+
+ 334
+ 2012-04-11 06:38:08
+ 2012-04-11 11:38:08
+ open
+ open
+ empty
+ publish
+ 0
+ 0
+ page
+
+ 0
+
+ sharing_disabled
+
+
+
+ _wp_page_template
+
+
+
+ _edit_last
+
+
+
+
+ Special chars: l'é
+ http://thisisa.test/?p=471
+ Thu, 01 Jan 1970 00:00:00 +0000
+ bob
+ http://thisisa.test/?p=471
+
+
+
+ 471
+ 2012-04-29 09:44:27
+ 0000-00-00 00:00:00
+ open
+ open
+
+ draft
+ 0
+ 0
+ post
+
+ 0
+
+
+ _edit_last
+
+
+
+
+
+ With excerpt
+ http://thisisa.test/with-excerpt/
+ Sat, 04 Feb 2012 02:03:06 +0000
+ bob
+ http://thisisa.test/?p=8
+
+
+
+ 8
+ 2012-02-04 02:03:06
+ 2012-02-04 02:03:06
+ open
+ open
+ with-excerpt
+ publish
+ 0
+ 0
+ post
+
+ 0
+
+
+
+
+ _edit_last
+
+
+
+ et_bigpost
+
+
+
+ _thumbnail_id
+
+
+
+
+ With tags
+ http://thisisa.test/tags/
+ Sat, 04 Feb 2012 21:05:25 +0000
+ bob
+ http://thisisa.test/?p=25
+
+
+
+ 25
+ 2012-02-04 21:05:25
+ 2012-02-04 21:05:25
+ open
+ open
+ with-tags
+ publish
+ 0
+ 0
+ post
+
+ 0
+
+
+
+
+
+ _edit_last
+
+
+
+ et_bigpost
+
+
+
+ _thumbnail_id
+
+
+
+
+ With comments
+ http://thisisa.test/with-comments/
+ Wed, 18 Apr 2012 08:36:26 +0000
+ john
+ http://thisisa.test/?p=422
+
+
+
+ 422
+ 2012-04-18 03:36:26
+ 2012-04-18 08:36:26
+ open
+ open
+ with-comments
+ publish
+ 0
+ 0
+ post
+
+ 0
+
+
+ _edit_last
+
+
+
+ _thumbnail_id
+
+
+
+ 116
+
+ User2@mail.test
+
+ 127.0.0.1
+ 2012-05-06 15:46:06
+ 2012-05-06 20:46:06
+
+ 1
+
+ 0
+ 0
+
+ akismet_result
+
+
+
+ akismet_history
+
+
+
+ akismet_as_submitted
+
+
+
+
+ 117
+
+ bob@thisisa.test
+
+ 127.0.0.1
+ 2012-05-06 17:44:06
+ 2012-05-06 22:44:06
+
+ 1
+
+ 116
+ 3
+
+ akismet_result
+
+
+
+ akismet_history
+
+
+
+ akismet_as_submitted
+
+
+
+
+ 156
+
+
+ http://thisisa.test/to-article-you-ping-back/
+ 127.0.0.1
+ 2012-05-09 19:30:19
+ 2012-05-10 00:30:19
+
+ trash
+ pingback
+ 0
+ 0
+
+ akismet_history
+
+
+
+ _wp_trash_meta_status
+
+
+
+ _wp_trash_meta_time
+
+
+
+
+ 122
+
+ bob@thisisa.test
+
+ 127.0.0.1
+ 2012-05-07 14:11:34
+ 2012-05-07 19:11:34
+
+ 1
+
+ 121
+ 3
+
+ akismet_result
+
+
+
+ akismet_history
+
+
+
+ akismet_as_submitted
+
+
+
+
+
+ Post with raw data
+ http://thisisa.test/?p=173
+ Thu, 01 Jan 1970 00:00:00 +0000
+ bob
+ http://thisisa.test/?p=173
+
+ Pelicans are scary
+
+Pelicans are supposed to eat fish, damn it!
+
+
+
+Bottom line: don't mess up with birds]]>
+
+ 173
+ 2012-02-16 15:52:55
+ 0000-00-00 00:00:00
+ open
+ open
+ post-with-raw-data
+ publish
+ 0
+ 0
+ post
+
+ 0
+
+
+ _edit_last
+
+
+
+
+
diff --git a/tests/support.py b/tests/support.py
index f2b4a075..994cd509 100644
--- a/tests/support.py
+++ b/tests/support.py
@@ -6,6 +6,11 @@ __all__ = [
import os
import subprocess
+import re
+import sys
+import cStringIO
+
+from functools import wraps
from contextlib import contextmanager
from tempfile import mkdtemp
from shutil import rmtree
@@ -28,8 +33,87 @@ def temporary_folder():
# do whatever you want
"""
tempdir = mkdtemp()
- yield tempdir
- rmtree(tempdir)
+ try:
+ yield tempdir
+ finally:
+ rmtree(tempdir)
+
+
+def isplit(s, sep=None):
+    r"""
+    Behave like str.split but return an iterator instead of a list.
+
+    With ``sep=None`` the string is split on runs of whitespace and no
+    empty pieces are produced. With an explicit ``sep`` every occurrence
+    delimits a piece, so empty pieces (including a trailing one) are kept.
+
+    An empty ``sep`` raises ValueError at call time, as str.split does.
+
+    >>> list(isplit('\tUse the force\n')) == '\tUse the force\n'.split()
+    True
+    >>> list(isplit('\tUse the force\n')) == ['Use', 'the', 'force']
+    True
+    >>> list(isplit('\tUse the force\n', "e")) == '\tUse the force\n'.split("e")
+    True
+    >>> list(isplit('Use the force', "e")) == 'Use the force'.split("e")
+    True
+    >>> list(isplit('Use the force', "e")) == ['Us', ' th', ' forc', '']
+    True
+    >>> isplit('Use the force', '')
+    Traceback (most recent call last):
+        ...
+    ValueError: empty separator
+
+    """
+    if sep == '':
+        # an empty pattern matches without consuming any input, so the scan
+        # below would yield '' forever; reject it up front
+        raise ValueError('empty separator')
+
+    hardsep = sep is not None
+    exp = re.compile(re.escape(sep) if hardsep else r'\s+')
+
+    def pieces():
+        pos = 0
+        for m in exp.finditer(s):
+            if pos < m.start() or hardsep:
+                # ^ mimic "split()": an explicit separator keeps empty
+                #   pieces, whitespace splitting drops them
+                yield s[pos:m.start()]
+            pos = m.end()
+        if pos < len(s) or hardsep:
+            # ^ mimic "split()": ''.split() returns [], ''.split(',')
+            #   returns ['']
+            yield s[pos:]
+
+    return pieces()
+
+
+def mute(returns_output=False):
+    """
+    Decorate a function that prints to stdout, intercepting the output.
+    If "returns_output" is True, the function will return a generator
+    yielding the printed lines instead of the return values.
+
+    Leading and trailing whitespace of the whole captured output is
+    dropped before it is cut into lines, so a function that prints
+    nothing yields nothing. Lines are never split on inner spaces.
+
+    The decorator literally hijacks sys.stdout during each function
+    execution, so be careful with what you apply it to.
+
+    >>> def numbers():
+    ...     print "42"
+    ...     print "1984"
+    ...
+    >>> numbers()
+    42
+    1984
+    >>> mute()(numbers)()
+    >>> list(mute(True)(numbers)())
+    ['42', '1984']
+
+    """
+
+    def decorator(func):
+
+        @wraps(func)
+        def wrapper(*args, **kwargs):
+
+            saved_stdout = sys.stdout
+            # keep our own handle on the buffer instead of reading it back
+            # through sys.stdout later
+            captured = sys.stdout = cStringIO.StringIO()
+
+            try:
+                out = func(*args, **kwargs)
+            finally:
+                # always give the real stdout back, even if func raised
+                sys.stdout = saved_stdout
+
+            if returns_output:
+                lines = captured.getvalue().strip().splitlines()
+                return (line for line in lines)
+
+            return out
+
+        return wrapper
+
+    return decorator
+
def get_article(title, slug, content, lang, extra_metadata=None):
diff --git a/tests/test_importer.py b/tests/test_importer.py
new file mode 100644
index 00000000..5504b12e
--- /dev/null
+++ b/tests/test_importer.py
@@ -0,0 +1,52 @@
+# -*- coding: utf-8 -*-
+
+import os
+import subprocess
+
+from pelican.tools.pelican_import import wp2fields, fields2pelican
+from .support import unittest, temporary_folder, mute
+
+CUR_DIR = os.path.dirname(__file__)
+WORDPRESS_XML_SAMPLE = os.path.join(CUR_DIR, 'content', 'wordpressexport.xml')
+
+
+def _pandoc_available():
+    """Return True when ``pandoc --version`` runs and exits with status 0."""
+    try:
+        with open(os.devnull, 'w') as devnull:
+            # discard the version banner so probing for pandoc does not
+            # pollute the test run's output
+            status = subprocess.call(['pandoc', '--version'],
+                                     stdout=devnull, stderr=devnull)
+    except OSError:
+        # the executable could not be started (e.g. it is not installed)
+        return False
+    return status == 0
+
+
+# Optional dependencies: tests that need them are skipped when missing.
+PANDOC = _pandoc_available()
+try:
+    import BeautifulSoup
+except ImportError:
+    BeautifulSoup = False  # NOQA
+
+
+class TestWordpressXmlImporter(unittest.TestCase):
+
+ def setUp(self):
+ self.posts = wp2fields(WORDPRESS_XML_SAMPLE)
+
+ @unittest.skipUnless(PANDOC and BeautifulSoup,
+ 'Needs Pandoc and BeautifulSoup')
+ def test_ignore_empty_posts(self):
+
+ posts = list(self.posts)
+ self.assertTrue(posts)
+ for title, content, fname, date, author, categ, tags, format in posts:
+ self.assertTrue(title.strip())
+
+ @unittest.skipUnless(PANDOC and BeautifulSoup,
+ 'Needs Pandoc and BeautifulSoup')
+ def test_can_toggle_raw_html_code_parsing(self):
+
+ posts = list(self.posts)
+ r = lambda f: open(f).read()
+ silent_f2p = mute(True)(fields2pelican)
+
+ with temporary_folder() as temp:
+
+ rst_files = (r(f) for f in silent_f2p(posts, 'markdown', temp))
+ self.assertTrue(any('