diff --git a/dev_requirements.txt b/dev_requirements.txt index e1a15a3f..ef1dbf31 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -6,3 +6,5 @@ unittest2 pytz mock Markdown +blinker +BeautifulSoup diff --git a/docs/index.rst b/docs/index.rst index 6ad22670..34a1355c 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -61,6 +61,7 @@ A French version of the documentation is available at :doc:`fr/index`. getting_started settings themes + plugins internals pelican-themes importer diff --git a/docs/plugins.rst b/docs/plugins.rst new file mode 100644 index 00000000..db5a4bfc --- /dev/null +++ b/docs/plugins.rst @@ -0,0 +1,108 @@ +.. _plugins: + +Plugins +####### + +Since version 3.0, pelican manages plugins. Plugins are a way to add features +to pelican without having to directly hack pelican code. + +Pelican is shipped with a set of core plugins, but you can easily implement +your own (and this page describes how). + +How to use plugins? +==================== + +To load plugins, you have to specify them in your settings file. You have two +ways to do so. +Either by specifying strings with the path to the callables:: + + PLUGINS = ['pelican.plugins.gravatar',] + +Or by importing them and adding them to the list:: + + from pelican.plugins import gravatar + PLUGINS = [gravatar, ] + +If your plugins are not in an importable path, you can specify a `PLUGIN_PATH` +in the settings:: + + PLUGIN_PATH = "plugins" + PLUGINS = ["list", "of", "plugins"] + +How to create plugins? +====================== + +Plugins are based on the concept of signals. Pelican sends signals and plugins +subscribe to those signals. The list of signals is defined in a following +section. + +The only rule to follow for plugins is to define a `register` callable, in +which you map the signals to your plugin logic. Let's take a simple example:: + + from pelican import signals + + def test(sender): + print "%s initialized !!" 
% sender + + def register(): + signals.initialized.connect(test) + + +List of signals +=============== + +Here is the list of currently implemented signals: + +========================= ============================ ========================================= +Signal Arguments Description +========================= ============================ ========================================= +initialized pelican object +article_generate_context article_generator, metadata +article_generator_init article_generator invoked in the ArticlesGenerator.__init__ +========================= ============================ ========================================= + +The list is currently small, don't hesitate to add signals and make a pull +request if you need them! + +List of plugins +=============== + +Not all plugins are described here, but a few of them have been extracted from +pelican core and provided in pelican.plugins. They are described here: + +Tag cloud +--------- + +Translation +----------- + +Github Activity +--------------- + +This plugin makes use of the ``feedparser`` library that you'll need to +install. + +Set the GITHUB_ACTIVITY_FEED parameter to your github activity feed. +For example, my setting would look like:: + + GITHUB_ACTIVITY_FEED = 'https://github.com/kpanic.atom' + +On the templates side, you just have to iterate over the ``github_activity`` +variable, as in the example:: + + {% if GITHUB_ACTIVITY_FEED %} +
+

Github Activity

+ +
+ {% endif %} + + + +``github_activity`` is a list of lists. The first element is the title +and the second element is the raw html from github. diff --git a/docs/settings.rst b/docs/settings.rst index a26c37dd..50ed33f6 100644 --- a/docs/settings.rst +++ b/docs/settings.rst @@ -61,6 +61,7 @@ Setting name (default value) What doe `rst2pdf`. `RELATIVE_URLS` (``True``) Defines whether Pelican should use relative URLs or not. +`PLUGINS` (``[]``) The list of plugins to load. See :ref:`plugins`. `SITENAME` (``'A Pelican Blog'``) Your site name `SITEURL` Base URL of your website. Not defined by default, which means the base URL is assumed to be "/" with a @@ -375,6 +376,7 @@ Setting name (default value) What does it do? value is `static`, but if your theme has other static paths, you can put them here. `CSS_FILE` (``'main.css'``) Specify the CSS file you want to load. +`WEBASSETS` (``False``) Asset management with `webassets` (see below) ================================================ ===================================================== By default, two themes are available. You can specify them using the `-t` option: @@ -424,7 +426,58 @@ adding the following to your configuration:: CSS_FILE = "wide.css" -.. _pelican-themes: :doc:`pelican-themes` +Asset management +---------------- + +The `WEBASSETS` setting allows to use the `webassets`_ module to manage assets +(css, js). The module must first be installed:: + + pip install webassets + +`webassets` allows to concatenate your assets and to use almost all of the +hype tools of the moment (see the `documentation`_): + +* css minifier (`cssmin`, `yuicompressor`, ...) +* css compiler (`less`, `sass`, ...) +* js minifier (`uglifyjs`, `yuicompressor`, `closure`, ...) + +Others filters include gzip compression, integration of images in css with +`datauri` and more. 
Webassets also appends a version identifier to your asset +url to convince browsers to download new versions of your assets when you use +far future expires headers. + +When using it with Pelican, `webassets` is configured to process assets in the +``OUTPUT_PATH/theme`` directory. You can use it in your templates with a +template tag, for example: + +.. code-block:: jinja + + {% assets filters="cssmin", output="css/style.min.css", "css/inuit.css", "css/pygment-monokai.css", "css/main.css" %} + + {% endassets %} + +will produce a minified css file with the version identifier: + +.. code-block:: html + + + +Another example for javascript: + +.. code-block:: jinja + + {% assets filters="uglifyjs,gzip", output="js/packed.js", "js/jquery.js", "js/base.js", "js/widgets.js" %} + + {% endassets %} + +will produce a minified and gzipped js file: + +.. code-block:: html + + + +.. _webassets: https://github.com/miracle2k/webassets +.. _documentation: http://webassets.readthedocs.org/en/latest/builtin_filters.html Example settings ================ diff --git a/docs/tips.rst b/docs/tips.rst index 6ddc3d33..14a79a5e 100644 --- a/docs/tips.rst +++ b/docs/tips.rst @@ -10,6 +10,26 @@ GitHub comes with an interesting "pages" feature: you can upload things there and it will be available directly from their servers. As Pelican is a static file generator, we can take advantage of this. +User Pages +---------- +Github allows you to create user pages in the form of ``username.github.com``. +Whatever is created in master branch will be published. For this purpose, just +the output generated by pelican needs to be pushed to github. 
+ +So given a repository containing your articles, just run pelican over the posts +and deploy the master branch at github:: + + $ pelican -s pelican.conf.py ./path/to/posts -o /path/to/output + +Now add all the files in the output directory generated by pelican:: + + $ git add /path/to/output/* + $ git commit -am "Your Message" + $ git push origin master + +Project Pages +------------- +For creating Project pages, a branch called ``gh-pages`` is used for publishing. The excellent `ghp-import `_ makes this really easy. You will have to install it:: @@ -31,3 +51,4 @@ Put the following into `.git/hooks/post-commit`:: pelican -s pelican.conf.py . && ghp-import output && git push origin gh-pages + diff --git a/pelican/__init__.py b/pelican/__init__.py index 6b3d12fb..6dc7dd36 100644 --- a/pelican/__init__.py +++ b/pelican/__init__.py @@ -5,11 +5,13 @@ import time import logging import argparse +from pelican import signals + from pelican.generators import (ArticlesGenerator, PagesGenerator, StaticGenerator, PdfGenerator, LessCSSGenerator) from pelican.log import init from pelican.settings import read_settings, _DEFAULT_CONFIG -from pelican.utils import clean_output_dir, files_changed +from pelican.utils import clean_output_dir, files_changed, file_changed from pelican.writers import Writer __major__ = 3 @@ -22,7 +24,7 @@ logger = logging.getLogger(__name__) class Pelican(object): def __init__(self, settings=None, path=None, theme=None, output_path=None, - markup=None, delete_outputdir=False): + markup=None, delete_outputdir=False, plugin_path=None): """Read the settings, and performs some checks on the environment before doing anything else. 
""" @@ -58,6 +60,20 @@ class Pelican(object): else: raise Exception("Impossible to find the theme %s" % theme) + self.init_plugins() + signals.initialized.send(self) + + def init_plugins(self): + self.plugins = self.settings['PLUGINS'] + for plugin in self.plugins: + # if it's a string, then import it + if isinstance(plugin, basestring): + log.debug("Loading plugin `{0}' ...".format(plugin)) + plugin = __import__(plugin, globals(), locals(), 'module') + + log.debug("Registering plugin `{0}' ...".format(plugin.__name__)) + plugin.register() + def _handle_deprecation(self): if self.settings.get('CLEAN_URLS', False): @@ -126,15 +142,20 @@ class Pelican(object): writer = self.get_writer() + # pass the assets environment to the generators + if self.settings['WEBASSETS']: + generators[1].env.assets_environment = generators[0].assets_env + generators[2].env.assets_environment = generators[0].assets_env + for p in generators: if hasattr(p, 'generate_output'): p.generate_output(writer) def get_generator_classes(self): - generators = [ArticlesGenerator, PagesGenerator, StaticGenerator] + generators = [StaticGenerator, ArticlesGenerator, PagesGenerator] if self.settings['PDF_GENERATOR']: generators.append(PdfGenerator) - if self.settings['LESS_GENERATOR']: # can be True or PATH to lessc + if self.settings['LESS_GENERATOR']: # can be True or PATH to lessc generators.append(LessCSSGenerator) return generators @@ -192,11 +213,7 @@ def parse_arguments(): return parser.parse_args() -def main(): - args = parse_arguments() - init(args.verbosity) - # Split the markup languages only if some have been given. Otherwise, - # populate the variable with None. 
+def get_instance(args): markup = [a.strip().lower() for a in args.markup.split(',')]\ if args.markup else None @@ -208,9 +225,18 @@ def main(): module = __import__(module) cls = getattr(module, cls_name) + return cls(settings, args.path, args.theme, args.output, markup, + args.delete_outputdir) + + +def main(): + args = parse_arguments() + init(args.verbosity) + # Split the markup languages only if some have been given. Otherwise, + # populate the variable with None. + pelican = get_instance(args) + try: - pelican = cls(settings, args.path, args.theme, args.output, markup, - args.delete_outputdir) if args.autoreload: while True: try: @@ -222,6 +248,14 @@ def main(): if files_changed(pelican.path, pelican.markup) or \ files_changed(pelican.theme, ['']): pelican.run() + + # reload also if settings.py changed + if file_changed(args.settings): + logger.info('%s changed, re-generating' % + args.settings) + pelican = get_instance(args) + pelican.run() + time.sleep(.5) # sleep to avoid cpu load except KeyboardInterrupt: break diff --git a/pelican/generators.py b/pelican/generators.py index ede948a4..1ddc13c2 100644 --- a/pelican/generators.py +++ b/pelican/generators.py @@ -17,6 +17,7 @@ from jinja2.exceptions import TemplateNotFound from pelican.contents import Article, Page, Category, is_valid_content from pelican.readers import read_file from pelican.utils import copy, process_translations, open +from pelican import signals logger = logging.getLogger(__name__) @@ -42,7 +43,7 @@ class Generator(object): simple_loader = FileSystemLoader(os.path.join(theme_path, "themes", "simple", "templates")) - self._env = Environment( + self.env = Environment( loader=ChoiceLoader([ FileSystemLoader(self._templates_path), simple_loader, # implicit inheritance @@ -51,11 +52,11 @@ class Generator(object): extensions=self.settings.get('JINJA_EXTENSIONS', []), ) - logger.debug('template list: {0}'.format(self._env.list_templates())) + logger.debug('template list: 
{0}'.format(self.env.list_templates())) # get custom Jinja filters from user settings custom_filters = self.settings.get('JINJA_FILTERS', {}) - self._env.filters.update(custom_filters) + self.env.filters.update(custom_filters) def get_template(self, name): """Return the template by name. @@ -64,7 +65,7 @@ class Generator(object): """ if name not in self._templates: try: - self._templates[name] = self._env.get_template(name + '.html') + self._templates[name] = self.env.get_template(name + '.html') except TemplateNotFound: raise Exception('[templates] unable to load %s.html from %s' \ % (name, self._templates_path)) @@ -118,6 +119,7 @@ class ArticlesGenerator(Generator): self.authors = defaultdict(list) super(ArticlesGenerator, self).__init__(*args, **kwargs) self.drafts = [] + signals.article_generator_init.send(self) def generate_feeds(self, writer): """Generate the feeds from the current context, and output files.""" @@ -245,7 +247,9 @@ class ArticlesGenerator(Generator): def generate_context(self): """change the context""" - article_path = os.path.join(self.path, self.settings['ARTICLE_DIR']) + article_path = os.path.normpath( # we have to remove trailing slashes + os.path.join(self.path, self.settings['ARTICLE_DIR']) + ) all_articles = [] for f in self.get_files( article_path, @@ -259,8 +263,8 @@ class ArticlesGenerator(Generator): # if no category is set, use the name of the path as a category if 'category' not in metadata: - if os.path.dirname(f) == article_path: - category = self.settings['DEFAULT_CATEGORY'] + if os.path.dirname(f) == article_path: # if the article is not in a subdirectory + category = self.settings['DEFAULT_CATEGORY'] else: category = os.path.basename(os.path.dirname(f))\ .decode('utf-8') @@ -272,6 +276,7 @@ class ArticlesGenerator(Generator): metadata['date'] = datetime.datetime.fromtimestamp( os.stat(f).st_ctime) + signals.article_generate_context.send(self, metadata=metadata) article = Article(content, metadata, settings=self.settings, 
filename=f) if not is_valid_content(article, f): @@ -284,6 +289,10 @@ class ArticlesGenerator(Generator): all_articles.append(article) elif article.status == "draft": self.drafts.append(article) + else: + logger.warning(u"Unknown status %s for file %s, skipping it." % + (repr(unicode.encode(article.status, 'utf-8')), + repr(f))) self.articles, self.translations = process_translations(all_articles) @@ -389,7 +398,23 @@ class StaticGenerator(Generator): copy(path, source, os.path.join(output_path, destination), final_path, overwrite=True) + def generate_context(self): + + if self.settings['WEBASSETS']: + from webassets import Environment as AssetsEnvironment + + # Define the assets environment that will be passed to the + # generators. The StaticGenerator must then be run first to have + # the assets in the output_path before generating the templates. + assets_url = self.settings['SITEURL'] + '/theme/' + assets_src = os.path.join(self.output_path, 'theme') + self.assets_env = AssetsEnvironment(assets_src, assets_url) + + if logging.getLevelName(logger.getEffectiveLevel()) == "DEBUG": + self.assets_env.debug = True + def generate_output(self, writer): + self._copy_paths(self.settings['STATIC_PATHS'], self.path, 'static', self.output_path) self._copy_paths(self.settings['THEME_STATIC_PATHS'], self.theme, diff --git a/pelican/plugins/__init__.py b/pelican/plugins/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pelican/plugins/github_activity.py b/pelican/plugins/github_activity.py new file mode 100644 index 00000000..f2ba1da7 --- /dev/null +++ b/pelican/plugins/github_activity.py @@ -0,0 +1,85 @@ +# -*- coding: utf-8 -*- +""" + Copyright (c) Marco Milanesi + + A plugin to list your Github Activity + To enable it set in your pelican config file the GITHUB_ACTIVITY_FEED + parameter pointing to your github activity feed. 
+ + for example my personal activity feed is: + + https://github.com/kpanic.atom + + in your template just write a for in jinja2 syntax against the + github_activity variable. + + i.e. + + + + github_activity is a list containing a list. The first element is the title + and the second element is the raw html from github +""" + +from pelican import signals + + +class GitHubActivity(): + """ + A class created to fetch github activity with feedparser + """ + def __init__(self, generator): + try: + import feedparser + self.activities = feedparser.parse( + generator.settings['GITHUB_ACTIVITY_FEED']) + except ImportError: + raise Exception("Unable to find feedparser") + + def fetch(self): + """ + returns a list of html snippets fetched from github actitivy feed + """ + + entries = [] + for activity in self.activities['entries']: + entries.append( + [element for element in [activity['title'], + activity['content'][0]['value']]]) + + return entries + + +def fetch_github_activity(gen, metadata): + """ + registered handler for the github activity plugin + it puts in generator.context the html needed to be displayed on a + template + """ + + if 'GITHUB_ACTIVITY_FEED' in gen.settings.keys(): + gen.context['github_activity'] = gen.plugin_instance.fetch() + + +def feed_parser_initialization(generator): + """ + Initialization of feed parser + """ + + generator.plugin_instance = GitHubActivity(generator) + + +def register(): + """ + Plugin registration + """ + signals.article_generator_init.connect(feed_parser_initialization) + signals.article_generate_context.connect(fetch_github_activity) diff --git a/pelican/plugins/global_license.py b/pelican/plugins/global_license.py new file mode 100644 index 00000000..463a93b3 --- /dev/null +++ b/pelican/plugins/global_license.py @@ -0,0 +1,23 @@ +from pelican import signals + +""" +License plugin for Pelican +========================== + +Simply add license variable in article's context, which contain +the license text. 
+ +Settings: +--------- + +Add LICENSE to your settings file to define default license. + +""" + +def add_license(generator, metadata): + if 'license' not in metadata.keys()\ + and 'LICENSE' in generator.settings.keys(): + metadata['license'] = generator.settings['LICENSE'] + +def register(): + signals.article_generate_context.connect(add_license) diff --git a/pelican/plugins/gravatar.py b/pelican/plugins/gravatar.py new file mode 100644 index 00000000..4ab8ea9c --- /dev/null +++ b/pelican/plugins/gravatar.py @@ -0,0 +1,40 @@ +import hashlib + +from pelican import signals +""" +Gravatar plugin for Pelican +=========================== + +Simply add author_gravatar variable in article's context, which contains +the gravatar url. + +Settings: +--------- + +Add AUTHOR_EMAIL to your settings file to define default author email. + +Article metadata: +------------------ + +:email: article's author email + +If one of them are defined, the author_gravatar variable is added to +article's context. +""" + +def add_gravatar(generator, metadata): + + #first check email + if 'email' not in metadata.keys()\ + and 'AUTHOR_EMAIL' in generator.settings.keys(): + metadata['email'] = generator.settings['AUTHOR_EMAIL'] + + #then add gravatar url + if 'email' in metadata.keys(): + gravatar_url = "http://www.gravatar.com/avatar/" + \ + hashlib.md5(metadata['email'].lower()).hexdigest() + metadata["author_gravatar"] = gravatar_url + + +def register(): + signals.article_generate_context.connect(add_gravatar) diff --git a/pelican/plugins/html_rst_directive.py b/pelican/plugins/html_rst_directive.py new file mode 100644 index 00000000..d0a656f5 --- /dev/null +++ b/pelican/plugins/html_rst_directive.py @@ -0,0 +1,63 @@ +from docutils import nodes +from docutils.parsers.rst import directives, Directive +from pelican import log + +""" +HTML tags for reStructuredText +============================== + +Directives +---------- + +.. 
html:: + + (HTML code) + + +Example +------- + +A search engine: + +.. html:: +
+ + + +
+ + +A contact form: + +.. html:: + +
+

+ +
+ +
+ +

+
+ +""" + + +class RawHtml(Directive): + required_arguments = 0 + optional_arguments = 0 + final_argument_whitespace = True + has_content = True + + def run(self): + html = u' '.join(self.content) + node = nodes.raw('', html, format='html') + return [node] + + + +def register(): + directives.register_directive('html', RawHtml) + diff --git a/pelican/plugins/initialized.py b/pelican/plugins/initialized.py new file mode 100644 index 00000000..5e4cf174 --- /dev/null +++ b/pelican/plugins/initialized.py @@ -0,0 +1,7 @@ +from pelican import signals + +def test(sender): + print "%s initialized !!" % sender + +def register(): + signals.initialized.connect(test) diff --git a/pelican/settings.py b/pelican/settings.py index d2a39cd9..17efea58 100644 --- a/pelican/settings.py +++ b/pelican/settings.py @@ -69,6 +69,8 @@ _DEFAULT_CONFIG = {'PATH': '.', 'TYPOGRIFY': False, 'LESS_GENERATOR': False, 'SUMMARY_MAX_LENGTH': 50, + 'WEBASSETS': False, + 'PLUGINS': [], } @@ -151,4 +153,12 @@ def configure_settings(settings, default_settings=None, filename=None): "http://docs.notmyidea.org/alexis/pelican/settings.html#timezone " "for more information") + if 'WEBASSETS' in settings and settings['WEBASSETS'] is not False: + try: + from webassets.ext.jinja2 import AssetsExtension + settings['JINJA_EXTENSIONS'].append(AssetsExtension) + except ImportError: + logger.warn("You must install the webassets module to use WEBASSETS.") + settings['WEBASSETS'] = False + return settings diff --git a/pelican/signals.py b/pelican/signals.py new file mode 100644 index 00000000..b1c35794 --- /dev/null +++ b/pelican/signals.py @@ -0,0 +1,5 @@ +from blinker import signal + +initialized = signal('pelican_initialized') +article_generate_context = signal('article_generate_context') +article_generator_init = signal('article_generator_init') diff --git a/pelican/tools/pelican_import.py b/pelican/tools/pelican_import.py index 050b1010..dab3c3a8 100755 --- a/pelican/tools/pelican_import.py +++ 
b/pelican/tools/pelican_import.py @@ -25,8 +25,14 @@ def wp2fields(xml): items = soup.rss.channel.findAll('item') for item in items: + if item.fetch('wp:status')[0].contents[0] == "publish": - title = item.title.contents[0] + + try: + title = item.title.contents[0] + except IndexError: + continue + content = item.fetch('content:encoded')[0].contents[0] filename = item.fetch('wp:post_name')[0].contents[0] @@ -197,7 +203,7 @@ def build_markdown_header(title, date, author, categories, tags): header += '\n' return header -def fields2pelican(fields, out_markup, output_path, dircat=False): +def fields2pelican(fields, out_markup, output_path, dircat=False, strip_raw=False): for title, content, filename, date, author, categories, tags, in_markup in fields: if (in_markup == "markdown") or (out_markup == "markdown") : ext = '.md' @@ -230,22 +236,26 @@ def fields2pelican(fields, out_markup, output_path, dircat=False): paragraphs = [u'

{}

'.format(p) for p in paragraphs] new_content = ''.join(paragraphs) - fp.write(content) + fp.write(new_content) - cmd = 'pandoc --normalize --reference-links --from=html --to={0} -o "{1}" "{2}"'.format( - out_markup, out_filename, html_filename) + + parse_raw = '--parse-raw' if not strip_raw else '' + cmd = ('pandoc --normalize --reference-links {0} --from=html' + ' --to={1} -o "{2}" "{3}"').format( + parse_raw, out_markup, out_filename, html_filename) try: rc = subprocess.call(cmd, shell=True) if rc < 0: - print("Child was terminated by signal %d" % -rc) - exit() + error = "Child was terminated by signal %d" % -rc + exit(error) + elif rc > 0: - print("Please, check your Pandoc installation.") - exit() + error = "Please, check your Pandoc installation." + exit(error) except OSError, e: - print("Pandoc execution failed: %s" % e) - exit() + error = "Pandoc execution failed: %s" % e + exit(error) os.remove(html_filename) @@ -279,6 +289,10 @@ def main(): help='Output markup format (supports rst & markdown)') parser.add_argument('--dir-cat', action='store_true', dest='dircat', help='Put files in directories with categories name') + parser.add_argument('--strip-raw', action='store_true', dest='strip_raw', + help="Strip raw HTML code that can't be converted to " + "markup such as flash embeds or iframes (wordpress import only)") + args = parser.parse_args() input_type = None @@ -289,15 +303,15 @@ def main(): elif args.feed: input_type = 'feed' else: - print("You must provide either --wpfile, --dotclear or --feed options") - exit() + error = "You must provide either --wpfile, --dotclear or --feed options" + exit(error) if not os.path.exists(args.output): try: os.mkdir(args.output) except OSError: - print("Unable to create the output folder: " + args.output) - exit() + error = "Unable to create the output folder: " + args.output + exit(error) if input_type == 'wordpress': fields = wp2fields(args.input) @@ -306,4 +320,6 @@ def main(): elif input_type == 'feed': fields = 
feed2fields(args.input) - fields2pelican(fields, args.markup, args.output, dircat=args.dircat or False) + fields2pelican(fields, args.markup, args.output, + dircat=args.dircat or False, + strip_raw=args.strip_raw or False) diff --git a/pelican/utils.py b/pelican/utils.py index d4e34842..0940bf72 100644 --- a/pelican/utils.py +++ b/pelican/utils.py @@ -4,6 +4,7 @@ import re import pytz import shutil import logging +from collections import defaultdict from codecs import open as _open from datetime import datetime @@ -221,9 +222,9 @@ def files_changed(path, extensions): """Return the last time files have been modified""" for root, dirs, files in os.walk(path): dirs[:] = [x for x in dirs if x[0] != '.'] - for file in files: - if any(file.endswith(ext) for ext in extensions): - yield os.stat(os.path.join(root, file)).st_mtime + for f in files: + if any(f.endswith(ext) for ext in extensions): + yield os.stat(os.path.join(root, f)).st_mtime global LAST_MTIME mtime = max(file_times(path)) @@ -233,6 +234,21 @@ def files_changed(path, extensions): return False +FILENAMES_MTIMES = defaultdict(int) + + +def file_changed(filename): + mtime = os.stat(filename).st_mtime + if FILENAMES_MTIMES[filename] == 0: + FILENAMES_MTIMES[filename] = mtime + return False + else: + if mtime > FILENAMES_MTIMES[filename]: + FILENAMES_MTIMES[filename] = mtime + return True + return False + + def set_date_tzinfo(d, tz_name=None): """ Date without tzinfo shoudbe utc. 
This function set the right tz to date that aren't utc and don't have diff --git a/setup.py b/setup.py index 0e57c83b..a8a8fbd9 100755 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ #!/usr/bin/env python from setuptools import setup -requires = ['feedgenerator', 'jinja2', 'pygments', 'docutils', 'pytz'] +requires = ['feedgenerator', 'jinja2', 'pygments', 'docutils', 'pytz', 'blinker'] try: import argparse @@ -25,7 +25,7 @@ setup( author_email = 'alexis@notmyidea.org', description = "A tool to generate a static blog from reStructuredText or Markdown input files.", long_description=open('README.rst').read(), - packages = ['pelican', 'pelican.tools'], + packages = ['pelican', 'pelican.tools', 'pelican.plugins'], include_package_data = True, install_requires = requires, entry_points = entry_points, diff --git a/tests/content/wordpressexport.xml b/tests/content/wordpressexport.xml new file mode 100644 index 00000000..d3e86cba --- /dev/null +++ b/tests/content/wordpressexport.xml @@ -0,0 +1,578 @@ + + + + + + + + + + + + + + + + + + + + + + + Pelican test channel + http://thisisa.test + Not a real feed, just for test + Sun, 13 May 2012 01:13:52 +0000 + en + 1.1 + http://thisisa.test + http://thisisa.test + + 2Bobbob@thisisa.test + 3Jonhjonh@thisisa.test + + 7categ-1 + 11categ-2 + 1uncategorized + 15categ-3 + 25tag-1 + 122tag2 + 68tag-3 + + http://wordpress.org/?v=3.3.1 + + + Empty post + http://thisisa.test/?attachment_id=24 + Sat, 04 Feb 2012 03:17:33 +0000 + bob + https://upload.wikimedia.org/wikipedia/commons/thumb/2/2c/Pelican_lakes_entrance02.jpg/240px-Pelican_lakes_entrance02.jpg + + + + 24 + 2012-02-04 03:17:33 + 2012-02-04 03:17:33 + open + open + empty-post + inherit + 0 + 0 + attachment + + 0 + https://upload.wikimedia.org/wikipedia/commons/thumb/2/2c/Pelican_lakes_entrance02.jpg/240px-Pelican_lakes_entrance02.jpg + + _wp_attachment_metadata + + + + _wp_attached_file + + + + _wp_attachment_image_alt + + + + + + http://thisisa.test/?p=168 + Thu, 01 Jan 1970 
00:00:00 +0000 + bob + http://thisisa.test/?p=168 + + + + 168 + 2012-02-15 21:23:57 + 0000-00-00 00:00:00 + open + open + + draft + 0 + 0 + post + + 0 + + + _edit_last + + + + + A normal post + http://thisisa.test/?p=173 + Thu, 01 Jan 1970 00:00:00 +0000 + bob + http://thisisa.test/?p=173 + + +
  • Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod +tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, +quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo +consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse +cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non +proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
  • +
  • Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod +tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, +quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo +consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse +cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non +proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
  • + + +Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod +tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, +quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo +consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse +cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non +proident, sunt in culpa qui officia deserunt mollit anim id est laborum.]]>
    + + 173 + 2012-02-16 15:52:55 + 0000-00-00 00:00:00 + open + open + + draft + 0 + 0 + post + + 0 + + + _edit_last + + +
    + + Complete draft + http://thisisa.test/?p=176 + Thu, 01 Jan 1970 00:00:00 +0000 + bob + http://thisisa.test/?p=176 + + + + 176 + 2012-02-17 15:11:55 + 0000-00-00 00:00:00 + open + open + + draft + 0 + 0 + post + + 0 + + + _edit_last + + + + + Page + http://thisisa.test/contact/ + Wed, 11 Apr 2012 11:38:08 +0000 + bob + http://thisisa.test/?page_id=334 + + + + 334 + 2012-04-11 06:38:08 + 2012-04-11 11:38:08 + open + open + contact + publish + 0 + 0 + page + + 0 + + sharing_disabled + + + + _wp_page_template + + + + _edit_last + + + + + Empty Page + http://thisisa.test/empty/ + Wed, 11 Apr 2012 11:38:08 +0000 + bob + http://thisisa.test/?page_id=334 + + + + 334 + 2012-04-11 06:38:08 + 2012-04-11 11:38:08 + open + open + empty + publish + 0 + 0 + page + + 0 + + sharing_disabled + + + + _wp_page_template + + + + _edit_last + + + + + Special chars: l'é + http://thisisa.test/?p=471 + Thu, 01 Jan 1970 00:00:00 +0000 + bob + http://thisisa.test/?p=471 + + + + 471 + 2012-04-29 09:44:27 + 0000-00-00 00:00:00 + open + open + + draft + 0 + 0 + post + + 0 + + + _edit_last + + + + + + With excerpt + http://thisisa.test/with-excerpt/ + Sat, 04 Feb 2012 02:03:06 +0000 + bob + http://thisisa.test/?p=8 + + + + 8 + 2012-02-04 02:03:06 + 2012-02-04 02:03:06 + open + open + with-excerpt + publish + 0 + 0 + post + + 0 + + + + + _edit_last + + + + et_bigpost + + + + _thumbnail_id + + + + + With tags + http://thisisa.test/tags/ + Sat, 04 Feb 2012 21:05:25 +0000 + bob + http://thisisa.test/?p=25 + + + + 25 + 2012-02-04 21:05:25 + 2012-02-04 21:05:25 + open + open + with-tags + publish + 0 + 0 + post + + 0 + + + + + + _edit_last + + + + et_bigpost + + + + _thumbnail_id + + + + + With comments + http://thisisa.test/with-comments/ + Wed, 18 Apr 2012 08:36:26 +0000 + john + http://thisisa.test/?p=422 + + + + 422 + 2012-04-18 03:36:26 + 2012-04-18 08:36:26 + open + open + with-comments + publish + 0 + 0 + post + + 0 + + + _edit_last + + + + _thumbnail_id + + + + 116 + + User2@mail.test + 
+ 127.0.0.1 + 2012-05-06 15:46:06 + 2012-05-06 20:46:06 + + 1 + + 0 + 0 + + akismet_result + + + + akismet_history + + + + akismet_as_submitted + + + + + 117 + + bob@thisisa.test + + 127.0.0.1 + 2012-05-06 17:44:06 + 2012-05-06 22:44:06 + + 1 + + 116 + 3 + + akismet_result + + + + akismet_history + + + + akismet_as_submitted + + + + + 156 + + + http://thisisa.test/to-article-you-ping-back/ + 127.0.0.1 + 2012-05-09 19:30:19 + 2012-05-10 00:30:19 + + trash + pingback + 0 + 0 + + akismet_history + + + + _wp_trash_meta_status + + + + _wp_trash_meta_time + + + + + 122 + + bob@thisisa.test + + 127.0.0.1 + 2012-05-07 14:11:34 + 2012-05-07 19:11:34 + + 1 + + 121 + 3 + + akismet_result + + + + akismet_history + + + + akismet_as_submitted + + + + + + Post with raw data + http://thisisa.test/?p=173 + Thu, 01 Jan 1970 00:00:00 +0000 + bob + http://thisisa.test/?p=173 + + Pelicans are scary + +Pelicans are supposed to eat fish, damn it! + + + +Bottom line: don't mess up with birds]]> + + 173 + 2012-02-16 15:52:55 + 0000-00-00 00:00:00 + open + open + post-with-raw-data + publish + 0 + 0 + post + + 0 + + + _edit_last + + + +
    +
    diff --git a/tests/support.py b/tests/support.py index f2b4a075..994cd509 100644 --- a/tests/support.py +++ b/tests/support.py @@ -6,6 +6,11 @@ __all__ = [ import os import subprocess +import re +import sys +import cStringIO + +from functools import wraps from contextlib import contextmanager from tempfile import mkdtemp from shutil import rmtree @@ -28,8 +33,87 @@ def temporary_folder(): # do whatever you want """ tempdir = mkdtemp() - yield tempdir - rmtree(tempdir) + try: + yield tempdir + finally: + rmtree(tempdir) + + +def isplit(s, sep=None): + """ + Behave like str.split but returns a generator instead of a list. + + >>> list(isplit('\tUse the force\n')) == '\tUse the force\n'.split() + True + >>> list(isplit('\tUse the force\n')) == ['Use', 'the', 'force'] + True + >>> list(isplit('\tUse the force\n', "e")) == '\tUse the force\n'.split("e") + True + >>> list(isplit('Use the force', "e")) == 'Use the force'.split("e") + True + >>> list(isplit('Use the force', "e")) == ['Us', ' th', ' forc', ''] + True + + """ + sep, hardsep = r'\s+' if sep is None else re.escape(sep), sep is not None + exp, pos, l = re.compile(sep), 0, len(s) + while True: + m = exp.search(s, pos) + if not m: + if pos < l or hardsep: + # ^ mimic "split()": ''.split() returns [] + yield s[pos:] + break + start = m.start() + if pos < start or hardsep: + # ^ mimic "split()": includes trailing empty string + yield s[pos:start] + pos = m.end() + + +def mute(returns_output=False): + """ + Decorate a function that prints to stdout, intercepting the output. + If "returns_output" is True, the function will return a generator + yielding the printed lines instead of the return values. + + The decorator litterally hijack sys.stdout during each function + execution, so be careful with what you apply it to. + + >>> def numbers(): + print "42" + print "1984" + ... 
+ >>> numbers() + 42 + 1984 + >>> mute()(numbers)() + >>> list(mute(True)(numbers)()) + ['42', '1984'] + + """ + + def decorator(func): + + @wraps(func) + def wrapper(*args, **kwargs): + + saved_stdout = sys.stdout + sys.stdout = cStringIO.StringIO() + + try: + out = func(*args, **kwargs) + if returns_output: + out = isplit(sys.stdout.getvalue().strip()) + finally: + sys.stdout = saved_stdout + + return out + + return wrapper + + return decorator + def get_article(title, slug, content, lang, extra_metadata=None): diff --git a/tests/test_importer.py b/tests/test_importer.py new file mode 100644 index 00000000..5504b12e --- /dev/null +++ b/tests/test_importer.py @@ -0,0 +1,52 @@ +# -*- coding: utf-8 -*- + +import os + +from pelican.tools.pelican_import import wp2fields, fields2pelican +from .support import unittest, temporary_folder, mute + +CUR_DIR = os.path.dirname(__file__) +WORDPRESS_XML_SAMPLE = os.path.join(CUR_DIR, 'content', 'wordpressexport.xml') + +PANDOC = os.system('pandoc --version') == 0 +try: + import BeautifulSoup +except ImportError: + BeautifulSoup = False # NOQA + + +class TestWordpressXmlImporter(unittest.TestCase): + + def setUp(self): + self.posts = wp2fields(WORDPRESS_XML_SAMPLE) + + @unittest.skipUnless(PANDOC and BeautifulSoup, + 'Needs Pandoc and BeautifulSoup') + def test_ignore_empty_posts(self): + + posts = list(self.posts) + self.assertTrue(posts) + for title, content, fname, date, author, categ, tags, format in posts: + self.assertTrue(title.strip()) + + @unittest.skipUnless(PANDOC and BeautifulSoup, + 'Needs Pandoc and BeautifulSoup') + def test_can_toggle_raw_html_code_parsing(self): + + posts = list(self.posts) + r = lambda f: open(f).read() + silent_f2p = mute(True)(fields2pelican) + + with temporary_folder() as temp: + + rst_files = (r(f) for f in silent_f2p(posts, 'markdown', temp)) + self.assertTrue(any('