mirror of
https://github.com/getpelican/pelican.git
synced 2025-10-15 20:28:56 +02:00
Merge branch master of github.com:jbcurtin/pelican
This commit is contained in:
commit
954d6e496b
22 changed files with 349 additions and 269 deletions
|
|
@ -291,10 +291,10 @@ Related posts
|
|||
-------------
|
||||
|
||||
This plugin adds the ``related_posts`` variable to the article's context.
|
||||
To enable, add the following to your settings file::
|
||||
By default, up to 5 articles are listed. You can customize this value by
|
||||
defining ``RELATED_POSTS_MAX`` in your settings file::
|
||||
|
||||
from pelican.plugins import related_posts
|
||||
PLUGINS = [related_posts]
|
||||
RELATED_POSTS_MAX = 10
|
||||
|
||||
You can then use the ``article.related_posts`` variable in your templates.
|
||||
For example::
|
||||
|
|
|
|||
|
|
@ -417,9 +417,6 @@ class ArticlesGenerator(Generator):
|
|||
self.add_source_path(article)
|
||||
|
||||
if article.status == "published":
|
||||
if hasattr(article, 'tags'):
|
||||
for tag in article.tags:
|
||||
self.tags[tag].append(article)
|
||||
all_articles.append(article)
|
||||
elif article.status == "draft":
|
||||
self.drafts.append(article)
|
||||
|
|
@ -431,12 +428,17 @@ class ArticlesGenerator(Generator):
|
|||
self.articles, self.translations = process_translations(all_articles)
|
||||
|
||||
for article in self.articles:
|
||||
# only main articles are listed in categories, not translations
|
||||
# only main articles are listed in categories and tags
|
||||
# not translations
|
||||
self.categories[article.category].append(article)
|
||||
if hasattr(article, 'tags'):
|
||||
for tag in article.tags:
|
||||
self.tags[tag].append(article)
|
||||
# ignore blank authors as well as undefined
|
||||
if hasattr(article, 'author') and article.author.name != '':
|
||||
self.authors[article.author].append(article)
|
||||
|
||||
|
||||
# sort the articles by date
|
||||
self.articles.sort(key=attrgetter('date'), reverse=True)
|
||||
self.dates = list(self.articles)
|
||||
|
|
|
|||
|
|
@ -1,71 +1,35 @@
|
|||
from pelican import signals
|
||||
|
||||
"""
|
||||
Related posts plugin for Pelican
|
||||
================================
|
||||
|
||||
Adds related_posts variable to article's context
|
||||
|
||||
Settings
|
||||
--------
|
||||
To enable, add
|
||||
|
||||
from pelican.plugins import related_posts
|
||||
PLUGINS = [related_posts]
|
||||
|
||||
to your pelicanconf.py.
|
||||
|
||||
Control the number of entries in the config file with:
|
||||
|
||||
RELATED_POSTS = {
|
||||
'numentries': 6,
|
||||
}
|
||||
|
||||
|
||||
Usage
|
||||
-----
|
||||
{% if article.related_posts %}
|
||||
<ul>
|
||||
{% for related_post in article.related_posts %}
|
||||
<li><a href="{{ related_post.url }}">{{ related_post.title }}</a></li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
{% endif %}
|
||||
|
||||
|
||||
"""
|
||||
|
||||
related_posts = []
|
||||
from pelican import signals
|
||||
from collections import Counter
|
||||
|
||||
|
||||
def add_related_posts(generator, metadata):
|
||||
if 'tags' in metadata:
|
||||
for tag in metadata['tags']:
|
||||
#print tag
|
||||
for related_article in generator.tags[tag]:
|
||||
related_posts.append(related_article)
|
||||
def add_related_posts(generator):
|
||||
# get the max number of entries from settings
|
||||
# or fall back to default (5)
|
||||
numentries = generator.settings.get('RELATED_POSTS_MAX', 5)
|
||||
|
||||
if len(related_posts) < 1:
|
||||
return
|
||||
|
||||
metadata["related_posts"] = sorted(set(related_posts))
|
||||
for article in generator.articles:
|
||||
# no tag, no relation
|
||||
if not hasattr(article, 'tags'):
|
||||
continue
|
||||
|
||||
relation_score = dict(list(zip(set(related_posts), list(map(related_posts.count,
|
||||
set(related_posts))))))
|
||||
ranked_related = sorted(relation_score, key=relation_score.get)
|
||||
|
||||
# Load the config file and get the number of entries specified there
|
||||
settings = generator.settings
|
||||
config = settings.get('RELATED_POSTS', {})
|
||||
# score = number of common tags
|
||||
scores = Counter()
|
||||
for tag in article.tags:
|
||||
scores += Counter(generator.tags[tag])
|
||||
|
||||
#check if the related_posts var is set in the pythonconfig.py
|
||||
if not isinstance(config, dict):
|
||||
info("realted_links plugin: Using default number of related links ("+numentries+")")
|
||||
else:
|
||||
numentries = config.get('numentries', 5)
|
||||
|
||||
metadata["related_posts"] = ranked_related[:numentries]
|
||||
# remove itself
|
||||
scores.pop(article)
|
||||
|
||||
article.related_posts = [other for other, count
|
||||
in scores.most_common(numentries)]
|
||||
|
||||
|
||||
def register():
|
||||
signals.article_generate_context.connect(add_related_posts)
|
||||
signals.article_generator_finalized.connect(add_related_posts)
|
||||
|
|
@ -0,0 +1,2 @@
|
|||
import logging
|
||||
logging.getLogger().addHandler(logging.NullHandler())
|
||||
48
pelican/tests/content/wordpress_content_decoded
Normal file
48
pelican/tests/content/wordpress_content_decoded
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
|
||||
<p><object width="425" height="350"><param name="movie" value="http://www.youtube.com/v/XSrW-wAWZe4"></param><param name="wmode" value="transparent"></param><embed src="http://www.youtube.com/v/XSrW-wAWZe4" type="application/x-shockwave-flash" wmode="transparent" width="425" height="350"></embed></object></p>
|
||||
<blockquote><p>
|
||||
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
|
||||
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p></blockquote>
|
||||
<ul>
|
||||
<li>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</li>
|
||||
<li>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</li>
|
||||
</ul>
|
||||
<pre>
|
||||
<code>
|
||||
a = [1, 2, 3]
|
||||
b = [4, 5, 6]
|
||||
for i in zip(a, b):
|
||||
print i
|
||||
</code>
|
||||
</pre>
|
||||
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
|
||||
55
pelican/tests/content/wordpress_content_encoded
Normal file
55
pelican/tests/content/wordpress_content_encoded
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
||||
|
||||
<object width="425" height="350"><param name="movie" value="http://www.youtube.com/v/XSrW-wAWZe4"></param><param name="wmode" value="transparent"></param><embed src="http://www.youtube.com/v/XSrW-wAWZe4" type="application/x-shockwave-flash" wmode="transparent" width="425" height="350"></embed></object>
|
||||
|
||||
<blockquote>
|
||||
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
||||
|
||||
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
||||
</blockquote>
|
||||
<ul>
|
||||
<li>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</li>
|
||||
<li>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</li>
|
||||
</ul>
|
||||
|
||||
<pre>
|
||||
<code>
|
||||
a = [1, 2, 3]
|
||||
b = [4, 5, 6]
|
||||
for i in zip(a, b):
|
||||
print i
|
||||
</code>
|
||||
</pre>
|
||||
|
||||
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
||||
|
||||
|
|
@ -628,5 +628,59 @@ proident, sunt in culpa qui officia deserunt mollit anim id est laborum.]]></con
|
|||
<wp:meta_value><![CDATA[3]]></wp:meta_value>
|
||||
</wp:postmeta>
|
||||
</item>
|
||||
<item>
|
||||
<title>Code in List</title>
|
||||
<link>http://thisisa.test/?p=175</link>
|
||||
<pubDate>Thu, 01 Jan 1970 00:00:00 +0000</pubDate>
|
||||
<dc:creator>bob</dc:creator>
|
||||
<guid isPermaLink="false">http://thisisa.test/?p=175</guid>
|
||||
<description></description>
|
||||
<content:encoded><![CDATA[Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
||||
|
||||
<ul>
|
||||
<li>List Item One!</li>
|
||||
<li>List Item Two!</li>
|
||||
<li>This is a code sample
|
||||
<pre>
|
||||
<code>
|
||||
a = [1, 2, 3]
|
||||
b = [4, 5, 6]
|
||||
for i in zip(a, b):
|
||||
print i
|
||||
</code>
|
||||
</pre></li>
|
||||
<li>List Item Four!</li>
|
||||
</ul>
|
||||
|
||||
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.]]></content:encoded>
|
||||
<excerpt:encoded><![CDATA[]]></excerpt:encoded>
|
||||
<wp:post_id>175</wp:post_id>
|
||||
<wp:post_date>2012-02-16 15:52:55</wp:post_date>
|
||||
<wp:post_date_gmt>0000-00-00 00:00:00</wp:post_date_gmt>
|
||||
<wp:comment_status>open</wp:comment_status>
|
||||
<wp:ping_status>open</wp:ping_status>
|
||||
<wp:post_name>code-in-list-test</wp:post_name>
|
||||
<wp:status>publish</wp:status>
|
||||
<wp:post_parent>0</wp:post_parent>
|
||||
<wp:menu_order>0</wp:menu_order>
|
||||
<wp:post_type>post</wp:post_type>
|
||||
<wp:post_password></wp:post_password>
|
||||
<wp:is_sticky>0</wp:is_sticky>
|
||||
<category domain="category" nicename="category-2"><![CDATA[Category 2]]></category>
|
||||
<wp:postmeta>
|
||||
<wp:meta_key>_edit_last</wp:meta_key>
|
||||
<wp:meta_value><![CDATA[3]]></wp:meta_value>
|
||||
</wp:postmeta>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>
|
||||
|
|
|
|||
|
|
@ -35,48 +35,24 @@
|
|||
|
||||
<aside id="featured" class="body">
|
||||
<article>
|
||||
<h1 class="entry-title"><a href="../second-article-fr.html">Deuxième article</a></h1>
|
||||
<h1 class="entry-title"><a href="../second-article.html">Second article</a></h1>
|
||||
<footer class="post-info">
|
||||
<abbr class="published" title="2012-02-29T00:00:00">
|
||||
Wed 29 February 2012
|
||||
</abbr>
|
||||
|
||||
<p>In <a href="../category/misc.html">misc</a>. </p>
|
||||
<p>tags: <a href="../tag/foo.html">foo</a><a href="../tag/bar.html">bar</a><a href="../tag/baz.html">baz</a></p>Translations:
|
||||
<a href="../second-article.html">en</a>
|
||||
|
||||
</footer><!-- /.post-info --><p>Ceci est un article, en français.</p>
|
||||
</article>
|
||||
</aside><!-- /#featured -->
|
||||
<section id="content" class="body">
|
||||
<h1>Other articles</h1>
|
||||
<hr />
|
||||
<ol id="posts-list" class="hfeed">
|
||||
|
||||
|
||||
|
||||
<li><article class="hentry">
|
||||
<header>
|
||||
<h1><a href="../second-article.html" rel="bookmark"
|
||||
title="Permalink to Second article">Second article</a></h1>
|
||||
</header>
|
||||
|
||||
<div class="entry-content">
|
||||
<footer class="post-info">
|
||||
<abbr class="published" title="2012-02-29T00:00:00">
|
||||
Wed 29 February 2012
|
||||
</abbr>
|
||||
|
||||
<p>In <a href="../category/misc.html">misc</a>. </p>
|
||||
<p>tags: <a href="../tag/foo.html">foo</a><a href="../tag/bar.html">bar</a><a href="../tag/baz.html">baz</a></p>Translations:
|
||||
<a href="../second-article-fr.html">fr</a>
|
||||
|
||||
</footer><!-- /.post-info --> <p>This is some article, in english</p>
|
||||
|
||||
<a class="readmore" href="../second-article.html">read more</a>
|
||||
</div><!-- /.entry-content -->
|
||||
</article></li>
|
||||
|
||||
</footer><!-- /.post-info --><p>This is some article, in english</p>
|
||||
</article>
|
||||
</aside><!-- /#featured -->
|
||||
<section id="content" class="body">
|
||||
<h1>Other articles</h1>
|
||||
<hr />
|
||||
<ol id="posts-list" class="hfeed">
|
||||
|
||||
|
||||
|
||||
<li><article class="hentry">
|
||||
|
|
|
|||
|
|
@ -35,49 +35,21 @@
|
|||
|
||||
<aside id="featured" class="body">
|
||||
<article>
|
||||
<h1 class="entry-title"><a href="../second-article-fr.html">Deuxième article</a></h1>
|
||||
<h1 class="entry-title"><a href="../second-article.html">Second article</a></h1>
|
||||
<footer class="post-info">
|
||||
<abbr class="published" title="2012-02-29T00:00:00">
|
||||
Wed 29 February 2012
|
||||
</abbr>
|
||||
|
||||
<p>In <a href="../category/misc.html">misc</a>. </p>
|
||||
<p>tags: <a href="../tag/foo.html">foo</a><a href="../tag/bar.html">bar</a><a href="../tag/baz.html">baz</a></p>Translations:
|
||||
<a href="../second-article.html">en</a>
|
||||
|
||||
</footer><!-- /.post-info --><p>Ceci est un article, en français.</p>
|
||||
</article>
|
||||
</aside><!-- /#featured -->
|
||||
<section id="content" class="body">
|
||||
<h1>Other articles</h1>
|
||||
<hr />
|
||||
<ol id="posts-list" class="hfeed">
|
||||
|
||||
|
||||
|
||||
<li><article class="hentry">
|
||||
<header>
|
||||
<h1><a href="../second-article.html" rel="bookmark"
|
||||
title="Permalink to Second article">Second article</a></h1>
|
||||
</header>
|
||||
|
||||
<div class="entry-content">
|
||||
<footer class="post-info">
|
||||
<abbr class="published" title="2012-02-29T00:00:00">
|
||||
Wed 29 February 2012
|
||||
</abbr>
|
||||
|
||||
<p>In <a href="../category/misc.html">misc</a>. </p>
|
||||
<p>tags: <a href="../tag/foo.html">foo</a><a href="../tag/bar.html">bar</a><a href="../tag/baz.html">baz</a></p>Translations:
|
||||
<a href="../second-article-fr.html">fr</a>
|
||||
|
||||
</footer><!-- /.post-info --> <p>This is some article, in english</p>
|
||||
|
||||
<a class="readmore" href="../second-article.html">read more</a>
|
||||
</div><!-- /.entry-content -->
|
||||
</article></li>
|
||||
</ol><!-- /#posts-list -->
|
||||
</section><!-- /#content -->
|
||||
</footer><!-- /.post-info --><p>This is some article, in english</p>
|
||||
</article>
|
||||
</aside><!-- /#featured -->
|
||||
</ol><!-- /#posts-list -->
|
||||
</section><!-- /#content -->
|
||||
<section id="extras" class="body">
|
||||
<div class="social">
|
||||
<h2>social</h2>
|
||||
|
|
|
|||
|
|
@ -35,48 +35,24 @@
|
|||
|
||||
<aside id="featured" class="body">
|
||||
<article>
|
||||
<h1 class="entry-title"><a href="../second-article-fr.html">Deuxième article</a></h1>
|
||||
<h1 class="entry-title"><a href="../second-article.html">Second article</a></h1>
|
||||
<footer class="post-info">
|
||||
<abbr class="published" title="2012-02-29T00:00:00">
|
||||
Wed 29 February 2012
|
||||
</abbr>
|
||||
|
||||
<p>In <a href="../category/misc.html">misc</a>. </p>
|
||||
<p>tags: <a href="../tag/foo.html">foo</a><a href="../tag/bar.html">bar</a><a href="../tag/baz.html">baz</a></p>Translations:
|
||||
<a href="../second-article.html">en</a>
|
||||
|
||||
</footer><!-- /.post-info --><p>Ceci est un article, en français.</p>
|
||||
</article>
|
||||
</aside><!-- /#featured -->
|
||||
<section id="content" class="body">
|
||||
<h1>Other articles</h1>
|
||||
<hr />
|
||||
<ol id="posts-list" class="hfeed">
|
||||
|
||||
|
||||
|
||||
<li><article class="hentry">
|
||||
<header>
|
||||
<h1><a href="../second-article.html" rel="bookmark"
|
||||
title="Permalink to Second article">Second article</a></h1>
|
||||
</header>
|
||||
|
||||
<div class="entry-content">
|
||||
<footer class="post-info">
|
||||
<abbr class="published" title="2012-02-29T00:00:00">
|
||||
Wed 29 February 2012
|
||||
</abbr>
|
||||
|
||||
<p>In <a href="../category/misc.html">misc</a>. </p>
|
||||
<p>tags: <a href="../tag/foo.html">foo</a><a href="../tag/bar.html">bar</a><a href="../tag/baz.html">baz</a></p>Translations:
|
||||
<a href="../second-article-fr.html">fr</a>
|
||||
|
||||
</footer><!-- /.post-info --> <p>This is some article, in english</p>
|
||||
|
||||
<a class="readmore" href="../second-article.html">read more</a>
|
||||
</div><!-- /.entry-content -->
|
||||
</article></li>
|
||||
|
||||
</footer><!-- /.post-info --><p>This is some article, in english</p>
|
||||
</article>
|
||||
</aside><!-- /#featured -->
|
||||
<section id="content" class="body">
|
||||
<h1>Other articles</h1>
|
||||
<hr />
|
||||
<ol id="posts-list" class="hfeed">
|
||||
|
||||
|
||||
|
||||
<li><article class="hentry">
|
||||
|
|
|
|||
|
|
@ -39,7 +39,7 @@
|
|||
|
||||
<aside id="featured" class="body">
|
||||
<article>
|
||||
<h1 class="entry-title"><a href="../second-article-fr.html">Deuxième article</a></h1>
|
||||
<h1 class="entry-title"><a href="../second-article.html">Second article</a></h1>
|
||||
<footer class="post-info">
|
||||
<abbr class="published" title="2012-02-29T00:00:00">
|
||||
Wed 29 February 2012
|
||||
|
|
@ -49,44 +49,17 @@
|
|||
By <a class="url fn" href="../author/alexis-metaireau.html">Alexis Métaireau</a>
|
||||
</address>
|
||||
<p>In <a href="../category/misc.html">misc</a>. </p>
|
||||
<p>tags: <a href="../tag/foo.html">foo</a><a href="../tag/bar.html">bar</a><a href="../tag/baz.html">baz</a></p>Translations:
|
||||
<a href="../second-article.html">en</a>
|
||||
|
||||
</footer><!-- /.post-info --><p>Ceci est un article, en français.</p>
|
||||
<p>There are <a href="../second-article-fr.html#disqus_thread">comments</a>.</p> </article>
|
||||
</aside><!-- /#featured -->
|
||||
<section id="content" class="body">
|
||||
<h1>Other articles</h1>
|
||||
<hr />
|
||||
<ol id="posts-list" class="hfeed">
|
||||
|
||||
|
||||
|
||||
<li><article class="hentry">
|
||||
<header>
|
||||
<h1><a href="../second-article.html" rel="bookmark"
|
||||
title="Permalink to Second article">Second article</a></h1>
|
||||
</header>
|
||||
|
||||
<div class="entry-content">
|
||||
<footer class="post-info">
|
||||
<abbr class="published" title="2012-02-29T00:00:00">
|
||||
Wed 29 February 2012
|
||||
</abbr>
|
||||
|
||||
<address class="vcard author">
|
||||
By <a class="url fn" href="../author/alexis-metaireau.html">Alexis Métaireau</a>
|
||||
</address>
|
||||
<p>In <a href="../category/misc.html">misc</a>. </p>
|
||||
<p>tags: <a href="../tag/foo.html">foo</a><a href="../tag/bar.html">bar</a><a href="../tag/baz.html">baz</a></p>Translations:
|
||||
<a href="../second-article-fr.html">fr</a>
|
||||
|
||||
</footer><!-- /.post-info --> <p>This is some article, in english</p>
|
||||
|
||||
<a class="readmore" href="../second-article.html">read more</a>
|
||||
<p>There are <a href="../second-article.html#disqus_thread">comments</a>.</p> </div><!-- /.entry-content -->
|
||||
</article></li>
|
||||
|
||||
</footer><!-- /.post-info --><p>This is some article, in english</p>
|
||||
<p>There are <a href="../second-article.html#disqus_thread">comments</a>.</p> </article>
|
||||
</aside><!-- /#featured -->
|
||||
<section id="content" class="body">
|
||||
<h1>Other articles</h1>
|
||||
<hr />
|
||||
<ol id="posts-list" class="hfeed">
|
||||
|
||||
|
||||
|
||||
<li><article class="hentry">
|
||||
|
|
|
|||
|
|
@ -39,7 +39,7 @@
|
|||
|
||||
<aside id="featured" class="body">
|
||||
<article>
|
||||
<h1 class="entry-title"><a href="../second-article-fr.html">Deuxième article</a></h1>
|
||||
<h1 class="entry-title"><a href="../second-article.html">Second article</a></h1>
|
||||
<footer class="post-info">
|
||||
<abbr class="published" title="2012-02-29T00:00:00">
|
||||
Wed 29 February 2012
|
||||
|
|
@ -49,47 +49,16 @@
|
|||
By <a class="url fn" href="../author/alexis-metaireau.html">Alexis Métaireau</a>
|
||||
</address>
|
||||
<p>In <a href="../category/misc.html">misc</a>. </p>
|
||||
<p>tags: <a href="../tag/foo.html">foo</a><a href="../tag/bar.html">bar</a><a href="../tag/baz.html">baz</a></p>Translations:
|
||||
<a href="../second-article.html">en</a>
|
||||
|
||||
</footer><!-- /.post-info --><p>Ceci est un article, en français.</p>
|
||||
<p>There are <a href="../second-article-fr.html#disqus_thread">comments</a>.</p> </article>
|
||||
</aside><!-- /#featured -->
|
||||
<section id="content" class="body">
|
||||
<h1>Other articles</h1>
|
||||
<hr />
|
||||
<ol id="posts-list" class="hfeed">
|
||||
|
||||
|
||||
|
||||
<li><article class="hentry">
|
||||
<header>
|
||||
<h1><a href="../second-article.html" rel="bookmark"
|
||||
title="Permalink to Second article">Second article</a></h1>
|
||||
</header>
|
||||
|
||||
<div class="entry-content">
|
||||
<footer class="post-info">
|
||||
<abbr class="published" title="2012-02-29T00:00:00">
|
||||
Wed 29 February 2012
|
||||
</abbr>
|
||||
|
||||
<address class="vcard author">
|
||||
By <a class="url fn" href="../author/alexis-metaireau.html">Alexis Métaireau</a>
|
||||
</address>
|
||||
<p>In <a href="../category/misc.html">misc</a>. </p>
|
||||
<p>tags: <a href="../tag/foo.html">foo</a><a href="../tag/bar.html">bar</a><a href="../tag/baz.html">baz</a></p>Translations:
|
||||
<a href="../second-article-fr.html">fr</a>
|
||||
|
||||
</footer><!-- /.post-info --> <p>This is some article, in english</p>
|
||||
|
||||
<a class="readmore" href="../second-article.html">read more</a>
|
||||
<p>There are <a href="../second-article.html#disqus_thread">comments</a>.</p> </div><!-- /.entry-content -->
|
||||
</article></li>
|
||||
</ol><!-- /#posts-list -->
|
||||
<p class="paginator">
|
||||
</footer><!-- /.post-info --><p>This is some article, in english</p>
|
||||
<p>There are <a href="../second-article.html#disqus_thread">comments</a>.</p> </article>
|
||||
<p class="paginator">
|
||||
Page 1 / 1
|
||||
</p>
|
||||
</aside><!-- /#featured -->
|
||||
</ol><!-- /#posts-list -->
|
||||
</section><!-- /#content -->
|
||||
<section id="extras" class="body">
|
||||
<div class="blogroll">
|
||||
|
|
|
|||
|
|
@ -39,7 +39,7 @@
|
|||
|
||||
<aside id="featured" class="body">
|
||||
<article>
|
||||
<h1 class="entry-title"><a href="../second-article-fr.html">Deuxième article</a></h1>
|
||||
<h1 class="entry-title"><a href="../second-article.html">Second article</a></h1>
|
||||
<footer class="post-info">
|
||||
<abbr class="published" title="2012-02-29T00:00:00">
|
||||
Wed 29 February 2012
|
||||
|
|
@ -49,44 +49,17 @@
|
|||
By <a class="url fn" href="../author/alexis-metaireau.html">Alexis Métaireau</a>
|
||||
</address>
|
||||
<p>In <a href="../category/misc.html">misc</a>. </p>
|
||||
<p>tags: <a href="../tag/foo.html">foo</a><a href="../tag/bar.html">bar</a><a href="../tag/baz.html">baz</a></p>Translations:
|
||||
<a href="../second-article.html">en</a>
|
||||
|
||||
</footer><!-- /.post-info --><p>Ceci est un article, en français.</p>
|
||||
<p>There are <a href="../second-article-fr.html#disqus_thread">comments</a>.</p> </article>
|
||||
</aside><!-- /#featured -->
|
||||
<section id="content" class="body">
|
||||
<h1>Other articles</h1>
|
||||
<hr />
|
||||
<ol id="posts-list" class="hfeed">
|
||||
|
||||
|
||||
|
||||
<li><article class="hentry">
|
||||
<header>
|
||||
<h1><a href="../second-article.html" rel="bookmark"
|
||||
title="Permalink to Second article">Second article</a></h1>
|
||||
</header>
|
||||
|
||||
<div class="entry-content">
|
||||
<footer class="post-info">
|
||||
<abbr class="published" title="2012-02-29T00:00:00">
|
||||
Wed 29 February 2012
|
||||
</abbr>
|
||||
|
||||
<address class="vcard author">
|
||||
By <a class="url fn" href="../author/alexis-metaireau.html">Alexis Métaireau</a>
|
||||
</address>
|
||||
<p>In <a href="../category/misc.html">misc</a>. </p>
|
||||
<p>tags: <a href="../tag/foo.html">foo</a><a href="../tag/bar.html">bar</a><a href="../tag/baz.html">baz</a></p>Translations:
|
||||
<a href="../second-article-fr.html">fr</a>
|
||||
|
||||
</footer><!-- /.post-info --> <p>This is some article, in english</p>
|
||||
|
||||
<a class="readmore" href="../second-article.html">read more</a>
|
||||
<p>There are <a href="../second-article.html#disqus_thread">comments</a>.</p> </div><!-- /.entry-content -->
|
||||
</article></li>
|
||||
|
||||
</footer><!-- /.post-info --><p>This is some article, in english</p>
|
||||
<p>There are <a href="../second-article.html#disqus_thread">comments</a>.</p> </article>
|
||||
</aside><!-- /#featured -->
|
||||
<section id="content" class="body">
|
||||
<h1>Other articles</h1>
|
||||
<hr />
|
||||
<ol id="posts-list" class="hfeed">
|
||||
|
||||
|
||||
|
||||
<li><article class="hentry">
|
||||
|
|
|
|||
|
|
@ -2,13 +2,20 @@
|
|||
from __future__ import unicode_literals, print_function
|
||||
|
||||
import os
|
||||
import re
|
||||
|
||||
from pelican.tools.pelican_import import wp2fields, fields2pelican
|
||||
from pelican.tools.pelican_import import wp2fields, fields2pelican, decode_wp_content
|
||||
from pelican.tests.support import (unittest, temporary_folder, mute,
|
||||
skipIfNoExecutable)
|
||||
|
||||
CUR_DIR = os.path.dirname(__file__)
|
||||
WORDPRESS_XML_SAMPLE = os.path.join(CUR_DIR, 'content', 'wordpressexport.xml')
|
||||
WORDPRESS_ENCODED_CONTENT_SAMPLE = os.path.join(CUR_DIR,
|
||||
'content',
|
||||
'wordpress_content_encoded')
|
||||
WORDPRESS_DECODED_CONTENT_SAMPLE = os.path.join(CUR_DIR,
|
||||
'content',
|
||||
'wordpress_content_decoded')
|
||||
|
||||
try:
|
||||
from bs4 import BeautifulSoup
|
||||
|
|
@ -21,38 +28,33 @@ except ImportError:
|
|||
class TestWordpressXmlImporter(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
self.posts = wp2fields(WORDPRESS_XML_SAMPLE)
|
||||
self.posts = list(wp2fields(WORDPRESS_XML_SAMPLE))
|
||||
|
||||
def test_ignore_empty_posts(self):
|
||||
|
||||
posts = list(self.posts)
|
||||
self.assertTrue(posts)
|
||||
for title, content, fname, date, author, categ, tags, format in posts:
|
||||
self.assertTrue(self.posts)
|
||||
for title, content, fname, date, author, categ, tags, format in self.posts:
|
||||
self.assertTrue(title.strip())
|
||||
|
||||
def test_can_toggle_raw_html_code_parsing(self):
|
||||
|
||||
posts = list(self.posts)
|
||||
r = lambda f: open(f).read()
|
||||
silent_f2p = mute(True)(fields2pelican)
|
||||
|
||||
with temporary_folder() as temp:
|
||||
|
||||
rst_files = (r(f) for f in silent_f2p(posts, 'markdown', temp))
|
||||
rst_files = (r(f) for f in silent_f2p(self.posts, 'markdown', temp))
|
||||
self.assertTrue(any('<iframe' in rst for rst in rst_files))
|
||||
rst_files = (r(f) for f in silent_f2p(posts, 'markdown', temp,
|
||||
rst_files = (r(f) for f in silent_f2p(self.posts, 'markdown', temp,
|
||||
strip_raw=True))
|
||||
self.assertFalse(any('<iframe' in rst for rst in rst_files))
|
||||
# no effect in rst
|
||||
rst_files = (r(f) for f in silent_f2p(posts, 'rst', temp))
|
||||
rst_files = (r(f) for f in silent_f2p(self.posts, 'rst', temp))
|
||||
self.assertFalse(any('<iframe' in rst for rst in rst_files))
|
||||
rst_files = (r(f) for f in silent_f2p(posts, 'rst', temp,
|
||||
rst_files = (r(f) for f in silent_f2p(self.posts, 'rst', temp,
|
||||
strip_raw=True))
|
||||
self.assertFalse(any('<iframe' in rst for rst in rst_files))
|
||||
|
||||
def test_decode_html_entities_in_titles(self):
|
||||
posts = list(self.posts)
|
||||
test_posts = [post for post in posts if post[2] == 'html-entity-test']
|
||||
test_posts = [post for post in self.posts if post[2] == 'html-entity-test']
|
||||
self.assertTrue(len(test_posts) == 1)
|
||||
|
||||
post = test_posts[0]
|
||||
|
|
@ -60,3 +62,38 @@ class TestWordpressXmlImporter(unittest.TestCase):
|
|||
self.assertTrue(title, "A normal post with some <html> entities in the"
|
||||
" title. You can't miss them.")
|
||||
self.assertTrue('&' not in title)
|
||||
|
||||
def test_decode_wp_content_returns_empty(self):
    """An empty input string must decode to an empty string."""
    decoded = decode_wp_content("")
    self.assertEqual(decoded, "")
|
||||
|
||||
def test_decode_wp_content(self):
    """Decoding the encoded sample with ``br=False`` yields the decoded sample."""
    def slurp(path):
        # Read a whole fixture file and close it again.
        with open(path, 'r') as handle:
            return handle.read()

    # Same read order as before: encoded fixture first, then the expected one.
    encoded_content = slurp(WORDPRESS_ENCODED_CONTENT_SAMPLE)
    decoded_content = slurp(WORDPRESS_DECODED_CONTENT_SAMPLE)

    actual = decode_wp_content(encoded_content, br=False)
    self.assertEqual(actual, decoded_content)
|
||||
|
||||
def test_preserve_verbatim_formatting(self):
    """Code from the "Code in List" post keeps its layout in Markdown output.

    Checks that both assignment lines survive the conversion and that the
    ``for`` line is indented less deeply than the ``print`` line nested
    under it.
    """
    def read(path):
        # Close each generated file after reading instead of leaking the
        # handle, as the former ``open(f).read()`` lambda did.
        with open(path) as handle:
            return handle.read()

    silent_f2p = mute(True)(fields2pelican)
    test_post = [p for p in self.posts if p[0].startswith("Code in List")]
    with temporary_folder() as temp:
        md = [read(f) for f in silent_f2p(test_post, 'markdown', temp)][0]
        self.assertTrue(re.search(r'\s+a = \[1, 2, 3\]', md))
        self.assertTrue(re.search(r'\s+b = \[4, 5, 6\]', md))

        for_match = re.search(r'\s+for i in zip\(a, b\):', md)
        print_match = re.search(r'\s+print i', md)
        # Fail with an assertion rather than an AttributeError on ``None``
        # when one of the expected lines is missing from the output.
        self.assertTrue(for_match is not None, "'for' line not found")
        self.assertTrue(print_match is not None, "'print' line not found")

        for_line = for_match.group(0)
        print_line = print_match.group(0)
        self.assertTrue(for_line.rindex('for') < print_line.rindex('print'))
|
||||
|
||||
def test_code_in_list(self):
    """Code inside a list item stays indented deeper than the item text.

    Uses the "Code in List" post and compares the column of the list item
    text with the column of the first code line in the Markdown output.
    """
    def read(path):
        # Close each generated file after reading instead of leaking the
        # handle, as the former ``open(f).read()`` lambda did.
        with open(path) as handle:
            return handle.read()

    silent_f2p = mute(True)(fields2pelican)
    test_post = [p for p in self.posts if p[0].startswith("Code in List")]
    with temporary_folder() as temp:
        md = [read(f) for f in silent_f2p(test_post, 'markdown', temp)][0]

        sample_match = re.search(r'- This is a code sample', md)
        code_match = re.search(r'\s+a = \[1, 2, 3\]', md)
        # Fail with an assertion rather than an AttributeError on ``None``
        # when one of the expected lines is missing from the output.
        self.assertTrue(sample_match is not None, "list item not found")
        self.assertTrue(code_match is not None, "code line not found")

        sample_line = sample_match.group(0)
        code_line = code_match.group(0)
        self.assertTrue(sample_line.rindex('This') < code_line.rindex('a'))
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
# from __future__ import unicode_literals
|
||||
|
||||
import hashlib
|
||||
import locale
|
||||
import os
|
||||
from codecs import open
|
||||
from tempfile import mkdtemp
|
||||
|
|
@ -31,6 +32,7 @@ class TestWebAssets(unittest.TestCase):
|
|||
'OUTPUT_PATH': self.temp_path,
|
||||
'PLUGINS': ['pelican.plugins.assets', ],
|
||||
'THEME': THEME_DIR,
|
||||
'LOCALE': locale.normalize('en_US'),
|
||||
}
|
||||
if override:
|
||||
settings.update(override)
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ except ImportError:
|
|||
# py2 import
|
||||
from HTMLParser import HTMLParser # NOQA
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
|
|
@ -19,6 +20,79 @@ from codecs import open
|
|||
from pelican.utils import slugify
|
||||
|
||||
|
||||
def decode_wp_content(content, br=True):
    """Turn raw WordPress post content into paragraph-wrapped HTML.

    WordPress stores post bodies with bare newlines instead of ``<p>`` and
    ``<br />`` tags. This function wraps blank-line separated chunks in
    ``<p>`` elements, keeps block-level tags out of those paragraphs and,
    optionally, converts the remaining single newlines to ``<br />``.
    NOTE(review): this looks like a port of WordPress's ``wpautop()`` --
    confirm against the PHP source before changing the regex order.

    :param content: raw post content as a string.
    :param br: when true (the default), turn single newlines outside
        ``<pre>``, ``<script>`` and ``<style>`` into ``<br />``.
    :return: the converted HTML, or ``""`` when *content* is blank.
    """
    if content.strip() == "":
        return ""

    content += "\n"

    # Swap every <pre>...</pre> block for a placeholder so the newline
    # handling below cannot touch preformatted text; restored at the end.
    pre_tags = {}
    if "<pre" in content:
        pre_parts = content.split("</pre>")
        last_pre = pre_parts.pop()
        content = ""
        pre_index = 0

        for pre_part in pre_parts:
            # str.find returns -1 when the tag is absent; the previous
            # str.index call raised ValueError, so this branch was dead.
            start = pre_part.find("<pre")
            if start == -1:
                # Malformed HTML: a "</pre>" without an opening tag. Keep
                # the text; the stray closing tag is dropped.
                content = content + pre_part
                continue
            name = "<pre wp-pre-tag-{0}></pre>".format(pre_index)
            pre_tags[name] = pre_part[start:] + "</pre>"
            content = content + pre_part[0:start] + name
            pre_index += 1
        content = content + last_pre

    # Two consecutive <br /> tags mark a paragraph break.
    content = re.sub(r'<br />\s*<br />', "\n\n", content)
    allblocks = ('(?:table|thead|tfoot|caption|col|colgroup|tbody|tr|'
                 'td|th|div|dl|dd|dt|ul|ol|li|pre|select|option|form|'
                 'map|area|blockquote|address|math|style|p|h[1-6]|hr|'
                 'fieldset|noscript|samp|legend|section|article|aside|'
                 'hgroup|header|footer|nav|figure|figcaption|details|'
                 'menu|summary)')
    # Put block-level tags on lines of their own so they end up in
    # separate chunks when the text is split into paragraphs.
    content = re.sub(r'(<' + allblocks + r'[^>]*>)', "\n\\1", content)
    content = re.sub(r'(</' + allblocks + r'>)', "\\1\n\n", content)
    if "<object" in content:
        # no <p> inside object/embed
        content = re.sub(r'\s*<param([^>]*)>\s*', "<param\\1>", content)
        content = re.sub(r'\s*</embed>\s*', '</embed>', content)

    # Wrap every non-empty, blank-line separated chunk in <p>...</p>.
    pgraphs = [s for s in re.split(r'\n\s*\n', content) if s != ""]
    content = "".join("<p>" + p.strip() + "</p>\n" for p in pgraphs)

    # under certain strange conditions it could create a P of entirely
    # whitespace
    content = re.sub(r'<p>\s*</p>', '', content)
    content = re.sub(r'<p>([^<]+)</(div|address|form)>',
                     "<p>\\1</p></\\2>", content)
    # don't wrap tags
    content = re.sub(r'<p>\s*(</?' + allblocks + r'[^>]*>)\s*</p>',
                     "\\1", content)
    # problem with nested lists
    content = re.sub(r'<p>(<li.*)</p>', "\\1", content)
    content = re.sub(r'<p><blockquote([^>]*)>', "<blockquote\\1><p>", content)
    content = content.replace('</blockquote></p>', '</p></blockquote>')
    content = re.sub(r'<p>\s*(</?' + allblocks + r'[^>]*>)', "\\1", content)
    content = re.sub(r'(</?' + allblocks + r'[^>]*>)\s*</p>', "\\1", content)

    if br:
        def _preserve_newline(match):
            return match.group(0).replace("\n", "<WPPreserveNewline />")

        # Shield newlines inside <script>/<style> from the <br /> pass.
        # The former pattern kept PHP's "/.../s" delimiters and a doubled
        # backslash, so it never matched; "(?s)" is the DOTALL equivalent.
        content = re.sub(r'(?s)<(script|style).*?</\1>',
                         _preserve_newline, content)
        # optionally make line breaks
        content = re.sub(r'(?<!<br />)\s*\n', "<br />\n", content)
        content = content.replace("<WPPreserveNewline />", "\n")

    # Drop <br /> tags that ended up directly next to block-level tags.
    content = re.sub(r'(</?' + allblocks + r'[^>]*>)\s*<br />', "\\1", content)
    content = re.sub(
        r'<br />(\s*</?(?:p|li|div|dl|dd|dt|th|pre|td|ul|ol)[^>]*>)',
        '\\1', content)
    content = re.sub(r'\n</p>', "</p>", content)

    if pre_tags:
        # Put the stashed <pre> blocks back in one pass over the text.
        pattern = '|'.join(map(re.escape, pre_tags.keys()))
        content = re.sub(pattern, lambda m: pre_tags[m.group()], content)

    return content
|
||||
|
||||
|
||||
def wp2fields(xml):
|
||||
"""Opens a wordpress XML file, and yield pelican fields"""
|
||||
try:
|
||||
|
|
@ -55,7 +129,7 @@ def wp2fields(xml):
|
|||
|
||||
tags = [tag.string for tag in item.findAll('category', {'domain' : 'post_tag'})]
|
||||
|
||||
yield (title, content, filename, date, author, categories, tags, "html")
|
||||
yield (title, content, filename, date, author, categories, tags, "wp-html")
|
||||
|
||||
def dc2fields(file):
|
||||
"""Opens a Dotclear export file, and yield pelican fields"""
|
||||
|
|
@ -257,15 +331,18 @@ def fields2pelican(fields, out_markup, output_path, dircat=False, strip_raw=Fals
|
|||
|
||||
print(out_filename)
|
||||
|
||||
if in_markup == "html":
|
||||
if in_markup in ("html", "wp-html"):
|
||||
html_filename = os.path.join(output_path, filename+'.html')
|
||||
|
||||
with open(html_filename, 'w', encoding='utf-8') as fp:
|
||||
# Replace newlines with paragraphs wrapped with <p> so
|
||||
# HTML is valid before conversion
|
||||
paragraphs = content.splitlines()
|
||||
paragraphs = ['<p>{0}</p>'.format(p) for p in paragraphs]
|
||||
new_content = ''.join(paragraphs)
|
||||
if in_markup == "wp-html":
|
||||
new_content = decode_wp_content(content)
|
||||
else:
|
||||
paragraphs = content.splitlines()
|
||||
paragraphs = ['<p>{0}</p>'.format(p) for p in paragraphs]
|
||||
new_content = ''.join(paragraphs)
|
||||
|
||||
fp.write(new_content)
|
||||
|
||||
|
|
|
|||
Binary file not shown.
|
Before Width: | Height: | Size: 803 B |
Binary file not shown.
|
Before Width: | Height: | Size: 544 B |
Binary file not shown.
|
Before Width: | Height: | Size: 458 B |
Binary file not shown.
|
Before Width: | Height: | Size: 803 B |
Binary file not shown.
|
Before Width: | Height: | Size: 544 B |
Binary file not shown.
|
Before Width: | Height: | Size: 458 B |
Loading…
Add table
Add a link
Reference in a new issue