Merge pull request #2309 from oulenz/translations

Control translation identification scope with new translation_id settings
This commit is contained in:
Justin Mayer 2018-11-01 13:11:19 +01:00 committed by GitHub
commit 5c08af8f80
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 165 additions and 99 deletions

View file

@ -10,6 +10,10 @@ Next release
for more fine-grained control
* ``'{base_name}'`` value in ``PAGINATION_PATTERNS`` setting no longer strips
``'bar'`` from ``'foo/bar.html'`` (unless ``'bar' == 'index'``).
* ``ARTICLE_ORDER_BY`` and ``PAGE_ORDER_BY`` now also affect: 1) category, tag
and author pages; 2) feeds; 3) draft and hidden articles and pages
* New ``ARTICLE_TRANSLATION_ID`` and ``PAGE_TRANSLATION_ID`` settings to specify
the metadata attributes used to identify translations, or to disable the
identification of translations
3.7.1 (2017-01-10)
==================

View file

@ -386,8 +386,9 @@ of available translations for that article.
language. For such advanced functionality the `i18n_subsites
plugin`_ can be used.
Pelican uses the article's URL "slug" to determine if two or more articles are
translations of one another. The slug can be set manually in the file's
By default, Pelican uses the article's URL "slug" to determine if two or more
articles are translations of one another. (This can be changed with the
``ARTICLE_TRANSLATION_ID`` setting.) The slug can be set manually in the file's
metadata; if not set explicitly, Pelican will auto-generate the slug from the
title of the article.

View file

@ -1038,6 +1038,18 @@ more information.
The default language to use.
.. data:: ARTICLE_TRANSLATION_ID = 'slug'
The metadata attribute(s) used to identify which articles are translations
of one another. May be a string or a collection of strings. Set to ``None``
or ``False`` to disable the identification of translations.
.. data:: PAGE_TRANSLATION_ID = 'slug'
The metadata attribute(s) used to identify which pages are translations
of one another. May be a string or a collection of strings. Set to ``None``
or ``False`` to disable the identification of translations.
.. data:: TRANSLATION_FEED_ATOM = 'feeds/all-%s.atom.xml'
The location to save the Atom feed for translations. [3]_

View file

@ -597,12 +597,14 @@ class ArticlesGenerator(CachingGenerator):
all_drafts.append(article)
self.add_source_path(article)
self.articles, self.translations = process_translations(all_articles)
self.articles = order_content(
self.articles,
order_by=self.settings['ARTICLE_ORDER_BY'])
self.drafts, self.drafts_translations = \
process_translations(all_drafts)
def _process(arts):
    # Split `arts` into originals and translations according to the
    # ARTICLE_TRANSLATION_ID setting, then order the originals according
    # to the ARTICLE_ORDER_BY setting.
    translation_id = self.settings['ARTICLE_TRANSLATION_ID']
    originals, translated = process_translations(
        arts, translation_id=translation_id)
    ordered = order_content(originals, self.settings['ARTICLE_ORDER_BY'])
    return ordered, translated
self.articles, self.translations = _process(all_articles)
self.drafts, self.drafts_translations = _process(all_drafts)
signals.article_generator_pretaxonomy.send(self)
@ -701,12 +703,15 @@ class PagesGenerator(CachingGenerator):
draft_pages.append(page)
self.add_source_path(page)
self.pages, self.translations = process_translations(all_pages)
self.pages = order_content(self.pages, self.settings['PAGE_ORDER_BY'])
self.hidden_pages, self.hidden_translations = \
process_translations(hidden_pages)
self.draft_pages, self.draft_translations = \
process_translations(draft_pages)
def _process(pages):
    # Split `pages` into originals and translations according to the
    # PAGE_TRANSLATION_ID setting, then order the originals according to
    # the PAGE_ORDER_BY setting.
    translation_id = self.settings['PAGE_TRANSLATION_ID']
    originals, translated = process_translations(
        pages, translation_id=translation_id)
    ordered = order_content(originals, self.settings['PAGE_ORDER_BY'])
    return ordered, translated
self.pages, self.translations = _process(all_pages)
self.hidden_pages, self.hidden_translations = _process(hidden_pages)
self.draft_pages, self.draft_translations = _process(draft_pages)
self._update_context(('pages', 'hidden_pages', 'draft_pages'))

View file

@ -108,6 +108,8 @@ DEFAULT_CONFIG = {
'DAY_ARCHIVE_SAVE_AS': '',
'RELATIVE_URLS': False,
'DEFAULT_LANG': 'en',
'ARTICLE_TRANSLATION_ID': 'slug',
'PAGE_TRANSLATION_ID': 'slug',
'DIRECT_TEMPLATES': ['index', 'tags', 'categories', 'authors', 'archives'],
'THEME_TEMPLATES_OVERRIDES': [],
'PAGINATED_TEMPLATES': {'index': None, 'tag': None, 'category': None,

View file

@ -17,6 +17,7 @@ from tempfile import mkdtemp
from six import StringIO
from pelican.contents import Article
from pelican.readers import default_metadata
from pelican.settings import DEFAULT_CONFIG
__all__ = ['get_article', 'unittest', ]
@ -113,9 +114,10 @@ def mute(returns_output=False):
return decorator
def get_article(title, slug, content, lang, extra_metadata=None):
metadata = {'slug': slug, 'title': title, 'lang': lang}
if extra_metadata is not None:
def get_article(title, content, **extra_metadata):
    """Build an Article for use in tests.

    The metadata starts from ``default_metadata`` evaluated against
    ``DEFAULT_CONFIG``; ``title`` is then set, and any additional keyword
    arguments are merged in as further metadata entries.
    """
    article_metadata = default_metadata(settings=DEFAULT_CONFIG)
    article_metadata['title'] = title
    # Merging an empty mapping is a no-op, so no emptiness check is needed.
    article_metadata.update(extra_metadata)
    return Article(content, metadata=article_metadata)

View file

@ -255,48 +255,71 @@ class TestUtils(LoggedTestCase):
content='en français'))
en_articles.append(get_article(lang='en', slug='yay1', title='Title',
content='in english',
extra_metadata={'translation': 'true'}))
translation='true'))
# 2: translation metadata not on default lang
fr_articles.append(get_article(lang='fr', slug='yay2', title='Titre',
content='en français',
extra_metadata={'translation': 'true'}))
translation='true'))
en_articles.append(get_article(lang='en', slug='yay2', title='Title',
content='in english'))
# 3: back to default language detection if all items have the
# translation metadata
fr_articles.append(get_article(lang='fr', slug='yay3', title='Titre',
content='en français',
extra_metadata={'translation': 'yep'}))
translation='yep'))
en_articles.append(get_article(lang='en', slug='yay3', title='Title',
content='in english',
extra_metadata={'translation': 'yes'}))
translation='yes'))
# 4-5: translation pairs with the same slug but different category
fr_articles.append(get_article(lang='fr', slug='yay4', title='Titre',
content='en français', category='foo'))
en_articles.append(get_article(lang='en', slug='yay4', title='Title',
content='in english', category='foo'))
fr_articles.append(get_article(lang='fr', slug='yay4', title='Titre',
content='en français', category='bar'))
en_articles.append(get_article(lang='en', slug='yay4', title='Title',
content='in english', category='bar'))
# try adding articles in both orders
for lang0_articles, lang1_articles in ((fr_articles, en_articles),
(en_articles, fr_articles)):
articles = lang0_articles + lang1_articles
index, trans = utils.process_translations(articles)
# test process_translations with falsy translation_id
index, trans = utils.process_translations(
articles, translation_id=None)
for i in range(6):
for lang_articles in [en_articles, fr_articles]:
self.assertIn(lang_articles[i], index)
self.assertNotIn(lang_articles[i], trans)
self.assertIn(en_articles[0], index)
self.assertIn(fr_articles[0], trans)
self.assertNotIn(en_articles[0], trans)
self.assertNotIn(fr_articles[0], index)
# test process_translations with simple and complex translation_id
for translation_id in ['slug', {'slug', 'category'}]:
index, trans = utils.process_translations(
articles, translation_id=translation_id)
self.assertIn(fr_articles[1], index)
self.assertIn(en_articles[1], trans)
self.assertNotIn(fr_articles[1], trans)
self.assertNotIn(en_articles[1], index)
for a in [en_articles[0], fr_articles[1], en_articles[2],
en_articles[3], en_articles[4], en_articles[5]]:
self.assertIn(a, index)
self.assertNotIn(a, trans)
self.assertIn(en_articles[2], index)
self.assertIn(fr_articles[2], trans)
self.assertNotIn(en_articles[2], trans)
self.assertNotIn(fr_articles[2], index)
for a in [fr_articles[0], en_articles[1], fr_articles[2],
fr_articles[3], fr_articles[4], fr_articles[5]]:
self.assertIn(a, trans)
self.assertNotIn(a, index)
self.assertIn(en_articles[3], index)
self.assertIn(fr_articles[3], trans)
self.assertNotIn(en_articles[3], trans)
self.assertNotIn(fr_articles[3], index)
for i in range(6):
self.assertIn(en_articles[i], fr_articles[i].translations)
self.assertIn(fr_articles[i], en_articles[i].translations)
for a_arts in [en_articles, fr_articles]:
for b_arts in [en_articles, fr_articles]:
if translation_id == 'slug':
self.assertIn(a_arts[4], b_arts[5].translations)
self.assertIn(a_arts[5], b_arts[4].translations)
elif translation_id == {'slug', 'category'}:
self.assertNotIn(a_arts[4], b_arts[5].translations)
self.assertNotIn(a_arts[5], b_arts[4].translations)
def test_watchers(self):
# Test if file changes are correctly detected

View file

@ -639,77 +639,94 @@ def escape_html(text, quote=True):
return escape(text, quote=quote)
def process_translations(content_list):
def process_translations(content_list, translation_id=None):
""" Finds translations and returns them.
Returns a tuple with two lists (index, translations). Index list includes
For each content_list item, populates the 'translations' attribute, and
returns a tuple with two lists (index, translations). Index list includes
items in default language or items which have no variant in default
language. Items with the `translation` metadata set to something else than
`False` or `false` will be used as translations, unless all the items with
the same slug have that metadata.
`False` or `false` will be used as translations, unless all the items in
the same group have that metadata.
For each content_list item, sets the 'translations' attribute.
Translations and original items are determined relative to one another
amongst items in the same group. Items are in the same group if they
have the same value(s) for the metadata attribute(s) specified by the
'translation_id', which must be a string or a collection of strings.
If 'translation_id' is falsy, the identification of translations is skipped
and all items are returned as originals.
"""
content_list.sort(key=attrgetter('slug'))
grouped_by_slugs = groupby(content_list, attrgetter('slug'))
index = []
translations = []
if not translation_id:
return content_list, []
if isinstance(translation_id, six.string_types):
translation_id = {translation_id}
index = []
try:
    # Sort by the grouping key so that items sharing the same
    # translation id end up adjacent to one another.
    content_list.sort(key=attrgetter(*translation_id))
except TypeError:
    # 'translation_id' is not something attrgetter() can be given
    # (e.g. not iterable, or containing non-string entries).
    # The adjacent literals are concatenated verbatim, so each piece
    # must carry its own separating space.
    raise TypeError('Cannot unpack {}, \'translation_id\' must be falsy, a '
                    'string or a collection of strings'
                    .format(translation_id))
except AttributeError:
    # At least one item does not expose one of the requested attributes.
    raise AttributeError('Cannot use {} as \'translation_id\', there '
                         'appear to be items without these metadata '
                         'attributes'.format(translation_id))
for id_vals, items in groupby(content_list, attrgetter(*translation_id)):
    items = list(items)
    # attrgetter() returns a bare value, not a 1-tuple, when it is given a
    # single attribute name. Wrap it so that the '*id_vals' unpacking below
    # inserts the whole value instead of its first character (and does not
    # raise IndexError when the value is an empty string).
    if len(translation_id) == 1:
        id_vals = (id_vals,)
    # Label such as 'with slug "foo"', passed on for use in warnings.
    with_str = 'with' + ', '.join([' {} "{{}}"'] * len(translation_id))\
        .format(*translation_id).format(*id_vals)
    original_items = get_original_items(items, with_str)
    index.extend(original_items)
    # Every item lists all other members of its group as translations.
    for a in items:
        a.translations = [x for x in items if x != a]
translations = [x for x in content_list if x not in index]
return index, translations
def get_original_items(items, with_str):
def _warn_source_paths(msg, items, *extra):
args = [len(items)]
args.extend(extra)
args.extend((x.source_path for x in items))
logger.warning('{}: {}'.format(msg, '\n%s' * len(items)), *args)
for slug, items in grouped_by_slugs:
items = list(items)
# warn if several items have the same lang
for lang, lang_items in groupby(items, attrgetter('lang')):
lang_items = list(lang_items)
if len(lang_items) > 1:
_warn_source_paths('There are %s items "%s" with lang %s',
lang_items, with_str, lang)
# display warnings if slug is empty
if not slug:
_warn_source_paths('There are %s items with empty slug', items)
# items with `translation` metadata will be used as translations...
candidate_items = [
i for i in items
if i.metadata.get('translation', 'false').lower() == 'false']
# display warnings if several items have the same lang
for lang, lang_items in groupby(items, attrgetter('lang')):
lang_items = list(lang_items)
if len(lang_items) > 1:
_warn_source_paths(
'There are %s items with slug "%s" with lang %s',
lang_items,
slug,
lang)
# ...unless all items with that slug are translations
if not candidate_items:
_warn_source_paths('All items ("%s") "%s" are translations',
items, with_str)
candidate_items = items
# items with `translation` metadata will be used as translations...
candidate_items = list(filter(
lambda i:
i.metadata.get('translation', 'false').lower() == 'false',
items))
# ...unless all items with that slug are translations
if not candidate_items:
logger.warning('All items with slug "%s" are translations', slug)
candidate_items = items
# find items with default language
original_items = [i for i in candidate_items if i.in_default_lang]
# find items with default language
original_items = list(filter(
attrgetter('in_default_lang'),
candidate_items))
# if there is no article with default language, go back one step
if not original_items:
original_items = candidate_items
# if there is no article with default language, go back one step
if not original_items:
original_items = candidate_items
# display warning if there are several original items
if len(original_items) > 1:
_warn_source_paths(
'There are %s original (not translated) items with slug "%s"',
original_items,
slug)
index.extend(original_items)
translations.extend([x for x in items if x not in original_items])
for a in items:
a.translations = [x for x in items if x != a]
return index, translations
# warn if there are several original items
if len(original_items) > 1:
_warn_source_paths('There are %s original (not translated) items %s',
original_items, with_str)
return original_items
def order_content(content_list, order_by='slug'):