From 77c967f1dbb4c50f900b843abf8693f924ef00a8 Mon Sep 17 00:00:00 2001 From: Oliver Urs Lenz Date: Thu, 22 Mar 2018 23:47:51 +0100 Subject: [PATCH] control scope of identification of translations with new settings --- docs/changelog.rst | 4 ++ docs/content.rst | 5 +- docs/settings.rst | 12 ++++ pelican/generators.py | 29 +++++---- pelican/settings.py | 2 + pelican/tests/support.py | 8 ++- pelican/tests/test_utils.py | 65 +++++++++++++------ pelican/utils.py | 123 ++++++++++++++++++++---------------- 8 files changed, 157 insertions(+), 91 deletions(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index 58521e61..aa54009d 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -10,6 +10,10 @@ Next release for more finegrained control * ``'{base_name}'`` value in ``PAGINATION_PATTERNS`` setting no longer strips ``'bar'`` from ``'foo/bar.html'`` (unless ``'bar' == 'index'``). +* ``ARTICLE_ORDER_BY`` and ``PAGE_ORDER_BY`` now also affect 1) category, tag + and author pages; 2) feeds; 3) draft and hidden articles and pages +* New ``ARTICLE_TRANSLATION_ID`` and ``PAGE_TRANSLATION_ID`` settings to specify + metadata attributes used to identify translations, or to disable the identification of translations 3.7.1 (2017-01-10) ================== diff --git a/docs/content.rst b/docs/content.rst index 24f91900..7bafbf6f 100644 --- a/docs/content.rst +++ b/docs/content.rst @@ -386,8 +386,9 @@ of available translations for that article. language. For such advanced functionality the `i18n_subsites plugin`_ can be used. -Pelican uses the article's URL "slug" to determine if two or more articles are -translations of one another. The slug can be set manually in the file's +By default, Pelican uses the article's URL "slug" to determine if two or more +articles are translations of one another. (This can be changed with the +``ARTICLE_TRANSLATION_ID`` setting.) 
The slug can be set manually in the file's metadata; if not set explicitly, Pelican will auto-generate the slug from the title of the article. diff --git a/docs/settings.rst b/docs/settings.rst index c397255c..28b68980 100644 --- a/docs/settings.rst +++ b/docs/settings.rst @@ -1038,6 +1038,18 @@ more information. The default language to use. +.. data:: ARTICLE_TRANSLATION_ID = 'slug' + + The metadata attribute(s) used to identify which articles are translations + of one another. May be a string or a collection of strings. Set to ``None`` + or ``False`` to disable the identification of translations. + +.. data:: PAGE_TRANSLATION_ID = 'slug' + + The metadata attribute(s) used to identify which pages are translations + of one another. May be a string or a collection of strings. Set to ``None`` + or ``False`` to disable the identification of translations. + .. data:: TRANSLATION_FEED_ATOM = 'feeds/all-%s.atom.xml' The location to save the Atom feed for translations. [3]_ diff --git a/pelican/generators.py b/pelican/generators.py index 2b2c02a3..a960051d 100644 --- a/pelican/generators.py +++ b/pelican/generators.py @@ -597,12 +597,14 @@ class ArticlesGenerator(CachingGenerator): all_drafts.append(article) self.add_source_path(article) - self.articles, self.translations = process_translations(all_articles) - self.articles = order_content( - self.articles, - order_by=self.settings['ARTICLE_ORDER_BY']) - self.drafts, self.drafts_translations = \ - process_translations(all_drafts) + def _process(arts): + origs, translations = process_translations( + arts, translation_id=self.settings['ARTICLE_TRANSLATION_ID']) + origs = order_content(origs, self.settings['ARTICLE_ORDER_BY']) + return origs, translations + + self.articles, self.translations = _process(all_articles) + self.drafts, self.drafts_translations = _process(all_drafts) signals.article_generator_pretaxonomy.send(self) @@ -701,12 +703,15 @@ class PagesGenerator(CachingGenerator): draft_pages.append(page) 
self.add_source_path(page) - self.pages, self.translations = process_translations(all_pages) - self.pages = order_content(self.pages, self.settings['PAGE_ORDER_BY']) - self.hidden_pages, self.hidden_translations = \ - process_translations(hidden_pages) - self.draft_pages, self.draft_translations = \ - process_translations(draft_pages) + def _process(pages): + origs, translations = process_translations( + pages, translation_id=self.settings['PAGE_TRANSLATION_ID']) + origs = order_content(origs, self.settings['PAGE_ORDER_BY']) + return origs, translations + + self.pages, self.translations = _process(all_pages) + self.hidden_pages, self.hidden_translations = _process(hidden_pages) + self.draft_pages, self.draft_translations = _process(draft_pages) self._update_context(('pages', 'hidden_pages', 'draft_pages')) diff --git a/pelican/settings.py b/pelican/settings.py index a58052c3..0bf4284a 100644 --- a/pelican/settings.py +++ b/pelican/settings.py @@ -108,6 +108,8 @@ DEFAULT_CONFIG = { 'DAY_ARCHIVE_SAVE_AS': '', 'RELATIVE_URLS': False, 'DEFAULT_LANG': 'en', + 'ARTICLE_TRANSLATION_ID': 'slug', + 'PAGE_TRANSLATION_ID': 'slug', 'DIRECT_TEMPLATES': ['index', 'tags', 'categories', 'authors', 'archives'], 'THEME_TEMPLATES_OVERRIDES': [], 'PAGINATED_TEMPLATES': {'index': None, 'tag': None, 'category': None, diff --git a/pelican/tests/support.py b/pelican/tests/support.py index d425395d..252a28c8 100644 --- a/pelican/tests/support.py +++ b/pelican/tests/support.py @@ -17,6 +17,7 @@ from tempfile import mkdtemp from six import StringIO from pelican.contents import Article +from pelican.readers import default_metadata from pelican.settings import DEFAULT_CONFIG __all__ = ['get_article', 'unittest', ] @@ -113,9 +114,10 @@ def mute(returns_output=False): return decorator -def get_article(title, slug, content, lang, extra_metadata=None): - metadata = {'slug': slug, 'title': title, 'lang': lang} - if extra_metadata is not None: +def get_article(title, content, **extra_metadata): + 
metadata = default_metadata(settings=DEFAULT_CONFIG) + metadata['title'] = title + if extra_metadata: metadata.update(extra_metadata) return Article(content, metadata=metadata) diff --git a/pelican/tests/test_utils.py b/pelican/tests/test_utils.py index 2c6c4cd8..2831eeed 100644 --- a/pelican/tests/test_utils.py +++ b/pelican/tests/test_utils.py @@ -255,48 +255,71 @@ class TestUtils(LoggedTestCase): content='en français')) en_articles.append(get_article(lang='en', slug='yay1', title='Title', content='in english', - extra_metadata={'translation': 'true'})) + translation='true')) # 2: translation metadata not on default lang fr_articles.append(get_article(lang='fr', slug='yay2', title='Titre', content='en français', - extra_metadata={'translation': 'true'})) + translation='true')) en_articles.append(get_article(lang='en', slug='yay2', title='Title', content='in english')) # 3: back to default language detection if all items have the # translation metadata fr_articles.append(get_article(lang='fr', slug='yay3', title='Titre', content='en français', - extra_metadata={'translation': 'yep'})) + translation='yep')) en_articles.append(get_article(lang='en', slug='yay3', title='Title', content='in english', - extra_metadata={'translation': 'yes'})) + translation='yes')) + # 4-5: translation pairs with the same slug but different category + fr_articles.append(get_article(lang='fr', slug='yay4', title='Titre', + content='en français', category='foo')) + en_articles.append(get_article(lang='en', slug='yay4', title='Title', + content='in english', category='foo')) + fr_articles.append(get_article(lang='fr', slug='yay4', title='Titre', + content='en français', category='bar')) + en_articles.append(get_article(lang='en', slug='yay4', title='Title', + content='in english', category='bar')) # try adding articles in both orders for lang0_articles, lang1_articles in ((fr_articles, en_articles), (en_articles, fr_articles)): articles = lang0_articles + lang1_articles - index, trans = 
utils.process_translations(articles) + # test process_translations with falsy translation_id + index, trans = utils.process_translations( + articles, translation_id=None) + for i in range(6): + for lang_articles in [en_articles, fr_articles]: + self.assertIn(lang_articles[i], index) + self.assertNotIn(lang_articles[i], trans) - self.assertIn(en_articles[0], index) - self.assertIn(fr_articles[0], trans) - self.assertNotIn(en_articles[0], trans) - self.assertNotIn(fr_articles[0], index) + # test process_translations with simple and complex translation_id + for translation_id in ['slug', {'slug', 'category'}]: + index, trans = utils.process_translations( + articles, translation_id=translation_id) - self.assertIn(fr_articles[1], index) - self.assertIn(en_articles[1], trans) - self.assertNotIn(fr_articles[1], trans) - self.assertNotIn(en_articles[1], index) + for a in [en_articles[0], fr_articles[1], en_articles[2], + en_articles[3], en_articles[4], en_articles[5]]: + self.assertIn(a, index) + self.assertNotIn(a, trans) - self.assertIn(en_articles[2], index) - self.assertIn(fr_articles[2], trans) - self.assertNotIn(en_articles[2], trans) - self.assertNotIn(fr_articles[2], index) + for a in [fr_articles[0], en_articles[1], fr_articles[2], + fr_articles[3], fr_articles[4], fr_articles[5]]: + self.assertIn(a, trans) + self.assertNotIn(a, index) - self.assertIn(en_articles[3], index) - self.assertIn(fr_articles[3], trans) - self.assertNotIn(en_articles[3], trans) - self.assertNotIn(fr_articles[3], index) + for i in range(6): + self.assertIn(en_articles[i], fr_articles[i].translations) + self.assertIn(fr_articles[i], en_articles[i].translations) + + for a_arts in [en_articles, fr_articles]: + for b_arts in [en_articles, fr_articles]: + if translation_id == 'slug': + self.assertIn(a_arts[4], b_arts[5].translations) + self.assertIn(a_arts[5], b_arts[4].translations) + elif translation_id == {'slug', 'category'}: + self.assertNotIn(a_arts[4], b_arts[5].translations) + 
self.assertNotIn(a_arts[5], b_arts[4].translations) def test_watchers(self): # Test if file changes are correctly detected diff --git a/pelican/utils.py b/pelican/utils.py index efc32e0c..96447586 100644 --- a/pelican/utils.py +++ b/pelican/utils.py @@ -639,77 +639,94 @@ def escape_html(text, quote=True): return escape(text, quote=quote) -def process_translations(content_list): +def process_translations(content_list, translation_id=None): """ Finds translations and returns them. - Returns a tuple with two lists (index, translations). Index list includes + For each content_list item, populates the 'translations' attribute, and + returns a tuple with two lists (index, translations). Index list includes items in default language or items which have no variant in default language. Items with the `translation` metadata set to something else than - `False` or `false` will be used as translations, unless all the items with - the same slug have that metadata. + `False` or `false` will be used as translations, unless all the items in + the same group have that metadata. - For each content_list item, sets the 'translations' attribute. + Translations and original items are determined relative to one another + amongst items in the same group. Items are in the same group if they + have the same value(s) for the metadata attribute(s) specified by the + 'translation_id', which must be a string or a collection of strings. + If 'translation_id' is falsy, the identification of translations is skipped + and all items are returned as originals. 
""" - content_list.sort(key=attrgetter('slug')) - grouped_by_slugs = groupby(content_list, attrgetter('slug')) - index = [] - translations = [] + if not translation_id: + return content_list, [] + + if isinstance(translation_id, six.string_types): + translation_id = {translation_id} + + index = [] + + try: + content_list.sort(key=attrgetter(*translation_id)) + except TypeError: + raise TypeError('Cannot unpack {}, \'translation_id\' must be falsy, a' + 'string or a collection of strings' + .format(translation_id)) + except AttributeError: + raise AttributeError('Cannot use {} as \'translation_id\', there' + 'appear to be items without these metadata' + 'attributes'.format(translation_id)) + + for id_vals, items in groupby(content_list, attrgetter(*translation_id)): + items = list(items) + with_str = 'with' + ', '.join([' {} "{{}}"'] * len(translation_id))\ + .format(*translation_id).format(*id_vals) + original_items = get_original_items(items, with_str) + index.extend(original_items) + for a in items: + a.translations = [x for x in items if x != a] + + translations = [x for x in content_list if x not in index] + + return index, translations + + +def get_original_items(items, with_str): def _warn_source_paths(msg, items, *extra): args = [len(items)] args.extend(extra) args.extend((x.source_path for x in items)) logger.warning('{}: {}'.format(msg, '\n%s' * len(items)), *args) - for slug, items in grouped_by_slugs: - items = list(items) + # warn if several items have the same lang + for lang, lang_items in groupby(items, attrgetter('lang')): + lang_items = list(lang_items) + if len(lang_items) > 1: + _warn_source_paths('There are %s items "%s" with lang %s', + lang_items, with_str, lang) - # display warnings if slug is empty - if not slug: - _warn_source_paths('There are %s items with empty slug', items) + # items with `translation` metadata will be used as translations... 
+ candidate_items = [ + i for i in items + if i.metadata.get('translation', 'false').lower() == 'false'] - # display warnings if several items have the same lang - for lang, lang_items in groupby(items, attrgetter('lang')): - lang_items = list(lang_items) - if len(lang_items) > 1: - _warn_source_paths( - 'There are %s items with slug "%s" with lang %s', - lang_items, - slug, - lang) + # ...unless all items with that slug are translations + if not candidate_items: + _warn_source_paths('All items ("%s") "%s" are translations', + items, with_str) + candidate_items = items - # items with `translation` metadata will be used as translations... - candidate_items = list(filter( - lambda i: - i.metadata.get('translation', 'false').lower() == 'false', - items)) - # ...unless all items with that slug are translations - if not candidate_items: - logger.warning('All items with slug "%s" are translations', slug) - candidate_items = items + # find items with default language + original_items = [i for i in candidate_items if i.in_default_lang] - # find items with default language - original_items = list(filter( - attrgetter('in_default_lang'), - candidate_items)) + # if there is no article with default language, go back one step + if not original_items: + original_items = candidate_items - # if there is no article with default language, go back one step - if not original_items: - original_items = candidate_items - - # display warning if there are several original items - if len(original_items) > 1: - _warn_source_paths( - 'There are %s original (not translated) items with slug "%s"', - original_items, - slug) - - index.extend(original_items) - translations.extend([x for x in items if x not in original_items]) - for a in items: - a.translations = [x for x in items if x != a] - - return index, translations + # warn if there are several original items + if len(original_items) > 1: + _warn_source_paths('There are %s original (not translated) items %s', + original_items, with_str) + 
return original_items def order_content(content_list, order_by='slug'):