Merge pull request #2309 from oulenz/translations

Control translation identification scope with new translation_id settings
2025-10-15 20:28:56 +02:00 · 2018-11-01 13:11:19 +01:00 · 2018-11-01 13:11:19 +01:00 · 5c08af8f80
commit 5c08af8f80
parent c97c128d16 77c967f1db
8 changed files with 165 additions and 99 deletions
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@ -10,6 +10,10 @@ Next release
  for more finegrained control
 * ``'{base_name}'`` value in ``PAGINATION_PATTERNS`` setting no longer strips
  ``'bar'`` from ``'foo/bar.html'`` (unless ``'bar' == 'index'``).
+* ``ARTICLE_ORDER_BY`` and ``PAGE_ORDER_BY`` now also affect: 1) category, tag
+  and author pages; 2) feeds; 3) draft and hidden articles and pages.
+* New ``ARTICLE_TRANSLATION_ID`` and ``PAGE_TRANSLATION_ID`` settings to specify
+  metadata attributes used to identify translations; or to disable translations

 3.7.1 (2017-01-10)
 ==================
--- a/docs/content.rst
+++ b/docs/content.rst
@ -386,8 +386,9 @@ of available translations for that article.
   language. For such advanced functionality the `i18n_subsites
   plugin`_ can be used.

-Pelican uses the article's URL "slug" to determine if two or more articles are
-translations of one another. The slug can be set manually in the file's
+By default, Pelican uses the article's URL "slug" to determine if two or more
+articles are translations of one another. (This can be changed with the
+``ARTICLE_TRANSLATION_ID`` setting.) The slug can be set manually in the file's
 metadata; if not set explicitly, Pelican will auto-generate the slug from the
 title of the article.

--- a/docs/settings.rst
+++ b/docs/settings.rst
@ -1038,6 +1038,18 @@ more information.

   The default language to use.

+.. data:: ARTICLE_TRANSLATION_ID = 'slug'
+
+   The metadata attribute(s) used to identify which articles are translations
+   of one another. May be a string or a collection of strings. Set to ``None``
+   or ``False`` to disable the identification of translations.
+
+.. data:: PAGE_TRANSLATION_ID = 'slug'
+
+   The metadata attribute(s) used to identify which pages are translations
+   of one another. May be a string or a collection of strings. Set to ``None``
+   or ``False`` to disable the identification of translations.
+
 .. data:: TRANSLATION_FEED_ATOM = 'feeds/all-%s.atom.xml'

   The location to save the Atom feed for translations. [3]_
--- a/pelican/generators.py
+++ b/pelican/generators.py
@ -597,12 +597,14 @@ class ArticlesGenerator(CachingGenerator):
                all_drafts.append(article)
            self.add_source_path(article)

-        self.articles, self.translations = process_translations(all_articles)
-        self.articles = order_content(
-            self.articles,
-            order_by=self.settings['ARTICLE_ORDER_BY'])
-        self.drafts, self.drafts_translations = \
-            process_translations(all_drafts)
+        def _process(arts):
+            origs, translations = process_translations(
+                arts, translation_id=self.settings['ARTICLE_TRANSLATION_ID'])
+            origs = order_content(origs, self.settings['ARTICLE_ORDER_BY'])
+            return origs, translations
+
+        self.articles, self.translations = _process(all_articles)
+        self.drafts, self.drafts_translations = _process(all_drafts)

        signals.article_generator_pretaxonomy.send(self)

@ -701,12 +703,15 @@ class PagesGenerator(CachingGenerator):
                draft_pages.append(page)
            self.add_source_path(page)

-        self.pages, self.translations = process_translations(all_pages)
-        self.pages = order_content(self.pages, self.settings['PAGE_ORDER_BY'])
-        self.hidden_pages, self.hidden_translations = \
-            process_translations(hidden_pages)
-        self.draft_pages, self.draft_translations = \
-            process_translations(draft_pages)
+        def _process(pages):
+            origs, translations = process_translations(
+                pages, translation_id=self.settings['PAGE_TRANSLATION_ID'])
+            origs = order_content(origs, self.settings['PAGE_ORDER_BY'])
+            return origs, translations
+
+        self.pages, self.translations = _process(all_pages)
+        self.hidden_pages, self.hidden_translations = _process(hidden_pages)
+        self.draft_pages, self.draft_translations = _process(draft_pages)

        self._update_context(('pages', 'hidden_pages', 'draft_pages'))

--- a/pelican/settings.py
+++ b/pelican/settings.py
@ -108,6 +108,8 @@ DEFAULT_CONFIG = {
    'DAY_ARCHIVE_SAVE_AS': '',
    'RELATIVE_URLS': False,
    'DEFAULT_LANG': 'en',
+    'ARTICLE_TRANSLATION_ID': 'slug',
+    'PAGE_TRANSLATION_ID': 'slug',
    'DIRECT_TEMPLATES': ['index', 'tags', 'categories', 'authors', 'archives'],
    'THEME_TEMPLATES_OVERRIDES': [],
    'PAGINATED_TEMPLATES': {'index': None, 'tag': None, 'category': None,
--- a/pelican/tests/support.py
+++ b/pelican/tests/support.py
@ -17,6 +17,7 @@ from tempfile import mkdtemp
 from six import StringIO

 from pelican.contents import Article
+from pelican.readers import default_metadata
 from pelican.settings import DEFAULT_CONFIG

 __all__ = ['get_article', 'unittest', ]
@ -113,9 +114,10 @@ def mute(returns_output=False):
    return decorator


-def get_article(title, slug, content, lang, extra_metadata=None):
-    metadata = {'slug': slug, 'title': title, 'lang': lang}
-    if extra_metadata is not None:
+def get_article(title, content, **extra_metadata):
+    """Build an ``Article`` for use in tests.
+
+    The metadata starts from ``default_metadata(settings=DEFAULT_CONFIG)``,
+    then ``title`` is set, and finally any ``extra_metadata`` keyword
+    arguments (e.g. ``slug``, ``lang``, ``category``) are applied on top,
+    overriding entries with the same key.
+    """
+    metadata = default_metadata(settings=DEFAULT_CONFIG)
+    metadata['title'] = title
+    if extra_metadata:
        metadata.update(extra_metadata)
    return Article(content, metadata=metadata)

--- a/pelican/tests/test_utils.py
+++ b/pelican/tests/test_utils.py
@ -255,48 +255,71 @@ class TestUtils(LoggedTestCase):
                                       content='en français'))
        en_articles.append(get_article(lang='en', slug='yay1', title='Title',
                                       content='in english',
-                                       extra_metadata={'translation': 'true'}))
+                                       translation='true'))
        # 2: translation metadata not on default lang
        fr_articles.append(get_article(lang='fr', slug='yay2', title='Titre',
                                       content='en français',
-                                       extra_metadata={'translation': 'true'}))
+                                       translation='true'))
        en_articles.append(get_article(lang='en', slug='yay2', title='Title',
                                       content='in english'))
        # 3: back to default language detection if all items have the
        #    translation metadata
        fr_articles.append(get_article(lang='fr', slug='yay3', title='Titre',
                                       content='en français',
-                                       extra_metadata={'translation': 'yep'}))
+                                       translation='yep'))
        en_articles.append(get_article(lang='en', slug='yay3', title='Title',
                                       content='in english',
-                                       extra_metadata={'translation': 'yes'}))
+                                       translation='yes'))
+        # 4-5: translation pairs with the same slug but different category
+        fr_articles.append(get_article(lang='fr', slug='yay4', title='Titre',
+                                       content='en français', category='foo'))
+        en_articles.append(get_article(lang='en', slug='yay4', title='Title',
+                                       content='in english', category='foo'))
+        fr_articles.append(get_article(lang='fr', slug='yay4', title='Titre',
+                                       content='en français', category='bar'))
+        en_articles.append(get_article(lang='en', slug='yay4', title='Title',
+                                       content='in english', category='bar'))

        # try adding articles in both orders
        for lang0_articles, lang1_articles in ((fr_articles, en_articles),
                                               (en_articles, fr_articles)):
            articles = lang0_articles + lang1_articles

-            index, trans = utils.process_translations(articles)
+            # test process_translations with falsy translation_id
+            index, trans = utils.process_translations(
+                articles, translation_id=None)
+            for i in range(6):
+                for lang_articles in [en_articles, fr_articles]:
+                    self.assertIn(lang_articles[i], index)
+                    self.assertNotIn(lang_articles[i], trans)

-            self.assertIn(en_articles[0], index)
-            self.assertIn(fr_articles[0], trans)
-            self.assertNotIn(en_articles[0], trans)
-            self.assertNotIn(fr_articles[0], index)
+            # test process_translations with simple and complex translation_id
+            for translation_id in ['slug', {'slug', 'category'}]:
+                index, trans = utils.process_translations(
+                    articles, translation_id=translation_id)

-            self.assertIn(fr_articles[1], index)
-            self.assertIn(en_articles[1], trans)
-            self.assertNotIn(fr_articles[1], trans)
-            self.assertNotIn(en_articles[1], index)
+                for a in [en_articles[0], fr_articles[1], en_articles[2],
+                          en_articles[3], en_articles[4], en_articles[5]]:
+                    self.assertIn(a, index)
+                    self.assertNotIn(a, trans)

-            self.assertIn(en_articles[2], index)
-            self.assertIn(fr_articles[2], trans)
-            self.assertNotIn(en_articles[2], trans)
-            self.assertNotIn(fr_articles[2], index)
+                for a in [fr_articles[0], en_articles[1], fr_articles[2],
+                          fr_articles[3], fr_articles[4], fr_articles[5]]:
+                    self.assertIn(a, trans)
+                    self.assertNotIn(a, index)

-            self.assertIn(en_articles[3], index)
-            self.assertIn(fr_articles[3], trans)
-            self.assertNotIn(en_articles[3], trans)
-            self.assertNotIn(fr_articles[3], index)
+                for i in range(6):
+                    self.assertIn(en_articles[i], fr_articles[i].translations)
+                    self.assertIn(fr_articles[i], en_articles[i].translations)
+
+                for a_arts in [en_articles, fr_articles]:
+                    for b_arts in [en_articles, fr_articles]:
+                        if translation_id == 'slug':
+                            self.assertIn(a_arts[4], b_arts[5].translations)
+                            self.assertIn(a_arts[5], b_arts[4].translations)
+                        elif translation_id == {'slug', 'category'}:
+                            self.assertNotIn(a_arts[4], b_arts[5].translations)
+                            self.assertNotIn(a_arts[5], b_arts[4].translations)

    def test_watchers(self):
        # Test if file changes are correctly detected
--- a/pelican/utils.py
+++ b/pelican/utils.py
@ -639,77 +639,94 @@ def escape_html(text, quote=True):
    return escape(text, quote=quote)


-def process_translations(content_list):
+def process_translations(content_list, translation_id=None):
    """ Finds translations and returns them.

-    Returns a tuple with two lists (index, translations).  Index list includes
+    For each content_list item, populates the 'translations' attribute, and
+    returns a tuple with two lists (index, translations). Index list includes
    items in default language or items which have no variant in default
    language. Items with the `translation` metadata set to something else than
-    `False` or `false` will be used as translations, unless all the items with
-    the same slug have that metadata.
+    `False` or `false` will be used as translations, unless all the items in
+    the same group have that metadata.

-    For each content_list item, sets the 'translations' attribute.
+    Translations and original items are determined relative to one another
+    amongst items in the same group. Items are in the same group if they
+    have the same value(s) for the metadata attribute(s) specified by the
+    'translation_id', which must be a string or a collection of strings.
+    If 'translation_id' is falsy, the identification of translations is skipped
+    and all items are returned as originals.
+
+    When 'translation_id' is truthy, content_list is sorted in place by the
+    corresponding attribute(s). A TypeError or AttributeError raised while
+    sorting is re-raised with a message naming the 'translation_id'.
    """
-    content_list.sort(key=attrgetter('slug'))
-    grouped_by_slugs = groupby(content_list, attrgetter('slug'))
-    index = []
-    translations = []

+    if not translation_id:
+        return content_list, []
+
+    if isinstance(translation_id, six.string_types):
+        translation_id = {translation_id}
+
+    index = []
+
+    try:
+        content_list.sort(key=attrgetter(*translation_id))
+    except TypeError:
+        raise TypeError('Cannot unpack {}, \'translation_id\' must be falsy, a'
+                        ' string or a collection of strings'
+                        .format(translation_id))
+    except AttributeError:
+        raise AttributeError('Cannot use {} as \'translation_id\', there'
+                             ' appear to be items without these metadata'
+                             ' attributes'.format(translation_id))
+
+    for id_vals, items in groupby(content_list, attrgetter(*translation_id)):
+        items = list(items)
+        # attrgetter yields a bare value (not a 1-tuple) for a single
+        # attribute; wrap it so that it is not unpacked element by element
+        # (e.g. a slug string character by character) when formatting
+        if len(translation_id) == 1:
+            id_vals = (id_vals,)
+        with_str = 'with' + ', '.join([' {} "{{}}"'] * len(translation_id))\
+            .format(*translation_id).format(*id_vals)
+        original_items = get_original_items(items, with_str)
+        index.extend(original_items)
+        for a in items:
+            a.translations = [x for x in items if x != a]
+
+    translations = [x for x in content_list if x not in index]
+
+    return index, translations
+
+
+def get_original_items(items, with_str):
+    """ Returns the items of one translation group to be used as originals.
+
+    'items' are the content objects of a single group; 'with_str' is a
+    description of the group that is only used in warning messages.
+
+    Items whose `translation` metadata is missing or equal to `false`
+    (case-insensitive) are candidates; if there are none, all items are
+    candidates. Of the candidates, those in the default language are the
+    originals; if there are none, all candidates are originals. Warnings are
+    logged when several items share a lang, when all items are marked as
+    translations, and when there are several originals.
+    """
    def _warn_source_paths(msg, items, *extra):
        args = [len(items)]
        args.extend(extra)
        args.extend((x.source_path for x in items))
        logger.warning('{}: {}'.format(msg, '\n%s' * len(items)), *args)

-    for slug, items in grouped_by_slugs:
-        items = list(items)
+    # warn if several items have the same lang; groupby only merges adjacent
+    # items, so sort by lang first to also catch non-adjacent duplicates
+    items_by_lang = sorted(items, key=attrgetter('lang'))
+    for lang, lang_items in groupby(items_by_lang, attrgetter('lang')):
+        lang_items = list(lang_items)
+        if len(lang_items) > 1:
+            _warn_source_paths('There are %s items "%s" with lang %s',
+                               lang_items, with_str, lang)

-        # display warnings if slug is empty
-        if not slug:
-            _warn_source_paths('There are %s items with empty slug', items)
+    # items with `translation` metadata will be used as translations...
+    candidate_items = [
+        i for i in items
+        if i.metadata.get('translation', 'false').lower() == 'false']

-        # display warnings if several items have the same lang
-        for lang, lang_items in groupby(items, attrgetter('lang')):
-            lang_items = list(lang_items)
-            if len(lang_items) > 1:
-                _warn_source_paths(
-                    'There are %s items with slug "%s" with lang %s',
-                    lang_items,
-                    slug,
-                    lang)
+    # ...unless all items in this group are translations
+    if not candidate_items:
+        _warn_source_paths('All items ("%s") "%s" are translations',
+                           items, with_str)
+        candidate_items = items

-        # items with `translation` metadata will be used as translations...
-        candidate_items = list(filter(
-            lambda i:
-                i.metadata.get('translation', 'false').lower() == 'false',
-            items))
-        # ...unless all items with that slug are translations
-        if not candidate_items:
-            logger.warning('All items with slug "%s" are translations', slug)
-            candidate_items = items
+    # find items with default language
+    original_items = [i for i in candidate_items if i.in_default_lang]

-        # find items with default language
-        original_items = list(filter(
-            attrgetter('in_default_lang'),
-            candidate_items))
+    # if there is no item with default language, go back one step
+    if not original_items:
+        original_items = candidate_items

-        # if there is no article with default language, go back one step
-        if not original_items:
-            original_items = candidate_items
-
-        # display warning if there are several original items
-        if len(original_items) > 1:
-            _warn_source_paths(
-                'There are %s original (not translated) items with slug "%s"',
-                original_items,
-                slug)
-
-        index.extend(original_items)
-        translations.extend([x for x in items if x not in original_items])
-        for a in items:
-            a.translations = [x for x in items if x != a]
-
-    return index, translations
+    # warn if there are several original items
+    if len(original_items) > 1:
+        _warn_source_paths('There are %s original (not translated) items %s',
+                           original_items, with_str)
+    return original_items


 def order_content(content_list, order_by='slug'):