From 77c967f1dbb4c50f900b843abf8693f924ef00a8 Mon Sep 17 00:00:00 2001
From: Oliver Urs Lenz <oliver.urs.lenz@gmail.com>
Date: Thu, 22 Mar 2018 23:47:51 +0100
Subject: [PATCH] control scope of identification of translations with new
 settings

---
 docs/changelog.rst          |   4 ++
 docs/content.rst            |   5 +-
 docs/settings.rst           |  12 ++++
 pelican/generators.py       |  29 +++++----
 pelican/settings.py         |   2 +
 pelican/tests/support.py    |   8 ++-
 pelican/tests/test_utils.py |  65 +++++++++++++------
 pelican/utils.py            | 123 ++++++++++++++++++++----------------
 8 files changed, 157 insertions(+), 91 deletions(-)

diff --git a/docs/changelog.rst b/docs/changelog.rst
index 58521e61..aa54009d 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -10,6 +10,10 @@ Next release
   for more finegrained control
 * ``'{base_name}'`` value in ``PAGINATION_PATTERNS`` setting no longer strips
   ``'bar'`` from ``'foo/bar.html'`` (unless ``'bar' == 'index'``).
+* ``ARTICLE_ORDER_BY`` and ``PAGE_ORDER_BY`` now also affect 1) category, tag
+  and author pages; 2) feeds; 3) draft and hidden articles and pages
+* New ``ARTICLE_TRANSLATION_ID`` and ``PAGE_TRANSLATION_ID`` settings to specify
+  metadata attributes used to identify translations, or to disable identification
 
 3.7.1 (2017-01-10)
 ==================
diff --git a/docs/content.rst b/docs/content.rst
index 24f91900..7bafbf6f 100644
--- a/docs/content.rst
+++ b/docs/content.rst
@@ -386,8 +386,9 @@ of available translations for that article.
    language. For such advanced functionality the `i18n_subsites
    plugin`_ can be used.
 
-Pelican uses the article's URL "slug" to determine if two or more articles are
-translations of one another. The slug can be set manually in the file's
+By default, Pelican uses the article's URL "slug" to determine if two or more
+articles are translations of one another. (This can be changed with the
+``ARTICLE_TRANSLATION_ID`` setting.) The slug can be set manually in the file's
 metadata; if not set explicitly, Pelican will auto-generate the slug from the
 title of the article.
 
diff --git a/docs/settings.rst b/docs/settings.rst
index c397255c..28b68980 100644
--- a/docs/settings.rst
+++ b/docs/settings.rst
@@ -1038,6 +1038,18 @@ more information.
 
    The default language to use.
 
+.. data:: ARTICLE_TRANSLATION_ID = 'slug'
+
+   The metadata attribute(s) used to identify which articles are translations
+   of one another. May be a string or a collection of strings. Set to ``None``
+   or ``False`` to disable the identification of translations.
+
+.. data:: PAGE_TRANSLATION_ID = 'slug'
+
+   The metadata attribute(s) used to identify which pages are translations
+   of one another. May be a string or a collection of strings. Set to ``None``
+   or ``False`` to disable the identification of translations.
+
 .. data:: TRANSLATION_FEED_ATOM = 'feeds/all-%s.atom.xml'
 
    The location to save the Atom feed for translations. [3]_
diff --git a/pelican/generators.py b/pelican/generators.py
index 2b2c02a3..a960051d 100644
--- a/pelican/generators.py
+++ b/pelican/generators.py
@@ -597,12 +597,14 @@ class ArticlesGenerator(CachingGenerator):
                 all_drafts.append(article)
             self.add_source_path(article)
 
-        self.articles, self.translations = process_translations(all_articles)
-        self.articles = order_content(
-            self.articles,
-            order_by=self.settings['ARTICLE_ORDER_BY'])
-        self.drafts, self.drafts_translations = \
-            process_translations(all_drafts)
+        def _process(arts):
+            origs, translations = process_translations(
+                arts, translation_id=self.settings['ARTICLE_TRANSLATION_ID'])
+            origs = order_content(origs, self.settings['ARTICLE_ORDER_BY'])
+            return origs, translations
+
+        self.articles, self.translations = _process(all_articles)
+        self.drafts, self.drafts_translations = _process(all_drafts)
 
         signals.article_generator_pretaxonomy.send(self)
 
@@ -701,12 +703,15 @@ class PagesGenerator(CachingGenerator):
                 draft_pages.append(page)
             self.add_source_path(page)
 
-        self.pages, self.translations = process_translations(all_pages)
-        self.pages = order_content(self.pages, self.settings['PAGE_ORDER_BY'])
-        self.hidden_pages, self.hidden_translations = \
-            process_translations(hidden_pages)
-        self.draft_pages, self.draft_translations = \
-            process_translations(draft_pages)
+        def _process(pages):
+            origs, translations = process_translations(
+                pages, translation_id=self.settings['PAGE_TRANSLATION_ID'])
+            origs = order_content(origs, self.settings['PAGE_ORDER_BY'])
+            return origs, translations
+
+        self.pages, self.translations = _process(all_pages)
+        self.hidden_pages, self.hidden_translations = _process(hidden_pages)
+        self.draft_pages, self.draft_translations = _process(draft_pages)
 
         self._update_context(('pages', 'hidden_pages', 'draft_pages'))
 
diff --git a/pelican/settings.py b/pelican/settings.py
index a58052c3..0bf4284a 100644
--- a/pelican/settings.py
+++ b/pelican/settings.py
@@ -108,6 +108,8 @@ DEFAULT_CONFIG = {
     'DAY_ARCHIVE_SAVE_AS': '',
     'RELATIVE_URLS': False,
     'DEFAULT_LANG': 'en',
+    'ARTICLE_TRANSLATION_ID': 'slug',
+    'PAGE_TRANSLATION_ID': 'slug',
     'DIRECT_TEMPLATES': ['index', 'tags', 'categories', 'authors', 'archives'],
     'THEME_TEMPLATES_OVERRIDES': [],
     'PAGINATED_TEMPLATES': {'index': None, 'tag': None, 'category': None,
diff --git a/pelican/tests/support.py b/pelican/tests/support.py
index d425395d..252a28c8 100644
--- a/pelican/tests/support.py
+++ b/pelican/tests/support.py
@@ -17,6 +17,7 @@ from tempfile import mkdtemp
 from six import StringIO
 
 from pelican.contents import Article
+from pelican.readers import default_metadata
 from pelican.settings import DEFAULT_CONFIG
 
 __all__ = ['get_article', 'unittest', ]
@@ -113,9 +114,10 @@ def mute(returns_output=False):
     return decorator
 
 
-def get_article(title, slug, content, lang, extra_metadata=None):
-    metadata = {'slug': slug, 'title': title, 'lang': lang}
-    if extra_metadata is not None:
+def get_article(title, content, **extra_metadata):
+    metadata = default_metadata(settings=DEFAULT_CONFIG)
+    metadata['title'] = title
+    if extra_metadata:  # keyword metadata (e.g. slug, lang) overrides the base
         metadata.update(extra_metadata)
     return Article(content, metadata=metadata)
 
diff --git a/pelican/tests/test_utils.py b/pelican/tests/test_utils.py
index 2c6c4cd8..2831eeed 100644
--- a/pelican/tests/test_utils.py
+++ b/pelican/tests/test_utils.py
@@ -255,48 +255,71 @@ class TestUtils(LoggedTestCase):
                                        content='en français'))
         en_articles.append(get_article(lang='en', slug='yay1', title='Title',
                                        content='in english',
-                                       extra_metadata={'translation': 'true'}))
+                                       translation='true'))
         # 2: translation metadata not on default lang
         fr_articles.append(get_article(lang='fr', slug='yay2', title='Titre',
                                        content='en français',
-                                       extra_metadata={'translation': 'true'}))
+                                       translation='true'))
         en_articles.append(get_article(lang='en', slug='yay2', title='Title',
                                        content='in english'))
         # 3: back to default language detection if all items have the
         #    translation metadata
         fr_articles.append(get_article(lang='fr', slug='yay3', title='Titre',
                                        content='en français',
-                                       extra_metadata={'translation': 'yep'}))
+                                       translation='yep'))
         en_articles.append(get_article(lang='en', slug='yay3', title='Title',
                                        content='in english',
-                                       extra_metadata={'translation': 'yes'}))
+                                       translation='yes'))
+        # 4-5: translation pairs with the same slug but different category
+        fr_articles.append(get_article(lang='fr', slug='yay4', title='Titre',
+                                       content='en français', category='foo'))
+        en_articles.append(get_article(lang='en', slug='yay4', title='Title',
+                                       content='in english', category='foo'))
+        fr_articles.append(get_article(lang='fr', slug='yay4', title='Titre',
+                                       content='en français', category='bar'))
+        en_articles.append(get_article(lang='en', slug='yay4', title='Title',
+                                       content='in english', category='bar'))
 
         # try adding articles in both orders
         for lang0_articles, lang1_articles in ((fr_articles, en_articles),
                                                (en_articles, fr_articles)):
             articles = lang0_articles + lang1_articles
 
-            index, trans = utils.process_translations(articles)
+            # test process_translations with falsy translation_id
+            index, trans = utils.process_translations(
+                articles, translation_id=None)
+            for i in range(6):
+                for lang_articles in [en_articles, fr_articles]:
+                    self.assertIn(lang_articles[i], index)
+                    self.assertNotIn(lang_articles[i], trans)
 
-            self.assertIn(en_articles[0], index)
-            self.assertIn(fr_articles[0], trans)
-            self.assertNotIn(en_articles[0], trans)
-            self.assertNotIn(fr_articles[0], index)
+            # test process_translations with simple and complex translation_id
+            for translation_id in ['slug', {'slug', 'category'}]:
+                index, trans = utils.process_translations(
+                    articles, translation_id=translation_id)
 
-            self.assertIn(fr_articles[1], index)
-            self.assertIn(en_articles[1], trans)
-            self.assertNotIn(fr_articles[1], trans)
-            self.assertNotIn(en_articles[1], index)
+                for a in [en_articles[0], fr_articles[1], en_articles[2],
+                          en_articles[3], en_articles[4], en_articles[5]]:
+                    self.assertIn(a, index)
+                    self.assertNotIn(a, trans)
 
-            self.assertIn(en_articles[2], index)
-            self.assertIn(fr_articles[2], trans)
-            self.assertNotIn(en_articles[2], trans)
-            self.assertNotIn(fr_articles[2], index)
+                for a in [fr_articles[0], en_articles[1], fr_articles[2],
+                          fr_articles[3], fr_articles[4], fr_articles[5]]:
+                    self.assertIn(a, trans)
+                    self.assertNotIn(a, index)
 
-            self.assertIn(en_articles[3], index)
-            self.assertIn(fr_articles[3], trans)
-            self.assertNotIn(en_articles[3], trans)
-            self.assertNotIn(fr_articles[3], index)
+                for i in range(6):
+                    self.assertIn(en_articles[i], fr_articles[i].translations)
+                    self.assertIn(fr_articles[i], en_articles[i].translations)
+
+                for a_arts in [en_articles, fr_articles]:
+                    for b_arts in [en_articles, fr_articles]:
+                        if translation_id == 'slug':
+                            self.assertIn(a_arts[4], b_arts[5].translations)
+                            self.assertIn(a_arts[5], b_arts[4].translations)
+                        elif translation_id == {'slug', 'category'}:
+                            self.assertNotIn(a_arts[4], b_arts[5].translations)
+                            self.assertNotIn(a_arts[5], b_arts[4].translations)
 
     def test_watchers(self):
         # Test if file changes are correctly detected
diff --git a/pelican/utils.py b/pelican/utils.py
index efc32e0c..96447586 100644
--- a/pelican/utils.py
+++ b/pelican/utils.py
@@ -639,77 +639,94 @@ def escape_html(text, quote=True):
     return escape(text, quote=quote)
 
 
-def process_translations(content_list):
+def process_translations(content_list, translation_id=None):
     """ Finds translations and returns them.
 
-    Returns a tuple with two lists (index, translations).  Index list includes
+    For each content_list item, populates the 'translations' attribute, and
+    returns a tuple with two lists (index, translations). Index list includes
     items in default language or items which have no variant in default
     language. Items with the `translation` metadata set to something else than
-    `False` or `false` will be used as translations, unless all the items with
-    the same slug have that metadata.
+    `False` or `false` will be used as translations, unless all the items in
+    the same group have that metadata.
 
-    For each content_list item, sets the 'translations' attribute.
+    Translations and original items are determined relative to one another
+    amongst items in the same group. Items are in the same group if they
+    have the same value(s) for the metadata attribute(s) specified by the
+    'translation_id', which must be a string or a collection of strings.
+    If 'translation_id' is falsy, the identification of translations is skipped
+    and all items are returned as originals.
     """
-    content_list.sort(key=attrgetter('slug'))
-    grouped_by_slugs = groupby(content_list, attrgetter('slug'))
-    index = []
-    translations = []
 
+    if not translation_id:
+        return content_list, []
+
+    if isinstance(translation_id, six.string_types):
+        translation_id = {translation_id}
+
+    index = []
+
+    try:
+        content_list.sort(key=attrgetter(*translation_id))
+    except TypeError:
+        raise TypeError('Cannot unpack {}, \'translation_id\' must be falsy, a'
+                        ' string or a collection of strings'
+                        .format(translation_id))
+    except AttributeError:
+        raise AttributeError('Cannot use {} as \'translation_id\', there'
+                             ' appear to be items without these metadata'
+                             ' attributes'.format(translation_id))
+
+    for _, items in groupby(content_list, attrgetter(*translation_id)):
+        items = list(items)  # groupby groups can only be iterated once
+        with_str = 'with' + ', '.join(  # lone group key is not a tuple
+            ' {} "{}"'.format(k, getattr(items[0], k)) for k in translation_id)
+        original_items = get_original_items(items, with_str)
+        index.extend(original_items)
+        for a in items:
+            a.translations = [x for x in items if x != a]
+
+    translations = [x for x in content_list if x not in index]
+
+    return index, translations
+
+
+def get_original_items(items, with_str):
     def _warn_source_paths(msg, items, *extra):
         args = [len(items)]
         args.extend(extra)
         args.extend((x.source_path for x in items))
         logger.warning('{}: {}'.format(msg, '\n%s' * len(items)), *args)
 
-    for slug, items in grouped_by_slugs:
-        items = list(items)
+    # warn if several items have the same lang (only adjacent ones are grouped)
+    for lang, lang_items in groupby(items, attrgetter('lang')):
+        lang_items = list(lang_items)
+        if len(lang_items) > 1:
+            _warn_source_paths('There are %s items %s with lang %s',
+                               lang_items, with_str, lang)
 
-        # display warnings if slug is empty
-        if not slug:
-            _warn_source_paths('There are %s items with empty slug', items)
+    # items with `translation` metadata will be used as translations...
+    candidate_items = [
+        i for i in items
+        if i.metadata.get('translation', 'false').lower() == 'false']
 
-        # display warnings if several items have the same lang
-        for lang, lang_items in groupby(items, attrgetter('lang')):
-            lang_items = list(lang_items)
-            if len(lang_items) > 1:
-                _warn_source_paths(
-                    'There are %s items with slug "%s" with lang %s',
-                    lang_items,
-                    slug,
-                    lang)
+    # ...unless all items in that group are translations
+    if not candidate_items:
+        _warn_source_paths('All %s items %s are translations',
+                           items, with_str)
+        candidate_items = items
 
-        # items with `translation` metadata will be used as translations...
-        candidate_items = list(filter(
-            lambda i:
-                i.metadata.get('translation', 'false').lower() == 'false',
-            items))
-        # ...unless all items with that slug are translations
-        if not candidate_items:
-            logger.warning('All items with slug "%s" are translations', slug)
-            candidate_items = items
+    # find items with default language
+    original_items = [i for i in candidate_items if i.in_default_lang]
 
-        # find items with default language
-        original_items = list(filter(
-            attrgetter('in_default_lang'),
-            candidate_items))
+    # if there is no article with default language, go back one step
+    if not original_items:
+        original_items = candidate_items
 
-        # if there is no article with default language, go back one step
-        if not original_items:
-            original_items = candidate_items
-
-        # display warning if there are several original items
-        if len(original_items) > 1:
-            _warn_source_paths(
-                'There are %s original (not translated) items with slug "%s"',
-                original_items,
-                slug)
-
-        index.extend(original_items)
-        translations.extend([x for x in items if x not in original_items])
-        for a in items:
-            a.translations = [x for x in items if x != a]
-
-    return index, translations
+    # warn if there are several original items
+    if len(original_items) > 1:
+        _warn_source_paths('There are %s original (not translated) items %s',
+                           original_items, with_str)
+    return original_items
 
 
 def order_content(content_list, order_by='slug'):