Merge pull request #851 from avaris/better-strftime

refactored utils.strftime
2025-10-15 20:28:56 +02:00 · 2013-04-17 09:31:19 -07:00 · 2013-04-17 09:31:19 -07:00 · e7370945c9
commit e7370945c9
parent bef5e4479e 48f7132ba8
3 changed files with 117 additions and 44 deletions
--- a/pelican/tests/support.py
+++ b/pelican/tests/support.py
@ -10,6 +10,7 @@ from six import StringIO
 import logging
 from logging.handlers import BufferingHandler
 import unittest
 import locale
 from functools import wraps
 from contextlib import contextmanager
@ -148,6 +149,18 @@ def module_exists(module_name):
        return True
 def locale_available(locale_):
    """Report whether ``locale_`` can be activated for ``LC_TIME``.

    The locale is switched only as a probe: when the switch succeeds, the
    previously active ``LC_TIME`` setting is put back before returning.

    :param locale_: locale name to probe; it is passed through ``str()``
        before being handed to ``locale.setlocale``.
    :returns: ``True`` if ``locale.setlocale`` accepted the name,
        ``False`` if it raised ``locale.Error``.
    """
    # Calling setlocale without a value only queries the current setting.
    previous = locale.setlocale(locale.LC_TIME)
    try:
        locale.setlocale(locale.LC_TIME, str(locale_))
    except locale.Error:
        # The switch was rejected, so there is nothing to restore.
        return False
    locale.setlocale(locale.LC_TIME, previous)
    return True
 def get_settings():
    settings = _DEFAULT_CONFIG.copy()
    settings['DIRECT_TEMPLATES'] = ['archives']
--- a/pelican/tests/test_utils.py
+++ b/pelican/tests/test_utils.py
@ -5,9 +5,11 @@ import shutil
 import os
 import datetime
 import time
 import locale
 from sys import platform
 from pelican import utils
-from .support import get_article, LoggedTestCase
+from .support import get_article, LoggedTestCase, locale_available, unittest
 from pelican.utils import NoFilesError
@ -179,3 +181,65 @@ class TestUtils(LoggedTestCase):
        f.close()
        utils.clean_output_dir(test_directory)
        self.assertTrue(not os.path.exists(test_directory))
    def test_strftime(self):
        # Locale-independent checks of utils.strftime: every format string
        # below is rendered for the same date and compared with the
        # expected text, in order, stopping at the first mismatch.
        sample_date = datetime.date(2012, 8, 29)

        cases = (
            # simple formatting
            ('%d/%m/%y', '29/08/12'),
            ('%d/%m/%Y', '29/08/2012'),
            # % escaped
            ('%d%%%m%%%y', '29%08%12'),
            ('%d %% %m %% %y', '29 % 08 % 12'),
            # not valid % formatter
            ('10% reduction in %Y', '10% reduction in 2012'),
            ('%10 reduction in %Y', '%10 reduction in 2012'),
            # with text
            ('Published in %d-%m-%Y', 'Published in 29-08-2012'),
            # with non-ascii text
            ('%d/%m/%Y Øl trinken beim Besäufnis',
             '29/08/2012 Øl trinken beim Besäufnis'),
        )

        for date_format, expected in cases:
            self.assertEqual(utils.strftime(sample_date, date_format),
                             expected)
    # test the output of utils.strftime in a different locale
    # right now, this uses Turkish locale
    # why Turkish? because I know Turkish :). And it produces non-ascii output
    # Feel free to extend with different locales
    @unittest.skipUnless(locale_available('tr_TR') or
                         locale_available('Turkish'),
                         'Turkish locale needed')
    def test_strftime_locale_dependent(self):
        # Checks utils.strftime output while LC_TIME is set to Turkish
        # ('Turkish' on win32, 'tr_TR' elsewhere).
        #
        # The locale is process-wide state, so it is restored in a
        # ``finally`` clause: a failing assertion (or a failing setlocale
        # call) must not leave the Turkish locale active for later tests.

        # store current locale
        old_locale = locale.setlocale(locale.LC_TIME)

        try:
            if platform == 'win32':
                locale.setlocale(locale.LC_TIME, str('Turkish'))
            else:
                locale.setlocale(locale.LC_TIME, str('tr_TR'))

            d = datetime.date(2012, 8, 29)

            # simple
            self.assertEqual(utils.strftime(d, '%d %B %Y'), '29 Ağustos 2012')
            self.assertEqual(utils.strftime(d, '%d %b %Y'), '29 Ağu 2012')
            self.assertEqual(utils.strftime(d, '%a, %d %b %Y'),
                             'Çrş, 29 Ağu 2012')
            self.assertEqual(utils.strftime(d, '%A, %d %B %Y'),
                             'Çarşamba, 29 Ağustos 2012')

            # with text
            self.assertEqual(
                utils.strftime(d, 'Yayınlanma tarihi: %A, %d %B %Y'),
                'Yayınlanma tarihi: Çarşamba, 29 Ağustos 2012')

            # non-ascii format candidate (someone might pass it... for some
            # reason)
            self.assertEqual(utils.strftime(d, '%Y yılında %üretim artışı'),
                             '2012 yılında %üretim artışı')
        finally:
            # restore locale back, whether or not the assertions passed
            locale.setlocale(locale.LC_TIME, old_locale)
--- a/pelican/utils.py
+++ b/pelican/utils.py
@ -24,54 +24,50 @@ logger = logging.getLogger(__name__)
 def strftime(date, date_format):
-    """
+    '''
-    Replacement for the builtin strftime().
+    Replacement for built-in strftime
-    This :func:`strftime()` is compatible to Python 2 and 3. In both cases,
+    This is necessary because of the way Py2 handles date format strings.
-    input and output is always unicode.
+    Specifically, Py2 strftime takes a bytestring. In the case of text output
    (e.g. %b, %a, etc), the output is encoded with an encoding defined by
    locale.LC_TIME. Things get messy if the formatting string has chars that
    are not valid in LC_TIME defined encoding.
-    Still, Python 3's :func:`strftime()` seems to somehow "normalize" unicode
+    This works by 'grabbing' possible format strings (those starting with %),
-    chars in the format string. So if e.g. your format string contains 'ø' or
+    formatting them with the date, (if necessary) decoding the output and
-    'ä', the result will be 'o' and 'a'.
+    replacing formatted output back.
    '''
-    See here for an `extensive testcase
+    # grab candidate format options
-    <https://github.com/dmdm/test_strftime>`_.
+    format_options = '%+.?'
    candidates = re.findall(format_options, date_format)
-    :param date: Any object that sports a :meth:`strftime()` method.
+    # replace candidates with placeholders for later % formatting
-    :param date_format: Format string, can always be unicode.
+    template = re.sub(format_options, '%s', date_format)
-    :returns: Unicode string with formatted date.
+
-    """
+    # we need to convert formatted dates back to unicode in Py2
-    # As tehkonst confirmed, above mentioned testcase runs correctly on
+    # LC_TIME determines the encoding for built-in strftime outputs
-    # Python 2 and 3 on Windows as well. Thanks.
+    lang_code, enc = locale.getlocale(locale.LC_TIME)
-    if six.PY3:
+
-        # It could be so easy... *sigh*
+    formatted_candidates = []
-        return date.strftime(date_format)
+    for candidate in candidates:
-        # TODO Perhaps we should refactor again, so that the
+        try:
-        # xmlcharrefreplace-regex-dance is always done, regardless
+            # a valid format string should be ascii
-        # of the Python version.
+            candidate.encode('ascii')
-    else:
+        except UnicodeEncodeError:
-        # We must ensure that the format string is an encoded byte
+            # if it fails, it's not a valid format option
-        # string, ASCII only WTF!!!
+            # put the candidate back as it was
-        # But with "xmlcharrefreplace" our formatted date will produce
+            formatted = candidate
        # *yuck* like this:
        #        "Øl trinken beim Besäufnis"
        #    --> "&#216;l trinken beim Bes&#228;ufnis"
        date_format = date_format.encode('ascii',
            errors="xmlcharrefreplace")
        result = date.strftime(date_format)
        # strftime() returns an encoded byte string
        # which we must decode into unicode.
        lang_code, enc = locale.getlocale(locale.LC_ALL)
        if enc:
            result = result.decode(enc)
        else:
-            result = unicode(result)
+            # if it's ascii, pass it to strftime to format
-        # Convert XML character references back to unicode characters.
+            formatted = date.strftime(candidate)
-        if "&#" in result:
+            # convert Py2 result to unicode
-            result = re.sub(r'&#(?P<num>\d+);',
+            if not six.PY3 and enc is not None:
-                            lambda m: unichr(int(m.group('num'))),
+                formatted = formatted.decode(enc)
-                            result)
+        formatted_candidates.append(formatted)
-        return result
+
    # put formatted candidates back and return
    return template % tuple(formatted_candidates)
 def python_2_unicode_compatible(klass):