refactored utils.strftime

2013-04-16 20:53:27 -04:00 · 2013-04-16 20:53:27 -04:00 · 48f7132ba8
commit 48f7132ba8
parent a6167f64f1
3 changed files with 117 additions and 44 deletions
--- a/pelican/tests/support.py
+++ b/pelican/tests/support.py
@ -10,6 +10,7 @@ from six import StringIO
 import logging
 from logging.handlers import BufferingHandler
 import unittest
+import locale

 from functools import wraps
 from contextlib import contextmanager
@ -148,6 +149,18 @@ def module_exists(module_name):
        return True


+def locale_available(locale_):
+    """Report whether ``locale_`` can be activated for LC_TIME.
+
+    The locale is switched on only as a probe; after a successful switch
+    the LC_TIME setting that was active on entry is put back.
+
+    :param locale_: locale name to probe, e.g. ``'tr_TR'``.
+    :returns: ``True`` if ``locale.setlocale`` accepted the name,
+        ``False`` if it raised ``locale.Error``.
+    """
+    # calling setlocale() without a value only queries the current setting
+    previous = locale.setlocale(locale.LC_TIME)
+    requested = str(locale_)
+
+    try:
+        locale.setlocale(locale.LC_TIME, requested)
+    except locale.Error:
+        return False
+
+    locale.setlocale(locale.LC_TIME, previous)
+    return True
+
+
 def get_settings():
    settings = _DEFAULT_CONFIG.copy()
    settings['DIRECT_TEMPLATES'] = ['archives']
--- a/pelican/tests/test_utils.py
+++ b/pelican/tests/test_utils.py
@ -5,9 +5,11 @@ import shutil
 import os
 import datetime
 import time
+import locale
+from sys import platform

 from pelican import utils
-from .support import get_article, LoggedTestCase
+from .support import get_article, LoggedTestCase, locale_available, unittest
 from pelican.utils import NoFilesError


@ -179,3 +181,65 @@ class TestUtils(LoggedTestCase):
        f.close()
        utils.clean_output_dir(test_directory)
        self.assertTrue(not os.path.exists(test_directory))
+
+    def test_strftime(self):
+        # Each pair is (format string, expected output) for 2012-08-29.
+        day = datetime.date(2012, 8, 29)
+
+        expectations = [
+            # simple formatting
+            ('%d/%m/%y', '29/08/12'),
+            ('%d/%m/%Y', '29/08/2012'),
+            # % escaped
+            ('%d%%%m%%%y', '29%08%12'),
+            ('%d %% %m %% %y', '29 % 08 % 12'),
+            # not valid % formatter
+            ('10% reduction in %Y', '10% reduction in 2012'),
+            ('%10 reduction in %Y', '%10 reduction in 2012'),
+            # with text
+            ('Published in %d-%m-%Y', 'Published in 29-08-2012'),
+            # with non-ascii text
+            ('%d/%m/%Y Øl trinken beim Besäufnis',
+             '29/08/2012 Øl trinken beim Besäufnis'),
+        ]
+
+        # checked in order, so the first mismatch fails the test
+        for date_format, expected in expectations:
+            self.assertEqual(utils.strftime(day, date_format), expected)
+
+
+    # test the output of utils.strftime in a different locale
+    # right now, this uses Turkish locale
+    # why Turkish? because I know Turkish :). And it produces non-ascii output
+    # Feel free to extend with different locales
+    @unittest.skipUnless(locale_available('tr_TR') or
+                         locale_available('Turkish'),
+                         'Turkish locale needed')
+    def test_strftime_locale_dependent(self):
+        # store current locale so it can be put back afterwards
+        old_locale = locale.setlocale(locale.LC_TIME)
+
+        # the locale name to activate is chosen by platform
+        if platform == 'win32':
+            locale.setlocale(locale.LC_TIME, str('Turkish'))
+        else:
+            locale.setlocale(locale.LC_TIME, str('tr_TR'))
+
+        # try/finally: a failing assertion must not leave the Turkish
+        # LC_TIME active for the tests that run after this one
+        try:
+            d = datetime.date(2012, 8, 29)
+
+            # simple
+            self.assertEqual(utils.strftime(d, '%d %B %Y'),
+                             '29 Ağustos 2012')
+            self.assertEqual(utils.strftime(d, '%d %b %Y'), '29 Ağu 2012')
+            self.assertEqual(utils.strftime(d, '%a, %d %b %Y'),
+                             'Çrş, 29 Ağu 2012')
+            self.assertEqual(utils.strftime(d, '%A, %d %B %Y'),
+                             'Çarşamba, 29 Ağustos 2012')
+
+            # with text
+            self.assertEqual(
+                utils.strftime(d, 'Yayınlanma tarihi: %A, %d %B %Y'),
+                'Yayınlanma tarihi: Çarşamba, 29 Ağustos 2012')
+
+            # non-ascii format candidate (someone might pass it... for
+            # some reason)
+            self.assertEqual(
+                utils.strftime(d, '%Y yılında %üretim artışı'),
+                '2012 yılında %üretim artışı')
+        finally:
+            # restore locale back, whether or not the assertions passed
+            locale.setlocale(locale.LC_TIME, old_locale)
--- a/pelican/utils.py
+++ b/pelican/utils.py
@ -24,54 +24,50 @@ logger = logging.getLogger(__name__)


 def strftime(date, date_format):
-    """
-    Replacement for the builtin strftime().
+    '''
+    Replacement for built-in strftime

-    This :func:`strftime()` is compatible to Python 2 and 3. In both cases,
-    input and output is always unicode.
+    This is necessary because of the way Py2 handles date format strings.
+    Specifically, Py2 strftime takes a bytestring. In the case of text output
+    (e.g. %b, %a, etc), the output is encoded with an encoding defined by
+    locale.LC_TIME. Things get messy if the formatting string has chars that
+    are not valid in LC_TIME defined encoding.

-    Still, Python 3's :func:`strftime()` seems to somehow "normalize" unicode
-    chars in the format string. So if e.g. your format string contains 'ø' or
-    'ä', the result will be 'o' and 'a'.
+    This works by 'grabbing' possible format strings (those starting with %),
+    formatting them with the date, (if necessary) decoding the output and
+    replacing formatted output back.
+
+    :param date: object providing a strftime() method; it is called once
+        per ascii-only candidate found in date_format.
+    :param date_format: format string; candidates that are not ascii-only
+        are put back unchanged.
+    :returns: date_format with every candidate replaced by its formatted
+        value (decoded on Py2 when LC_TIME reports an encoding).
+    '''

-    See here for an `extensive testcase
-    <https://github.com/dmdm/test_strftime>`_.
+    # grab candidate format options
+    # NOTE(review): the pattern also matches a '%' followed by a character
+    # that is not a strftime directive (e.g. '% '); what date.strftime()
+    # returns for such input may depend on the platform -- confirm.
+    format_options = '%+.?'
+    candidates = re.findall(format_options, date_format)

-    :param date: Any object that sports a :meth:`strftime()` method.
-    :param date_format: Format string, can always be unicode.
-    :returns: Unicode string with formatted date.
-    """
-    # As tehkonst confirmed, above mentioned testcase runs correctly on
-    # Python 2 and 3 on Windows as well. Thanks.
-    if six.PY3:
-        # It could be so easy... *sigh*
-        return date.strftime(date_format)
-        # TODO Perhaps we should refactor again, so that the
-        # xmlcharrefreplace-regex-dance is always done, regardless
-        # of the Python version.
-    else:
-        # We must ensure that the format string is an encoded byte
-        # string, ASCII only WTF!!!
-        # But with "xmlcharrefreplace" our formatted date will produce
-        # *yuck* like this:
-        #        "Øl trinken beim Besäufnis"
-        #    --> "&#216;l trinken beim Bes&#228;ufnis"
-        date_format = date_format.encode('ascii',
-            errors="xmlcharrefreplace")
-        result = date.strftime(date_format)
-        # strftime() returns an encoded byte string
-        # which we must decode into unicode.
-        lang_code, enc = locale.getlocale(locale.LC_ALL)
-        if enc:
-            result = result.decode(enc)
+    # replace candidates with placeholders for later % formatting
+    # (every '%' run is consumed by the pattern, so the template holds only
+    # '%s' placeholders, one per candidate, in the same order)
+    template = re.sub(format_options, '%s', date_format)
+
+    # we need to convert formatted dates back to unicode in Py2
+    # LC_TIME determines the encoding for built-in strftime outputs
+    lang_code, enc = locale.getlocale(locale.LC_TIME)
+
+    formatted_candidates = []
+    for candidate in candidates:
+        try:
+            # a valid format string should be ascii
+            candidate.encode('ascii')
+        except UnicodeEncodeError:
+            # if it fails, it's not a valid format option
+            # put the candidate back as it was
+            formatted = candidate
        else:
-            result = unicode(result)
-        # Convert XML character references back to unicode characters.
-        if "&#" in result:
-            result = re.sub(r'&#(?P<num>\d+);',
-                            lambda m: unichr(int(m.group('num'))),
-                            result)
-        return result
+            # if it's ascii, pass it to strftime to format
+            formatted = date.strftime(candidate)
+            # convert Py2 result to unicode
+            if not six.PY3 and enc is not None:
+                formatted = formatted.decode(enc)
+        formatted_candidates.append(formatted)
+
+    # put formatted candidates back and return
+    return template % tuple(formatted_candidates)


 def python_2_unicode_compatible(klass):