def locale_available(locale_, category=locale.LC_TIME):
    """Return True if *locale_* can be activated for *category*.

    The check works by actually switching the process locale and then
    switching it back, so the caller's locale is left unchanged whether
    or not the probe succeeds.

    :param locale_: Locale name to probe (e.g. ``'tr_TR'`` or ``'Turkish'``).
    :param category: ``locale.LC_*`` category to probe; defaults to
        ``locale.LC_TIME`` to match the original behaviour.
    :returns: ``True`` when ``locale.setlocale`` accepts the name,
        ``False`` when it raises ``locale.Error``.
    """
    # Calling setlocale() without a value only queries the current setting.
    previous = locale.setlocale(category)

    try:
        # str() keeps the argument a native string: Python 2's setlocale
        # rejects unicode objects.
        locale.setlocale(category, str(locale_))
    except locale.Error:
        return False
    finally:
        # Always put the original locale back so probing has no side effects.
        locale.setlocale(category, previous)
    return True
# One or more '%' characters, optionally followed by a single ASCII letter.
# Only '%<letter>' can be a real strftime directive; anything else after a
# '%' is left in the text untouched.
_STRFTIME_TOKEN = re.compile(r'(%+)([A-Za-z])?')


def strftime(date, date_format):
    """Format *date* with *date_format*, tolerating non-ASCII format text.

    Python 2's ``strftime`` takes a byte string and returns text encoded
    with the ``LC_TIME`` encoding, which breaks when the format string
    contains characters outside that encoding. To avoid that, only the
    individual ``%x`` directives are handed to ``date.strftime``; all the
    surrounding text is copied through unchanged.

    Rules applied to each run of ``%`` characters:

    * every ``%%`` pair yields one literal ``%``;
    * a remaining single ``%`` followed by an ASCII letter is formatted by
      ``date.strftime``;
    * a remaining single ``%`` followed by anything else (a digit, a space,
      a non-ASCII character, end of string) is kept literally, because the
      behaviour of the platform ``strftime`` for such input is not portable.

    :param date: Any object that provides a ``strftime()`` method.
    :param date_format: Format string; may contain non-ASCII characters.
    :returns: The formatted string.
    """
    # Built-in strftime output is encoded according to LC_TIME on Python 2,
    # so that is the encoding needed to turn it back into unicode.
    _, enc = locale.getlocale(locale.LC_TIME)

    def expand(match):
        percents = match.group(1)
        letter = match.group(2) or ''
        escaped_pairs, dangling = divmod(len(percents), 2)
        prefix = '%' * escaped_pairs

        if not dangling:
            # Every '%' was part of a '%%' escape; the letter is plain text.
            return prefix + letter
        if not letter:
            # Lone '%' that does not introduce a directive: keep it as is.
            return prefix + '%'

        directive = '%' + letter
        try:
            # str() gives Python 2 the byte string it requires.
            formatted = date.strftime(str(directive))
        except ValueError:
            # Some platforms reject unknown directives instead of echoing
            # them; fall back to the literal text in that case.
            return prefix + directive

        # Python 2 returns bytes; decode them with the LC_TIME encoding.
        if isinstance(formatted, bytes) and enc is not None:
            formatted = formatted.decode(enc)
        return prefix + formatted

    return _STRFTIME_TOKEN.sub(expand, date_format)