Merge pull request #851 from avaris/better-strftime

refactored utils.strftime
This commit is contained in:
Justin Mayer 2013-04-17 09:31:19 -07:00
commit e7370945c9
3 changed files with 117 additions and 44 deletions

View file

@ -10,6 +10,7 @@ from six import StringIO
import logging import logging
from logging.handlers import BufferingHandler from logging.handlers import BufferingHandler
import unittest import unittest
import locale
from functools import wraps from functools import wraps
from contextlib import contextmanager from contextlib import contextmanager
@ -148,6 +149,18 @@ def module_exists(module_name):
return True return True
def locale_available(locale_):
    """Tell whether ``locale_`` can be activated for ``LC_TIME``.

    The check is done by actually switching ``LC_TIME`` to the requested
    locale. When the switch succeeds, the previously active ``LC_TIME``
    setting is put back before returning.

    :param locale_: locale name to probe; it is passed through ``str()``
        before being handed to ``locale.setlocale``.
    :returns: ``True`` if the locale could be set, ``False`` if
        ``locale.setlocale`` raised ``locale.Error``.
    """
    # calling setlocale without a value only queries the current setting
    previous = locale.setlocale(locale.LC_TIME)
    available = True
    try:
        locale.setlocale(locale.LC_TIME, str(locale_))
    except locale.Error:
        # the switch was rejected, so there is nothing to restore
        available = False
    else:
        locale.setlocale(locale.LC_TIME, previous)
    return available
def get_settings(): def get_settings():
settings = _DEFAULT_CONFIG.copy() settings = _DEFAULT_CONFIG.copy()
settings['DIRECT_TEMPLATES'] = ['archives'] settings['DIRECT_TEMPLATES'] = ['archives']

View file

@ -5,9 +5,11 @@ import shutil
import os import os
import datetime import datetime
import time import time
import locale
from sys import platform
from pelican import utils from pelican import utils
from .support import get_article, LoggedTestCase from .support import get_article, LoggedTestCase, locale_available, unittest
from pelican.utils import NoFilesError from pelican.utils import NoFilesError
@ -179,3 +181,65 @@ class TestUtils(LoggedTestCase):
f.close() f.close()
utils.clean_output_dir(test_directory) utils.clean_output_dir(test_directory)
self.assertTrue(not os.path.exists(test_directory)) self.assertTrue(not os.path.exists(test_directory))
def test_strftime(self):
    """Check utils.strftime against a table of (format, expected) pairs.

    The table covers plain directives, escaped percent signs, percent
    signs that do not start a valid directive, and surrounding ascii and
    non-ascii text.
    """
    day = datetime.date(2012, 8, 29)

    expectations = (
        # simple formatting
        ('%d/%m/%y', '29/08/12'),
        ('%d/%m/%Y', '29/08/2012'),
        # % escaped
        ('%d%%%m%%%y', '29%08%12'),
        ('%d %% %m %% %y', '29 % 08 % 12'),
        # not valid % formatter
        ('10% reduction in %Y', '10% reduction in 2012'),
        ('%10 reduction in %Y', '%10 reduction in 2012'),
        # with text
        ('Published in %d-%m-%Y', 'Published in 29-08-2012'),
        # with non-ascii text
        ('%d/%m/%Y Øl trinken beim Besäufnis',
         '29/08/2012 Øl trinken beim Besäufnis'),
    )

    for date_format, expected in expectations:
        self.assertEqual(utils.strftime(day, date_format), expected)
# Test the output of utils.strftime in a different locale.
# Right now this uses the Turkish locale because it produces non-ascii
# output. Feel free to extend with different locales.
@unittest.skipUnless(locale_available('tr_TR') or
                     locale_available('Turkish'),
                     'Turkish locale needed')
def test_strftime_locale_dependent(self):
    """Check utils.strftime output while LC_TIME is set to Turkish.

    The LC_TIME locale active on entry is restored in a ``finally``
    clause, so a failing assertion cannot leave the process in the
    Turkish locale and influence tests that run afterwards.
    """
    # store current locale
    old_locale = locale.setlocale(locale.LC_TIME)
    try:
        # the locale name used here depends on the platform
        if platform == 'win32':
            locale.setlocale(locale.LC_TIME, str('Turkish'))
        else:
            locale.setlocale(locale.LC_TIME, str('tr_TR'))

        d = datetime.date(2012, 8, 29)

        # simple
        self.assertEqual(utils.strftime(d, '%d %B %Y'), '29 Ağustos 2012')
        self.assertEqual(utils.strftime(d, '%d %b %Y'), '29 Ağu 2012')
        self.assertEqual(utils.strftime(d, '%a, %d %b %Y'),
                         'Çrş, 29 Ağu 2012')
        self.assertEqual(utils.strftime(d, '%A, %d %B %Y'),
                         'Çarşamba, 29 Ağustos 2012')

        # with text
        self.assertEqual(
            utils.strftime(d, 'Yayınlanma tarihi: %A, %d %B %Y'),
            'Yayınlanma tarihi: Çarşamba, 29 Ağustos 2012')

        # non-ascii format candidate (someone might pass it...)
        self.assertEqual(utils.strftime(d, '%Y yılında %üretim artışı'),
                         '2012 yılında %üretim artışı')
    finally:
        # restore locale back, even when an assertion above failed
        locale.setlocale(locale.LC_TIME, old_locale)

View file

@ -24,54 +24,50 @@ logger = logging.getLogger(__name__)
def strftime(date, date_format):
    '''
    Replacement for built-in strftime

    This is necessary because of the way Py2 handles date format strings.
    Specifically, Py2 strftime takes a bytestring. In the case of text output
    (e.g. %b, %a, etc), the output is encoded with an encoding defined by
    locale.LC_TIME. Things get messy if the formatting string has chars that
    are not valid in LC_TIME defined encoding.

    This works by 'grabbing' possible format strings (those starting with %),
    formatting them with the date, (if necessary) decoding the output and
    replacing formatted output back.

    :param date: any object with a ``strftime(format)`` method.
    :param date_format: format string; text around the format options is
        passed through untouched and may contain non-ascii characters.
    :returns: ``date_format`` with every ascii format candidate replaced by
        the output of ``date.strftime`` for that candidate.
    '''
    # LC_TIME determines the encoding of built-in strftime output; it is
    # only needed when strftime hands back a byte string (Py2)
    enc = locale.getlocale(locale.LC_TIME)[1]

    def format_candidate(match):
        '''Format one candidate, or return it unchanged if not ascii.'''
        candidate = match.group(0)
        try:
            # a valid format string should be ascii
            candidate.encode('ascii')
        except UnicodeEncodeError:
            # if it fails, it's not a valid format option
            # put the candidate back as it was
            return candidate

        # if it's ascii, pass it to strftime to format
        formatted = date.strftime(candidate)
        # convert a byte string result (Py2) to unicode
        if isinstance(formatted, bytes) and enc is not None:
            formatted = formatted.decode(enc)
        return formatted

    # a candidate is a run of '%' plus the character after it, if any;
    # each one is replaced in place by its formatted value
    return re.sub('%+.?', format_candidate, date_format)
def python_2_unicode_compatible(klass): def python_2_unicode_compatible(klass):