From 228fc82fc9c09493748e7fdf949fc1942cad0a7e Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Tue, 11 Jun 2013 21:40:13 -0400 Subject: [PATCH 1/2] utils: Add some ISO 8601 forms to get_date() Support the forms listed by the W3C [1]. I also removed the '%Y-%d-%m' form, which can be confused with the '%Y-%m-%d' ISO form. The new ISO forms can use 'Z' to designate UTC or '[+-]HHMM' to specify offsets from UTC. Other time zone designators are not supported. The '%z' directive has only been supported since Python 3.2 [2], so if you're running Pelican on Python 2.7, you're stuck with 'Z' for UTC. Conveniently, we get ValueErrors for both invalid directives and data/format mismatches, so we don't need special handling for the 2.7 case inside get_date(). [1]: http://www.w3.org/TR/NOTE-datetime [2]: http://bugs.python.org/issue6641 --- docs/getting_started.rst | 6 ++++- pelican/tests/test_utils.py | 52 +++++++++++++++++++++++++------------ pelican/utils.py | 50 +++++++++++++++++++++++------------ 3 files changed, 74 insertions(+), 34 deletions(-) diff --git a/docs/getting_started.rst b/docs/getting_started.rst index b41f8c18..7cd3267e 100644 --- a/docs/getting_started.rst +++ b/docs/getting_started.rst @@ -286,7 +286,10 @@ by the directory in which the file resides. For example, a file located at ``python/foobar/myfoobar.rst`` will have a category of ``foobar``. If you would like to organize your files in other ways where the name of the subfolder would not be a good category name, you can set the setting ``USE_FOLDER_AS_CATEGORY`` -to ``False``. +to ``False``. When parsing dates given in the page metadata, Pelican supports +the W3C's `suggested subset ISO 8601`__. + +__ `W3C ISO 8601`_ If you do not explicitly specify summary metadata for a given post, the ``SUMMARY_MAX_LENGTH`` setting can be used to specify how many words from the @@ -478,3 +481,4 @@ metadata. 
That article will then be output to the ``drafts`` folder and not listed on the index page nor on any category page. .. _virtualenv: http://www.virtualenv.org/ +.. _W3C ISO 8601: http://www.w3.org/TR/NOTE-datetime diff --git a/pelican/tests/test_utils.py b/pelican/tests/test_utils.py index 8b0dc13e..77e3bccb 100644 --- a/pelican/tests/test_utils.py +++ b/pelican/tests/test_utils.py @@ -6,7 +6,7 @@ import os import datetime import time import locale -from sys import platform +from sys import platform, version_info from tempfile import mkdtemp from pelican.generators import TemplatePagesGenerator @@ -37,26 +37,44 @@ class TestUtils(LoggedTestCase): def test_get_date(self): # valid ones date = datetime.datetime(year=2012, month=11, day=22) - date_hour = datetime.datetime(year=2012, month=11, day=22, hour=22, - minute=11) - date_hour_sec = datetime.datetime(year=2012, month=11, day=22, hour=22, - minute=11, second=10) - dates = {'2012-11-22': date, - '2012/11/22': date, - '2012-11-22 22:11': date_hour, - '2012/11/22 22:11': date_hour, - '22-11-2012': date, - '22/11/2012': date, - '22.11.2012': date, - '2012-22-11': date, - '22.11.2012 22:11': date_hour, - '2012-11-22 22:11:10': date_hour_sec} + date_hour = datetime.datetime( + year=2012, month=11, day=22, hour=22, minute=11) + date_hour_sec = datetime.datetime( + year=2012, month=11, day=22, hour=22, minute=11, second=10) + date_hour_sec_z = datetime.datetime( + year=2012, month=11, day=22, hour=22, minute=11, second=10, + tzinfo=datetime.timezone.utc) + date_hour_sec_0430 = datetime.datetime( + year=2012, month=11, day=22, hour=22, minute=11, second=10, + tzinfo=datetime.timezone(datetime.timedelta(hours=4, minutes=30))) + date_hour_sec_frac_z = datetime.datetime( + year=2012, month=11, day=22, hour=22, minute=11, second=10, + microsecond=123000, tzinfo=datetime.timezone.utc) + dates = { + '2012-11-22': date, + '2012/11/22': date, + '2012-11-22 22:11': date_hour, + '2012/11/22 22:11': date_hour, + '22-11-2012': date, 
+ '22/11/2012': date, + '22.11.2012': date, + '22.11.2012 22:11': date_hour, + '2012-11-22 22:11:10': date_hour_sec, + '2012-11-22T22:11:10Z': date_hour_sec_z, + '2012-11-22T22:11:10+0430': date_hour_sec_0430, + '2012-11-22T22:11:10.123Z': date_hour_sec_frac_z, + } + + # invalid ones + invalid_dates = ['2010-110-12', 'yay'] + + if version_info < (3, 2): + dates.pop('2012-11-22T22:11:10-0500') + invalid_dates.append('2012-11-22T22:11:10-0500') for value, expected in dates.items(): self.assertEqual(utils.get_date(value), expected, value) - # invalid ones - invalid_dates = ('2010-110-12', 'yay') for item in invalid_dates: self.assertRaises(ValueError, utils.get_date, item) diff --git a/pelican/utils.py b/pelican/utils.py index 4e7bdbd1..e2e69121 100644 --- a/pelican/utils.py +++ b/pelican/utils.py @@ -15,7 +15,7 @@ from collections import Hashable from functools import partial from codecs import open, BOM_UTF8 -from datetime import datetime +from datetime import datetime, timezone from itertools import groupby from jinja2 import Markup from operator import attrgetter @@ -180,17 +180,39 @@ def get_date(string): If no format matches the given date, raise a ValueError. 
""" string = re.sub(' +', ' ', string) - formats = ['%Y-%m-%d %H:%M', '%Y/%m/%d %H:%M', - '%Y-%m-%d', '%Y/%m/%d', - '%d-%m-%Y', '%Y-%d-%m', # Weird ones - '%d/%m/%Y', '%d.%m.%Y', - '%d.%m.%Y %H:%M', '%Y-%m-%d %H:%M:%S'] + formats = [ + # ISO 8601 + '%Y', + '%Y-%m', + '%Y-%m-%d', + '%Y-%m-%dT%H:%M%z', + '%Y-%m-%dT%H:%MZ', + '%Y-%m-%dT%H:%M', + '%Y-%m-%dT%H:%M:%S%z', + '%Y-%m-%dT%H:%M:%SZ', + '%Y-%m-%dT%H:%M:%S', + '%Y-%m-%dT%H:%M:%S.%f%z', + '%Y-%m-%dT%H:%M:%S.%fZ', + '%Y-%m-%dT%H:%M:%S.%f', + # end ISO 8601 forms + '%Y-%m-%d %H:%M', + '%Y-%m-%d %H:%M:%S', + '%Y/%m/%d %H:%M', + '%Y/%m/%d', + '%d-%m-%Y', + '%d.%m.%Y %H:%M', + '%d.%m.%Y', + '%d/%m/%Y', + ] for date_format in formats: try: - return datetime.strptime(string, date_format) + date = datetime.strptime(string, date_format) except ValueError: - pass - raise ValueError("'%s' is not a valid date" % string) + continue + if date_format.endswith('Z'): + date = date.replace(tzinfo=timezone.utc) + return date + raise ValueError('{0!r} is not a valid date'.format(string)) class pelican_open(object): @@ -510,15 +532,11 @@ def file_watcher(path): def set_date_tzinfo(d, tz_name=None): - """ Date without tzinfo shoudbe utc. - This function set the right tz to date that aren't utc and don't have - tzinfo. - """ - if tz_name is not None: + """Set the timezone for dates that don't have tzinfo""" + if tz_name and not d.tzinfo: tz = pytz.timezone(tz_name) return tz.localize(d) - else: - return d + return d def mkdir_p(path): From 1102143c330573f25a0fad29349cc131b171e67c Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Tue, 11 Jun 2013 22:32:51 -0400 Subject: [PATCH 2/2] utils: Use pytz instead of datetime.timezone for timezones datetime.timezone is new in Python 3.2 [1], so pytz allows us to keep support for Python 2.7. 
[1]: http://docs.python.org/dev/library/datetime.html#datetime.timezone --- pelican/tests/test_utils.py | 12 +++++++----- pelican/utils.py | 4 ++-- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/pelican/tests/test_utils.py b/pelican/tests/test_utils.py index 77e3bccb..911d6266 100644 --- a/pelican/tests/test_utils.py +++ b/pelican/tests/test_utils.py @@ -9,6 +9,8 @@ import locale from sys import platform, version_info from tempfile import mkdtemp +import pytz + from pelican.generators import TemplatePagesGenerator from pelican.writers import Writer from pelican.settings import read_settings @@ -43,13 +45,13 @@ class TestUtils(LoggedTestCase): year=2012, month=11, day=22, hour=22, minute=11, second=10) date_hour_sec_z = datetime.datetime( year=2012, month=11, day=22, hour=22, minute=11, second=10, - tzinfo=datetime.timezone.utc) - date_hour_sec_0430 = datetime.datetime( + tzinfo=pytz.timezone('UTC')) + date_hour_sec_est = datetime.datetime( year=2012, month=11, day=22, hour=22, minute=11, second=10, - tzinfo=datetime.timezone(datetime.timedelta(hours=4, minutes=30))) + tzinfo=pytz.timezone('EST')) date_hour_sec_frac_z = datetime.datetime( year=2012, month=11, day=22, hour=22, minute=11, second=10, - microsecond=123000, tzinfo=datetime.timezone.utc) + microsecond=123000, tzinfo=pytz.timezone('UTC')) dates = { '2012-11-22': date, '2012/11/22': date, @@ -61,7 +63,7 @@ class TestUtils(LoggedTestCase): '22.11.2012 22:11': date_hour, '2012-11-22 22:11:10': date_hour_sec, '2012-11-22T22:11:10Z': date_hour_sec_z, - '2012-11-22T22:11:10+0430': date_hour_sec_0430, + '2012-11-22T22:11:10-0500': date_hour_sec_est, '2012-11-22T22:11:10.123Z': date_hour_sec_frac_z, } diff --git a/pelican/utils.py b/pelican/utils.py index e2e69121..9ba234a5 100644 --- a/pelican/utils.py +++ b/pelican/utils.py @@ -15,7 +15,7 @@ from collections import Hashable from functools import partial from codecs import open, BOM_UTF8 -from datetime import datetime, timezone +from datetime 
import datetime from itertools import groupby from jinja2 import Markup from operator import attrgetter @@ -210,7 +210,7 @@ def get_date(string): except ValueError: continue if date_format.endswith('Z'): - date = date.replace(tzinfo=timezone.utc) + date = date.replace(tzinfo=pytz.timezone('UTC')) return date raise ValueError('{0!r} is not a valid date'.format(string))