From 5a94e364fcaf1fb5c76954fa6b534d68c84f42e0 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Fri, 22 Jul 2022 01:10:38 +0200 Subject: [PATCH 1/5] Adjust usage of `utcnow()` and `utcfromtimestamp()` for Python 3 On the transition from Python 2 to Python 3, this subtle adjustment needed when working with date and datetime objects sometimes slips through. - Instead of `datetime.utcnow()`, use `datetime.now(timezone.utc)` - Instead of `utcfromtimestamp()`, use `fromtimestamp(..., tz=timezone.utc)` See also: https://blog.ganssle.io/articles/2019/11/utcnow.html Please note that this patch still reflects the fact that only timezone-unaware (naive) datetime objects are handled yet. In order to compensate that, a number of `.replace(tzinfo=None)` calls have been added along the lines. --- src/crate/client/sqlalchemy/dialect.py | 8 +++-- .../client/sqlalchemy/doctests/itests.txt | 15 ++++------ .../client/sqlalchemy/tests/dialect_test.py | 5 ++-- .../tests/insert_from_select_test.py | 6 ++-- .../client/sqlalchemy/tests/update_test.py | 7 +++-- src/crate/client/tests.py | 8 ++--- src/crate/testing/util.py | 30 +++++++++++++++++++ 7 files changed, 55 insertions(+), 24 deletions(-) create mode 100644 src/crate/testing/util.py diff --git a/src/crate/client/sqlalchemy/dialect.py b/src/crate/client/sqlalchemy/dialect.py index 637a8f92..7b693221 100644 --- a/src/crate/client/sqlalchemy/dialect.py +++ b/src/crate/client/sqlalchemy/dialect.py @@ -20,7 +20,7 @@ # software solely pursuant to the terms of the relevant commercial agreement. import logging -from datetime import datetime, date +from datetime import datetime, date, timezone from sqlalchemy import types as sqltypes from sqlalchemy.engine import default, reflection @@ -90,7 +90,8 @@ def process(value): if not value: return try: - return datetime.utcfromtimestamp(value / 1e3).date() + return datetime.fromtimestamp(value / 1e3, tz=timezone.utc) \ + .replace(tzinfo=None).date() except TypeError: pass @@ -129,7 +130,8 @@ def process(value): if not value: return try: - return datetime.utcfromtimestamp(value / 1e3) + return datetime.fromtimestamp(value / 1e3, tz=timezone.utc) \ + .replace(tzinfo=None) except TypeError: pass diff --git a/src/crate/client/sqlalchemy/doctests/itests.txt b/src/crate/client/sqlalchemy/doctests/itests.txt index 6f285610..87c269e8 100644 --- a/src/crate/client/sqlalchemy/doctests/itests.txt +++ b/src/crate/client/sqlalchemy/doctests/itests.txt @@ -65,10 +65,10 @@ Retrieve the location from the database:: >>> location.name 'Earth' -Date should have been set at the insert due to default value via python method:: +Date should have been set at the insert due to default value via Python method:: - >>> from datetime import datetime - >>> now = datetime.utcnow() + >>> from datetime import datetime, timezone + >>> now = datetime.now(timezone.utc).replace(tzinfo=None) >>> dt = location.date >>> dt.year == now.year @@ -80,9 +80,6 @@ Date should have been set at the insert due to default value via python method:: >>> dt.day == now.day True - >>> (now - location.datetime_tz).seconds < 4 - True - Verify the return type of date and datetime:: >>> type(location.date) @@ -103,10 +100,10 @@ aren't set when the row is inserted as there is no default method:: >>> location.nullable_date is None True -The datetime and date can be set using a update statement:: +The datetime and date can be set using an update statement:: - >>> location.nullable_date = datetime.today() - >>> location.nullable_datetime = datetime.utcnow() + >>> location.nullable_date = datetime.now(timezone.utc).replace(tzinfo=None).date() + >>> location.nullable_datetime = datetime.now(timezone.utc).replace(tzinfo=None) >>> session.flush() Refresh "locations" table: diff --git a/src/crate/client/sqlalchemy/tests/dialect_test.py b/src/crate/client/sqlalchemy/tests/dialect_test.py index 7505b0e4..2566013e 100644 --- a/src/crate/client/sqlalchemy/tests/dialect_test.py +++ b/src/crate/client/sqlalchemy/tests/dialect_test.py @@ -19,7 +19,6 @@ # with Crate these terms will supersede the license and you may use the # software solely pursuant to the terms of the relevant commercial agreement. -from datetime import datetime from unittest import TestCase from unittest.mock import MagicMock, patch @@ -32,6 +31,8 @@ from sqlalchemy.orm import Session from sqlalchemy.testing import eq_, in_ +from crate.testing.util import datetime_now_utc_naive + FakeCursor = MagicMock(name='FakeCursor', spec=Cursor) @@ -62,7 +63,7 @@ class Character(self.base): name = sa.Column(sa.String, primary_key=True) age = sa.Column(sa.Integer, primary_key=True) obj = sa.Column(Object) - ts = sa.Column(sa.DateTime, onupdate=datetime.utcnow) + ts = sa.Column(sa.DateTime, onupdate=datetime_now_utc_naive) self.character = Character self.session = Session() diff --git a/src/crate/client/sqlalchemy/tests/insert_from_select_test.py b/src/crate/client/sqlalchemy/tests/insert_from_select_test.py index 0c5ba73f..7cc91352 100644 --- a/src/crate/client/sqlalchemy/tests/insert_from_select_test.py +++ b/src/crate/client/sqlalchemy/tests/insert_from_select_test.py @@ -19,7 +19,6 @@ # with Crate these terms will supersede the license and you may use the # software solely pursuant to the terms of the relevant commercial agreement. -from datetime import datetime from unittest import TestCase from unittest.mock import patch, MagicMock @@ -29,6 +28,7 @@ from sqlalchemy import select, insert from crate.client.cursor import Cursor +from crate.testing.util import datetime_now_utc_naive fake_cursor = MagicMock(name='fake_cursor') @@ -51,7 +51,7 @@ class Character(Base): name = sa.Column(sa.String, primary_key=True) age = sa.Column(sa.Integer) - ts = sa.Column(sa.DateTime, onupdate=datetime.utcnow) + ts = sa.Column(sa.DateTime, onupdate=datetime_now_utc_naive) status = sa.Column(sa.String) class CharacterArchive(Base): @@ -59,7 +59,7 @@ class CharacterArchive(Base): name = sa.Column(sa.String, primary_key=True) age = sa.Column(sa.Integer) - ts = sa.Column(sa.DateTime, onupdate=datetime.utcnow) + ts = sa.Column(sa.DateTime, onupdate=datetime_now_utc_naive) status = sa.Column(sa.String) self.character = Character diff --git a/src/crate/client/sqlalchemy/tests/update_test.py b/src/crate/client/sqlalchemy/tests/update_test.py index e955ed4b..1570d2ac 100644 --- a/src/crate/client/sqlalchemy/tests/update_test.py +++ b/src/crate/client/sqlalchemy/tests/update_test.py @@ -30,6 +30,7 @@ from sqlalchemy.ext.declarative import declarative_base from crate.client.cursor import Cursor +from crate.testing.util import datetime_now_utc_naive fake_cursor = MagicMock(name='fake_cursor') @@ -50,7 +51,7 @@ class Character(self.base): name = sa.Column(sa.String, primary_key=True) age = sa.Column(sa.Integer) obj = sa.Column(Object) - ts = sa.Column(sa.DateTime, onupdate=datetime.utcnow) + ts = sa.Column(sa.DateTime, onupdate=datetime_now_utc_naive) self.character = Character self.session = Session() @@ -60,7 +61,7 @@ def test_onupdate_is_triggered(self): char = self.character(name='Arthur') self.session.add(char) self.session.commit() - now = datetime.utcnow() + now = datetime_now_utc_naive() fake_cursor.fetchall.return_value = [('Arthur', None)] fake_cursor.description = ( @@ -89,7 +90,7 @@ def test_bulk_update(self): Checks whether bulk updates work correctly on native types and Crate types. """ - before_update_time = datetime.utcnow() + before_update_time = datetime_now_utc_naive() self.session.query(self.character).update({ # change everyone's name to Julia diff --git a/src/crate/client/tests.py b/src/crate/client/tests.py index fe0a8300..7245a0c5 100644 --- a/src/crate/client/tests.py +++ b/src/crate/client/tests.py @@ -27,7 +27,6 @@ import unittest import doctest from pprint import pprint -from datetime import datetime, date from http.server import HTTPServer, BaseHTTPRequestHandler import ssl import time @@ -56,6 +55,7 @@ ) from .sqlalchemy.tests import test_suite as sqlalchemy_test_suite from .sqlalchemy.types import ObjectArray +from ..testing.util import datetime_now_utc_naive, date_now_utc_naive log = logging.getLogger('crate.testing.layer') ch = logging.StreamHandler() @@ -213,9 +213,9 @@ class Location(Base): __tablename__ = 'locations' name = sa.Column(sa.String, primary_key=True) kind = sa.Column(sa.String) - date = sa.Column(sa.Date, default=date.today) - datetime_tz = sa.Column(sa.DateTime, default=datetime.utcnow) - datetime_notz = sa.Column(sa.DateTime, default=datetime.utcnow) + date = sa.Column(sa.Date, default=date_now_utc_naive) + datetime_tz = sa.Column(sa.DateTime, default=datetime_now_utc_naive) + datetime_notz = sa.Column(sa.DateTime, default=datetime_now_utc_naive) nullable_datetime = sa.Column(sa.DateTime) nullable_date = sa.Column(sa.Date) flag = sa.Column(sa.Boolean) diff --git a/src/crate/testing/util.py b/src/crate/testing/util.py new file mode 100644 index 00000000..af2ded88 --- /dev/null +++ b/src/crate/testing/util.py @@ -0,0 +1,30 @@ +# -*- coding: utf-8; -*- +# +# Licensed to CRATE Technology GmbH ("Crate") under one or more contributor +# license agreements. See the NOTICE file distributed with this work for +# additional information regarding copyright ownership. Crate licenses +# this file to you under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. You may +# obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# +# However, if you have executed another commercial license agreement +# with Crate these terms will supersede the license and you may use the +# software solely pursuant to the terms of the relevant commercial agreement. + +from datetime import datetime, timezone + + +def datetime_now_utc_naive(): + return datetime.now(timezone.utc).replace(tzinfo=None) + + +def date_now_utc_naive(): + return datetime_now_utc_naive().date() From 70af9c018d14c6991046b59675b993f8dc83372d Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Fri, 22 Jul 2022 19:55:37 +0200 Subject: [PATCH 2/5] Add changelog item for adjustment of `utcnow()` and `utcfromtimestamp()` --- CHANGES.txt | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/CHANGES.txt b/CHANGES.txt index c646db82..6126c9d0 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -5,6 +5,46 @@ Changes for crate Unreleased ========== +- BREAKING CHANGE: The datetime handling is now independent of the time zone + setting of your machine and always uses UTC. See next section for more + details. + + +Datetime handling +----------------- + +The datetime handling is now independent of the time zone setting of your +machine. This has to be considered when working with the SQLAlchemy dialect, +returning timestamps, and your system time zone is not expressed in UTC. + +To find out about your situation, invoke:: + + date +"%Z %z" + +where you can find out if your system is running on UTC, or not:: + + UTC +0000 + CEST +0200 + +For more information about this change, please consider reading those sections +of the Python documentation and corresponding articles. + +.. warning:: + + Because naive ``datetime`` objects are treated by many ``datetime`` methods + as local times, it is preferred to use aware datetimes to represent times + in UTC. As such, the recommended way to create an object representing + + - the current time in UTC is by calling ``datetime.now(timezone.utc)``. + - a specific timestamp in UTC is by calling ``datetime.fromtimestamp(..., tz=timezone.utc)``. + +References: + +- https://docs.python.org/3/library/datetime.html#datetime.datetime.utcnow +- https://docs.python.org/3/library/datetime.html#datetime.datetime.utcfromtimestamp +- https://blog.ganssle.io/articles/2019/11/utcnow.html +- https://aaronoellis.com/articles/python-datetime-utcnow-considered-harmful + 2022/07/04 0.27.1 ================= From 0c296080aa0d2cd2bfe8185c6c487f2655769013 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Fri, 22 Jul 2022 19:52:47 +0200 Subject: [PATCH 3/5] Add test cases for the adjustment of `utcnow()` and `utcfromtimestamp()` Those test cases try to outline the situation why 5a94e364fc is needed. They specifically exercise some scenarios where the system time zone is expressed in UTC, or not, and shows the corresponding deviances. Other test cases attempt to outline that the new helper functions `datetime_now_utc_naive` and `date_now_utc_naive` do the right thing in all situations, regardless of any corresponding system locale or time zone setting. --- setup.py | 2 + src/crate/testing/test_datetime.py | 165 +++++++++++++++++++++++++++++ src/crate/testing/tests.py | 3 + 3 files changed, 170 insertions(+) create mode 100644 src/crate/testing/test_datetime.py diff --git a/setup.py b/setup.py index 41aed1bb..57fb19df 100644 --- a/setup.py +++ b/setup.py @@ -70,6 +70,8 @@ def read(path): 'zope.testing>=4,<5', 'zope.testrunner>=5,<6', 'zc.customdoctests>=1.0.1,<2', + 'freezegun>=1,<2', + 'time-machine>=2,<3', 'createcoverage>=1,<2', 'stopit>=1.1.2,<2', 'flake8>=4,<5'], diff --git a/src/crate/testing/test_datetime.py b/src/crate/testing/test_datetime.py new file mode 100644 index 00000000..fb57273c --- /dev/null +++ b/src/crate/testing/test_datetime.py @@ -0,0 +1,165 @@ +# -*- coding: utf-8; -*- +# +# Licensed to CRATE Technology GmbH ("Crate") under one or more contributor +# license agreements. See the NOTICE file distributed with this work for +# additional information regarding copyright ownership. Crate licenses +# this file to you under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. You may +# obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# +# However, if you have executed another commercial license agreement +# with Crate these terms will supersede the license and you may use the +# software solely pursuant to the terms of the relevant commercial agreement. + +import os +from datetime import datetime, timezone, date +from unittest import TestCase, mock + +import time_machine +from freezegun import freeze_time + +from crate.testing.util import datetime_now_utc_naive, date_now_utc_naive + + +class UtcNowDatetimeTest(TestCase): + """ + Demonstrate some scenarios of "datetime.utcnow() considered harmful". + + The reason is that it depends on the system time zone setting of the + machine. On the other hand, `datetime.now(timezone.utc)` works the same way + in all situations. + + - https://blog.ganssle.io/articles/2019/11/utcnow.html + - https://aaronoellis.com/articles/python-datetime-utcnow-considered-harmful + """ + + @mock.patch.dict(os.environ, {"TZ": "UTC"}) + def test_utcnow_depends_on_system_timezone_success_with_utc(self): + """ + Exercise difference between `datetime.now(timezone.utc)` vs. `datetime.utcnow()`. + + When your server time is UTC time, everything will work perfectly. + """ + self.assertAlmostEqual( + datetime.now(timezone.utc).timestamp(), + datetime.utcnow().timestamp(), + places=1) + + @mock.patch.dict(os.environ, {"TZ": "Europe/Prague"}) + def test_utcnow_depends_on_system_timezone_failure_with_non_utc(self): + """ + Exercise difference between `datetime.now(timezone.utc)` vs. `datetime.utcnow()`. + + When your server time is expressed in a different time zone than UTC, + things will go south. + """ + self.assertNotAlmostEqual( + datetime.now(timezone.utc).timestamp(), + datetime.utcnow().timestamp(), + places=1) + + @time_machine.travel("2022-07-22T00:42:00+0200") + def test_utcnow_naive_success(self): + """ + Demonstrate that `datetime_now_utc_naive()` correctly expresses UTC. + The `day` component should be one day before the day of the timestamp + expressed in UTC. + """ + dt = datetime_now_utc_naive() + self.assertEqual(dt.day, 21) + + @time_machine.travel("2022-07-22T00:42:00+0200") + def test_date_today_naive_success(self): + """ + Demonstrate that `date_now_utc_naive()` correctly expresses UTC. + The `day` component should be one day before the day of the timestamp + expressed in UTC. + """ + dt = date_now_utc_naive() + self.assertEqual(dt.day, 21) + + @time_machine.travel("2022-07-22T00:42:00+0200") + def test_date_today_failure(self): + """ + Demonstrate that `date.today()` does not yield the date in UTC. + + This causes problems when verifying individual date components + (here: day) around midnight. + """ + dt = date.today() + self.assertEqual(dt.day, 22) + + +class UtcFromTimestampDatetimeTest(TestCase): + """ + Demonstrate some scenarios of "datetime.utcfromtimestamp() considered harmful". + + The reason is that it depends on the system time zone setting of the + machine. On the other hand, `datetime.fromtimestamp(..., tz=timezone.utc)` + works the same way in all situations. + + - https://blog.ganssle.io/articles/2019/11/utcnow.html + - https://aaronoellis.com/articles/python-datetime-utcnow-considered-harmful + """ + + TIMESTAMP_AFTER_MIDNIGHT = 1658450520 # Fri, 22 Jul 2022 00:42:00 GMT + + @mock.patch.dict(os.environ, {"TZ": "UTC"}) + def test_utcfromtimestamp_depends_on_system_timezone_success_with_utc(self): + """ + Exercise flaw of `datetime.utcfromtimestamp()`. + + When your server time is UTC time, everything will work perfectly. + """ + dt = datetime.utcfromtimestamp(self.TIMESTAMP_AFTER_MIDNIGHT) + self.assertEqual(dt.timestamp(), self.TIMESTAMP_AFTER_MIDNIGHT) + + @mock.patch.dict(os.environ, {"TZ": "Europe/Prague"}) + def test_utcfromtimestamp_depends_on_system_timezone_failure_with_non_utc(self): + """ + Exercise flaw of `datetime.utcfromtimestamp()`. + + When your server time is expressed in a different time zone than UTC, + things will go south. + """ + dt = datetime.utcfromtimestamp(self.TIMESTAMP_AFTER_MIDNIGHT) + self.assertNotEqual(dt.timestamp(), self.TIMESTAMP_AFTER_MIDNIGHT) + + @mock.patch.dict(os.environ, {"TZ": "Europe/Prague"}) + @freeze_time("2022-07-22T00:42:00+0200") + def test_utcfromtimestamp_depends_on_system_timezone_failure_with_non_utc_success_with_freezegun(self): + """ + Exercise flaw of `datetime.utcfromtimestamp()`. + + Don't be fooled: While this test has an apparent positive outcome, this + is only because `freezegun` works around the problem resp. has the same + flaw. + """ + dt = datetime.utcfromtimestamp(self.TIMESTAMP_AFTER_MIDNIGHT) + self.assertEqual(dt.timestamp(), self.TIMESTAMP_AFTER_MIDNIGHT) + + @mock.patch.dict(os.environ, {"TZ": "UTC"}) + def test_fromtimestamp_always_correct_with_utc(self): + """ + Demonstrate that `datetime.fromtimestamp(..., tz=timezone.utc)` is always correct. + Here: Emulate system time zone in UTC. + """ + dt = datetime.fromtimestamp(self.TIMESTAMP_AFTER_MIDNIGHT, tz=timezone.utc) + self.assertEqual(dt.timestamp(), self.TIMESTAMP_AFTER_MIDNIGHT) + + @mock.patch.dict(os.environ, {"TZ": "Europe/Prague"}) + def test_fromtimestamp_always_correct_with_non_utc(self): + """ + Demonstrate that `datetime.fromtimestamp(..., tz=timezone.utc)` is always correct. + Here: Emulate non-UTC system time zone. + """ + dt = datetime.fromtimestamp(self.TIMESTAMP_AFTER_MIDNIGHT, tz=timezone.utc) + self.assertEqual(dt.timestamp(), self.TIMESTAMP_AFTER_MIDNIGHT) diff --git a/src/crate/testing/tests.py b/src/crate/testing/tests.py index 9c9c3017..66eee575 100644 --- a/src/crate/testing/tests.py +++ b/src/crate/testing/tests.py @@ -25,6 +25,7 @@ import doctest import tempfile from .test_layer import LayerUtilsTest +from .test_datetime import UtcNowDatetimeTest, UtcFromTimestampDatetimeTest def docs_path(*parts): @@ -64,4 +65,6 @@ def test_suite(): ) suite.addTest(s) suite.addTest(unittest.makeSuite(LayerUtilsTest)) + suite.addTest(unittest.makeSuite(UtcNowDatetimeTest)) + suite.addTest(unittest.makeSuite(UtcFromTimestampDatetimeTest)) return suite From 10a2c7b919146609697560225b3cd4a9d3adea90 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Fri, 22 Jul 2022 20:13:17 +0200 Subject: [PATCH 4/5] CI: Don't fail fast to get a broader picture --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 4e5fe1e8..b9645552 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -20,7 +20,7 @@ jobs: python-version: ['3.7', '3.8', '3.9', '3.10'] cratedb-version: ['4.8.0'] sqla-version: ['1.3.24', '1.4.37'] - fail-fast: true + fail-fast: false env: CRATEDB_VERSION: ${{ matrix.cratedb-version }} SQLALCHEMY_VERSION: ${{ matrix.sqla-version }} From 302984c33b814071b0c3047064c15600bb6bf549 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Fri, 22 Jul 2022 20:18:36 +0200 Subject: [PATCH 5/5] Fix? Try to clear all environment variables. --- src/crate/testing/test_datetime.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/crate/testing/test_datetime.py b/src/crate/testing/test_datetime.py index fb57273c..3c39a95a 100644 --- a/src/crate/testing/test_datetime.py +++ b/src/crate/testing/test_datetime.py @@ -41,6 +41,9 @@ class UtcNowDatetimeTest(TestCase): - https://aaronoellis.com/articles/python-datetime-utcnow-considered-harmful """ + def setUp(self) -> None: + os.environ.clear() + @mock.patch.dict(os.environ, {"TZ": "UTC"}) def test_utcnow_depends_on_system_timezone_success_with_utc(self): """ @@ -112,6 +115,9 @@ class UtcFromTimestampDatetimeTest(TestCase): TIMESTAMP_AFTER_MIDNIGHT = 1658450520 # Fri, 22 Jul 2022 00:42:00 GMT + def setUp(self) -> None: + os.environ.clear() + @mock.patch.dict(os.environ, {"TZ": "UTC"}) def test_utcfromtimestamp_depends_on_system_timezone_success_with_utc(self): """