Fix temporal issues and some other improvements

Open-EO · Jan 17, 2024 · 7b28019 · 7b28019
1 parent fe7224e
commit 7b28019
Show file tree

Hide file tree

Showing 3 changed files with 53 additions and 22 deletions.
diff --git a/assets/processes b/assets/processes
diff --git a/src/openeo_test_suite/lib/process_runner/util.py b/src/openeo_test_suite/lib/process_runner/util.py
@@ -1,9 +1,13 @@
+import re
 from datetime import datetime, timezone
 
 import numpy as np
+import pandas as pd
 import xarray as xr
-from dateutil.parser import parse
+from dateutil.parser import isoparse
+from pandas._libs.tslibs.timestamps import Timestamp
 
+ISO8601_REGEX = r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}"
 
 def numpy_to_native(data, expected):
     # Converting numpy dtypes to native python types
@@ -27,10 +31,17 @@ def datacube_to_xarray(cube):
     for name in cube["order"]:
         dim = cube["dimensions"][name]
         if dim["type"] == "temporal":
-            # date replace for older Python versions that don't support ISO parsing (only available since 3.11)
             values = [
-                datetime.fromisoformat(date.replace("Z", "")) for date in dim["values"]
+                isostr_to_datetime(date, fail_on_error=False) for date in dim["values"]
             ]
+            # Verify that the values are all datetimes, otherwise likely the tests are invalid
+            if all(isinstance(date, datetime) for date in values):
+                # It looks like xarray does not support creating proper time dimensions from datetimes,
+                # so we convert to np.datetime64 explicitly.
+                # np.datetime64 doesn't like timezone-aware datetimes, so we remove the timezone.
+                values = [np.datetime64(dt.replace(tzinfo=None), 'ns') for dt in values]
+            else:
+                raise Exception("Mixed datetime types in temporal dimension")
         elif dim["type"] == "spatial":
             values = dim["values"]
             if "reference_system" in dim:
@@ -60,7 +71,8 @@ def xarray_to_datacube(data):
         type = "bands"
         values = []
         axis = None
-        if np.issubdtype(data.coords[c].dtype, np.datetime64):
+        dtype = data.coords[c].dtype
+        if np.issubdtype(dtype, np.datetime64) or isinstance(dtype, Timestamp):
             type = "temporal"
             values = [datetime_to_isostr(date) for date in data.coords[c].values]
         else:
@@ -71,6 +83,8 @@ def xarray_to_datacube(data):
             elif c == "y":  # todo: non-standardized
                 type = "spatial"
                 axis = "y"
+            elif c == "t":  # todo: non-standardized
+                type = "temporal"
 
         dim = {"type": type, "values": values}
         if axis is not None:
@@ -93,15 +107,36 @@ def xarray_to_datacube(data):
     return cube
 
 
-def isostr_to_datetime(dt):
-    return parse(dt)
+def isostr_to_datetime(dt, fail_on_error=True):
+    if not fail_on_error:
+        try:
+            return isostr_to_datetime(dt)
+        except:
+            return dt
+    else:
+        if re.match(ISO8601_REGEX, dt):
+            return isoparse(dt)
+        else:
+            raise Exception(
+                "Datetime is not in ISO format (YYYY-MM-DDThh:mm:ss plus timezone))"
+            )
 
 
 def datetime_to_isostr(dt):
-    # Convert numpy.datetime64 to timestamp (in seconds)
-    timestamp = dt.astype("datetime64[s]").astype(int)
-    # Create a datetime object from the timestamp
-    dt_object = datetime.utcfromtimestamp(timestamp).replace(tzinfo=timezone.utc)
+    if isinstance(dt, Timestamp):
+        dt_object = dt.to_pydatetime()
+    elif isinstance(dt, np.datetime64):
+        # Convert numpy.datetime64 to timestamp (in seconds)
+        timestamp = dt.astype("datetime64[s]").astype(int)
+        # Create a datetime object from the timestamp
+        dt_object = datetime.utcfromtimestamp(timestamp).replace(tzinfo=timezone.utc)
+    elif isinstance(dt, datetime):
+        dt_object = dt
+    elif re.match(ISO8601_REGEX, dt):
+        return dt
+    else:
+        raise NotImplementedError("Unsupported datetime type")
+
     # Convert to ISO format string
     return dt_object.isoformat().replace("+00:00", "Z")
 

diff --git a/src/openeo_test_suite/tests/processes/processing/test_example.py b/src/openeo_test_suite/tests/processes/processing/test_example.py
@@ -3,7 +3,7 @@
 import logging
 import math
 from pathlib import Path
-from typing import List, Optional, Tuple, Union
+from typing import List, Tuple, Union
 
 import json5
 import pytest
@@ -95,9 +95,7 @@ def test_process(
             connection=connection,
             file=file,
         )
-    except Exception as e:
-        # TODO: this `except: pytest.skip()` is overly liberal, possibly hiding real issues.
-        #       On what precise conditions should we skip? e.g. NotImplementedError?
+    except NotImplementedError as e:
         pytest.skip(str(e))
 
     throws = bool(example.get("throws"))
@@ -111,7 +109,6 @@ def test_process(
 
     # check the process results / behavior
     if throws and returns:
-        # TODO what does it mean if test can both throw and return?
         if isinstance(result, Exception):
             check_exception(example, result)
         else:
@@ -121,8 +118,6 @@ def test_process(
     elif returns:
         check_return_value(example, result, connection, file)
     else:
-        # TODO: skipping at this point of test is a bit useless.
-        #       Instead: skip earlier, or just consider the test as passed?
         pytest.skip(
             f"Test for process {process_id} doesn't provide an expected result for arguments: {example['arguments']}"
         )
@@ -288,13 +283,12 @@ def check_non_json_values(value):
 def check_exception(example, result):
     assert isinstance(result, Exception), f"Expected an exception, but got {result}"
     if isinstance(example["throws"], str):
+        # todo: we should assert here and remove the warning, but right now tooling doesn't really implement this
+        # assert result.__class__.__name__ == example["throws"]
         if result.__class__.__name__ != example["throws"]:
-            # TODO: better way to report this warning?
             _log.warning(
                 f"Expected exception {example['throws']} but got {result.__class__}"
             )
-        # todo: we should enable this end remove the two lines above, but right now tooling doesn't really implement this
-        # assert result.__class__.__name__ == example["throws"]
 
 
 def check_return_value(example, result, connection, file):
@@ -348,7 +342,9 @@ def check_return_value(example, result, connection, file):
         )
         assert {} == diff, f"Differences: {diff!s}"
     elif isinstance(example["returns"], float) and math.isnan(example["returns"]):
-        assert isinstance(result, float) and math.isnan(result), f"Got {result} instead of NaN"
+        assert isinstance(result, float) and math.isnan(
+            result
+        ), f"Got {result} instead of NaN"
     elif isinstance(example["returns"], float) or isinstance(example["returns"], int):
         msg = f"Expected a numerical result but got {result} of type {type(result)}"
         assert isinstance(result, float) or isinstance(result, int), msg