From 9bb76f2501fece047e9e5d2e93133d732350e66f Mon Sep 17 00:00:00 2001 From: Magnus Ulimoen Date: Wed, 7 Dec 2022 14:58:49 +0100 Subject: [PATCH 1/5] Initial xarray support --- modules/xarray/README.md | 21 +++ modules/xarray/fimex_xarray/__init__.py | 1 + modules/xarray/fimex_xarray/xr.py | 171 ++++++++++++++++++++++++ modules/xarray/pyproject.toml | 17 +++ 4 files changed, 210 insertions(+) create mode 100644 modules/xarray/README.md create mode 100644 modules/xarray/fimex_xarray/__init__.py create mode 100755 modules/xarray/fimex_xarray/xr.py create mode 100644 modules/xarray/pyproject.toml diff --git a/modules/xarray/README.md b/modules/xarray/README.md new file mode 100644 index 00000000..25d9b66d --- /dev/null +++ b/modules/xarray/README.md @@ -0,0 +1,21 @@ +# Installation (from VCS) + +```sh +pip install "git+https://github.com/magnusuMET/fimex.git@feature/xarray#subdirectory=modules/xarray" +``` + +# Usage + +The module is installed as an engine which `xarray` can use directly. You can open a dataset using +```python +import xarray + +xarray.open_dataset("mydataset.nc", engine="fimex") +``` + +Pass `config` or `filetype` if necessary: +```python +import xarray + +xarray.open_dataset("mydataset", filtype="grbml", config="myconfig.xml", engine="fimex") +``` diff --git a/modules/xarray/fimex_xarray/__init__.py b/modules/xarray/fimex_xarray/__init__.py new file mode 100644 index 00000000..f1337666 --- /dev/null +++ b/modules/xarray/fimex_xarray/__init__.py @@ -0,0 +1 @@ +from .xr import FimexBackendEntrypoint, FimexDataVariable diff --git a/modules/xarray/fimex_xarray/xr.py b/modules/xarray/fimex_xarray/xr.py new file mode 100755 index 00000000..0fc11123 --- /dev/null +++ b/modules/xarray/fimex_xarray/xr.py @@ -0,0 +1,171 @@ +#! /usr/bin/env python3 +import pyfimex0 +import numpy as np +import xarray +from xarray.backends import BackendEntrypoint, BackendArray +import os + + +DTYPE_MAPPINGS = { + pyfimex0.CDMDataType.FLOAT: np.float32, + pyfimex0.CDMDataType.DOUBLE: np.float64, + pyfimex0.CDMDataType.INT: np.int, + pyfimex0.CDMDataType.INT64: np.int64, + pyfimex0.CDMDataType.UINT: np.uint, + pyfimex0.CDMDataType.UINT64: np.uint64, + pyfimex0.CDMDataType.SHORT: np.short, + pyfimex0.CDMDataType.USHORT: np.ushort, + pyfimex0.CDMDataType.STRING: str, + pyfimex0.CDMDataType.CHAR: np.byte, + pyfimex0.CDMDataType.UCHAR: np.ubyte, +} + + +class FimexBackendEntrypoint(BackendEntrypoint): + def open_dataset(self, filename_or_obj, drop_variables, *, config=None, filetype=None, decode_times=True, scale_offset=True): + if filetype is None: + filetype = "" + if config is None: + config = "" + fh = pyfimex0.createFileReader(filetype, filename_or_obj, config) + cdm = fh.getCDM() + + var_names = set(cdm.getVariableNames()) + attr_names = cdm.getGlobalAttributeNames() + + attrs = dict() + for aname in attr_names: + attr = cdm.getGlobalAttribute(aname) + attrv = attr.getData().values() + attrs[aname] = attrv + + coord_names = cdm.getDimensionNames() + coords = dict() + for cname in coord_names: + var = cdm.getVariable(cname) + values = var.getData().values() + shape = var.getShape() + if len(shape) != 1: + raise Exception() + + if scale_offset: + try: + scale_factor = cdm.getAttribute(cname, "scale_factor").getData().values() + except RuntimeError: + scale_factor = 1 + try: + add_offset = cdm.getAttribute(cname, "add_offset").getData().values() + except RuntimeError: + add_offset = 0 + values = values*scale_factor + add_offset + coords[cname] = values + try: + var_names.remove(cname) + except KeyError: + pass + + vars = dict() + for vname in var_names: + var = cdm.getVariable(vname) + dims = list(reversed(var.getShape())) + attrs = dict() + for aname in cdm.getAttributeNames(vname): + attr = cdm.getAttribute(vname, aname) + attrv = attr.getData().values() + if len(attrv) == 1: + attrv = attrv[0] + attrs[aname] = attrv + + data = FimexDataVariable(fh, cdm, var, scale_offset=scale_offset) + data = xarray.core.indexing.LazilyIndexedArray(data) + var = xarray.Variable(data=data, dims=dims, attrs=attrs) + vars[vname] = var + + if decode_times: + if "time" in coord_names: + var = coords["time"] + var = np.array([np.datetime64(int(v), "s") for v in var]) + + coords["time"] = var + + ds = xarray.Dataset(data_vars=vars, attrs=attrs, coords=coords) + + return ds + + open_dataset_parameters = ["filename_or_obj", "drop_variables", "config", "filetype", "decode_times", "scale_offset"] + + def guess_can_open(self, filename_or_obj): + try: + _, ext = os.path.splitext(filename_or_obj) + except TypeError: + return False + + return ext in {".grbml", ".ncml", ".grb", ".nc"} + + description = "Read using fimex" + url = "https://github.com/metno/fimex" + + +class FimexDataVariable(BackendArray): + def __init__(self, fh, cdm, var, scale_offset): + super().__init__() + self.fh = fh + self.var = var + self.cdm = cdm + self.data = var.getData() + self.scale_offset = scale_offset + + shape = [] + for s in var.getShape(): + shape.append(cdm.getDimension(s).getLength()) + self.shape = list(reversed(shape)) + try: + dtype = self.data.getDataType() + self.dtype = np.dtype(DTYPE_MAPPINGS[dtype]) + except AttributeError: + self.dtype = np.dtype(np.float64) + + def __getitem__( + self, key: xarray.core.indexing.ExplicitIndexer + ) -> np.typing.ArrayLike: + return xarray.core.indexing.explicit_indexing_adapter( + key, + self.shape, + xarray.core.indexing.IndexingSupport.BASIC, + self._raw_indexing_method, + ) + + def _raw_indexing_method(self, key: tuple) -> np.typing.ArrayLike: + vname = self.var.getName() + + slicebuilder = pyfimex0.SliceBuilder(self.cdm, vname, False) + dimsizes = [] + for k, dim, dimname in zip(key, self.shape, reversed(self.var.getShape())): + ignore_dim = dimname + if isinstance(k, int): + slicebuilder.setStartAndSize(dimname, k, 1) + elif isinstance(k, slice): + start = k.start if k.start is not None else 0 + step = k.step if k.step is not None else 1 + stop = k.stop if k.stop is not None else dim - 1 + size = (stop - start) // step + 1 + slicebuilder.setStartAndSize(dimname, start, size) + dimsizes.append(size) + else: + raise TypeError(f"Unknown type of {k}: {type(k)}") + + data = self.fh.getDataSliceSB(vname, slicebuilder) + datav = data.values() + data_shaped = np.reshape(datav, dimsizes) + if self.scale_offset: + try: + scale_factor = self.cdm.getAttribute(vname, "scale_factor").getData().values() + except RuntimeError: + scale_factor = 1 + try: + add_offset = self.cdm.getAttribute(vname, "add_offset").getData().values() + except RuntimeError: + add_offset = 0 + data_shaped = data_shaped*scale_factor + add_offset + + return data_shaped diff --git a/modules/xarray/pyproject.toml b/modules/xarray/pyproject.toml new file mode 100644 index 00000000..1d7bc688 --- /dev/null +++ b/modules/xarray/pyproject.toml @@ -0,0 +1,17 @@ +[project] +name = "fimex_xarray" +version = "0.0.1" +readme = "README.md" + +dependencies = [ + # "pyfimex0", + "numpy", + "xarray", +] + +[build-system] +requires = ["setuptools", "wheel"] +build-backend = "setuptools.build_meta" + +[project.entry-points."xarray.backends"] +fimex = "fimex_xarray.xr:FimexBackendEntrypoint" From 49660c4e5734d68ec2f42bad7814aafa2a82a0e7 Mon Sep 17 00:00:00 2001 From: Magnus Ulimoen Date: Tue, 17 Jan 2023 10:22:07 +0100 Subject: [PATCH 2/5] Fix numpy.int deprecation --- modules/xarray/fimex_xarray/xr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/xarray/fimex_xarray/xr.py b/modules/xarray/fimex_xarray/xr.py index 0fc11123..8a88e15a 100755 --- a/modules/xarray/fimex_xarray/xr.py +++ b/modules/xarray/fimex_xarray/xr.py @@ -9,7 +9,7 @@ DTYPE_MAPPINGS = { pyfimex0.CDMDataType.FLOAT: np.float32, pyfimex0.CDMDataType.DOUBLE: np.float64, - pyfimex0.CDMDataType.INT: np.int, + pyfimex0.CDMDataType.INT: np.intc, pyfimex0.CDMDataType.INT64: np.int64, pyfimex0.CDMDataType.UINT: np.uint, pyfimex0.CDMDataType.UINT64: np.uint64, From 33515f1d218a3f1ef054f543f41968a9c8dd23e8 Mon Sep 17 00:00:00 2001 From: Magnus Ulimoen Date: Tue, 7 Feb 2023 13:06:43 +0100 Subject: [PATCH 3/5] Add tests and improve coord reading --- modules/xarray/fimex_xarray/xr.py | 15 ++++++++++----- modules/xarray/pyproject.toml | 11 ++++++++--- modules/xarray/tests/test_fimex.py | 16 ++++++++++++++++ 3 files changed, 34 insertions(+), 8 deletions(-) create mode 100644 modules/xarray/tests/test_fimex.py diff --git a/modules/xarray/fimex_xarray/xr.py b/modules/xarray/fimex_xarray/xr.py index 8a88e15a..32a8ceb1 100755 --- a/modules/xarray/fimex_xarray/xr.py +++ b/modules/xarray/fimex_xarray/xr.py @@ -41,12 +41,17 @@ def open_dataset(self, filename_or_obj, drop_variables, *, config=None, filetype coord_names = cdm.getDimensionNames() coords = dict() - for cname in coord_names: + for cname in set(coord_names).intersection(var_names): var = cdm.getVariable(cname) - values = var.getData().values() - shape = var.getShape() - if len(shape) != 1: - raise Exception() + vardata = var.getData() + if vardata is not None: + values = vardata.values() + shape = var.getShape() + if len(shape) != 1: + raise Exception() + else: + values = fh.getData(cname).values() + shape = [len(values)] if scale_offset: try: diff --git a/modules/xarray/pyproject.toml b/modules/xarray/pyproject.toml index 1d7bc688..a4f2f87f 100644 --- a/modules/xarray/pyproject.toml +++ b/modules/xarray/pyproject.toml @@ -1,3 +1,7 @@ +[build-system] +requires = ["setuptools", "wheel"] +build-backend = "setuptools.build_meta" + [project] name = "fimex_xarray" version = "0.0.1" @@ -9,9 +13,10 @@ dependencies = [ "xarray", ] -[build-system] -requires = ["setuptools", "wheel"] -build-backend = "setuptools.build_meta" +[project.optional-dependencies] +dev = [ + "pytest", +] [project.entry-points."xarray.backends"] fimex = "fimex_xarray.xr:FimexBackendEntrypoint" diff --git a/modules/xarray/tests/test_fimex.py b/modules/xarray/tests/test_fimex.py new file mode 100644 index 00000000..4dd0c4f6 --- /dev/null +++ b/modules/xarray/tests/test_fimex.py @@ -0,0 +1,16 @@ +import xarray +import os + +test_srcdir = os.path.join(os.path.dirname(__file__), "..", "..", "..", "test") + + +def test_open_hirlam12(): + path = os.path.join(test_srcdir, "hirlam12.nc") + path = os.path.join(test_srcdir, "verticalPressure.nc") + path = os.path.join(test_srcdir, "erai.sfc.40N.0.75d.200301011200.nc") + path = os.path.join(test_srcdir, "verticalOceanSG2.nc") + ds = xarray.open_dataset(path, engine="fimex", decode_times=False) + + temp = ds["temp"] + selection = temp.isel(ocean_time=3, s_rho=[4, 6, 9], xi_rho=range(4, 7)) + values = selection.values From 91209ac4a5b77a859cbf7877709f246ea6ec02da Mon Sep 17 00:00:00 2001 From: Magnus Ulimoen Date: Tue, 21 Mar 2023 15:16:04 +0100 Subject: [PATCH 4/5] Fix slice indexing --- modules/xarray/fimex_xarray/xr.py | 4 ++-- modules/xarray/tests/test_fimex.py | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/modules/xarray/fimex_xarray/xr.py b/modules/xarray/fimex_xarray/xr.py index 32a8ceb1..95cf16f3 100755 --- a/modules/xarray/fimex_xarray/xr.py +++ b/modules/xarray/fimex_xarray/xr.py @@ -152,8 +152,8 @@ def _raw_indexing_method(self, key: tuple) -> np.typing.ArrayLike: elif isinstance(k, slice): start = k.start if k.start is not None else 0 step = k.step if k.step is not None else 1 - stop = k.stop if k.stop is not None else dim - 1 - size = (stop - start) // step + 1 + stop = k.stop if k.stop is not None else dim + size = (stop - start) // step slicebuilder.setStartAndSize(dimname, start, size) dimsizes.append(size) else: diff --git a/modules/xarray/tests/test_fimex.py b/modules/xarray/tests/test_fimex.py index 4dd0c4f6..3acaf1db 100644 --- a/modules/xarray/tests/test_fimex.py +++ b/modules/xarray/tests/test_fimex.py @@ -1,5 +1,6 @@ import xarray import os +import numpy as np test_srcdir = os.path.join(os.path.dirname(__file__), "..", "..", "..", "test") @@ -14,3 +15,21 @@ def test_open_hirlam12(): temp = ds["temp"] selection = temp.isel(ocean_time=3, s_rho=[4, 6, 9], xi_rho=range(4, 7)) values = selection.values + + +def test_isel_slice(): + path = os.path.join(test_srcdir, "verticalOceanSG2.nc") + ds = xarray.open_dataset(path, engine="fimex", decode_times=False) + + zeta = ds["zeta"] + + zeta_sub = ds.isel(eta_rho=slice(None, None, None))["zeta"] + assert np.all(zeta == zeta_sub) + + zeta_sub = ds.isel(eta_rho=slice(2, None, None))["zeta"] + zeta_isel = zeta.isel(eta_rho=slice(2, None, None)) + assert np.all(zeta_isel == zeta_sub) + + zeta_sub = ds.isel(eta_rho=slice(2, 10, None))["zeta"] + zeta_isel = zeta.isel(eta_rho=slice(2, 10, None)) + assert np.all(zeta_isel == zeta_sub) From 6473a08fd791d52525b0818658300c9182f371e2 Mon Sep 17 00:00:00 2001 From: Magnus Ulimoen Date: Mon, 27 Mar 2023 09:40:32 +0200 Subject: [PATCH 5/5] comment buggy scalar reading --- modules/xarray/fimex_xarray/xr.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/modules/xarray/fimex_xarray/xr.py b/modules/xarray/fimex_xarray/xr.py index 95cf16f3..7a279709 100755 --- a/modules/xarray/fimex_xarray/xr.py +++ b/modules/xarray/fimex_xarray/xr.py @@ -160,8 +160,12 @@ def _raw_indexing_method(self, key: tuple) -> np.typing.ArrayLike: raise TypeError(f"Unknown type of {k}: {type(k)}") data = self.fh.getDataSliceSB(vname, slicebuilder) - datav = data.values() - data_shaped = np.reshape(datav, dimsizes) + if len(dimsizes) == 0: + # Bug? Fimex can't read scalar data? + data_shaped = -1 + else: + datav = data.values() + data_shaped = np.reshape(datav, dimsizes) if self.scale_offset: try: scale_factor = self.cdm.getAttribute(vname, "scale_factor").getData().values()