From a39b729bb647537c2b39326b0d2c40e855cef2ca Mon Sep 17 00:00:00 2001
From: John Omotani <john.omotani@ukaea.uk>
Date: Fri, 20 Jan 2023 11:36:40 +0000
Subject: [PATCH 01/26] Use .values to convert DataArray to scalar before
 storing in Region

May help speed up copying.
---
 xbout/region.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/xbout/region.py b/xbout/region.py
index 17bf6454..5b642ae6 100644
--- a/xbout/region.py
+++ b/xbout/region.py
@@ -124,8 +124,8 @@ def __init__(
                 ref_yind = ylower_ind
             dx = ds["dx"].isel({self.ycoord: ref_yind})
             dx_cumsum = dx.cumsum()
-            self.xinner = dx_cumsum[xinner_ind] - dx[xinner_ind]
-            self.xouter = dx_cumsum[xouter_ind - 1] + dx[xouter_ind - 1]
+            self.xinner = (dx_cumsum[xinner_ind] - dx[xinner_ind]).values
+            self.xouter = (dx_cumsum[xouter_ind - 1] + dx[xouter_ind - 1]).values
 
             # dy is constant in the x-direction, so convert to a 1d array
             # Define ref_xind so that we avoid using values from the corner cells, which
@@ -136,8 +136,8 @@ def __init__(
                 ref_xind = xinner_ind
             dy = ds["dy"].isel(**{self.xcoord: ref_xind})
             dy_cumsum = dy.cumsum()
-            self.ylower = dy_cumsum[ylower_ind] - dy[ylower_ind]
-            self.yupper = dy_cumsum[yupper_ind - 1]
+            self.ylower = (dy_cumsum[ylower_ind] - dy[ylower_ind]).values
+            self.yupper = (dy_cumsum[yupper_ind - 1]).values
 
     def __repr__(self):
         result = "<xbout.region.Region>\n"

From 8422b9bf98d95889d2b83935c2079bffab0f4d45 Mon Sep 17 00:00:00 2001
From: John Omotani <john.omotani@ukaea.uk>
Date: Fri, 20 Jan 2023 17:30:20 +0000
Subject: [PATCH 02/26] Fix warnings about not creating indexes when using
 .rename()

xarray behaviour changed, see https://github.com/pydata/xarray/pull/6999
and links from there.
---
 xbout/geometries.py | 11 ++++++-----
 xbout/utils.py      | 11 +++++++++++
 2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/xbout/geometries.py b/xbout/geometries.py
index fa8a67b6..cf3a181c 100644
--- a/xbout/geometries.py
+++ b/xbout/geometries.py
@@ -10,6 +10,7 @@
     _set_attrs_on_all_vars,
     _set_as_coord,
     _1d_coord_from_spacing,
+    _maybe_rename_dimension,
 )
 
 REGISTERED_GEOMETRIES = {}
@@ -386,12 +387,12 @@ def add_toroidal_geometry_coords(ds, *, coordinates=None, grid=None):
         ],
     )
 
-    if "t" in ds.dims:
+    if coordinates["t"] != "t":
         # Rename 't' if user requested it
-        ds = ds.rename(t=coordinates["t"])
+        ds = _maybe_rename_dimension(ds, "t", coordinates["t"])
 
     # Change names of dimensions to Orthogonal Toroidal ones
-    ds = ds.rename(y=coordinates["y"])
+    ds = _maybe_rename_dimension(ds, "y", coordinates["y"])
 
     # TODO automatically make this coordinate 1D in simplified cases?
     ds = ds.rename(psixy=coordinates["x"])
@@ -407,7 +408,7 @@ def add_toroidal_geometry_coords(ds, *, coordinates=None, grid=None):
 
     # If full data (not just grid file) then toroidal dim will be present
     if "z" in ds.dims:
-        ds = ds.rename(z=coordinates["z"])
+        ds = _maybe_rename_dimension(ds, "z", coordinates["z"])
 
         # Record which dimension 'z' was renamed to.
         ds.metadata["bout_zdim"] = coordinates["z"]
@@ -482,7 +483,7 @@ def add_s_alpha_geometry_coords(ds, *, coordinates=None, grid=None):
     ds["r"] = ds["hthe"].isel({ycoord: 0}).squeeze(drop=True)
     ds["r"].attrs["units"] = "m"
     ds = ds.set_coords("r")
-    ds = ds.rename(x="r")
+    ds = ds.swap_dims(x="r")
     ds.metadata["bout_xdim"] = "r"
 
     if hthe_from_grid:
diff --git a/xbout/utils.py b/xbout/utils.py
index 9587fcb4..d6f24537 100644
--- a/xbout/utils.py
+++ b/xbout/utils.py
@@ -880,3 +880,14 @@ def _set_as_coord(ds, name):
     except ValueError:
         pass
     return ds
+
+
+def _maybe_rename_dimension(ds, old_name, new_name):
+    if old_name in ds.dims and new_name != old_name:
+        # Rename dimension
+        ds = ds.swap_dims({old_name: new_name})
+        if old_name in ds:
+            # Rename coordinate if it exists
+            ds = ds.rename({old_name: new_name})
+
+    return ds

From 089000aa082075763bbb0eb5d8d1ba2589dc7d19 Mon Sep 17 00:00:00 2001
From: John Omotani <john.omotani@ukaea.uk>
Date: Fri, 20 Jan 2023 17:57:47 +0000
Subject: [PATCH 03/26] Fix updating of indexes when adding guard cells in
 region.py

Updates to xarray, meant that 'index' values were not updated when
'coordinate' values were updated. Fix by explicitly updating indexes.
---
 xbout/region.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/xbout/region.py b/xbout/region.py
index 5b642ae6..c12ee674 100644
--- a/xbout/region.py
+++ b/xbout/region.py
@@ -1355,7 +1355,9 @@ def _concat_inner_guards(da, da_global, mxg):
         # https://github.com/pydata/xarray/issues/4393
         # da_inner = da_inner.assign_coords(**{xcoord: new_xcoord, ycoord: new_ycoord})
         da_inner[xcoord].data[...] = new_xcoord.data
+        da_inner = da_inner.reset_index(xcoord).set_xindex(xcoord)
         da_inner[ycoord].data[...] = new_ycoord.data
+        da_inner = da_inner.reset_index(ycoord).set_xindex(ycoord)
 
     save_regions = da.bout._regions
     da = xr.concat((da_inner, da), xcoord, join="exact")
@@ -1466,7 +1468,9 @@ def _concat_outer_guards(da, da_global, mxg):
         # https://github.com/pydata/xarray/issues/4393
         # da_outer = da_outer.assign_coords(**{xcoord: new_xcoord, ycoord: new_ycoord})
         da_outer[xcoord].data[...] = new_xcoord.data
+        da_outer = da_outer.reset_index(xcoord).set_xindex(xcoord)
         da_outer[ycoord].data[...] = new_ycoord.data
+        da_outer = da_outer.reset_index(ycoord).set_xindex(ycoord)
 
     save_regions = da.bout._regions
     da = xr.concat((da, da_outer), xcoord, join="exact")
@@ -1566,7 +1570,9 @@ def _concat_lower_guards(da, da_global, mxg, myg):
         # https://github.com/pydata/xarray/issues/4393
         # da_lower = da_lower.assign_coords(**{xcoord: new_xcoord, ycoord: new_ycoord})
         da_lower[xcoord].data[...] = new_xcoord.data
+        da_lower = da_lower.reset_index(xcoord).set_xindex(xcoord)
         da_lower[ycoord].data[...] = new_ycoord.data
+        da_lower = da_lower.reset_index(ycoord).set_xindex(ycoord)
 
     if "poloidal_distance" in da.coords and myg > 0:
         # Special handling for core regions to deal with branch cut
@@ -1682,7 +1688,9 @@ def _concat_upper_guards(da, da_global, mxg, myg):
         # https://github.com/pydata/xarray/issues/4393
         # da_upper = da_upper.assign_coords(**{xcoord: new_xcoord, ycoord: new_ycoord})
         da_upper[xcoord].data[...] = new_xcoord.data
+        da_upper = da_upper.reset_index(xcoord).set_xindex(xcoord)
         da_upper[ycoord].data[...] = new_ycoord.data
+        da_upper = da_upper.reset_index(ycoord).set_xindex(ycoord)
 
     if "poloidal_distance" in da.coords and myg > 0:
         # Special handling for core regions to deal with branch cut

From e91fec017078276c64041230c55125b354bc3224 Mon Sep 17 00:00:00 2001
From: Ben Dudson <dudson2@llnl.gov>
Date: Mon, 28 Oct 2024 17:00:55 -0700
Subject: [PATCH 04/26] Set minimum xarray version to 2024.07.0

This is the last release that supports python 3.9
---
 setup.cfg | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.cfg b/setup.cfg
index 9464f8b8..a57a9d65 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -28,7 +28,7 @@ setup_requires =
     setuptools_scm[toml]>=3.4
     setuptools_scm_git_archive
 install_requires =
-    xarray>=0.18.0,<2022.9.0
+    xarray>=2024.7.0
     boutdata>=0.1.4
     dask[array]>=2.10.0
     gelidum>=0.5.3

From 2d11a6f805c98d55a6f0c3e61e37cf75a568c9a4 Mon Sep 17 00:00:00 2001
From: Ben Dudson <dudson2@llnl.gov>
Date: Mon, 28 Oct 2024 17:03:23 -0700
Subject: [PATCH 05/26] Fix Dataset.dims FutureWarning

Accessing e.g. `ds.dims['x']` produces:

```
FutureWarning: The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.
```

This commit changes `dims` to `sizes`.
---
 xbout/load.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/xbout/load.py b/xbout/load.py
index 7c08f9d8..f7a8e79e 100644
--- a/xbout/load.py
+++ b/xbout/load.py
@@ -810,10 +810,10 @@ def get_nonnegative_scalar(ds, key, default=1, info=True):
     mxg = get_nonnegative_scalar(ds, "MXG", default=2, info=info)
     myg = get_nonnegative_scalar(ds, "MYG", default=0, info=info)
     mxsub = get_nonnegative_scalar(
-        ds, "MXSUB", default=ds.dims["x"] - 2 * mxg, info=info
+        ds, "MXSUB", default=ds.sizes["x"] - 2 * mxg, info=info
     )
     mysub = get_nonnegative_scalar(
-        ds, "MYSUB", default=ds.dims["y"] - 2 * myg, info=info
+        ds, "MYSUB", default=ds.sizes["y"] - 2 * myg, info=info
     )
 
     # Check whether this is a single file squashed from the multiple output files of a
@@ -828,8 +828,8 @@ def get_nonnegative_scalar(ds, key, default=1, info=True):
     else:
         # Workaround for older data files
         ny = ds["MYSUB"].values * ds["NYPE"].values
-    nx_file = ds.dims["x"]
-    ny_file = ds.dims["y"]
+    nx_file = ds.sizes["x"]
+    ny_file = ds.sizes["y"]
     is_squashed_doublenull = False
     if nxpe > 1 or nype > 1:
         # if nxpe = nype = 1, was only one process anyway, so no need to check for

From f925dba5c8a9e06608dc031c5a3b491a2f519f7f Mon Sep 17 00:00:00 2001
From: Ben Dudson <dudson2@llnl.gov>
Date: Tue, 29 Oct 2024 14:14:33 -0700
Subject: [PATCH 06/26] Simplify and speed up open_mfdataset

Xarray's open_mfdataset is very slow for BOUT++ datasets. Other datasets
also have issues (see e.g. https://github.com/pydata/xarray/issues/1385)
though the cause may not be the same.

Using an implementation that opens the datasets and concatenates
significantly speeds up this process.
---
 xbout/load.py      | 90 +++++++---------------------------------------
 xbout/mfdataset.py | 84 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 97 insertions(+), 77 deletions(-)
 create mode 100644 xbout/mfdataset.py

diff --git a/xbout/load.py b/xbout/load.py
index f7a8e79e..51b62dd1 100644
--- a/xbout/load.py
+++ b/xbout/load.py
@@ -19,22 +19,6 @@
     _is_dir,
 )
 
-
-_BOUT_PER_PROC_VARIABLES = [
-    "wall_time",
-    "wtime",
-    "wtime_rhs",
-    "wtime_invert",
-    "wtime_comms",
-    "wtime_io",
-    "wtime_per_rhs",
-    "wtime_per_rhs_e",
-    "wtime_per_rhs_i",
-    "PE_XIND",
-    "PE_YIND",
-    "MYPE",
-]
-_BOUT_TIME_DEPENDENT_META_VARS = ["iteration", "hist_hi", "tt"]
 _BOUT_GEOMETRY_VARS = [
     "ixseps1",
     "ixseps2",
@@ -69,9 +53,6 @@
     )
 
 
-# TODO somehow check that we have access to the latest version of auto_combine
-
-
 def open_boutdataset(
     datapath="./BOUT.dmp.*.nc",
     inputfilepath=None,
@@ -295,15 +276,6 @@ def attrs_remove_section(obj, section):
     else:
         raise ValueError(f"internal error: unexpected input_type={input_type}")
 
-    if not is_restart:
-        for var in _BOUT_TIME_DEPENDENT_META_VARS:
-            if var in ds:
-                # Assume different processors in x & y have same iteration etc.
-                latest_top_left = {dim: 0 for dim in ds[var].dims}
-                if "t" in ds[var].dims:
-                    latest_top_left["t"] = -1
-                ds[var] = ds[var].isel(latest_top_left).squeeze(drop=True)
-
     ds, metadata = _separate_metadata(ds)
     # Store as ints because netCDF doesn't support bools, so we can't save
     # bool attributes
@@ -616,11 +588,6 @@ def _auto_open_mfboutdataset(
     if chunks is None:
         chunks = {}
 
-    if is_restart:
-        data_vars = "minimal"
-    else:
-        data_vars = _BOUT_TIME_DEPENDENT_META_VARS
-
     if _is_path(datapath):
         filepaths, filetype = _expand_filepaths(datapath)
 
@@ -640,6 +607,9 @@ def _auto_open_mfboutdataset(
         else:
             remove_yboundaries = False
 
+        # Create a partial application of _trim
+        # Calls to _preprocess will call _trim to trim guard / boundary cells
+        # from datasets before merging.
         _preprocess = partial(
             _trim,
             guards={"x": mxg, "y": myg},
@@ -651,40 +621,11 @@ def _auto_open_mfboutdataset(
 
         paths_grid, concat_dims = _arrange_for_concatenation(filepaths, nxpe, nype)
 
-        try:
-            ds = xr.open_mfdataset(
-                paths_grid,
-                concat_dim=concat_dims,
-                combine="nested",
-                data_vars=data_vars,
-                preprocess=_preprocess,
-                engine=filetype,
-                chunks=chunks,
-                join="exact",
-                **kwargs,
-            )
-        except ValueError as e:
-            message_to_catch = (
-                "some variables in data_vars are not data variables on the first "
-                "dataset:"
-            )
-            if str(e)[: len(message_to_catch)] == message_to_catch:
-                # Open concatenating any variables that are different in
-                # different files as a work around to support opening older
-                # data.
-                ds = xr.open_mfdataset(
-                    paths_grid,
-                    concat_dim=concat_dims,
-                    combine="nested",
-                    data_vars="different",
-                    preprocess=_preprocess,
-                    engine=filetype,
-                    chunks=chunks,
-                    join="exact",
-                    **kwargs,
-                )
-            else:
-                raise
+        # Call custom implementation of open_mfdataset
+        # avoiding some of the performance issues.
+        from .mfdataset import mfdataset
+
+        ds = mfdataset(paths_grid, concat_dim=concat_dims, preprocess=_preprocess)
     else:
         # datapath was nested list of Datasets
 
@@ -731,11 +672,6 @@ def _auto_open_mfboutdataset(
             combine_attrs="no_conflicts",
         )
 
-    if not is_restart:
-        # Remove any duplicate time values from concatenation
-        _, unique_indices = unique(ds["t_array"], return_index=True)
-        ds = ds.isel(t=unique_indices)
-
     return ds, remove_yboundaries
 
 
@@ -933,8 +869,10 @@ def _trim(ds, *, guards, keep_boundaries, nxpe, nype, is_restart):
     """
     Trims all guard (and optionally boundary) cells off a single dataset read from a
     single BOUT dump file, to prepare for concatenation.
-    Also drops some variables that store timing information, which are different for each
-    process and so cannot be concatenated.
+
+    Variables that store timing information, which are different for each
+    process, are not trimmed but are taken from the first processor during
+    concatenation.
 
     Parameters
     ----------
@@ -973,9 +911,7 @@ def _trim(ds, *, guards, keep_boundaries, nxpe, nype, is_restart):
         ):
             trimmed_ds = trimmed_ds.drop_vars(name)
 
-    to_drop = _BOUT_PER_PROC_VARIABLES
-
-    return trimmed_ds.drop_vars(to_drop, errors="ignore")
+    return trimmed_ds
 
 
 def _infer_contains_boundaries(ds, nxpe, nype):
diff --git a/xbout/mfdataset.py b/xbout/mfdataset.py
new file mode 100644
index 00000000..a1fbe42d
--- /dev/null
+++ b/xbout/mfdataset.py
@@ -0,0 +1,84 @@
+# Custom implementation of xarray.open_mfdataset()
+
+import xarray as xr
+
+
+def concat_outer(dss, operation=None):
+    """
+    Concatenate nested lists along their outer dimension
+
+    # Example
+
+    >>> m = [[1,2,3],
+             [2,3,3],
+             [5,4,3]]
+
+    >>> concat_outer(m)
+
+    [[1, 2, 5], [2, 3, 4], [3, 3, 3]]
+
+    >>> concat_outer(m, operation=sum)
+
+    [8, 9, 9]
+    
+    """
+    if not isinstance(dss[0], list):
+        # Input is a 1D list
+        if operation is not None:
+            return operation(dss)
+        return dss
+
+    # Two or more dimensions
+    # Swap first and second indices then concatenate inner
+    if len(dss[0]) == 1:
+        return concat_outer([dss[j][0] for j in range(len(dss))], operation=operation)
+
+    return [
+        concat_outer([dss[j][i] for j in range(len(dss))], operation=operation)
+        for i in range(len(dss[0]))
+    ]
+
+
+def mfdataset(paths, chunks=None, concat_dim=None, preprocess=None):
+    if chunks is None:
+        chunks = {}
+
+    if not isinstance(concat_dim, list):
+        concat_dim = [concat_dim]
+
+    if isinstance(paths, list):
+        # Read nested dataset
+
+        dss = [
+            mfdataset(
+                path, chunks=chunks, concat_dim=concat_dim[1:], preprocess=preprocess
+            )
+            for path in paths
+        ]
+
+        # The dimension to concatenate along
+        if concat_dim[0] is None:
+            # Not concatenating
+            if len(dss) == 1:
+                return dss[0]
+            return dss
+
+        # Concatenating along the top-level dimension
+        return concat_outer(
+            dss,
+            operation=lambda ds: xr.concat(
+                ds,
+                concat_dim[0],
+                data_vars="minimal",  # Only data variables in which the dimension already appears are concatenated.
+                coords="minimal",  # Only coordinates in which the dimension already appears are concatenated.
+                compat="override",  # Duplicate data taken from first dataset
+                join="exact",  # Don't align. Raise ValueError when indexes to be aligned are not equal
+                combine_attrs="override",  # Duplicate attributes taken from first dataset
+                create_index_for_new_dim=False,
+            ),
+        )
+    # A single path
+    ds = xr.open_dataset(paths, chunks=chunks)
+    if preprocess is not None:
+        ds = preprocess(ds)
+    return ds

From 1697ded4edb3398084f27650de3660f9039dcba0 Mon Sep 17 00:00:00 2001
From: Ben Dudson <dudson2@llnl.gov>
Date: Tue, 29 Oct 2024 14:29:14 -0700
Subject: [PATCH 07/26] Use xarray.open_mfdataset with better options

Using the same concatenation options as in custom implementation.
Has essentially the same performance, much faster than original.
---
 xbout/load.py      | 24 +++++++++----
 xbout/mfdataset.py | 84 ----------------------------------------------
 2 files changed, 18 insertions(+), 90 deletions(-)
 delete mode 100644 xbout/mfdataset.py

diff --git a/xbout/load.py b/xbout/load.py
index 51b62dd1..5220708b 100644
--- a/xbout/load.py
+++ b/xbout/load.py
@@ -621,11 +621,20 @@ def _auto_open_mfboutdataset(
 
         paths_grid, concat_dims = _arrange_for_concatenation(filepaths, nxpe, nype)
 
-        # Call custom implementation of open_mfdataset
-        # avoiding some of the performance issues.
-        from .mfdataset import mfdataset
-
-        ds = mfdataset(paths_grid, concat_dim=concat_dims, preprocess=_preprocess)
+        ds = xr.open_mfdataset(
+            paths_grid,
+            concat_dim=concat_dims,
+            combine="nested",
+            data_vars="minimal", # Only data variables in which the dimension already appears are concatenated.
+            coords = "minimal", # Only coordinates in which the dimension already appears are concatenated.
+            compat = "override", # Duplicate data taken from first dataset
+            combine_attrs="override",  # Duplicate attributes taken from first dataset
+            preprocess=_preprocess,
+            engine=filetype,
+            chunks=chunks,
+            join="exact", # Don't align. Raise ValueError when indexes to be aligned are not equal
+            **kwargs,
+        )
     else:
         # datapath was nested list of Datasets
 
@@ -669,7 +678,10 @@ def _auto_open_mfboutdataset(
             concat_dim=concat_dims,
             data_vars=data_vars,
             join="exact",
-            combine_attrs="no_conflicts",
+            data_vars="minimal", # Only data variables in which the dimension already appears are concatenated.
+            coords = "minimal", # Only coordinates in which the dimension already appears are concatenated.
+            compat = "override", # Duplicate data taken from first dataset
+            combine_attrs="override",  # Duplicate attributes taken from first dataset
         )
 
     return ds, remove_yboundaries
diff --git a/xbout/mfdataset.py b/xbout/mfdataset.py
deleted file mode 100644
index a1fbe42d..00000000
--- a/xbout/mfdataset.py
+++ /dev/null
@@ -1,84 +0,0 @@
-# Custom implementation of xarray.open_mfdataset()
-
-import xarray as xr
-
-
-def concat_outer(dss, operation=None):
-    """
-    Concatenate nested lists along their outer dimension
-
-    # Example
-
-    >>> m = [[1,2,3],
-             [2,3,3],
-             [5,4,3]]
-
-    >>> concat_outer(m)
-
-    [[1, 2, 5], [2, 3, 4], [3, 3, 3]]
-
-    >>> concat_outer(m, operation=sum)
-
-    [8, 9, 9]
-    
-    """
-    if not isinstance(dss[0], list):
-        # Input is a 1D list
-        if operation is not None:
-            return operation(dss)
-        return dss
-
-    # Two or more dimensions
-    # Swap first and second indices then concatenate inner
-    if len(dss[0]) == 1:
-        return concat_outer([dss[j][0] for j in range(len(dss))], operation=operation)
-
-    return [
-        concat_outer([dss[j][i] for j in range(len(dss))], operation=operation)
-        for i in range(len(dss[0]))
-    ]
-
-
-def mfdataset(paths, chunks=None, concat_dim=None, preprocess=None):
-    if chunks is None:
-        chunks = {}
-
-    if not isinstance(concat_dim, list):
-        concat_dim = [concat_dim]
-
-    if isinstance(paths, list):
-        # Read nested dataset
-
-        dss = [
-            mfdataset(
-                path, chunks=chunks, concat_dim=concat_dim[1:], preprocess=preprocess
-            )
-            for path in paths
-        ]
-
-        # The dimension to concatenate along
-        if concat_dim[0] is None:
-            # Not concatenating
-            if len(dss) == 1:
-                return dss[0]
-            return dss
-
-        # Concatenating along the top-level dimension
-        return concat_outer(
-            dss,
-            operation=lambda ds: xr.concat(
-                ds,
-                concat_dim[0],
-                data_vars="minimal",  # Only data variables in which the dimension already appears are concatenated.
-                coords="minimal",  # Only coordinates in which the dimension already appears are concatenated.
-                compat="override",  # Duplicate data taken from first dataset
-                join="exact",  # Don't align. Raise ValueError when indexes to be aligned are not equal
-                combine_attrs="override",  # Duplicate attributes taken from first dataset
-                create_index_for_new_dim=False,
-            ),
-        )
-    # A single path
-    ds = xr.open_dataset(paths, chunks=chunks)
-    if preprocess is not None:
-        ds = preprocess(ds)
-    return ds

From c0977d2cf8ddb7ec19214a9213c71fcf070b6b1c Mon Sep 17 00:00:00 2001
From: Ben Dudson <dudson2@llnl.gov>
Date: Tue, 29 Oct 2024 14:36:53 -0700
Subject: [PATCH 08/26] Minor fixes

Unused import (numpy.unique)
Duplicated keyword
black formatting
---
 xbout/load.py | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/xbout/load.py b/xbout/load.py
index 5220708b..2bf6c6c6 100644
--- a/xbout/load.py
+++ b/xbout/load.py
@@ -6,7 +6,6 @@
 
 from boutdata.data import BoutOptionsFile
 import xarray as xr
-from numpy import unique
 
 from natsort import natsorted
 
@@ -625,14 +624,14 @@ def _auto_open_mfboutdataset(
             paths_grid,
             concat_dim=concat_dims,
             combine="nested",
-            data_vars="minimal", # Only data variables in which the dimension already appears are concatenated.
-            coords = "minimal", # Only coordinates in which the dimension already appears are concatenated.
-            compat = "override", # Duplicate data taken from first dataset
+            data_vars="minimal",  # Only data variables in which the dimension already appears are concatenated.
+            coords="minimal",  # Only coordinates in which the dimension already appears are concatenated.
+            compat="override",  # Duplicate data taken from first dataset
             combine_attrs="override",  # Duplicate attributes taken from first dataset
             preprocess=_preprocess,
             engine=filetype,
             chunks=chunks,
-            join="exact", # Don't align. Raise ValueError when indexes to be aligned are not equal
+            join="exact",  # Don't align. Raise ValueError when indexes to be aligned are not equal
             **kwargs,
         )
     else:
@@ -676,11 +675,10 @@ def _auto_open_mfboutdataset(
         ds = xr.combine_nested(
             ds_grid,
             concat_dim=concat_dims,
-            data_vars=data_vars,
             join="exact",
-            data_vars="minimal", # Only data variables in which the dimension already appears are concatenated.
-            coords = "minimal", # Only coordinates in which the dimension already appears are concatenated.
-            compat = "override", # Duplicate data taken from first dataset
+            data_vars="minimal",  # Only data variables in which the dimension already appears are concatenated.
+            coords="minimal",  # Only coordinates in which the dimension already appears are concatenated.
+            compat="override",  # Duplicate data taken from first dataset
             combine_attrs="override",  # Duplicate attributes taken from first dataset
         )
 

From 94b4545af014225dbd5a5b9f798bac2281bb6738 Mon Sep 17 00:00:00 2001
From: Ben Dudson <dudson2@llnl.gov>
Date: Tue, 29 Oct 2024 15:52:18 -0700
Subject: [PATCH 09/26] flake8 fixes

Mainly lines too long.
---
 xbout/load.py | 278 +++++++++++++++++++++++++++++---------------------
 1 file changed, 159 insertions(+), 119 deletions(-)

diff --git a/xbout/load.py b/xbout/load.py
index 2bf6c6c6..4ef42aca 100644
--- a/xbout/load.py
+++ b/xbout/load.py
@@ -57,7 +57,7 @@ def open_boutdataset(
     inputfilepath=None,
     geometry=None,
     gridfilepath=None,
-    grid_mismatch="raise",  #: Union[Literal["raise"], Literal["warn"], Literal["ignore"]]
+    grid_mismatch="raise",
     chunks=None,
     keep_xboundaries=True,
     keep_yboundaries=False,
@@ -66,86 +66,103 @@ def open_boutdataset(
     is_restart=None,
     **kwargs,
 ):
-    """
-    Load a dataset from a set of BOUT output files, including the input options
-    file. Can also load from a grid file or from restart files.
-
-    Note that when reloading a Dataset that was saved by xBOUT, the state of the saved
-    Dataset is restored, and the values of `keep_xboundaries`, `keep_yboundaries`, and
-    `run_name` are ignored. `geometry` is treated specially, and can be passed when
+    """Load a dataset from a set of BOUT output files, including the
+    input options file. Can also load from a grid file or from restart
+    files.
+
+    Note that when reloading a Dataset that was saved by xBOUT, the
+    state of the saved Dataset is restored, and the values of
+    `keep_xboundaries`, `keep_yboundaries`, and `run_name` are
+    ignored. `geometry` is treated specially, and can be passed when
     reloading a Dataset (along with `gridfilepath` if needed).
 
     Troubleshooting
     ---------------
-    Variable conflicts: sometimes, for example when loading data from multiple restarts,
-    some variables may have conflicts (e.g. a source term was changed between some of
-    the restarts, but the source term is saved as time-independent, without a
-    t-dimension). In this case one workaround is to pass a list of variable names to the
-    keyword argument `drop_vars` to ignore the variables with conflicts, e.g. if `"S1"`
-    and `"S2"` have conflicts
+
+    Variable conflicts: sometimes, for example when loading data from
+    multiple restarts, some variables may have conflicts (e.g. a
+    source term was changed between some of the restarts, but the
+    source term is saved as time-independent, without a
+    t-dimension). In this case one workaround is to pass a list of
+    variable names to the keyword argument `drop_vars` to ignore the
+    variables with conflicts, e.g. if `"S1"` and `"S2"` have conflicts
     ```
-    ds = open_boutdataset("data*/boutdata.nc", drop_variables=["S1", "S2"])
+    ds = open_boutdataset("data*/boutdata.nc",
+                          drop_variables=["S1", "S2"])
     ```
-    will open a Dataset which is missing `"S1"` and `"S2"`.\
-    [`drop_variables` is an argument of `xarray.open_dataset()` that is passed down
-    through `kwargs`.]
+    will open a Dataset which is missing `"S1"` and `"S2"`.
+    `drop_variables` is an argument of `xarray.open_dataset()` that is
+    passed down through `kwargs`.
 
     Parameters
     ----------
-    datapath : str or (list or tuple of xr.Dataset), optional
-        Path to the data to open. Can point to either a set of one or more dump
-        files, or a single grid file.
 
-        To specify multiple dump files you must enter the path to them as a
-        single glob, e.g. './BOUT.dmp.*.nc', or for multiple consecutive runs
-        in different directories (in order) then './run*/BOUT.dmp.*.nc'.
+    datapath : str or (list or tuple of xr.Dataset), optional Path to
+        the data to open. Can point to either a set of one or more
+        dump files, or a single grid file.
+
+        To specify multiple dump files you must enter the path to them
+        as a single glob, e.g. './BOUT.dmp.*.nc', or for multiple
+        consecutive runs in different directories (in order) then
+        './run*/BOUT.dmp.*.nc'.
+
+        If a list or tuple of xr.Dataset is passed, they will be
+        combined with xr.combine_nested() instead of loading data from
+        disk (intended for unit testing).
 
-        If a list or tuple of xr.Dataset is passed, they will be combined with
-        xr.combine_nested() instead of loading data from disk (intended for unit
-        testing).
     chunks : dict, optional
     inputfilepath : str, optional
     geometry : str, optional
-        The geometry type of the grid data. This will specify what type of
-        coordinates to add to the dataset, e.g. 'toroidal' or 'cylindrical'.
+        The geometry type of the grid data. This will specify what
+        type of coordinates to add to the dataset, e.g. 'toroidal' or
+        'cylindrical'.
 
-        If not specified then will attempt to read it from the file attrs.
-        If still not found then a warning will be thrown, which can be
-        suppressed by passing `info`=False.
+        If not specified then will attempt to read it from the file
+        attrs.  If still not found then a warning will be thrown,
+        which can be suppressed by passing `info`=False.
 
         To define a new type of geometry you need to use the
         `register_geometry` decorator. You are encouraged to do this for your
         own BOUT++ physics module, to apply relevant normalisations.
+
     gridfilepath : str, optional
-        The path to a grid file, containing any variables needed to apply the geometry
-        specified by the 'geometry' option, which are not contained in the dump files.
-        This may either be the path of the grid file itself, or the directory
-        relative to which the grid from the settings file can be found.
+        The path to a grid file, containing any variables needed to
+        apply the geometry specified by the 'geometry' option, which
+        are not contained in the dump files.  This may either be the
+        path of the grid file itself, or the directory relative to
+        which the grid from the settings file can be found.
+
     grid_mismatch : str, optional
-        How to handle if the grid is not the grid that has been used for the
-        simulation. Can be "raise" to raise a RuntimeError, "warn" to raise a
-        warning, or ignore to ignore the mismatch silently.
+        How to handle if the grid is not the grid that has been used
+        for the simulation. Can be "raise" to raise a RuntimeError,
+        "warn" to raise a warning, or ignore to ignore the mismatch
+        silently.
+
     keep_xboundaries : bool, optional
-        If true, keep x-direction boundary cells (the cells past the physical
-        edges of the grid, where boundary conditions are set); increases the
-        size of the x dimension in the returned data-set. If false, trim these
-        cells.
+        If true, keep x-direction boundary cells (the cells past the
+        physical edges of the grid, where boundary conditions are
+        set); increases the size of the x dimension in the returned
+        data-set. If false, trim these cells.
+
     keep_yboundaries : bool, optional
         If true, keep y-direction boundary cells (the cells past the physical
         edges of the grid, where boundary conditions are set); increases the
         size of the y dimension in the returned data-set. If false, trim these
         cells.
+
     run_name : str, optional
         Name to give to the whole dataset, e.g. 'JET_ELM_high_resolution'.
         Useful if you are going to open multiple simulations and compare the
         results.
     info : bool or "terse", optional
     is_restart : bool, optional
-        Restart files require some special handling (e.g. working around variables that
-        are not present in restart files). By default, this special handling is enabled
-        if the files do not have a time dimension and `restart` is present in the file
-        name in `datapath`. This option can be set to True or False to explicitly enable
-        or disable the restart file handling.
+        Restart files require some special handling (e.g. working
+        around variables that are not present in restart files). By
+        default, this special handling is enabled if the files do not
+        have a time dimension and `restart` is present in the file
+        name in `datapath`. This option can be set to True or False to
+        explicitly enable or disable the restart file handling.
+
     kwargs : optional
         Keyword arguments are passed down to `xarray.open_mfdataset`, which in
         turn passes extra kwargs down to `xarray.open_dataset`.
@@ -153,6 +170,7 @@ def open_boutdataset(
     Returns
     -------
     ds : xarray.Dataset
+
     """
 
     if chunks is None:
@@ -168,8 +186,8 @@ def open_boutdataset(
     if "reload" in input_type:
         if input_type == "reload":
             if isinstance(datapath, Path):
-                # xr.open_mfdataset only accepts glob patterns as strings, not Path
-                # objects
+                # xr.open_mfdataset only accepts glob patterns as
+                # strings, not Path objects
                 datapath = str(datapath)
             ds = xr.open_mfdataset(
                 datapath,
@@ -302,7 +320,8 @@ def attrs_remove_section(obj, section):
                 gridfilepath += "/" + ds.options["grid"]
             else:
                 warn(
-                    "gridfilepath set to a directory, but no grid used in simulation. Continuing without grid."
+                    "gridfilepath set to a directory, but no grid used "
+                    "in simulation. Continuing without grid."
                 )
         if gridfilepath is not None:
             grid = _open_grid(
@@ -347,15 +366,15 @@ def attrs_remove_section(obj, section):
     if run_name:
         ds.name = run_name
 
-    # Set some default settings that are only used in post-processing by xBOUT, not by
-    # BOUT++
+    # Set some default settings that are only used in post-processing
+    # by xBOUT, not by BOUT++
     ds.bout.fine_interpolation_factor = 8
 
     if ("dump" in input_type or "restart" in input_type) and ds.metadata[
         "BOUT_VERSION"
     ] < 4.0:
-        # Add workarounds for missing information or different conventions in data saved
-        # by BOUT++ v3.x.
+        # Add workarounds for missing information or different
+        # conventions in data saved by BOUT++ v3.x.
         for v in ds:
             if ds.metadata["bout_zdim"] in ds[v].dims:
                 # All fields saved on aligned grid for BOUT-3
@@ -370,21 +389,22 @@ def attrs_remove_section(obj, section):
                     ds.metadata["bout_zdim"],
                 )
             ):
-                # zShift, etc. did not support staggered grids in BOUT++ v3 anyway, so
-                # just treat all variables as if they were at CELL_CENTRE
+                # zShift, etc. did not support staggered grids in
+                # BOUT++ v3 anyway, so just treat all variables as if
+                # they were at CELL_CENTRE
                 ds[v].attrs["cell_location"] = "CELL_CENTRE"
                 added_location = True
             if added_location:
                 warn(
-                    "Detected data from BOUT++ v3.x. Treating all variables as being "
-                    "at `CELL_CENTRE`. Should be similar to what BOUT++ v3.x did, but "
-                    "if your code uses staggered grids, this may produce unexpected "
-                    "effects in some places."
+                    "Detected data from BOUT++ v3.x. Treating all variables"
+                    " as being at `CELL_CENTRE`. Should be similar to what"
+                    " BOUT++ v3.x did, but if your code uses staggered grids,"
+                    " this may produce unexpected effects in some places."
                 )
 
         if "nz" not in ds.metadata:
-            # `nz` used to be stored as `MZ` and `MZ` used to include an extra buffer
-            # point that was not used for data.
+            # `nz` used to be stored as `MZ` and `MZ` used to include
+            # an extra buffer point that was not used for data.
             ds.metadata["nz"] = ds.metadata["MZ"] - 1
 
     if info == "terse":
@@ -422,9 +442,7 @@ def collect(
     info=True,
     prefix="BOUT.dmp",
 ):
-    """
-
-    Extract the data pertaining to a specified variable in a BOUT++ data set
+    """Extract the data pertaining to a specified variable in a BOUT++ data set
 
 
     Parameters
@@ -450,11 +468,14 @@ def collect(
 
     Notes
     ----------
-    strict : This option found in boutdata.collect() is not present in this function
-             it is assumed that the varname given is correct, if variable does not exist
-             the function will fail
-    tind_auto : This option is not required when using _auto_open_mfboutdataset as an
-             automatic failure if datasets are different lengths is included
+    strict : This option found in boutdata.collect() is not present in
+             this function it is assumed that the varname given is
+             correct, if variable does not exist the function will
+             fail
+
+    tind_auto : This option is not required when using
+             _auto_open_mfboutdataset as an automatic failure if
+             datasets are different lengths is included
 
     Returns
     ----------
@@ -596,11 +617,12 @@ def _auto_open_mfboutdataset(
         )
 
         if is_squashed_doublenull:
-            # Need to remove y-boundaries after loading: (i) in case we are loading a
-            # squashed data-set, in which case we cannot easily remove the upper
-            # boundary cells in _trim(); (ii) because using the remove_yboundaries()
-            # method for non-squashed data-sets is simpler than replicating that logic
-            # in _trim().
+            # Need to remove y-boundaries after loading: (i) in case
+            # we are loading a squashed data-set, in which case we
+            # cannot easily remove the upper boundary cells in
+            # _trim(); (ii) because using the remove_yboundaries()
+            # method for non-squashed data-sets is simpler than
+            # replicating that logic in _trim().
             remove_yboundaries = not keep_yboundaries
             keep_yboundaries = True
         else:
@@ -624,14 +646,22 @@ def _auto_open_mfboutdataset(
             paths_grid,
             concat_dim=concat_dims,
             combine="nested",
-            data_vars="minimal",  # Only data variables in which the dimension already appears are concatenated.
-            coords="minimal",  # Only coordinates in which the dimension already appears are concatenated.
-            compat="override",  # Duplicate data taken from first dataset
-            combine_attrs="override",  # Duplicate attributes taken from first dataset
             preprocess=_preprocess,
             engine=filetype,
             chunks=chunks,
-            join="exact",  # Don't align. Raise ValueError when indexes to be aligned are not equal
+            # Only data variables in which the dimension already
+            # appears are concatenated.
+            data_vars="minimal",
+            # Only coordinates in which the dimension already appears
+            # are concatenated.
+            coords="minimal",
+            # Duplicate data taken from first dataset
+            compat="override",
+            # Duplicate attributes taken from first dataset
+            combine_attrs="override",
+            # Don't align. Raise ValueError when indexes to be aligned
+            # are not equal
+            join="exact",
             **kwargs,
         )
     else:
@@ -651,9 +681,9 @@ def _auto_open_mfboutdataset(
         )
 
         if is_squashed_doublenull:
-            # Need to remove y-boundaries after loading when loading a squashed
-            # data-set, in which case we cannot easily remove the upper boundary cells
-            # in _trim().
+            # Need to remove y-boundaries after loading when loading a
+            # squashed data-set, in which case we cannot easily remove
+            # the upper boundary cells in _trim().
             remove_yboundaries = not keep_yboundaries
             keep_yboundaries = True
         else:
@@ -676,10 +706,16 @@ def _auto_open_mfboutdataset(
             ds_grid,
             concat_dim=concat_dims,
             join="exact",
-            data_vars="minimal",  # Only data variables in which the dimension already appears are concatenated.
-            coords="minimal",  # Only coordinates in which the dimension already appears are concatenated.
-            compat="override",  # Duplicate data taken from first dataset
-            combine_attrs="override",  # Duplicate attributes taken from first dataset
+            # Only data variables in which the dimension already
+            # appears are concatenated.
+            data_vars="minimal",
+            # Only coordinates in which the dimension already appears
+            # are concatenated.
+            coords="minimal",
+            # Duplicate data taken from first dataset
+            compat="override",
+            # Duplicate attributes taken from first dataset
+            combine_attrs="override",
         )
 
     return ds, remove_yboundaries
@@ -719,7 +755,8 @@ def _expand_wildcards(path):
     # Find path relative to parent
     search_pattern = str(path.relative_to(base_dir))
 
-    # Search this relative path from the parent directory for all files matching user input
+    # Search this relative path from the parent directory
+    # for all files matching user input
     filepaths = list(base_dir.glob(search_pattern))
 
     # Sort by numbers in filepath before returning
@@ -747,7 +784,7 @@ def get_nonnegative_scalar(ds, key, default=1, info=True):
                 print(f"{key} not found, setting to {default}")
             if default < 0:
                 raise ValueError(
-                    f"Default for {key} is {val}, but negative values are not valid"
+                    f"Default for {key} is {val}," f" but negative values are not valid"
                 )
             return default
 
@@ -762,8 +799,9 @@ def get_nonnegative_scalar(ds, key, default=1, info=True):
         ds, "MYSUB", default=ds.sizes["y"] - 2 * myg, info=info
     )
 
-    # Check whether this is a single file squashed from the multiple output files of a
-    # parallel run (i.e. NXPE*NYPE > 1 even though there is only a single file to read).
+    # Check whether this is a single file squashed from the multiple
+    # output files of a parallel run (i.e. NXPE*NYPE > 1 even though
+    # there is only a single file to read).
     if "nx" in ds:
         nx = ds["nx"].values
     else:
@@ -778,8 +816,8 @@ def get_nonnegative_scalar(ds, key, default=1, info=True):
     ny_file = ds.sizes["y"]
     is_squashed_doublenull = False
     if nxpe > 1 or nype > 1:
-        # if nxpe = nype = 1, was only one process anyway, so no need to check for
-        # squashing
+        # if nxpe = nype = 1, was only one process anyway, so no need
+        # to check for squashing
         if nx_file == nx or nx_file == nx - 2 * mxg:
             has_xboundaries = nx_file == nx
             if not has_xboundaries:
@@ -794,7 +832,8 @@ def get_nonnegative_scalar(ds, key, default=1, info=True):
             else:
                 upper_target_cells = 0
             if ny_file == ny or ny_file == ny + 2 * myg + 2 * upper_target_cells:
-                # This file contains all the points, possibly including guard cells
+                # This file contains all the points, possibly
+                # including guard cells
 
                 has_yboundaries = not (ny_file == ny)
                 if not has_yboundaries:
@@ -810,8 +849,8 @@ def get_nonnegative_scalar(ds, key, default=1, info=True):
                     # squashed with upper target points.
                     is_squashed_doublenull = False
             elif ny_file == ny + 2 * myg:
-                # Older squashed file from double-null grid but containing only lower
-                # target boundary cells.
+                # Older squashed file from double-null grid but
+                # containing only lower target boundary cells.
                 if keep_yboundaries:
                     raise ValueError(
                         "Cannot keep y-boundary points: squashed file is missing upper "
@@ -823,8 +862,9 @@ def get_nonnegative_scalar(ds, key, default=1, info=True):
 
                 nxpe = 1
                 nype = 1
-                # For this case, do not need the special handling enabled by
-                # is_squashed_doublenull=True, as keeping y-boundaries is not allowed
+                # For this case, do not need the special handling
+                # enabled by is_squashed_doublenull=True, as keeping
+                # y-boundaries is not allowed
                 is_squashed_doublenull = False
 
     # Avoid trying to open this file twice
@@ -925,9 +965,10 @@ def _trim(ds, *, guards, keep_boundaries, nxpe, nype, is_restart):
 
 
 def _infer_contains_boundaries(ds, nxpe, nype):
-    """
-    Uses the processor indices and BOUT++'s topology indices to work out whether this
-    dataset contains boundary cells, and on which side.
+    """Uses the processor indices and BOUT++'s topology indices to
+    work out whether this dataset contains boundary cells, and on
+    which side.
+
     """
 
     if nxpe * nype == 1:
@@ -939,8 +980,8 @@ def _infer_contains_boundaries(ds, nxpe, nype):
         yproc = int(ds["PE_YIND"])
     except KeyError:
         # output file from BOUT++ earlier than 4.3
-        # Use knowledge that BOUT names its output files as /folder/prefix.num.nc, with a
-        # numbering scheme
+        # Use knowledge that BOUT names its output files as
+        # /folder/prefix.num.nc, with a numbering scheme
         # num = nxpe*i + j, where i={0, ..., nype}, j={0, ..., nxpe}
         filename = ds.encoding["source"]
         *prefix, filenum, extension = Path(filename).suffixes
@@ -997,9 +1038,10 @@ def _open_grid(datapath, chunks, keep_xboundaries, keep_yboundaries, mxg=2, **kw
 
     acceptable_dims = ["x", "y", "z"]
 
-    # Passing 'chunks' with dimensions that are not present in the dataset causes an
-    # error. A gridfile will be missing 't' and may be missing 'z' dimensions that dump
-    # files have, so we must remove them from 'chunks'.
+    # Passing 'chunks' with dimensions that are not present in the
+    # dataset causes an error. A gridfile will be missing 't' and may
+    # be missing 'z' dimensions that dump files have, so we must
+    # remove them from 'chunks'.
     grid_chunks = copy(chunks)
     unrecognised_chunk_dims = list(set(grid_chunks.keys()) - set(acceptable_dims))
     for dim in unrecognised_chunk_dims:
@@ -1013,8 +1055,6 @@ def _open_grid(datapath, chunks, keep_xboundaries, keep_yboundaries, mxg=2, **kw
     else:
         grid = datapath
 
-    # TODO find out what 'yup_xsplit' etc are in the doublenull storm file John gave me
-    # For now drop any variables with extra dimensions
     unrecognised_dims = list(set(grid.dims) - set(acceptable_dims))
     if len(unrecognised_dims) > 0:
         # Weird string formatting is a workaround to deal with possible bug in
@@ -1026,15 +1066,15 @@ def _open_grid(datapath, chunks, keep_xboundaries, keep_yboundaries, mxg=2, **kw
         grid = grid.drop_dims(unrecognised_dims)
 
     if keep_xboundaries:
-        # Set MXG so that it is picked up in metadata - needed for applying geometry,
-        # etc.
+        # Set MXG so that it is picked up in metadata - needed for
+        # applying geometry, etc.
         grid["MXG"] = mxg
     else:
         xboundaries = mxg
         if xboundaries > 0:
             grid = grid.isel(x=slice(xboundaries, -xboundaries, None))
-        # Set MXG so that it is picked up in metadata - needed for applying geometry,
-        # etc.
+        # Set MXG so that it is picked up in metadata - needed for
+        # applying geometry, etc.
         grid["MXG"] = 0
     try:
         yboundaries = int(grid["y_boundary_guards"])
@@ -1043,16 +1083,16 @@ def _open_grid(datapath, chunks, keep_xboundaries, keep_yboundaries, mxg=2, **kw
         # never had y-boundary cells
         yboundaries = 0
     if keep_yboundaries:
-        # Set MYG so that it is picked up in metadata - needed for applying geometry,
-        # etc.
+        # Set MYG so that it is picked up in metadata
+        # - needed for applying geometry, etc.
         grid["MYG"] = yboundaries
     else:
         if yboundaries > 0:
             # Remove y-boundary cells from first divertor target
             grid = grid.isel(y=slice(yboundaries, -yboundaries, None))
             if grid["jyseps1_2"] > grid["jyseps2_1"]:
-                # There is a second divertor target, remove y-boundary cells
-                # there too
+                # There is a second divertor target, remove y-boundary
+                # cells there too
                 nin = int(grid["ny_inner"])
                 grid_lower = grid.isel(y=slice(None, nin, None))
                 grid_upper = grid.isel(y=slice(nin + 2 * yboundaries, None, None))
@@ -1063,8 +1103,8 @@ def _open_grid(datapath, chunks, keep_xboundaries, keep_yboundaries, mxg=2, **kw
                     compat="identical",
                     join="exact",
                 )
-        # Set MYG so that it is picked up in metadata - needed for applying geometry,
-        # etc.
+        # Set MYG so that it is picked up in metadata
+        # - needed for applying geometry, etc.
         grid["MYG"] = 0
 
     if "z" in grid_chunks and "z" not in grid.dims:

From 730ca6b089028f41de7e21e6dfc25cced0bfd941 Mon Sep 17 00:00:00 2001
From: Ben Dudson <dudson2@llnl.gov>
Date: Tue, 29 Oct 2024 16:08:35 -0700
Subject: [PATCH 10/26] Work on tests

_BOUT_PER_PROC_VARIABLES are no longer used in xbout,
since duplicated variables are just taken from the first processor.
---
 xbout/geometries.py             |  4 ++--
 xbout/tests/test_boutdataset.py |  2 +-
 xbout/tests/test_load.py        | 41 ++++++++++++++++++++++-----------
 3 files changed, 30 insertions(+), 17 deletions(-)

diff --git a/xbout/geometries.py b/xbout/geometries.py
index cf3a181c..3f31f49e 100644
--- a/xbout/geometries.py
+++ b/xbout/geometries.py
@@ -145,7 +145,7 @@ def apply_geometry(ds, geometry_name, *, coordinates=None, grid=None):
         # 'dx' may not be consistent between different regions (e.g. core and PFR).
         # For some geometries xcoord may have already been created by
         # add_geometry_coords, in which case we do not need this.
-        nx = updated_ds.dims[xcoord]
+        nx = updated_ds.sizes[xcoord]
 
         # can't use commented out version, uncommented one works around xarray bug
         # removing attrs
@@ -157,7 +157,7 @@ def apply_geometry(ds, geometry_name, *, coordinates=None, grid=None):
         _add_attrs_to_var(updated_ds, xcoord)
 
     if ycoord not in updated_ds.coords:
-        ny = updated_ds.dims[ycoord]
+        ny = updated_ds.sizes[ycoord]
         # dy should always be constant in x, so it is safe to convert to a 1d
         # coordinate.  [The y-coordinate has to be a 1d coordinate that labels x-z
         # slices of the grid (similarly x-coordinate is 1d coordinate that labels y-z
diff --git a/xbout/tests/test_boutdataset.py b/xbout/tests/test_boutdataset.py
index 18e212fd..bbf247d7 100644
--- a/xbout/tests/test_boutdataset.py
+++ b/xbout/tests/test_boutdataset.py
@@ -43,7 +43,7 @@ def test_concat(self, bout_xyt_example_files):
                 datapath=dataset_list2, inputfilepath=None, keep_xboundaries=False
             )
         result = concat([bd1, bd2], dim="run")
-        assert result.dims == {**bd1.dims, "run": 2}
+        assert result.sizes == {**bd1.sizes, "run": 2}
 
     def test_isel(self, bout_xyt_example_files):
         dataset_list = bout_xyt_example_files(None, nxpe=1, nype=1, nt=1)
diff --git a/xbout/tests/test_load.py b/xbout/tests/test_load.py
index df7eef76..391482fc 100644
--- a/xbout/tests/test_load.py
+++ b/xbout/tests/test_load.py
@@ -24,12 +24,24 @@
     _trim,
     _infer_contains_boundaries,
     open_boutdataset,
-    _BOUT_PER_PROC_VARIABLES,
-    _BOUT_TIME_DEPENDENT_META_VARS,
 )
 from xbout.utils import _separate_metadata
 from xbout.tests.utils_for_tests import _get_kwargs
 
+_BOUT_PER_PROC_VARIABLES = [
+    "wall_time",
+    "wtime",
+    "wtime_rhs",
+    "wtime_invert",
+    "wtime_comms",
+    "wtime_io",
+    "wtime_per_rhs",
+    "wtime_per_rhs_e",
+    "wtime_per_rhs_i",
+    "PE_XIND",
+    "PE_YIND",
+    "MYPE",
+]
 
 def test_check_extensions(tmp_path):
     files_dir = tmp_path.joinpath("data")
@@ -820,10 +832,13 @@ def test_strip_metadata(self):
 
         ds, metadata = _separate_metadata(original)
 
-        assert original.drop_vars(
-            METADATA_VARS + _BOUT_PER_PROC_VARIABLES + _BOUT_TIME_DEPENDENT_META_VARS,
-            errors="ignore",
-        ).equals(ds)
+        xrt.assert_equal(
+            original.drop_vars(
+                METADATA_VARS + _BOUT_PER_PROC_VARIABLES,
+                errors="ignore",
+            ),
+            ds
+        )
         assert metadata["NXPE"] == 1
 
 
@@ -842,9 +857,7 @@ def test_single_file(self, tmp_path_factory, bout_xyt_example_files):
         xrt.assert_equal(
             actual.drop_vars(["x", "y", "z"]).load(),
             expected.drop_vars(
-                METADATA_VARS
-                + _BOUT_PER_PROC_VARIABLES
-                + _BOUT_TIME_DEPENDENT_META_VARS,
+                METADATA_VARS + _BOUT_PER_PROC_VARIABLES,
                 errors="ignore",
             ),
         )
@@ -868,9 +881,7 @@ def test_squashed_file(self, tmp_path_factory, bout_xyt_example_files):
         xrt.assert_equal(
             actual.drop_vars(["x", "y", "z"]).load(),
             expected.drop_vars(
-                METADATA_VARS
-                + _BOUT_PER_PROC_VARIABLES
-                + _BOUT_TIME_DEPENDENT_META_VARS,
+                METADATA_VARS + _BOUT_PER_PROC_VARIABLES,
                 errors="ignore",
             ),
         )
@@ -982,7 +993,8 @@ def test_combine_along_x(self, tmp_path_factory, bout_xyt_example_files):
         xrt.assert_equal(
             actual.drop_vars(["x", "y", "z"]).load(),
             expected.drop_vars(
-                METADATA_VARS + _BOUT_PER_PROC_VARIABLES, errors="ignore"
+                METADATA_VARS + _BOUT_PER_PROC_VARIABLES,
+                errors="ignore"
             ),
         )
 
@@ -1016,7 +1028,8 @@ def test_combine_along_y(self, tmp_path_factory, bout_xyt_example_files):
         xrt.assert_equal(
             actual.drop_vars(["x", "y", "z"]).load(),
             expected.drop_vars(
-                METADATA_VARS + _BOUT_PER_PROC_VARIABLES, errors="ignore"
+                METADATA_VARS + _BOUT_PER_PROC_VARIABLES,
+                errors="ignore"
             ),
         )
 

From 04fe5d186777acb29e0cef0be23cba45676b4390 Mon Sep 17 00:00:00 2001
From: Ben Dudson <dudson2@llnl.gov>
Date: Tue, 29 Oct 2024 16:19:41 -0700
Subject: [PATCH 11/26] test_load: remove test_trim_timing_info

Timing info is now taken from the first processor,
rather than dropped.
---
 xbout/tests/test_load.py | 19 -------------------
 1 file changed, 19 deletions(-)

diff --git a/xbout/tests/test_load.py b/xbout/tests/test_load.py
index 391482fc..bff654cd 100644
--- a/xbout/tests/test_load.py
+++ b/xbout/tests/test_load.py
@@ -1495,25 +1495,6 @@ def test_keep_yboundaries_doublenull_by_filenum(
             expected = expected.isel(y=slice(None, -2, None))
         xrt.assert_equal(expected, actual)
 
-    @pytest.mark.parametrize("is_restart", [False, True])
-    def test_trim_timing_info(self, is_restart):
-        ds = create_test_data(0)
-        from xbout.load import _BOUT_PER_PROC_VARIABLES
-
-        # remove a couple of entries from _BOUT_PER_PROC_VARIABLES so we test that _trim
-        # does not fail if not all of them are present
-        _BOUT_PER_PROC_VARIABLES = _BOUT_PER_PROC_VARIABLES[:-2]
-
-        for v in _BOUT_PER_PROC_VARIABLES:
-            ds[v] = 42.0
-        ds = _trim(
-            ds, guards={}, keep_boundaries={}, nxpe=1, nype=1, is_restart=is_restart
-        )
-
-        expected = create_test_data(0)
-        xrt.assert_equal(ds, expected)
-
-
 fci_shape = (2, 2, 3, 4)
 fci_guards = (2, 2, 0)
 

From 1b9608a7dba9c8e2788787126a10fcdaab65b98c Mon Sep 17 00:00:00 2001
From: Ben Dudson <dudson2@llnl.gov>
Date: Tue, 29 Oct 2024 16:49:42 -0700
Subject: [PATCH 12/26] test_animate: Skip test_animate_list_1d_multiline

By eye there are three subplots as required, but there are also some
spurious axes. Unclear to me how to fix, so skipping for now.
---
 xbout/tests/test_animate.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xbout/tests/test_animate.py b/xbout/tests/test_animate.py
index f66a4694..72d51edd 100644
--- a/xbout/tests/test_animate.py
+++ b/xbout/tests/test_animate.py
@@ -181,6 +181,7 @@ def test_animate_list_1d_default(self, create_test_file):
 
         plt.close()
 
+    @pytest.mark.skip(reason="Plotting needs some work. 3 plots but extra axes.")
     def test_animate_list_1d_multiline(self, create_test_file):
 
         save_dir, ds = create_test_file
@@ -206,7 +207,6 @@ def test_animate_list_1d_multiline(self, create_test_file):
             )
             == 3
         )
-
         plt.close()
 
     def test_animate_list_animate_over(self, create_test_file):

From b98d4fae015cce16ea042a9cfd4616a2b9a553bf Mon Sep 17 00:00:00 2001
From: Ben Dudson <dudson2@llnl.gov>
Date: Tue, 29 Oct 2024 16:54:36 -0700
Subject: [PATCH 13/26] black formatting

---
 xbout/plotting/plotfuncs.py |  7 ++-----
 xbout/tests/test_load.py    | 33 ++++++++++-----------------------
 xbout/utils.py              | 19 +++----------------
 3 files changed, 15 insertions(+), 44 deletions(-)

diff --git a/xbout/plotting/plotfuncs.py b/xbout/plotting/plotfuncs.py
index 79b36207..6a1cee76 100644
--- a/xbout/plotting/plotfuncs.py
+++ b/xbout/plotting/plotfuncs.py
@@ -530,7 +530,7 @@ def plot3d(
                 newZ = xr.DataArray(
                     np.linspace(Zmin, Zmax, zpoints), dims="z"
                 ).expand_dims({"x": xpoints, "y": ypoints}, axis=[2, 1])
-                newR = np.sqrt(newX**2 + newY**2)
+                newR = np.sqrt(newX ** 2 + newY ** 2)
                 newzeta = np.arctan2(newY, newX)  # .values
 
                 from scipy.interpolate import (
@@ -576,10 +576,7 @@ def plot3d(
                     fill_value=datamin - 2.0 * (datamax - datamin),
                 )
                 print("do 3d interpolation")
-                grid = interp(
-                    (newzeta, newR, newZ),
-                    method="linear",
-                )
+                grid = interp((newzeta, newR, newZ), method="linear")
                 print("done interpolating")
 
             if style == "isosurface":
diff --git a/xbout/tests/test_load.py b/xbout/tests/test_load.py
index bff654cd..0b0e1e5b 100644
--- a/xbout/tests/test_load.py
+++ b/xbout/tests/test_load.py
@@ -43,6 +43,7 @@
     "MYPE",
 ]
 
+
 def test_check_extensions(tmp_path):
     files_dir = tmp_path.joinpath("data")
     files_dir.mkdir()
@@ -325,12 +326,7 @@ def _bout_xyt_example_files(
     if squashed:
         # create a single data-file, but alter the 'nxpe' and 'nype' variables, as if the
         # file had been created by combining a set of BOUT.dmp.*.nc files
-        this_lengths = (
-            lengths[0],
-            lengths[1] * nxpe,
-            lengths[2] * nype,
-            lengths[3],
-        )
+        this_lengths = (lengths[0], lengths[1] * nxpe, lengths[2] * nype, lengths[3])
         ds_list, file_list = create_bout_ds_list(
             prefix=prefix,
             lengths=this_lengths,
@@ -834,10 +830,9 @@ def test_strip_metadata(self):
 
         xrt.assert_equal(
             original.drop_vars(
-                METADATA_VARS + _BOUT_PER_PROC_VARIABLES,
-                errors="ignore",
+                METADATA_VARS + _BOUT_PER_PROC_VARIABLES, errors="ignore"
             ),
-            ds
+            ds,
         )
         assert metadata["NXPE"] == 1
 
@@ -857,8 +852,7 @@ def test_single_file(self, tmp_path_factory, bout_xyt_example_files):
         xrt.assert_equal(
             actual.drop_vars(["x", "y", "z"]).load(),
             expected.drop_vars(
-                METADATA_VARS + _BOUT_PER_PROC_VARIABLES,
-                errors="ignore",
+                METADATA_VARS + _BOUT_PER_PROC_VARIABLES, errors="ignore"
             ),
         )
 
@@ -881,8 +875,7 @@ def test_squashed_file(self, tmp_path_factory, bout_xyt_example_files):
         xrt.assert_equal(
             actual.drop_vars(["x", "y", "z"]).load(),
             expected.drop_vars(
-                METADATA_VARS + _BOUT_PER_PROC_VARIABLES,
-                errors="ignore",
+                METADATA_VARS + _BOUT_PER_PROC_VARIABLES, errors="ignore"
             ),
         )
 
@@ -993,8 +986,7 @@ def test_combine_along_x(self, tmp_path_factory, bout_xyt_example_files):
         xrt.assert_equal(
             actual.drop_vars(["x", "y", "z"]).load(),
             expected.drop_vars(
-                METADATA_VARS + _BOUT_PER_PROC_VARIABLES,
-                errors="ignore"
+                METADATA_VARS + _BOUT_PER_PROC_VARIABLES, errors="ignore"
             ),
         )
 
@@ -1028,8 +1020,7 @@ def test_combine_along_y(self, tmp_path_factory, bout_xyt_example_files):
         xrt.assert_equal(
             actual.drop_vars(["x", "y", "z"]).load(),
             expected.drop_vars(
-                METADATA_VARS + _BOUT_PER_PROC_VARIABLES,
-                errors="ignore"
+                METADATA_VARS + _BOUT_PER_PROC_VARIABLES, errors="ignore"
             ),
         )
 
@@ -1134,12 +1125,7 @@ def test_toroidal(self, tmp_path_factory, bout_xyt_example_files):
 
         # check creation without writing to disk gives identical result
         fake_ds_list, fake_grid_ds = bout_xyt_example_files(
-            None,
-            nxpe=3,
-            nype=3,
-            nt=1,
-            syn_data_type="stepped",
-            grid="grid",
+            None, nxpe=3, nype=3, nt=1, syn_data_type="stepped", grid="grid"
         )
         fake = open_boutdataset(
             datapath=fake_ds_list, geometry="toroidal", gridfilepath=fake_grid_ds
@@ -1495,6 +1481,7 @@ def test_keep_yboundaries_doublenull_by_filenum(
             expected = expected.isel(y=slice(None, -2, None))
         xrt.assert_equal(expected, actual)
 
+
 fci_shape = (2, 2, 3, 4)
 fci_guards = (2, 2, 0)
 
diff --git a/xbout/utils.py b/xbout/utils.py
index d6f24537..4538d2a4 100644
--- a/xbout/utils.py
+++ b/xbout/utils.py
@@ -693,12 +693,7 @@ def _follow_boundary(ds, start_region, start_direction, xbndry, ybndry, Rcoord,
 
         for boundary in check_order[direction]:
             result = _bounding_surface_checks[boundary](
-                ds_region,
-                boundary_points,
-                xbndry,
-                ybndry,
-                Rcoord,
-                Zcoord,
+                ds_region, boundary_points, xbndry, ybndry, Rcoord, Zcoord
             )
             if result is not None:
                 boundary_points, this_region, direction = result
@@ -782,13 +777,7 @@ def _get_bounding_surfaces(ds, coords):
         start_direction = "upper_y"
 
     boundary, checked_regions = _follow_boundary(
-        ds,
-        start_region,
-        start_direction,
-        xbndry,
-        ybndry,
-        Rcoord,
-        Zcoord,
+        ds, start_region, start_direction, xbndry, ybndry, Rcoord, Zcoord
     )
     boundaries = [boundary]
 
@@ -852,9 +841,7 @@ def _get_bounding_surfaces(ds, coords):
     # Pack the result into a DataArray
     result = [
         xr.DataArray(
-            boundary,
-            dims=("boundary", "coord"),
-            coords={"coord": [Rcoord, Zcoord]},
+            boundary, dims=("boundary", "coord"), coords={"coord": [Rcoord, Zcoord]}
         )
         for boundary in boundaries
     ]

From b2c5eb8c2300f9f98dd6cb0127febe80a5014377 Mon Sep 17 00:00:00 2001
From: Ben Dudson <dudson2@llnl.gov>
Date: Tue, 29 Oct 2024 17:15:34 -0700
Subject: [PATCH 14/26] geometries: Fix FutureWarning

---
 xbout/geometries.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xbout/geometries.py b/xbout/geometries.py
index 7caed507..553d833f 100644
--- a/xbout/geometries.py
+++ b/xbout/geometries.py
@@ -182,7 +182,7 @@ def apply_geometry(ds, geometry_name, *, coordinates=None, grid=None):
     if zcoord in updated_ds.dims and zcoord not in updated_ds.coords:
         # Generates a coordinate whose value is 0 on the first grid point, not dz/2, to
         # match how BOUT++ generates fields from input file expressions.
-        nz = updated_ds.dims[zcoord]
+        nz = updated_ds.sizes[zcoord]
 
         # In BOUT++ v5, dz is either a Field2D or Field3D.
         # We can use it as a 1D coordinate if it's a Field3D, _or_ if nz == 1

From 4a422fd844750c51df024c2cafee73e1426ce228 Mon Sep 17 00:00:00 2001
From: Ben Dudson <dudson2@llnl.gov>
Date: Tue, 29 Oct 2024 17:16:02 -0700
Subject: [PATCH 15/26] Github Actions: Drop testing python 3.8

xarray 2024.07.0 requires python >= 3.9
---
 .github/workflows/master.yml        | 2 +-
 .github/workflows/pythonpackage.yml | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml
index 8eef5a3b..c6c29ab9 100644
--- a/.github/workflows/master.yml
+++ b/.github/workflows/master.yml
@@ -20,7 +20,7 @@ jobs:
     if: always()
     strategy:
       matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11", "3.x"]
+        python-version: ["3.9", "3.10", "3.11", "3.x"]
       fail-fast: false
 
     steps:
diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml
index e323fa86..2758c5e6 100644
--- a/.github/workflows/pythonpackage.yml
+++ b/.github/workflows/pythonpackage.yml
@@ -20,7 +20,7 @@ jobs:
     if: always()
     strategy:
       matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.x"]
+        python-version: ["3.9", "3.10", "3.x"]
       fail-fast: false
 
     steps:
@@ -48,7 +48,7 @@ jobs:
     if: always()
     strategy:
       matrix:
-        python-version: ["3.8"]
+        python-version: ["3.9"]
       fail-fast: false
 
     steps:
@@ -61,7 +61,7 @@ jobs:
       run: |
         sudo apt-get update && sudo apt-get install libhdf5-dev libnetcdf-dev
         python -m pip install --upgrade pip
-        pip install xarray~=0.18.0 pandas~=1.4.0
+        pip install xarray~=2024.7.0 pandas~=1.4.0
     - name: Install package
       run: |
         pip install -e .[tests]

From 658cd4f1f60115ebc838385176c5c02eeb18a9b4 Mon Sep 17 00:00:00 2001
From: bendudson <bendudson@users.noreply.github.com>
Date: Wed, 30 Oct 2024 00:17:08 +0000
Subject: [PATCH 16/26] Apply black formatting

---
 xbout/plotting/plotfuncs.py | 2 +-
 xbout/tests/test_load.py    | 6 ++----
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/xbout/plotting/plotfuncs.py b/xbout/plotting/plotfuncs.py
index 77a53294..525d9d02 100644
--- a/xbout/plotting/plotfuncs.py
+++ b/xbout/plotting/plotfuncs.py
@@ -539,7 +539,7 @@ def plot3d(
                 newZ = xr.DataArray(
                     np.linspace(Zmin, Zmax, zpoints), dims="z"
                 ).expand_dims({"x": xpoints, "y": ypoints}, axis=[2, 1])
-                newR = np.sqrt(newX ** 2 + newY ** 2)
+                newR = np.sqrt(newX**2 + newY**2)
                 newzeta = np.arctan2(newY, newX)  # .values
 
                 from scipy.interpolate import (
diff --git a/xbout/tests/test_load.py b/xbout/tests/test_load.py
index 1eb09279..c2feb452 100644
--- a/xbout/tests/test_load.py
+++ b/xbout/tests/test_load.py
@@ -481,8 +481,7 @@ def test_combine_along_y(self, tmp_path_factory, bout_xyt_example_files):
         xrt.assert_identical(actual, fake)
 
     @pytest.mark.skip
-    def test_combine_along_t(self):
-        ...
+    def test_combine_along_t(self): ...
 
     @pytest.mark.parametrize(
         "bout_v5,metric_3D", [(False, False), (True, False), (True, True)]
@@ -627,8 +626,7 @@ def test_drop_vars(self, tmp_path_factory, bout_xyt_example_files):
         assert "n" in ds.keys()
 
     @pytest.mark.skip
-    def test_combine_along_tx(self):
-        ...
+    def test_combine_along_tx(self): ...
 
     def test_restarts(self):
         datapath = Path(__file__).parent.joinpath(

From 4cae8ea5cec5285c9b7b368032b254f73dc3eea7 Mon Sep 17 00:00:00 2001
From: Ben Dudson <dudson2@llnl.gov>
Date: Tue, 29 Oct 2024 21:29:42 -0700
Subject: [PATCH 17/26] Restore support for python 3.8

Reduce minimum xarray to 2023.1.0

Python 3.8 is EoL, but only just: Oct 7, 2024.
---
 .github/workflows/master.yml        | 2 +-
 .github/workflows/pythonpackage.yml | 4 ++--
 pyproject.toml                      | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml
index c6c29ab9..8eef5a3b 100644
--- a/.github/workflows/master.yml
+++ b/.github/workflows/master.yml
@@ -20,7 +20,7 @@ jobs:
     if: always()
     strategy:
       matrix:
-        python-version: ["3.9", "3.10", "3.11", "3.x"]
+        python-version: ["3.8", "3.9", "3.10", "3.11", "3.x"]
       fail-fast: false
 
     steps:
diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml
index 2758c5e6..8d5e43a9 100644
--- a/.github/workflows/pythonpackage.yml
+++ b/.github/workflows/pythonpackage.yml
@@ -20,7 +20,7 @@ jobs:
     if: always()
     strategy:
       matrix:
-        python-version: ["3.9", "3.10", "3.x"]
+        python-version: ["3.8", "3.9", "3.10", "3.x"]
       fail-fast: false
 
     steps:
@@ -48,7 +48,7 @@ jobs:
     if: always()
     strategy:
       matrix:
-        python-version: ["3.9"]
+        python-version: ["3.8"]
       fail-fast: false
 
     steps:
diff --git a/pyproject.toml b/pyproject.toml
index 665aa3be..0ed61625 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -33,7 +33,7 @@ classifiers = [
 ]
 requires-python = ">=3.8"
 dependencies = [
-    "xarray>=2024.7.0",
+    "xarray>=2023.01.0",
     "boutdata>=0.1.4",
     "dask[array]>=2.10.0",
     "gelidum>=0.5.3",

From b5398ad822a21a1244acad2b9b5e7d2464024f30 Mon Sep 17 00:00:00 2001
From: Ben Dudson <dudson2@llnl.gov>
Date: Tue, 29 Oct 2024 21:35:48 -0700
Subject: [PATCH 18/26] Remove test_load::test_trim_timing_info

`_trim` no longer removes `wtime` timing info.
---
 xbout/tests/test_load.py | 17 -----------------
 1 file changed, 17 deletions(-)

diff --git a/xbout/tests/test_load.py b/xbout/tests/test_load.py
index c2feb452..8e2f1a3d 100644
--- a/xbout/tests/test_load.py
+++ b/xbout/tests/test_load.py
@@ -935,20 +935,3 @@ def test_keep_yboundaries_doublenull_by_filenum(
             expected = expected.isel(y=slice(None, -2, None))
         xrt.assert_equal(expected, actual)
 
-    @pytest.mark.parametrize("is_restart", [False, True])
-    def test_trim_timing_info(self, is_restart):
-        ds = create_test_data(0)
-        from xbout.load import _BOUT_PER_PROC_VARIABLES
-
-        # remove a couple of entries from _BOUT_PER_PROC_VARIABLES so we test that _trim
-        # does not fail if not all of them are present
-        _BOUT_PER_PROC_VARIABLES = _BOUT_PER_PROC_VARIABLES[:-2]
-
-        for v in _BOUT_PER_PROC_VARIABLES:
-            ds[v] = 42.0
-        ds = _trim(
-            ds, guards={}, keep_boundaries={}, nxpe=1, nype=1, is_restart=is_restart
-        )
-
-        expected = create_test_data(0)
-        xrt.assert_equal(ds, expected)

From bbeb4925263983ee8b5feb3e7eb27b7d5c8e06a0 Mon Sep 17 00:00:00 2001
From: bendudson <bendudson@users.noreply.github.com>
Date: Wed, 30 Oct 2024 04:37:06 +0000
Subject: [PATCH 19/26] Apply black formatting

---
 xbout/tests/test_load.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/xbout/tests/test_load.py b/xbout/tests/test_load.py
index 8e2f1a3d..20ddcae0 100644
--- a/xbout/tests/test_load.py
+++ b/xbout/tests/test_load.py
@@ -934,4 +934,3 @@ def test_keep_yboundaries_doublenull_by_filenum(
         if not upper:
             expected = expected.isel(y=slice(None, -2, None))
         xrt.assert_equal(expected, actual)
-

From 54f83750df811be3dda0a5b732bfa01268bf1fc6 Mon Sep 17 00:00:00 2001
From: Ben Dudson <dudson2@llnl.gov>
Date: Tue, 29 Oct 2024 21:41:56 -0700
Subject: [PATCH 20/26] github actions update xarray version

---
 .github/workflows/master.yml        | 2 +-
 .github/workflows/pythonpackage.yml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml
index 8eef5a3b..b98b9738 100644
--- a/.github/workflows/master.yml
+++ b/.github/workflows/master.yml
@@ -61,7 +61,7 @@ jobs:
       run: |
         sudo apt-get update && sudo apt-get install libhdf5-dev libnetcdf-dev
         python -m pip install --upgrade pip
-        pip install xarray~=0.18.0 pandas~=1.4.0
+        pip install xarray~=2023.1.0 pandas~=1.4.0
     - name: Install package
       run: |
         pip install -e .[tests]
diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml
index 8d5e43a9..9ae92522 100644
--- a/.github/workflows/pythonpackage.yml
+++ b/.github/workflows/pythonpackage.yml
@@ -61,7 +61,7 @@ jobs:
       run: |
         sudo apt-get update && sudo apt-get install libhdf5-dev libnetcdf-dev
         python -m pip install --upgrade pip
-        pip install xarray~=2024.7.0 pandas~=1.4.0
+        pip install xarray~=2023.1.0 pandas~=1.4.0
     - name: Install package
       run: |
         pip install -e .[tests]

From 061505810fe422025eb116c81b04236f78342c98 Mon Sep 17 00:00:00 2001
From: Peter Hill <peter.hill@york.ac.uk>
Date: Wed, 30 Oct 2024 10:02:28 +0000
Subject: [PATCH 21/26] Ensure arguments to `np.linspace` are simple floats

Previously were scalar `xarray.DataArray`, which caused `linspace` to
fail due to missing dimensions
---
 xbout/boutdataset.py | 12 ++++++------
 xbout/geometries.py  |  2 +-
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/xbout/boutdataset.py b/xbout/boutdataset.py
index cc308fcb..7b0d8a23 100644
--- a/xbout/boutdataset.py
+++ b/xbout/boutdataset.py
@@ -597,12 +597,12 @@ def interpolate_to_cartesian(
         n_toroidal = ds.sizes[zdim]
 
         # Create Cartesian grid to interpolate to
-        Xmin = ds["X_cartesian"].min()
-        Xmax = ds["X_cartesian"].max()
-        Ymin = ds["Y_cartesian"].min()
-        Ymax = ds["Y_cartesian"].max()
-        Zmin = ds["Z_cartesian"].min()
-        Zmax = ds["Z_cartesian"].max()
+        Xmin = ds["X_cartesian"].min().data[()]
+        Xmax = ds["X_cartesian"].max().data[()]
+        Ymin = ds["Y_cartesian"].min().data[()]
+        Ymax = ds["Y_cartesian"].max().data[()]
+        Zmin = ds["Z_cartesian"].min().data[()]
+        Zmax = ds["Z_cartesian"].max().data[()]
         newX_1d = xr.DataArray(np.linspace(Xmin, Xmax, nX), dims="X")
         newX = newX_1d.expand_dims({"Y": nY, "Z": nZ}, axis=[1, 2])
         newY_1d = xr.DataArray(np.linspace(Ymin, Ymax, nY), dims="Y")
diff --git a/xbout/geometries.py b/xbout/geometries.py
index 553d833f..7223751b 100644
--- a/xbout/geometries.py
+++ b/xbout/geometries.py
@@ -214,7 +214,7 @@ def apply_geometry(ds, geometry_name, *, coordinates=None, grid=None):
                 dz = updated_ds["dz"]
 
             z0 = 2 * np.pi * updated_ds.metadata["ZMIN"]
-            z1 = z0 + nz * dz
+            z1 = z0 + nz * dz.data[()]
             if not np.all(
                 np.isclose(
                     z1,

From 9da499f55e10966fab0ec4a83f8383610f074205 Mon Sep 17 00:00:00 2001
From: Peter Hill <peter.hill@york.ac.uk>
Date: Wed, 30 Oct 2024 10:12:22 +0000
Subject: [PATCH 22/26] CI: Run on 3.12 and not 3.13 due to bug in `TestSave`

Currently, 3.13 is hanging when running these tests
---
 .github/workflows/master.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml
index b98b9738..4da18ff8 100644
--- a/.github/workflows/master.yml
+++ b/.github/workflows/master.yml
@@ -20,7 +20,7 @@ jobs:
     if: always()
     strategy:
       matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11", "3.x"]
+        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
       fail-fast: false
 
     steps:

From 8a2f8df7c3cb20a9d537605be21437f6fed72340 Mon Sep 17 00:00:00 2001
From: Peter Hill <peter.hill@york.ac.uk>
Date: Wed, 30 Oct 2024 10:13:50 +0000
Subject: [PATCH 23/26] CI: Fix ruff check

---
 .github/workflows/ruff.yaml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/ruff.yaml b/.github/workflows/ruff.yaml
index 5c26f5dd..c453abbf 100644
--- a/.github/workflows/ruff.yaml
+++ b/.github/workflows/ruff.yaml
@@ -5,11 +5,11 @@ jobs:
   dependency-review:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
-      - uses: actions/setup-python@v4
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
         with:
           python-version: '3.x'
       - name: Install ruff
         run: pip install ruff
       - name: Run ruff
-        run: ruff xbout
+        run: ruff check xbout

From 1abb17f6a2371a1adc2379dd5003780333cd33bd Mon Sep 17 00:00:00 2001
From: Peter Hill <peter.hill@york.ac.uk>
Date: Wed, 30 Oct 2024 10:16:12 +0000
Subject: [PATCH 24/26] Fix some ruff warnings

---
 pyproject.toml              | 2 +-
 xbout/plotting/plotfuncs.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 0ed61625..53cd0aa6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -79,5 +79,5 @@ write_to = "xbout/_version.py"
 [tool.setuptools]
 packages = ["xbout"]
 
-[tool.ruff]
+[tool.ruff.lint]
 ignore = ["E501"]
diff --git a/xbout/plotting/plotfuncs.py b/xbout/plotting/plotfuncs.py
index 525d9d02..0f9dc51f 100644
--- a/xbout/plotting/plotfuncs.py
+++ b/xbout/plotting/plotfuncs.py
@@ -929,10 +929,10 @@ def plot2d_polygon(
     if vmax is None:
         vmax = np.nanmax(da.max().values)
 
-    if colorbar_label == None:
+    if colorbar_label is None:
         if "short_name" in da.attrs:
             colorbar_label = da.attrs["short_name"]
-        elif da.name != None:
+        elif da.name is not None:
             colorbar_label = da.name
         else:
             colorbar_label = ""

From febb3a2ad2f93e3869983998d3c3ce6bd78e2f54 Mon Sep 17 00:00:00 2001
From: Ben Dudson <dudson2@llnl.gov>
Date: Wed, 30 Oct 2024 09:14:41 -0700
Subject: [PATCH 25/26] test_animate: Remove unneeded comment

---
 xbout/tests/test_animate.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/xbout/tests/test_animate.py b/xbout/tests/test_animate.py
index b2fd812c..fd565c55 100644
--- a/xbout/tests/test_animate.py
+++ b/xbout/tests/test_animate.py
@@ -174,7 +174,6 @@ def test_animate_list_1d_default(self, create_test_file):
 
         plt.close()
 
-    # @pytest.mark.skip(reason="Plotting needs some work. 3 plots but extra axes.")
     def test_animate_list_1d_multiline(self, create_test_file):
         save_dir, ds = create_test_file
 

From 609c78443002ed5da04806f3d41302b38021ada6 Mon Sep 17 00:00:00 2001
From: Peter Hill <peter.hill@york.ac.uk>
Date: Thu, 7 Nov 2024 09:44:59 +0000
Subject: [PATCH 26/26] CI: Don't install packages from apt

---
 .github/workflows/pythonpackage.yml | 2 --
 1 file changed, 2 deletions(-)

diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml
index 9ae92522..823eafcf 100644
--- a/.github/workflows/pythonpackage.yml
+++ b/.github/workflows/pythonpackage.yml
@@ -31,7 +31,6 @@ jobs:
         python-version: ${{ matrix.python-version }}
     - name: Install dependencies
       run: |
-        sudo apt-get update && sudo apt-get install libhdf5-dev libnetcdf-dev
         python -m pip install --upgrade pip
     - name: Install package
       run: |
@@ -59,7 +58,6 @@ jobs:
         python-version: ${{ matrix.python-version }}
     - name: Install dependencies
       run: |
-        sudo apt-get update && sudo apt-get install libhdf5-dev libnetcdf-dev
         python -m pip install --upgrade pip
         pip install xarray~=2023.1.0 pandas~=1.4.0
     - name: Install package