4 changes: 4 additions & 0 deletions docs/src/whatsnew/latest.rst
@@ -65,6 +65,10 @@ This document explains the changes made to Iris for this release
   using :meth:`~iris.cube.Cube.aggregated_by` or :meth:`~iris.cube.Cube.collapsed`.
   (:issue:`6473`, :pull:`6706`, :pull:`6719`)

+#. `@trexfeathers`_ protected the NetCDF saving code from a transient I/O
+   error, caused by bad synchronisation between Python-layer and HDF-layer
+   file locking on certain filesystems. (:pull:`6760`)


💣 Incompatible Changes
=======================
20 changes: 19 additions & 1 deletion lib/iris/fileformats/netcdf/_thread_safe_nc.py
@@ -10,6 +10,7 @@

from abc import ABC
from threading import Lock
+from time import sleep
import typing

import netCDF4
@@ -386,7 +387,24 @@ def __setitem__(self, keys, array_data):
        with _GLOBAL_NETCDF4_LOCK:
            dataset = None
            try:
-               dataset = netCDF4.Dataset(self.path, "r+")
+               # Even when fully serialised - no parallelism - HDF still
+               # occasionally fails to acquire the file. This is despite all
+               # Python locks being available at the expected moments, and
+               # the file reporting as closed. During testing, the 2nd retry
+               # always succeeded. This is likely caused by HDF-level locking
+               # running on a different timescale to Python-level locking -
+               # i.e. sometimes Python has released its locks but HDF still
+               # has not. Thought to be filesystem-dependent; further
+               # investigation is needed.
+               for attempt in range(5):
+                   try:
+                       dataset = netCDF4.Dataset(self.path, "r+")
+                       break
+                   except OSError:
+                       if attempt < 4:
+                           sleep(0.1)
+                       else:
+                           raise
                var = dataset.variables[self.varname]
                var[keys] = array_data
            finally:
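For reference, the retry logic added above reads as follows when factored into a standalone helper. This is a minimal sketch only, assuming the same fixed retry count and sleep interval as the merged code; open_dataset_with_retry is a hypothetical name and is not part of Iris:

    from time import sleep

    import netCDF4


    def open_dataset_with_retry(path, mode="r+", attempts=5, delay=0.1):
        """Open a netCDF4 Dataset, retrying on transient OSError.

        Hypothetical helper; the merged code inlines this loop instead.
        """
        for attempt in range(attempts):
            try:
                return netCDF4.Dataset(path, mode)
            except OSError:
                if attempt == attempts - 1:
                    raise
                # Give the HDF layer time to release its own lock before retrying.
                sleep(delay)

The final attempt re-raises the original OSError, so a genuinely locked file still fails loudly rather than being silently swallowed.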
5 changes: 4 additions & 1 deletion lib/iris/tests/test_coding_standards.py
@@ -15,7 +15,6 @@
import pytest

import iris
-from iris.fileformats.netcdf import _thread_safe_nc
from iris.tests import system_test

LICENSE_TEMPLATE = """# Copyright Iris contributors
Expand Down Expand Up @@ -44,6 +43,9 @@ def test_netcdf4_import():
    # Please avoid including these phrases in any comments/strings throughout
    # Iris (e.g. use "from the netCDF4 library" instead) - this allows the
    # below search to remain quick and simple.
+   from iris.fileformats.netcdf import _thread_safe_nc
+   from iris.tests.unit.fileformats.netcdf._thread_safe_nc import test_NetCDFWriteProxy

    import_strings = ("import netCDF4", "from netCDF4")

    files_including_import = []
@@ -55,6 +57,7 @@

    expected = [
        Path(_thread_safe_nc.__file__),
+       Path(test_NetCDFWriteProxy.__file__),
        Path(system_test.__file__),
        Path(__file__),
    ]
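The coding-standards change above extends an existing guard: the test scans every Python source file for direct netCDF4 import phrases and compares the hits against a short allowlist, which now also contains the new unit-test module. A minimal sketch of that scanning approach (repo_root and files_importing_netcdf4 are hypothetical names; the real test builds its file list from the installed iris package):

    from pathlib import Path

    IMPORT_STRINGS = ("import netCDF4", "from netCDF4")


    def files_importing_netcdf4(repo_root: Path) -> list[Path]:
        """Return every .py file under repo_root that imports netCDF4 directly."""
        hits = []
        for py_file in repo_root.rglob("*.py"):
            text = py_file.read_text(encoding="utf-8")
            if any(phrase in text for phrase in IMPORT_STRINGS):
                hits.append(py_file)
        return sorted(hits)

The real test then asserts that the scan result matches the expected allowlist exactly, so any new direct netCDF4 import elsewhere in Iris fails CI.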
8 changes: 8 additions & 0 deletions lib/iris/tests/unit/fileformats/netcdf/_thread_safe_nc/__init__.py
@@ -0,0 +1,8 @@
# Copyright Iris contributors
#
# This file is part of Iris and is released under the BSD license.
# See LICENSE in the root of the repository for full licensing details.
"""Unit tests for the :mod:`iris.fileformats.netcdf._thread_safe_nc` module.

Not required for a private module, but useful for specific checks.
"""
86 changes: 86 additions & 0 deletions lib/iris/tests/unit/fileformats/netcdf/_thread_safe_nc/test_NetCDFWriteProxy.py
@@ -0,0 +1,86 @@
# Copyright Iris contributors
#
# This file is part of Iris and is released under the BSD license.
# See LICENSE in the root of the repository for full licensing details.
"""Unit tests for :class:`iris.fileformats.netcdf._thread_safe_nc.NetCDFWriteProxy`."""

from threading import Lock

import netCDF4 as nc
from netCDF4 import Dataset as DatasetOriginal
import pytest

from iris.fileformats.netcdf._thread_safe_nc import DatasetWrapper, NetCDFWriteProxy


@pytest.fixture
def dataset_path(tmp_path):
    return tmp_path / "test.nc"


@pytest.fixture
def netcdf_variable(dataset_path):
    dataset = DatasetWrapper(dataset_path, "w")
    _ = dataset.createDimension("dim1", 1)
    variable = dataset.createVariable(
        "test_var",
        "f4",
        ("dim1",),
    )
    return variable


@pytest.fixture
def write_proxy(netcdf_variable) -> NetCDFWriteProxy:
    dataset = netcdf_variable.group()
    proxy = NetCDFWriteProxy(
        filepath=dataset.filepath(),
        cf_var=netcdf_variable,
        file_write_lock=Lock(),
    )
    dataset.close()
    return proxy


class UnreliableDatasetMaker:
"""A mock operation that returns a Dataset, but fails the first time it is called.

This simulates non-deterministic HDF locking errors which are difficult to
debug at the Python layer - pending further investigation.
"""

def __init__(self, attempts_before_success=1):
self.attempts_before_success = attempts_before_success
self.call_count = 0

def __call__(self, *args, **kwargs) -> nc.Dataset:
self.call_count += 1
if self.call_count <= self.attempts_before_success:
raise OSError("Simulated non-deterministic HDF locking error")
else:
return DatasetOriginal(*args, **kwargs)


def test_handle_hdf_locking_error(dataset_path, monkeypatch, write_proxy):
"""Test that NetCDFWriteProxy can handle non-deterministic HDF locking errors."""
monkeypatch.setattr(nc, "Dataset", UnreliableDatasetMaker())
with pytest.raises(OSError, match="Simulated non-deterministic HDF locking error"):
dataset = nc.Dataset(write_proxy.path, "r+")
var = dataset.variables[write_proxy.varname]
var[0] = 1.0

# Reset.
monkeypatch.setattr(nc, "Dataset", UnreliableDatasetMaker())
try:
write_proxy[0] = 1.0
except OSError:
pytest.fail("NetCDFWriteProxy failed to handle HDF locking error")


def test_abandon_many_failures(dataset_path, monkeypatch, write_proxy):
"""Test that NetCDFWriteProxy gives up after many failed attempts."""
monkeypatch.setattr(
nc, "Dataset", UnreliableDatasetMaker(attempts_before_success=10)
)
with pytest.raises(OSError, match="Simulated non-deterministic HDF locking error"):
write_proxy[0] = 1.0
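Assuming the test-directory layout implied by the import in test_coding_standards.py (the full paths of the new files are not shown explicitly in this diff), the new tests can be run directly with pytest:

    pytest lib/iris/tests/unit/fileformats/netcdf/_thread_safe_nc/test_NetCDFWriteProxy.py

Both tests monkeypatch netCDF4.Dataset at the module level, so the retry path is exercised deterministically, without needing a real locking failure or a special filesystem.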