4 changes: 4 additions & 0 deletions docs/src/whatsnew/latest.rst
@@ -65,6 +65,10 @@ This document explains the changes made to Iris for this release
   using :meth:`~iris.cube.Cube.aggregated_by` or :meth:`~iris.cube.Cube.collapsed`.
   (:issue:`6473`, :pull:`6706`, :pull:`6719`)

+#. `@trexfeathers`_ protected the NetCDF saving code from a transient I/O
+   error, caused by bad synchronisation between Python-layer and HDF-layer
+   file locking on certain filesystems. (:pull:`6760`)


💣 Incompatible Changes
=======================
20 changes: 19 additions & 1 deletion lib/iris/fileformats/netcdf/_thread_safe_nc.py
@@ -10,6 +10,7 @@

from abc import ABC
from threading import Lock
+from time import sleep
import typing

import netCDF4
@@ -386,7 +387,24 @@ def __setitem__(self, keys, array_data):
        with _GLOBAL_NETCDF4_LOCK:
            dataset = None
            try:
-               dataset = netCDF4.Dataset(self.path, "r+")
+               # Even when fully serialised - no parallelism - HDF still
+               # occasionally fails to acquire the file. This is despite all
+               # Python locks being available at the expected moments, and
+               # the file reporting as closed. During testing, the 2nd retry
+               # always succeeded. This is likely caused by HDF-level locking
+               # running on a different timescale to Python-level locking -
+               # i.e. sometimes Python has released its locks but HDF still
+               # has not. Thought to be filesystem-dependent; further
+               # investigation is needed.
+               for attempt in range(5):
+                   try:
+                       dataset = netCDF4.Dataset(self.path, "r+")
+                       break
+                   except OSError:
+                       if attempt < 4:
+                           sleep(0.1)
+                       else:
+                           raise
                var = dataset.variables[self.varname]
                var[keys] = array_data
            finally:
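For reference, the retry logic added above reads as follows when factored into a standalone helper. This is a minimal sketch only, assuming the same fixed retry count and sleep interval as the merged code; open_dataset_with_retry is a hypothetical name and is not part of Iris:

    from time import sleep

    import netCDF4


    def open_dataset_with_retry(path, mode="r+", attempts=5, delay=0.1):
        """Open a netCDF4 Dataset, retrying on transient OSError.

        Hypothetical helper; the merged code inlines this loop instead.
        """
        for attempt in range(attempts):
            try:
                return netCDF4.Dataset(path, mode)
            except OSError:
                if attempt == attempts - 1:
                    raise
                # Give the HDF layer time to release its own lock before retrying.
                sleep(delay)

The final attempt re-raises the original OSError, so a genuinely locked file still fails loudly rather than being silently swallowed.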
5 changes: 4 additions & 1 deletion lib/iris/tests/test_coding_standards.py
@@ -15,7 +15,6 @@
import pytest

import iris
-from iris.fileformats.netcdf import _thread_safe_nc
from iris.tests import system_test

LICENSE_TEMPLATE = """# Copyright Iris contributors
Expand Down Expand Up @@ -44,6 +43,9 @@ def test_netcdf4_import():
    # Please avoid including these phrases in any comments/strings throughout
    # Iris (e.g. use "from the netCDF4 library" instead) - this allows the
    # below search to remain quick and simple.
+   from iris.fileformats.netcdf import _thread_safe_nc
+   from iris.tests.unit.fileformats.netcdf._thread_safe_nc import test_NetCDFWriteProxy

    import_strings = ("import netCDF4", "from netCDF4")

    files_including_import = []
@@ -55,6 +57,7 @@

    expected = [
        Path(_thread_safe_nc.__file__),
+       Path(test_NetCDFWriteProxy.__file__),
        Path(system_test.__file__),
        Path(__file__),
    ]
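The coding-standards change above extends an existing guard: the test scans every Python source file for direct netCDF4 import phrases and compares the hits against a short allowlist, which now also contains the new unit-test module. A minimal sketch of that scanning approach (repo_root and files_importing_netcdf4 are hypothetical names; the real test builds its file list from the installed iris package):

    from pathlib import Path

    IMPORT_STRINGS = ("import netCDF4", "from netCDF4")


    def files_importing_netcdf4(repo_root: Path) -> list[Path]:
        """Return every .py file under repo_root that imports netCDF4 directly."""
        hits = []
        for py_file in repo_root.rglob("*.py"):
            text = py_file.read_text(encoding="utf-8")
            if any(phrase in text for phrase in IMPORT_STRINGS):
                hits.append(py_file)
        return sorted(hits)

The real test then asserts that the scan result matches the expected allowlist exactly, so any new direct netCDF4 import elsewhere in Iris fails CI.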
8 changes: 8 additions & 0 deletions lib/iris/tests/unit/fileformats/netcdf/_thread_safe_nc/__init__.py
@@ -0,0 +1,8 @@
# Copyright Iris contributors
#
# This file is part of Iris and is released under the BSD license.
# See LICENSE in the root of the repository for full licensing details.
"""Unit tests for the :mod:`iris.fileformats.netcdf._thread_safe_nc` module.

Not required for a private module, but useful for specific checks.
"""
86 changes: 86 additions & 0 deletions lib/iris/tests/unit/fileformats/netcdf/_thread_safe_nc/test_NetCDFWriteProxy.py
@@ -0,0 +1,86 @@
# Copyright Iris contributors
#
# This file is part of Iris and is released under the BSD license.
# See LICENSE in the root of the repository for full licensing details.
"""Unit tests for :class:`iris.fileformats.netcdf._thread_safe_nc.NetCDFWriteProxy`."""

from threading import Lock

import netCDF4 as nc
from netCDF4 import Dataset as DatasetOriginal
import pytest

from iris.fileformats.netcdf._thread_safe_nc import DatasetWrapper, NetCDFWriteProxy


@pytest.fixture
def dataset_path(tmp_path):
    return tmp_path / "test.nc"


@pytest.fixture
def netcdf_variable(dataset_path):
    dataset = DatasetWrapper(dataset_path, "w")
    _ = dataset.createDimension("dim1", 1)
    variable = dataset.createVariable(
        "test_var",
        "f4",
        ("dim1",),
    )
    return variable


@pytest.fixture
def write_proxy(netcdf_variable) -> NetCDFWriteProxy:
    dataset = netcdf_variable.group()
    proxy = NetCDFWriteProxy(
        filepath=dataset.filepath(),
        cf_var=netcdf_variable,
        file_write_lock=Lock(),
    )
    dataset.close()
    return proxy


class UnreliableDatasetMaker:
"""A mock operation that returns a Dataset, but fails the first time it is called.

This simulates non-deterministic HDF locking errors which are difficult to
debug at the Python layer - pending further investigation.
"""

def __init__(self, attempts_before_success=1):
self.attempts_before_success = attempts_before_success
self.call_count = 0

def __call__(self, *args, **kwargs) -> nc.Dataset:
self.call_count += 1
if self.call_count <= self.attempts_before_success:
raise OSError("Simulated non-deterministic HDF locking error")
else:
return DatasetOriginal(*args, **kwargs)


def test_handle_hdf_locking_error(dataset_path, monkeypatch, write_proxy):
"""Test that NetCDFWriteProxy can handle non-deterministic HDF locking errors."""
monkeypatch.setattr(nc, "Dataset", UnreliableDatasetMaker())
with pytest.raises(OSError, match="Simulated non-deterministic HDF locking error"):
dataset = nc.Dataset(write_proxy.path, "r+")
var = dataset.variables[write_proxy.varname]
var[0] = 1.0

# Reset.
monkeypatch.setattr(nc, "Dataset", UnreliableDatasetMaker())
try:
write_proxy[0] = 1.0
except OSError:
pytest.fail("NetCDFWriteProxy failed to handle HDF locking error")


def test_abandon_many_failures(dataset_path, monkeypatch, write_proxy):
"""Test that NetCDFWriteProxy gives up after many failed attempts."""
monkeypatch.setattr(
nc, "Dataset", UnreliableDatasetMaker(attempts_before_success=10)
)
with pytest.raises(OSError, match="Simulated non-deterministic HDF locking error"):
write_proxy[0] = 1.0
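Assuming the test-directory layout implied by the import in test_coding_standards.py (the full paths of the new files are not shown explicitly in this diff), the new tests can be run directly with pytest:

    pytest lib/iris/tests/unit/fileformats/netcdf/_thread_safe_nc/test_NetCDFWriteProxy.py

Both tests monkeypatch netCDF4.Dataset at the module level, so the retry path is exercised deterministically, without needing a real locking failure or a special filesystem.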