From b1a056093e06384057133f9050ed6443281e4e67 Mon Sep 17 00:00:00 2001 From: Julian Geiger Date: Thu, 21 Nov 2024 16:14:00 +0100 Subject: [PATCH] Implement function with `du` and `lstat` fallback In addition, extend the tests for `RemoteData` in `test_remote.py` for the methods added in this PR, as well as parametrize them to run on a `RemoteData` via local and ssh transport. --- src/aiida/orm/nodes/data/remote/base.py | 41 ++++++++++++++++--------- tests/orm/nodes/data/test_remote.py | 16 ++++++---- 2 files changed, 36 insertions(+), 21 deletions(-) diff --git a/src/aiida/orm/nodes/data/remote/base.py b/src/aiida/orm/nodes/data/remote/base.py index d77f25000..0d08ba065 100644 --- a/src/aiida/orm/nodes/data/remote/base.py +++ b/src/aiida/orm/nodes/data/remote/base.py @@ -10,12 +10,13 @@ from __future__ import annotations -import os import logging +import os from pathlib import Path from aiida.orm import AuthInfo from aiida.orm.fields import add_field +from aiida.transports import Transport from ..data import Data @@ -109,7 +110,10 @@ def listdir(self, relpath='.'): try: return transport.listdir(full_path) except OSError as exception: - if exception.errno in (2, 20): # directory not existing or not a directory + if exception.errno in ( + 2, + 20, + ): # directory not existing or not a directory exc = OSError( f'The required remote folder {full_path} on {self.computer.label} does not exist, is not a ' 'directory or has been deleted.' @@ -138,7 +142,10 @@ def listdir_withattributes(self, path='.'): try: return transport.listdir_withattributes(full_path) except OSError as exception: - if exception.errno in (2, 20): # directory not existing or not a directory + if exception.errno in ( + 2, + 20, + ): # directory not existing or not a directory exc = OSError( f'The required remote folder {full_path} on {self.computer.label} does not exist, is not a ' 'directory or has been deleted.' @@ -199,7 +206,8 @@ def get_size_on_disk(self, relpath: Path | None = None) -> str: Connects to the remote folder and returns the total size of all files in the directory recursively in a human-readable format. - :param relpath: File or directory path for which the total size should be returned, relative to ``self.get_remote_path``. + :param relpath: File or directory path for which the total size should be returned, relative to + ``self.get_remote_path``. :return: Total size of file or directory in human-readable format. :raises: FileNotFoundError, if file or directory does not exist. @@ -213,25 +221,28 @@ def get_size_on_disk(self, relpath: Path | None = None) -> str: with authinfo.get_transport() as transport: if not transport.isdir(str(full_path)) and not transport.isfile(str(full_path)): - exc_message = f'The required remote folder {full_path} on Computer <{computer_label}> does not exist, is not a directory or has been deleted.' + exc_message = ( + f'The required remote folder {full_path} on Computer <{computer_label}>' + 'does not exist, is not a directory or has been deleted.' + ) raise FileNotFoundError(exc_message) try: total_size: int = self._get_size_on_disk_du(full_path, transport) except RuntimeError: - lstat_warn = ( - "Problem executing `du` command. Will return total file size based on `lstat`. " - "Take the result with a grain of salt, as `lstat` does not consider the file system block size, " - "but instead returns the true size of the files in bytes, which differs from the actual space requirements on disk." + 'Problem executing `du` command. Will return total file size based on `lstat`. ' + 'Take the result with a grain of salt, as `lstat` does not consider the file system block size, ' + 'but instead returns the true size of the files in bytes, which differs from the actual space' + 'requirements on disk.' ) _logger.warning(lstat_warn) total_size: int = self._get_size_on_disk_lstat(full_path, transport) except OSError: - _logger.critical("Could not evaluate directory size using either `du` or `lstat`.") + _logger.critical('Could not evaluate directory size using either `du` or `lstat`.') return format_directory_size(size_in_bytes=total_size) @@ -254,13 +265,13 @@ def _get_size_on_disk_du(self, full_path: Path, transport: 'Transport') -> int: total_size: int = int(stdout.split('\t')[0]) return total_size else: - raise RuntimeError(f"Error executing `du` command: {stderr}") + raise RuntimeError(f'Error executing `du` command: {stderr}') def _get_size_on_disk_lstat(self, full_path, transport) -> int: - - """Connects to the remote folder and returns the total size of all files in the directory recursively in bytes - using ``lstat``. Note that even if a file is only 1 byte, on disk, it still occupies one full disk block size. As - such, getting accurate measures of the total expected size on disk when retrieving a ``RemoteData`` is not + """ + Connects to the remote folder and returns the total size of all files in the directory recursively in bytes + using ``lstat``. Note that even if a file is only 1 byte, on disk, it still occupies one full disk block size. + As such, getting accurate measures of the total expected size on disk when retrieving a ``RemoteData`` is not straightforward with ``lstat``, as one would need to consider the occupied block sizes for each file, as well as repository metadata. Thus, this function only serves as a fallback in the absence of the ``du`` command. diff --git a/tests/orm/nodes/data/test_remote.py b/tests/orm/nodes/data/test_remote.py index 50b0db05e..f5d169d01 100644 --- a/tests/orm/nodes/data/test_remote.py +++ b/tests/orm/nodes/data/test_remote.py @@ -23,6 +23,7 @@ def remote_data_local(tmp_path, aiida_localhost): (tmp_path / 'file.txt').write_bytes(b'some content') return node + @pytest.fixture def remote_data_ssh(tmp_path, aiida_computer_ssh): """Return a non-empty ``RemoteData`` instance.""" @@ -35,7 +36,8 @@ def remote_data_ssh(tmp_path, aiida_computer_ssh): (tmp_path / 'file.txt').write_bytes(b'some content') return node -@pytest.mark.parametrize('fixture', ["remote_data_local", "remote_data_ssh"]) + +@pytest.mark.parametrize('fixture', ['remote_data_local', 'remote_data_ssh']) def test_clean(request, fixture): """Test the :meth:`aiida.orm.nodes.data.remote.base.RemoteData.clean` method.""" @@ -47,7 +49,8 @@ def test_clean(request, fixture): assert remote_data.is_empty assert remote_data.is_cleaned -@pytest.mark.parametrize('fixture', ["remote_data_local", "remote_data_ssh"]) + +@pytest.mark.parametrize('fixture', ['remote_data_local', 'remote_data_ssh']) def test_get_size_on_disk_du(request, fixture, monkeypatch): """Test the :meth:`aiida.orm.nodes.data.remote.base.RemoteData.clean` method.""" @@ -66,11 +69,11 @@ def mock_exec_command_wait(command): return (1, '', 'Error executing `du` command') monkeypatch.setattr(transport, 'exec_command_wait', mock_exec_command_wait) - with pytest.raises(RuntimeError) as excinfo: + with pytest.raises(RuntimeError, match='Error executing `du`.*'): remote_data._get_size_on_disk_du(full_path, transport) -@pytest.mark.parametrize('fixture', ["remote_data_local", "remote_data_ssh"]) +@pytest.mark.parametrize('fixture', ['remote_data_local', 'remote_data_ssh']) def test_get_size_on_disk_lstat(request, fixture): """Test the :meth:`aiida.orm.nodes.data.remote.base.RemoteData.clean` method.""" @@ -84,13 +87,14 @@ def test_get_size_on_disk_lstat(request, fixture): assert size_on_disk == 12 -@pytest.mark.parametrize('fixture', ["remote_data_local", "remote_data_ssh"]) +@pytest.mark.parametrize('fixture', ['remote_data_local', 'remote_data_ssh']) def test_get_size_on_disk(request, fixture): """Test the :meth:`aiida.orm.nodes.data.remote.base.RemoteData.clean` method.""" remote_data = request.getfixturevalue(fixture) - # Check here for human-readable output string, as integer byte values are checked in `test_get_size_on_disk_[du|lstat]` + # Check here for human-readable output string, as integer byte values are checked in + # `test_get_size_on_disk_[du|lstat]` size_on_disk = remote_data.get_size_on_disk() assert size_on_disk == '4.01 KB'