Skip to content

Commit

Permalink
Implement function with du and lstat fallback
Browse files Browse the repository at this point in the history
In addition, extend the tests for `RemoteData` in `test_remote.py`
for the methods added in this PR, as well as parametrize them to run on
a `RemoteData` via local and ssh transport.
  • Loading branch information
GeigerJ2 committed Nov 21, 2024
1 parent e568e2a commit b1a0560
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 21 deletions.
41 changes: 26 additions & 15 deletions src/aiida/orm/nodes/data/remote/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,13 @@

from __future__ import annotations

import os
import logging
import os
from pathlib import Path

from aiida.orm import AuthInfo
from aiida.orm.fields import add_field
from aiida.transports import Transport

from ..data import Data

Expand Down Expand Up @@ -109,7 +110,10 @@ def listdir(self, relpath='.'):
try:
return transport.listdir(full_path)
except OSError as exception:
if exception.errno in (2, 20): # directory not existing or not a directory
if exception.errno in (

Check warning on line 113 in src/aiida/orm/nodes/data/remote/base.py

View check run for this annotation

Codecov / codecov/patch

src/aiida/orm/nodes/data/remote/base.py#L113

Added line #L113 was not covered by tests
2,
20,
): # directory not existing or not a directory
exc = OSError(
f'The required remote folder {full_path} on {self.computer.label} does not exist, is not a '
'directory or has been deleted.'
Expand Down Expand Up @@ -138,7 +142,10 @@ def listdir_withattributes(self, path='.'):
try:
return transport.listdir_withattributes(full_path)
except OSError as exception:
if exception.errno in (2, 20): # directory not existing or not a directory
if exception.errno in (

Check warning on line 145 in src/aiida/orm/nodes/data/remote/base.py

View check run for this annotation

Codecov / codecov/patch

src/aiida/orm/nodes/data/remote/base.py#L145

Added line #L145 was not covered by tests
2,
20,
): # directory not existing or not a directory
exc = OSError(
f'The required remote folder {full_path} on {self.computer.label} does not exist, is not a '
'directory or has been deleted.'
Expand Down Expand Up @@ -199,7 +206,8 @@ def get_size_on_disk(self, relpath: Path | None = None) -> str:
Connects to the remote folder and returns the total size of all files in the directory recursively in a
human-readable format.
:param relpath: File or directory path for which the total size should be returned, relative to ``self.get_remote_path``.
:param relpath: File or directory path for which the total size should be returned, relative to
``self.get_remote_path``.
:return: Total size of file or directory in human-readable format.
:raises: FileNotFoundError, if file or directory does not exist.
Expand All @@ -213,25 +221,28 @@ def get_size_on_disk(self, relpath: Path | None = None) -> str:

with authinfo.get_transport() as transport:
if not transport.isdir(str(full_path)) and not transport.isfile(str(full_path)):
exc_message = f'The required remote folder {full_path} on Computer <{computer_label}> does not exist, is not a directory or has been deleted.'
exc_message = (
f'The required remote folder {full_path} on Computer <{computer_label}>'
'does not exist, is not a directory or has been deleted.'
)
raise FileNotFoundError(exc_message)

try:
total_size: int = self._get_size_on_disk_du(full_path, transport)

except RuntimeError:

lstat_warn = (

Check warning on line 234 in src/aiida/orm/nodes/data/remote/base.py

View check run for this annotation

Codecov / codecov/patch

src/aiida/orm/nodes/data/remote/base.py#L233-L234

Added lines #L233 - L234 were not covered by tests
"Problem executing `du` command. Will return total file size based on `lstat`. "
"Take the result with a grain of salt, as `lstat` does not consider the file system block size, "
"but instead returns the true size of the files in bytes, which differs from the actual space requirements on disk."
'Problem executing `du` command. Will return total file size based on `lstat`. '
'Take the result with a grain of salt, as `lstat` does not consider the file system block size, '
'but instead returns the true size of the files in bytes, which differs from the actual space'
'requirements on disk.'
)
_logger.warning(lstat_warn)

Check warning on line 240 in src/aiida/orm/nodes/data/remote/base.py

View check run for this annotation

Codecov / codecov/patch

src/aiida/orm/nodes/data/remote/base.py#L240

Added line #L240 was not covered by tests

total_size: int = self._get_size_on_disk_lstat(full_path, transport)

Check warning on line 242 in src/aiida/orm/nodes/data/remote/base.py

View check run for this annotation

Codecov / codecov/patch

src/aiida/orm/nodes/data/remote/base.py#L242

Added line #L242 was not covered by tests

except OSError:

Check warning on line 244 in src/aiida/orm/nodes/data/remote/base.py

View check run for this annotation

Codecov / codecov/patch

src/aiida/orm/nodes/data/remote/base.py#L244

Added line #L244 was not covered by tests
_logger.critical("Could not evaluate directory size using either `du` or `lstat`.")
_logger.critical('Could not evaluate directory size using either `du` or `lstat`.')

return format_directory_size(size_in_bytes=total_size)

Expand All @@ -254,13 +265,13 @@ def _get_size_on_disk_du(self, full_path: Path, transport: 'Transport') -> int:
total_size: int = int(stdout.split('\t')[0])
return total_size
else:
raise RuntimeError(f"Error executing `du` command: {stderr}")
raise RuntimeError(f'Error executing `du` command: {stderr}')

def _get_size_on_disk_lstat(self, full_path, transport) -> int:

"""Connects to the remote folder and returns the total size of all files in the directory recursively in bytes
using ``lstat``. Note that even if a file is only 1 byte, on disk, it still occupies one full disk block size. As
such, getting accurate measures of the total expected size on disk when retrieving a ``RemoteData`` is not
"""
Connects to the remote folder and returns the total size of all files in the directory recursively in bytes
using ``lstat``. Note that even if a file is only 1 byte, on disk, it still occupies one full disk block size.
As such, getting accurate measures of the total expected size on disk when retrieving a ``RemoteData`` is not
straightforward with ``lstat``, as one would need to consider the occupied block sizes for each file, as well as
repository metadata. Thus, this function only serves as a fallback in the absence of the ``du`` command.
Expand Down
16 changes: 10 additions & 6 deletions tests/orm/nodes/data/test_remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ def remote_data_local(tmp_path, aiida_localhost):
(tmp_path / 'file.txt').write_bytes(b'some content')
return node


@pytest.fixture
def remote_data_ssh(tmp_path, aiida_computer_ssh):
"""Return a non-empty ``RemoteData`` instance."""
Expand All @@ -35,7 +36,8 @@ def remote_data_ssh(tmp_path, aiida_computer_ssh):
(tmp_path / 'file.txt').write_bytes(b'some content')
return node

@pytest.mark.parametrize('fixture', ["remote_data_local", "remote_data_ssh"])

@pytest.mark.parametrize('fixture', ['remote_data_local', 'remote_data_ssh'])
def test_clean(request, fixture):
"""Test the :meth:`aiida.orm.nodes.data.remote.base.RemoteData.clean` method."""

Expand All @@ -47,7 +49,8 @@ def test_clean(request, fixture):
assert remote_data.is_empty
assert remote_data.is_cleaned

@pytest.mark.parametrize('fixture', ["remote_data_local", "remote_data_ssh"])

@pytest.mark.parametrize('fixture', ['remote_data_local', 'remote_data_ssh'])
def test_get_size_on_disk_du(request, fixture, monkeypatch):
"""Test the :meth:`aiida.orm.nodes.data.remote.base.RemoteData.clean` method."""

Expand All @@ -66,11 +69,11 @@ def mock_exec_command_wait(command):
return (1, '', 'Error executing `du` command')

monkeypatch.setattr(transport, 'exec_command_wait', mock_exec_command_wait)
with pytest.raises(RuntimeError) as excinfo:
with pytest.raises(RuntimeError, match='Error executing `du`.*'):
remote_data._get_size_on_disk_du(full_path, transport)


@pytest.mark.parametrize('fixture', ["remote_data_local", "remote_data_ssh"])
@pytest.mark.parametrize('fixture', ['remote_data_local', 'remote_data_ssh'])
def test_get_size_on_disk_lstat(request, fixture):
"""Test the :meth:`aiida.orm.nodes.data.remote.base.RemoteData.clean` method."""

Expand All @@ -84,13 +87,14 @@ def test_get_size_on_disk_lstat(request, fixture):
assert size_on_disk == 12


@pytest.mark.parametrize('fixture', ["remote_data_local", "remote_data_ssh"])
@pytest.mark.parametrize('fixture', ['remote_data_local', 'remote_data_ssh'])
def test_get_size_on_disk(request, fixture):
"""Test the :meth:`aiida.orm.nodes.data.remote.base.RemoteData.clean` method."""

remote_data = request.getfixturevalue(fixture)

# Check here for human-readable output string, as integer byte values are checked in `test_get_size_on_disk_[du|lstat]`
# Check here for human-readable output string, as integer byte values are checked in
# `test_get_size_on_disk_[du|lstat]`
size_on_disk = remote_data.get_size_on_disk()
assert size_on_disk == '4.01 KB'

Expand Down

0 comments on commit b1a0560

Please sign in to comment.