From 713d45b349f6099e66e59647ad0659e8f26b7464 Mon Sep 17 00:00:00 2001 From: Aaron Wilson Date: Wed, 9 Oct 2024 16:55:51 -0500 Subject: [PATCH] sdk/python: Simplify object file put by using requests streaming upload, pathlib Signed-off-by: Aaron Wilson --- python/aistore/sdk/obj/object.py | 14 +++--- python/aistore/sdk/utils.py | 27 ++++++------ python/tests/unit/sdk/obj/test_object.py | 7 ++- python/tests/unit/sdk/test_bucket.py | 54 ++++++++++++------------ 4 files changed, 54 insertions(+), 48 deletions(-) diff --git a/python/aistore/sdk/obj/object.py b/python/aistore/sdk/obj/object.py index 9a5c569c37..75193bbb58 100644 --- a/python/aistore/sdk/obj/object.py +++ b/python/aistore/sdk/obj/object.py @@ -3,6 +3,7 @@ # from dataclasses import dataclass from io import BufferedWriter +from pathlib import Path from typing import Dict from requests import Response @@ -42,7 +43,7 @@ PromoteAPIArgs, BlobMsg, ) -from aistore.sdk.utils import read_file_bytes, validate_file +from aistore.sdk.utils import validate_file from aistore.sdk.obj.object_props import ObjectProps @@ -247,14 +248,14 @@ def put_content(self, content: bytes) -> Response: requests.ConnectionTimeout: Timed out connecting to AIStore requests.ReadTimeout: Timed out waiting response from AIStore """ - return self._put_data(content) + return self._put_data(data=content) - def put_file(self, path: str = None) -> Response: + def put_file(self, path: str or Path) -> Response: """ Puts a local file as an object to a bucket in AIS storage. Args: - path (str): Path to local file + path (str or Path): Path to local file Raises: requests.RequestException: "There was an ambiguous exception that occurred while handling..." @@ -264,9 +265,10 @@ def put_file(self, path: str = None) -> Response: ValueError: The path provided is not a valid file """ validate_file(path) - return self._put_data(read_file_bytes(path)) + with open(path, "rb") as f: + return self._put_data(f) - def _put_data(self, data: bytes) -> Response: + def _put_data(self, data) -> Response: return self._client.request( HTTP_METHOD_PUT, path=self._object_path, diff --git a/python/aistore/sdk/utils.py b/python/aistore/sdk/utils.py index 0a671462cb..c86ded3470 100644 --- a/python/aistore/sdk/utils.py +++ b/python/aistore/sdk/utils.py @@ -114,7 +114,7 @@ def probing_frequency(dur: int) -> float: return max(freq, 0.1) -def read_file_bytes(filepath: str): +def read_file_bytes(filepath: str) -> bytes: """ Given a filepath, read the content as bytes Args: @@ -126,37 +126,40 @@ def read_file_bytes(filepath: str): return reader.read() -def _check_path_exists(path: str) -> None: - if not Path(path).exists(): +def _check_path_exists(path: Path) -> None: + if not path.exists(): raise ValueError(f"Path: {path} does not exist") -def validate_file(path: str) -> None: +def validate_file(path: str or Path) -> None: """ Validate that a file exists and is a file Args: - path: Path to validate + path (str or Path): Path to validate Raises: ValueError: If path does not exist or is not a file """ + if isinstance(path, str): + path = Path(path) _check_path_exists(path) - path_obj = Path(path) - if not path_obj.exists(): + if not path.exists(): raise ValueError(f"Path: {path} does not exist") - if not path_obj.is_file(): + if not path.is_file(): raise ValueError(f"Path: {path} is a directory, not a file") -def validate_directory(path: str) -> None: +def validate_directory(path: str or Path) -> None: """ Validate that a directory exists and is a directory Args: - path: Path to validate + path (str or Path): Path to validate Raises: ValueError: If path does not exist or is not a directory """ + if isinstance(path, str): + path = Path(path) _check_path_exists(path) - if not Path(path).is_dir(): + if not path.is_dir(): raise ValueError(f"Path: {path} is a file, not a directory") @@ -229,7 +232,7 @@ def get_logger(name: str, log_format: str = DEFAULT_LOG_FORMAT): Args: name (str): The name of the logger. - format (str, optional): Logging format. + log_format (str, optional): Logging format. Returns: logging.Logger: Configured logger instance. diff --git a/python/tests/unit/sdk/obj/test_object.py b/python/tests/unit/sdk/obj/test_object.py index 94de759506..d5c6bc2ec2 100644 --- a/python/tests/unit/sdk/obj/test_object.py +++ b/python/tests/unit/sdk/obj/test_object.py @@ -176,16 +176,15 @@ def test_put_file(self, mock_exists, mock_is_file): mock_exists.return_value = True mock_is_file.return_value = True path = "any/filepath" - data = b"bytes in the file" - - with patch("builtins.open", mock_open(read_data=data)): + mock_file = mock_open(read_data=b"file content") + with patch("builtins.open", mock_file): self.object.put_file(path) self.mock_client.request.assert_called_with( HTTP_METHOD_PUT, path=REQUEST_PATH, params=self.expected_params, - data=data, + data=mock_file.return_value, ) def test_put_content(self): diff --git a/python/tests/unit/sdk/test_bucket.py b/python/tests/unit/sdk/test_bucket.py index 927a7c5c2f..18c98ff3fa 100644 --- a/python/tests/unit/sdk/test_bucket.py +++ b/python/tests/unit/sdk/test_bucket.py @@ -1,5 +1,5 @@ import unittest -from unittest.mock import Mock, call, patch, MagicMock +from unittest.mock import Mock, call, patch, MagicMock, mock_open from requests.structures import CaseInsensitiveDict @@ -540,48 +540,50 @@ def test_object(self): self.assertEqual(self.ais_bck.qparam, new_obj.query_params) self.assertEqual(props, new_obj.props) - @patch("aistore.sdk.obj.object.read_file_bytes") @patch("aistore.sdk.obj.object.validate_file") @patch("aistore.sdk.bucket.validate_directory") @patch("pathlib.Path.glob") - def test_put_files( - self, mock_glob, mock_validate_dir, mock_validate_file, mock_read - ): + def test_put_files(self, mock_glob, mock_validate_dir, mock_validate_file): path = "directory" - file_1 = ("file_1_name", b"bytes in the first file") - file_2 = ("file_2_name", b"bytes in the second file") - - path_1 = Mock() - path_1.is_file.return_value = True - path_1.relative_to.return_value = file_1[0] - path_1.stat.return_value = Mock(st_size=123) - - path_2 = Mock() - path_2.is_file.return_value = True - path_2.relative_to.return_value = file_2[0] - path_2.stat.return_value = Mock(st_size=4567) - - mock_glob.return_value = [path_1, path_2] - expected_obj_names = [file_1[0], file_2[0]] - mock_read.side_effect = [file_1[1], file_2[1]] + file_names = ["file_1_name", "file_2_name"] + file_sizes = [123, 4567] + + file_readers = [ + mock_open(read_data=b"bytes in the first file").return_value, + mock_open(read_data=b"bytes in the second file").return_value, + ] + mock_file = mock_open() + mock_file.side_effect = file_readers + + # Set up mock files + mock_files = [ + Mock( + is_file=Mock(return_value=True), + relative_to=Mock(return_value=name), + stat=Mock(return_value=Mock(st_size=size)), + ) + for name, size in zip(file_names, file_sizes) + ] + mock_glob.return_value = mock_files - res = self.ais_bck.put_files(path) + with patch("builtins.open", mock_file): + res = self.ais_bck.put_files(path) # Ensure that put_files is called for files for the directory at path mock_validate_dir.assert_called_with(path) # Ensure that files have been created with the proper path - mock_validate_file.assert_has_calls([call(str(path_1)), call(str(path_2))]) + mock_validate_file.assert_has_calls([call(str(f)) for f in mock_files]) # Ensure that the files put in the bucket have the sane names - self.assertEqual(expected_obj_names, res) + self.assertEqual(file_names, res) expected_calls = [] - for file_name, file_data in [file_1, file_2]: + for file_name, file_reader in zip(file_names, file_readers): expected_calls.append( call( HTTP_METHOD_PUT, path=f"objects/{BCK_NAME}/{file_name}", params=self.ais_bck_params, - data=file_data, + data=file_reader, ) )