Skip to content

Commit

Permalink
sdk/python: Simplify object file put by using requests streaming uplo…
Browse files Browse the repository at this point in the history
…ad, pathlib

Signed-off-by: Aaron Wilson <[email protected]>
  • Loading branch information
aaronnw committed Oct 10, 2024
1 parent 2fc22af commit 713d45b
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 48 deletions.
14 changes: 8 additions & 6 deletions python/aistore/sdk/obj/object.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#
from dataclasses import dataclass
from io import BufferedWriter
from pathlib import Path
from typing import Dict

from requests import Response
Expand Down Expand Up @@ -42,7 +43,7 @@
PromoteAPIArgs,
BlobMsg,
)
from aistore.sdk.utils import read_file_bytes, validate_file
from aistore.sdk.utils import validate_file
from aistore.sdk.obj.object_props import ObjectProps


Expand Down Expand Up @@ -247,14 +248,14 @@ def put_content(self, content: bytes) -> Response:
requests.ConnectionTimeout: Timed out connecting to AIStore
requests.ReadTimeout: Timed out waiting response from AIStore
"""
return self._put_data(content)
return self._put_data(data=content)

def put_file(self, path: str = None) -> Response:
def put_file(self, path: str or Path) -> Response:
"""
Puts a local file as an object to a bucket in AIS storage.
Args:
path (str): Path to local file
path (str or Path): Path to local file
Raises:
requests.RequestException: "There was an ambiguous exception that occurred while handling..."
Expand All @@ -264,9 +265,10 @@ def put_file(self, path: str = None) -> Response:
ValueError: The path provided is not a valid file
"""
validate_file(path)
return self._put_data(read_file_bytes(path))
with open(path, "rb") as f:
return self._put_data(f)

def _put_data(self, data: bytes) -> Response:
def _put_data(self, data) -> Response:
return self._client.request(
HTTP_METHOD_PUT,
path=self._object_path,
Expand Down
27 changes: 15 additions & 12 deletions python/aistore/sdk/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def probing_frequency(dur: int) -> float:
return max(freq, 0.1)


def read_file_bytes(filepath: str):
def read_file_bytes(filepath: str) -> bytes:
"""
Given a filepath, read the content as bytes
Args:
Expand All @@ -126,37 +126,40 @@ def read_file_bytes(filepath: str):
return reader.read()


def _check_path_exists(path: str) -> None:
if not Path(path).exists():
def _check_path_exists(path: Path) -> None:
if not path.exists():
raise ValueError(f"Path: {path} does not exist")


def validate_file(path: str) -> None:
def validate_file(path: str or Path) -> None:
"""
Validate that a file exists and is a file
Args:
path: Path to validate
path (str or Path): Path to validate
Raises:
ValueError: If path does not exist or is not a file
"""
if isinstance(path, str):
path = Path(path)
_check_path_exists(path)
path_obj = Path(path)
if not path_obj.exists():
if not path.exists():
raise ValueError(f"Path: {path} does not exist")
if not path_obj.is_file():
if not path.is_file():
raise ValueError(f"Path: {path} is a directory, not a file")


def validate_directory(path: str) -> None:
def validate_directory(path: str or Path) -> None:
"""
Validate that a directory exists and is a directory
Args:
path: Path to validate
path (str or Path): Path to validate
Raises:
ValueError: If path does not exist or is not a directory
"""
if isinstance(path, str):
path = Path(path)
_check_path_exists(path)
if not Path(path).is_dir():
if not path.is_dir():
raise ValueError(f"Path: {path} is a file, not a directory")


Expand Down Expand Up @@ -229,7 +232,7 @@ def get_logger(name: str, log_format: str = DEFAULT_LOG_FORMAT):
Args:
name (str): The name of the logger.
format (str, optional): Logging format.
log_format (str, optional): Logging format.
Returns:
logging.Logger: Configured logger instance.
Expand Down
7 changes: 3 additions & 4 deletions python/tests/unit/sdk/obj/test_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,16 +176,15 @@ def test_put_file(self, mock_exists, mock_is_file):
mock_exists.return_value = True
mock_is_file.return_value = True
path = "any/filepath"
data = b"bytes in the file"

with patch("builtins.open", mock_open(read_data=data)):
mock_file = mock_open(read_data=b"file content")
with patch("builtins.open", mock_file):
self.object.put_file(path)

self.mock_client.request.assert_called_with(
HTTP_METHOD_PUT,
path=REQUEST_PATH,
params=self.expected_params,
data=data,
data=mock_file.return_value,
)

def test_put_content(self):
Expand Down
54 changes: 28 additions & 26 deletions python/tests/unit/sdk/test_bucket.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import unittest
from unittest.mock import Mock, call, patch, MagicMock
from unittest.mock import Mock, call, patch, MagicMock, mock_open

from requests.structures import CaseInsensitiveDict

Expand Down Expand Up @@ -540,48 +540,50 @@ def test_object(self):
self.assertEqual(self.ais_bck.qparam, new_obj.query_params)
self.assertEqual(props, new_obj.props)

@patch("aistore.sdk.obj.object.read_file_bytes")
@patch("aistore.sdk.obj.object.validate_file")
@patch("aistore.sdk.bucket.validate_directory")
@patch("pathlib.Path.glob")
def test_put_files(
self, mock_glob, mock_validate_dir, mock_validate_file, mock_read
):
def test_put_files(self, mock_glob, mock_validate_dir, mock_validate_file):
path = "directory"
file_1 = ("file_1_name", b"bytes in the first file")
file_2 = ("file_2_name", b"bytes in the second file")

path_1 = Mock()
path_1.is_file.return_value = True
path_1.relative_to.return_value = file_1[0]
path_1.stat.return_value = Mock(st_size=123)

path_2 = Mock()
path_2.is_file.return_value = True
path_2.relative_to.return_value = file_2[0]
path_2.stat.return_value = Mock(st_size=4567)

mock_glob.return_value = [path_1, path_2]
expected_obj_names = [file_1[0], file_2[0]]
mock_read.side_effect = [file_1[1], file_2[1]]
file_names = ["file_1_name", "file_2_name"]
file_sizes = [123, 4567]

file_readers = [
mock_open(read_data=b"bytes in the first file").return_value,
mock_open(read_data=b"bytes in the second file").return_value,
]
mock_file = mock_open()
mock_file.side_effect = file_readers

# Set up mock files
mock_files = [
Mock(
is_file=Mock(return_value=True),
relative_to=Mock(return_value=name),
stat=Mock(return_value=Mock(st_size=size)),
)
for name, size in zip(file_names, file_sizes)
]
mock_glob.return_value = mock_files

res = self.ais_bck.put_files(path)
with patch("builtins.open", mock_file):
res = self.ais_bck.put_files(path)

# Ensure that put_files is called for files for the directory at path
mock_validate_dir.assert_called_with(path)
# Ensure that files have been created with the proper path
mock_validate_file.assert_has_calls([call(str(path_1)), call(str(path_2))])
mock_validate_file.assert_has_calls([call(str(f)) for f in mock_files])
# Ensure that the files put in the bucket have the sane names
self.assertEqual(expected_obj_names, res)
self.assertEqual(file_names, res)
expected_calls = []

for file_name, file_data in [file_1, file_2]:
for file_name, file_reader in zip(file_names, file_readers):
expected_calls.append(
call(
HTTP_METHOD_PUT,
path=f"objects/{BCK_NAME}/{file_name}",
params=self.ais_bck_params,
data=file_data,
data=file_reader,
)
)

Expand Down

0 comments on commit 713d45b

Please sign in to comment.