Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support public s3 files automatically #67

Merged
merged 5 commits into from
Aug 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 60 additions & 25 deletions bioio/bio_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,35 @@ def determine_plugin(
),
)

@staticmethod
def _get_reader(
image: biob.types.ImageLike,
reader: biob.reader.Reader,
use_plugin_cache: bool,
fs_kwargs: Dict[str, Any],
**kwargs: Any,
) -> Tuple[biob.reader.Reader, Optional[PluginEntry]]:
"""
Initializes and returns the reader (and plugin if relevant) for the provided
image based on provided args and/or the available bioio supported plugins
"""
if reader is not None:
# Check specific reader image types in a situation where a specified reader
# only supports some of the ImageLike types.
if not check_type(image, reader):
raise biob.exceptions.UnsupportedFileFormatError(
reader.__name__, str(type(image))
)

return reader(image, fs_kwargs=fs_kwargs, **kwargs), None

# Determine reader class based on available plugins
plugin = BioImage.determine_plugin(
image, fs_kwargs=fs_kwargs, use_plugin_cache=use_plugin_cache, **kwargs
)
ReaderClass = plugin.metadata.get_reader()
return ReaderClass(image, fs_kwargs=fs_kwargs, **kwargs), plugin

def __init__(
self,
image: biob.types.ImageLike,
Expand All @@ -264,25 +293,28 @@ def __init__(
fs_kwargs: Dict[str, Any] = {},
**kwargs: Any,
):
if reader is None:
# Determine reader class and create dask delayed array
self._plugin = BioImage.determine_plugin(
image, fs_kwargs=fs_kwargs, use_plugin_cache=use_plugin_cache, **kwargs
try:
self._reader, self._plugin = self._get_reader(
image,
reader,
use_plugin_cache,
fs_kwargs,
**kwargs,
)
except biob.exceptions.UnsupportedFileFormatError:
# When reading from S3 if we failed trying to read
# try reading as an anonymous user otherwise re-raise
# the error
if not str(image).startswith("s3://"):
raise

self._reader, self._plugin = self._get_reader(
image,
reader,
use_plugin_cache,
fs_kwargs | {"anon": True},
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the implication here, then, is that if you passed fs_kwargs={anon:True} in the initial bioImage constructor then s3 reads do work?

Copy link
Contributor Author

@SeanLeRoy SeanLeRoy Aug 15, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah 👍 (and I believe were we to always pass True in the initial constructor then users that need to auth like for private buckets would fail, haven't tested that explicitly yet)

**kwargs,
)
ReaderClass = self._plugin.metadata.get_reader()
else:
# check specific reader image types in a situation where a specified reader
# only supports some of the ImageLike types.
if not check_type(image, reader):
raise biob.exceptions.UnsupportedFileFormatError(
reader.__name__, str(type(image))
)

# connect submitted reader
ReaderClass = reader

# Init and store reader
self._reader = ReaderClass(image, fs_kwargs=fs_kwargs, **kwargs)

# Store delayed modifiers
self._reconstruct_mosaic = reconstruct_mosaic
Expand Down Expand Up @@ -1097,13 +1129,16 @@ def save(
)

def __str__(self) -> str:
return (
f"<BioImage ["
f"plugin: {self._plugin.entrypoint.name} installed "
f"at {datetime.datetime.fromtimestamp(self._plugin.timestamp)}, "
f"Image-is-in-Memory: {self._xarray_data is not None}"
f"]>"
)
if self._plugin is not None:
return (
f"<BioImage ["
f"plugin: {self._plugin.entrypoint.name} installed "
f"at {datetime.datetime.fromtimestamp(self._plugin.timestamp)}, "
f"Image-is-in-Memory: {self._xarray_data is not None}"
f"]>"
)

return f"<BioImage [Image-is-in-Memory: {self._xarray_data is not None}]>"

def __repr__(self) -> str:
return str(self)
Expand Down
12 changes: 10 additions & 2 deletions bioio/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,12 @@


@pytest.fixture
def sample_text_file(tmp_path: pathlib.Path) -> pathlib.Path:
def sample_text_file(
tmp_path: pathlib.Path,
) -> typing.Generator[pathlib.Path, None, None]:
example_file = tmp_path / "temp-example.txt"
example_file.write_text("just some example text here")
return example_file
yield example_file
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unrelated to main changeset, this is just to make the tests faster by only creating the file once since they seemed slow



def np_random_from_shape(
Expand Down Expand Up @@ -90,3 +92,9 @@ def __exit__(
subprocess.check_call(
[sys.executable, "-m", "pip", "uninstall", "-y", self.package_name]
)


@pytest.fixture
def dummy_plugin() -> typing.Generator[str, None, None]:
with InstallPackage(package_path=DUMMY_PLUGIN_PATH, package_name=DUMMY_PLUGIN_NAME):
yield DUMMY_PLUGIN_NAME
Copy link
Contributor Author

@SeanLeRoy SeanLeRoy Aug 15, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unrelated to main changeset, this is just to make the tests faster by only installing the plugin once and re-using this fixture since they seemed slow

26 changes: 26 additions & 0 deletions bioio/tests/test_bio_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,3 +74,29 @@ def test_bioimage_submission_data_reader_type_alignment(
) -> None:
with pytest.raises(expected_exception):
BioImage(image, reader=reader_class)


def test_bioimage_attempts_s3_read_with_anon_attr(
sample_text_file: pathlib.Path, dummy_plugin: str
) -> None:
# Arrange
from dummy_plugin import Reader as DummyReader

err_msg = "anon is not True"

class FakeReader(DummyReader):
def __init__(self, _: ImageLike, fs_kwargs: dict = {}):
if fs_kwargs.get("anon", False) is not True:
raise biob.exceptions.UnsupportedFileFormatError(
"test", "test", err_msg
)

# Act / Assert
BioImage("s3://this/could/go/anywhere.ome.zarr", reader=FakeReader)

# (sanity-check) Make sure it would have failed if not an S3 URI
# that gets given the anon attribute
with pytest.raises(biob.exceptions.UnsupportedFileFormatError) as err:
BioImage(sample_text_file, reader=FakeReader)

assert err_msg in str(err.value)
43 changes: 21 additions & 22 deletions bioio/tests/test_plugins.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,33 +9,32 @@
import bioio

from ..plugins import dump_plugins
from .conftest import DUMMY_PLUGIN_NAME, DUMMY_PLUGIN_PATH, InstallPackage


def test_dump_plugins() -> None:
with InstallPackage(package_path=DUMMY_PLUGIN_PATH, package_name=DUMMY_PLUGIN_NAME):
# Capture the output of dump_plugins
old_stdout = sys.stdout
sys.stdout = StringIO()
try:
dump_plugins()
output = sys.stdout.getvalue()
finally:
sys.stdout = old_stdout
def test_dump_plugins(dummy_plugin: str) -> None:
# Capture the output of dump_plugins
old_stdout = sys.stdout
sys.stdout = StringIO()
try:
dump_plugins()
output = sys.stdout.getvalue()
finally:
sys.stdout = old_stdout

# Check if package name is in the output
assert DUMMY_PLUGIN_NAME in output
# Check if package name is in the output
assert dummy_plugin in output


def test_plugin_feasibility_report() -> None:
def test_plugin_feasibility_report(dummy_plugin: str) -> None:
# Arrange
test_image = np.random.rand(10, 10)
expected_error_msg = "missing 1 required positional argument: 'path'"
with InstallPackage(package_path=DUMMY_PLUGIN_PATH, package_name=DUMMY_PLUGIN_NAME):
# Act
actual_output = bioio.plugin_feasibility_report(test_image)
# Assert
assert actual_output["ArrayLike"].supported is True
assert actual_output["ArrayLike"].error is None
assert actual_output["dummy-plugin"].supported is False
assert expected_error_msg in (actual_output["dummy-plugin"].error or "")

# Act
actual_output = bioio.plugin_feasibility_report(test_image)

# Assert
assert actual_output["ArrayLike"].supported is True
assert actual_output["ArrayLike"].error is None
assert actual_output[dummy_plugin].supported is False
assert expected_error_msg in (actual_output[dummy_plugin].error or "")
Loading