Skip to content

Commit

Permalink
WIP support ZipFile IO
Browse files (browse the repository at this point in the history)
  • Loading branch information
FynnBe committed Oct 24, 2024
1 parent 814ae0e commit 9bc0d97
Show file tree
Hide file tree
Showing 5 changed files with 95 additions and 37 deletions.
63 changes: 47 additions & 16 deletions bioimageio/spec/_internal/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import hashlib
import sys
import warnings
import zipfile
from abc import abstractmethod
from collections.abc import Mapping as MappingAbc
from dataclasses import dataclass
Expand Down Expand Up @@ -82,7 +83,8 @@


AbsolutePathT = TypeVar(
"AbsolutePathT", bound=Union[HttpUrl, AbsoluteDirectory, AbsoluteFilePath]
"AbsolutePathT",
bound=Union[HttpUrl, AbsoluteDirectory, AbsoluteFilePath, zipfile.Path],
)


Expand Down Expand Up @@ -132,18 +134,21 @@ def __repr__(self) -> str:

@abstractmethod
def get_absolute(
self, root: Union[RootHttpUrl, AbsoluteDirectory, pydantic.AnyUrl]
self, root: Union[RootHttpUrl, AbsoluteDirectory, pydantic.AnyUrl, ZipFile]
) -> AbsolutePathT: ...

def _get_absolute_impl(
self, root: Union[RootHttpUrl, AbsoluteDirectory, pydantic.AnyUrl]
) -> Union[Path, HttpUrl]:
self, root: Union[RootHttpUrl, AbsoluteDirectory, pydantic.AnyUrl, ZipFile]
) -> Union[Path, HttpUrl, zipfile.Path]:
if isinstance(root, Path):
return (root / self.root).absolute()

rel_path = self.root.as_posix().strip("/")
if isinstance(root, ZipFile):
return zipfile.Path(root, rel_path)

parsed = urlsplit(str(root))
path = list(parsed.path.strip("/").split("/"))
rel_path = self.root.as_posix().strip("/")
if (
parsed.netloc == "zenodo.org"
and parsed.path.startswith("/api/records/")
Expand Down Expand Up @@ -175,7 +180,9 @@ def _validate(cls, value: Union[PurePath, str]):
return cls(PurePath(value))


class RelativeFilePath(RelativePathBase[Union[AbsoluteFilePath, HttpUrl]], frozen=True):
class RelativeFilePath(
RelativePathBase[Union[AbsoluteFilePath, HttpUrl, zipfile.Path]], frozen=True
):
"""A path relative to the `rdf.yaml` file (also if the RDF source is a URL)."""

def model_post_init(self, __context: Any) -> None:
Expand All @@ -185,8 +192,8 @@ def model_post_init(self, __context: Any) -> None:
super().model_post_init(__context)

def get_absolute(
self, root: "RootHttpUrl | Path | AnyUrl"
) -> "AbsoluteFilePath | HttpUrl":
self, root: "RootHttpUrl | Path | AnyUrl | ZipFile"
) -> "AbsoluteFilePath | HttpUrl | zipfile.Path":
absolute = self._get_absolute_impl(root)
if (
isinstance(absolute, Path)
Expand All @@ -200,11 +207,11 @@ def get_absolute(


class RelativeDirectory(
RelativePathBase[Union[AbsoluteDirectory, HttpUrl]], frozen=True
RelativePathBase[Union[AbsoluteDirectory, HttpUrl, zipfile.Path]], frozen=True
):
def get_absolute(
self, root: "RootHttpUrl | Path | AnyUrl"
) -> "AbsoluteDirectory | HttpUrl":
self, root: "RootHttpUrl | Path | AnyUrl | ZipFile"
) -> "AbsoluteDirectory | HttpUrl | zipfile.Path":
absolute = self._get_absolute_impl(root)
if (
isinstance(absolute, Path)
Expand Down Expand Up @@ -506,14 +513,14 @@ def is_yaml_value(value: Any) -> TypeGuard[YamlValue]:
@dataclass
class OpenedBioimageioYaml:
content: BioimageioYamlContent
original_root: Union[AbsoluteDirectory, RootHttpUrl]
original_root: Union[AbsoluteDirectory, RootHttpUrl, ZipFile]
original_file_name: FileName


@dataclass
class DownloadedFile:
path: FilePath
original_root: Union[AbsoluteDirectory, RootHttpUrl]
path: Union[FilePath, zipfile.Path]
original_root: Union[AbsoluteDirectory, RootHttpUrl, ZipFile]
original_file_name: FileName


Expand Down Expand Up @@ -571,6 +578,16 @@ def reset(self): ...
def close(self): ...


# def open(source: Union[PermissiveFileSource, FileDescr], /, progressbar: Union[Progressbar, bool, None] = None,
# **kwargs: Unpack[HashKwargs]):
# if isinstance(source, FileDescr):
# if kwargs.get("sha256") is None:
# kwargs["sha256"] = source.sha256
# source = source.source

# if isinstance(source, RelativeFilePath):


def download(
source: Union[PermissiveFileSource, FileDescr],
/,
Expand All @@ -580,10 +597,22 @@ def download(
"""download `source` URL (or pass local file path)"""
if isinstance(source, FileDescr):
return source.download()
elif isinstance(source, zipfile.Path):
zip_root = source.root
assert isinstance(zip_root, ZipFile)
return DownloadedFile(
source,
zip_root,
extract_file_name(source),
)

strict_source = interprete_file_source(source)
if isinstance(strict_source, RelativeFilePath):
strict_source = strict_source.absolute()
if isinstance(strict_source, zipfile.Path):
return DownloadedFile(
strict_source, strict_source.root, extract_file_name(strict_source)
)

if isinstance(strict_source, PurePath):
if not strict_source.exists():
Expand Down Expand Up @@ -677,9 +706,11 @@ def download(self):


def extract_file_name(
src: Union[pydantic.HttpUrl, HttpUrl, PurePath, RelativeFilePath],
src: Union[pydantic.HttpUrl, HttpUrl, PurePath, RelativeFilePath, zipfile.Path],
) -> FileName:
if isinstance(src, RelativeFilePath):
if isinstance(src, zipfile.Path):
return src.name or src.root.filename or "bioimageio.zip"
elif isinstance(src, RelativeFilePath):
return src.path.name
elif isinstance(src, PurePath):
return src.name
Expand Down
48 changes: 35 additions & 13 deletions bioimageio/spec/_internal/io_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import io
import warnings
import zipfile
from contextlib import nullcontext
from difflib import get_close_matches
from pathlib import Path
Expand All @@ -10,7 +11,6 @@
Dict,
Mapping,
Optional,
TextIO,
Union,
cast,
)
Expand All @@ -35,6 +35,7 @@
YamlValue,
download,
find_bioimageio_yaml_file_name,
identify_bioimageio_yaml_file_name,
)
from .io_basics import FileName
from .types import FileSource, PermissiveFileSource
Expand All @@ -43,7 +44,7 @@
yaml = YAML(typ="safe")


def read_yaml(file: Union[FilePath, TextIO]) -> YamlValue:
def read_yaml(file: Union[FilePath, IO[str], IO[bytes]]) -> YamlValue:
if isinstance(file, Path):
cm = file.open("r", encoding="utf-8")
else:
Expand All @@ -55,7 +56,9 @@ def read_yaml(file: Union[FilePath, TextIO]) -> YamlValue:
return content


def write_yaml(content: YamlValue, /, file: Union[NewPath, FilePath, TextIO]):
def write_yaml(
content: YamlValue, /, file: Union[NewPath, FilePath, IO[str], IO[bytes]]
):
if isinstance(file, Path):
cm = file.open("w", encoding="utf-8")
else:
Expand All @@ -81,9 +84,26 @@ def _sanitize_bioimageio_yaml(content: YamlValue) -> BioimageioYamlContent:
return cast(BioimageioYamlContent, content)


def _open_bioimageio_rdf_in_zip(source: ZipFile, rdf_name: str) -> OpenedBioimageioYaml:
    """Read the YAML member `rdf_name` from the zip archive `source`.

    The member is parsed with `read_yaml` and passed through
    `_sanitize_bioimageio_yaml`. The archive object itself is stored as the
    original root of the result; the original file name is `source.filename`,
    falling back to "bioimageio.zip" when that attribute is unset or empty.
    """
    with source.open(rdf_name) as rdf_stream:
        sanitized = _sanitize_bioimageio_yaml(read_yaml(rdf_stream))

    archive_name = source.filename or "bioimageio.zip"
    return OpenedBioimageioYaml(
        content=sanitized,
        original_root=source,
        original_file_name=archive_name,
    )


def _open_bioimageio_zip(source: ZipFile) -> OpenedBioimageioYaml:
    """Open the bioimageio YAML file contained in the zip archive `source`.

    The member to read is chosen by `identify_bioimageio_yaml_file_name` from
    the names of all archive members; reading is delegated to
    `_open_bioimageio_rdf_in_zip`.
    """
    # `namelist()` yields the `filename` of every entry in `source.filelist`
    member_names = source.namelist()
    rdf_name = identify_bioimageio_yaml_file_name(member_names)
    return _open_bioimageio_rdf_in_zip(source, rdf_name)


def open_bioimageio_yaml(
source: PermissiveFileSource, /, **kwargs: Unpack[HashKwargs]
source: Union[PermissiveFileSource, ZipFile], /, **kwargs: Unpack[HashKwargs]
) -> OpenedBioimageioYaml:
if isinstance(source, ZipFile):
return _open_bioimageio_zip(source)

try:
downloaded = download(source, **kwargs)
except Exception:
Expand Down Expand Up @@ -113,17 +133,18 @@ def open_bioimageio_yaml(
downloaded = entry.download()

local_source = downloaded.path
root = downloaded.original_root

if is_zipfile(local_source):
local_source = unzip(local_source)
if isinstance(local_source, zipfile.Path):
return _open_bioimageio_rdf_in_zip(local_source.root, local_source.name)
elif is_zipfile(local_source):
return _open_bioimageio_zip(ZipFile(local_source))

if local_source.is_dir():
root = local_source
local_source = local_source / find_bioimageio_yaml_file_name(local_source)
else:
root = downloaded.original_root

content = _sanitize_bioimageio_yaml(read_yaml(local_source))

return OpenedBioimageioYaml(content, root, downloaded.original_file_name)


Expand Down Expand Up @@ -244,9 +265,10 @@ def write_zip(

def load_array(source: Union[FileSource, FileDescr]) -> NDArray[Any]:
path = download(source).path

return numpy.load(path, allow_pickle=False)
with path.open(mode="rb") as f:
return numpy.load(f, allow_pickle=False)


def save_array(path: Path, array: NDArray[Any]) -> None:
return numpy.save(path, array, allow_pickle=False)
def save_array(path: Union[Path, zipfile.Path], array: NDArray[Any]) -> None:
    """Write `array` to `path` in NumPy's binary format with pickling disabled.

    `path` is opened in binary write mode and the resulting stream is handed to
    `numpy.save`, so both file system paths and `zipfile.Path` objects are
    accepted.
    """
    with path.open(mode="wb") as npy_stream:
        numpy.save(npy_stream, array, allow_pickle=False)
7 changes: 5 additions & 2 deletions bioimageio/spec/_internal/packaging_context.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import zipfile
from contextvars import ContextVar, Token
from dataclasses import dataclass, field
from typing import Dict, List, Literal, Optional, Sequence, Union
Expand All @@ -16,7 +17,7 @@ class PackagingContext:

bioimageio_yaml_file_name: FileName

file_sources: Dict[FileName, Union[AbsoluteFilePath, HttpUrl]]
file_sources: Dict[FileName, Union[AbsoluteFilePath, HttpUrl, zipfile.Path]]
"""File sources to include in the packaged resource"""

weights_priority_order: Optional[Sequence[str]] = None
Expand All @@ -26,7 +27,9 @@ def replace(
self,
*,
bioimageio_yaml_file_name: Optional[FileName] = None,
file_sources: Optional[Dict[FileName, Union[AbsoluteFilePath, HttpUrl]]] = None,
file_sources: Optional[
Dict[FileName, Union[AbsoluteFilePath, HttpUrl, zipfile.Path]]
] = None,
weights_priority_order: Union[
Optional[Sequence[str]], Literal["unchanged"]
] = "unchanged",
Expand Down
5 changes: 3 additions & 2 deletions bioimageio/spec/_internal/validation_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from pathlib import Path
from typing import Dict, List, Optional, Union
from urllib.parse import urlsplit, urlunsplit
from zipfile import ZipFile

from pydantic import DirectoryPath

Expand All @@ -20,7 +21,7 @@ class ValidationContext:
init=False, default_factory=list
)

root: Union[RootHttpUrl, AbsoluteDirectory] = Path()
root: Union[RootHttpUrl, AbsoluteDirectory, ZipFile] = Path()
"""url/directory serving as base to resolve any relative file paths"""

warning_level: WarningLevel = 50
Expand All @@ -43,7 +44,7 @@ class ValidationContext:

def replace(
self,
root: Optional[Union[RootHttpUrl, DirectoryPath]] = None,
root: Optional[Union[RootHttpUrl, DirectoryPath, ZipFile]] = None,
warning_level: Optional[WarningLevel] = None,
log_warnings: Optional[bool] = None,
file_name: Optional[str] = None,
Expand Down
9 changes: 5 additions & 4 deletions bioimageio/spec/_io.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from typing import Dict, Literal, Optional, TextIO, Union, cast
from zipfile import ZipFile

from loguru import logger
from pydantic import FilePath, NewPath
Expand Down Expand Up @@ -26,7 +27,7 @@


def load_description(
source: PermissiveFileSource,
source: Union[PermissiveFileSource, ZipFile],
/,
*,
format_version: Union[Literal["discover"], Literal["latest"], str] = DISCOVER,
Expand Down Expand Up @@ -72,7 +73,7 @@ def load_description(


def load_model_description(
source: PermissiveFileSource,
source: Union[PermissiveFileSource, ZipFile],
/,
*,
format_version: Union[Literal["discover"], Literal["latest"], str] = DISCOVER,
Expand All @@ -92,7 +93,7 @@ def load_model_description(


def load_dataset_description(
source: PermissiveFileSource,
source: Union[PermissiveFileSource, ZipFile],
/,
*,
format_version: Union[Literal["discover"], Literal["latest"], str] = DISCOVER,
Expand Down Expand Up @@ -131,7 +132,7 @@ def save_bioimageio_yaml_only(


def load_description_and_validate_format_only(
source: PermissiveFileSource,
source: Union[PermissiveFileSource, ZipFile],
/,
*,
format_version: Union[Literal["discover"], Literal["latest"], str] = DISCOVER,
Expand Down

0 comments on commit 9bc0d97

Please sign in to comment.