Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

attempts to create a writer #69

Merged
merged 26 commits into from
Mar 24, 2021
Merged
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
447ffd6
attempts to create a writer
glyg Dec 3, 2020
ad261bc
attempts to create a writer
glyg Dec 3, 2020
2d83edd
adds the files
glyg Dec 4, 2020
07e6cb8
Merge branch 'writer' of github.com:glyg/ome-zarr-py into writer
glyg Dec 4, 2020
3424efa
Merge 'origin/master' into HEAD
joshmoore Dec 6, 2020
552ea9d
Fix issues from 'pre-commit run -a'
joshmoore Dec 6, 2020
212db6a
passes the writer test
glyg Dec 7, 2020
c970c82
WIP add write mechanisms to LocalZarrLocaltion
glyg Dec 7, 2020
ffa2bda
fixes obvious lint pbs
glyg Dec 7, 2020
371404e
Sorry for the nosie I'll get used to it
glyg Dec 7, 2020
2a12507
Relaxing typing constrains, mypy passes
glyg Dec 7, 2020
7b9599a
adds channels metadata, read from omero spec if nothing else
glyg Dec 11, 2020
d55edae
Slight refactoring of ome_zarr.writer
joshmoore Jan 11, 2021
7c86ddf
Merge pull request #1 from joshmoore/writer
glyg Jan 12, 2021
fce7c96
partly fixes writer test
glyg Jan 12, 2021
fe5d61c
removes unnecessary if block in Reader
glyg Jan 12, 2021
f6123c2
pre-commit fixes
joshmoore Jan 26, 2021
9731606
Try bumping to napari 0.4.4
joshmoore Feb 4, 2021
d5f1614
Register show-viewer in conftest as workaround
joshmoore Feb 4, 2021
6c91ed0
Merge branch 'master' into writer
joshmoore Feb 4, 2021
64bafb1
Add downsampling
joshmoore Mar 22, 2021
b832df5
Use dataset variable
joshmoore Mar 22, 2021
e51b1be
Revert debugging raise
joshmoore Mar 23, 2021
d35cc5e
Remove unnecessary omero metadata
joshmoore Mar 23, 2021
0d0ebfd
write_image: accept array or list
joshmoore Mar 23, 2021
f22c564
write_image: revert array or list
joshmoore Mar 23, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .isort.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[settings]
known_third_party = cv2,dask,napari,numpy,pytest,requests,scipy,setuptools,skimage,vispy,zarr
known_third_party = cv2,dask,numpy,pytest,requests,scipy,setuptools,skimage,vispy,zarr
multi_line_output=6
include_trailing_comma=False
force_grid_wrap=0
Expand Down
6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,11 @@ repos:
- --autofix

- repo: https://gitlab.com/pycqa/flake8
rev: 3.8.3
rev: 3.8.4
hooks:
- id: flake8
additional_dependencies: [
flake8-blind-except,
# flake8-blind-except, FIXME
flake8-builtins,
flake8-rst-docstrings,
flake8-logging-format,
Expand All @@ -74,7 +74,7 @@ repos:
--disallow-untyped-defs,
--ignore-missing-imports,
]
exclude: tests/*|setup.py
exclude: tests/|setup.py

- repo: https://github.com/adrienverge/yamllint.git
rev: v1.24.2
Expand Down
7 changes: 4 additions & 3 deletions environment.yml
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
#name: z
name: z
channels:
- defaults
- ome
- conda-forge
- defaults
dependencies:
- napari
- pyqt
- flake8
- ipython
- mypy
Expand All @@ -20,6 +20,7 @@ dependencies:
- xarray
- zarr >= 2.4.0
- pip:
- napari
- pre-commit
- pytest-qt
# python.app -- only install on OSX:
Expand Down
12 changes: 1 addition & 11 deletions ome_zarr/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from skimage.segmentation import clear_border

from .scale import Scaler
from .writer import write_multiscale

CHANNEL_DIMENSION = 1

Expand Down Expand Up @@ -92,17 +93,6 @@ def rgb_to_5d(pixels: np.ndarray) -> List:
return video


def write_multiscale(pyramid: List, group: zarr.Group) -> None:
"""Write a pyramid with multiscale metadata to disk."""
paths = []
for path, dataset in enumerate(pyramid):
group.create_dataset(str(path), data=pyramid[path])
paths.append({"path": str(path)})

multiscales = [{"version": "0.1", "datasets": paths}]
group.attrs["multiscales"] = multiscales


def create_zarr(
zarr_directory: str,
method: Callable[..., Tuple[List, List]] = coins,
Expand Down
25 changes: 16 additions & 9 deletions ome_zarr/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

import dask.array as da
import requests
import zarr

from .types import JSONDict

Expand Down Expand Up @@ -106,8 +107,12 @@ class LocalZarrLocation(BaseZarrLocation):
Uses the :module:`json` library for loading JSON from disk.
"""

def __init__(self, path: Path) -> None:
def __init__(self, path: Path, mode: str = "r") -> None:
self.__path: Path = path
self.mode = mode
if mode in ("w", "a") and not self.__path.exists():
_ = zarr.DirectoryStore(self.basename())
LOGGER.debug("Created DirectoryStore %s", self.basename())
super().__init__()

def basename(self) -> str:
Expand All @@ -132,8 +137,7 @@ def get_json(self, subpath: str) -> JSONDict:
If a file does not exist, an empty response is returned rather
than an exception.
"""
filename = self.__path / subpath

filename = self.subpath(subpath)
if not os.path.exists(filename):
LOGGER.debug(f"{filename} does not exist")
return {}
Expand Down Expand Up @@ -189,7 +193,7 @@ def get_json(self, subpath: str) -> JSONDict:
return {}


def parse_url(path: str) -> Optional[BaseZarrLocation]:
def parse_url(path: str, mode: str = "r") -> Optional[BaseZarrLocation]:
Copy link
Contributor Author

@glyg glyg Jan 12, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As this function is not used in the writer module, maybe I should revert the changes in io.py

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was working on a simple example for Wei (his use case was "how to turn an existing zarr array into an ome-zarr"). Let's keep this until we have a simple working user example. Maybe parse_url can still be the starting point.

previous example
#!/usr/bin/env python
import numpy as np
import zarr as zr

from ome_zarr.io import parse_url
from ome_zarr.reader import OMERO, Reader
from ome_zarr.writer import write_image

import argparse
parser = argparse.ArgumentParser()
parser.add_argument("path")
ns = parser.parse_args()

dtype = np.uint8
mean = 10
shape = (1, 3, 1, 256, 256)
rng = np.random.default_rng(0)
data = rng.poisson(mean, size=shape).astype(dtype)
store = zr.DirectoryStore(ns.path)
grp = zr.group(store)
write_image(data, grp, chunks=(128, 128))

"""Convert a path string or URL to a BaseZarrLocation subclass.

>>> parse_url('does-not-exist')
Expand All @@ -199,12 +203,15 @@ def parse_url(path: str) -> Optional[BaseZarrLocation]:
return LocalZarrLocation(Path(path))
else:
result = urlparse(path)
zarr: Optional[BaseZarrLocation] = None
zarr_loc: Optional[BaseZarrLocation] = None
if result.scheme in ("", "file"):
# Strips 'file://' if necessary
zarr = LocalZarrLocation(Path(result.path))
zarr_loc = LocalZarrLocation(Path(result.path), mode=mode)

else:
zarr = RemoteZarrLocation(path)
if zarr.exists():
return zarr
if mode != "r":
raise ValueError("Remote locations are read only")
zarr_loc = RemoteZarrLocation(path)
if zarr_loc.exists() or (mode in ("a", "w")):
return zarr_loc
return None
2 changes: 2 additions & 0 deletions ome_zarr/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,9 @@ def __init__(self, node: Node) -> None:
node.metadata["visible"] = visibles
node.metadata["contrast_limits"] = contrast_limits
node.metadata["colormap"] = colormaps

except Exception as e:
raise e
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be there? The LOGGER line below will never be called

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pretty likely debugging code that crept in. Thanks, @glyg

LOGGER.error(f"failed to parse metadata: {e}")


Expand Down
108 changes: 108 additions & 0 deletions ome_zarr/writer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
"""Image writer utility

"""
import logging
from typing import Any, List, Tuple, Union

import numpy as np
import zarr

from .scale import Scaler
from .types import JSONDict

LOGGER = logging.getLogger("ome_zarr.writer")


def write_multiscale(
pyramid: List, group: zarr.Group, chunks: Union[Tuple[Any, ...], int] = None,
) -> None:
"""Write a pyramid with multiscale metadata to disk."""
paths = []
for path, dataset in enumerate(pyramid):
# TODO: chunks here could be different per layer
group.create_dataset(str(path), data=dataset, chunks=chunks)
paths.append({"path": str(path)})

multiscales = [{"version": "0.1", "datasets": paths}]
group.attrs["multiscales"] = multiscales


def write_image(
image: np.ndarray,
group: zarr.Group,
chunks: Union[Tuple[Any, ...], int] = None,
byte_order: Union[str, List[str]] = "tczyx",
scaler: Scaler = None,
**metadata: JSONDict,
) -> None:
"""Writes an image to the zarr store according to ome-zarr specification

Parameters
----------
image: np.ndarray
the image to save
group: zarr.Group
the group within the zarr store to store the data in
chunks: int or tuple of ints,
size of the saved chunks to store the image
byte_order: str or list of str, default "tczyx"
combination of the letters defining the order
in which the dimensions are saved
"""

if image.ndim > 5:
raise ValueError("Only images of 5D or less are supported")

shape_5d: Tuple[Any, ...] = (*(1,) * (5 - image.ndim), *image.shape)
image = image.reshape(shape_5d)

if chunks is not None:
chunks = _retuple(chunks, shape_5d)
omero = metadata.get("omero", {})
# Update the size entry anyway
omero["size"] = {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we need to have size metadata here since we have that info in the .zarray and it's not part of the OME-Zarr spec.

"t": image.shape[0],
"c": image.shape[1],
"z": image.shape[2],
"height": image.shape[3],
"width": image.shape[4],
}
if omero.get("channels") is None:
size_c = image.shape[1]
if size_c == 1:
omero["channels"] = [{"window": {"start": 0, "end": 1}}]
omero["rdefs"] = {"model": "greyscale"}
else:
rng = np.random.default_rng(0)
colors = rng.integers(0, high=2 ** 8, size=(image.shape[1], 3))
omero["channels"] = [
{
"color": "".join(f"{i:02x}" for i in color),
"window": {"start": 0, "end": 1},
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's probably better to omit all the omero['channels'] metadata if we're not provided with it.
Generating these numbers will cause viewers to assume they are based on the data, which could lead to poor visualisation (e.g. render levels 0 - 1 will likely be saturated)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@glyg : any thoughts from your initial use case?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I just tried to emulate the code I saw in the tests or elsewhere, I agree that by default metadata entries should as small as possible

"active": True,
}
for color in colors
]
omero["rdefs"] = {"model": "color"}

metadata["omero"] = omero
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is currently missing the multiscales metadata. I'd propose moving write_multiscales from ome_zarr.data to your writer module.


if scaler is None:
scaler = Scaler()

pyramid = scaler.nearest(image)
write_multiscale(pyramid, group, chunks=chunks)
group.attrs.update(metadata)


def _retuple(
chunks: Union[Tuple[Any, ...], int], shape: Tuple[Any, ...]
) -> Tuple[Any, ...]:

_chunks: Tuple[Any, ...]
if isinstance(chunks, int):
_chunks = (chunks,)
else:
_chunks = chunks

return (*shape[: (5 - len(_chunks))], *_chunks)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is all based on the assumption that we have 5D data, which is being relaxed in ome/ngff#39, should there be a default retuple to 5 or no attempt at all to reshape the data?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point, but let's handle that along with #39. Ultimately I think the rules we'll have to be laid out, the most flexible being "once the shape is defined (e.g. len(shape)==4), then everything else (e.g. chunks) should stay in that shape & order.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok great!

10 changes: 10 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
def pytest_addoption(parser):
"""
add `--show-viewer` as a valid command line flag
"""
parser.addoption(
"--show-viewer",
action="store_true",
default=False,
help="don't show viewer during tests",
)
12 changes: 2 additions & 10 deletions tests/test_napari.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,11 @@

import numpy as np
import pytest
from napari.conftest import make_test_viewer # noqa

from ome_zarr.data import astronaut, create_zarr
from ome_zarr.napari import napari_get_reader


@pytest.fixture(autouse=True, scope="session")
def load_napari_conftest(pytestconfig):
from napari import conftest

pytestconfig.pluginmanager.register(conftest, "napari-conftest")


class TestNapari:
@pytest.fixture(autouse=True)
def initdir(self, tmpdir):
Expand Down Expand Up @@ -67,9 +59,9 @@ def test_label(self):
@pytest.mark.skipif(
sys.version_info < (3, 7), reason="on_draw is missing in napari < 0.4.0",
)
def test_viewer(self, make_test_viewer): # noqa
def test_viewer(self, make_napari_viewer): # noqa
"""example of testing the viewer."""
viewer = make_test_viewer()
viewer = make_napari_viewer()

shapes = [(4000, 3000), (2000, 1500), (1000, 750), (500, 375)]
np.random.seed(0)
Expand Down
31 changes: 31 additions & 0 deletions tests/test_writer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import numpy as np
import pytest
import zarr

from ome_zarr.io import parse_url
from ome_zarr.reader import OMERO, Reader
from ome_zarr.writer import write_image


class TestWriter:
@pytest.fixture(autouse=True)
def initdir(self, tmpdir):
self.path = tmpdir.mkdir("data")

def create_data(self, shape, dtype, mean_val=10):
rng = np.random.default_rng(0)
return rng.poisson(mean_val, size=shape).astype(dtype)

def test_writer(self):

shape = (1, 2, 1, 256, 256)
data = self.create_data(shape, np.uint8)
store = zarr.DirectoryStore(self.path)
root = zarr.group(store=store)
grp = root.create_group("test")
write_image(image=data, group=grp, chunks=(128, 128))
reader = Reader(parse_url(f"{self.path}/test"))
node = list(reader())[0]
assert OMERO.matches(node.zarr)
assert node.data[0].shape == shape
assert node.data[0].chunks == ((1,), (2,), (1,), (128, 128), (128, 128))