Skip to content

Commit

Permalink
Enable XYZ chunking at conversion (#178)
Browse files Browse the repository at this point in the history
* add xyz chunking at conversion

* bugfixes

* revamp ndtiff converter

* remove cli debugging

* style

* bugfix

* write metadata at the end

* pos name fixes

* style

* better timepoint handling

* fix tests

* move the ndtiff conversion path into a method

* Update iohub/cli/cli.py

Co-authored-by: Talon Chandler <[email protected]>

* better handling of missing timepoints

* prettier T/C bar

* better data slicing

* Revert "prettier T/C bar"

This reverts commit 9b20964.

---------

Co-authored-by: Ziwen Liu <[email protected]>
Co-authored-by: Talon Chandler <[email protected]>
  • Loading branch information
3 people authored Aug 26, 2023
1 parent e19d1f1 commit fc1db13
Show file tree
Hide file tree
Showing 4 changed files with 190 additions and 62 deletions.
11 changes: 10 additions & 1 deletion iohub/cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,13 +82,22 @@ def info(files, verbose):
is_flag=True,
help="Arrange FOVs in a row/column grid layout for tiled acquisition",
)
def convert(input, output, format, scale_voxels, grid_layout):
@click.option(
"--chunks",
"-c",
required=False,
default="XY",
help="Zarr chunk size given as 'XY', 'XYZ', or a tuple of chunk "
"dimensions. If 'XYZ', chunk size will be limited to 500 MB.",
)
def convert(input, output, format, scale_voxels, grid_layout, chunks):
"""Converts Micro-Manager TIFF datasets to OME-Zarr"""
converter = TIFFConverter(
input_dir=input,
output_dir=output,
data_type=format,
scale_voxels=scale_voxels,
grid_layout=grid_layout,
chunks=chunks,
)
converter.run()
217 changes: 167 additions & 50 deletions iohub/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@
from typing import Literal, Union

import numpy as np
from dask.array import to_zarr
from numpy.typing import NDArray
from tqdm import tqdm
from tqdm.contrib.itertools import product

from iohub._version import version as iohub_version
from iohub.ngff import Position, TransformationMeta, open_ome_zarr
Expand All @@ -18,6 +20,7 @@
)

__all__ = ["TIFFConverter"]
MAX_CHUNK_SIZE = 500e6 # in bytes


def _create_grid_from_coordinates(
Expand Down Expand Up @@ -84,7 +87,7 @@ class TIFFConverter:
Whether to lay out the positions in a grid-like format
based on how the data was acquired
(useful for tiled acquisitions), by default False
chunks : tuple[int], optional
chunks : tuple[int] or Literal['XY', 'XYZ'], optional
Chunk size of the output Zarr arrays, by default None
(chunk by XY planes, this is the fastest at converting time)
scale_voxels : bool, optional
Expand All @@ -109,7 +112,7 @@ def __init__(
output_dir: str,
data_type: Literal["singlepagetiff", "ometiff", "ndtiff"] = None,
grid_layout: int = False,
chunks: tuple[int] = None,
chunks: Union[tuple[int], Literal["XY", "XYZ"]] = None,
scale_voxels: bool = True,
hcs_plate: bool = None,
):
Expand Down Expand Up @@ -148,6 +151,7 @@ def __init__(
self.z = self.reader.slices
self.y = self.reader.height
self.x = self.reader.width
self.dtype = self.reader.dtype
self.dim = (self.p, self.t, self.c, self.z, self.y, self.x)
self.prefix_list = []
self.hcs_plate = hcs_plate
Expand Down Expand Up @@ -175,7 +179,7 @@ def __init__(
self._make_default_grid()
else:
self._make_default_grid()
self.chunks = chunks if chunks else (1, 1, 1, self.y, self.x)
self.chunks = self._gen_chunks(chunks)
self.transform = self._scale_voxels() if scale_voxels else None

def _check_hcs_sites(self):
Expand Down Expand Up @@ -331,14 +335,35 @@ def _get_coord_reorder(self, coord):
]
return tuple(reordered)

def _normalize_ndtiff_coord(
    self, p: int, t: int, c: int, z: int
) -> tuple[Union[str, int], ...]:
    """Translate integer coordinates into ND-TIFF axis keys.

    When the dataset indexes its position and/or channel axes by
    string labels rather than integers, the corresponding integer
    indices are mapped to their labels; other axes pass through.
    """
    reader = self.reader
    pos = self.pos_names[p] if reader.str_position_axis else p
    chan = reader.channel_names[c] if reader.str_channel_axis else c
    return pos, t, chan, z
def _gen_chunks(self, input_chunks):
    """Resolve the user-supplied chunk specification into a concrete
    5D (T, C, Z, Y, X) chunk size for the output Zarr arrays.

    Parameters
    ----------
    input_chunks : tuple[int] | list[int] | Literal["XY", "XYZ"] | None
        Explicit chunk dimensions, a named preset ("XY" for one plane
        per chunk, "XYZ" for one volume per chunk), or a falsy value
        for the default XY-plane chunking.

    Returns
    -------
    tuple[int]
        Final chunk size; the Z chunk is halved as needed so a chunk
        stays under ``MAX_CHUNK_SIZE`` bytes.

    Raises
    ------
    ValueError
        If a string other than "XY"/"XYZ" (case-insensitive) is given.
    TypeError
        If ``input_chunks`` is neither a sequence, a string, nor falsy.
    """
    if not input_chunks:
        # default: chunk by single XY planes (fastest to convert)
        chunks = [1, 1, 1, self.y, self.x]
    elif isinstance(input_chunks, (tuple, list)):
        # accept any explicit sequence of chunk dimensions
        # (lists are equivalent to tuples here)
        chunks = list(input_chunks)
    elif isinstance(input_chunks, str):
        if input_chunks.lower() == "xy":
            chunks = [1, 1, 1, self.y, self.x]
        elif input_chunks.lower() == "xyz":
            chunks = [1, 1, self.z, self.y, self.x]
        else:
            raise ValueError(f"{input_chunks} chunks are not supported.")
    else:
        raise TypeError(
            f"Chunk type {type(input_chunks)} is not supported."
        )

    # limit chunks to MAX_CHUNK_SIZE bytes by repeatedly halving the
    # Z chunk dimension
    bytes_per_pixel = np.dtype(self.dtype).itemsize
    # it's OK if a single image is larger than MAX_CHUNK_SIZE
    # (the Z chunk never shrinks below 1)
    while (
        chunks[-3] > 1
        and np.prod(chunks) * bytes_per_pixel > MAX_CHUNK_SIZE
    ):
        # plain int() keeps NumPy scalars out of the chunk tuple
        chunks[-3] = int(np.ceil(chunks[-3] / 2))

    logging.debug(f"Zarr store chunk size will be set to {chunks}.")

    return tuple(chunks)

def _get_channel_names(self):
cns = self.reader.channel_names
Expand Down Expand Up @@ -397,7 +422,7 @@ def _init_zarr_arrays(self):
self.y,
self.x,
),
"dtype": self.reader.dtype,
"dtype": self.dtype,
"chunks": self.chunks,
"transform": self.transform,
}
Expand Down Expand Up @@ -430,6 +455,108 @@ def _create_zeros_array(
]
pos.dump_meta()

def _convert_ndtiff(self):
    """Convert an ND-TIFF dataset into the pre-initialized Zarr store.

    Iterates positions, then (T, C) pairs, copying each timepoint/
    channel slab from the reader's dask array into the matching Zarr
    array region, and collects per-plane image metadata keyed by
    "<array path>/T/C/Z" into ``image_plane_metadata.json``.
    Positions or T/C combinations missing from the raw data are left
    zero-filled with a logged warning rather than raising.
    """
    # tqdm bar formats for the outer (positions) and inner
    # (timepoints/channels) progress bars
    bar_format_positions = (
        "Converting Positions: |{bar:16}|{n_fmt}/{total_fmt} "
        "(Time Remaining: {remaining}), {rate_fmt}{postfix}]"
    )
    bar_format_time_channel = (
        "Converting Timepoints/Channels: |{bar:16}|{n_fmt}/{total_fmt} "
        "(Time Remaining: {remaining}), {rate_fmt}{postfix}]"
    )
    # accumulates plane metadata across all positions; written once at
    # the end
    all_ndtiff_metadata = {}
    for p_idx in tqdm(range(self.p), bar_format=bar_format_positions):
        # ndtiff_pos_idx, ndtiff_t_idx, and ndtiff_channel_idx
        # may be None
        ndtiff_pos_idx = (
            self.pos_names[p_idx]
            if self.reader.str_position_axis
            else p_idx
        )
        try:
            # normalizes the position key (drops axes the dataset did
            # not acquire); raises ValueError for unknown positions
            ndtiff_pos_idx, *_ = self.reader._check_coordinates(
                ndtiff_pos_idx, 0, 0, 0
            )
        except ValueError:
            # Log warning and continue if some positions were not
            # acquired in the dataset
            logging.warning(
                f"Cannot load data at position {ndtiff_pos_idx}, "
                "filling with zeros. Raw data may be incomplete."
            )
            continue

        # lazy dask view of this position's raw data, and the
        # destination Zarr array ("0" = full-resolution level)
        dask_arr = self.reader.get_zarr(position=ndtiff_pos_idx)
        zarr_pos_name = self.zarr_position_names[p_idx]
        zarr_arr = self.writer[zarr_pos_name]["0"]

        # nested T/C progress bar (position=1, leave=False keeps it
        # below the position bar and clears it when done)
        for t_idx, c_idx in product(
            range(self.t),
            range(self.c),
            bar_format=bar_format_time_channel,
            position=1,
            leave=False,
        ):
            ndtiff_channel_idx = (
                self.reader.channel_names[c_idx]
                if self.reader.str_channel_axis
                else c_idx
            )
            # set ndtiff_t_idx and ndtiff_z_idx to None if these axes were
            # not acquired
            (
                _,
                ndtiff_t_idx,
                ndtiff_channel_idx,
                ndtiff_z_idx,
            ) = self.reader._check_coordinates(
                ndtiff_pos_idx, t_idx, ndtiff_channel_idx, 0
            )
            # Log warning and continue if some T/C were not acquired in the
            # dataset
            if not self.reader.dataset.has_image(
                position=ndtiff_pos_idx,
                time=ndtiff_t_idx,
                channel=ndtiff_channel_idx,
                z=ndtiff_z_idx,
            ):
                logging.warning(
                    f"Cannot load data at timepoint {t_idx}, channel "
                    f"{c_idx}, filling with zeros. Raw data may be "
                    "incomplete."
                )
                continue

            # copy one (T, C) slab; rechunk to self.chunks so the
            # dask-to-zarr write aligns with the destination array's
            # chunk grid
            data_slice = (slice(t_idx, t_idx + 1), slice(c_idx, c_idx + 1))
            to_zarr(
                dask_arr[data_slice].rechunk(self.chunks),
                zarr_arr,
                region=data_slice,
            )

            for z_idx in range(self.z):
                # this function will handle z_idx=0 when no z stacks
                # acquired
                image_metadata = self.reader.get_image_metadata(
                    ndtiff_pos_idx,
                    ndtiff_t_idx,
                    ndtiff_channel_idx,
                    z_idx,
                )
                # row/well/fov/img/T/C/Z
                frame_key = "/".join(
                    [zarr_arr.path]
                    + [str(i) for i in (t_idx, c_idx, z_idx)]
                )
                all_ndtiff_metadata[frame_key] = image_metadata

    logging.info("Writing ND-TIFF image plane metadata...")
    # mode="x" (exclusive create) fails loudly if the metadata file
    # already exists instead of silently overwriting it
    with open(
        os.path.join(self.output_dir, "image_plane_metadata.json"),
        mode="x",
    ) as metadata_file:
        json.dump(all_ndtiff_metadata, metadata_file, indent=4)

def run(self, check_image: bool = True):
"""Runs the conversion.
Expand All @@ -441,50 +568,40 @@ def run(self, check_image: bool = True):
"""
logging.debug("Setting up Zarr store.")
self._init_zarr_arrays()
bar_format = (
"Status: |{bar:16}|{n_fmt}/{total_fmt} "
bar_format_images = (
"Converting Images: |{bar:16}|{n_fmt}/{total_fmt} "
"(Time Remaining: {remaining}), {rate_fmt}{postfix}]"
)
# Run through every coordinate and convert in acquisition order
logging.info("Converting Images...")
ndtiff = False
if isinstance(self.reader, NDTiffReader):
ndtiff = True
all_ndtiff_metadata = {}
for coord in tqdm(self.coords, bar_format=bar_format):
coord_reorder = self._get_coord_reorder(coord)
if isinstance(self.reader, NDTiffReader):
p, t, c, z = self._normalize_ndtiff_coord(*coord_reorder)
else:
p, t, c, z = coord_reorder
img_raw = self._get_image_array(p, t, c, z)
if img_raw is None or not getattr(img_raw, "shape", ()):
# Leave incomplete datasets zero-filled
logging.warning(
f"Cannot load image at PTCZ={(p, t, c, z)}, "
"filling with zeros. Check if the raw data is incomplete."
)
continue
else:
pos_idx = coord_reorder[0]
pos_name = self.zarr_position_names[pos_idx]
zarr_img = self.writer[pos_name]["0"]
zarr_img[coord_reorder[1:]] = img_raw
if check_image:
self._perform_image_check(zarr_img[coord_reorder[1:]], img_raw)
if ndtiff:
image_metadata = self.reader.get_image_metadata(p, t, c, z)
# row/well/fov/img/T/C/Z
frame_key = "/".join(
[zarr_img.path] + [str(i) for i in (t, c, z)]
logging.info(
"Checking converted image is not supported for ND-TIFF. "
"Ignoring..."
)
all_ndtiff_metadata[frame_key] = image_metadata
self._convert_ndtiff()
else:
for coord in tqdm(self.coords, bar_format=bar_format_images):
coord_reorder = self._get_coord_reorder(coord)
p, t, c, z = coord_reorder
img_raw = self._get_image_array(p, t, c, z)
if img_raw is None or not getattr(img_raw, "shape", ()):
# Leave incomplete datasets zero-filled
logging.warning(
f"Cannot load image at PTCZ={(p, t, c, z)}, filling "
"with zeros. Check if the raw data is incomplete."
)
continue
else:
pos_idx = coord_reorder[0]
ndtiff_pos_idx = self.zarr_position_names[pos_idx]
zarr_img = self.writer[ndtiff_pos_idx]["0"]
zarr_img[coord_reorder[1:]] = img_raw
if check_image:
self._perform_image_check(
zarr_img[coord_reorder[1:]], img_raw
)

self.writer.zgroup.attrs.update(self.metadata)
if ndtiff:
logging.info("Writing ND-TIFF image plane metadata...")
with open(
os.path.join(self.output_dir, "image_plane_metadata.json"),
mode="x",
) as metadata_file:
json.dump(all_ndtiff_metadata, metadata_file, indent=4)
self.writer.close()
22 changes: 12 additions & 10 deletions iohub/ndtiff.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,16 +149,18 @@ def _check_coordinates(

# The axis is not part of the dataset axes
else:
# If coord = 0 is requested, the coordinate will be replaced
# with None
if coord == 0:
coords[i] = None
# If coord != 0 is requested and the axis is not part of the
# dataset, ValueError will be raised
else:
raise ValueError(
f"Axis {axis} is not part of this dataset"
)
# Nothing to do if coord == None
if coord is not None:
# If coord = 0 is requested, the coordinate will be
# replaced with None
if coord == 0:
coords[i] = None
# If coord != 0 is requested and the axis is not part of
# the dataset, ValueError will be raised
else:
raise ValueError(
f"Axis {axis} is not part of this dataset"
)

return (*coords,)

Expand Down
2 changes: 1 addition & 1 deletion tests/cli/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,4 +110,4 @@ def test_cli_convert_ome_tiff(
cmd += ["-g"]
result = runner.invoke(cli, cmd)
assert result.exit_code == 0
assert "Status" in result.output
assert "Converting" in result.output

0 comments on commit fc1db13

Please sign in to comment.