diff --git a/conda.yaml b/conda.yaml index 82647d0..ae44d61 100644 --- a/conda.yaml +++ b/conda.yaml @@ -16,5 +16,7 @@ dependencies: - ruff - tifffile - pip + - h5py + - scipy - pip: - cyclopts diff --git a/linc_convert/modalities/__init__.py b/linc_convert/modalities/__init__.py index 974d52f..98ea30f 100644 --- a/linc_convert/modalities/__init__.py +++ b/linc_convert/modalities/__init__.py @@ -1,4 +1,5 @@ """Converters for all imaging modalities.""" -__all__ = ["df", "lsm", "wk"] -from . import df, lsm, wk +__all__ = ["df", "lsm", "wk", "psoct"] +from . import df, lsm, wk, psoct + diff --git a/linc_convert/modalities/df/__init__.py b/linc_convert/modalities/df/__init__.py index a6d9d6f..1e26307 100644 --- a/linc_convert/modalities/df/__init__.py +++ b/linc_convert/modalities/df/__init__.py @@ -1,5 +1,9 @@ """Dark Field microscopy converters.""" -__all__ = ["cli", "multi_slice", "single_slice"] +try: + import glymur as _ # noqa: F401 -from . import cli, multi_slice, single_slice + __all__ = ["cli", "multi_slice", "single_slice"] + from . import cli, multi_slice, single_slice +except ImportError: + pass diff --git a/linc_convert/modalities/lsm/__init__.py b/linc_convert/modalities/lsm/__init__.py index 1ce6617..abf7cee 100644 --- a/linc_convert/modalities/lsm/__init__.py +++ b/linc_convert/modalities/lsm/__init__.py @@ -1,4 +1,9 @@ """Light Sheet Microscopy converters.""" -__all__ = ["cli", "mosaic"] -from . import cli, mosaic +try: + import tifffile as _ # noqa: F401 + + __all__ = ["cli", "mosaic"] + from . import cli, mosaic +except ImportError: + pass diff --git a/linc_convert/modalities/psoct/__init__.py b/linc_convert/modalities/psoct/__init__.py new file mode 100644 index 0000000..d9bb1a6 --- /dev/null +++ b/linc_convert/modalities/psoct/__init__.py @@ -0,0 +1,10 @@ +"""Polarization-sensitive optical coherence tomography converters.""" + +try: + import h5py as _h5py # noqa: F401 + import scipy as _scipy # noqa: F401 + + __all__ = ["cli", "multi_slice", "single_volume"] + from . import cli, multi_slice, single_volume +except ImportError: + pass diff --git a/linc_convert/modalities/psoct/_utils.py b/linc_convert/modalities/psoct/_utils.py new file mode 100644 index 0000000..ee0d749 --- /dev/null +++ b/linc_convert/modalities/psoct/_utils.py @@ -0,0 +1,499 @@ +import itertools +import re +from typing import Any, Literal + +import nibabel as nib +import numpy as np +import zarr + +from linc_convert.utils.math import ceildiv +from linc_convert.utils.unit import convert_unit + + +def make_json(oct_meta: str) -> dict: + """ + Make json from OCT metadata. + + Expected input: + --------------- + Image medium: 60% TDE + Center Wavelength: 1294.84nm + Axial resolution: 4.9um + Lateral resolution: 4.92um + FOV: 3x3mm + Voxel size: 3x3x3um + Depth focus range: 225um + Number of focuses: 2 + Focus #: 2 + Slice thickness: 450um. + Number of slices: 75 + Slice #:23 + Modality: dBI + """ + + def _parse_value_unit( + string: str, n: int = None + ) -> tuple[float | list[float], str | Any]: + number = r"-?(\d+\.?\d*|\d*\.?\d+)(E-?\d+)?" 
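+        # Matches one signed decimal with an optional exponent (e.g. "4.92",
+        # "1.5E-3"); `n` copies joined by "x" then match strings like "3x3x3".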
+        value = "x".join([number] * (n or 1))
+        match = re.fullmatch(r"(?P<value>" + value + r")(?P<unit>\w*)", string)
+        value, unit = match.group("value"), match.group("unit")
+        value = list(map(float, value.split("x")))
+        if n is None:
+            value = value[0]
+        return value, unit
+
+    meta = {
+        "BodyPart": "BRAIN",
+        "Environment": "exvivo",
+        "SampleStaining": "none",
+    }
+
+    for line in oct_meta.split("\n"):
+        if ":" not in line:
+            continue
+
+        key, value = line.split(":")
+        key, value = key.strip(), value.strip()
+
+        if key == "Image medium":
+            parts = value.split()
+            if "TDE" in parts:
+                parts[parts.index("TDE")] = "2,2' Thiodiethanol (TDE)"
+            meta["SampleMedium"] = " ".join(parts)
+
+        elif key == "Center Wavelength":
+            value, unit = _parse_value_unit(value)
+            meta["Wavelength"] = value
+            meta["WavelengthUnit"] = unit
+
+        elif key == "Axial resolution":
+            value, unit = _parse_value_unit(value)
+            meta["ResolutionAxial"] = value
+            meta["ResolutionAxialUnit"] = unit
+
+        elif key == "Lateral resolution":
+            value, unit = _parse_value_unit(value)
+            meta["ResolutionLateral"] = value
+            meta["ResolutionLateralUnit"] = unit
+
+        elif key == "Voxel size":
+            value, unit = _parse_value_unit(value, n=3)
+            meta["PixelSize"] = value
+            meta["PixelSizeUnits"] = unit
+
+        elif key == "Depth focus range":
+            value, unit = _parse_value_unit(value)
+            meta["DepthFocusRange"] = value
+            meta["DepthFocusRangeUnit"] = unit
+
+        elif key == "Number of focuses":
+            value, unit = _parse_value_unit(value)
+            meta["FocusCount"] = int(value)
+
+        elif key == "Slice thickness":
+            value, unit = _parse_value_unit(value)
+            # strip the trailing "." (e.g. "450um.") and normalize to microns
+            value = convert_unit(value, unit[:-1], "u")
+            meta["SliceThickness"] = value
+
+        elif key == "Number of slices":
+            value, unit = _parse_value_unit(value)
+            meta["SliceCount"] = int(value)
+
+        elif key == "Modality":
+            meta["OCTModality"] = value
+
+        else:
+            continue
+
+    return meta
+
+
+def generate_pyramid(
+    omz: zarr.Group,
+    levels: int | None = None,
+    ndim: int = 3,
+    max_load: int = 512,
+    mode: Literal["mean", "median"] = "median",
+    no_pyramid_axis: int | str | None = None,
+) -> list[list[int]]:
+    """
+    Generate the levels of a pyramid in an existing Zarr.
+
+    Parameters
+    ----------
+    omz : zarr.Group
+        Parent Zarr group; level "0" must already exist.
+    levels : int
+        Number of additional levels to generate.
+        By default, stop when all dimensions are smaller than their
+        corresponding chunk size.
+    ndim : int
+        Number of spatial dimensions.
+    max_load : int
+        Maximum number of voxels to load along each dimension.
+    mode : {"mean", "median"}
+        Whether to use a mean or median moving window.
+    no_pyramid_axis : int | str, optional
+        Index of an axis that should not be downsampled.
+
+    Returns
+    -------
+    shapes : list[list[int]]
+        Shapes of all levels, from finest to coarsest, including the
+        existing top level.
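+
+    Examples
+    --------
+    Minimal sketch (assumes level "0" was already written into the group;
+    the store path is illustrative)::
+
+        import zarr
+
+        omz = zarr.group(zarr.storage.DirectoryStore("out.ome.zarr"))
+        shapes = generate_pyramid(omz, ndim=3, max_load=512, mode="mean")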
+ """ + # Read properties from base level + shape = list(omz["0"].shape) + chunk_size = omz["0"].chunks + opt = { + "dimension_separator": omz["0"]._dimension_separator, + "order": omz["0"]._order, + "dtype": omz["0"]._dtype, + "fill_value": omz["0"]._fill_value, + "compressor": omz["0"]._compressor, + } + + # Select windowing function + if mode == "median": + func = np.median + else: + assert mode == "mean" + func = np.mean + + level = 0 + batch, shape = shape[:-ndim], shape[-ndim:] + allshapes = [shape] + + while True: + level += 1 + + # Compute downsampled shape + prev_shape, shape = shape, [] + for i, length in enumerate(prev_shape): + if i == no_pyramid_axis: + shape.append(length) + else: + shape.append(max(1, length // 2)) + + # Stop if seen enough levels or level shape smaller than chunk size + if levels is None: + if all(x <= c for x, c in zip(shape, chunk_size[-ndim:])): + break + elif level > levels: + break + + print("Compute level", level, "with shape", shape) + + allshapes.append(shape) + omz.create_dataset(str(level), shape=shape, **opt) + + # Iterate across `max_load` chunks + # (note that these are unrelared to underlying zarr chunks) + grid_shape = [ceildiv(n, max_load) for n in prev_shape] + for chunk_index in itertools.product(*[range(x) for x in grid_shape]): + print(f"chunk {chunk_index} / {tuple(grid_shape)})", end="\r") + + # Read one chunk of data at the previous resolution + slicer = [Ellipsis] + [ + slice(i * max_load, min((i + 1) * max_load, n)) + for i, n in zip(chunk_index, prev_shape) + ] + fullshape = omz[str(level - 1)].shape + dat = omz[str(level - 1)][tuple(slicer)] + + # Discard the last voxel along odd dimensions + crop = [ + 0 if y == 1 else x % 2 for x, y in zip(dat.shape[-ndim:], fullshape) + ] + # Don't crop the axis not down-sampling + # cannot do if not no_pyramid_axis since it could be 0 + if no_pyramid_axis is not None: + crop[no_pyramid_axis] = 0 + slcr = [slice(-1) if x else slice(None) for x in crop] + dat = dat[tuple([Ellipsis, *slcr])] + + if any(n == 0 for n in dat.shape): + # last strip had a single voxel, nothing to do + continue + + patch_shape = dat.shape[-ndim:] + + # Reshape into patches of shape 2x2x2 + windowed_shape = [ + x for n in patch_shape for x in (max(n // 2, 1), min(n, 2)) + ] + if no_pyramid_axis is not None: + windowed_shape[2 * no_pyramid_axis] = patch_shape[no_pyramid_axis] + windowed_shape[2 * no_pyramid_axis + 1] = 1 + + dat = dat.reshape(batch + windowed_shape) + # -> last `ndim`` dimensions have shape 2x2x2 + dat = dat.transpose( + list(range(len(batch))) + + list(range(len(batch), len(batch) + 2 * ndim, 2)) + + list(range(len(batch) + 1, len(batch) + 2 * ndim, 2)) + ) + # -> flatten patches + smaller_shape = [max(n // 2, 1) for n in patch_shape] + if no_pyramid_axis is not None: + smaller_shape[no_pyramid_axis] = patch_shape[no_pyramid_axis] + + dat = dat.reshape(batch + smaller_shape + [-1]) + + # Compute the median/mean of each patch + dtype = dat.dtype + dat = func(dat, axis=-1) + dat = dat.astype(dtype) + + # Write output + slicer = [Ellipsis] + [ + slice(i * max_load // 2, min((i + 1) * max_load // 2, n)) + if axis_index != no_pyramid_axis + else slice(i * max_load, min((i + 1) * max_load, n)) + for i, axis_index, n in zip(chunk_index, range(ndim), shape) + ] + + omz[str(level)][tuple(slicer)] = dat + + print("") + + return allshapes + + pass + + +def write_ome_metadata( + omz: zarr.Group, + axes: list[str], + space_scale: float | list[float] = 1, + time_scale: float = 1, + space_unit: str = "micrometer", + 
+    time_unit: str = "second",
+    name: str = "",
+    pyramid_aligns: str | int | list[str | int] = 2,
+    levels: int | None = None,
+    no_pool: int | None = None,
+    multiscales_type: str = "",
+) -> None:
+    """
+    Write OME metadata into Zarr.
+
+    Parameters
+    ----------
+    omz : zarr.Group
+        Parent Zarr group.
+    axes : list[str]
+        Name of each dimension, in Zarr order (t, c, z, y, x)
+    space_scale : float | list[float]
+        Finest-level voxel size, in Zarr order (z, y, x)
+    time_scale : float
+        Time scale
+    space_unit : str
+        Unit of spatial scale (assumed identical across dimensions)
+    time_unit : str
+        Unit of time scale
+    name : str
+        Name attribute
+    pyramid_aligns : float | list[float] | {"center", "edge"}
+        Whether the pyramid construction aligns the edges or the centers
+        of the corner voxels. If a (list of) number, assume that a moving
+        window of that size was used.
+    levels : int
+        Number of existing levels. Default: find out automatically.
+    no_pool : int, optional
+        Index of an axis that was not downsampled.
+    multiscales_type : str
+        Value of the "type" field in the multiscales metadata.
+
+    """
+    # Read shape at each pyramid level
+    shapes = []
+    level = 0
+    while True:
+        if levels is not None and level > levels:
+            break
+
+        if str(level) not in omz.keys():
+            levels = level
+            break
+        shapes += [
+            omz[str(level)].shape,
+        ]
+        level += 1
+
+    axis_to_type = {
+        "x": "space",
+        "y": "space",
+        "z": "space",
+        "t": "time",
+        "c": "channel",
+    }
+
+    # Number of spatial (s), batch (b) and total (n) dimensions
+    ndim = len(axes)
+    sdim = sum(axis_to_type[axis] == "space" for axis in axes)
+    bdim = ndim - sdim
+
+    if isinstance(pyramid_aligns, (int, str)):
+        pyramid_aligns = [pyramid_aligns]
+    pyramid_aligns = list(pyramid_aligns)
+    if len(pyramid_aligns) < sdim:
+        repeat = pyramid_aligns[:1] * (sdim - len(pyramid_aligns))
+        pyramid_aligns = repeat + pyramid_aligns
+    pyramid_aligns = pyramid_aligns[-sdim:]
+
+    if isinstance(space_scale, (int, float)):
+        space_scale = [space_scale]
+    space_scale = list(space_scale)
+    if len(space_scale) < sdim:
+        repeat = space_scale[:1] * (sdim - len(space_scale))
+        space_scale = repeat + space_scale
+    space_scale = space_scale[-sdim:]
+
+    multiscales = [
+        {
+            "version": "0.4",
+            "axes": [
+                {
+                    "name": axis,
+                    "type": axis_to_type[axis],
+                }
+                if axis_to_type[axis] == "channel"
+                else {
+                    "name": axis,
+                    "type": axis_to_type[axis],
+                    "unit": (
+                        space_unit
+                        if axis_to_type[axis] == "space"
+                        else time_unit
+                        if axis_to_type[axis] == "time"
+                        else None
+                    ),
+                }
+                for axis in axes
+            ],
+            "datasets": [],
+            "type": "median window " + "x".join(["2"] * sdim)
+            if not multiscales_type
+            else multiscales_type,
+            "name": name,
+        }
+    ]
+
+    shape0 = shapes[0]
+    for n in range(len(shapes)):
+        shape = shapes[n]
+        multiscales[0]["datasets"].append({})
+        level = multiscales[0]["datasets"][-1]
+        level["path"] = str(n)
+
+        scale = [1] * bdim + [
+            (
+                pyramid_aligns[i] ** n
+                if not isinstance(pyramid_aligns[i], str)
+                else (shape0[bdim + i] / shape[bdim + i])
+                if pyramid_aligns[i][0].lower() == "e"
+                else ((shape0[bdim + i] - 1) / (shape[bdim + i] - 1))
+            )
+            * space_scale[i]
+            if i != no_pool
+            else space_scale[i]
+            for i in range(sdim)
+        ]
+        translation = [0] * bdim + [
+            (
+                pyramid_aligns[i] ** n - 1
+                if not isinstance(pyramid_aligns[i], str)
+                else (shape0[bdim + i] / shape[bdim + i]) - 1
+                if pyramid_aligns[i][0].lower() == "e"
+                else 0
+            )
+            * 0.5
+            * space_scale[i]
+            if i != no_pool
+            else 0
+            for i in range(sdim)
+        ]
+
+        level["coordinateTransformations"] = [
+            {
+                "type": "scale",
+                "scale": scale,
+            },
+            {
+                "type": "translation",
"translation", + "translation": translation, + }, + ] + + scale = [1.0] * ndim + if "t" in axes: + scale[axes.index("t")] = time_scale + multiscales[0]["coordinateTransformations"] = [{"scale": scale, "type": "scale"}] + + multiscales[0]["version"] = "0.4" + omz.attrs["multiscales"] = multiscales + + +def niftizarr_write_header( + omz: zarr.Group, + shape: list[int], + affine: np.ndarray, + dtype: np.dtype | str, + unit: Literal["micron", "mm"] | None = None, + header: nib.Nifti1Header | nib.Nifti2Header | None = None, + nifti_version: Literal[1, 2] = 1, +) -> None: + """ + Write NIfTI header in a NIfTI-Zarr file. + + Parameters + ---------- + path : PathLike | str + Path to parent Zarr. + affine : (4, 4) matrix + Orientation matrix. + shape : list[int] + Array shape, in NIfTI order (x, y, z, t, c). + dtype : np.dtype | str + Data type. + unit : {"micron", "mm"}, optional + World unit. + header : nib.Nifti1Header | nib.Nifti2Header, optional + Pre-instantiated header. + zarr_version : int, default=3 + Zarr version. + """ + # TODO: we do not write the json zattrs, but it should be added in + # once the nifti-zarr package is released + + # If dimensions do not fit in a short (which is often the case), we + # use NIfTI 2. + if all(x < 32768 for x in shape) or nifti_version == 1: + NiftiHeader = nib.Nifti1Header + else: + NiftiHeader = nib.Nifti2Header + + header = header or NiftiHeader() + header.set_data_shape(shape) + header.set_data_dtype(dtype) + header.set_qform(affine) + header.set_sform(affine) + if unit: + header.set_xyzt_units(nib.nifti1.unit_codes.code[unit]) + header = np.frombuffer(header.structarr.tobytes(), dtype="u1") + + metadata = { + "chunks": [len(header)], + "order": "F", + "dtype": "|u1", + "fill_value": None, + "compressor": None, # TODO: Subject to change compression + } + + omz.create_dataset("nifti", data=header, shape=len(header), **metadata) + + print("done.") diff --git a/linc_convert/modalities/psoct/cli.py b/linc_convert/modalities/psoct/cli.py new file mode 100644 index 0000000..91d1ccd --- /dev/null +++ b/linc_convert/modalities/psoct/cli.py @@ -0,0 +1,9 @@ +"""Entry-points for polarization-sensitive optical coherence tomography converter.""" + +from cyclopts import App + +from linc_convert.cli import main + +help = "Converters for PS-OCT .mat files" +psoct = App(name="psoct", help=help) +main.command(psoct) diff --git a/linc_convert/modalities/psoct/multi_slice.py b/linc_convert/modalities/psoct/multi_slice.py new file mode 100644 index 0000000..745d5b4 --- /dev/null +++ b/linc_convert/modalities/psoct/multi_slice.py @@ -0,0 +1,332 @@ +""" +Matlab to OME-Zarr. + +Converts Matlab files generated by the MGH in-house OCT pipeline +into a OME-ZARR pyramid. 
+""" + +import ast +import json +import math +import os +from functools import wraps +from itertools import product +from typing import Callable, Mapping, Optional +from warnings import warn + +import cyclopts +import h5py +import numpy as np +import zarr +from scipy.io import loadmat + +from linc_convert.modalities.psoct._utils import ( + generate_pyramid, + make_json, + niftizarr_write_header, + write_ome_metadata, +) +from linc_convert.modalities.psoct.cli import psoct +from linc_convert.utils.math import ceildiv +from linc_convert.utils.orientation import center_affine, orientation_to_affine +from linc_convert.utils.unit import to_nifti_unit, to_ome_unit +from linc_convert.utils.zarr import make_compressor + +multi_slice = cyclopts.App(name="multi_slice", help_format="markdown") +psoct.command(multi_slice) + + +def _automap(func: Callable) -> Callable: + """Automatically maps the array in the mat file.""" + + @wraps(func) + def wrapper(inp: list[str], out: str = None, **kwargs: dict) -> callable: + if out is None: + out = os.path.splitext(inp[0])[0] + out += ".nii.zarr" if kwargs.get("nii", False) else ".ome.zarr" + kwargs["nii"] = kwargs.get("nii", False) or out.endswith(".nii.zarr") + dat = _mapmat(inp, kwargs.get("key", None)) + return func(dat, out, **kwargs) + + return wrapper + + +class _ArrayWrapper: + def _get_key(self, f: Mapping) -> str: + key = self.key + if key is None: + if not len(f.keys()): + raise Exception(f"{self.file} is empty") + for key in f.keys(): + if key[:1] != "_": + break + if len(f.keys()) > 1: + warn( + f"More than one key in .mat file {self.file}, " + f'arbitrarily loading "{key}"' + ) + + if key not in f.keys(): + raise Exception(f"Key {key} not found in file {self.file}") + + return key + + +class _H5ArrayWrapper(_ArrayWrapper): + def __init__(self, file: h5py.File, key: str | None) -> None: + self.file = file + self.key = key + self.array = file.get(self._get_key(self.file)) + + def __del__(self) -> None: + if hasattr(self.file, "close"): + self.file.close() + + def load(self) -> np.ndarray: + self.array = self.array[...] 
+        if hasattr(self.file, "close"):
+            self.file.close()
+        self.file = None
+        return self.array
+
+    @property
+    def shape(self) -> list[int]:
+        return self.array.shape
+
+    @property
+    def dtype(self) -> np.dtype:
+        return self.array.dtype
+
+    def __len__(self) -> int:
+        return len(self.array)
+
+    def __getitem__(self, index: object) -> np.ndarray:
+        return self.array[index]
+
+
+class _MatArrayWrapper(_ArrayWrapper):
+    def __init__(self, file: str, key: str | None) -> None:
+        self.file = file
+        self.key = key
+        self.array = None
+
+    def __del__(self) -> None:
+        if hasattr(self.file, "close"):
+            self.file.close()
+
+    def load(self) -> np.ndarray:
+        f = loadmat(self.file)
+        self.array = f.get(self._get_key(f))
+        self.file = None
+        return self.array
+
+    @property
+    def shape(self) -> list[int]:
+        if self.array is None:
+            self.load()
+        return self.array.shape
+
+    @property
+    def dtype(self) -> np.dtype:
+        if self.array is None:
+            self.load()
+        return self.array.dtype
+
+    def __len__(self) -> int:
+        if self.array is None:
+            self.load()
+        return len(self.array)
+
+    def __getitem__(self, index: object) -> np.ndarray:
+        if self.array is None:
+            self.load()
+        return self.array[index]
+
+
+def _mapmat(fnames: list[str], key: str = None) -> list[_ArrayWrapper]:
+    """Load or memory-map arrays stored in .mat files."""
+
+    def make_wrapper(fname: str) -> _ArrayWrapper:
+        try:
+            # "New" (HDF5-based) .mat file
+            f = h5py.File(fname, "r")
+            return _H5ArrayWrapper(f, key)
+        except Exception:
+            # "Old" .mat file
+            return _MatArrayWrapper(fname, key)
+
+    return [make_wrapper(fname) for fname in fnames]
+
+
+@multi_slice.default
+@_automap
+def convert(
+    inp: list[str],
+    out: Optional[str] = None,
+    *,
+    key: Optional[str] = None,
+    meta: str = None,
+    chunk: int = 128,
+    compressor: str = "blosc",
+    compressor_opt: str = "{}",
+    max_load: int = 128,
+    max_levels: int = 5,
+    no_pool: Optional[int] = None,
+    nii: bool = False,
+    orientation: str = "RAS",
+    center: bool = True,
+    dtype: str | None = None,
+) -> None:
+    """
+    Matlab to OME-Zarr.
+
+    Convert OCT volumes in raw matlab files into a pyramidal
+    OME-ZARR (or NIfTI-Zarr) hierarchy.
+
+    This command assumes that each slice in a volume is stored in a
+    different mat file. All slices must have the same shape, and will
+    be concatenated into a 3D Zarr.
+
+    Parameters
+    ----------
+    inp
+        Paths to the input mat files (one per slice)
+    out
+        Path to the output Zarr directory [.ome.zarr]
+    key
+        Key of the array to be extracted, defaults to the first key found
+    meta
+        Path to the metadata file
+    chunk
+        Output chunk size
+    compressor : {blosc, zlib, raw}
+        Compression method
+    compressor_opt
+        Compression options
+    max_load
+        Maximum input chunk size
+    max_levels
+        Maximum number of pyramid levels
+    no_pool
+        Index of dimension to not pool when building pyramid
+    nii
+        Convert to nifti-zarr. True if path ends in ".nii.zarr"
+    orientation
+        Orientation of the volume
+    center
+        Set RAS[0, 0, 0] at FOV center
+    dtype
+        Data type to write into
+    """
+    if isinstance(compressor_opt, str):
+        compressor_opt = ast.literal_eval(compressor_opt)
+
+    # Write sidecar JSON metadata
+    if meta:
+        print("Write JSON")
+        with open(meta, "r") as f:
+            meta_txt = f.read()
+        meta_json = make_json(meta_txt)
+        path_json = ".".join(out.split(".")[:-2]) + ".json"
+        with open(path_json, "w") as f:
+            json.dump(meta_json, f, indent=4)
+        vx = meta_json["PixelSize"]
+        unit = meta_json["PixelSizeUnits"]
+    else:
+        vx = [1] * 3
+        unit = "um"
+
+    # Prepare Zarr group
+    omz = zarr.storage.DirectoryStore(out)
+    omz = zarr.group(store=omz, overwrite=True)
+
+    # if not hasattr(inp[0], "dtype"):
+    #     raise Exception("Input is not an array. This is likely unexpected")
+    if len(inp[0].shape) < 2:
+        raise Exception("Input array is not 2d:", inp[0].shape)
+    # Prepare chunking options
+    dtype = dtype or np.dtype(inp[0].dtype).str
+    opt = {
+        "dimension_separator": r"/",
+        "order": "F",
+        "dtype": dtype,
+        "fill_value": None,
+        "compressor": make_compressor(compressor, **compressor_opt),
+    }
+    inp_shape = (*inp[0].shape, len(inp))
+    inp_chunk = [min(x, max_load) for x in inp_shape[-3:]]
+    nk = ceildiv(inp_shape[-3], inp_chunk[0])
+    nj = ceildiv(inp_shape[-2], inp_chunk[1])
+    ni = len(inp)
+
+    nblevels = min(
+        [int(math.ceil(math.log2(x))) for i, x in enumerate(inp_shape) if i != no_pool]
+    )
+    nblevels = min(nblevels, int(math.ceil(math.log2(max_load))))
+    nblevels = min(nblevels, max_levels)
+
+    opt["chunks"] = [min(x, chunk) for x in inp_shape]
+
+    omz.create_dataset(str(0), shape=inp_shape, **opt)
+
+    # iterate across input chunks
+    for i in range(ni):
+        for j, k in product(range(nj), range(nk)):
+            loaded_chunk = inp[i][
+                ...,
+                k * inp_chunk[0] : (k + 1) * inp_chunk[0],
+                j * inp_chunk[1] : (j + 1) * inp_chunk[1],
+            ]
+
+            print(
+                f"[{i + 1:03d}, {j + 1:03d}, {k + 1:03d}]",
+                "/",
+                f"[{ni:03d}, {nj:03d}, {nk:03d}]",
+                end="\r",
+            )
+
+            # save current chunk
+            omz["0"][
+                ...,
+                k * inp_chunk[0] : k * inp_chunk[0] + loaded_chunk.shape[0],
+                j * inp_chunk[1] : j * inp_chunk[1] + loaded_chunk.shape[1],
+                i,
+            ] = loaded_chunk
+
+        inp[i] = None  # drop the reference so the slice can be garbage-collected
+
+    generate_pyramid(omz, nblevels - 1, mode="mean", no_pyramid_axis=no_pool)
+
+    print("")
+
+    # Write OME-Zarr multiscale metadata
+    print("Write metadata")
+    ome_unit = to_ome_unit(unit)
+    write_ome_metadata(
+        omz,
+        axes=["z", "y", "x"],
+        no_pool=no_pool,
+        space_unit=ome_unit,
+        space_scale=vx,
+        multiscales_type=("2x2x2 " if no_pool is None else "2x2 ") + "mean window",
+    )
+
+    if not nii:
+        print("done.")
+        return
+
+    # Write NIfTI-Zarr header
+    # NOTE: we use nifti2 because dimensions typically do not fit in a short
+    # TODO: we do not write the json zattrs, but it should be added in
+    #   once the nifti-zarr package is released
+    shape = list(reversed(omz["0"].shape))
+    affine = orientation_to_affine(orientation, *vx[::-1])
+    if center:
+        affine = center_affine(affine, shape[:3])
+    niftizarr_write_header(
+        omz, shape, affine, omz["0"].dtype, to_nifti_unit(unit), nifti_version=2
+    )
diff --git a/linc_convert/modalities/psoct/single_volume.py b/linc_convert/modalities/psoct/single_volume.py
new file mode 100644
index 0000000..529f32b
--- /dev/null
+++ b/linc_convert/modalities/psoct/single_volume.py
@@ -0,0 +1,242 @@
+"""
+Matlab to OME-Zarr.
+
+Converts a Matlab file generated by the MGH in-house OCT pipeline
+into an OME-Zarr pyramid.
+"""
+
+import ast
+import json
+import math
+import os
+from contextlib import contextmanager
+from functools import wraps
+from itertools import product
+from typing import Callable, Iterator, Optional
+from warnings import warn
+
+import cyclopts
+import h5py
+import numpy as np
+import zarr
+from scipy.io import loadmat
+
+from linc_convert.modalities.psoct._utils import (
+    generate_pyramid,
+    make_json,
+    niftizarr_write_header,
+    write_ome_metadata,
+)
+from linc_convert.modalities.psoct.cli import psoct
+from linc_convert.utils.math import ceildiv
+from linc_convert.utils.orientation import center_affine, orientation_to_affine
+from linc_convert.utils.unit import to_nifti_unit, to_ome_unit
+from linc_convert.utils.zarr import make_compressor
+
+single_volume = cyclopts.App(name="single_volume", help_format="markdown")
+psoct.command(single_volume)
+
+
+def _automap(func: Callable) -> Callable:
+    """Automatically map the array in the mat file."""
+
+    @wraps(func)
+    def wrapper(inp: str, out: str = None, **kwargs: dict) -> None:
+        if out is None:
+            out = os.path.splitext(inp)[0]
+            out += ".nii.zarr" if kwargs.get("nii", False) else ".ome.zarr"
+        kwargs["nii"] = kwargs.get("nii", False) or out.endswith(".nii.zarr")
+        with _mapmat(inp, kwargs.get("key", None)) as dat:
+            return func(dat, out, **kwargs)
+
+    return wrapper
+
+
+@contextmanager
+def _mapmat(fname: str, key: str = None) -> Iterator:
+    """Load or memory-map an array stored in a .mat file."""
+    try:
+        # "New" (HDF5-based) .mat file
+        f = h5py.File(fname, "r")
+    except Exception:
+        # "Old" .mat file
+        f = loadmat(fname)
+
+    if key is None:
+        if not len(f.keys()):
+            raise Exception(f"{fname} is empty")
+        for key in f.keys():
+            if key[:1] != "_":
+                break
+        if len(f.keys()) > 1:
+            warn(
+                f"More than one key in .mat file {fname}, "
+                f'arbitrarily loading "{key}"'
+            )
+
+    if key not in f.keys():
+        raise Exception(f"Key {key} not found in file {fname}")
+
+    yield f.get(key)
+    if hasattr(f, "close"):
+        f.close()
+
+
+@single_volume.default
+@_automap
+def convert(
+    inp: str,
+    out: Optional[str] = None,
+    *,
+    key: Optional[str] = None,
+    meta: str = None,
+    chunk: int = 128,
+    compressor: str = "blosc",
+    compressor_opt: str = "{}",
+    max_load: int = 128,
+    max_levels: int = 5,
+    no_pool: Optional[int] = None,
+    nii: bool = False,
+    orientation: str = "RAS",
+    center: bool = True,
+) -> None:
+    """
+    Matlab to OME-Zarr.
+
+    Convert an OCT volume stored in a raw matlab file
+    into a pyramidal OME-ZARR (or NIfTI-Zarr) hierarchy.
+
+    Parameters
+    ----------
+    inp
+        Path to the input mat file
+    out
+        Path to the output Zarr directory [.ome.zarr]
+    key
+        Key of the array to be extracted, defaults to the first key found
+    meta
+        Path to the metadata file
+    chunk
+        Output chunk size
+    compressor : {blosc, zlib, raw}
+        Compression method
+    compressor_opt
+        Compression options
+    max_load
+        Maximum input chunk size
+    max_levels
+        Maximum number of pyramid levels
+    no_pool
+        Index of dimension to not pool when building pyramid
+    nii
+        Convert to nifti-zarr. True if path ends in ".nii.zarr"
+    orientation
+        Orientation of the volume
+    center
+        Set RAS[0, 0, 0] at FOV center
+    """
+    if isinstance(compressor_opt, str):
+        compressor_opt = ast.literal_eval(compressor_opt)
+
+    # Write sidecar JSON metadata
+    if meta:
+        print("Write JSON")
+        with open(meta, "r") as f:
+            meta_txt = f.read()
+        meta_json = make_json(meta_txt)
+        path_json = ".".join(out.split(".")[:-2]) + ".json"
+        with open(path_json, "w") as f:
+            json.dump(meta_json, f, indent=4)
+        vx = meta_json["PixelSize"]
+        unit = meta_json["PixelSizeUnits"]
+    else:
+        vx = [1] * 3
+        unit = "um"
+
+    # Prepare Zarr group
+    omz = zarr.storage.DirectoryStore(out)
+    omz = zarr.group(store=omz, overwrite=True)
+
+    if not hasattr(inp, "dtype"):
+        raise Exception("Input is not a numpy array. This is unexpected.")
+    if len(inp.shape) < 3:
+        raise Exception("Input array is not 3d:", inp.shape)
+    # Prepare chunking options
+    opt = {
+        "dimension_separator": r"/",
+        "order": "F",
+        "dtype": np.dtype(inp.dtype).str,
+        "fill_value": None,
+        "compressor": make_compressor(compressor, **compressor_opt),
+    }
+
+    inp_chunk = [min(x, max_load) for x in inp.shape]
+    nk = ceildiv(inp.shape[0], inp_chunk[0])
+    nj = ceildiv(inp.shape[1], inp_chunk[1])
+    ni = ceildiv(inp.shape[2], inp_chunk[2])
+
+    nblevels = min(
+        [int(math.ceil(math.log2(x))) for i, x in enumerate(inp.shape) if i != no_pool]
+    )
+    nblevels = min(nblevels, int(math.ceil(math.log2(max_load))))
+    nblevels = min(nblevels, max_levels)
+
+    opt["chunks"] = [min(x, chunk) for x in inp.shape]
+
+    omz.create_dataset(str(0), shape=inp.shape, **opt)
+
+    # iterate across input chunks
+    for i, j, k in product(range(ni), range(nj), range(nk)):
+        loaded_chunk = inp[
+            k * inp_chunk[0] : (k + 1) * inp_chunk[0],
+            j * inp_chunk[1] : (j + 1) * inp_chunk[1],
+            i * inp_chunk[2] : (i + 1) * inp_chunk[2],
+        ]
+
+        print(
+            f"[{i + 1:03d}, {j + 1:03d}, {k + 1:03d}]",
+            "/",
+            f"[{ni:03d}, {nj:03d}, {nk:03d}]",
+            end="\r",
+        )
+
+        # save current chunk
+        omz["0"][
+            k * inp_chunk[0] : k * inp_chunk[0] + loaded_chunk.shape[0],
+            j * inp_chunk[1] : j * inp_chunk[1] + loaded_chunk.shape[1],
+            i * inp_chunk[2] : i * inp_chunk[2] + loaded_chunk.shape[2],
+        ] = loaded_chunk
+
+    generate_pyramid(omz, nblevels - 1, mode="mean", no_pyramid_axis=no_pool)
+
+    print("")
+
+    # Write OME-Zarr multiscale metadata
+    print("Write metadata")
+    ome_unit = to_ome_unit(unit)
+    write_ome_metadata(
+        omz,
+        axes=["z", "y", "x"],
+        no_pool=no_pool,
+        space_unit=ome_unit,
+        space_scale=vx,
+        multiscales_type=("2x2x2 " if no_pool is None else "2x2 ") + "mean window",
+    )
+
+    if not nii:
+        print("done.")
+        return
+
+    # Write NIfTI-Zarr header
+    # NOTE: we use nifti2 because dimensions typically do not fit in a short
+    # TODO: we do not write the json zattrs, but it should be added in
+    #   once the nifti-zarr package is released
+    shape = list(reversed(omz["0"].shape))
+    affine = orientation_to_affine(orientation, *vx[::-1])
+    if center:
+        affine = center_affine(affine, shape[:3])
+    niftizarr_write_header(
+        omz, shape, affine, omz["0"].dtype, to_nifti_unit(unit), nifti_version=2
+    )
diff --git a/linc_convert/utils/unit.py b/linc_convert/utils/unit.py
new file mode 100644
index 0000000..04be0f2
--- /dev/null
+++ b/linc_convert/utils/unit.py
@@ -0,0 +1,249 @@
+"""Convert units between the OME-Zarr, NIfTI and other specifications."""
+
+ome_valid_units = {
+    "space": [
+        "angstrom",
+        "attometer",
+        "centimeter",
+        "decimeter",
+        "exameter",
+        "femtometer",
+        "foot",
"gigameter", + "hectometer", + "inch", + "kilometer", + "megameter", + "meter", + "micrometer", + "mile", + "millimeter", + "nanometer", + "parsec", + "petameter", + "picometer", + "terameter", + "yard", + "yoctometer", + "yottameter", + "zeptometer", + "zettameter", + ], + "time": [ + "attosecond", + "centisecond", + "day", + "decisecond", + "exasecond", + "femtosecond", + "gigasecond", + "hectosecond", + "hour", + "kilosecond", + "megasecond", + "microsecond", + "millisecond", + "minute", + "nanosecond", + "petasecond", + "picosecond", + "second", + "terasecond", + "yoctosecond", + "yottasecond", + "zeptosecond", + "zettasecond", + ], +} + +nifti_valid_units = [ + "unknown", + "meter", + "mm", + "micron", + "sec", + "msec", + "usec", + "hz", + "ppm", + "rads", +] + +si_prefix_short2long = { + "Q": "quetta", + "R": "ronna", + "Y": "yotta", + "Z": "zetta", + "E": "exa", + "P": "peta", + "T": "tera", + "G": "giga", + "M": "mega", + "K": "kilo", + "k": "kilo", + "H": "hecto", + "h": "hecto", + "D": "deca", + "da": "deca", + "d": "deci", + "c": "centi", + "m": "milli", + "u": "micro", + "μ": "micro", + "n": "nano", + "p": "pico", + "f": "femto", + "a": "atto", + "z": "zepto", + "y": "yocto", + "r": "ronto", + "q": "quecto", +} + +si_prefix_long2short = {long: short for short, long in si_prefix_short2long.items()} + +si_prefix_exponent = { + "Q": 30, + "R": 27, + "Y": 24, + "Z": 21, + "E": 18, + "P": 15, + "T": 12, + "G": 9, + "M": 6, + "K": 3, + "k": 3, + "H": 2, + "h": 2, + "D": 1, + "da": 1, + "": 0, + "d": -1, + "c": -2, + "m": -3, + "u": -6, + "μ": -6, + "n": -9, + "p": -12, + "f": -15, + "a": -18, + "z": -21, + "y": -24, + "r": -27, + "q": -30, +} + + +unit_space_short2long = { + short + "m": long + "meter" for short, long in si_prefix_short2long.items() +} +unit_space_short2long.update( + { + "m": "meter", + "mi": "mile", + "yd": "yard", + "ft": "foot", + "in": "inch", + "'": "foot", + '"': "inch", + "Å": "angstrom", + "pc": "parsec", + } +) +unit_space_long2short = {long: short for short, long in unit_space_short2long.items()} +unit_space_long2short["micron"] = "u" + +unit_time_short2long = { + short + "s": long + "second" for short, long in si_prefix_short2long.items() +} +unit_time_short2long.update( + { + "y": "year", + "d": "day", + "h": "hour", + "m": "minute", + "s": "second", + } +) +unit_time_long2short = {long: short for short, long in unit_time_short2long.items()} + +unit_space_scale = { + prefix + "m": 10**exponent for prefix, exponent in si_prefix_exponent.items() +} +unit_space_scale.update( + { + "mi": 1609.344, + "yd": 0.9144, + "ft": 0.3048, + "'": 0.3048, + "in": 25.4e-3, + '"': 25.4e-3, + "Å": 1e-10, + "pc": 3.0857e16, + } +) + +unit_time_scale = { + prefix + "s": 10**exponent for prefix, exponent in si_prefix_exponent.items() +} +unit_time_scale.update( + { + "y": 365.25 * 24 * 60 * 60, + "d": 24 * 60 * 60, + "h": 60 * 60, + "m": 60, + } +) + + +def convert_unit(value: float, src: str, dst: str) -> float: + """Convert unit for a value.""" + src = unit_to_scale(src) + dst = unit_to_scale(dst) + return value * (src / dst) + + +def to_ome_unit(unit: str) -> str: + """Convert unit to ome-zarr spec.""" + if unit in unit_space_short2long: + unit = unit_space_short2long[unit] + elif unit in unit_time_short2long: + unit = unit_time_short2long[unit] + elif unit in si_prefix_short2long: + unit = si_prefix_short2long[unit] + if unit not in (*ome_valid_units["space"], *ome_valid_units["time"]): + raise ValueError("Unknow unit") + return unit + + +def to_nifti_unit(unit: str) -> 
str: + """Convert unit to nifti spec.""" + unit = to_ome_unit(unit) + return { + "meter": "meter", + "millimeter": "mm", + "micrometer": "micron", + "second": "sec", + "millisecond": "msec", + "microsecond": "usec", + }.get(unit, "unknown") + + +def unit_to_scale(unit: str) -> float: + """Convert unit to scale.""" + if unit in unit_space_long2short: + unit = unit_space_long2short[unit] + elif unit in unit_time_long2short: + unit = unit_time_long2short[unit] + elif unit in si_prefix_long2short: + unit = si_prefix_long2short[unit] + if unit in unit_space_scale: + unit = unit_space_scale[unit] + elif unit in unit_time_scale: + unit = unit_time_scale[unit] + elif unit in si_prefix_exponent: + unit = 10 ** si_prefix_exponent[unit] + if isinstance(unit, str): + raise ValueError("Unknown unit", unit) + return unit diff --git a/pyproject.toml b/pyproject.toml index 2143e59..e0b8ecf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,12 +30,22 @@ nibabel = "*" zarr = "^2.0.0" wkw = "*" +[tool.poetry.group.df] +optional = true [tool.poetry.group.df.dependencies] glymur = "*" +[tool.poetry.group.lsm] +optional = true [tool.poetry.group.lsm.dependencies] tifffile = "*" +[tool.poetry.group.psoct] +optional = true +[tool.poetry.group.psoct.dependencies] +h5py = "*" +scipy = "*" + [tool.poetry.group.dev] optional = true [tool.poetry.group.dev.dependencies] diff --git a/scripts/oct_mat_to_zarr.py b/scripts/oct_mat_to_zarr.py deleted file mode 100644 index a2ff53e..0000000 --- a/scripts/oct_mat_to_zarr.py +++ /dev/null @@ -1,440 +0,0 @@ -""" -(OCT) Matlab to OME-ZARR -======================== - -This script converts Matlab files generated by the MGH in-house OCT pipeline -into a pyramidal OME-ZARR hierarchy. - -dependencies: - numpy - scipy - zarr - nibabel - cyclopts -""" -import ast -import json -import math -import os -import re -from contextlib import contextmanager -from functools import wraps -from itertools import product -from typing import Optional, List -from warnings import warn - -import cyclopts -import h5py -import nibabel as nib -import numpy as np -import zarr -from scipy.io import loadmat - -from utils import ( - ceildiv, make_compressor, convert_unit, to_ome_unit, to_nifti_unit, - orientation_to_affine, center_affine -) - -app = cyclopts.App(help_format="markdown") - - -def automap(func): - """Decorator to automatically map the array in the mat file""" - - @wraps(func) - def wrapper(inp, out=None, **kwargs): - if out is None: - out = os.path.splitext(inp[0])[0] - out += '.nii.zarr' if kwargs.get('nii', False) else '.ome.zarr' - kwargs['nii'] = kwargs.get('nii', False) or out.endswith('.nii.zarr') - with mapmat(inp, kwargs.get('key', None)) as dat: - return func(dat, out, **kwargs) - - return wrapper - - -@app.default -@automap -def convert( - inp: List[str], - out: Optional[str] = None, - *, - key: Optional[str] = None, - meta: str = None, - chunk: int = 128, - compressor: str = 'blosc', - compressor_opt: str = "{}", - max_load: int = 128, - max_levels: int = 5, - no_pool: Optional[int] = None, - nii: bool = False, - orientation: str = 'RAS', - center: bool = True, -): - """ - This command converts OCT volumes stored in raw matlab files - into a pyramidal OME-ZARR (or NIfTI-Zarr) hierarchy. 
- - Parameters - ---------- - inp - Path to the input mat file - out - Path to the output Zarr directory [.ome.zarr] - key - Key of the array to be extracted, default to first key found - meta - Path to the metadata file - chunk - Output chunk size - compressor : {blosc, zlib, raw} - Compression method - compressor_opt - Compression options - max_load - Maximum input chunk size - max_levels - Maximum number of pyramid levels - no_pool - Index of dimension to not pool when building pyramid - nii - Convert to nifti-zarr. True if path ends in ".nii.zarr" - orientation - Orientation of the volume - center - Set RAS[0, 0, 0] at FOV center - """ - - if isinstance(compressor_opt, str): - compressor_opt = ast.literal_eval(compressor_opt) - - # Write OME-Zarr multiscale metadata - if meta: - print('Write JSON') - with open(meta, 'r') as f: - meta_txt = f.read() - meta_json = make_json(meta_txt) - path_json = '.'.join(out.split('.')[:-2]) + '.json' - with open(path_json, 'w') as f: - json.dump(meta_json, f, indent=4) - vx = meta_json['PixelSize'] - unit = meta_json['PixelSizeUnits'] - else: - vx = [1] * 3 - unit = 'um' - - # Prepare Zarr group - omz = zarr.storage.DirectoryStore(out) - omz = zarr.group(store=omz, overwrite=True) - - if not hasattr(inp, "dtype"): - raise Exception("Input is not a numpy array. This is likely unexpected") - if len(inp.shape) < 3: - raise Exception("Input array is not 3d") - # Prepare chunking options - opt = { - 'dimension_separator': r'/', - 'order': 'F', - 'dtype': np.dtype(inp.dtype).str, - 'fill_value': None, - 'compressor': make_compressor(compressor, **compressor_opt), - } - - inp_chunk = [min(x, max_load) for x in inp.shape] - nk = ceildiv(inp.shape[0], inp_chunk[0]) - nj = ceildiv(inp.shape[1], inp_chunk[1]) - ni = ceildiv(inp.shape[2], inp_chunk[2]) - - nblevels = min([ - int(math.ceil(math.log2(x))) - for i, x in enumerate(inp.shape) - if i != no_pool - ]) - nblevels = min(nblevels, int(math.ceil(math.log2(max_load)))) - nblevels = min(nblevels, max_levels) - - # create all arrays in the group - shape_level = inp.shape - for level in range(nblevels): - opt['chunks'] = [min(x, chunk) for x in shape_level] - omz.create_dataset(str(level), shape=shape_level, **opt) - shape_level = [ - x if i == no_pool else x // 2 - for i, x in enumerate(shape_level) - ] - - # iterate across input chunks - for i, j, k in product(range(ni), range(nj), range(nk)): - - level_chunk = inp_chunk - loaded_chunk = inp[ - k * level_chunk[0]:(k + 1) * level_chunk[0], - j * level_chunk[1]:(j + 1) * level_chunk[1], - i * level_chunk[2]:(i + 1) * level_chunk[2], - ] - - for level in range(nblevels): - out_level = omz[str(level)] - - print(f'[{i + 1:03d}, {j + 1:03d}, {k + 1:03d}]', '/', - f'[{ni:03d}, {nj:03d}, {nk:03d}]', - f'({1 + level}/{nblevels})', end='\r') - - # save current chunk - out_level[ - k * level_chunk[0]:k * level_chunk[0] + loaded_chunk.shape[0], - j * level_chunk[1]:j * level_chunk[1] + loaded_chunk.shape[1], - i * level_chunk[2]:i * level_chunk[2] + loaded_chunk.shape[2], - ] = loaded_chunk - # ensure divisible by 2 - loaded_chunk = loaded_chunk[ - slice(2 * (loaded_chunk.shape[0] // 2) if 0 != no_pool else None), - slice(2 * (loaded_chunk.shape[1] // 2) if 1 != no_pool else None), - slice(2 * (loaded_chunk.shape[2] // 2) if 2 != no_pool else None), - ] - # mean pyramid (average each 2x2x2 patch) - if no_pool == 0: - loaded_chunk = ( - loaded_chunk[:, 0::2, 0::2] + - loaded_chunk[:, 0::2, 1::2] + - loaded_chunk[:, 1::2, 0::2] + - loaded_chunk[:, 1::2, 1::2] - ) / 4 - elif 
no_pool == 1: - loaded_chunk = ( - loaded_chunk[0::2, :, 0::2] + - loaded_chunk[0::2, :, 1::2] + - loaded_chunk[1::2, :, 0::2] + - loaded_chunk[1::2, :, 1::2] - ) / 4 - elif no_pool == 2: - loaded_chunk = ( - loaded_chunk[0::2, 0::2, :] + - loaded_chunk[0::2, 1::2, :] + - loaded_chunk[1::2, 0::2, :] + - loaded_chunk[1::2, 1::2, :] - ) / 4 - else: - loaded_chunk = ( - loaded_chunk[0::2, 0::2, 0::2] + - loaded_chunk[0::2, 0::2, 1::2] + - loaded_chunk[0::2, 1::2, 0::2] + - loaded_chunk[0::2, 1::2, 1::2] + - loaded_chunk[1::2, 0::2, 0::2] + - loaded_chunk[1::2, 0::2, 1::2] + - loaded_chunk[1::2, 1::2, 0::2] + - loaded_chunk[1::2, 1::2, 1::2] - ) / 8 - level_chunk = [ - x if i == no_pool else x // 2 - for i, x in enumerate(level_chunk) - ] - print('') - - # Write OME-Zarr multiscale metadata - print('Write metadata') - print(unit) - ome_unit = to_ome_unit(unit) - multiscales = [{ - 'version': '0.4', - 'axes': [ - {"name": "z", "type": "space", "unit": ome_unit}, - {"name": "y", "type": "space", "unit": ome_unit}, - {"name": "x", "type": "space", "unit": ome_unit} - ], - 'datasets': [], - 'type': ('2x2x2' if no_pool is None else '2x2') + 'mean window', - 'name': '', - }] - - for n in range(nblevels): - multiscales[0]['datasets'].append({}) - level = multiscales[0]['datasets'][-1] - level["path"] = str(n) - - # With a moving window, the scaling factor is exactly 2, and - # the edges of the top-left voxel are aligned - level["coordinateTransformations"] = [ - { - "type": "scale", - "scale": [ - (1 if no_pool == 0 else 2 ** n) * vx[0], - (1 if no_pool == 1 else 2 ** n) * vx[1], - (1 if no_pool == 2 else 2 ** n) * vx[2], - ] - }, - { - "type": "translation", - "translation": [ - (0 if no_pool == 0 else (2 ** n - 1)) * vx[0] * 0.5, - (0 if no_pool == 1 else (2 ** n - 1)) * vx[1] * 0.5, - (0 if no_pool == 2 else (2 ** n - 1)) * vx[2] * 0.5, - ] - } - ] - multiscales[0]["coordinateTransformations"] = [ - { - "scale": [1.0] * 3, - "type": "scale" - } - ] - omz.attrs["multiscales"] = multiscales - - if not nii: - print('done.') - return - - # Write NIfTI-Zarr header - # NOTE: we use nifti2 because dimensions typically do not fit in a short - # TODO: we do not write the json zattrs, but it should be added in - # once the nifti-zarr package is released - shape = list(reversed(omz['0'].shape)) - affine = orientation_to_affine(orientation, *vx[::-1]) - if center: - affine = center_affine(affine, shape[:3]) - header = nib.Nifti2Header() - header.set_data_shape(shape) - header.set_data_dtype(omz['0'].dtype) - header.set_qform(affine) - header.set_sform(affine) - header.set_xyzt_units(nib.nifti1.unit_codes.code[to_nifti_unit(unit)]) - header.structarr['magic'] = b'nz2\0' - header = np.frombuffer(header.structarr.tobytes(), dtype='u1') - opt = { - 'chunks': [len(header)], - 'dimension_separator': r'/', - 'order': 'F', - 'dtype': '|u1', - 'fill_value': None, - 'compressor': None, - } - omz.create_dataset('nifti', data=header, shape=shape, **opt) - print('done.') - - -@contextmanager -def mapmat(fnames, key=None): - """Load or memory-map an array stored in a .mat file""" - loaded_data = [] - - for fname in fnames: - try: - # "New" .mat file - f = h5py.File(fname, 'r') - except Exception: - # "Old" .mat file - f = loadmat(fname) - if key is None: - if len(f.keys()) > 1: - warn(f'More than one key in .mat file {fname}, arbitrarily loading "{f.keys[0]}"') - key = f.keys()[0] - if key not in f.keys(): - raise Exception(f"Key {key} not found in file {fname}") - - if len(fnames) == 1: - yield f.get(key) - if hasattr(f, 
'close'): - f.close() - break - loaded_data.append(f.get(key)) - - yield np.stack(loaded_data, axis=-1) - - -def make_json(oct_meta): - """ - Expected input: - --------------- - Image medium: 60% TDE - Center Wavelength: 1294.84nm - Axial resolution: 4.9um - Lateral resolution: 4.92um - FOV: 3x3mm - Voxel size: 3x3x3um - Depth focus range: 225um - Number of focuses: 2 - Focus #: 2 - Slice thickness: 450um. - Number of slices: 75 - Slice #:23 - Modality: dBI - """ - - def parse_value_unit(string, n=None): - number = r'-?(\d+\.?\d*|\d*\.?\d+)(E-?\d+)?' - value = 'x'.join([number] * (n or 1)) - match = re.fullmatch(r'(?P' + value + r')(?P\w*)', string) - value, unit = match.group('value'), match.group('unit') - value = list(map(float, value.split('x'))) - if n is None: - value = value[0] - return value, unit - - meta = { - 'BodyPart': 'BRAIN', - 'Environment': 'exvivo', - 'SampleStaining': 'none', - } - - for line in oct_meta.split('\n'): - if ':' not in line: - continue - - key, value = line.split(':') - key, value = key.strip(), value.strip() - - if key == 'Image medium': - parts = value.split() - if 'TDE' in parts: - parts[parts.index('TDE')] = "2,2' Thiodiethanol (TDE)" - meta['SampleMedium'] = ' '.join(parts) - - elif key == 'Center Wavelength': - value, unit = parse_value_unit(value) - meta['Wavelength'] = value - meta['WavelengthUnit'] = unit - - elif key == 'Axial resolution': - value, unit = parse_value_unit(value) - meta['ResolutionAxial'] = value - meta['ResolutionAxialUnit'] = unit - - elif key == 'Lateral resolution': - value, unit = parse_value_unit(value) - meta['ResolutionLateral'] = value - meta['ResolutionLateralUnit'] = unit - - elif key == 'Voxel size': - value, unit = parse_value_unit(value, n=3) - meta['PixelSize'] = value - meta['PixelSizeUnits'] = unit - - elif key == 'Depth focus range': - value, unit = parse_value_unit(value) - meta['DepthFocusRange'] = value - meta['DepthFocusRangeUnit'] = unit - - elif key == 'Number of focuses': - value, unit = parse_value_unit(value) - meta['FocusCount'] = int(value) - - elif key == 'Slice thickness': - value, unit = parse_value_unit(value) - unit = convert_unit(value, unit[:-1], 'u') - meta['SliceThickness'] = value - - elif key == 'Number of slices': - value, unit = parse_value_unit(value) - meta['SliceCount'] = int(value) - - elif key == 'Modality': - meta['OCTModality'] = value - - else: - continue - - return meta - - -if __name__ == "__main__": - app() diff --git a/scripts/utils.py b/scripts/utils.py deleted file mode 100644 index 88d6ff5..0000000 --- a/scripts/utils.py +++ /dev/null @@ -1,324 +0,0 @@ -import math -import numcodecs -import numpy as np - - -def orientation_ensure_3d(orientation): - """ - Parameters - ---------- - orientation : str - Either one of {'coronal', 'axial', 'sagittal'}, or a two- or - three-letter permutation of {('R', 'L'), ('A', 'P'), ('S', 'I')} - - Returns - ------- - orientation : str - A three-letter permutation of {('R', 'L'), ('A', 'P'), ('S', 'I')} - """ - orientation = { - 'coronal': 'LI', - 'axial': 'LP', - 'sagittal': 'PI', - }.get(orientation.lower(), orientation).upper() - if len(orientation) == 2: - if 'L' not in orientation and 'R' not in orientation: - orientation += 'R' - if 'P' not in orientation and 'A' not in orientation: - orientation += 'A' - if 'I' not in orientation and 'S' not in orientation: - orientation += 'S' - return orientation - - -def orientation_to_affine(orientation, vxw=1, vxh=1, vxd=1): - orientation = orientation_ensure_3d(orientation) - affine = np.zeros([4, 
4]) - vx = np.asarray([vxw, vxh, vxd]) - for i in range(3): - letter = orientation[i] - sign = -1 if letter in 'LPI' else 1 - letter = {'L': 'R', 'P': 'A', 'I': 'S'}.get(letter, letter) - index = list('RAS').index(letter) - affine[index, i] = sign * vx[i] - return affine - - -def center_affine(affine, shape): - if len(shape) == 2: - shape = [*shape, 1] - shape = np.asarray(shape) - affine[:3, -1] = -0.5 * affine[:3, :3] @ (shape - 1) - return affine - - -def ceildiv(x, y): - return int(math.ceil(x / y)) - - -def floordiv(x, y): - return int(math.floor(x / y)) - - -def make_compressor(name, **prm): - if not isinstance(name, str): - return name - name = name.lower() - if name == 'blosc': - Compressor = numcodecs.Blosc - elif name == 'zlib': - Compressor = numcodecs.Zlib - else: - raise ValueError('Unknown compressor', name) - return Compressor(**prm) - - -ome_valid_units = { - 'space': [ - 'angstrom', - 'attometer', - 'centimeter', - 'decimeter', - 'exameter', - 'femtometer', - 'foot', - 'gigameter', - 'hectometer', - 'inch', - 'kilometer', - 'megameter', - 'meter', - 'micrometer', - 'mile', - 'millimeter', - 'nanometer', - 'parsec', - 'petameter', - 'picometer', - 'terameter', - 'yard', - 'yoctometer', - 'yottameter', - 'zeptometer', - 'zettameter', - ], - 'time': [ - 'attosecond', - 'centisecond', - 'day', - 'decisecond', - 'exasecond', - 'femtosecond', - 'gigasecond', - 'hectosecond', - 'hour', - 'kilosecond', - 'megasecond', - 'microsecond', - 'millisecond', - 'minute', - 'nanosecond', - 'petasecond', - 'picosecond', - 'second', - 'terasecond', - 'yoctosecond', - 'yottasecond', - 'zeptosecond', - 'zettasecond', - ] -} - -nifti_valid_units = [ - 'unknown', - 'meter', - 'mm', - 'micron', - 'sec', - 'msec', - 'usec', - 'hz', - 'ppm', - 'rads', -] - -si_prefix_short2long = { - 'Q': 'quetta', - 'R': 'ronna', - 'Y': 'yotta', - 'Z': 'zetta', - 'E': 'exa', - 'P': 'peta', - 'T': 'tera', - 'G': 'giga', - 'M': 'mega', - 'K': 'kilo', - 'k': 'kilo', - 'H': 'hecto', - 'h': 'hecto', - 'D': 'deca', - 'da': 'deca', - 'd': 'deci', - 'c': 'centi', - 'm': 'milli', - 'u': 'micro', - 'μ': 'micro', - 'n': 'nano', - 'p': 'pico', - 'f': 'femto', - 'a': 'atto', - 'z': 'zepto', - 'y': 'yocto', - 'r': 'ronto', - 'q': 'quecto', -} - -si_prefix_long2short = { - long: short - for short, long in si_prefix_short2long.items() -} - - -si_prefix_exponent = { - 'Q': 30, - 'R': 27, - 'Y': 24, - 'Z': 21, - 'E': 18, - 'P': 15, - 'T': 12, - 'G': 9, - 'M': 6, - 'K': 3, - 'k': 3, - 'H': 2, - 'h': 2, - 'D': 1, - 'da': 1, - '': 0, - 'd': -1, - 'c': -2, - 'm': -3, - 'u': -6, - 'μ': -6, - 'n': -9, - 'p': -12, - 'f': -15, - 'a': -18, - 'z': -21, - 'y': -24, - 'r': -27, - 'q': -30, -} - - -unit_space_short2long = { - short + 'm': long + 'meter' - for short, long in si_prefix_short2long.items() -} -unit_space_short2long.update({ - 'm': 'meter', - 'mi': 'mile', - 'yd': 'yard', - 'ft': 'foot', - 'in': 'inch', - "'": 'foot', - '"': 'inch', - 'Å': 'angstrom', - 'pc': 'parsec', -}) -unit_space_long2short = { - long: short - for short, long in unit_space_short2long.items() -} -unit_space_long2short['micron'] = 'u' - -unit_time_short2long = { - short + 's': long + 'second' - for short, long in si_prefix_short2long.items() -} -unit_time_short2long.update({ - 'y': 'year', - 'd': 'day', - 'h': 'hour', - 'm': 'minute', - 's': 'second', -}) -unit_time_long2short = { - long: short - for short, long in unit_time_short2long.items() -} - -unit_space_scale = { - prefix + 'm': 10**exponent - for prefix, exponent in si_prefix_exponent.items() -} 
-unit_space_scale.update({ - 'mi': 1609.344, - 'yd': 0.9144, - 'ft': 0.3048, - "'": 0.3048, - 'in': 25.4E-3, - '"': 25.4E-3, - 'Å': 1E-10, - 'pc': 3.0857E16, -}) - -unit_time_scale = { - prefix + 's': 10**exponent - for prefix, exponent in si_prefix_exponent.items() -} -unit_time_scale.update({ - 'y': 365.25*24*60*60, - 'd': 24*60*60, - 'h': 60*60, - 'm': 60, -}) - - -def convert_unit(value, src, dst): - src = unit_to_scale(src) - dst = unit_to_scale(dst) - return value * (src / dst) - - -def to_ome_unit(unit): - if unit in unit_space_short2long: - unit = unit_space_short2long[unit] - elif unit in unit_time_short2long: - unit = unit_time_short2long[unit] - elif unit in si_prefix_short2long: - unit = si_prefix_short2long[unit] - if unit not in (*ome_valid_units['space'], *ome_valid_units['time']): - raise ValueError('Unknow unit') - return unit - - -def to_nifti_unit(unit): - unit = to_ome_unit(unit) - return { - 'meter': 'meter', - 'millimeter': 'mm', - 'micrometer': 'micron', - 'second': 'sec', - 'millisecond': 'msec', - 'microsecond': 'usec', - }.get(unit, 'unknown') - - -def unit_to_scale(unit): - if unit in unit_space_long2short: - unit = unit_space_long2short[unit] - elif unit in unit_time_long2short: - unit = unit_time_long2short[unit] - elif unit in si_prefix_long2short: - unit = si_prefix_long2short[unit] - if unit in unit_space_scale: - unit = unit_space_scale[unit] - elif unit in unit_time_scale: - unit = unit_time_scale[unit] - elif unit in si_prefix_exponent: - unit = 10 ** si_prefix_exponent[unit] - if isinstance(unit, str): - raise ValueError('Unknown unit', unit) - return unit