From b913cabed8841d9dea6fd870ed58e3790658bb62 Mon Sep 17 00:00:00 2001
From: balbasty
Date: Mon, 22 Apr 2024 10:49:38 +0100
Subject: [PATCH 01/51] WIP: conversion from OCT's mat files to zarr

---
 scripts/oct_mat_to_zarr.py | 155 +++++++++++++++++++++++++++++++++++++
 1 file changed, 155 insertions(+)
 create mode 100644 scripts/oct_mat_to_zarr.py

diff --git a/scripts/oct_mat_to_zarr.py b/scripts/oct_mat_to_zarr.py
new file mode 100644
index 0000000..1818c0e
--- /dev/null
+++ b/scripts/oct_mat_to_zarr.py
@@ -0,0 +1,155 @@
+"""
+JPEG2000 to OME-ZARR
+====================
+
+This script converts JPEG2000 files generated by MBF-Neurolucida into
+a pyramidal OME-ZARR hierarchy. It does not recompute the image pyramid
+but instead reuse the JPEG2000 levels (obtained by wavelet transform).
+
+dependencies:
+    numpy
+    scipy
+    zarr
+    nibabel
+    cyclopts
+"""
+import cyclopts
+import zarr
+import ast
+import numcodecs
+import uuid
+import os
+import math
+import h5py
+import numpy as np
+import nibabel as nib
+from scipy.io import loadmat
+from typing import Optional, Union, Sequence
+from warnings import warn
+from contextlib import contextmanager
+
+app = cyclopts.App(help_format="markdown")
+
+
+"""
+Image medium: 60% TDE
+Center Wavelength: 1294.84nm
+Axial resolution: 4.9um
+Lateral resolution: 4.92um
+FOV: 3x3mm
+Voxel size: 3x3x3um
+Depth focus range: 225um
+Number of focuses: 2
+Focus #: 2
+Slice thickness: 450um.
+Number of slices: 75
+Slice #:23
+Modality: dBI
+"""
+
+
+def automap(func):
+    """Decorator to automatically map the array in the mat file"""
+
+    def wrapper(inp, out, **kwargs):
+        if out is None:
+            out = os.path.splitext(inp)
+            out += '.nii.zarr' if kwargs.get('nii', False) else '.ome.zarr'
+        kwargs['nii'] = kwargs.get('nii', False) or out.endswith('.nii.zarr' )
+        with mapmat(inp) as dat:
+            return func(dat, out, **kwargs)
+
+    return wrapper
+
+
+@app.default
+def convert(
+    inp: str,
+    out: str = None,
+    *,
+    chunk: int = 1024,
+    compressor: str = 'blosc',
+    compressor_opt: str = "{}",
+    nii: bool = False,
+    orientation: str = 'RAS',
+    center: bool = True,
+    unit: str = 'um',
+    voxel_size: Optional[Union[float, Sequence[float]]] = None,
+    resolution_axial: Optional[float] = None,
+    resolution_lateral: Optional[float] = None,
+    field_of_view: Optional[Union[float, Sequence[float]]] = None,
+    slice_tickness: Optional[float] = None,
+    nb_slices: Optional[int] = None,
+    nb_focus: Optional[int] = None,
+    center_wavelength: Optional[float] = None,
+    medium: Optional[str] = None,
+    modality: Optional[str] = None,
+):
+    """
+    This command converts OCT volumes stored in raw matlab files
+    into a pyramidal OME-ZARR (or NIfTI-Zarr) hierarchy.
+
+    Parameters
+    ----------
+    inp
+        Path to the input mat file
+    out
+        Path to the output Zarr directory [.ome.zarr]
+    chunk
+        Output chunk size
+    compressor : {blosc, zlib, raw}
+        Compression method
+    compressor_opt
+        Compression options
+    max_load
+        Maximum input chunk size
+    nii
+        Convert to nifti-zarr. True if path ends in ".nii.zarr"
+    orientation
+        Orientation of the slice
+    center
+        Set RAS[0, 0, 0] at FOV center
+    unit : {mm, um, nm}
+        Unit of all spatial values (voxel_size, wavelength, etc.)
+    voxel_size
+        Voxel size, in `units`.
+ resolution_axial + Voxel size, in `units` + resolution_lateral + Voxel size, in `units` + field_of_view + Field of view of the camera + slice_tickness + Thickness of each physical slice + nb_slices + Total number of physical slices + nb_focus + Number of focus planes + center_wavelength + Wavelength, in `units` + medium + Description of the imaging medium + modality + Description of the modality + + """ + pass + + +@contextmanager +def mapmat(fname): + try: + f = h5py.File(fname, 'r') + except Exception: + f = loadmat(fname) + keys = list(f.keys()) + if len(keys) > 1: + key = keys[0] + warn(f'More than one key in .mat, arbitrarily loading "{key}"') + yield f.get(key) + if hasattr(f, 'close'): + f.close() + + +if __name__ == "__main__": + app() From eec0f99acb2c6721225bfdc5750f73b6e074000f Mon Sep 17 00:00:00 2001 From: balbasty Date: Tue, 13 Aug 2024 12:08:31 +0100 Subject: [PATCH 02/51] WIP --- scripts/oct_mat_to_zarr.py | 357 +++++++++++++++++++++++++++++++------ 1 file changed, 298 insertions(+), 59 deletions(-) diff --git a/scripts/oct_mat_to_zarr.py b/scripts/oct_mat_to_zarr.py index 1818c0e..1971e68 100644 --- a/scripts/oct_mat_to_zarr.py +++ b/scripts/oct_mat_to_zarr.py @@ -1,10 +1,9 @@ """ -JPEG2000 to OME-ZARR -==================== +(OCT) Matlab to OME-ZARR +======================== -This script converts JPEG2000 files generated by MBF-Neurolucida into -a pyramidal OME-ZARR hierarchy. It does not recompute the image pyramid -but instead reuse the JPEG2000 levels (obtained by wavelet transform). +This script converts Matlab files generated by the MGH in-house OCT pipeline +into a pyramidal OME-ZARR hierarchy. dependencies: numpy @@ -18,11 +17,15 @@ import ast import numcodecs import uuid +import re import os import math +import json import h5py import numpy as np import nibabel as nib +from itertools import product +from functools import wraps from scipy.io import loadmat from typing import Optional, Union, Sequence from warnings import warn @@ -31,31 +34,15 @@ app = cyclopts.App(help_format="markdown") -""" -Image medium: 60% TDE -Center Wavelength: 1294.84nm -Axial resolution: 4.9um -Lateral resolution: 4.92um -FOV: 3x3mm -Voxel size: 3x3x3um -Depth focus range: 225um -Number of focuses: 2 -Focus #: 2 -Slice thickness: 450um. 
-Number of slices: 75 -Slice #:23 -Modality: dBI -""" - - def automap(func): """Decorator to automatically map the array in the mat file""" + @wraps(func) def wrapper(inp, out, **kwargs): if out is None: out = os.path.splitext(inp) out += '.nii.zarr' if kwargs.get('nii', False) else '.ome.zarr' - kwargs['nii'] = kwargs.get('nii', False) or out.endswith('.nii.zarr' ) + kwargs['nii'] = kwargs.get('nii', False) or out.endswith('.nii.zarr') with mapmat(inp) as dat: return func(dat, out, **kwargs) @@ -63,27 +50,19 @@ def wrapper(inp, out, **kwargs): @app.default +@automap def convert( inp: str, out: str = None, *, - chunk: int = 1024, + meta: str = None, + chunk: int = 128, compressor: str = 'blosc', compressor_opt: str = "{}", + max_load: int = 128, nii: bool = False, orientation: str = 'RAS', center: bool = True, - unit: str = 'um', - voxel_size: Optional[Union[float, Sequence[float]]] = None, - resolution_axial: Optional[float] = None, - resolution_lateral: Optional[float] = None, - field_of_view: Optional[Union[float, Sequence[float]]] = None, - slice_tickness: Optional[float] = None, - nb_slices: Optional[int] = None, - nb_focus: Optional[int] = None, - center_wavelength: Optional[float] = None, - medium: Optional[str] = None, - modality: Optional[str] = None, ): """ This command converts OCT volumes stored in raw matlab files @@ -95,6 +74,8 @@ def convert( Path to the input mat file out Path to the output Zarr directory [.ome.zarr] + meta + Path to the metadata file chunk Output chunk size compressor : {blosc, zlib, raw} @@ -106,34 +87,172 @@ def convert( nii Convert to nifti-zarr. True if path ends in ".nii.zarr" orientation - Orientation of the slice + Orientation of the volume center Set RAS[0, 0, 0] at FOV center - unit : {mm, um, nm} - Unit of all spatial values (voxel_size, wavelength, etc.) - voxel_size - Voxel size, in `units`. 
- resolution_axial - Voxel size, in `units` - resolution_lateral - Voxel size, in `units` - field_of_view - Field of view of the camera - slice_tickness - Thickness of each physical slice - nb_slices - Total number of physical slices - nb_focus - Number of focus planes - center_wavelength - Wavelength, in `units` - medium - Description of the imaging medium - modality - Description of the modality """ - pass + + if isinstance(compressor_opt, str): + compressor_opt = ast.literal_eval(compressor_opt) + + # Write OME-Zarr multiscale metadata + if meta: + print('Write JSON') + meta_json = make_json(meta) + path_json = '.'.join(out.split('.')[:-2]) + '.json' + with open(path_json, 'r') as f: + json.dump(meta_json, f, indent=4) + vx = meta_json['PixelSize'] + unit = meta_json['PixelSizeUnits'] + else: + vx = [1] * 3 + unit = 'um' + + # Prepare Zarr group + omz = zarr.storage.DirectoryStore(out) + omz = zarr.group(store=omz, overwrite=True) + + # Prepare chunking options + opt = { + 'chunks': [chunk] * 3, + 'dimension_separator': r'/', + 'order': 'F', + 'dtype': np.dtype(inp.dtype).str, + 'fill_value': None, + 'compressor': make_compressor(compressor, **compressor_opt), + } + + nk = ceildiv(inp.shape[0], max_load) + nj = ceildiv(inp.shape[1], max_load) + ni = ceildiv(inp.shape[2], max_load) + nblevels = int(math.ceil(math.log2(max_load))) + + # create all arrays in the group + shape_level = inp.shape + for level in range(nblevels): + omz.create_dataset(str(level), shape=shape_level, **opt) + shape_level = [ceildiv(x, 2) for x in shape_level] + + # iterate across input chunks + pct = 0 + for i, j, k in product(range(ni), range(nj), range(nk)): + pct = int(math.ceil((i*j*k + 1) / (ni*nj*nk))) + print(pct, '%', end='\r') + + chunk_size = max_load + inp_chunk = inp[ + k*chunk_size:(k+1)*chunk_size. + i*chunk_size:(i+1)*chunk_size, + j*chunk_size:(j+1)*chunk_size, + ] + + for level in range(nblevels): + out_level = omz[str(level)] + # save current chunk + out_level[ + k*chunk_size:(k+1)*chunk_size. 
+ i*chunk_size:(i+1)*chunk_size, + j*chunk_size:(j+1)*chunk_size, + ] = inp_chunk + # ensure divisible by + inp_chunk = inp_chunk[ + :2*(inp_chunk.shape[0]//2), + :2*(inp_chunk.shape[1]//2), + :2*(inp_chunk.shape[2]//2), + ] + # mean pyramid (average each 2x2x2 patch) + inp_chunk = ( + inp_chunk[0::2, 0::2, 0::2] + + inp_chunk[0::2, 0::2, 1::2] + + inp_chunk[0::2, 1::2, 0::2] + + inp_chunk[0::2, 1::2, 1::2] + + inp_chunk[1::2, 0::2, 0::2] + + inp_chunk[1::2, 0::2, 1::2] + + inp_chunk[1::2, 1::2, 0::2] + + inp_chunk[1::2, 1::2, 1::2] + ) / 8 + chunk_size = chunk_size // 2 + print('') + + # Write OME-Zarr multiscale metadata + print('Write metadata') + multiscales = [{ + 'version': '0.4', + 'axes': [ + {"name": "z", "type": "space", "unit": "micrometer"}, + {"name": "y", "type": "space", "unit": "micrometer"}, + {"name": "x", "type": "space", "unit": "micrometer"} + ], + 'datasets': [], + 'type': '2x2x2 mean window', + 'name': '', + }] + + for n in range(nblevels): + multiscales[0]['datasets'].append({}) + level = multiscales[0]['datasets'][-1] + level["path"] = str(n) + + # With a moving window, the scaling factor is exactly 2, and + # the edges of the top-left voxel are aligned + level["coordinateTransformations"] = [ + { + "type": "scale", + "scale": [ + (2**n)*vx[0], + (2**n)*vx[1], + (2**n)*vx[2], + ] + }, + { + "type": "translation", + "translation": [ + (2**n - 1)*vx[0]*0.5, + (2**n - 1)*vx[1]*0.5, + (2**n - 1)*vx[2]*0.5, + ] + } + ] + multiscales[0]["coordinateTransformations"] = [ + { + "scale": [1.0] * 3, + "type": "scale" + } + ] + omz.attrs["multiscales"] = multiscales + + if not nii: + print('done.') + return + + # Write NIfTI-Zarr header + # NOTE: we use nifti2 because dimensions typically do not fit in a short + # TODO: we do not write the json zattrs, but it should be added in + # once the nifti-zarr package is released + shape = list(reversed(omz['0'].shape)) + affine = orientation_to_affine(orientation, vxw, vxh, thickness or 1) + if center: + affine = center_affine(affine, shape[:2]) + header = nib.Nifti2Header() + header.set_data_shape(shape) + header.set_data_dtype(omz['0'].dtype) + header.set_qform(affine) + header.set_sform(affine) + header.set_xyzt_units(nib.nifti1.unit_codes.code['micron']) + header.structarr['magic'] = b'nz2\0' + header = np.frombuffer(header.structarr.tobytes(), dtype='u1') + opt = { + 'chunks': [len(header)], + 'dimension_separator': r'/', + 'order': 'F', + 'dtype': '|u1', + 'fill_value': None, + 'compressor': None, + } + omz.create_dataset('nifti', data=header, shape=shape, **opt) + print('done.') + @contextmanager @@ -151,5 +270,125 @@ def mapmat(fname): f.close() +def make_compressor(name, **prm): + if not isinstance(name, str): + return name + name = name.lower() + if name == 'blosc': + Compressor = numcodecs.Blosc + elif name == 'zlib': + Compressor = numcodecs.Zlib + else: + raise ValueError('Unknown compressor', name) + return Compressor(**prm) + + +def ceildiv(x, y): + return int(math.ceil(x / y)) + + +def make_json(oct_meta): + + """ + Expected input: + --------------- + Image medium: 60% TDE + Center Wavelength: 1294.84nm + Axial resolution: 4.9um + Lateral resolution: 4.92um + FOV: 3x3mm + Voxel size: 3x3x3um + Depth focus range: 225um + Number of focuses: 2 + Focus #: 2 + Slice thickness: 450um. + Number of slices: 75 + Slice #:23 + Modality: dBI + """ + + def parse_value_unit(string, n=None): + number = r'-?(\d+\.?\d*|\d*\.?\d+)(E-?\d+)?' 
+        value = 'x'.join([number]*(n or 1))
+        match = re.fullmatch(r'(?P<value>' + value + r')(?P<unit>\w*)', string)
+        value, unit = match.group('value'), match.group('unit')
+        value = list(map(float, value.split('x')))
+        if n is None:
+            value = value[0]
+        return value, unit
+
+    def convert_unit(value, src, dst):
+        exponent = {'P': 15, 'T': 12, 'G': 9, 'M': 6, 'K': 3, 'H': 2,
+                    'D': 1, '': 0, 'd': -1, 'c': -2, 'm': -3, 'u': -6,
+                    'µ': -6, 'n': -9, 'p': -12}
+        return value * 10**(exponent[src] - exponent[dst])
+
+    meta = {
+        'BodyPart': 'BRAIN',
+        'Environment': 'exvivo',
+        'SampleStaining': 'none',
+    }
+
+    for line in oct_meta.split():
+        if ':' not in line:
+            continue
+
+        key, value = line.split(':')
+        key, value = key.strip(), value.strip()
+
+        if key == 'Image medium':
+            parts = value.split()
+            if 'TDE' in parts:
+                parts[parts.index('TDE')] = "2,2' Thiodiethanol (TDE)"
+            meta['SampleMedium'] = ' '.join(parts)
+
+        elif key == 'Center Wavelength':
+            value, unit = parse_value_unit(value)
+            meta['Wavelength'] = value
+            meta['WavelengthUnit'] = unit
+
+        elif key == 'Axial resolution':
+            value, unit = parse_value_unit(value)
+            meta['ResolutionAxial'] = value
+            meta['ResolutionAxialUnit'] = unit
+
+        elif key == 'Lateral resolution':
+            value, unit = parse_value_unit(value)
+            meta['ResolutionLateral'] = value
+            meta['ResolutionLateralUnit'] = unit
+
+        elif key == 'Voxel size':
+            value, unit = parse_value_unit(value, n=3)
+            meta['PixelSize'] = value
+            meta['PixelSizeUnits'] = unit
+
+        elif key == 'Depth focus range':
+            value, unit = parse_value_unit(value)
+            meta['DepthFocusRange'] = value
+            meta['DepthFocusRangeUnit'] = unit
+
+        elif key == 'Number of focuses':
+            value, unit = parse_value_unit(value)
+            meta['FocusCount'] = int(value)
+
+        elif key == 'Slice thickness':
+            value, unit = parse_value_unit(value)
+            unit = convert_unit(value, unit[:-1], 'u')
+            meta['SliceThickness'] = value
+
+        elif key == 'Number of slices':
+            value, unit = parse_value_unit(value)
+            meta['SliceCount'] = int(value)
+
+        elif key == 'Modality':
+            meta['OCTModality'] = value
+
+        else:
+            continue
+
+    return meta
+
+
 if __name__ == "__main__":
     app()

From e82c068e573ff0f96161bbfeb1b689c6f6291c71 Mon Sep 17 00:00:00 2001
From: balbasty
Date: Tue, 13 Aug 2024 13:10:11 +0100
Subject: [PATCH 03/51] WIP

---
 scripts/oct_mat_to_zarr.py |  46 ++-----
 scripts/utils.py           | 262 +++++++++++++++++++++++++++++++++++++
 2 files changed, 274 insertions(+), 34 deletions(-)

diff --git a/scripts/oct_mat_to_zarr.py b/scripts/oct_mat_to_zarr.py
index 1971e68..86c3bf4 100644
--- a/scripts/oct_mat_to_zarr.py
+++ b/scripts/oct_mat_to_zarr.py
@@ -15,8 +15,6 @@
 import cyclopts
 import zarr
 import ast
-import numcodecs
-import uuid
 import re
 import os
 import math
@@ -27,10 +25,14 @@ import h5py
 import numpy as np
 import nibabel as nib
 from itertools import product
 from functools import wraps
 from scipy.io import loadmat
-from typing import Optional, Union, Sequence
 from warnings import warn
 from contextlib import contextmanager
 
+from utils import (
+    ceildiv, make_compressor, convert_unit, to_ome_unit, to_nifti_unit,
+    orientation_to_affine, center_affine
+)
+
 app = cyclopts.App(help_format="markdown")
@@ -177,12 +179,13 @@ def convert(
     # Write OME-Zarr multiscale metadata
     print('Write metadata')
+    ome_unit = to_ome_unit(unit)
     multiscales = [{
         'version': '0.4',
         'axes': [
-            {"name": "z", "type": "space", "unit": "micrometer"},
-            {"name": "y", "type": "space", "unit": "micrometer"},
-            {"name": "x", "type": "space", "unit": "micrometer"}
+            {"name": "z", "type": "space", "unit": ome_unit},
+            {"name": "y", "type": "space",
"unit": ome_unit}, + {"name": "x", "type": "space", "unit": ome_unit} ], 'datasets': [], 'type': '2x2x2 mean window', @@ -231,15 +234,15 @@ def convert( # TODO: we do not write the json zattrs, but it should be added in # once the nifti-zarr package is released shape = list(reversed(omz['0'].shape)) - affine = orientation_to_affine(orientation, vxw, vxh, thickness or 1) + affine = orientation_to_affine(orientation, *vx[::-1]) if center: - affine = center_affine(affine, shape[:2]) + affine = center_affine(affine, shape[:3]) header = nib.Nifti2Header() header.set_data_shape(shape) header.set_data_dtype(omz['0'].dtype) header.set_qform(affine) header.set_sform(affine) - header.set_xyzt_units(nib.nifti1.unit_codes.code['micron']) + header.set_xyzt_units(nib.nifti1.unit_codes.code[to_nifti_unit(unit)]) header.structarr['magic'] = b'nz2\0' header = np.frombuffer(header.structarr.tobytes(), dtype='u1') opt = { @@ -254,7 +257,6 @@ def convert( print('done.') - @contextmanager def mapmat(fname): try: @@ -270,23 +272,6 @@ def mapmat(fname): f.close() -def make_compressor(name, **prm): - if not isinstance(name, str): - return name - name = name.lower() - if name == 'blosc': - Compressor = numcodecs.Blosc - elif name == 'zlib': - Compressor = numcodecs.Zlib - else: - raise ValueError('Unknown compressor', name) - return Compressor(**prm) - - -def ceildiv(x, y): - return int(math.ceil(x / y)) - - def make_json(oct_meta): """ @@ -317,12 +302,6 @@ def parse_value_unit(string, n=None): value = value[0] return value, unit - def convert_unit(value, src, dst): - exponent = {'P': 15, 'T': 12, 'G': 9, 'M': 6, 'K': 3, 'H': 2, - 'D': 1, '': 0, 'd': -1, 'c': -2, 'm': -3, 'u': -6, - 'µ': -6, 'n': -9, 'p': -12} - return value * 10**(exponent[src] - exponent[dst]) - meta = { 'BodyPart': 'BRAIN', 'Environment': 'exvivo', @@ -389,6 +368,5 @@ def convert_unit(value, src, dst): return meta - if __name__ == "__main__": app() diff --git a/scripts/utils.py b/scripts/utils.py index d0f54f2..f9317c5 100644 --- a/scripts/utils.py +++ b/scripts/utils.py @@ -4,6 +4,18 @@ def orientation_ensure_3d(orientation): + """ + Parameters + ---------- + orientation : str + Either one of {'coronal', 'axial', 'sagittal'}, or a two- or + three-letter permutation of {('R', 'L'), ('A', 'P'), ('S', 'I')} + + Returns + ------- + orientation : str + A three-letter permutation of {('R', 'L'), ('A', 'P'), ('S', 'I')} + """ orientation = { 'coronal': 'LI', 'axial': 'LP', @@ -59,3 +71,253 @@ def make_compressor(name, **prm): else: raise ValueError('Unknown compressor', name) return Compressor(**prm) + + +ome_valid_units = { + 'space': [ + 'angstrom', + 'attometer', + 'centimeter', + 'decimeter', + 'exameter', + 'femtometer', + 'foot', + 'gigameter', + 'hectometer', + 'inch', + 'kilometer', + 'megameter', + 'meter', + 'micrometer', + 'mile', + 'millimeter', + 'nanometer', + 'parsec', + 'petameter', + 'picometer', + 'terameter', + 'yard', + 'yoctometer', + 'yottameter', + 'zeptometer', + 'zettameter', + ], + 'time': [ + 'attosecond', + 'centisecond', + 'day', + 'decisecond', + 'exasecond', + 'femtosecond', + 'gigasecond', + 'hectosecond', + 'hour', + 'kilosecond', + 'megasecond', + 'microsecond', + 'millisecond', + 'minute', + 'nanosecond', + 'petasecond', + 'picosecond', + 'second', + 'terasecond', + 'yoctosecond', + 'yottasecond', + 'zeptosecond', + 'zettasecond', + ] +} + +nifti_valid_units = [ + 'unknown', + 'meter', + 'mm', + 'micron', + 'sec', + 'msec', + 'usec', + 'hz', + 'ppm', + 'rads', +] + +si_prefix_short2long = { + 'Q': 'quetta', + 'R': 
'ronna', + 'Y': 'yotta', + 'Z': 'zetta', + 'E': 'exa', + 'P': 'peta', + 'T': 'tera', + 'G': 'giga', + 'M': 'mega', + 'K': 'kilo', + 'k': 'kilo', + 'H': 'hecto', + 'h': 'hecto', + 'D': 'deca', + 'da': 'deca', + 'd': 'deci', + 'c': 'centi', + 'm': 'milli', + 'u': 'micro', + 'μ': 'micro', + 'n': 'nano', + 'p': 'pico', + 'f': 'femto', + 'a': 'atto', + 'z': 'zepto', + 'y': 'yocto', + 'r': 'ronto', + 'q': 'quecto', +} + +si_prefix_long2short = { + long: short + for short, long in si_prefix_short2long.items() +} + + +si_prefix_exponent = { + 'Q': 30, + 'R': 27, + 'Y': 24, + 'Z': 21, + 'E': 18, + 'P': 15, + 'T': 12, + 'G': 9, + 'M': 6, + 'K': 3, + 'k': 3, + 'H': 2, + 'h': 2, + 'D': 1, + 'da': 1, + '': 0, + 'd': -1, + 'c': -2, + 'm': -3, + 'u': -6, + 'μ': -6, + 'n': -9, + 'p': -12, + 'f': -15, + 'a': -18, + 'z': -21, + 'y': -24, + 'r': -27, + 'q': -30, +} + + +unit_space_short2long = { + short + 'm': long + 'meter' + for short, long in si_prefix_long2short +} +unit_space_short2long.update({ + 'm': 'meter', + 'mi': 'mile', + 'yd': 'yard', + 'ft': 'foot', + 'in': 'inch', + "'": 'foot', + '"': 'inch', + 'Å': 'angstrom', + 'pc': 'parsec', +}) +unit_space_long2short = { + long: short + for short, long in unit_space_short2long.items() +} +unit_space_long2short['micron'] = 'u' + +unit_time_short2long = { + short + 's': long + 'second' + for short, long in si_prefix_long2short +} +unit_time_short2long.update({ + 'y': 'year', + 'd': 'day', + 'h': 'hour', + 'm': 'minute', + 's': 'second', +}) +unit_time_long2short = { + long: short + for short, long in unit_time_short2long.items() +} + +unit_space_scale = { + prefix + 'm': 10**exponent + for prefix, exponent in si_prefix_exponent +} +unit_space_scale.update({ + 'mi': 1609.344, + 'yd': 0.9144, + 'ft': 0.3048, + "'": 0.3048, + 'in': 25.4E-3, + '"': 25.4E-3, + 'Å': 1E-10, + 'pc': 3.0857E16, +}) + +unit_time_scale = { + prefix + 's': 10**exponent + for prefix, exponent in si_prefix_exponent +} +unit_time_scale.update({ + 'y': 365.25*24*60*60, + 'd': 24*60*60, + 'h': 60*60, + 'm': 60, +}) + + +def convert_unit(value, src, dst): + src = unit_to_scale(src) + dst = unit_to_scale(dst) + return value * (src / dst) + + +def to_ome_unit(unit): + if unit in unit_space_short2long: + unit = unit_space_short2long[unit] + elif unit in unit_time_short2long: + unit = unit_time_short2long[unit] + elif unit in si_prefix_short2long: + unit = si_prefix_short2long[unit] + if unit not in (*ome_valid_units['space'], *ome_valid_units['time']): + raise ValueError('Unknow unit') + + +def to_nifti_unit(unit): + unit = to_ome_unit(unit) + return { + 'meter': 'meter', + 'millimeter': 'mm', + 'micrometer': 'micron', + 'second': 'sec', + 'millisecond': 'msec', + 'microsecond': 'usec', + }.get(unit, 'unknown') + + +def unit_to_scale(unit): + if unit in unit_space_long2short: + unit = unit_space_long2short[unit] + elif unit in unit_time_long2short: + unit = unit_time_long2short[unit] + elif unit in si_prefix_long2short: + unit = si_prefix_long2short[unit] + if unit in unit_space_scale: + unit = unit_space_scale[unit] + elif unit in unit_time_scale: + unit = unit_time_scale[unit] + elif unit in si_prefix_exponent: + unit = 10 ** si_prefix_exponent[unit] + if isinstance(unit, str): + raise ValueError('Unknown unit', unit) + return unit From 1dc61070e8d3f94ffc2a7ce60b1c05a124dcb1f6 Mon Sep 17 00:00:00 2001 From: balbasty Date: Tue, 13 Aug 2024 14:03:10 +0100 Subject: [PATCH 04/51] WIP --- scripts/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/utils.py 
b/scripts/utils.py index f9317c5..a453354 100644 --- a/scripts/utils.py +++ b/scripts/utils.py @@ -214,7 +214,7 @@ def make_compressor(name, **prm): unit_space_short2long = { short + 'm': long + 'meter' - for short, long in si_prefix_long2short + for short, long in si_prefix_long2short.items() } unit_space_short2long.update({ 'm': 'meter', @@ -235,7 +235,7 @@ def make_compressor(name, **prm): unit_time_short2long = { short + 's': long + 'second' - for short, long in si_prefix_long2short + for short, long in si_prefix_long2short.items() } unit_time_short2long.update({ 'y': 'year', @@ -251,7 +251,7 @@ def make_compressor(name, **prm): unit_space_scale = { prefix + 'm': 10**exponent - for prefix, exponent in si_prefix_exponent + for prefix, exponent in si_prefix_exponent.items() } unit_space_scale.update({ 'mi': 1609.344, From 3051d39180e0ade952d93b5199945a3f46056ca1 Mon Sep 17 00:00:00 2001 From: Yael Balbastre Date: Tue, 13 Aug 2024 10:48:43 -0400 Subject: [PATCH 05/51] WIP --- scripts/oct_mat_to_zarr.py | 45 +++++++++++++++++++++----------------- scripts/utils.py | 2 +- 2 files changed, 26 insertions(+), 21 deletions(-) diff --git a/scripts/oct_mat_to_zarr.py b/scripts/oct_mat_to_zarr.py index 86c3bf4..63092c8 100644 --- a/scripts/oct_mat_to_zarr.py +++ b/scripts/oct_mat_to_zarr.py @@ -62,6 +62,7 @@ def convert( compressor: str = 'blosc', compressor_opt: str = "{}", max_load: int = 128, + max_levels: int = 5, nii: bool = False, orientation: str = 'RAS', center: bool = True, @@ -86,6 +87,8 @@ def convert( Compression options max_load Maximum input chunk size + max_levels + Maximum number of pyramid levels nii Convert to nifti-zarr. True if path ends in ".nii.zarr" orientation @@ -101,9 +104,11 @@ def convert( # Write OME-Zarr multiscale metadata if meta: print('Write JSON') - meta_json = make_json(meta) + with open(meta, 'r') as f: + meta_txt = f.read() + meta_json = make_json(meta_txt) path_json = '.'.join(out.split('.')[:-2]) + '.json' - with open(path_json, 'r') as f: + with open(path_json, 'w') as f: json.dump(meta_json, f, indent=4) vx = meta_json['PixelSize'] unit = meta_json['PixelSizeUnits'] @@ -113,7 +118,7 @@ def convert( # Prepare Zarr group omz = zarr.storage.DirectoryStore(out) - omz = zarr.group(store=omz, overwrite=True) + omz = zarr.group(store=omz, overwrite=False) # Prepare chunking options opt = { @@ -128,34 +133,34 @@ def convert( nk = ceildiv(inp.shape[0], max_load) nj = ceildiv(inp.shape[1], max_load) ni = ceildiv(inp.shape[2], max_load) - nblevels = int(math.ceil(math.log2(max_load))) + nblevels = min([int(math.ceil(math.log2(x))) for x in inp.shape]) + nblevels = min(nblevels, int(math.ceil(math.log2(max_load)))) + nblevels = min(nblevels, max_levels) # create all arrays in the group shape_level = inp.shape - for level in range(nblevels): - omz.create_dataset(str(level), shape=shape_level, **opt) - shape_level = [ceildiv(x, 2) for x in shape_level] + #for level in range(nblevels): + # omz.create_dataset(str(level), shape=shape_level, **opt) + # shape_level = [ceildiv(x, 2) for x in shape_level] # iterate across input chunks pct = 0 - for i, j, k in product(range(ni), range(nj), range(nk)): - pct = int(math.ceil((i*j*k + 1) / (ni*nj*nk))) - print(pct, '%', end='\r') - + for _ in []: # i, j, k in product(range(ni), range(nj), range(nk)): chunk_size = max_load inp_chunk = inp[ - k*chunk_size:(k+1)*chunk_size. 
- i*chunk_size:(i+1)*chunk_size, + k*chunk_size:(k+1)*chunk_size, j*chunk_size:(j+1)*chunk_size, + i*chunk_size:(i+1)*chunk_size, ] for level in range(nblevels): + print(f'[{i:03d}, {j:03d}, {k:03d}] / [{ni:03d}, {nj:03d}, {nk:03d}] ({1+level}/{nblevels})', end='\r') out_level = omz[str(level)] # save current chunk out_level[ - k*chunk_size:(k+1)*chunk_size. - i*chunk_size:(i+1)*chunk_size, - j*chunk_size:(j+1)*chunk_size, + k*chunk_size:k*chunk_size+inp_chunk.shape[0], + j*chunk_size:j*chunk_size+inp_chunk.shape[1], + i*chunk_size:i*chunk_size+inp_chunk.shape[2], ] = inp_chunk # ensure divisible by inp_chunk = inp_chunk[ @@ -179,6 +184,7 @@ def convert( # Write OME-Zarr multiscale metadata print('Write metadata') + print(unit) ome_unit = to_ome_unit(unit) multiscales = [{ 'version': '0.4', @@ -265,9 +271,8 @@ def mapmat(fname): f = loadmat(fname) keys = list(f.keys()) if len(keys) > 1: - key = keys[0] - warn(f'More than one key in .mat, arbitrarily loading "{key}"') - yield f.get(key) + warn(f'More than one key in .mat, arbitrarily loading "{keys[0]}"') + yield f.get(keys[0]) if hasattr(f, 'close'): f.close() @@ -308,7 +313,7 @@ def parse_value_unit(string, n=None): 'SampleStaining': 'none', } - for line in oct_meta.split(): + for line in oct_meta.split('\n'): if ':' not in line: continue diff --git a/scripts/utils.py b/scripts/utils.py index a453354..7a8e992 100644 --- a/scripts/utils.py +++ b/scripts/utils.py @@ -266,7 +266,7 @@ def make_compressor(name, **prm): unit_time_scale = { prefix + 's': 10**exponent - for prefix, exponent in si_prefix_exponent + for prefix, exponent in si_prefix_exponent.items() } unit_time_scale.update({ 'y': 365.25*24*60*60, From 94ca8826e2db9e0b0e13a4134dbd1eed055d22b4 Mon Sep 17 00:00:00 2001 From: balbasty Date: Tue, 13 Aug 2024 15:57:18 +0100 Subject: [PATCH 06/51] FIX: units --- scripts/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/utils.py b/scripts/utils.py index 7a8e992..9bb07c4 100644 --- a/scripts/utils.py +++ b/scripts/utils.py @@ -214,7 +214,7 @@ def make_compressor(name, **prm): unit_space_short2long = { short + 'm': long + 'meter' - for short, long in si_prefix_long2short.items() + for short, long in si_prefix_short2long.items() } unit_space_short2long.update({ 'm': 'meter', @@ -235,7 +235,7 @@ def make_compressor(name, **prm): unit_time_short2long = { short + 's': long + 'second' - for short, long in si_prefix_long2short.items() + for short, long in si_prefix_short2long.items() } unit_time_short2long.update({ 'y': 'year', From d158694c8d0eed537b645a8b48b017efe90a958b Mon Sep 17 00:00:00 2001 From: balbasty Date: Tue, 13 Aug 2024 15:59:02 +0100 Subject: [PATCH 07/51] FIX: units --- scripts/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/utils.py b/scripts/utils.py index 9bb07c4..88d6ff5 100644 --- a/scripts/utils.py +++ b/scripts/utils.py @@ -291,6 +291,7 @@ def to_ome_unit(unit): unit = si_prefix_short2long[unit] if unit not in (*ome_valid_units['space'], *ome_valid_units['time']): raise ValueError('Unknow unit') + return unit def to_nifti_unit(unit): From db623c519d00bf7dab22374a1714859ec928a3b5 Mon Sep 17 00:00:00 2001 From: balbasty Date: Wed, 14 Aug 2024 11:27:25 +0100 Subject: [PATCH 08/51] WIP --- scripts/oct_mat_to_zarr.py | 130 ++++++++++++++++++++++++------------- 1 file changed, 86 insertions(+), 44 deletions(-) diff --git a/scripts/oct_mat_to_zarr.py b/scripts/oct_mat_to_zarr.py index 63092c8..3724f18 100644 --- a/scripts/oct_mat_to_zarr.py +++ b/scripts/oct_mat_to_zarr.py 
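The hunks below generalize the 2x2x2 mean window so that one axis (no_pool) can be kept at full resolution. For reference, the isotropic pooling step can be exercised on its own; a minimal standalone NumPy sketch (array contents made up, mean_pool_2x2x2 is a hypothetical name):

import numpy as np

def mean_pool_2x2x2(vol):
    # Average non-overlapping 2x2x2 patches; odd trailing voxels are
    # cropped first, like the "ensure divisible by 2" step in the script.
    vol = vol[:2 * (vol.shape[0] // 2),
              :2 * (vol.shape[1] // 2),
              :2 * (vol.shape[2] // 2)]
    return sum(vol[i::2, j::2, k::2]
               for i in (0, 1) for j in (0, 1) for k in (0, 1)) / 8

vol = np.arange(5 * 6 * 7, dtype=float).reshape(5, 6, 7)
low = mean_pool_2x2x2(vol)
assert low.shape == (2, 3, 3)
assert np.isclose(low[0, 0, 0], vol[:2, :2, :2].mean())

With this window, level n is scaled by 2**n along each pooled axis and the centre of the first voxel shifts by (2**n - 1)/2 input voxels, which is what the scale and translation entries written to the multiscale metadata encode.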
@@ -22,6 +22,7 @@ import h5py import numpy as np import nibabel as nib +from typing import Optional from itertools import product from functools import wraps from scipy.io import loadmat @@ -63,6 +64,7 @@ def convert( compressor_opt: str = "{}", max_load: int = 128, max_levels: int = 5, + no_pool: Optional[int] = None, nii: bool = False, orientation: str = 'RAS', center: bool = True, @@ -89,6 +91,8 @@ def convert( Maximum input chunk size max_levels Maximum number of pyramid levels + no_pool + Index of dimension to not pool when building pyramid nii Convert to nifti-zarr. True if path ends in ".nii.zarr" orientation @@ -118,11 +122,10 @@ def convert( # Prepare Zarr group omz = zarr.storage.DirectoryStore(out) - omz = zarr.group(store=omz, overwrite=False) + omz = zarr.group(store=omz, overwrite=True) # Prepare chunking options opt = { - 'chunks': [chunk] * 3, 'dimension_separator': r'/', 'order': 'F', 'dtype': np.dtype(inp.dtype).str, @@ -130,56 +133,95 @@ def convert( 'compressor': make_compressor(compressor, **compressor_opt), } - nk = ceildiv(inp.shape[0], max_load) - nj = ceildiv(inp.shape[1], max_load) - ni = ceildiv(inp.shape[2], max_load) - nblevels = min([int(math.ceil(math.log2(x))) for x in inp.shape]) + inp_chunk = [min(x, max_load) for x in inp.shape] + nk = ceildiv(inp.shape[0], inp_chunk[0]) + nj = ceildiv(inp.shape[1], inp_chunk[1]) + ni = ceildiv(inp.shape[2], inp_chunk[2]) + + nblevels = min([ + int(math.ceil(math.log2(x))) + for i, x in enumerate(inp.shape) + if i != no_pool + ]) nblevels = min(nblevels, int(math.ceil(math.log2(max_load)))) nblevels = min(nblevels, max_levels) # create all arrays in the group shape_level = inp.shape - #for level in range(nblevels): - # omz.create_dataset(str(level), shape=shape_level, **opt) - # shape_level = [ceildiv(x, 2) for x in shape_level] + for level in range(nblevels): + opt['chunks'] = [min(x, chunk) for x in shape_level] + omz.create_dataset(str(level), shape=shape_level, **opt) + shape_level = [ + s if c == 1 else s//2 + for c, s in zip(chunk, shape_level) + ] # iterate across input chunks - pct = 0 - for _ in []: # i, j, k in product(range(ni), range(nj), range(nk)): - chunk_size = max_load - inp_chunk = inp[ - k*chunk_size:(k+1)*chunk_size, - j*chunk_size:(j+1)*chunk_size, - i*chunk_size:(i+1)*chunk_size, + for i, j, k in product(range(ni), range(nj), range(nk)): + + level_chunk = inp_chunk + loaded_chunk = inp[ + k*level_chunk[0]:(k+1)*level_chunk[0], + j*level_chunk[1]:(j+1)*level_chunk[1], + i*level_chunk[2]:(i+1)*level_chunk[2], ] for level in range(nblevels): - print(f'[{i:03d}, {j:03d}, {k:03d}] / [{ni:03d}, {nj:03d}, {nk:03d}] ({1+level}/{nblevels})', end='\r') out_level = omz[str(level)] + + print(f'[{i+1:03d}, {j+1:03d}, {k+1:03d}]', '/', + f'[{ni:03d}, {nj:03d}, {nk:03d}]', + f'({1+level}/{nblevels})', end='\r') + # save current chunk out_level[ - k*chunk_size:k*chunk_size+inp_chunk.shape[0], - j*chunk_size:j*chunk_size+inp_chunk.shape[1], - i*chunk_size:i*chunk_size+inp_chunk.shape[2], - ] = inp_chunk - # ensure divisible by - inp_chunk = inp_chunk[ - :2*(inp_chunk.shape[0]//2), - :2*(inp_chunk.shape[1]//2), - :2*(inp_chunk.shape[2]//2), + k*level_chunk[0]:k*level_chunk[0]+loaded_chunk.shape[0], + j*level_chunk[1]:j*level_chunk[1]+loaded_chunk.shape[1], + i*level_chunk[2]:i*level_chunk[2]+loaded_chunk.shape[2], + ] = loaded_chunk + # ensure divisible by 2 + loaded_chunk = loaded_chunk[ + slice(2*(inp_chunk.shape[0]//2) if 0 != no_pool else None), + slice(2*(inp_chunk.shape[1]//2) if 1 != no_pool else None), + 
slice(2*(inp_chunk.shape[2]//2) if 2 != no_pool else None), ] # mean pyramid (average each 2x2x2 patch) - inp_chunk = ( - inp_chunk[0::2, 0::2, 0::2] + - inp_chunk[0::2, 0::2, 1::2] + - inp_chunk[0::2, 1::2, 0::2] + - inp_chunk[0::2, 1::2, 1::2] + - inp_chunk[1::2, 0::2, 0::2] + - inp_chunk[1::2, 0::2, 1::2] + - inp_chunk[1::2, 1::2, 0::2] + - inp_chunk[1::2, 1::2, 1::2] - ) / 8 - chunk_size = chunk_size // 2 + if no_pool == 0: + loaded_chunk = ( + loaded_chunk[:, 0::2, 0::2] + + loaded_chunk[:, 0::2, 1::2] + + loaded_chunk[:, 1::2, 0::2] + + loaded_chunk[:, 1::2, 1::2] + ) / 4 + elif no_pool == 1: + loaded_chunk = ( + loaded_chunk[0::2, :, 0::2] + + loaded_chunk[0::2, :, 1::2] + + loaded_chunk[1::2, :, 0::2] + + loaded_chunk[1::2, :, 1::2] + ) / 4 + elif no_pool == 2: + loaded_chunk = ( + loaded_chunk[0::2, 0::2, :] + + loaded_chunk[0::2, 1::2, :] + + loaded_chunk[1::2, 0::2, :] + + loaded_chunk[1::2, 1::2, :] + ) / 4 + else: + inp_chunk = ( + inp_chunk[0::2, 0::2, 0::2] + + inp_chunk[0::2, 0::2, 1::2] + + inp_chunk[0::2, 1::2, 0::2] + + inp_chunk[0::2, 1::2, 1::2] + + inp_chunk[1::2, 0::2, 0::2] + + inp_chunk[1::2, 0::2, 1::2] + + inp_chunk[1::2, 1::2, 0::2] + + inp_chunk[1::2, 1::2, 1::2] + ) / 8 + level_chunk = [ + x if i == no_pool else x // 2 + for i, x in enumerate(level_chunk) + ] print('') # Write OME-Zarr multiscale metadata @@ -194,7 +236,7 @@ def convert( {"name": "x", "type": "space", "unit": ome_unit} ], 'datasets': [], - 'type': '2x2x2 mean window', + 'type': ('2x2x2' if no_pool is None else '2x2') + 'mean window', 'name': '', }] @@ -209,17 +251,17 @@ def convert( { "type": "scale", "scale": [ - (2**n)*vx[0], - (2**n)*vx[1], - (2**n)*vx[2], + (1 if no_pool == 0 else 2**n)*vx[0], + (1 if no_pool == 1 else 2**n)*vx[1], + (1 if no_pool == 2 else 2**n)*vx[2], ] }, { "type": "translation", "translation": [ - (2**n - 1)*vx[0]*0.5, - (2**n - 1)*vx[1]*0.5, - (2**n - 1)*vx[2]*0.5, + (0 if no_pool == 0 else (2**n - 1))*vx[0]*0.5, + (0 if no_pool == 1 else (2**n - 1))*vx[1]*0.5, + (0 if no_pool == 2 else (2**n - 1))*vx[2]*0.5, ] } ] From 05a78a8986a3d65ba139f5e7597c902ec8f565cd Mon Sep 17 00:00:00 2001 From: balbasty Date: Wed, 14 Aug 2024 11:29:43 +0100 Subject: [PATCH 09/51] WIP --- scripts/oct_mat_to_zarr.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/oct_mat_to_zarr.py b/scripts/oct_mat_to_zarr.py index 3724f18..b512bea 100644 --- a/scripts/oct_mat_to_zarr.py +++ b/scripts/oct_mat_to_zarr.py @@ -99,7 +99,6 @@ def convert( Orientation of the volume center Set RAS[0, 0, 0] at FOV center - """ if isinstance(compressor_opt, str): @@ -307,9 +306,12 @@ def convert( @contextmanager def mapmat(fname): + """Load or memory-map an array stored in a .mat file""" try: + # "New" .mat file f = h5py.File(fname, 'r') except Exception: + # "Old" .mat file f = loadmat(fname) keys = list(f.keys()) if len(keys) > 1: From bd69a09b4b6e51072900f6b3baf3773ef01918b6 Mon Sep 17 00:00:00 2001 From: balbasty Date: Wed, 14 Aug 2024 11:34:21 +0100 Subject: [PATCH 10/51] WIP --- scripts/oct_mat_to_zarr.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/oct_mat_to_zarr.py b/scripts/oct_mat_to_zarr.py index b512bea..fa5f168 100644 --- a/scripts/oct_mat_to_zarr.py +++ b/scripts/oct_mat_to_zarr.py @@ -151,8 +151,8 @@ def convert( opt['chunks'] = [min(x, chunk) for x in shape_level] omz.create_dataset(str(level), shape=shape_level, **opt) shape_level = [ - s if c == 1 else s//2 - for c, s in zip(chunk, shape_level) + x if i == no_pool else x//2 + for i, x 
in enumerate(shape_level) ] # iterate across input chunks From f8baa3df66d88f51bce8fa9eaf607ba563441560 Mon Sep 17 00:00:00 2001 From: balbasty Date: Wed, 14 Aug 2024 11:35:01 +0100 Subject: [PATCH 11/51] WIP --- scripts/oct_mat_to_zarr.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/oct_mat_to_zarr.py b/scripts/oct_mat_to_zarr.py index fa5f168..a6beb60 100644 --- a/scripts/oct_mat_to_zarr.py +++ b/scripts/oct_mat_to_zarr.py @@ -180,9 +180,9 @@ def convert( ] = loaded_chunk # ensure divisible by 2 loaded_chunk = loaded_chunk[ - slice(2*(inp_chunk.shape[0]//2) if 0 != no_pool else None), - slice(2*(inp_chunk.shape[1]//2) if 1 != no_pool else None), - slice(2*(inp_chunk.shape[2]//2) if 2 != no_pool else None), + slice(2*(level_chunk.shape[0]//2) if 0 != no_pool else None), + slice(2*(level_chunk.shape[1]//2) if 1 != no_pool else None), + slice(2*(level_chunk.shape[2]//2) if 2 != no_pool else None), ] # mean pyramid (average each 2x2x2 patch) if no_pool == 0: From 4ab71bbf2d60b4cf60b564d53dd610b9d4607ded Mon Sep 17 00:00:00 2001 From: balbasty Date: Wed, 14 Aug 2024 11:35:43 +0100 Subject: [PATCH 12/51] WIP --- scripts/oct_mat_to_zarr.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/oct_mat_to_zarr.py b/scripts/oct_mat_to_zarr.py index a6beb60..0db2dcf 100644 --- a/scripts/oct_mat_to_zarr.py +++ b/scripts/oct_mat_to_zarr.py @@ -180,9 +180,9 @@ def convert( ] = loaded_chunk # ensure divisible by 2 loaded_chunk = loaded_chunk[ - slice(2*(level_chunk.shape[0]//2) if 0 != no_pool else None), - slice(2*(level_chunk.shape[1]//2) if 1 != no_pool else None), - slice(2*(level_chunk.shape[2]//2) if 2 != no_pool else None), + slice(2*(loaded_chunk.shape[0]//2) if 0 != no_pool else None), + slice(2*(loaded_chunk.shape[1]//2) if 1 != no_pool else None), + slice(2*(loaded_chunk.shape[2]//2) if 2 != no_pool else None), ] # mean pyramid (average each 2x2x2 patch) if no_pool == 0: From 1c31b8928ec01fdacf90d204d961714ad32586e2 Mon Sep 17 00:00:00 2001 From: Kaidong Chai Date: Wed, 30 Oct 2024 13:25:07 -0400 Subject: [PATCH 13/51] Raise exception when input is not a ndarray --- scripts/oct_mat_to_zarr.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/oct_mat_to_zarr.py b/scripts/oct_mat_to_zarr.py index 0db2dcf..e79ac6a 100644 --- a/scripts/oct_mat_to_zarr.py +++ b/scripts/oct_mat_to_zarr.py @@ -123,6 +123,9 @@ def convert( omz = zarr.storage.DirectoryStore(out) omz = zarr.group(store=omz, overwrite=True) + if not hasattr(inp,"dtype"): + raise Exception("Input is not a numpy array, converted. This is likely unexpected") + # Prepare chunking options opt = { 'dimension_separator': r'/', From 215d1c1913da82e75c2955fedec9d4656c080efc Mon Sep 17 00:00:00 2001 From: Kaidong Chai Date: Wed, 30 Oct 2024 13:26:37 -0400 Subject: [PATCH 14/51] Raise exception when input does not have enough dimension --- scripts/oct_mat_to_zarr.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/oct_mat_to_zarr.py b/scripts/oct_mat_to_zarr.py index e79ac6a..bf59a1b 100644 --- a/scripts/oct_mat_to_zarr.py +++ b/scripts/oct_mat_to_zarr.py @@ -125,7 +125,8 @@ def convert( if not hasattr(inp,"dtype"): raise Exception("Input is not a numpy array, converted. 
This is likely unexpected") - + if len(inp.shape) < 3: + raise Exception("Input array is not 3d") # Prepare chunking options opt = { 'dimension_separator': r'/', From 91f8a57e2fa3449f8bf519de5192b730d9430edd Mon Sep 17 00:00:00 2001 From: Kaidong Chai Date: Wed, 30 Oct 2024 13:28:44 -0400 Subject: [PATCH 15/51] Fix error message --- scripts/oct_mat_to_zarr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/oct_mat_to_zarr.py b/scripts/oct_mat_to_zarr.py index bf59a1b..decf2ee 100644 --- a/scripts/oct_mat_to_zarr.py +++ b/scripts/oct_mat_to_zarr.py @@ -124,7 +124,7 @@ def convert( omz = zarr.group(store=omz, overwrite=True) if not hasattr(inp,"dtype"): - raise Exception("Input is not a numpy array, converted. This is likely unexpected") + raise Exception("Input is not a numpy array. This is likely unexpected") if len(inp.shape) < 3: raise Exception("Input array is not 3d") # Prepare chunking options From 819d226ee230cd2b5ff0e7f6cf76f79b03f435ce Mon Sep 17 00:00:00 2001 From: Kaidong Chai Date: Wed, 30 Oct 2024 13:30:22 -0400 Subject: [PATCH 16/51] Add an argument "key" to use in .mat file data --- scripts/oct_mat_to_zarr.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/scripts/oct_mat_to_zarr.py b/scripts/oct_mat_to_zarr.py index decf2ee..7971f5d 100644 --- a/scripts/oct_mat_to_zarr.py +++ b/scripts/oct_mat_to_zarr.py @@ -46,7 +46,7 @@ def wrapper(inp, out, **kwargs): out = os.path.splitext(inp) out += '.nii.zarr' if kwargs.get('nii', False) else '.ome.zarr' kwargs['nii'] = kwargs.get('nii', False) or out.endswith('.nii.zarr') - with mapmat(inp) as dat: + with mapmat(inp,kwargs.get('key', None)) as dat: return func(dat, out, **kwargs) return wrapper @@ -58,6 +58,7 @@ def convert( inp: str, out: str = None, *, + key: str = None, meta: str = None, chunk: int = 128, compressor: str = 'blosc', @@ -309,7 +310,7 @@ def convert( @contextmanager -def mapmat(fname): +def mapmat(fname, key=None): """Load or memory-map an array stored in a .mat file""" try: # "New" .mat file @@ -318,9 +319,12 @@ def mapmat(fname): # "Old" .mat file f = loadmat(fname) keys = list(f.keys()) - if len(keys) > 1: - warn(f'More than one key in .mat, arbitrarily loading "{keys[0]}"') - yield f.get(keys[0]) + if key is None: + if len(keys) > 1: + warn(f'More than one key in .mat, arbitrarily loading "{keys[0]}"') + yield f.get(keys[0]) + else: + yield f[key] if hasattr(f, 'close'): f.close() From 520be68c1095732945630dd52193d5d226737846 Mon Sep 17 00:00:00 2001 From: Kaidong Chai Date: Wed, 30 Oct 2024 13:32:24 -0400 Subject: [PATCH 17/51] Fix the error when output file is not given --- scripts/oct_mat_to_zarr.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/oct_mat_to_zarr.py b/scripts/oct_mat_to_zarr.py index 7971f5d..da7de32 100644 --- a/scripts/oct_mat_to_zarr.py +++ b/scripts/oct_mat_to_zarr.py @@ -41,9 +41,9 @@ def automap(func): """Decorator to automatically map the array in the mat file""" @wraps(func) - def wrapper(inp, out, **kwargs): + def wrapper(inp, out=None, **kwargs): if out is None: - out = os.path.splitext(inp) + out = os.path.splitext(inp)[0] out += '.nii.zarr' if kwargs.get('nii', False) else '.ome.zarr' kwargs['nii'] = kwargs.get('nii', False) or out.endswith('.nii.zarr') with mapmat(inp,kwargs.get('key', None)) as dat: From 241c5e9ca964866cc85dc4ea7e95a7775c0697c8 Mon Sep 17 00:00:00 2001 From: Kaidong Chai Date: Wed, 30 Oct 2024 13:38:54 -0400 Subject: [PATCH 18/51] Fix the error when output file is not given 
--- scripts/oct_mat_to_zarr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/oct_mat_to_zarr.py b/scripts/oct_mat_to_zarr.py index da7de32..775f12d 100644 --- a/scripts/oct_mat_to_zarr.py +++ b/scripts/oct_mat_to_zarr.py @@ -56,7 +56,7 @@ def wrapper(inp, out=None, **kwargs): @automap def convert( inp: str, - out: str = None, + out: Optional[str] = None, *, key: str = None, meta: str = None, From 7f609979cdc932a281416aa3f062fd2bf0329111 Mon Sep 17 00:00:00 2001 From: Kaidong Chai Date: Wed, 30 Oct 2024 13:40:39 -0400 Subject: [PATCH 19/51] Add argument key's doc --- scripts/oct_mat_to_zarr.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/oct_mat_to_zarr.py b/scripts/oct_mat_to_zarr.py index 775f12d..a592e97 100644 --- a/scripts/oct_mat_to_zarr.py +++ b/scripts/oct_mat_to_zarr.py @@ -58,7 +58,7 @@ def convert( inp: str, out: Optional[str] = None, *, - key: str = None, + key: Optional[str] = None, meta: str = None, chunk: int = 128, compressor: str = 'blosc', @@ -80,6 +80,8 @@ def convert( Path to the input mat file out Path to the output Zarr directory [.ome.zarr] + key + Key of the array to be extracted, default to first key found meta Path to the metadata file chunk From 70b36430f6a4c98557ad3fc08d9895461f8766d5 Mon Sep 17 00:00:00 2001 From: Kaidong Chai Date: Wed, 30 Oct 2024 14:10:10 -0400 Subject: [PATCH 20/51] Add capability to handle multiple input files and stack them --- scripts/oct_mat_to_zarr.py | 63 +++++++++++++++++++++----------------- 1 file changed, 35 insertions(+), 28 deletions(-) diff --git a/scripts/oct_mat_to_zarr.py b/scripts/oct_mat_to_zarr.py index a592e97..6b3cf65 100644 --- a/scripts/oct_mat_to_zarr.py +++ b/scripts/oct_mat_to_zarr.py @@ -22,7 +22,7 @@ import h5py import numpy as np import nibabel as nib -from typing import Optional +from typing import Optional, List from itertools import product from functools import wraps from scipy.io import loadmat @@ -43,7 +43,7 @@ def automap(func): @wraps(func) def wrapper(inp, out=None, **kwargs): if out is None: - out = os.path.splitext(inp)[0] + out = os.path.splitext(inp[0])[0] out += '.nii.zarr' if kwargs.get('nii', False) else '.ome.zarr' kwargs['nii'] = kwargs.get('nii', False) or out.endswith('.nii.zarr') with mapmat(inp,kwargs.get('key', None)) as dat: @@ -55,7 +55,7 @@ def wrapper(inp, out=None, **kwargs): @app.default @automap def convert( - inp: str, + inp: List[str], out: Optional[str] = None, *, key: Optional[str] = None, @@ -214,15 +214,15 @@ def convert( loaded_chunk[1::2, 1::2, :] ) / 4 else: - inp_chunk = ( - inp_chunk[0::2, 0::2, 0::2] + - inp_chunk[0::2, 0::2, 1::2] + - inp_chunk[0::2, 1::2, 0::2] + - inp_chunk[0::2, 1::2, 1::2] + - inp_chunk[1::2, 0::2, 0::2] + - inp_chunk[1::2, 0::2, 1::2] + - inp_chunk[1::2, 1::2, 0::2] + - inp_chunk[1::2, 1::2, 1::2] + loaded_chunk = ( + loaded_chunk[0::2, 0::2, 0::2] + + loaded_chunk[0::2, 0::2, 1::2] + + loaded_chunk[0::2, 1::2, 0::2] + + loaded_chunk[0::2, 1::2, 1::2] + + loaded_chunk[1::2, 0::2, 0::2] + + loaded_chunk[1::2, 0::2, 1::2] + + loaded_chunk[1::2, 1::2, 0::2] + + loaded_chunk[1::2, 1::2, 1::2] ) / 8 level_chunk = [ x if i == no_pool else x // 2 @@ -312,23 +312,30 @@ def convert( @contextmanager -def mapmat(fname, key=None): +def mapmat(fnames, key=None): """Load or memory-map an array stored in a .mat file""" - try: - # "New" .mat file - f = h5py.File(fname, 'r') - except Exception: - # "Old" .mat file - f = loadmat(fname) - keys = list(f.keys()) - if key is None: - if len(keys) > 1: - 
warn(f'More than one key in .mat, arbitrarily loading "{keys[0]}"') - yield f.get(keys[0]) - else: - yield f[key] - if hasattr(f, 'close'): - f.close() + loaded_data = [] + + for fname in fnames: + try: + # "New" .mat file + f = h5py.File(fname, 'r') + except Exception: + # "Old" .mat file + f = loadmat(fname) + if key is None: + if len(f.keys()[0]) > 1: + warn(f'More than one key in .mat file {fname}, arbitrarily loading "{f.keys[0]}"') + key = f.keys()[0] + + if len(fnames) == 1: + yield f.get(key) + if hasattr(f, 'close'): + f.close() + break + loaded_data.append(f.get(key)) + + yield np.stack(loaded_data, axis=-1) def make_json(oct_meta): From b008415e24e75f18f7ccfcf447a01c4653e5b566 Mon Sep 17 00:00:00 2001 From: Kaidong Chai Date: Wed, 30 Oct 2024 14:10:36 -0400 Subject: [PATCH 21/51] Cleanup --- scripts/oct_mat_to_zarr.py | 96 +++++++++++++++++++------------------- 1 file changed, 48 insertions(+), 48 deletions(-) diff --git a/scripts/oct_mat_to_zarr.py b/scripts/oct_mat_to_zarr.py index 6b3cf65..381a626 100644 --- a/scripts/oct_mat_to_zarr.py +++ b/scripts/oct_mat_to_zarr.py @@ -12,22 +12,23 @@ nibabel cyclopts """ -import cyclopts -import zarr import ast -import re -import os -import math import json +import math +import os +import re +from contextlib import contextmanager +from functools import wraps +from itertools import product +from typing import Optional, List +from warnings import warn + +import cyclopts import h5py -import numpy as np import nibabel as nib -from typing import Optional, List -from itertools import product -from functools import wraps +import numpy as np +import zarr from scipy.io import loadmat -from warnings import warn -from contextlib import contextmanager from utils import ( ceildiv, make_compressor, convert_unit, to_ome_unit, to_nifti_unit, @@ -46,7 +47,7 @@ def wrapper(inp, out=None, **kwargs): out = os.path.splitext(inp[0])[0] out += '.nii.zarr' if kwargs.get('nii', False) else '.ome.zarr' kwargs['nii'] = kwargs.get('nii', False) or out.endswith('.nii.zarr') - with mapmat(inp,kwargs.get('key', None)) as dat: + with mapmat(inp, kwargs.get('key', None)) as dat: return func(dat, out, **kwargs) return wrapper @@ -55,20 +56,20 @@ def wrapper(inp, out=None, **kwargs): @app.default @automap def convert( - inp: List[str], - out: Optional[str] = None, - *, - key: Optional[str] = None, - meta: str = None, - chunk: int = 128, - compressor: str = 'blosc', - compressor_opt: str = "{}", - max_load: int = 128, - max_levels: int = 5, - no_pool: Optional[int] = None, - nii: bool = False, - orientation: str = 'RAS', - center: bool = True, + inp: List[str], + out: Optional[str] = None, + *, + key: Optional[str] = None, + meta: str = None, + chunk: int = 128, + compressor: str = 'blosc', + compressor_opt: str = "{}", + max_load: int = 128, + max_levels: int = 5, + no_pool: Optional[int] = None, + nii: bool = False, + orientation: str = 'RAS', + center: bool = True, ): """ This command converts OCT volumes stored in raw matlab files @@ -126,7 +127,7 @@ def convert( omz = zarr.storage.DirectoryStore(out) omz = zarr.group(store=omz, overwrite=True) - if not hasattr(inp,"dtype"): + if not hasattr(inp, "dtype"): raise Exception("Input is not a numpy array. 
This is likely unexpected") if len(inp.shape) < 3: raise Exception("Input array is not 3d") @@ -158,7 +159,7 @@ def convert( opt['chunks'] = [min(x, chunk) for x in shape_level] omz.create_dataset(str(level), shape=shape_level, **opt) shape_level = [ - x if i == no_pool else x//2 + x if i == no_pool else x // 2 for i, x in enumerate(shape_level) ] @@ -167,29 +168,29 @@ def convert( level_chunk = inp_chunk loaded_chunk = inp[ - k*level_chunk[0]:(k+1)*level_chunk[0], - j*level_chunk[1]:(j+1)*level_chunk[1], - i*level_chunk[2]:(i+1)*level_chunk[2], - ] + k * level_chunk[0]:(k + 1) * level_chunk[0], + j * level_chunk[1]:(j + 1) * level_chunk[1], + i * level_chunk[2]:(i + 1) * level_chunk[2], + ] for level in range(nblevels): out_level = omz[str(level)] - print(f'[{i+1:03d}, {j+1:03d}, {k+1:03d}]', '/', + print(f'[{i + 1:03d}, {j + 1:03d}, {k + 1:03d}]', '/', f'[{ni:03d}, {nj:03d}, {nk:03d}]', - f'({1+level}/{nblevels})', end='\r') + f'({1 + level}/{nblevels})', end='\r') # save current chunk out_level[ - k*level_chunk[0]:k*level_chunk[0]+loaded_chunk.shape[0], - j*level_chunk[1]:j*level_chunk[1]+loaded_chunk.shape[1], - i*level_chunk[2]:i*level_chunk[2]+loaded_chunk.shape[2], + k * level_chunk[0]:k * level_chunk[0] + loaded_chunk.shape[0], + j * level_chunk[1]:j * level_chunk[1] + loaded_chunk.shape[1], + i * level_chunk[2]:i * level_chunk[2] + loaded_chunk.shape[2], ] = loaded_chunk # ensure divisible by 2 loaded_chunk = loaded_chunk[ - slice(2*(loaded_chunk.shape[0]//2) if 0 != no_pool else None), - slice(2*(loaded_chunk.shape[1]//2) if 1 != no_pool else None), - slice(2*(loaded_chunk.shape[2]//2) if 2 != no_pool else None), + slice(2 * (loaded_chunk.shape[0] // 2) if 0 != no_pool else None), + slice(2 * (loaded_chunk.shape[1] // 2) if 1 != no_pool else None), + slice(2 * (loaded_chunk.shape[2] // 2) if 2 != no_pool else None), ] # mean pyramid (average each 2x2x2 patch) if no_pool == 0: @@ -257,17 +258,17 @@ def convert( { "type": "scale", "scale": [ - (1 if no_pool == 0 else 2**n)*vx[0], - (1 if no_pool == 1 else 2**n)*vx[1], - (1 if no_pool == 2 else 2**n)*vx[2], + (1 if no_pool == 0 else 2 ** n) * vx[0], + (1 if no_pool == 1 else 2 ** n) * vx[1], + (1 if no_pool == 2 else 2 ** n) * vx[2], ] }, { "type": "translation", "translation": [ - (0 if no_pool == 0 else (2**n - 1))*vx[0]*0.5, - (0 if no_pool == 1 else (2**n - 1))*vx[1]*0.5, - (0 if no_pool == 2 else (2**n - 1))*vx[2]*0.5, + (0 if no_pool == 0 else (2 ** n - 1)) * vx[0] * 0.5, + (0 if no_pool == 1 else (2 ** n - 1)) * vx[1] * 0.5, + (0 if no_pool == 2 else (2 ** n - 1)) * vx[2] * 0.5, ] } ] @@ -339,7 +340,6 @@ def mapmat(fnames, key=None): def make_json(oct_meta): - """ Expected input: --------------- @@ -360,7 +360,7 @@ def make_json(oct_meta): def parse_value_unit(string, n=None): number = r'-?(\d+\.?\d*|\d*\.?\d+)(E-?\d+)?' 
- value = 'x'.join([number]*(n or 1)) + value = 'x'.join([number] * (n or 1)) match = re.fullmatch(r'(?P' + value + r')(?P\w*)', string) value, unit = match.group('value'), match.group('unit') value = list(map(float, value.split('x'))) From f3a3c09a6d129d252dc88b391e91938429b0cfbc Mon Sep 17 00:00:00 2001 From: Kaidong Chai Date: Wed, 30 Oct 2024 14:15:00 -0400 Subject: [PATCH 22/51] Fix typo --- scripts/oct_mat_to_zarr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/oct_mat_to_zarr.py b/scripts/oct_mat_to_zarr.py index 381a626..4846ad8 100644 --- a/scripts/oct_mat_to_zarr.py +++ b/scripts/oct_mat_to_zarr.py @@ -325,7 +325,7 @@ def mapmat(fnames, key=None): # "Old" .mat file f = loadmat(fname) if key is None: - if len(f.keys()[0]) > 1: + if len(f.keys()) > 1: warn(f'More than one key in .mat file {fname}, arbitrarily loading "{f.keys[0]}"') key = f.keys()[0] From 9910ad69c28fede9452da68f2a95c3cad85cff42 Mon Sep 17 00:00:00 2001 From: Kaidong Chai Date: Wed, 30 Oct 2024 14:26:14 -0400 Subject: [PATCH 23/51] Raise exception when the key is not found in the input file. Ensure all input files using the same key. --- scripts/oct_mat_to_zarr.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/oct_mat_to_zarr.py b/scripts/oct_mat_to_zarr.py index 4846ad8..a2ff53e 100644 --- a/scripts/oct_mat_to_zarr.py +++ b/scripts/oct_mat_to_zarr.py @@ -328,6 +328,8 @@ def mapmat(fnames, key=None): if len(f.keys()) > 1: warn(f'More than one key in .mat file {fname}, arbitrarily loading "{f.keys[0]}"') key = f.keys()[0] + if key not in f.keys(): + raise Exception(f"Key {key} not found in file {fname}") if len(fnames) == 1: yield f.get(key) From 9bd60d4552d6fdae165e65c25994798f6840bfb7 Mon Sep 17 00:00:00 2001 From: Kaidong Chai Date: Wed, 13 Nov 2024 11:28:49 -0500 Subject: [PATCH 24/51] Move PSOCT conversion script into modalities --- linc_convert/modalities/psoct/__init__.py | 5 ++ linc_convert/modalities/psoct/cli.py | 9 +++ .../modalities/psoct/placeholder.py | 32 +++----- .../utils.py => linc_convert/utils/unit.py | 76 ------------------- 4 files changed, 25 insertions(+), 97 deletions(-) create mode 100644 linc_convert/modalities/psoct/__init__.py create mode 100644 linc_convert/modalities/psoct/cli.py rename scripts/oct_mat_to_zarr.py => linc_convert/modalities/psoct/placeholder.py (96%) rename scripts/utils.py => linc_convert/utils/unit.py (71%) diff --git a/linc_convert/modalities/psoct/__init__.py b/linc_convert/modalities/psoct/__init__.py new file mode 100644 index 0000000..a6d9d6f --- /dev/null +++ b/linc_convert/modalities/psoct/__init__.py @@ -0,0 +1,5 @@ +"""Dark Field microscopy converters.""" + +__all__ = ["cli", "multi_slice", "single_slice"] + +from . 
import cli, multi_slice, single_slice diff --git a/linc_convert/modalities/psoct/cli.py b/linc_convert/modalities/psoct/cli.py new file mode 100644 index 0000000..12d4c06 --- /dev/null +++ b/linc_convert/modalities/psoct/cli.py @@ -0,0 +1,9 @@ +"""Entry-points for Dark Field microscopy converter.""" + +from cyclopts import App + +from linc_convert.cli import main + +help = "Converters for PS-OCT .mat files" +psoct = App(name="psoct", help=help) +main.command(psoct) diff --git a/scripts/oct_mat_to_zarr.py b/linc_convert/modalities/psoct/placeholder.py similarity index 96% rename from scripts/oct_mat_to_zarr.py rename to linc_convert/modalities/psoct/placeholder.py index a2ff53e..964b4e9 100644 --- a/scripts/oct_mat_to_zarr.py +++ b/linc_convert/modalities/psoct/placeholder.py @@ -1,17 +1,8 @@ """ -(OCT) Matlab to OME-ZARR -======================== - -This script converts Matlab files generated by the MGH in-house OCT pipeline -into a pyramidal OME-ZARR hierarchy. - -dependencies: - numpy - scipy - zarr - nibabel - cyclopts +Converts Matlab files generated by the MGH in-house OCT pipeline +into a OME-ZARR pyramid. """ + import ast import json import math @@ -30,12 +21,14 @@ import zarr from scipy.io import loadmat -from utils import ( - ceildiv, make_compressor, convert_unit, to_ome_unit, to_nifti_unit, - orientation_to_affine, center_affine -) +from linc_convert.modalities.psoct.cli import psoct +from linc_convert.utils.orientation import orientation_to_affine, center_affine +from linc_convert.utils.math import ceildiv +from linc_convert.utils.zarr import make_compressor +from linc_convert.utils.unit import (convert_unit, to_ome_unit, to_nifti_unit) -app = cyclopts.App(help_format="markdown") +placeholder = cyclopts.App(name="placeholder", help_format="markdown") +psoct.command(placeholder) def automap(func): @@ -53,7 +46,7 @@ def wrapper(inp, out=None, **kwargs): return wrapper -@app.default +@placeholder.default @automap def convert( inp: List[str], @@ -435,6 +428,3 @@ def parse_value_unit(string, n=None): return meta - -if __name__ == "__main__": - app() diff --git a/scripts/utils.py b/linc_convert/utils/unit.py similarity index 71% rename from scripts/utils.py rename to linc_convert/utils/unit.py index 88d6ff5..fc27695 100644 --- a/scripts/utils.py +++ b/linc_convert/utils/unit.py @@ -1,78 +1,3 @@ -import math -import numcodecs -import numpy as np - - -def orientation_ensure_3d(orientation): - """ - Parameters - ---------- - orientation : str - Either one of {'coronal', 'axial', 'sagittal'}, or a two- or - three-letter permutation of {('R', 'L'), ('A', 'P'), ('S', 'I')} - - Returns - ------- - orientation : str - A three-letter permutation of {('R', 'L'), ('A', 'P'), ('S', 'I')} - """ - orientation = { - 'coronal': 'LI', - 'axial': 'LP', - 'sagittal': 'PI', - }.get(orientation.lower(), orientation).upper() - if len(orientation) == 2: - if 'L' not in orientation and 'R' not in orientation: - orientation += 'R' - if 'P' not in orientation and 'A' not in orientation: - orientation += 'A' - if 'I' not in orientation and 'S' not in orientation: - orientation += 'S' - return orientation - - -def orientation_to_affine(orientation, vxw=1, vxh=1, vxd=1): - orientation = orientation_ensure_3d(orientation) - affine = np.zeros([4, 4]) - vx = np.asarray([vxw, vxh, vxd]) - for i in range(3): - letter = orientation[i] - sign = -1 if letter in 'LPI' else 1 - letter = {'L': 'R', 'P': 'A', 'I': 'S'}.get(letter, letter) - index = list('RAS').index(letter) - affine[index, i] = sign * vx[i] - return affine 
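The orientation helpers being moved out of scripts/utils.py build the voxel-to-world matrix from an axis-letter code; a small worked example of the expected result (voxel size and shape are made up, and the homogeneous row is filled in for completeness):

import numpy as np

# Orientation 'RAS' with 3 um voxels: each letter places a signed voxel
# size in the row of the matching world axis (L/P/I letters flip the sign).
affine = np.zeros([4, 4])
affine[3, 3] = 1
for i, letter in enumerate('RAS'):
    sign = -1 if letter in 'LPI' else 1
    letter = {'L': 'R', 'P': 'A', 'I': 'S'}.get(letter, letter)
    affine['RAS'.index(letter), i] = sign * 3.0

# center_affine then places RAS[0, 0, 0] at the FOV centre:
shape = np.asarray([128, 128, 64])
affine[:3, -1] = -0.5 * affine[:3, :3] @ (shape - 1)
assert affine[0, 3] == -0.5 * 3.0 * 127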
- - -def center_affine(affine, shape): - if len(shape) == 2: - shape = [*shape, 1] - shape = np.asarray(shape) - affine[:3, -1] = -0.5 * affine[:3, :3] @ (shape - 1) - return affine - - -def ceildiv(x, y): - return int(math.ceil(x / y)) - - -def floordiv(x, y): - return int(math.floor(x / y)) - - -def make_compressor(name, **prm): - if not isinstance(name, str): - return name - name = name.lower() - if name == 'blosc': - Compressor = numcodecs.Blosc - elif name == 'zlib': - Compressor = numcodecs.Zlib - else: - raise ValueError('Unknown compressor', name) - return Compressor(**prm) - - ome_valid_units = { 'space': [ 'angstrom', @@ -178,7 +103,6 @@ def make_compressor(name, **prm): for short, long in si_prefix_short2long.items() } - si_prefix_exponent = { 'Q': 30, 'R': 27, From db215178bcdb0d8517e58d703dfdd21e029e0ad6 Mon Sep 17 00:00:00 2001 From: Kaidong Chai Date: Wed, 13 Nov 2024 13:49:15 -0500 Subject: [PATCH 25/51] Update OCT process naming --- linc_convert/modalities/__init__.py | 4 ++-- linc_convert/modalities/psoct/__init__.py | 4 ++-- .../modalities/psoct/{placeholder.py => stacking.py} | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) rename linc_convert/modalities/psoct/{placeholder.py => stacking.py} (99%) diff --git a/linc_convert/modalities/__init__.py b/linc_convert/modalities/__init__.py index 7ae40c8..d1a6625 100644 --- a/linc_convert/modalities/__init__.py +++ b/linc_convert/modalities/__init__.py @@ -1,4 +1,4 @@ """Converters for all imaging modalities.""" -__all__ = ["df", "lsm"] -from . import df, lsm +__all__ = ["df", "lsm", "psoct"] +from . import df, lsm, psoct diff --git a/linc_convert/modalities/psoct/__init__.py b/linc_convert/modalities/psoct/__init__.py index a6d9d6f..7f771eb 100644 --- a/linc_convert/modalities/psoct/__init__.py +++ b/linc_convert/modalities/psoct/__init__.py @@ -1,5 +1,5 @@ """Dark Field microscopy converters.""" -__all__ = ["cli", "multi_slice", "single_slice"] +__all__ = ["cli", "stacking"] -from . import cli, multi_slice, single_slice +from . 
import cli, stacking diff --git a/linc_convert/modalities/psoct/placeholder.py b/linc_convert/modalities/psoct/stacking.py similarity index 99% rename from linc_convert/modalities/psoct/placeholder.py rename to linc_convert/modalities/psoct/stacking.py index 964b4e9..51fa8c5 100644 --- a/linc_convert/modalities/psoct/placeholder.py +++ b/linc_convert/modalities/psoct/stacking.py @@ -27,8 +27,8 @@ from linc_convert.utils.zarr import make_compressor from linc_convert.utils.unit import (convert_unit, to_ome_unit, to_nifti_unit) -placeholder = cyclopts.App(name="placeholder", help_format="markdown") -psoct.command(placeholder) +stacking = cyclopts.App(name="stacking", help_format="markdown") +psoct.command(stacking) def automap(func): @@ -46,7 +46,7 @@ def wrapper(inp, out=None, **kwargs): return wrapper -@placeholder.default +@stacking.default @automap def convert( inp: List[str], From c9c06064b0d2cb5c801f6d07fbfd3e2dfc36b8c1 Mon Sep 17 00:00:00 2001 From: Kaidong Chai Date: Thu, 14 Nov 2024 10:26:10 -0500 Subject: [PATCH 26/51] docs/style --- linc_convert/utils/unit.py | 371 ++++++++++++++++++------------------- 1 file changed, 183 insertions(+), 188 deletions(-) diff --git a/linc_convert/utils/unit.py b/linc_convert/utils/unit.py index fc27695..01c4463 100644 --- a/linc_convert/utils/unit.py +++ b/linc_convert/utils/unit.py @@ -1,236 +1,231 @@ ome_valid_units = { - 'space': [ - 'angstrom', - 'attometer', - 'centimeter', - 'decimeter', - 'exameter', - 'femtometer', - 'foot', - 'gigameter', - 'hectometer', - 'inch', - 'kilometer', - 'megameter', - 'meter', - 'micrometer', - 'mile', - 'millimeter', - 'nanometer', - 'parsec', - 'petameter', - 'picometer', - 'terameter', - 'yard', - 'yoctometer', - 'yottameter', - 'zeptometer', - 'zettameter', + "space": [ + "angstrom", + "attometer", + "centimeter", + "decimeter", + "exameter", + "femtometer", + "foot", + "gigameter", + "hectometer", + "inch", + "kilometer", + "megameter", + "meter", + "micrometer", + "mile", + "millimeter", + "nanometer", + "parsec", + "petameter", + "picometer", + "terameter", + "yard", + "yoctometer", + "yottameter", + "zeptometer", + "zettameter", + ], + "time": [ + "attosecond", + "centisecond", + "day", + "decisecond", + "exasecond", + "femtosecond", + "gigasecond", + "hectosecond", + "hour", + "kilosecond", + "megasecond", + "microsecond", + "millisecond", + "minute", + "nanosecond", + "petasecond", + "picosecond", + "second", + "terasecond", + "yoctosecond", + "yottasecond", + "zeptosecond", + "zettasecond", ], - 'time': [ - 'attosecond', - 'centisecond', - 'day', - 'decisecond', - 'exasecond', - 'femtosecond', - 'gigasecond', - 'hectosecond', - 'hour', - 'kilosecond', - 'megasecond', - 'microsecond', - 'millisecond', - 'minute', - 'nanosecond', - 'petasecond', - 'picosecond', - 'second', - 'terasecond', - 'yoctosecond', - 'yottasecond', - 'zeptosecond', - 'zettasecond', - ] } nifti_valid_units = [ - 'unknown', - 'meter', - 'mm', - 'micron', - 'sec', - 'msec', - 'usec', - 'hz', - 'ppm', - 'rads', + "unknown", + "meter", + "mm", + "micron", + "sec", + "msec", + "usec", + "hz", + "ppm", + "rads", ] si_prefix_short2long = { - 'Q': 'quetta', - 'R': 'ronna', - 'Y': 'yotta', - 'Z': 'zetta', - 'E': 'exa', - 'P': 'peta', - 'T': 'tera', - 'G': 'giga', - 'M': 'mega', - 'K': 'kilo', - 'k': 'kilo', - 'H': 'hecto', - 'h': 'hecto', - 'D': 'deca', - 'da': 'deca', - 'd': 'deci', - 'c': 'centi', - 'm': 'milli', - 'u': 'micro', - 'μ': 'micro', - 'n': 'nano', - 'p': 'pico', - 'f': 'femto', - 'a': 'atto', - 'z': 'zepto', - 'y': 'yocto', - 
'r': 'ronto', - 'q': 'quecto', + "Q": "quetta", + "R": "ronna", + "Y": "yotta", + "Z": "zetta", + "E": "exa", + "P": "peta", + "T": "tera", + "G": "giga", + "M": "mega", + "K": "kilo", + "k": "kilo", + "H": "hecto", + "h": "hecto", + "D": "deca", + "da": "deca", + "d": "deci", + "c": "centi", + "m": "milli", + "u": "micro", + "μ": "micro", + "n": "nano", + "p": "pico", + "f": "femto", + "a": "atto", + "z": "zepto", + "y": "yocto", + "r": "ronto", + "q": "quecto", } -si_prefix_long2short = { - long: short - for short, long in si_prefix_short2long.items() -} +si_prefix_long2short = {long: short for short, long in si_prefix_short2long.items()} si_prefix_exponent = { - 'Q': 30, - 'R': 27, - 'Y': 24, - 'Z': 21, - 'E': 18, - 'P': 15, - 'T': 12, - 'G': 9, - 'M': 6, - 'K': 3, - 'k': 3, - 'H': 2, - 'h': 2, - 'D': 1, - 'da': 1, - '': 0, - 'd': -1, - 'c': -2, - 'm': -3, - 'u': -6, - 'μ': -6, - 'n': -9, - 'p': -12, - 'f': -15, - 'a': -18, - 'z': -21, - 'y': -24, - 'r': -27, - 'q': -30, + "Q": 30, + "R": 27, + "Y": 24, + "Z": 21, + "E": 18, + "P": 15, + "T": 12, + "G": 9, + "M": 6, + "K": 3, + "k": 3, + "H": 2, + "h": 2, + "D": 1, + "da": 1, + "": 0, + "d": -1, + "c": -2, + "m": -3, + "u": -6, + "μ": -6, + "n": -9, + "p": -12, + "f": -15, + "a": -18, + "z": -21, + "y": -24, + "r": -27, + "q": -30, } unit_space_short2long = { - short + 'm': long + 'meter' - for short, long in si_prefix_short2long.items() + short + "m": long + "meter" for short, long in si_prefix_short2long.items() } -unit_space_short2long.update({ - 'm': 'meter', - 'mi': 'mile', - 'yd': 'yard', - 'ft': 'foot', - 'in': 'inch', - "'": 'foot', - '"': 'inch', - 'Å': 'angstrom', - 'pc': 'parsec', -}) -unit_space_long2short = { - long: short - for short, long in unit_space_short2long.items() -} -unit_space_long2short['micron'] = 'u' +unit_space_short2long.update( + { + "m": "meter", + "mi": "mile", + "yd": "yard", + "ft": "foot", + "in": "inch", + "'": "foot", + '"': "inch", + "Å": "angstrom", + "pc": "parsec", + } +) +unit_space_long2short = {long: short for short, long in unit_space_short2long.items()} +unit_space_long2short["micron"] = "u" unit_time_short2long = { - short + 's': long + 'second' - for short, long in si_prefix_short2long.items() -} -unit_time_short2long.update({ - 'y': 'year', - 'd': 'day', - 'h': 'hour', - 'm': 'minute', - 's': 'second', -}) -unit_time_long2short = { - long: short - for short, long in unit_time_short2long.items() + short + "s": long + "second" for short, long in si_prefix_short2long.items() } +unit_time_short2long.update( + { + "y": "year", + "d": "day", + "h": "hour", + "m": "minute", + "s": "second", + } +) +unit_time_long2short = {long: short for short, long in unit_time_short2long.items()} unit_space_scale = { - prefix + 'm': 10**exponent - for prefix, exponent in si_prefix_exponent.items() + prefix + "m": 10**exponent for prefix, exponent in si_prefix_exponent.items() } -unit_space_scale.update({ - 'mi': 1609.344, - 'yd': 0.9144, - 'ft': 0.3048, - "'": 0.3048, - 'in': 25.4E-3, - '"': 25.4E-3, - 'Å': 1E-10, - 'pc': 3.0857E16, -}) +unit_space_scale.update( + { + "mi": 1609.344, + "yd": 0.9144, + "ft": 0.3048, + "'": 0.3048, + "in": 25.4e-3, + '"': 25.4e-3, + "Å": 1e-10, + "pc": 3.0857e16, + } +) unit_time_scale = { - prefix + 's': 10**exponent - for prefix, exponent in si_prefix_exponent.items() + prefix + "s": 10**exponent for prefix, exponent in si_prefix_exponent.items() } -unit_time_scale.update({ - 'y': 365.25*24*60*60, - 'd': 24*60*60, - 'h': 60*60, - 'm': 60, -}) +unit_time_scale.update( + { + 
"y": 365.25 * 24 * 60 * 60, + "d": 24 * 60 * 60, + "h": 60 * 60, + "m": 60, + } +) -def convert_unit(value, src, dst): +def convert_unit(value: float, src: str, dst: str) -> float: src = unit_to_scale(src) dst = unit_to_scale(dst) return value * (src / dst) -def to_ome_unit(unit): +def to_ome_unit(unit: str) -> str: if unit in unit_space_short2long: unit = unit_space_short2long[unit] elif unit in unit_time_short2long: unit = unit_time_short2long[unit] elif unit in si_prefix_short2long: unit = si_prefix_short2long[unit] - if unit not in (*ome_valid_units['space'], *ome_valid_units['time']): - raise ValueError('Unknow unit') + if unit not in (*ome_valid_units["space"], *ome_valid_units["time"]): + raise ValueError("Unknow unit") return unit -def to_nifti_unit(unit): +def to_nifti_unit(unit: str) -> str: unit = to_ome_unit(unit) return { - 'meter': 'meter', - 'millimeter': 'mm', - 'micrometer': 'micron', - 'second': 'sec', - 'millisecond': 'msec', - 'microsecond': 'usec', - }.get(unit, 'unknown') + "meter": "meter", + "millimeter": "mm", + "micrometer": "micron", + "second": "sec", + "millisecond": "msec", + "microsecond": "usec", + }.get(unit, "unknown") -def unit_to_scale(unit): +def unit_to_scale(unit: str) -> float: if unit in unit_space_long2short: unit = unit_space_long2short[unit] elif unit in unit_time_long2short: @@ -244,5 +239,5 @@ def unit_to_scale(unit): elif unit in si_prefix_exponent: unit = 10 ** si_prefix_exponent[unit] if isinstance(unit, str): - raise ValueError('Unknown unit', unit) + raise ValueError("Unknown unit", unit) return unit From 8589c9f8043298e6f20e25ea86f761152d20e1b2 Mon Sep 17 00:00:00 2001 From: Kaidong Chai Date: Thu, 14 Nov 2024 10:28:37 -0500 Subject: [PATCH 27/51] style --- linc_convert/modalities/psoct/stacking.py | 315 +++++++++++----------- 1 file changed, 157 insertions(+), 158 deletions(-) diff --git a/linc_convert/modalities/psoct/stacking.py b/linc_convert/modalities/psoct/stacking.py index 51fa8c5..ebed53d 100644 --- a/linc_convert/modalities/psoct/stacking.py +++ b/linc_convert/modalities/psoct/stacking.py @@ -25,7 +25,7 @@ from linc_convert.utils.orientation import orientation_to_affine, center_affine from linc_convert.utils.math import ceildiv from linc_convert.utils.zarr import make_compressor -from linc_convert.utils.unit import (convert_unit, to_ome_unit, to_nifti_unit) +from linc_convert.utils.unit import convert_unit, to_ome_unit, to_nifti_unit stacking = cyclopts.App(name="stacking", help_format="markdown") psoct.command(stacking) @@ -38,9 +38,9 @@ def automap(func): def wrapper(inp, out=None, **kwargs): if out is None: out = os.path.splitext(inp[0])[0] - out += '.nii.zarr' if kwargs.get('nii', False) else '.ome.zarr' - kwargs['nii'] = kwargs.get('nii', False) or out.endswith('.nii.zarr') - with mapmat(inp, kwargs.get('key', None)) as dat: + out += ".nii.zarr" if kwargs.get("nii", False) else ".ome.zarr" + kwargs["nii"] = kwargs.get("nii", False) or out.endswith(".nii.zarr") + with mapmat(inp, kwargs.get("key", None)) as dat: return func(dat, out, **kwargs) return wrapper @@ -49,21 +49,21 @@ def wrapper(inp, out=None, **kwargs): @stacking.default @automap def convert( - inp: List[str], - out: Optional[str] = None, - *, - key: Optional[str] = None, - meta: str = None, - chunk: int = 128, - compressor: str = 'blosc', - compressor_opt: str = "{}", - max_load: int = 128, - max_levels: int = 5, - no_pool: Optional[int] = None, - nii: bool = False, - orientation: str = 'RAS', - center: bool = True, -): + inp: List[str], + out: Optional[str] = 
None, + *, + key: Optional[str] = None, + meta: str = None, + chunk: int = 128, + compressor: str = "blosc", + compressor_opt: str = "{}", + max_load: int = 128, + max_levels: int = 5, + no_pool: Optional[int] = None, + nii: bool = False, + orientation: str = "RAS", + center: bool = True, +) -> None: """ This command converts OCT volumes stored in raw matlab files into a pyramidal OME-ZARR (or NIfTI-Zarr) hierarchy. @@ -103,18 +103,18 @@ def convert( # Write OME-Zarr multiscale metadata if meta: - print('Write JSON') - with open(meta, 'r') as f: + print("Write JSON") + with open(meta, "r") as f: meta_txt = f.read() meta_json = make_json(meta_txt) - path_json = '.'.join(out.split('.')[:-2]) + '.json' - with open(path_json, 'w') as f: + path_json = ".".join(out.split(".")[:-2]) + ".json" + with open(path_json, "w") as f: json.dump(meta_json, f, indent=4) - vx = meta_json['PixelSize'] - unit = meta_json['PixelSizeUnits'] + vx = meta_json["PixelSize"] + unit = meta_json["PixelSizeUnits"] else: vx = [1] * 3 - unit = 'um' + unit = "um" # Prepare Zarr group omz = zarr.storage.DirectoryStore(out) @@ -126,11 +126,11 @@ def convert( raise Exception("Input array is not 3d") # Prepare chunking options opt = { - 'dimension_separator': r'/', - 'order': 'F', - 'dtype': np.dtype(inp.dtype).str, - 'fill_value': None, - 'compressor': make_compressor(compressor, **compressor_opt), + "dimension_separator": r"/", + "order": "F", + "dtype": np.dtype(inp.dtype).str, + "fill_value": None, + "compressor": make_compressor(compressor, **compressor_opt), } inp_chunk = [min(x, max_load) for x in inp.shape] @@ -138,46 +138,44 @@ def convert( nj = ceildiv(inp.shape[1], inp_chunk[1]) ni = ceildiv(inp.shape[2], inp_chunk[2]) - nblevels = min([ - int(math.ceil(math.log2(x))) - for i, x in enumerate(inp.shape) - if i != no_pool - ]) + nblevels = min( + [int(math.ceil(math.log2(x))) for i, x in enumerate(inp.shape) if i != no_pool] + ) nblevels = min(nblevels, int(math.ceil(math.log2(max_load)))) nblevels = min(nblevels, max_levels) # create all arrays in the group shape_level = inp.shape for level in range(nblevels): - opt['chunks'] = [min(x, chunk) for x in shape_level] + opt["chunks"] = [min(x, chunk) for x in shape_level] omz.create_dataset(str(level), shape=shape_level, **opt) - shape_level = [ - x if i == no_pool else x // 2 - for i, x in enumerate(shape_level) - ] + shape_level = [x if i == no_pool else x // 2 for i, x in enumerate(shape_level)] # iterate across input chunks for i, j, k in product(range(ni), range(nj), range(nk)): - level_chunk = inp_chunk loaded_chunk = inp[ - k * level_chunk[0]:(k + 1) * level_chunk[0], - j * level_chunk[1]:(j + 1) * level_chunk[1], - i * level_chunk[2]:(i + 1) * level_chunk[2], - ] + k * level_chunk[0] : (k + 1) * level_chunk[0], + j * level_chunk[1] : (j + 1) * level_chunk[1], + i * level_chunk[2] : (i + 1) * level_chunk[2], + ] for level in range(nblevels): out_level = omz[str(level)] - print(f'[{i + 1:03d}, {j + 1:03d}, {k + 1:03d}]', '/', - f'[{ni:03d}, {nj:03d}, {nk:03d}]', - f'({1 + level}/{nblevels})', end='\r') + print( + f"[{i + 1:03d}, {j + 1:03d}, {k + 1:03d}]", + "/", + f"[{ni:03d}, {nj:03d}, {nk:03d}]", + f"({1 + level}/{nblevels})", + end="\r", + ) # save current chunk out_level[ - k * level_chunk[0]:k * level_chunk[0] + loaded_chunk.shape[0], - j * level_chunk[1]:j * level_chunk[1] + loaded_chunk.shape[1], - i * level_chunk[2]:i * level_chunk[2] + loaded_chunk.shape[2], + k * level_chunk[0] : k * level_chunk[0] + loaded_chunk.shape[0], + j * level_chunk[1] : j * 
level_chunk[1] + loaded_chunk.shape[1], + i * level_chunk[2] : i * level_chunk[2] + loaded_chunk.shape[2], ] = loaded_chunk # ensure divisible by 2 loaded_chunk = loaded_chunk[ @@ -188,61 +186,62 @@ def convert( # mean pyramid (average each 2x2x2 patch) if no_pool == 0: loaded_chunk = ( - loaded_chunk[:, 0::2, 0::2] + - loaded_chunk[:, 0::2, 1::2] + - loaded_chunk[:, 1::2, 0::2] + - loaded_chunk[:, 1::2, 1::2] + loaded_chunk[:, 0::2, 0::2] + + loaded_chunk[:, 0::2, 1::2] + + loaded_chunk[:, 1::2, 0::2] + + loaded_chunk[:, 1::2, 1::2] ) / 4 elif no_pool == 1: loaded_chunk = ( - loaded_chunk[0::2, :, 0::2] + - loaded_chunk[0::2, :, 1::2] + - loaded_chunk[1::2, :, 0::2] + - loaded_chunk[1::2, :, 1::2] + loaded_chunk[0::2, :, 0::2] + + loaded_chunk[0::2, :, 1::2] + + loaded_chunk[1::2, :, 0::2] + + loaded_chunk[1::2, :, 1::2] ) / 4 elif no_pool == 2: loaded_chunk = ( - loaded_chunk[0::2, 0::2, :] + - loaded_chunk[0::2, 1::2, :] + - loaded_chunk[1::2, 0::2, :] + - loaded_chunk[1::2, 1::2, :] + loaded_chunk[0::2, 0::2, :] + + loaded_chunk[0::2, 1::2, :] + + loaded_chunk[1::2, 0::2, :] + + loaded_chunk[1::2, 1::2, :] ) / 4 else: loaded_chunk = ( - loaded_chunk[0::2, 0::2, 0::2] + - loaded_chunk[0::2, 0::2, 1::2] + - loaded_chunk[0::2, 1::2, 0::2] + - loaded_chunk[0::2, 1::2, 1::2] + - loaded_chunk[1::2, 0::2, 0::2] + - loaded_chunk[1::2, 0::2, 1::2] + - loaded_chunk[1::2, 1::2, 0::2] + - loaded_chunk[1::2, 1::2, 1::2] + loaded_chunk[0::2, 0::2, 0::2] + + loaded_chunk[0::2, 0::2, 1::2] + + loaded_chunk[0::2, 1::2, 0::2] + + loaded_chunk[0::2, 1::2, 1::2] + + loaded_chunk[1::2, 0::2, 0::2] + + loaded_chunk[1::2, 0::2, 1::2] + + loaded_chunk[1::2, 1::2, 0::2] + + loaded_chunk[1::2, 1::2, 1::2] ) / 8 level_chunk = [ - x if i == no_pool else x // 2 - for i, x in enumerate(level_chunk) + x if i == no_pool else x // 2 for i, x in enumerate(level_chunk) ] - print('') + print("") # Write OME-Zarr multiscale metadata - print('Write metadata') + print("Write metadata") print(unit) ome_unit = to_ome_unit(unit) - multiscales = [{ - 'version': '0.4', - 'axes': [ - {"name": "z", "type": "space", "unit": ome_unit}, - {"name": "y", "type": "space", "unit": ome_unit}, - {"name": "x", "type": "space", "unit": ome_unit} - ], - 'datasets': [], - 'type': ('2x2x2' if no_pool is None else '2x2') + 'mean window', - 'name': '', - }] + multiscales = [ + { + "version": "0.4", + "axes": [ + {"name": "z", "type": "space", "unit": ome_unit}, + {"name": "y", "type": "space", "unit": ome_unit}, + {"name": "x", "type": "space", "unit": ome_unit}, + ], + "datasets": [], + "type": ("2x2x2" if no_pool is None else "2x2") + "mean window", + "name": "", + } + ] for n in range(nblevels): - multiscales[0]['datasets'].append({}) - level = multiscales[0]['datasets'][-1] + multiscales[0]["datasets"].append({}) + level = multiscales[0]["datasets"][-1] level["path"] = str(n) # With a moving window, the scaling factor is exactly 2, and @@ -251,58 +250,55 @@ def convert( { "type": "scale", "scale": [ - (1 if no_pool == 0 else 2 ** n) * vx[0], - (1 if no_pool == 1 else 2 ** n) * vx[1], - (1 if no_pool == 2 else 2 ** n) * vx[2], - ] + (1 if no_pool == 0 else 2**n) * vx[0], + (1 if no_pool == 1 else 2**n) * vx[1], + (1 if no_pool == 2 else 2**n) * vx[2], + ], }, { "type": "translation", "translation": [ - (0 if no_pool == 0 else (2 ** n - 1)) * vx[0] * 0.5, - (0 if no_pool == 1 else (2 ** n - 1)) * vx[1] * 0.5, - (0 if no_pool == 2 else (2 ** n - 1)) * vx[2] * 0.5, - ] - } + (0 if no_pool == 0 else (2**n - 1)) * vx[0] * 0.5, + (0 if no_pool == 1 
else (2**n - 1)) * vx[1] * 0.5, + (0 if no_pool == 2 else (2**n - 1)) * vx[2] * 0.5, + ], + }, ] multiscales[0]["coordinateTransformations"] = [ - { - "scale": [1.0] * 3, - "type": "scale" - } + {"scale": [1.0] * 3, "type": "scale"} ] omz.attrs["multiscales"] = multiscales if not nii: - print('done.') + print("done.") return # Write NIfTI-Zarr header # NOTE: we use nifti2 because dimensions typically do not fit in a short # TODO: we do not write the json zattrs, but it should be added in # once the nifti-zarr package is released - shape = list(reversed(omz['0'].shape)) + shape = list(reversed(omz["0"].shape)) affine = orientation_to_affine(orientation, *vx[::-1]) if center: affine = center_affine(affine, shape[:3]) header = nib.Nifti2Header() header.set_data_shape(shape) - header.set_data_dtype(omz['0'].dtype) + header.set_data_dtype(omz["0"].dtype) header.set_qform(affine) header.set_sform(affine) header.set_xyzt_units(nib.nifti1.unit_codes.code[to_nifti_unit(unit)]) - header.structarr['magic'] = b'nz2\0' - header = np.frombuffer(header.structarr.tobytes(), dtype='u1') + header.structarr["magic"] = b"nz2\0" + header = np.frombuffer(header.structarr.tobytes(), dtype="u1") opt = { - 'chunks': [len(header)], - 'dimension_separator': r'/', - 'order': 'F', - 'dtype': '|u1', - 'fill_value': None, - 'compressor': None, + "chunks": [len(header)], + "dimension_separator": r"/", + "order": "F", + "dtype": "|u1", + "fill_value": None, + "compressor": None, } - omz.create_dataset('nifti', data=header, shape=shape, **opt) - print('done.') + omz.create_dataset("nifti", data=header, shape=shape, **opt) + print("done.") @contextmanager @@ -313,20 +309,24 @@ def mapmat(fnames, key=None): for fname in fnames: try: # "New" .mat file - f = h5py.File(fname, 'r') + f = h5py.File(fname, "r") except Exception: # "Old" .mat file f = loadmat(fname) + if key is None: if len(f.keys()) > 1: - warn(f'More than one key in .mat file {fname}, arbitrarily loading "{f.keys[0]}"') + warn( + f'More than one key in .mat file {fname}, arbitrarily loading "{f.keys[0]}"' + ) key = f.keys()[0] + if key not in f.keys(): raise Exception(f"Key {key} not found in file {fname}") if len(fnames) == 1: yield f.get(key) - if hasattr(f, 'close'): + if hasattr(f, "close"): f.close() break loaded_data.append(f.get(key)) @@ -354,77 +354,76 @@ def make_json(oct_meta): """ def parse_value_unit(string, n=None): - number = r'-?(\d+\.?\d*|\d*\.?\d+)(E-?\d+)?' - value = 'x'.join([number] * (n or 1)) - match = re.fullmatch(r'(?P' + value + r')(?P\w*)', string) - value, unit = match.group('value'), match.group('unit') - value = list(map(float, value.split('x'))) + number = r"-?(\d+\.?\d*|\d*\.?\d+)(E-?\d+)?" 
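# A couple of worked examples for the `number` pattern assembled here
# (the sample values come from the docstring above):
#
#     parse_value_unit("1294.84nm")     # -> (1294.84, "nm")
#     parse_value_unit("3x3x3um", n=3)  # -> ([3.0, 3.0, 3.0], "um")
#
# `n` fixes how many "x"-separated numbers are expected; when omitted,
# the single parsed number is returned unwrapped.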
+ value = "x".join([number] * (n or 1)) + match = re.fullmatch(r"(?P" + value + r")(?P\w*)", string) + value, unit = match.group("value"), match.group("unit") + value = list(map(float, value.split("x"))) if n is None: value = value[0] return value, unit meta = { - 'BodyPart': 'BRAIN', - 'Environment': 'exvivo', - 'SampleStaining': 'none', + "BodyPart": "BRAIN", + "Environment": "exvivo", + "SampleStaining": "none", } - for line in oct_meta.split('\n'): - if ':' not in line: + for line in oct_meta.split("\n"): + if ":" not in line: continue - key, value = line.split(':') + key, value = line.split(":") key, value = key.strip(), value.strip() - if key == 'Image medium': + if key == "Image medium": parts = value.split() - if 'TDE' in parts: - parts[parts.index('TDE')] = "2,2' Thiodiethanol (TDE)" - meta['SampleMedium'] = ' '.join(parts) + if "TDE" in parts: + parts[parts.index("TDE")] = "2,2' Thiodiethanol (TDE)" + meta["SampleMedium"] = " ".join(parts) - elif key == 'Center Wavelength': + elif key == "Center Wavelength": value, unit = parse_value_unit(value) - meta['Wavelength'] = value - meta['WavelengthUnit'] = unit + meta["Wavelength"] = value + meta["WavelengthUnit"] = unit - elif key == 'Axial resolution': + elif key == "Axial resolution": value, unit = parse_value_unit(value) - meta['ResolutionAxial'] = value - meta['ResolutionAxialUnit'] = unit + meta["ResolutionAxial"] = value + meta["ResolutionAxialUnit"] = unit - elif key == 'Lateral resolution': + elif key == "Lateral resolution": value, unit = parse_value_unit(value) - meta['ResolutionLateral'] = value - meta['ResolutionLateralUnit'] = unit + meta["ResolutionLateral"] = value + meta["ResolutionLateralUnit"] = unit - elif key == 'Voxel size': + elif key == "Voxel size": value, unit = parse_value_unit(value, n=3) - meta['PixelSize'] = value - meta['PixelSizeUnits'] = unit + meta["PixelSize"] = value + meta["PixelSizeUnits"] = unit - elif key == 'Depth focus range': + elif key == "Depth focus range": value, unit = parse_value_unit(value) - meta['DepthFocusRange'] = value - meta['DepthFocusRangeUnit'] = unit + meta["DepthFocusRange"] = value + meta["DepthFocusRangeUnit"] = unit - elif key == 'Number of focuses': + elif key == "Number of focuses": value, unit = parse_value_unit(value) - meta['FocusCount'] = int(value) + meta["FocusCount"] = int(value) - elif key == 'Slice thickness': + elif key == "Slice thickness": value, unit = parse_value_unit(value) - unit = convert_unit(value, unit[:-1], 'u') - meta['SliceThickness'] = value + unit = convert_unit(value, unit[:-1], "u") + meta["SliceThickness"] = value - elif key == 'Number of slices': + elif key == "Number of slices": value, unit = parse_value_unit(value) - meta['SliceCount'] = int(value) + meta["SliceCount"] = int(value) - elif key == 'Modality': - meta['OCTModality'] = value + elif key == "Modality": + meta["OCTModality"] = value else: continue return meta - From aa448998b12280b9f7afb9f76ba0750c8419d8b8 Mon Sep 17 00:00:00 2001 From: Kaidong Chai Date: Thu, 14 Nov 2024 14:28:39 -0500 Subject: [PATCH 28/51] refactor: generate_pyramid from oct pipeline --- linc_convert/modalities/psoct/stacking.py | 439 +++++++++++++++++----- 1 file changed, 344 insertions(+), 95 deletions(-) diff --git a/linc_convert/modalities/psoct/stacking.py b/linc_convert/modalities/psoct/stacking.py index ebed53d..49339dc 100644 --- a/linc_convert/modalities/psoct/stacking.py +++ b/linc_convert/modalities/psoct/stacking.py @@ -4,6 +4,7 @@ """ import ast +import itertools import json import math import os @@ 
-11,7 +12,7 @@ from contextlib import contextmanager from functools import wraps from itertools import product -from typing import Optional, List +from typing import Optional, List, Literal from warnings import warn import cyclopts @@ -22,10 +23,10 @@ from scipy.io import loadmat from linc_convert.modalities.psoct.cli import psoct -from linc_convert.utils.orientation import orientation_to_affine, center_affine from linc_convert.utils.math import ceildiv -from linc_convert.utils.zarr import make_compressor +from linc_convert.utils.orientation import orientation_to_affine, center_affine from linc_convert.utils.unit import convert_unit, to_ome_unit, to_nifti_unit +from linc_convert.utils.zarr import make_compressor stacking = cyclopts.App(name="stacking", help_format="markdown") psoct.command(stacking) @@ -49,20 +50,20 @@ def wrapper(inp, out=None, **kwargs): @stacking.default @automap def convert( - inp: List[str], - out: Optional[str] = None, - *, - key: Optional[str] = None, - meta: str = None, - chunk: int = 128, - compressor: str = "blosc", - compressor_opt: str = "{}", - max_load: int = 128, - max_levels: int = 5, - no_pool: Optional[int] = None, - nii: bool = False, - orientation: str = "RAS", - center: bool = True, + inp: List[str], + out: Optional[str] = None, + *, + key: Optional[str] = None, + meta: str = None, + chunk: int = 128, + compressor: str = "blosc", + compressor_opt: str = "{}", + max_load: int = 128, + max_levels: int = 5, + no_pool: Optional[int] = None, + nii: bool = False, + orientation: str = "RAS", + center: bool = True, ) -> None: """ This command converts OCT volumes stored in raw matlab files @@ -144,81 +145,35 @@ def convert( nblevels = min(nblevels, int(math.ceil(math.log2(max_load)))) nblevels = min(nblevels, max_levels) - # create all arrays in the group - shape_level = inp.shape - for level in range(nblevels): - opt["chunks"] = [min(x, chunk) for x in shape_level] - omz.create_dataset(str(level), shape=shape_level, **opt) - shape_level = [x if i == no_pool else x // 2 for i, x in enumerate(shape_level)] + opt["chunks"] = [min(x, chunk) for x in inp.shape] + + omz.create_dataset(str(0), shape=inp.shape, **opt) # iterate across input chunks for i, j, k in product(range(ni), range(nj), range(nk)): - level_chunk = inp_chunk loaded_chunk = inp[ - k * level_chunk[0] : (k + 1) * level_chunk[0], - j * level_chunk[1] : (j + 1) * level_chunk[1], - i * level_chunk[2] : (i + 1) * level_chunk[2], - ] + k * inp_chunk[0]: (k + 1) * inp_chunk[0], + j * inp_chunk[1]: (j + 1) * inp_chunk[1], + i * inp_chunk[2]: (i + 1) * inp_chunk[2], + ] + + print( + f"[{i + 1:03d}, {j + 1:03d}, {k + 1:03d}]", + "/", + f"[{ni:03d}, {nj:03d}, {nk:03d}]", + # f"({1 + level}/{nblevels})", + end="\r", + ) + + # save current chunk + omz["0"][ + k * inp_chunk[0]: k * inp_chunk[0] + loaded_chunk.shape[0], + j * inp_chunk[1]: j * inp_chunk[1] + loaded_chunk.shape[1], + i * inp_chunk[2]: i * inp_chunk[2] + loaded_chunk.shape[2], + ] = loaded_chunk + + generate_pyramid(omz, nblevels - 1, mode="mean") - for level in range(nblevels): - out_level = omz[str(level)] - - print( - f"[{i + 1:03d}, {j + 1:03d}, {k + 1:03d}]", - "/", - f"[{ni:03d}, {nj:03d}, {nk:03d}]", - f"({1 + level}/{nblevels})", - end="\r", - ) - - # save current chunk - out_level[ - k * level_chunk[0] : k * level_chunk[0] + loaded_chunk.shape[0], - j * level_chunk[1] : j * level_chunk[1] + loaded_chunk.shape[1], - i * level_chunk[2] : i * level_chunk[2] + loaded_chunk.shape[2], - ] = loaded_chunk - # ensure divisible by 2 - loaded_chunk = 
loaded_chunk[ - slice(2 * (loaded_chunk.shape[0] // 2) if 0 != no_pool else None), - slice(2 * (loaded_chunk.shape[1] // 2) if 1 != no_pool else None), - slice(2 * (loaded_chunk.shape[2] // 2) if 2 != no_pool else None), - ] - # mean pyramid (average each 2x2x2 patch) - if no_pool == 0: - loaded_chunk = ( - loaded_chunk[:, 0::2, 0::2] - + loaded_chunk[:, 0::2, 1::2] - + loaded_chunk[:, 1::2, 0::2] - + loaded_chunk[:, 1::2, 1::2] - ) / 4 - elif no_pool == 1: - loaded_chunk = ( - loaded_chunk[0::2, :, 0::2] - + loaded_chunk[0::2, :, 1::2] - + loaded_chunk[1::2, :, 0::2] - + loaded_chunk[1::2, :, 1::2] - ) / 4 - elif no_pool == 2: - loaded_chunk = ( - loaded_chunk[0::2, 0::2, :] - + loaded_chunk[0::2, 1::2, :] - + loaded_chunk[1::2, 0::2, :] - + loaded_chunk[1::2, 1::2, :] - ) / 4 - else: - loaded_chunk = ( - loaded_chunk[0::2, 0::2, 0::2] - + loaded_chunk[0::2, 0::2, 1::2] - + loaded_chunk[0::2, 1::2, 0::2] - + loaded_chunk[0::2, 1::2, 1::2] - + loaded_chunk[1::2, 0::2, 0::2] - + loaded_chunk[1::2, 0::2, 1::2] - + loaded_chunk[1::2, 1::2, 0::2] - + loaded_chunk[1::2, 1::2, 1::2] - ) / 8 - level_chunk = [ - x if i == no_pool else x // 2 for i, x in enumerate(level_chunk) - ] print("") # Write OME-Zarr multiscale metadata @@ -250,17 +205,17 @@ def convert( { "type": "scale", "scale": [ - (1 if no_pool == 0 else 2**n) * vx[0], - (1 if no_pool == 1 else 2**n) * vx[1], - (1 if no_pool == 2 else 2**n) * vx[2], + (1 if no_pool == 0 else 2 ** n) * vx[0], + (1 if no_pool == 1 else 2 ** n) * vx[1], + (1 if no_pool == 2 else 2 ** n) * vx[2], ], }, { "type": "translation", "translation": [ - (0 if no_pool == 0 else (2**n - 1)) * vx[0] * 0.5, - (0 if no_pool == 1 else (2**n - 1)) * vx[1] * 0.5, - (0 if no_pool == 2 else (2**n - 1)) * vx[2] * 0.5, + (0 if no_pool == 0 else (2 ** n - 1)) * vx[0] * 0.5, + (0 if no_pool == 1 else (2 ** n - 1)) * vx[1] * 0.5, + (0 if no_pool == 2 else (2 ** n - 1)) * vx[2] * 0.5, ], }, ] @@ -301,6 +256,298 @@ def convert( print("done.") +def generate_pyramid( + omz, + levels: int | None = None, + ndim: int = 3, + max_load: int = 512, + mode: Literal["mean", "median"] = "median", +) -> list[list[int]]: + """ + Generate the levels of a pyramid in an existing Zarr. + + Parameters + ---------- + path : PathLike | str + Path to parent Zarr + levels : int + Number of additional levels to generate. + By default, stop when all dimensions are smaller than their + corresponding chunk size. + shard : list[int] | bool | {"auto"} | None + Shard size. + * If `None`, use same shard size as the input array; + * If `False`, no dot use sharding; + * If `True` or `"auto"`, automatically find shard size; + * Otherwise, use provided shard size. + ndim : int + Number of spatial dimensions. + max_load : int + Maximum number of voxels to load along each dimension. + mode : {"mean", "median"} + Whether to use a mean or median moving window. + + Returns + ------- + shapes : list[list[int]] + Shapes of all levels, from finest to coarsest, including the + existing top level. 
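    Examples
    --------
    A rough usage sketch (it assumes the finest level "0" has already
    been written into the group, as `convert` does above):

        omz = zarr.open_group("out.ome.zarr", mode="r+")
        shapes = generate_pyramid(omz, levels=3, mode="mean")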
+ """ + + shape = list(omz["0"].shape) + chunk_size = omz["0"].chunks + + # Select windowing function + if mode == "median": + func = np.median + else: + assert mode == "mean" + func = np.mean + + level = 0 + batch, shape = shape[:-ndim], shape[-ndim:] + allshapes = [shape] + + opt = { + "dimension_separator": omz["0"]._dimension_separator, + "order": omz["0"]._order, + "dtype": omz["0"]._dtype, + "fill_value": omz["0"]._fill_value, + "compressor": omz["0"]._compressor, + } + + while True: + level += 1 + + # Compute downsampled shape + prev_shape, shape = shape, [max(1, x // 2) for x in shape] + + # Stop if seen enough levels or level shape smaller than chunk size + if levels is None: + if all(x <= c for x, c in zip(shape, chunk_size[-ndim:])): + break + elif level > levels: + break + + print("Compute level", level, "with shape", shape) + + allshapes.append(shape) + omz.create_dataset(str(level), shape=shape, **opt) + + # Iterate across `max_load` chunks + # (note that these are unrelared to underlying zarr chunks) + grid_shape = [ceildiv(n, max_load) for n in prev_shape] + for chunk_index in itertools.product(*[range(x) for x in grid_shape]): + print(f"chunk {chunk_index} / {tuple(grid_shape)})", end="\r") + + # Read one chunk of data at the previous resolution + slicer = [Ellipsis] + [ + slice(i * max_load, min((i + 1) * max_load, n)) + for i, n in zip(chunk_index, prev_shape) + ] + dat = omz[str(level - 1)][tuple(slicer)] + + # Discard the last voxel along odd dimensions + crop = [0 if x == 1 else x % 2 for x in dat.shape[-3:]] + slcr = [slice(-1) if x else slice(None) for x in crop] + dat = dat[tuple([Ellipsis, *slcr])] + + patch_shape = dat.shape[-3:] + + # Reshape into patches of shape 2x2x2 + windowed_shape = [ + x for n in patch_shape for x in (max(n // 2, 1), min(n, 2)) + ] + dat = dat.reshape(batch + windowed_shape) + # -> last `ndim`` dimensions have shape 2x2x2 + dat = dat.transpose( + list(range(len(batch))) + + list(range(len(batch), len(batch) + 2 * ndim, 2)) + + list(range(len(batch) + 1, len(batch) + 2 * ndim, 2)) + ) + # -> flatten patches + smaller_shape = [max(n // 2, 1) for n in patch_shape] + dat = dat.reshape(batch + smaller_shape + [-1]) + + # Compute the median/mean of each patch + dtype = dat.dtype + dat = func(dat, axis=-1) + dat = dat.astype(dtype) + + # Write output + slicer = [Ellipsis] + [ + slice(i * max_load // 2, min((i + 1) * max_load // 2, n)) + for i, n in zip(chunk_index, shape) + ] + + omz[str(level)][tuple(slicer)] = dat + + print("") + + return allshapes + + pass + + +# +# def generate_pyramid(inp, inp_chunk, nblevels, ni, nj, nk, no_pool, omz): +# for i, j, k in product(range(ni), range(nj), range(nk)): +# level_chunk = inp_chunk +# loaded_chunk = inp[ +# k * level_chunk[0]: (k + 1) * level_chunk[0], +# j * level_chunk[1]: (j + 1) * level_chunk[1], +# i * level_chunk[2]: (i + 1) * level_chunk[2], +# ] +# +# out_level = omz["0"] +# +# print( +# f"[{i + 1:03d}, {j + 1:03d}, {k + 1:03d}]", +# "/", +# f"[{ni:03d}, {nj:03d}, {nk:03d}]", +# # f"({1 + level}/{nblevels})", +# end="\r", +# ) +# +# # save current chunk +# out_level[ +# k * level_chunk[0]: k * level_chunk[0] + loaded_chunk.shape[0], +# j * level_chunk[1]: j * level_chunk[1] + loaded_chunk.shape[1], +# i * level_chunk[2]: i * level_chunk[2] + loaded_chunk.shape[2], +# ] = loaded_chunk +# +# for level in range(nblevels): +# out_level = omz[str(level)] +# +# print( +# f"[{i + 1:03d}, {j + 1:03d}, {k + 1:03d}]", +# "/", +# f"[{ni:03d}, {nj:03d}, {nk:03d}]", +# f"({1 + level}/{nblevels})", +# 
end="\r", +# ) +# +# # save current chunk +# out_level[ +# k * level_chunk[0]: k * level_chunk[0] + loaded_chunk.shape[0], +# j * level_chunk[1]: j * level_chunk[1] + loaded_chunk.shape[1], +# i * level_chunk[2]: i * level_chunk[2] + loaded_chunk.shape[2], +# ] = loaded_chunk +# # ensure divisible by 2 +# loaded_chunk = loaded_chunk[ +# slice(2 * (loaded_chunk.shape[0] // 2) if 0 != no_pool else None), +# slice(2 * (loaded_chunk.shape[1] // 2) if 1 != no_pool else None), +# slice(2 * (loaded_chunk.shape[2] // 2) if 2 != no_pool else None), +# ] +# # mean pyramid (average each 2x2x2 patch) +# if no_pool == 0: +# loaded_chunk = ( +# loaded_chunk[:, 0::2, 0::2] +# + loaded_chunk[:, 0::2, 1::2] +# + loaded_chunk[:, 1::2, 0::2] +# + loaded_chunk[:, 1::2, 1::2] +# ) / 4 +# elif no_pool == 1: +# loaded_chunk = ( +# loaded_chunk[0::2, :, 0::2] +# + loaded_chunk[0::2, :, 1::2] +# + loaded_chunk[1::2, :, 0::2] +# + loaded_chunk[1::2, :, 1::2] +# ) / 4 +# elif no_pool == 2: +# loaded_chunk = ( +# loaded_chunk[0::2, 0::2, :] +# + loaded_chunk[0::2, 1::2, :] +# + loaded_chunk[1::2, 0::2, :] +# + loaded_chunk[1::2, 1::2, :] +# ) / 4 +# else: +# loaded_chunk = ( +# loaded_chunk[0::2, 0::2, 0::2] +# + loaded_chunk[0::2, 0::2, 1::2] +# + loaded_chunk[0::2, 1::2, 0::2] +# + loaded_chunk[0::2, 1::2, 1::2] +# + loaded_chunk[1::2, 0::2, 0::2] +# + loaded_chunk[1::2, 0::2, 1::2] +# + loaded_chunk[1::2, 1::2, 0::2] +# + loaded_chunk[1::2, 1::2, 1::2] +# ) / 8 +# level_chunk = [ +# x if i == no_pool else x // 2 for i, x in enumerate(level_chunk) +# ] +# + +# def generate_pyramid(i, j, k, level_chunk, loaded_chunk, nblevels, ni, nj, nk, no_pool, +# omz): +# for level in range(nblevels): +# out_level = omz[str(level)] +# +# print( +# f"[{i + 1:03d}, {j + 1:03d}, {k + 1:03d}]", +# "/", +# f"[{ni:03d}, {nj:03d}, {nk:03d}]", +# f"({1 + level}/{nblevels})", +# end="\r", +# ) +# +# # save current chunk +# out_level[ +# k * level_chunk[0]: k * level_chunk[0] + loaded_chunk.shape[0], +# j * level_chunk[1]: j * level_chunk[1] + loaded_chunk.shape[1], +# i * level_chunk[2]: i * level_chunk[2] + loaded_chunk.shape[2], +# ] = loaded_chunk +# # ensure divisible by 2 +# loaded_chunk = loaded_chunk[ +# slice(2 * (loaded_chunk.shape[0] // 2) if 0 != no_pool else None), +# slice(2 * (loaded_chunk.shape[1] // 2) if 1 != no_pool else None), +# slice(2 * (loaded_chunk.shape[2] // 2) if 2 != no_pool else None), +# ] +# # mean pyramid (average each 2x2x2 patch) +# if no_pool == 0: +# loaded_chunk = ( +# loaded_chunk[:, 0::2, 0::2] +# + loaded_chunk[:, 0::2, 1::2] +# + loaded_chunk[:, 1::2, 0::2] +# + loaded_chunk[:, 1::2, 1::2] +# ) / 4 +# elif no_pool == 1: +# loaded_chunk = ( +# loaded_chunk[0::2, :, 0::2] +# + loaded_chunk[0::2, :, 1::2] +# + loaded_chunk[1::2, :, 0::2] +# + loaded_chunk[1::2, :, 1::2] +# ) / 4 +# elif no_pool == 2: +# loaded_chunk = ( +# loaded_chunk[0::2, 0::2, :] +# + loaded_chunk[0::2, 1::2, :] +# + loaded_chunk[1::2, 0::2, :] +# + loaded_chunk[1::2, 1::2, :] +# ) / 4 +# else: +# loaded_chunk = ( +# loaded_chunk[0::2, 0::2, 0::2] +# + loaded_chunk[0::2, 0::2, 1::2] +# + loaded_chunk[0::2, 1::2, 0::2] +# + loaded_chunk[0::2, 1::2, 1::2] +# + loaded_chunk[1::2, 0::2, 0::2] +# + loaded_chunk[1::2, 0::2, 1::2] +# + loaded_chunk[1::2, 1::2, 0::2] +# + loaded_chunk[1::2, 1::2, 1::2] +# ) / 8 +# level_chunk = [ +# x if i == no_pool else x // 2 for i, x in enumerate(level_chunk) +# ] + +# +# def create_level(chunk, inp, nblevels, no_pool, omz, opt): +# shape_level = inp.shape +# for level in range(1,nblevels,1): +# 
opt["chunks"] = [min(x, chunk) for x in shape_level] +# omz.create_dataset(str(level), shape=shape_level, **opt) +# shape_level = [x if i == no_pool else x // 2 for i, x in enumerate(shape_level)] +# + @contextmanager def mapmat(fnames, key=None): """Load or memory-map an array stored in a .mat file""" @@ -315,11 +562,13 @@ def mapmat(fnames, key=None): f = loadmat(fname) if key is None: + if not len(f.keys()): + raise Exception(f"{fname} is empty") + key = list(f.keys())[0] if len(f.keys()) > 1: warn( - f'More than one key in .mat file {fname}, arbitrarily loading "{f.keys[0]}"' + f'More than one key in .mat file {fname}, arbitrarily loading "{key}"' ) - key = f.keys()[0] if key not in f.keys(): raise Exception(f"Key {key} not found in file {fname}") From 91072d00a75fb2abc4c20701ef4a24f38a02ee76 Mon Sep 17 00:00:00 2001 From: Kaidong Chai Date: Thu, 14 Nov 2024 14:53:33 -0500 Subject: [PATCH 29/51] fix: wrong shape was used for nii header in zarr, no effect --- linc_convert/modalities/psoct/stacking.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/linc_convert/modalities/psoct/stacking.py b/linc_convert/modalities/psoct/stacking.py index 49339dc..f1e45a5 100644 --- a/linc_convert/modalities/psoct/stacking.py +++ b/linc_convert/modalities/psoct/stacking.py @@ -252,7 +252,7 @@ def convert( "fill_value": None, "compressor": None, } - omz.create_dataset("nifti", data=header, shape=shape, **opt) + omz.create_dataset("nifti", data=header, shape=len(header), **opt) print("done.") From 5a72e9e1eb7bb43843c7e7687519c7bdbe207b25 Mon Sep 17 00:00:00 2001 From: Kaidong Chai Date: Thu, 14 Nov 2024 15:05:57 -0500 Subject: [PATCH 30/51] refactor: nifti-zarr header --- linc_convert/modalities/psoct/stacking.py | 514 ++++++++++++++++------ 1 file changed, 390 insertions(+), 124 deletions(-) diff --git a/linc_convert/modalities/psoct/stacking.py b/linc_convert/modalities/psoct/stacking.py index f1e45a5..4ae2c8d 100644 --- a/linc_convert/modalities/psoct/stacking.py +++ b/linc_convert/modalities/psoct/stacking.py @@ -236,24 +236,157 @@ def convert( affine = orientation_to_affine(orientation, *vx[::-1]) if center: affine = center_affine(affine, shape[:3]) - header = nib.Nifti2Header() - header.set_data_shape(shape) - header.set_data_dtype(omz["0"].dtype) - header.set_qform(affine) - header.set_sform(affine) - header.set_xyzt_units(nib.nifti1.unit_codes.code[to_nifti_unit(unit)]) - header.structarr["magic"] = b"nz2\0" - header = np.frombuffer(header.structarr.tobytes(), dtype="u1") - opt = { - "chunks": [len(header)], - "dimension_separator": r"/", - "order": "F", - "dtype": "|u1", - "fill_value": None, - "compressor": None, + niftizarr_write_header(omz,shape,affine,omz["0"].dtype,unit,nifti_version=2) + # header = nib.Nifti2Header() + # header.set_data_shape(shape) + # header.set_data_dtype(omz["0"].dtype) + # header.set_qform(affine) + # header.set_sform(affine) + # header.set_xyzt_units(nib.nifti1.unit_codes.code[to_nifti_unit(unit)]) + # header.structarr["magic"] = b"nz2\0" + # header = np.frombuffer(header.structarr.tobytes(), dtype="u1") + # opt = { + # "chunks": [len(header)], + # "dimension_separator": r"/", + # "order": "F", + # "dtype": "|u1", + # "fill_value": None, + # "compressor": None, + # } + # omz.create_dataset("nifti", data=header, shape=len(header), **opt) + # print("done.") + + +@contextmanager +def mapmat(fnames, key=None): + """Load or memory-map an array stored in a .mat file""" + loaded_data = [] + + for fname in fnames: + try: + # "New" .mat file + f = 
h5py.File(fname, "r") + except Exception: + # "Old" .mat file + f = loadmat(fname) + + if key is None: + if not len(f.keys()): + raise Exception(f"{fname} is empty") + key = list(f.keys())[0] + if len(f.keys()) > 1: + warn( + f'More than one key in .mat file {fname}, arbitrarily loading "{key}"' + ) + + if key not in f.keys(): + raise Exception(f"Key {key} not found in file {fname}") + + if len(fnames) == 1: + yield f.get(key) + if hasattr(f, "close"): + f.close() + break + loaded_data.append(f.get(key)) + + yield np.stack(loaded_data, axis=-1) + + +def make_json(oct_meta): + """ + Expected input: + --------------- + Image medium: 60% TDE + Center Wavelength: 1294.84nm + Axial resolution: 4.9um + Lateral resolution: 4.92um + FOV: 3x3mm + Voxel size: 3x3x3um + Depth focus range: 225um + Number of focuses: 2 + Focus #: 2 + Slice thickness: 450um. + Number of slices: 75 + Slice #:23 + Modality: dBI + """ + + def parse_value_unit(string, n=None): + number = r"-?(\d+\.?\d*|\d*\.?\d+)(E-?\d+)?" + value = "x".join([number] * (n or 1)) + match = re.fullmatch(r"(?P" + value + r")(?P\w*)", string) + value, unit = match.group("value"), match.group("unit") + value = list(map(float, value.split("x"))) + if n is None: + value = value[0] + return value, unit + + meta = { + "BodyPart": "BRAIN", + "Environment": "exvivo", + "SampleStaining": "none", } - omz.create_dataset("nifti", data=header, shape=len(header), **opt) - print("done.") + + for line in oct_meta.split("\n"): + if ":" not in line: + continue + + key, value = line.split(":") + key, value = key.strip(), value.strip() + + if key == "Image medium": + parts = value.split() + if "TDE" in parts: + parts[parts.index("TDE")] = "2,2' Thiodiethanol (TDE)" + meta["SampleMedium"] = " ".join(parts) + + elif key == "Center Wavelength": + value, unit = parse_value_unit(value) + meta["Wavelength"] = value + meta["WavelengthUnit"] = unit + + elif key == "Axial resolution": + value, unit = parse_value_unit(value) + meta["ResolutionAxial"] = value + meta["ResolutionAxialUnit"] = unit + + elif key == "Lateral resolution": + value, unit = parse_value_unit(value) + meta["ResolutionLateral"] = value + meta["ResolutionLateralUnit"] = unit + + elif key == "Voxel size": + value, unit = parse_value_unit(value, n=3) + meta["PixelSize"] = value + meta["PixelSizeUnits"] = unit + + elif key == "Depth focus range": + value, unit = parse_value_unit(value) + meta["DepthFocusRange"] = value + meta["DepthFocusRangeUnit"] = unit + + elif key == "Number of focuses": + value, unit = parse_value_unit(value) + meta["FocusCount"] = int(value) + + elif key == "Slice thickness": + value, unit = parse_value_unit(value) + unit = convert_unit(value, unit[:-1], "u") + meta["SliceThickness"] = value + + elif key == "Number of slices": + value, unit = parse_value_unit(value) + meta["SliceCount"] = int(value) + + elif key == "Modality": + meta["OCTModality"] = value + + else: + continue + + return meta + + def generate_pyramid( @@ -548,131 +681,264 @@ def generate_pyramid( # shape_level = [x if i == no_pool else x // 2 for i, x in enumerate(shape_level)] # -@contextmanager -def mapmat(fnames, key=None): - """Load or memory-map an array stored in a .mat file""" - loaded_data = [] - for fname in fnames: - try: - # "New" .mat file - f = h5py.File(fname, "r") - except Exception: - # "Old" .mat file - f = loadmat(fname) - if key is None: - if not len(f.keys()): - raise Exception(f"{fname} is empty") - key = list(f.keys())[0] - if len(f.keys()) > 1: - warn( - f'More than one key in .mat file 
{fname}, arbitrarily loading "{key}"' - ) - - if key not in f.keys(): - raise Exception(f"Key {key} not found in file {fname}") - - if len(fnames) == 1: - yield f.get(key) - if hasattr(f, "close"): - f.close() - break - loaded_data.append(f.get(key)) - - yield np.stack(loaded_data, axis=-1) +def write_ome_metadata( + path: str | os.PathLike, + axes: list[str], + space_scale: float | list[float] = 1, + time_scale: float = 1, + space_unit: str = "micrometer", + time_unit: str = "second", + name: str = "", + pyramid_aligns: str | int | list[str | int] = 2, + levels: int | None = None, + zarr_version: int | None = None, +) -> None: + """ + Write OME metadata into Zarr. + Parameters + ---------- + path : str | PathLike + Path to parent Zarr. + axes : list[str] + Name of each dimension, in Zarr order (t, c, z, y, x) + space_scale : float | list[float] + Finest-level voxel size, in Zarr order (z, y, x) + time_scale : float + Time scale + space_unit : str + Unit of spatial scale (assumed identical across dimensions) + space_time : str + Unit of time scale + name : str + Name attribute + pyramid_aligns : float | list[float] | {"center", "edge"} + Whether the pyramid construction aligns the edges or the centers + of the corner voxels. If a (list of) number, assume that a moving + window of that size was used. + levels : int + Number of existing levels. Default: find out automatically. + zarr_version : {2, 3} | None + Zarr version. If `None`, guess from existing zarr array. -def make_json(oct_meta): - """ - Expected input: - --------------- - Image medium: 60% TDE - Center Wavelength: 1294.84nm - Axial resolution: 4.9um - Lateral resolution: 4.92um - FOV: 3x3mm - Voxel size: 3x3x3um - Depth focus range: 225um - Number of focuses: 2 - Focus #: 2 - Slice thickness: 450um. - Number of slices: 75 - Slice #:23 - Modality: dBI """ + path = UPath(path) + + # Detect zarr version + if not zarr_version: + if (path / "0" / "zarr.json").exists(): + zarr_version = 3 + elif (path / "0" / ".zarray").exists(): + zarr_version = 2 + else: + raise FileNotFoundError("No existing array to guess version from") - def parse_value_unit(string, n=None): - number = r"-?(\d+\.?\d*|\d*\.?\d+)(E-?\d+)?" 
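# For reference, the handled lines of the sample header above map to
# entries such as:
#
#     "Center Wavelength: 1294.84nm" -> "Wavelength": 1294.84, "WavelengthUnit": "nm"
#     "Voxel size: 3x3x3um"          -> "PixelSize": [3.0, 3.0, 3.0], "PixelSizeUnits": "um"
#     "Number of slices: 75"         -> "SliceCount": 75
#     "Modality: dBI"                -> "OCTModality": "dBI"
#
# "FOV", "Focus #" and "Slice #" lines have no handler and are skipped.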
- value = "x".join([number] * (n or 1)) - match = re.fullmatch(r"(?P" + value + r")(?P\w*)", string) - value, unit = match.group("value"), match.group("unit") - value = list(map(float, value.split("x"))) - if n is None: - value = value[0] - return value, unit + # Read shape at each pyramid level + zname = "zarr.json" if zarr_version == 3 else ".zarray" + shapes = [] + level = 0 + while True: + if levels is not None and level > levels: + break - meta = { - "BodyPart": "BRAIN", - "Environment": "exvivo", - "SampleStaining": "none", + zpath = path / str(level) / zname + if not zpath.exists(): + levels = level + break + + level += 1 + with zpath.open("rb") as f: + zarray = json.load(f) + shapes += [zarray["shape"]] + + axis_to_type = { + "x": "space", + "y": "space", + "z": "space", + "t": "time", + "c": "channel", } - for line in oct_meta.split("\n"): - if ":" not in line: - continue + # Number of spatial (s), batch (b) and total (n) dimensions + ndim = len(axes) + sdim = sum(axis_to_type[axis] == "space" for axis in axes) + bdim = ndim - sdim + + if isinstance(pyramid_aligns, (int, str)): + pyramid_aligns = [pyramid_aligns] + pyramid_aligns = list(pyramid_aligns) + if len(pyramid_aligns) < sdim: + repeat = pyramid_aligns[:1] * (sdim - len(pyramid_aligns)) + pyramid_aligns = repeat + pyramid_aligns + pyramid_aligns = pyramid_aligns[-sdim:] + + if isinstance(space_scale, (int, float)): + space_scale = [space_scale] + space_scale = list(space_scale) + if len(space_scale) < sdim: + repeat = space_scale[:1] * (sdim - len(space_scale)) + space_scale = repeat + space_scale + space_scale = space_scale[-sdim:] - key, value = line.split(":") - key, value = key.strip(), value.strip() - - if key == "Image medium": - parts = value.split() - if "TDE" in parts: - parts[parts.index("TDE")] = "2,2' Thiodiethanol (TDE)" - meta["SampleMedium"] = " ".join(parts) + multiscales = [ + { + "version": "0.4", + "axes": [ + { + "name": axis, + "type": axis_to_type[axis], + } + if axis_to_type[axis] == "channel" + else { + "name": axis, + "type": axis_to_type[axis], + "unit": ( + space_unit + if axis_to_type[axis] == "space" + else time_unit + if axis_to_type[axis] == "time" + else None + ), + } + for axis in axes + ], + "datasets": [], + "type": "median window " + "x".join(["2"] * sdim), + "name": name, + } + ] - elif key == "Center Wavelength": - value, unit = parse_value_unit(value) - meta["Wavelength"] = value - meta["WavelengthUnit"] = unit + shape = shape0 = shapes[0] + for n in range(len(shapes)): + shape = shapes[n] + multiscales[0]["datasets"].append({}) + level = multiscales[0]["datasets"][-1] + level["path"] = str(n) - elif key == "Axial resolution": - value, unit = parse_value_unit(value) - meta["ResolutionAxial"] = value - meta["ResolutionAxialUnit"] = unit + scale = [1] * bdim + [ + ( + pyramid_aligns[i] ** n + if not isinstance(pyramid_aligns[i], str) + else (shape0[bdim + i] / shape[bdim + i]) + if pyramid_aligns[i][0].lower() == "e" + else ((shape0[bdim + i] - 1) / (shape[bdim + i] - 1)) + ) + * space_scale[i] + for i in range(sdim) + ] + translation = [0] * bdim + [ + ( + pyramid_aligns[i] ** n - 1 + if not isinstance(pyramid_aligns[i], str) + else (shape0[bdim + i] / shape[bdim + i]) - 1 + if pyramid_aligns[i][0].lower() == "e" + else 0 + ) + * 0.5 + * space_scale[i] + for i in range(sdim) + ] - elif key == "Lateral resolution": - value, unit = parse_value_unit(value) - meta["ResolutionLateral"] = value - meta["ResolutionLateralUnit"] = unit + level["coordinateTransformations"] = [ + { + "type": 
"scale", + "scale": scale, + }, + { + "type": "translation", + "translation": translation, + }, + ] - elif key == "Voxel size": - value, unit = parse_value_unit(value, n=3) - meta["PixelSize"] = value - meta["PixelSizeUnits"] = unit + scale = [1] * ndim + if "t" in axes: + scale[axes.index("t")] = time_scale + multiscales[0]["coordinateTransformations"] = [{"scale": scale, "type": "scale"}] + + if zarr_version == 3: + with (path / "zarr.json").open("wt") as f: + json.dump( + { + "zarr_format": 3, + "node_type": "group", + "attributes": { + # Zarr v2 way of doing it -- neuroglancer wants this + "multiscales": multiscales, + # Zarr RFC-2 recommended way + "ome": {"version": "0.4", "multiscales": multiscales}, + }, + }, + f, + indent=4, + ) + else: + multiscales[0]["version"] = "0.4" + with (path / ".zgroup").open("wt") as f: + json.dump({"zarr_format": 2}, f, indent=4) + with (path / ".zattrs").open("wt") as f: + json.dump({"multiscales": multiscales}, f, indent=4) + + + +def niftizarr_write_header( + omz, + shape: list[int], + affine: np.ndarray, + dtype: np.dtype | str, + unit: Literal["micron", "mm"] | None = None, + header: nib.Nifti1Header | nib.Nifti2Header | None = None, + nifti_version: Literal[1,2] = 1 +) -> None: + """ + Write NIfTI header in a NIfTI-Zarr file. - elif key == "Depth focus range": - value, unit = parse_value_unit(value) - meta["DepthFocusRange"] = value - meta["DepthFocusRangeUnit"] = unit + Parameters + ---------- + path : PathLike | str + Path to parent Zarr. + affine : (4, 4) matrix + Orientation matrix. + shape : list[int] + Array shape, in NIfTI order (x, y, z, t, c). + dtype : np.dtype | str + Data type. + unit : {"micron", "mm"}, optional + World unit. + header : nib.Nifti1Header | nib.Nifti2Header, optional + Pre-instantiated header. + zarr_version : int, default=3 + Zarr version. + """ + # TODO: we do not write the json zattrs, but it should be added in + # once the nifti-zarr package is released - elif key == "Number of focuses": - value, unit = parse_value_unit(value) - meta["FocusCount"] = int(value) + # If dimensions do not fit in a short (which is often the case), we + # use NIfTI 2. 
+ if all(x < 32768 for x in shape) or nifti_version == 1: + NiftiHeader = nib.Nifti1Header + else: + NiftiHeader = nib.Nifti2Header - elif key == "Slice thickness": - value, unit = parse_value_unit(value) - unit = convert_unit(value, unit[:-1], "u") - meta["SliceThickness"] = value + header = header or NiftiHeader() + header.set_data_shape(shape) + header.set_data_dtype(dtype) + header.set_qform(affine) + header.set_sform(affine) + if unit: + header.set_xyzt_units(nib.nifti1.unit_codes.code[unit]) + header = np.frombuffer(header.structarr.tobytes(), dtype="u1") - elif key == "Number of slices": - value, unit = parse_value_unit(value) - meta["SliceCount"] = int(value) - elif key == "Modality": - meta["OCTModality"] = value + metadata = { + "chunks": [len(header)], + "order": "F", + "dtype": "|u1", + "fill_value": None, + "compressor": None, # TODO: Subject to change + } - else: - continue + omz.create_dataset("nifti", data=header, shape=len(header), **metadata) - return meta + print("done.") From 4ac9e0a75a7d3ff0743c83e3819f5cb287856a17 Mon Sep 17 00:00:00 2001 From: Kaidong Chai Date: Thu, 14 Nov 2024 15:09:03 -0500 Subject: [PATCH 31/51] fix: nifti unit --- linc_convert/modalities/psoct/stacking.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/linc_convert/modalities/psoct/stacking.py b/linc_convert/modalities/psoct/stacking.py index 4ae2c8d..5b70dfd 100644 --- a/linc_convert/modalities/psoct/stacking.py +++ b/linc_convert/modalities/psoct/stacking.py @@ -236,7 +236,7 @@ def convert( affine = orientation_to_affine(orientation, *vx[::-1]) if center: affine = center_affine(affine, shape[:3]) - niftizarr_write_header(omz,shape,affine,omz["0"].dtype,unit,nifti_version=2) + niftizarr_write_header(omz,shape,affine,omz["0"].dtype,to_nifti_unit(unit),nifti_version=2) # header = nib.Nifti2Header() # header.set_data_shape(shape) # header.set_data_dtype(omz["0"].dtype) From d7306fe48e2f481a7f38cc0336efc9844d37572a Mon Sep 17 00:00:00 2001 From: Kaidong Chai Date: Fri, 15 Nov 2024 13:55:09 -0500 Subject: [PATCH 32/51] WIP --- linc_convert/modalities/psoct/stacking.py | 369 ++++++++++------------ 1 file changed, 173 insertions(+), 196 deletions(-) diff --git a/linc_convert/modalities/psoct/stacking.py b/linc_convert/modalities/psoct/stacking.py index 5b70dfd..4d926e8 100644 --- a/linc_convert/modalities/psoct/stacking.py +++ b/linc_convert/modalities/psoct/stacking.py @@ -171,7 +171,7 @@ def convert( j * inp_chunk[1]: j * inp_chunk[1] + loaded_chunk.shape[1], i * inp_chunk[2]: i * inp_chunk[2] + loaded_chunk.shape[2], ] = loaded_chunk - + # TODO: no_pool is ignored for now, should add back generate_pyramid(omz, nblevels - 1, mode="mean") print("") @@ -521,168 +521,6 @@ def generate_pyramid( pass - -# -# def generate_pyramid(inp, inp_chunk, nblevels, ni, nj, nk, no_pool, omz): -# for i, j, k in product(range(ni), range(nj), range(nk)): -# level_chunk = inp_chunk -# loaded_chunk = inp[ -# k * level_chunk[0]: (k + 1) * level_chunk[0], -# j * level_chunk[1]: (j + 1) * level_chunk[1], -# i * level_chunk[2]: (i + 1) * level_chunk[2], -# ] -# -# out_level = omz["0"] -# -# print( -# f"[{i + 1:03d}, {j + 1:03d}, {k + 1:03d}]", -# "/", -# f"[{ni:03d}, {nj:03d}, {nk:03d}]", -# # f"({1 + level}/{nblevels})", -# end="\r", -# ) -# -# # save current chunk -# out_level[ -# k * level_chunk[0]: k * level_chunk[0] + loaded_chunk.shape[0], -# j * level_chunk[1]: j * level_chunk[1] + loaded_chunk.shape[1], -# i * level_chunk[2]: i * level_chunk[2] + loaded_chunk.shape[2], -# ] = loaded_chunk -# -# for 
level in range(nblevels): -# out_level = omz[str(level)] -# -# print( -# f"[{i + 1:03d}, {j + 1:03d}, {k + 1:03d}]", -# "/", -# f"[{ni:03d}, {nj:03d}, {nk:03d}]", -# f"({1 + level}/{nblevels})", -# end="\r", -# ) -# -# # save current chunk -# out_level[ -# k * level_chunk[0]: k * level_chunk[0] + loaded_chunk.shape[0], -# j * level_chunk[1]: j * level_chunk[1] + loaded_chunk.shape[1], -# i * level_chunk[2]: i * level_chunk[2] + loaded_chunk.shape[2], -# ] = loaded_chunk -# # ensure divisible by 2 -# loaded_chunk = loaded_chunk[ -# slice(2 * (loaded_chunk.shape[0] // 2) if 0 != no_pool else None), -# slice(2 * (loaded_chunk.shape[1] // 2) if 1 != no_pool else None), -# slice(2 * (loaded_chunk.shape[2] // 2) if 2 != no_pool else None), -# ] -# # mean pyramid (average each 2x2x2 patch) -# if no_pool == 0: -# loaded_chunk = ( -# loaded_chunk[:, 0::2, 0::2] -# + loaded_chunk[:, 0::2, 1::2] -# + loaded_chunk[:, 1::2, 0::2] -# + loaded_chunk[:, 1::2, 1::2] -# ) / 4 -# elif no_pool == 1: -# loaded_chunk = ( -# loaded_chunk[0::2, :, 0::2] -# + loaded_chunk[0::2, :, 1::2] -# + loaded_chunk[1::2, :, 0::2] -# + loaded_chunk[1::2, :, 1::2] -# ) / 4 -# elif no_pool == 2: -# loaded_chunk = ( -# loaded_chunk[0::2, 0::2, :] -# + loaded_chunk[0::2, 1::2, :] -# + loaded_chunk[1::2, 0::2, :] -# + loaded_chunk[1::2, 1::2, :] -# ) / 4 -# else: -# loaded_chunk = ( -# loaded_chunk[0::2, 0::2, 0::2] -# + loaded_chunk[0::2, 0::2, 1::2] -# + loaded_chunk[0::2, 1::2, 0::2] -# + loaded_chunk[0::2, 1::2, 1::2] -# + loaded_chunk[1::2, 0::2, 0::2] -# + loaded_chunk[1::2, 0::2, 1::2] -# + loaded_chunk[1::2, 1::2, 0::2] -# + loaded_chunk[1::2, 1::2, 1::2] -# ) / 8 -# level_chunk = [ -# x if i == no_pool else x // 2 for i, x in enumerate(level_chunk) -# ] -# - -# def generate_pyramid(i, j, k, level_chunk, loaded_chunk, nblevels, ni, nj, nk, no_pool, -# omz): -# for level in range(nblevels): -# out_level = omz[str(level)] -# -# print( -# f"[{i + 1:03d}, {j + 1:03d}, {k + 1:03d}]", -# "/", -# f"[{ni:03d}, {nj:03d}, {nk:03d}]", -# f"({1 + level}/{nblevels})", -# end="\r", -# ) -# -# # save current chunk -# out_level[ -# k * level_chunk[0]: k * level_chunk[0] + loaded_chunk.shape[0], -# j * level_chunk[1]: j * level_chunk[1] + loaded_chunk.shape[1], -# i * level_chunk[2]: i * level_chunk[2] + loaded_chunk.shape[2], -# ] = loaded_chunk -# # ensure divisible by 2 -# loaded_chunk = loaded_chunk[ -# slice(2 * (loaded_chunk.shape[0] // 2) if 0 != no_pool else None), -# slice(2 * (loaded_chunk.shape[1] // 2) if 1 != no_pool else None), -# slice(2 * (loaded_chunk.shape[2] // 2) if 2 != no_pool else None), -# ] -# # mean pyramid (average each 2x2x2 patch) -# if no_pool == 0: -# loaded_chunk = ( -# loaded_chunk[:, 0::2, 0::2] -# + loaded_chunk[:, 0::2, 1::2] -# + loaded_chunk[:, 1::2, 0::2] -# + loaded_chunk[:, 1::2, 1::2] -# ) / 4 -# elif no_pool == 1: -# loaded_chunk = ( -# loaded_chunk[0::2, :, 0::2] -# + loaded_chunk[0::2, :, 1::2] -# + loaded_chunk[1::2, :, 0::2] -# + loaded_chunk[1::2, :, 1::2] -# ) / 4 -# elif no_pool == 2: -# loaded_chunk = ( -# loaded_chunk[0::2, 0::2, :] -# + loaded_chunk[0::2, 1::2, :] -# + loaded_chunk[1::2, 0::2, :] -# + loaded_chunk[1::2, 1::2, :] -# ) / 4 -# else: -# loaded_chunk = ( -# loaded_chunk[0::2, 0::2, 0::2] -# + loaded_chunk[0::2, 0::2, 1::2] -# + loaded_chunk[0::2, 1::2, 0::2] -# + loaded_chunk[0::2, 1::2, 1::2] -# + loaded_chunk[1::2, 0::2, 0::2] -# + loaded_chunk[1::2, 0::2, 1::2] -# + loaded_chunk[1::2, 1::2, 0::2] -# + loaded_chunk[1::2, 1::2, 1::2] -# ) / 8 -# level_chunk = [ -# x if i == 
no_pool else x // 2 for i, x in enumerate(level_chunk) -# ] - -# -# def create_level(chunk, inp, nblevels, no_pool, omz, opt): -# shape_level = inp.shape -# for level in range(1,nblevels,1): -# opt["chunks"] = [min(x, chunk) for x in shape_level] -# omz.create_dataset(str(level), shape=shape_level, **opt) -# shape_level = [x if i == no_pool else x // 2 for i, x in enumerate(shape_level)] -# - - - def write_ome_metadata( path: str | os.PathLike, axes: list[str], @@ -693,7 +531,6 @@ def write_ome_metadata( name: str = "", pyramid_aligns: str | int | list[str | int] = 2, levels: int | None = None, - zarr_version: int | None = None, ) -> None: """ Write OME metadata into Zarr. @@ -724,19 +561,11 @@ def write_ome_metadata( Zarr version. If `None`, guess from existing zarr array. """ - path = UPath(path) # Detect zarr version - if not zarr_version: - if (path / "0" / "zarr.json").exists(): - zarr_version = 3 - elif (path / "0" / ".zarray").exists(): - zarr_version = 2 - else: - raise FileNotFoundError("No existing array to guess version from") # Read shape at each pyramid level - zname = "zarr.json" if zarr_version == 3 else ".zarray" + zname = ".zarray" shapes = [] level = 0 while True: @@ -857,28 +686,12 @@ def write_ome_metadata( scale[axes.index("t")] = time_scale multiscales[0]["coordinateTransformations"] = [{"scale": scale, "type": "scale"}] - if zarr_version == 3: - with (path / "zarr.json").open("wt") as f: - json.dump( - { - "zarr_format": 3, - "node_type": "group", - "attributes": { - # Zarr v2 way of doing it -- neuroglancer wants this - "multiscales": multiscales, - # Zarr RFC-2 recommended way - "ome": {"version": "0.4", "multiscales": multiscales}, - }, - }, - f, - indent=4, - ) - else: - multiscales[0]["version"] = "0.4" - with (path / ".zgroup").open("wt") as f: - json.dump({"zarr_format": 2}, f, indent=4) - with (path / ".zattrs").open("wt") as f: - json.dump({"multiscales": multiscales}, f, indent=4) + + multiscales[0]["version"] = "0.4" + with (path / ".zgroup").open("wt") as f: + json.dump({"zarr_format": 2}, f, indent=4) + with (path / ".zattrs").open("wt") as f: + json.dump({"multiscales": multiscales}, f, indent=4) @@ -936,9 +749,173 @@ def niftizarr_write_header( "order": "F", "dtype": "|u1", "fill_value": None, - "compressor": None, # TODO: Subject to change + "compressor": None, # TODO: Subject to change compression } omz.create_dataset("nifti", data=header, shape=len(header), **metadata) print("done.") + + + + +# +# def generate_pyramid(inp, inp_chunk, nblevels, ni, nj, nk, no_pool, omz): +# for i, j, k in product(range(ni), range(nj), range(nk)): +# level_chunk = inp_chunk +# loaded_chunk = inp[ +# k * level_chunk[0]: (k + 1) * level_chunk[0], +# j * level_chunk[1]: (j + 1) * level_chunk[1], +# i * level_chunk[2]: (i + 1) * level_chunk[2], +# ] +# +# out_level = omz["0"] +# +# print( +# f"[{i + 1:03d}, {j + 1:03d}, {k + 1:03d}]", +# "/", +# f"[{ni:03d}, {nj:03d}, {nk:03d}]", +# # f"({1 + level}/{nblevels})", +# end="\r", +# ) +# +# # save current chunk +# out_level[ +# k * level_chunk[0]: k * level_chunk[0] + loaded_chunk.shape[0], +# j * level_chunk[1]: j * level_chunk[1] + loaded_chunk.shape[1], +# i * level_chunk[2]: i * level_chunk[2] + loaded_chunk.shape[2], +# ] = loaded_chunk +# +# for level in range(nblevels): +# out_level = omz[str(level)] +# +# print( +# f"[{i + 1:03d}, {j + 1:03d}, {k + 1:03d}]", +# "/", +# f"[{ni:03d}, {nj:03d}, {nk:03d}]", +# f"({1 + level}/{nblevels})", +# end="\r", +# ) +# +# # save current chunk +# out_level[ +# k * level_chunk[0]: k 
* level_chunk[0] + loaded_chunk.shape[0], +# j * level_chunk[1]: j * level_chunk[1] + loaded_chunk.shape[1], +# i * level_chunk[2]: i * level_chunk[2] + loaded_chunk.shape[2], +# ] = loaded_chunk +# # ensure divisible by 2 +# loaded_chunk = loaded_chunk[ +# slice(2 * (loaded_chunk.shape[0] // 2) if 0 != no_pool else None), +# slice(2 * (loaded_chunk.shape[1] // 2) if 1 != no_pool else None), +# slice(2 * (loaded_chunk.shape[2] // 2) if 2 != no_pool else None), +# ] +# # mean pyramid (average each 2x2x2 patch) +# if no_pool == 0: +# loaded_chunk = ( +# loaded_chunk[:, 0::2, 0::2] +# + loaded_chunk[:, 0::2, 1::2] +# + loaded_chunk[:, 1::2, 0::2] +# + loaded_chunk[:, 1::2, 1::2] +# ) / 4 +# elif no_pool == 1: +# loaded_chunk = ( +# loaded_chunk[0::2, :, 0::2] +# + loaded_chunk[0::2, :, 1::2] +# + loaded_chunk[1::2, :, 0::2] +# + loaded_chunk[1::2, :, 1::2] +# ) / 4 +# elif no_pool == 2: +# loaded_chunk = ( +# loaded_chunk[0::2, 0::2, :] +# + loaded_chunk[0::2, 1::2, :] +# + loaded_chunk[1::2, 0::2, :] +# + loaded_chunk[1::2, 1::2, :] +# ) / 4 +# else: +# loaded_chunk = ( +# loaded_chunk[0::2, 0::2, 0::2] +# + loaded_chunk[0::2, 0::2, 1::2] +# + loaded_chunk[0::2, 1::2, 0::2] +# + loaded_chunk[0::2, 1::2, 1::2] +# + loaded_chunk[1::2, 0::2, 0::2] +# + loaded_chunk[1::2, 0::2, 1::2] +# + loaded_chunk[1::2, 1::2, 0::2] +# + loaded_chunk[1::2, 1::2, 1::2] +# ) / 8 +# level_chunk = [ +# x if i == no_pool else x // 2 for i, x in enumerate(level_chunk) +# ] +# + +# def generate_pyramid(i, j, k, level_chunk, loaded_chunk, nblevels, ni, nj, nk, no_pool, +# omz): +# for level in range(nblevels): +# out_level = omz[str(level)] +# +# print( +# f"[{i + 1:03d}, {j + 1:03d}, {k + 1:03d}]", +# "/", +# f"[{ni:03d}, {nj:03d}, {nk:03d}]", +# f"({1 + level}/{nblevels})", +# end="\r", +# ) +# +# # save current chunk +# out_level[ +# k * level_chunk[0]: k * level_chunk[0] + loaded_chunk.shape[0], +# j * level_chunk[1]: j * level_chunk[1] + loaded_chunk.shape[1], +# i * level_chunk[2]: i * level_chunk[2] + loaded_chunk.shape[2], +# ] = loaded_chunk +# # ensure divisible by 2 +# loaded_chunk = loaded_chunk[ +# slice(2 * (loaded_chunk.shape[0] // 2) if 0 != no_pool else None), +# slice(2 * (loaded_chunk.shape[1] // 2) if 1 != no_pool else None), +# slice(2 * (loaded_chunk.shape[2] // 2) if 2 != no_pool else None), +# ] +# # mean pyramid (average each 2x2x2 patch) +# if no_pool == 0: +# loaded_chunk = ( +# loaded_chunk[:, 0::2, 0::2] +# + loaded_chunk[:, 0::2, 1::2] +# + loaded_chunk[:, 1::2, 0::2] +# + loaded_chunk[:, 1::2, 1::2] +# ) / 4 +# elif no_pool == 1: +# loaded_chunk = ( +# loaded_chunk[0::2, :, 0::2] +# + loaded_chunk[0::2, :, 1::2] +# + loaded_chunk[1::2, :, 0::2] +# + loaded_chunk[1::2, :, 1::2] +# ) / 4 +# elif no_pool == 2: +# loaded_chunk = ( +# loaded_chunk[0::2, 0::2, :] +# + loaded_chunk[0::2, 1::2, :] +# + loaded_chunk[1::2, 0::2, :] +# + loaded_chunk[1::2, 1::2, :] +# ) / 4 +# else: +# loaded_chunk = ( +# loaded_chunk[0::2, 0::2, 0::2] +# + loaded_chunk[0::2, 0::2, 1::2] +# + loaded_chunk[0::2, 1::2, 0::2] +# + loaded_chunk[0::2, 1::2, 1::2] +# + loaded_chunk[1::2, 0::2, 0::2] +# + loaded_chunk[1::2, 0::2, 1::2] +# + loaded_chunk[1::2, 1::2, 0::2] +# + loaded_chunk[1::2, 1::2, 1::2] +# ) / 8 +# level_chunk = [ +# x if i == no_pool else x // 2 for i, x in enumerate(level_chunk) +# ] + +# +# def create_level(chunk, inp, nblevels, no_pool, omz, opt): +# shape_level = inp.shape +# for level in range(1,nblevels,1): +# opt["chunks"] = [min(x, chunk) for x in shape_level] +# 
omz.create_dataset(str(level), shape=shape_level, **opt)
#         shape_level = [x if i == no_pool else x // 2 for i, x in enumerate(shape_level)]
#


From 2e09d6b8fca51dc4a18d5ee07822d229bce24645 Mon Sep 17 00:00:00 2001
From: Kaidong Chai
Date: Fri, 15 Nov 2024 14:09:07 -0500
Subject: [PATCH 33/51] Separate OCT pipeline to multi_slice.py and
 single_volume.py

---
 linc_convert/modalities/psoct/multi_slice.py | 291 +++++++++++++++++
 .../modalities/psoct/single_volume.py | 285 +++++++++++++++++
 .../psoct/{stacking.py => utils.py} | 292 +-----------------
 3 files changed, 580 insertions(+), 288 deletions(-)
 create mode 100644 linc_convert/modalities/psoct/multi_slice.py
 create mode 100644 linc_convert/modalities/psoct/single_volume.py
 rename linc_convert/modalities/psoct/{stacking.py => utils.py} (70%)

diff --git a/linc_convert/modalities/psoct/multi_slice.py b/linc_convert/modalities/psoct/multi_slice.py
new file mode 100644
index 0000000..fddf988
--- /dev/null
+++ b/linc_convert/modalities/psoct/multi_slice.py
@@ -0,0 +1,291 @@
+"""
+Converts Matlab files generated by the MGH in-house OCT pipeline
+into an OME-ZARR pyramid.
+"""
+
+import ast
+import json
+import math
+import os
+from contextlib import contextmanager
+from functools import wraps
+from itertools import product
+from typing import Optional, List
+from warnings import warn
+
+import cyclopts
+import h5py
+import numpy as np
+import zarr
+from scipy.io import loadmat
+
+from linc_convert.modalities.psoct.cli import psoct
+from linc_convert.modalities.psoct.utils import make_json, generate_pyramid, \
+    niftizarr_write_header
+from linc_convert.utils.math import ceildiv
+from linc_convert.utils.orientation import orientation_to_affine, center_affine
+from linc_convert.utils.unit import to_ome_unit, to_nifti_unit
+from linc_convert.utils.zarr import make_compressor
+
+multi_slice = cyclopts.App(name="multi_slice", help_format="markdown")
+psoct.command(multi_slice)
+
+
+def automap(func):
+    """Decorator to automatically map the array in the mat file"""
+
+    @wraps(func)
+    def wrapper(inp, out=None, **kwargs):
+        if out is None:
+            out = os.path.splitext(inp[0])[0]
+            out += ".nii.zarr" if kwargs.get("nii", False) else ".ome.zarr"
+        kwargs["nii"] = kwargs.get("nii", False) or out.endswith(".nii.zarr")
+        with mapmat(inp, kwargs.get("key", None)) as dat:
+            return func(dat, out, **kwargs)
+
+    return wrapper
+
+
+@multi_slice.default
+@automap
+def convert(
+    inp: List[str],
+    out: Optional[str] = None,
+    *,
+    key: Optional[str] = None,
+    meta: str = None,
+    chunk: int = 128,
+    compressor: str = "blosc",
+    compressor_opt: str = "{}",
+    max_load: int = 128,
+    max_levels: int = 5,
+    no_pool: Optional[int] = None,
+    nii: bool = False,
+    orientation: str = "RAS",
+    center: bool = True,
+) -> None:
+    """
+    This command converts OCT volumes stored in raw matlab files
+    into a pyramidal OME-ZARR (or NIfTI-Zarr) hierarchy.
+
+    Parameters
+    ----------
+    inp
+        Path to the input mat file
+    out
+        Path to the output Zarr directory [.ome.zarr]
+    key
+        Key of the array to be extracted, default to first key found
+    meta
+        Path to the metadata file
+    chunk
+        Output chunk size
+    compressor : {blosc, zlib, raw}
+        Compression method
+    compressor_opt
+        Compression options
+    max_load
+        Maximum input chunk size
+    max_levels
+        Maximum number of pyramid levels
+    no_pool
+        Index of dimension to not pool when building pyramid
+    nii
+        Convert to nifti-zarr. 
True if path ends in ".nii.zarr" + orientation + Orientation of the volume + center + Set RAS[0, 0, 0] at FOV center + """ + + if isinstance(compressor_opt, str): + compressor_opt = ast.literal_eval(compressor_opt) + + # Write OME-Zarr multiscale metadata + if meta: + print("Write JSON") + with open(meta, "r") as f: + meta_txt = f.read() + meta_json = make_json(meta_txt) + path_json = ".".join(out.split(".")[:-2]) + ".json" + with open(path_json, "w") as f: + json.dump(meta_json, f, indent=4) + vx = meta_json["PixelSize"] + unit = meta_json["PixelSizeUnits"] + else: + vx = [1] * 3 + unit = "um" + + # Prepare Zarr group + omz = zarr.storage.DirectoryStore(out) + omz = zarr.group(store=omz, overwrite=True) + + if not hasattr(inp, "dtype"): + raise Exception("Input is not a numpy array. This is likely unexpected") + if len(inp.shape) < 3: + raise Exception("Input array is not 3d") + # Prepare chunking options + opt = { + "dimension_separator": r"/", + "order": "F", + "dtype": np.dtype(inp.dtype).str, + "fill_value": None, + "compressor": make_compressor(compressor, **compressor_opt), + } + + inp_chunk = [min(x, max_load) for x in inp.shape] + nk = ceildiv(inp.shape[0], inp_chunk[0]) + nj = ceildiv(inp.shape[1], inp_chunk[1]) + ni = ceildiv(inp.shape[2], inp_chunk[2]) + + nblevels = min( + [int(math.ceil(math.log2(x))) for i, x in enumerate(inp.shape) if i != no_pool] + ) + nblevels = min(nblevels, int(math.ceil(math.log2(max_load)))) + nblevels = min(nblevels, max_levels) + + opt["chunks"] = [min(x, chunk) for x in inp.shape] + + omz.create_dataset(str(0), shape=inp.shape, **opt) + + # iterate across input chunks + for i, j, k in product(range(ni), range(nj), range(nk)): + loaded_chunk = inp[ + k * inp_chunk[0]: (k + 1) * inp_chunk[0], + j * inp_chunk[1]: (j + 1) * inp_chunk[1], + i * inp_chunk[2]: (i + 1) * inp_chunk[2], + ] + + print( + f"[{i + 1:03d}, {j + 1:03d}, {k + 1:03d}]", + "/", + f"[{ni:03d}, {nj:03d}, {nk:03d}]", + # f"({1 + level}/{nblevels})", + end="\r", + ) + + # save current chunk + omz["0"][ + k * inp_chunk[0]: k * inp_chunk[0] + loaded_chunk.shape[0], + j * inp_chunk[1]: j * inp_chunk[1] + loaded_chunk.shape[1], + i * inp_chunk[2]: i * inp_chunk[2] + loaded_chunk.shape[2], + ] = loaded_chunk + # TODO: no_pool is ignored for now, should add back + generate_pyramid(omz, nblevels - 1, mode="mean") + + print("") + + # Write OME-Zarr multiscale metadata + print("Write metadata") + print(unit) + ome_unit = to_ome_unit(unit) + multiscales = [ + { + "version": "0.4", + "axes": [ + {"name": "z", "type": "space", "unit": ome_unit}, + {"name": "y", "type": "space", "unit": ome_unit}, + {"name": "x", "type": "space", "unit": ome_unit}, + ], + "datasets": [], + "type": ("2x2x2" if no_pool is None else "2x2") + "mean window", + "name": "", + } + ] + + for n in range(nblevels): + multiscales[0]["datasets"].append({}) + level = multiscales[0]["datasets"][-1] + level["path"] = str(n) + + # With a moving window, the scaling factor is exactly 2, and + # the edges of the top-left voxel are aligned + level["coordinateTransformations"] = [ + { + "type": "scale", + "scale": [ + (1 if no_pool == 0 else 2 ** n) * vx[0], + (1 if no_pool == 1 else 2 ** n) * vx[1], + (1 if no_pool == 2 else 2 ** n) * vx[2], + ], + }, + { + "type": "translation", + "translation": [ + (0 if no_pool == 0 else (2 ** n - 1)) * vx[0] * 0.5, + (0 if no_pool == 1 else (2 ** n - 1)) * vx[1] * 0.5, + (0 if no_pool == 2 else (2 ** n - 1)) * vx[2] * 0.5, + ], + }, + ] + multiscales[0]["coordinateTransformations"] = [ + {"scale": 
[1.0] * 3, "type": "scale"}
+    ]
+    omz.attrs["multiscales"] = multiscales
+
+    if not nii:
+        print("done.")
+        return
+
+    # Write NIfTI-Zarr header
+    # NOTE: we use nifti2 because dimensions typically do not fit in a short
+    # TODO: we do not write the json zattrs, but it should be added in
+    # once the nifti-zarr package is released
+    shape = list(reversed(omz["0"].shape))
+    affine = orientation_to_affine(orientation, *vx[::-1])
+    if center:
+        affine = center_affine(affine, shape[:3])
+    niftizarr_write_header(omz, shape, affine, omz["0"].dtype, to_nifti_unit(unit), nifti_version=2)
+    # header = nib.Nifti2Header()
+    # header.set_data_shape(shape)
+    # header.set_data_dtype(omz["0"].dtype)
+    # header.set_qform(affine)
+    # header.set_sform(affine)
+    # header.set_xyzt_units(nib.nifti1.unit_codes.code[to_nifti_unit(unit)])
+    # header.structarr["magic"] = b"nz2\0"
+    # header = np.frombuffer(header.structarr.tobytes(), dtype="u1")
+    # opt = {
+    #     "chunks": [len(header)],
+    #     "dimension_separator": r"/",
+    #     "order": "F",
+    #     "dtype": "|u1",
+    #     "fill_value": None,
+    #     "compressor": None,
+    # }
+    # omz.create_dataset("nifti", data=header, shape=len(header), **opt)
+    # print("done.")
+
+
+@contextmanager
+def mapmat(fnames, key=None):
+    """Load or memory-map an array stored in a .mat file"""
+    loaded_data = []
+
+    for fname in fnames:
+        try:
+            # "New" .mat file
+            f = h5py.File(fname, "r")
+        except Exception:
+            # "Old" .mat file
+            f = loadmat(fname)
+
+        if key is None:
+            if not len(f.keys()):
+                raise Exception(f"{fname} is empty")
+            key = list(f.keys())[0]
+            if len(f.keys()) > 1:
+                warn(
+                    f'More than one key in .mat file {fname}, arbitrarily loading "{key}"'
+                )
+
+        if key not in f.keys():
+            raise Exception(f"Key {key} not found in file {fname}")
+
+        if len(fnames) == 1:
+            yield f.get(key)
+            if hasattr(f, "close"):
+                f.close()
+            # `return` (not `break`): with `break`, control would fall through
+            # to the final `yield` and the context manager would yield twice.
+            return
+        loaded_data.append(f.get(key))
+
+    yield np.stack(loaded_data, axis=-1)
diff --git a/linc_convert/modalities/psoct/single_volume.py b/linc_convert/modalities/psoct/single_volume.py
new file mode 100644
index 0000000..ecf8915
--- /dev/null
+++ b/linc_convert/modalities/psoct/single_volume.py
@@ -0,0 +1,285 @@
+"""
+Converts Matlab files generated by the MGH in-house OCT pipeline
+into an OME-ZARR pyramid. 
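+
+A minimal usage sketch (the file names are illustrative; `convert` is the
+cyclopts entry point defined below)::
+
+    from linc_convert.modalities.psoct.single_volume import convert
+    convert("input.mat", "output.nii.zarr", nii=True)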
+""" + +import ast +import json +import math +import os +from contextlib import contextmanager +from functools import wraps +from itertools import product +from typing import Optional, List +from warnings import warn + +import cyclopts +import h5py +import numpy as np +import zarr +from scipy.io import loadmat + +from linc_convert.modalities.psoct.cli import psoct +from linc_convert.modalities.psoct.utils import make_json, generate_pyramid, \ + niftizarr_write_header +from linc_convert.utils.math import ceildiv +from linc_convert.utils.orientation import orientation_to_affine, center_affine +from linc_convert.utils.unit import to_ome_unit, to_nifti_unit +from linc_convert.utils.zarr import make_compressor + +single_volume = cyclopts.App(name="single_volume", help_format="markdown") +psoct.command(single_volume) + + +def automap(func): + """Decorator to automatically map the array in the mat file""" + + @wraps(func) + def wrapper(inp, out=None, **kwargs): + if out is None: + out = os.path.splitext(inp[0])[0] + out += ".nii.zarr" if kwargs.get("nii", False) else ".ome.zarr" + kwargs["nii"] = kwargs.get("nii", False) or out.endswith(".nii.zarr") + with mapmat(inp, kwargs.get("key", None)) as dat: + return func(dat, out, **kwargs) + + return wrapper + + +@single_volume.default +@automap +def convert( + inp: str, + out: Optional[str] = None, + *, + key: Optional[str] = None, + meta: str = None, + chunk: int = 128, + compressor: str = "blosc", + compressor_opt: str = "{}", + max_load: int = 128, + max_levels: int = 5, + no_pool: Optional[int] = None, + nii: bool = False, + orientation: str = "RAS", + center: bool = True, +) -> None: + """ + This command converts OCT volumes stored in raw matlab files + into a pyramidal OME-ZARR (or NIfTI-Zarr) hierarchy. + + Parameters + ---------- + inp + Path to the input mat file + out + Path to the output Zarr directory [.ome.zarr] + key + Key of the array to be extracted, default to first key found + meta + Path to the metadata file + chunk + Output chunk size + compressor : {blosc, zlib, raw} + Compression method + compressor_opt + Compression options + max_load + Maximum input chunk size + max_levels + Maximum number of pyramid levels + no_pool + Index of dimension to not pool when building pyramid + nii + Convert to nifti-zarr. True if path ends in ".nii.zarr" + orientation + Orientation of the volume + center + Set RAS[0, 0, 0] at FOV center + """ + + if isinstance(compressor_opt, str): + compressor_opt = ast.literal_eval(compressor_opt) + + # Write OME-Zarr multiscale metadata + if meta: + print("Write JSON") + with open(meta, "r") as f: + meta_txt = f.read() + meta_json = make_json(meta_txt) + path_json = ".".join(out.split(".")[:-2]) + ".json" + with open(path_json, "w") as f: + json.dump(meta_json, f, indent=4) + vx = meta_json["PixelSize"] + unit = meta_json["PixelSizeUnits"] + else: + vx = [1] * 3 + unit = "um" + + # Prepare Zarr group + omz = zarr.storage.DirectoryStore(out) + omz = zarr.group(store=omz, overwrite=True) + + if not hasattr(inp, "dtype"): + raise Exception("Input is not a numpy array. 
This is likely unexpected") + if len(inp.shape) < 3: + raise Exception("Input array is not 3d") + # Prepare chunking options + opt = { + "dimension_separator": r"/", + "order": "F", + "dtype": np.dtype(inp.dtype).str, + "fill_value": None, + "compressor": make_compressor(compressor, **compressor_opt), + } + + inp_chunk = [min(x, max_load) for x in inp.shape] + nk = ceildiv(inp.shape[0], inp_chunk[0]) + nj = ceildiv(inp.shape[1], inp_chunk[1]) + ni = ceildiv(inp.shape[2], inp_chunk[2]) + + nblevels = min( + [int(math.ceil(math.log2(x))) for i, x in enumerate(inp.shape) if i != no_pool] + ) + nblevels = min(nblevels, int(math.ceil(math.log2(max_load)))) + nblevels = min(nblevels, max_levels) + + opt["chunks"] = [min(x, chunk) for x in inp.shape] + + omz.create_dataset(str(0), shape=inp.shape, **opt) + + # iterate across input chunks + for i, j, k in product(range(ni), range(nj), range(nk)): + loaded_chunk = inp[ + k * inp_chunk[0]: (k + 1) * inp_chunk[0], + j * inp_chunk[1]: (j + 1) * inp_chunk[1], + i * inp_chunk[2]: (i + 1) * inp_chunk[2], + ] + + print( + f"[{i + 1:03d}, {j + 1:03d}, {k + 1:03d}]", + "/", + f"[{ni:03d}, {nj:03d}, {nk:03d}]", + # f"({1 + level}/{nblevels})", + end="\r", + ) + + # save current chunk + omz["0"][ + k * inp_chunk[0]: k * inp_chunk[0] + loaded_chunk.shape[0], + j * inp_chunk[1]: j * inp_chunk[1] + loaded_chunk.shape[1], + i * inp_chunk[2]: i * inp_chunk[2] + loaded_chunk.shape[2], + ] = loaded_chunk + # TODO: no_pool is ignored for now, should add back + generate_pyramid(omz, nblevels - 1, mode="mean") + + print("") + + # Write OME-Zarr multiscale metadata + print("Write metadata") + print(unit) + ome_unit = to_ome_unit(unit) + multiscales = [ + { + "version": "0.4", + "axes": [ + {"name": "z", "type": "space", "unit": ome_unit}, + {"name": "y", "type": "space", "unit": ome_unit}, + {"name": "x", "type": "space", "unit": ome_unit}, + ], + "datasets": [], + "type": ("2x2x2" if no_pool is None else "2x2") + "mean window", + "name": "", + } + ] + + for n in range(nblevels): + multiscales[0]["datasets"].append({}) + level = multiscales[0]["datasets"][-1] + level["path"] = str(n) + + # With a moving window, the scaling factor is exactly 2, and + # the edges of the top-left voxel are aligned + level["coordinateTransformations"] = [ + { + "type": "scale", + "scale": [ + (1 if no_pool == 0 else 2 ** n) * vx[0], + (1 if no_pool == 1 else 2 ** n) * vx[1], + (1 if no_pool == 2 else 2 ** n) * vx[2], + ], + }, + { + "type": "translation", + "translation": [ + (0 if no_pool == 0 else (2 ** n - 1)) * vx[0] * 0.5, + (0 if no_pool == 1 else (2 ** n - 1)) * vx[1] * 0.5, + (0 if no_pool == 2 else (2 ** n - 1)) * vx[2] * 0.5, + ], + }, + ] + multiscales[0]["coordinateTransformations"] = [ + {"scale": [1.0] * 3, "type": "scale"} + ] + omz.attrs["multiscales"] = multiscales + + if not nii: + print("done.") + return + + # Write NIfTI-Zarr header + # NOTE: we use nifti2 because dimensions typically do not fit in a short + # TODO: we do not write the json zattrs, but it should be added in + # once the nifti-zarr package is released + shape = list(reversed(omz["0"].shape)) + affine = orientation_to_affine(orientation, *vx[::-1]) + if center: + affine = center_affine(affine, shape[:3]) + niftizarr_write_header(omz, shape, affine, omz["0"].dtype, to_nifti_unit(unit), nifti_version=2) + # header = nib.Nifti2Header() + # header.set_data_shape(shape) + # header.set_data_dtype(omz["0"].dtype) + # header.set_qform(affine) + # header.set_sform(affine) + # 
header.set_xyzt_units(nib.nifti1.unit_codes.code[to_nifti_unit(unit)]) + # header.structarr["magic"] = b"nz2\0" + # header = np.frombuffer(header.structarr.tobytes(), dtype="u1") + # opt = { + # "chunks": [len(header)], + # "dimension_separator": r"/", + # "order": "F", + # "dtype": "|u1", + # "fill_value": None, + # "compressor": None, + # } + # omz.create_dataset("nifti", data=header, shape=len(header), **opt) + # print("done.") + + +@contextmanager +def mapmat(fname, key=None): + """Load or memory-map an array stored in a .mat file""" + try: + # "New" .mat file + f = h5py.File(fname, "r") + except Exception: + # "Old" .mat file + f = loadmat(fname) + + if key is None: + if not len(f.keys()): + raise Exception(f"{fname} is empty") + key = list(f.keys())[0] + if len(f.keys()) > 1: + warn( + f'More than one key in .mat file {fname}, arbitrarily loading "{key}"' + ) + + if key not in f.keys(): + raise Exception(f"Key {key} not found in file {fname}") + + + yield f.get(key) + if hasattr(f, "close"): + f.close() + diff --git a/linc_convert/modalities/psoct/stacking.py b/linc_convert/modalities/psoct/utils.py similarity index 70% rename from linc_convert/modalities/psoct/stacking.py rename to linc_convert/modalities/psoct/utils.py index 4d926e8..c6c3fed 100644 --- a/linc_convert/modalities/psoct/stacking.py +++ b/linc_convert/modalities/psoct/utils.py @@ -1,295 +1,14 @@ -""" -Converts Matlab files generated by the MGH in-house OCT pipeline -into a OME-ZARR pyramid. -""" - -import ast import itertools import json -import math import os import re -from contextlib import contextmanager -from functools import wraps -from itertools import product -from typing import Optional, List, Literal -from warnings import warn - -import cyclopts -import h5py +from typing import Literal + import nibabel as nib import numpy as np -import zarr -from scipy.io import loadmat -from linc_convert.modalities.psoct.cli import psoct from linc_convert.utils.math import ceildiv -from linc_convert.utils.orientation import orientation_to_affine, center_affine -from linc_convert.utils.unit import convert_unit, to_ome_unit, to_nifti_unit -from linc_convert.utils.zarr import make_compressor - -stacking = cyclopts.App(name="stacking", help_format="markdown") -psoct.command(stacking) - - -def automap(func): - """Decorator to automatically map the array in the mat file""" - - @wraps(func) - def wrapper(inp, out=None, **kwargs): - if out is None: - out = os.path.splitext(inp[0])[0] - out += ".nii.zarr" if kwargs.get("nii", False) else ".ome.zarr" - kwargs["nii"] = kwargs.get("nii", False) or out.endswith(".nii.zarr") - with mapmat(inp, kwargs.get("key", None)) as dat: - return func(dat, out, **kwargs) - - return wrapper - - -@stacking.default -@automap -def convert( - inp: List[str], - out: Optional[str] = None, - *, - key: Optional[str] = None, - meta: str = None, - chunk: int = 128, - compressor: str = "blosc", - compressor_opt: str = "{}", - max_load: int = 128, - max_levels: int = 5, - no_pool: Optional[int] = None, - nii: bool = False, - orientation: str = "RAS", - center: bool = True, -) -> None: - """ - This command converts OCT volumes stored in raw matlab files - into a pyramidal OME-ZARR (or NIfTI-Zarr) hierarchy. 
- - Parameters - ---------- - inp - Path to the input mat file - out - Path to the output Zarr directory [.ome.zarr] - key - Key of the array to be extracted, default to first key found - meta - Path to the metadata file - chunk - Output chunk size - compressor : {blosc, zlib, raw} - Compression method - compressor_opt - Compression options - max_load - Maximum input chunk size - max_levels - Maximum number of pyramid levels - no_pool - Index of dimension to not pool when building pyramid - nii - Convert to nifti-zarr. True if path ends in ".nii.zarr" - orientation - Orientation of the volume - center - Set RAS[0, 0, 0] at FOV center - """ - - if isinstance(compressor_opt, str): - compressor_opt = ast.literal_eval(compressor_opt) - - # Write OME-Zarr multiscale metadata - if meta: - print("Write JSON") - with open(meta, "r") as f: - meta_txt = f.read() - meta_json = make_json(meta_txt) - path_json = ".".join(out.split(".")[:-2]) + ".json" - with open(path_json, "w") as f: - json.dump(meta_json, f, indent=4) - vx = meta_json["PixelSize"] - unit = meta_json["PixelSizeUnits"] - else: - vx = [1] * 3 - unit = "um" - - # Prepare Zarr group - omz = zarr.storage.DirectoryStore(out) - omz = zarr.group(store=omz, overwrite=True) - - if not hasattr(inp, "dtype"): - raise Exception("Input is not a numpy array. This is likely unexpected") - if len(inp.shape) < 3: - raise Exception("Input array is not 3d") - # Prepare chunking options - opt = { - "dimension_separator": r"/", - "order": "F", - "dtype": np.dtype(inp.dtype).str, - "fill_value": None, - "compressor": make_compressor(compressor, **compressor_opt), - } - - inp_chunk = [min(x, max_load) for x in inp.shape] - nk = ceildiv(inp.shape[0], inp_chunk[0]) - nj = ceildiv(inp.shape[1], inp_chunk[1]) - ni = ceildiv(inp.shape[2], inp_chunk[2]) - - nblevels = min( - [int(math.ceil(math.log2(x))) for i, x in enumerate(inp.shape) if i != no_pool] - ) - nblevels = min(nblevels, int(math.ceil(math.log2(max_load)))) - nblevels = min(nblevels, max_levels) - - opt["chunks"] = [min(x, chunk) for x in inp.shape] - - omz.create_dataset(str(0), shape=inp.shape, **opt) - - # iterate across input chunks - for i, j, k in product(range(ni), range(nj), range(nk)): - loaded_chunk = inp[ - k * inp_chunk[0]: (k + 1) * inp_chunk[0], - j * inp_chunk[1]: (j + 1) * inp_chunk[1], - i * inp_chunk[2]: (i + 1) * inp_chunk[2], - ] - - print( - f"[{i + 1:03d}, {j + 1:03d}, {k + 1:03d}]", - "/", - f"[{ni:03d}, {nj:03d}, {nk:03d}]", - # f"({1 + level}/{nblevels})", - end="\r", - ) - - # save current chunk - omz["0"][ - k * inp_chunk[0]: k * inp_chunk[0] + loaded_chunk.shape[0], - j * inp_chunk[1]: j * inp_chunk[1] + loaded_chunk.shape[1], - i * inp_chunk[2]: i * inp_chunk[2] + loaded_chunk.shape[2], - ] = loaded_chunk - # TODO: no_pool is ignored for now, should add back - generate_pyramid(omz, nblevels - 1, mode="mean") - - print("") - - # Write OME-Zarr multiscale metadata - print("Write metadata") - print(unit) - ome_unit = to_ome_unit(unit) - multiscales = [ - { - "version": "0.4", - "axes": [ - {"name": "z", "type": "space", "unit": ome_unit}, - {"name": "y", "type": "space", "unit": ome_unit}, - {"name": "x", "type": "space", "unit": ome_unit}, - ], - "datasets": [], - "type": ("2x2x2" if no_pool is None else "2x2") + "mean window", - "name": "", - } - ] - - for n in range(nblevels): - multiscales[0]["datasets"].append({}) - level = multiscales[0]["datasets"][-1] - level["path"] = str(n) - - # With a moving window, the scaling factor is exactly 2, and - # the edges of the 
top-left voxel are aligned
-        level["coordinateTransformations"] = [
-            {
-                "type": "scale",
-                "scale": [
-                    (1 if no_pool == 0 else 2 ** n) * vx[0],
-                    (1 if no_pool == 1 else 2 ** n) * vx[1],
-                    (1 if no_pool == 2 else 2 ** n) * vx[2],
-                ],
-            },
-            {
-                "type": "translation",
-                "translation": [
-                    (0 if no_pool == 0 else (2 ** n - 1)) * vx[0] * 0.5,
-                    (0 if no_pool == 1 else (2 ** n - 1)) * vx[1] * 0.5,
-                    (0 if no_pool == 2 else (2 ** n - 1)) * vx[2] * 0.5,
-                ],
-            },
-        ]
-    multiscales[0]["coordinateTransformations"] = [
-        {"scale": [1.0] * 3, "type": "scale"}
-    ]
-    omz.attrs["multiscales"] = multiscales
-
-    if not nii:
-        print("done.")
-        return
-
-    # Write NIfTI-Zarr header
-    # NOTE: we use nifti2 because dimensions typically do not fit in a short
-    # TODO: we do not write the json zattrs, but it should be added in
-    #       once the nifti-zarr package is released
-    shape = list(reversed(omz["0"].shape))
-    affine = orientation_to_affine(orientation, *vx[::-1])
-    if center:
-        affine = center_affine(affine, shape[:3])
-    niftizarr_write_header(omz,shape,affine,omz["0"].dtype,to_nifti_unit(unit),nifti_version=2)
-    # header = nib.Nifti2Header()
-    # header.set_data_shape(shape)
-    # header.set_data_dtype(omz["0"].dtype)
-    # header.set_qform(affine)
-    # header.set_sform(affine)
-    # header.set_xyzt_units(nib.nifti1.unit_codes.code[to_nifti_unit(unit)])
-    # header.structarr["magic"] = b"nz2\0"
-    # header = np.frombuffer(header.structarr.tobytes(), dtype="u1")
-    # opt = {
-    #     "chunks": [len(header)],
-    #     "dimension_separator": r"/",
-    #     "order": "F",
-    #     "dtype": "|u1",
-    #     "fill_value": None,
-    #     "compressor": None,
-    # }
-    # omz.create_dataset("nifti", data=header, shape=len(header), **opt)
-    # print("done.")
-
-
-@contextmanager
-def mapmat(fnames, key=None):
-    """Load or memory-map an array stored in a .mat file"""
-    loaded_data = []
-
-    for fname in fnames:
-        try:
-            # "New" .mat file
-            f = h5py.File(fname, "r")
-        except Exception:
-            # "Old" .mat file
-            f = loadmat(fname)
-
-        if key is None:
-            if not len(f.keys()):
-                raise Exception(f"{fname} is empty")
-            key = list(f.keys())[0]
-            if len(f.keys()) > 1:
-                warn(
-                    f'More than one key in .mat file {fname}, arbitrarily loading "{key}"'
-                )
-
-        if key not in f.keys():
-            raise Exception(f"Key {key} not found in file {fname}")
-
-        if len(fnames) == 1:
-            yield f.get(key)
-            if hasattr(f, "close"):
-                f.close()
-            break
-        loaded_data.append(f.get(key))
-
-    yield np.stack(loaded_data, axis=-1)
+from linc_convert.utils.unit import convert_unit
 
 
 def make_json(oct_meta):
@@ -387,8 +106,6 @@ def parse_value_unit(string, n=None):
     return meta
 
-
-
 def generate_pyramid(
     omz,
     levels: int | None = None,
@@ -521,6 +238,7 @@ def generate_pyramid(
     pass
 
 
+
 def write_ome_metadata(

From 6d982728c10c1fd1f61b8c9ebca6b1826320564c Mon Sep 17 00:00:00 2001
From: Kaidong Chai
Date: Fri, 15 Nov 2024 15:45:38 -0500
Subject: [PATCH 34/51] Separate OCT pipeline to multi_slice.py and
 single_volume.py

---
 linc_convert/modalities/psoct/__init__.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/linc_convert/modalities/psoct/__init__.py b/linc_convert/modalities/psoct/__init__.py
index 7f771eb..b539c95 100644 
--- a/linc_convert/modalities/psoct/__init__.py +++ b/linc_convert/modalities/psoct/__init__.py @@ -1,5 +1,5 @@ """Dark Field microscopy converters.""" -__all__ = ["cli", "stacking"] +__all__ = ["cli", "multi_slice", "single_volume"] -from . import cli, stacking +from . import cli, multi_slice, single_volume From eb88d93af57d8382cef60a064ab3cac72f440ed6 Mon Sep 17 00:00:00 2001 From: Kaidong Chai Date: Fri, 15 Nov 2024 15:46:18 -0500 Subject: [PATCH 35/51] feat: generate pyramid without pooling specific axis --- .../modalities/psoct/single_volume.py | 2 +- linc_convert/modalities/psoct/utils.py | 46 +++++++++++++------ 2 files changed, 33 insertions(+), 15 deletions(-) diff --git a/linc_convert/modalities/psoct/single_volume.py b/linc_convert/modalities/psoct/single_volume.py index ecf8915..a1e545f 100644 --- a/linc_convert/modalities/psoct/single_volume.py +++ b/linc_convert/modalities/psoct/single_volume.py @@ -170,7 +170,7 @@ def convert( j * inp_chunk[1]: j * inp_chunk[1] + loaded_chunk.shape[1], i * inp_chunk[2]: i * inp_chunk[2] + loaded_chunk.shape[2], ] = loaded_chunk - # TODO: no_pool is ignored for now, should add back + generate_pyramid(omz, nblevels - 1, mode="mean") print("") diff --git a/linc_convert/modalities/psoct/utils.py b/linc_convert/modalities/psoct/utils.py index c6c3fed..7a5daff 100644 --- a/linc_convert/modalities/psoct/utils.py +++ b/linc_convert/modalities/psoct/utils.py @@ -112,6 +112,7 @@ def generate_pyramid( ndim: int = 3, max_load: int = 512, mode: Literal["mean", "median"] = "median", + no_pyramid_axis: int|str|None = None ) -> list[list[int]]: """ Generate the levels of a pyramid in an existing Zarr. @@ -143,9 +144,16 @@ def generate_pyramid( Shapes of all levels, from finest to coarsest, including the existing top level. 
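+
+    A minimal call sketch (the store path is illustrative); the
+    full-resolution array must already exist as level "0" of the group::
+
+        import zarr
+        omz = zarr.open_group("out.ome.zarr")
+        generate_pyramid(omz, levels=3, mode="mean", no_pyramid_axis=None)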
""" - + # Read properties from base level shape = list(omz["0"].shape) chunk_size = omz["0"].chunks + opt = { + "dimension_separator": omz["0"]._dimension_separator, + "order": omz["0"]._order, + "dtype": omz["0"]._dtype, + "fill_value": omz["0"]._fill_value, + "compressor": omz["0"]._compressor, + } # Select windowing function if mode == "median": @@ -158,19 +166,16 @@ def generate_pyramid( batch, shape = shape[:-ndim], shape[-ndim:] allshapes = [shape] - opt = { - "dimension_separator": omz["0"]._dimension_separator, - "order": omz["0"]._order, - "dtype": omz["0"]._dtype, - "fill_value": omz["0"]._fill_value, - "compressor": omz["0"]._compressor, - } - while True: level += 1 # Compute downsampled shape - prev_shape, shape = shape, [max(1, x // 2) for x in shape] + prev_shape, shape = shape, [] + for i, length in enumerate(prev_shape): + if i == no_pyramid_axis: + shape.append(length) + else: + shape.append(max(1, length // 2)) # Stop if seen enough levels or level shape smaller than chunk size if levels is None: @@ -198,16 +203,24 @@ def generate_pyramid( dat = omz[str(level - 1)][tuple(slicer)] # Discard the last voxel along odd dimensions - crop = [0 if x == 1 else x % 2 for x in dat.shape[-3:]] + crop = [0 if x == 1 else x % 2 for x in dat.shape[-ndim:]] + # Don't crop the axis not down-sampling + # cannot do if not no_pyramid_axis since it could be 0 + if no_pyramid_axis is not None: + crop[no_pyramid_axis] = 0 slcr = [slice(-1) if x else slice(None) for x in crop] dat = dat[tuple([Ellipsis, *slcr])] - patch_shape = dat.shape[-3:] + patch_shape = dat.shape[-ndim:] # Reshape into patches of shape 2x2x2 windowed_shape = [ x for n in patch_shape for x in (max(n // 2, 1), min(n, 2)) ] + if no_pyramid_axis is not None: + windowed_shape[2*no_pyramid_axis] = patch_shape[no_pyramid_axis] + windowed_shape[2 * no_pyramid_axis+1] = 1 + dat = dat.reshape(batch + windowed_shape) # -> last `ndim`` dimensions have shape 2x2x2 dat = dat.transpose( @@ -217,6 +230,9 @@ def generate_pyramid( ) # -> flatten patches smaller_shape = [max(n // 2, 1) for n in patch_shape] + if no_pyramid_axis is not None: + smaller_shape[2 * no_pyramid_axis] = patch_shape[no_pyramid_axis] + dat = dat.reshape(batch + smaller_shape + [-1]) # Compute the median/mean of each patch @@ -227,7 +243,9 @@ def generate_pyramid( # Write output slicer = [Ellipsis] + [ slice(i * max_load // 2, min((i + 1) * max_load // 2, n)) - for i, n in zip(chunk_index, shape) + if axis_index != no_pyramid_axis else + slice(i * max_load, min((i + 1) * max_load, n)) + for i, axis_index, n in zip(chunk_index, range(ndim), shape) ] omz[str(level)][tuple(slicer)] = dat @@ -357,7 +375,7 @@ def write_ome_metadata( } ] - shape = shape0 = shapes[0] + shape0 = shapes[0] for n in range(len(shapes)): shape = shapes[n] multiscales[0]["datasets"].append({}) From fd7e092f1121dd230dddba79ac4e94e6afaa96ad Mon Sep 17 00:00:00 2001 From: Kaidong Chai Date: Fri, 15 Nov 2024 16:21:52 -0500 Subject: [PATCH 36/51] refactor: write_ome_metadata --- .../modalities/psoct/single_volume.py | 91 ++++++++++--------- linc_convert/modalities/psoct/utils.py | 26 ++---- 2 files changed, 57 insertions(+), 60 deletions(-) diff --git a/linc_convert/modalities/psoct/single_volume.py b/linc_convert/modalities/psoct/single_volume.py index a1e545f..0d1a1b9 100644 --- a/linc_convert/modalities/psoct/single_volume.py +++ b/linc_convert/modalities/psoct/single_volume.py @@ -21,7 +21,7 @@ from linc_convert.modalities.psoct.cli import psoct from linc_convert.modalities.psoct.utils import 
make_json, generate_pyramid, \ - niftizarr_write_header + niftizarr_write_header, write_ome_metadata from linc_convert.utils.math import ceildiv from linc_convert.utils.orientation import orientation_to_affine, center_affine from linc_convert.utils.unit import to_ome_unit, to_nifti_unit @@ -179,49 +179,7 @@ def convert( print("Write metadata") print(unit) ome_unit = to_ome_unit(unit) - multiscales = [ - { - "version": "0.4", - "axes": [ - {"name": "z", "type": "space", "unit": ome_unit}, - {"name": "y", "type": "space", "unit": ome_unit}, - {"name": "x", "type": "space", "unit": ome_unit}, - ], - "datasets": [], - "type": ("2x2x2" if no_pool is None else "2x2") + "mean window", - "name": "", - } - ] - - for n in range(nblevels): - multiscales[0]["datasets"].append({}) - level = multiscales[0]["datasets"][-1] - level["path"] = str(n) - - # With a moving window, the scaling factor is exactly 2, and - # the edges of the top-left voxel are aligned - level["coordinateTransformations"] = [ - { - "type": "scale", - "scale": [ - (1 if no_pool == 0 else 2 ** n) * vx[0], - (1 if no_pool == 1 else 2 ** n) * vx[1], - (1 if no_pool == 2 else 2 ** n) * vx[2], - ], - }, - { - "type": "translation", - "translation": [ - (0 if no_pool == 0 else (2 ** n - 1)) * vx[0] * 0.5, - (0 if no_pool == 1 else (2 ** n - 1)) * vx[1] * 0.5, - (0 if no_pool == 2 else (2 ** n - 1)) * vx[2] * 0.5, - ], - }, - ] - multiscales[0]["coordinateTransformations"] = [ - {"scale": [1.0] * 3, "type": "scale"} - ] - omz.attrs["multiscales"] = multiscales + write_ome_metadata(omz, no_pool=no_pool, space_unit=ome_unit, space_scale=vx, multiscales_type=("2x2x2" if no_pool is None else "2x2") + "mean window") if not nii: print("done.") @@ -256,6 +214,51 @@ def convert( # print("done.") +# def write_ome_metadata(nblevels, no_pool, ome_unit, omz, vx): +# multiscales = [ +# { +# "version": "0.4", +# "axes": [ +# {"name": "z", "type": "space", "unit": ome_unit}, +# {"name": "y", "type": "space", "unit": ome_unit}, +# {"name": "x", "type": "space", "unit": ome_unit}, +# ], +# "datasets": [], +# "type": ("2x2x2" if no_pool is None else "2x2") + "mean window", +# "name": "", +# } +# ] +# for n in range(nblevels): +# multiscales[0]["datasets"].append({}) +# level = multiscales[0]["datasets"][-1] +# level["path"] = str(n) +# +# # With a moving window, the scaling factor is exactly 2, and +# # the edges of the top-left voxel are aligned +# level["coordinateTransformations"] = [ +# { +# "type": "scale", +# "scale": [ +# (1 if no_pool == 0 else 2 ** n) * vx[0], +# (1 if no_pool == 1 else 2 ** n) * vx[1], +# (1 if no_pool == 2 else 2 ** n) * vx[2], +# ], +# }, +# { +# "type": "translation", +# "translation": [ +# (0 if no_pool == 0 else (2 ** n - 1)) * vx[0] * 0.5, +# (0 if no_pool == 1 else (2 ** n - 1)) * vx[1] * 0.5, +# (0 if no_pool == 2 else (2 ** n - 1)) * vx[2] * 0.5, +# ], +# }, +# ] +# multiscales[0]["coordinateTransformations"] = [ +# {"scale": [1.0] * 3, "type": "scale"} +# ] +# omz.attrs["multiscales"] = multiscales + + @contextmanager def mapmat(fname, key=None): """Load or memory-map an array stored in a .mat file""" diff --git a/linc_convert/modalities/psoct/utils.py b/linc_convert/modalities/psoct/utils.py index 7a5daff..9d0f742 100644 --- a/linc_convert/modalities/psoct/utils.py +++ b/linc_convert/modalities/psoct/utils.py @@ -258,7 +258,7 @@ def generate_pyramid( def write_ome_metadata( - path: str | os.PathLike, + omz, axes: list[str], space_scale: float | list[float] = 1, time_scale: float = 1, @@ -267,6 +267,8 @@ def 
write_ome_metadata( name: str = "", pyramid_aligns: str | int | list[str | int] = 2, levels: int | None = None, + no_pool: int | None = None, + multiscales_type : str = "" ) -> None: """ Write OME metadata into Zarr. @@ -298,25 +300,19 @@ def write_ome_metadata( """ - # Detect zarr version - # Read shape at each pyramid level - zname = ".zarray" shapes = [] level = 0 while True: if levels is not None and level > levels: break - zpath = path / str(level) / zname - if not zpath.exists(): + if str(level) not in omz.keys(): levels = level break level += 1 - with zpath.open("rb") as f: - zarray = json.load(f) - shapes += [zarray["shape"]] + shapes += omz[str(level)].shape axis_to_type = { "x": "space", @@ -370,7 +366,7 @@ def write_ome_metadata( for axis in axes ], "datasets": [], - "type": "median window " + "x".join(["2"] * sdim), + "type": "median window " + "x".join(["2"] * sdim) if not multiscales_type else multiscales_type, "name": name, } ] @@ -391,6 +387,7 @@ def write_ome_metadata( else ((shape0[bdim + i] - 1) / (shape[bdim + i] - 1)) ) * space_scale[i] + if i != no_pool else space_scale[i] for i in range(sdim) ] translation = [0] * bdim + [ @@ -403,6 +400,7 @@ def write_ome_metadata( ) * 0.5 * space_scale[i] + if i != no_pool else 0 for i in range(sdim) ] @@ -417,18 +415,14 @@ def write_ome_metadata( }, ] - scale = [1] * ndim + scale = [1.0] * ndim if "t" in axes: scale[axes.index("t")] = time_scale multiscales[0]["coordinateTransformations"] = [{"scale": scale, "type": "scale"}] multiscales[0]["version"] = "0.4" - with (path / ".zgroup").open("wt") as f: - json.dump({"zarr_format": 2}, f, indent=4) - with (path / ".zattrs").open("wt") as f: - json.dump({"multiscales": multiscales}, f, indent=4) - + omz.attrs["multiscales"] = multiscales def niftizarr_write_header( omz, From 0a58cd97761ed4aa415df0feb59591b08b81c455 Mon Sep 17 00:00:00 2001 From: Kaidong Chai Date: Mon, 18 Nov 2024 09:17:13 -0500 Subject: [PATCH 37/51] refactor: move unused function --- .../modalities/psoct/single_volume.py | 44 ------------------- linc_convert/modalities/psoct/utils.py | 44 +++++++++++++++++++ 2 files changed, 44 insertions(+), 44 deletions(-) diff --git a/linc_convert/modalities/psoct/single_volume.py b/linc_convert/modalities/psoct/single_volume.py index 0d1a1b9..25a0e89 100644 --- a/linc_convert/modalities/psoct/single_volume.py +++ b/linc_convert/modalities/psoct/single_volume.py @@ -214,50 +214,6 @@ def convert( # print("done.") -# def write_ome_metadata(nblevels, no_pool, ome_unit, omz, vx): -# multiscales = [ -# { -# "version": "0.4", -# "axes": [ -# {"name": "z", "type": "space", "unit": ome_unit}, -# {"name": "y", "type": "space", "unit": ome_unit}, -# {"name": "x", "type": "space", "unit": ome_unit}, -# ], -# "datasets": [], -# "type": ("2x2x2" if no_pool is None else "2x2") + "mean window", -# "name": "", -# } -# ] -# for n in range(nblevels): -# multiscales[0]["datasets"].append({}) -# level = multiscales[0]["datasets"][-1] -# level["path"] = str(n) -# -# # With a moving window, the scaling factor is exactly 2, and -# # the edges of the top-left voxel are aligned -# level["coordinateTransformations"] = [ -# { -# "type": "scale", -# "scale": [ -# (1 if no_pool == 0 else 2 ** n) * vx[0], -# (1 if no_pool == 1 else 2 ** n) * vx[1], -# (1 if no_pool == 2 else 2 ** n) * vx[2], -# ], -# }, -# { -# "type": "translation", -# "translation": [ -# (0 if no_pool == 0 else (2 ** n - 1)) * vx[0] * 0.5, -# (0 if no_pool == 1 else (2 ** n - 1)) * vx[1] * 0.5, -# (0 if no_pool == 2 else (2 ** n - 1)) * 
vx[2] * 0.5, -# ], -# }, -# ] -# multiscales[0]["coordinateTransformations"] = [ -# {"scale": [1.0] * 3, "type": "scale"} -# ] -# omz.attrs["multiscales"] = multiscales - @contextmanager def mapmat(fname, key=None): diff --git a/linc_convert/modalities/psoct/utils.py b/linc_convert/modalities/psoct/utils.py index 9d0f742..3625af7 100644 --- a/linc_convert/modalities/psoct/utils.py +++ b/linc_convert/modalities/psoct/utils.py @@ -486,6 +486,50 @@ def niftizarr_write_header( print("done.") +# def write_ome_metadata(nblevels, no_pool, ome_unit, omz, vx): +# multiscales = [ +# { +# "version": "0.4", +# "axes": [ +# {"name": "z", "type": "space", "unit": ome_unit}, +# {"name": "y", "type": "space", "unit": ome_unit}, +# {"name": "x", "type": "space", "unit": ome_unit}, +# ], +# "datasets": [], +# "type": ("2x2x2" if no_pool is None else "2x2") + "mean window", +# "name": "", +# } +# ] +# for n in range(nblevels): +# multiscales[0]["datasets"].append({}) +# level = multiscales[0]["datasets"][-1] +# level["path"] = str(n) +# +# # With a moving window, the scaling factor is exactly 2, and +# # the edges of the top-left voxel are aligned +# level["coordinateTransformations"] = [ +# { +# "type": "scale", +# "scale": [ +# (1 if no_pool == 0 else 2 ** n) * vx[0], +# (1 if no_pool == 1 else 2 ** n) * vx[1], +# (1 if no_pool == 2 else 2 ** n) * vx[2], +# ], +# }, +# { +# "type": "translation", +# "translation": [ +# (0 if no_pool == 0 else (2 ** n - 1)) * vx[0] * 0.5, +# (0 if no_pool == 1 else (2 ** n - 1)) * vx[1] * 0.5, +# (0 if no_pool == 2 else (2 ** n - 1)) * vx[2] * 0.5, +# ], +# }, +# ] +# multiscales[0]["coordinateTransformations"] = [ +# {"scale": [1.0] * 3, "type": "scale"} +# ] +# omz.attrs["multiscales"] = multiscales + # # def generate_pyramid(inp, inp_chunk, nblevels, ni, nj, nk, no_pool, omz): From 68e28734774ed715d3052235b67f48db7387e77e Mon Sep 17 00:00:00 2001 From: Kaidong Chai Date: Mon, 18 Nov 2024 09:34:47 -0500 Subject: [PATCH 38/51] refactor: cleanup --- .../modalities/psoct/single_volume.py | 142 ++++++++---------- linc_convert/modalities/psoct/utils.py | 44 +++--- 2 files changed, 86 insertions(+), 100 deletions(-) diff --git a/linc_convert/modalities/psoct/single_volume.py b/linc_convert/modalities/psoct/single_volume.py index 25a0e89..f072838 100644 --- a/linc_convert/modalities/psoct/single_volume.py +++ b/linc_convert/modalities/psoct/single_volume.py @@ -10,7 +10,7 @@ from contextlib import contextmanager from functools import wraps from itertools import product -from typing import Optional, List +from typing import Optional from warnings import warn import cyclopts @@ -20,11 +20,15 @@ from scipy.io import loadmat from linc_convert.modalities.psoct.cli import psoct -from linc_convert.modalities.psoct.utils import make_json, generate_pyramid, \ - niftizarr_write_header, write_ome_metadata +from linc_convert.modalities.psoct.utils import ( + generate_pyramid, + make_json, + niftizarr_write_header, + write_ome_metadata, +) from linc_convert.utils.math import ceildiv -from linc_convert.utils.orientation import orientation_to_affine, center_affine -from linc_convert.utils.unit import to_ome_unit, to_nifti_unit +from linc_convert.utils.orientation import center_affine, orientation_to_affine +from linc_convert.utils.unit import to_nifti_unit, to_ome_unit from linc_convert.utils.zarr import make_compressor single_volume = cyclopts.App(name="single_volume", help_format="markdown") @@ -46,23 +50,48 @@ def wrapper(inp, out=None, **kwargs): return wrapper +@contextmanager +def 
mapmat(fname, key=None): + """Load or memory-map an array stored in a .mat file""" + try: + # "New" .mat file + f = h5py.File(fname, "r") + except Exception: + # "Old" .mat file + f = loadmat(fname) + + if key is None: + if not len(f.keys()): + raise Exception(f"{fname} is empty") + key = list(f.keys())[0] + if len(f.keys()) > 1: + warn(f'More than one key in .mat file {fname}, arbitrarily loading "{key}"') + + if key not in f.keys(): + raise Exception(f"Key {key} not found in file {fname}") + + yield f.get(key) + if hasattr(f, "close"): + f.close() + + @single_volume.default @automap def convert( - inp: str, - out: Optional[str] = None, - *, - key: Optional[str] = None, - meta: str = None, - chunk: int = 128, - compressor: str = "blosc", - compressor_opt: str = "{}", - max_load: int = 128, - max_levels: int = 5, - no_pool: Optional[int] = None, - nii: bool = False, - orientation: str = "RAS", - center: bool = True, + inp: str, + out: Optional[str] = None, + *, + key: Optional[str] = None, + meta: str = None, + chunk: int = 128, + compressor: str = "blosc", + compressor_opt: str = "{}", + max_load: int = 128, + max_levels: int = 5, + no_pool: Optional[int] = None, + nii: bool = False, + orientation: str = "RAS", + center: bool = True, ) -> None: """ This command converts OCT volumes stored in raw matlab files @@ -97,7 +126,6 @@ def convert( center Set RAS[0, 0, 0] at FOV center """ - if isinstance(compressor_opt, str): compressor_opt = ast.literal_eval(compressor_opt) @@ -151,10 +179,10 @@ def convert( # iterate across input chunks for i, j, k in product(range(ni), range(nj), range(nk)): loaded_chunk = inp[ - k * inp_chunk[0]: (k + 1) * inp_chunk[0], - j * inp_chunk[1]: (j + 1) * inp_chunk[1], - i * inp_chunk[2]: (i + 1) * inp_chunk[2], - ] + k * inp_chunk[0] : (k + 1) * inp_chunk[0], + j * inp_chunk[1] : (j + 1) * inp_chunk[1], + i * inp_chunk[2] : (i + 1) * inp_chunk[2], + ] print( f"[{i + 1:03d}, {j + 1:03d}, {k + 1:03d}]", @@ -166,9 +194,9 @@ def convert( # save current chunk omz["0"][ - k * inp_chunk[0]: k * inp_chunk[0] + loaded_chunk.shape[0], - j * inp_chunk[1]: j * inp_chunk[1] + loaded_chunk.shape[1], - i * inp_chunk[2]: i * inp_chunk[2] + loaded_chunk.shape[2], + k * inp_chunk[0] : k * inp_chunk[0] + loaded_chunk.shape[0], + j * inp_chunk[1] : j * inp_chunk[1] + loaded_chunk.shape[1], + i * inp_chunk[2] : i * inp_chunk[2] + loaded_chunk.shape[2], ] = loaded_chunk generate_pyramid(omz, nblevels - 1, mode="mean") @@ -179,7 +207,13 @@ def convert( print("Write metadata") print(unit) ome_unit = to_ome_unit(unit) - write_ome_metadata(omz, no_pool=no_pool, space_unit=ome_unit, space_scale=vx, multiscales_type=("2x2x2" if no_pool is None else "2x2") + "mean window") + write_ome_metadata( + omz, + no_pool=no_pool, + space_unit=ome_unit, + space_scale=vx, + multiscales_type=("2x2x2" if no_pool is None else "2x2") + "mean window", + ) if not nii: print("done.") @@ -193,52 +227,6 @@ def convert( affine = orientation_to_affine(orientation, *vx[::-1]) if center: affine = center_affine(affine, shape[:3]) - niftizarr_write_header(omz, shape, affine, omz["0"].dtype, to_nifti_unit(unit), nifti_version=2) - # header = nib.Nifti2Header() - # header.set_data_shape(shape) - # header.set_data_dtype(omz["0"].dtype) - # header.set_qform(affine) - # header.set_sform(affine) - # header.set_xyzt_units(nib.nifti1.unit_codes.code[to_nifti_unit(unit)]) - # header.structarr["magic"] = b"nz2\0" - # header = np.frombuffer(header.structarr.tobytes(), dtype="u1") - # opt = { - # "chunks": [len(header)], - # 
"dimension_separator": r"/", - # "order": "F", - # "dtype": "|u1", - # "fill_value": None, - # "compressor": None, - # } - # omz.create_dataset("nifti", data=header, shape=len(header), **opt) - # print("done.") - - - -@contextmanager -def mapmat(fname, key=None): - """Load or memory-map an array stored in a .mat file""" - try: - # "New" .mat file - f = h5py.File(fname, "r") - except Exception: - # "Old" .mat file - f = loadmat(fname) - - if key is None: - if not len(f.keys()): - raise Exception(f"{fname} is empty") - key = list(f.keys())[0] - if len(f.keys()) > 1: - warn( - f'More than one key in .mat file {fname}, arbitrarily loading "{key}"' - ) - - if key not in f.keys(): - raise Exception(f"Key {key} not found in file {fname}") - - - yield f.get(key) - if hasattr(f, "close"): - f.close() - + niftizarr_write_header( + omz, shape, affine, omz["0"].dtype, to_nifti_unit(unit), nifti_version=2 + ) diff --git a/linc_convert/modalities/psoct/utils.py b/linc_convert/modalities/psoct/utils.py index 3625af7..2e2ccb4 100644 --- a/linc_convert/modalities/psoct/utils.py +++ b/linc_convert/modalities/psoct/utils.py @@ -1,6 +1,4 @@ import itertools -import json -import os import re from typing import Literal @@ -107,12 +105,12 @@ def parse_value_unit(string, n=None): def generate_pyramid( - omz, - levels: int | None = None, - ndim: int = 3, - max_load: int = 512, - mode: Literal["mean", "median"] = "median", - no_pyramid_axis: int|str|None = None + omz, + levels: int | None = None, + ndim: int = 3, + max_load: int = 512, + mode: Literal["mean", "median"] = "median", + no_pyramid_axis: int | str | None = None, ) -> list[list[int]]: """ Generate the levels of a pyramid in an existing Zarr. @@ -218,8 +216,8 @@ def generate_pyramid( x for n in patch_shape for x in (max(n // 2, 1), min(n, 2)) ] if no_pyramid_axis is not None: - windowed_shape[2*no_pyramid_axis] = patch_shape[no_pyramid_axis] - windowed_shape[2 * no_pyramid_axis+1] = 1 + windowed_shape[2 * no_pyramid_axis] = patch_shape[no_pyramid_axis] + windowed_shape[2 * no_pyramid_axis + 1] = 1 dat = dat.reshape(batch + windowed_shape) # -> last `ndim`` dimensions have shape 2x2x2 @@ -243,8 +241,8 @@ def generate_pyramid( # Write output slicer = [Ellipsis] + [ slice(i * max_load // 2, min((i + 1) * max_load // 2, n)) - if axis_index != no_pyramid_axis else - slice(i * max_load, min((i + 1) * max_load, n)) + if axis_index != no_pyramid_axis + else slice(i * max_load, min((i + 1) * max_load, n)) for i, axis_index, n in zip(chunk_index, range(ndim), shape) ] @@ -268,7 +266,7 @@ def write_ome_metadata( pyramid_aligns: str | int | list[str | int] = 2, levels: int | None = None, no_pool: int | None = None, - multiscales_type : str = "" + multiscales_type: str = "", ) -> None: """ Write OME metadata into Zarr. @@ -299,7 +297,6 @@ def write_ome_metadata( Zarr version. If `None`, guess from existing zarr array. 
""" - # Read shape at each pyramid level shapes = [] level = 0 @@ -366,7 +363,9 @@ def write_ome_metadata( for axis in axes ], "datasets": [], - "type": "median window " + "x".join(["2"] * sdim) if not multiscales_type else multiscales_type, + "type": "median window " + "x".join(["2"] * sdim) + if not multiscales_type + else multiscales_type, "name": name, } ] @@ -387,7 +386,8 @@ def write_ome_metadata( else ((shape0[bdim + i] - 1) / (shape[bdim + i] - 1)) ) * space_scale[i] - if i != no_pool else space_scale[i] + if i != no_pool + else space_scale[i] for i in range(sdim) ] translation = [0] * bdim + [ @@ -400,7 +400,8 @@ def write_ome_metadata( ) * 0.5 * space_scale[i] - if i != no_pool else 0 + if i != no_pool + else 0 for i in range(sdim) ] @@ -420,10 +421,10 @@ def write_ome_metadata( scale[axes.index("t")] = time_scale multiscales[0]["coordinateTransformations"] = [{"scale": scale, "type": "scale"}] - multiscales[0]["version"] = "0.4" omz.attrs["multiscales"] = multiscales + def niftizarr_write_header( omz, shape: list[int], @@ -431,7 +432,7 @@ def niftizarr_write_header( dtype: np.dtype | str, unit: Literal["micron", "mm"] | None = None, header: nib.Nifti1Header | nib.Nifti2Header | None = None, - nifti_version: Literal[1,2] = 1 + nifti_version: Literal[1, 2] = 1, ) -> None: """ Write NIfTI header in a NIfTI-Zarr file. @@ -472,13 +473,12 @@ def niftizarr_write_header( header.set_xyzt_units(nib.nifti1.unit_codes.code[unit]) header = np.frombuffer(header.structarr.tobytes(), dtype="u1") - metadata = { "chunks": [len(header)], "order": "F", "dtype": "|u1", "fill_value": None, - "compressor": None, # TODO: Subject to change compression + "compressor": None, # TODO: Subject to change compression } omz.create_dataset("nifti", data=header, shape=len(header), **metadata) @@ -689,5 +689,3 @@ def niftizarr_write_header( # omz.create_dataset(str(level), shape=shape_level, **opt) # shape_level = [x if i == no_pool else x // 2 for i, x in enumerate(shape_level)] # - - From 4e0c4c6ff6bbf655dd3384a1a4ea06d1c60f839c Mon Sep 17 00:00:00 2001 From: Kaidong Chai Date: Mon, 18 Nov 2024 10:10:01 -0500 Subject: [PATCH 39/51] docs --- .../modalities/psoct/single_volume.py | 15 +++++++------- linc_convert/modalities/psoct/utils.py | 20 +++++++++---------- linc_convert/utils/unit.py | 4 ++++ 3 files changed, 22 insertions(+), 17 deletions(-) diff --git a/linc_convert/modalities/psoct/single_volume.py b/linc_convert/modalities/psoct/single_volume.py index f072838..5a590f9 100644 --- a/linc_convert/modalities/psoct/single_volume.py +++ b/linc_convert/modalities/psoct/single_volume.py @@ -35,24 +35,24 @@ psoct.command(single_volume) -def automap(func): - """Decorator to automatically map the array in the mat file""" +def _automap(func): + """Decorator to automatically map the array in the mat file.""" @wraps(func) - def wrapper(inp, out=None, **kwargs): + def wrapper(inp:str, out:str=None, **kwargs:dict): if out is None: out = os.path.splitext(inp[0])[0] out += ".nii.zarr" if kwargs.get("nii", False) else ".ome.zarr" kwargs["nii"] = kwargs.get("nii", False) or out.endswith(".nii.zarr") - with mapmat(inp, kwargs.get("key", None)) as dat: + with _mapmat(inp, kwargs.get("key", None)) as dat: return func(dat, out, **kwargs) return wrapper @contextmanager -def mapmat(fname, key=None): - """Load or memory-map an array stored in a .mat file""" +def _mapmat(fname: str, key:str=None)->None: + """Load or memory-map an array stored in a .mat file.""" try: # "New" .mat file f = h5py.File(fname, "r") @@ -76,7 +76,7 @@ def 
mapmat(fname, key=None):
 
 
 @single_volume.default
-@automap
+@_automap
 def convert(
     inp: str,
     out: Optional[str] = None,
     *,
     key: Optional[str] = None,
     meta: str = None,
@@ -209,6 +209,7 @@ def convert(
     ome_unit = to_ome_unit(unit)
     write_ome_metadata(
         omz,
+        axes=("z","y","x"),
         no_pool=no_pool,
         space_unit=ome_unit,
         space_scale=vx,
diff --git a/linc_convert/modalities/psoct/utils.py b/linc_convert/modalities/psoct/utils.py
index 2e2ccb4..1d57042 100644
--- a/linc_convert/modalities/psoct/utils.py
+++ b/linc_convert/modalities/psoct/utils.py
@@ -28,7 +28,7 @@ def make_json(oct_meta):
         Modality: dBI
     """
 
-    def parse_value_unit(string, n=None):
+    def _parse_value_unit(string:str, n=None):
         number = r"-?(\d+\.?\d*|\d*\.?\d+)(E-?\d+)?"
         value = "x".join([number] * (n or 1))
         match = re.fullmatch(r"(?P<value>" + value + r")(?P<unit>\w*)", string)
@@ -58,41 +58,41 @@ def parse_value_unit(string, n=None):
             meta["SampleMedium"] = " ".join(parts)
 
         elif key == "Center Wavelength":
-            value, unit = parse_value_unit(value)
+            value, unit = _parse_value_unit(value)
             meta["Wavelength"] = value
             meta["WavelengthUnit"] = unit
 
         elif key == "Axial resolution":
-            value, unit = parse_value_unit(value)
+            value, unit = _parse_value_unit(value)
             meta["ResolutionAxial"] = value
             meta["ResolutionAxialUnit"] = unit
 
         elif key == "Lateral resolution":
-            value, unit = parse_value_unit(value)
+            value, unit = _parse_value_unit(value)
             meta["ResolutionLateral"] = value
             meta["ResolutionLateralUnit"] = unit
 
         elif key == "Voxel size":
-            value, unit = parse_value_unit(value, n=3)
+            value, unit = _parse_value_unit(value, n=3)
             meta["PixelSize"] = value
             meta["PixelSizeUnits"] = unit
 
         elif key == "Depth focus range":
-            value, unit = parse_value_unit(value)
+            value, unit = _parse_value_unit(value)
             meta["DepthFocusRange"] = value
             meta["DepthFocusRangeUnit"] = unit
 
         elif key == "Number of focuses":
-            value, unit = parse_value_unit(value)
+            value, unit = _parse_value_unit(value)
             meta["FocusCount"] = int(value)
 
         elif key == "Slice thickness":
-            value, unit = parse_value_unit(value)
+            value, unit = _parse_value_unit(value)
             unit = convert_unit(value, unit[:-1], "u")
             meta["SliceThickness"] = value
 
         elif key == "Number of slices":
-            value, unit = parse_value_unit(value)
+            value, unit = _parse_value_unit(value)
             meta["SliceCount"] = int(value)
 
         elif key == "Modality":
@@ -309,7 +309,7 @@ def write_ome_metadata(
             break
         level += 1
-    shapes += omz[str(level)].shape
+
 
     axis_to_type = {
         "x": "space",
         "y": "space",
diff --git a/linc_convert/utils/unit.py b/linc_convert/utils/unit.py
index 01c4463..51afb52 100644
--- a/linc_convert/utils/unit.py
+++ b/linc_convert/utils/unit.py
@@ -1,3 +1,7 @@
+"""
+Converts units between zarr and other specifications
+"""
+
 ome_valid_units = {
     "space": [
         "angstrom",

From ed675562ee90ff31aa4260a09971fd4fdc0c395c Mon Sep 17 00:00:00 2001
From: Kaidong Chai
Date: Mon, 18 Nov 2024 10:10:26 -0500
Subject: [PATCH 40/51] fix: shape tuple was concatenated into the list

---
 linc_convert/modalities/psoct/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/linc_convert/modalities/psoct/utils.py b/linc_convert/modalities/psoct/utils.py
index 1d57042..9f28f1b 100644
--- a/linc_convert/modalities/psoct/utils.py
+++ b/linc_convert/modalities/psoct/utils.py
@@ -307,7 +307,7 @@ def write_ome_metadata(
         if str(level) not in omz.keys():
             levels = level
             break
-
+        shapes += [omz[str(level)].shape,]
         level += 1
 

From 5d6beed906d22cb29b09cc34e894ae2943d2e184 Mon Sep 17 00:00:00 2001
From: Kaidong Chai
Date: Tue, 19 Nov 2024 09:25:51 -0500
Subject: [PATCH 41/51] docs: add annotations and fix lint error

---
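In patch 40 above, the one-line fix works around a classic Python pitfall: `list += tuple` extends the list element-wise, so each pyramid level's shape tuple was being flattened into bare integers rather than stored whole. A minimal sketch of the two behaviours:

    shapes = []
    shapes += (128, 64, 32)      # extends element-wise -> [128, 64, 32]
    shapes += [(128, 64, 32)]    # appends one tuple    -> [(128, 64, 32)]

Wrapping the tuple in a one-element list (or calling `shapes.append(...)`) keeps one shape tuple per level, which is what the per-level scale and translation computations in `write_ome_metadata` expect when they index `shape[bdim + i]` for each dataset.
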
 .../modalities/psoct/{utils.py => _utils.py} | 227 +-------------
 linc_convert/modalities/psoct/multi_slice.py  | 291 ------------------
 .../modalities/psoct/single_volume.py         |  22 +-
 linc_convert/utils/unit.py                    |   8 +-
 4 files changed, 32 insertions(+), 516 deletions(-)
 rename linc_convert/modalities/psoct/{utils.py => _utils.py} (61%)
 delete mode 100644 linc_convert/modalities/psoct/multi_slice.py

diff --git a/linc_convert/modalities/psoct/utils.py b/linc_convert/modalities/psoct/_utils.py
similarity index 61%
rename from linc_convert/modalities/psoct/utils.py
rename to linc_convert/modalities/psoct/_utils.py
index 9f28f1b..380515e 100644
--- a/linc_convert/modalities/psoct/utils.py
+++ b/linc_convert/modalities/psoct/_utils.py
@@ -1,16 +1,19 @@
 import itertools
 import re
-from typing import Literal
+from typing import Any, Literal
 
 import nibabel as nib
 import numpy as np
+import zarr
 
 from linc_convert.utils.math import ceildiv
 from linc_convert.utils.unit import convert_unit
 
 
-def make_json(oct_meta):
+def make_json(oct_meta: str) -> dict:
     """
+    Make json from OCT metadata.
+
     Expected input:
     ---------------
     Image medium: 60% TDE
@@ -28,7 +31,9 @@ def make_json(oct_meta):
         Modality: dBI
     """
 
-    def _parse_value_unit(string:str, n=None):
+    def _parse_value_unit(
+        string: str, n: int = None
+    ) -> tuple[float | list[float], str | Any]:
         number = r"-?(\d+\.?\d*|\d*\.?\d+)(E-?\d+)?"
         value = "x".join([number] * (n or 1))
         match = re.fullmatch(r"(?P<value>" + value + r")(?P<unit>\w*)", string)
@@ -105,7 +110,7 @@ def _parse_value_unit(string:str, n=None):
 
 
 def generate_pyramid(
-    omz,
+    omz: zarr.Group,
     levels: int | None = None,
     ndim: int = 3,
     max_load: int = 512,
@@ -256,7 +261,7 @@ def generate_pyramid(
 
 
 def write_ome_metadata(
-    omz,
+    omz: zarr.Group,
     axes: list[str],
     space_scale: float | list[float] = 1,
     time_scale: float = 1,
@@ -307,10 +312,11 @@ def write_ome_metadata(
         if str(level) not in omz.keys():
            levels = level
            break
-        shapes += [omz[str(level)].shape,]
+        shapes += [
+            omz[str(level)].shape,
+        ]
         level += 1
 
-
     axis_to_type = {
         "x": "space",
         "y": "space",
@@ -426,7 +432,7 @@ def write_ome_metadata(
 
 
 def niftizarr_write_header(
-    omz,
+    omz: zarr.Group,
     shape: list[int],
     affine: np.ndarray,
     dtype: np.dtype | str,
@@ -484,208 +490,3 @@ def niftizarr_write_header(
     omz.create_dataset("nifti", data=header, shape=len(header), **metadata)
 
     print("done.")
-
-
-# def write_ome_metadata(nblevels, no_pool, ome_unit, omz, vx):
-#     multiscales = [
-#         {
-#             "version": "0.4",
-#             "axes": [
-#                 {"name": "z", "type": "space", "unit": ome_unit},
-#                 {"name": "y", "type": "space", "unit": ome_unit},
-#                 {"name": "x", "type": "space", "unit": ome_unit},
-#             ],
-#             "datasets": [],
-#             "type": ("2x2x2" if no_pool is None else "2x2") + "mean window",
-#             "name": "",
-#         }
-#     ]
-#     for n in range(nblevels):
-#         multiscales[0]["datasets"].append({})
-#         level = multiscales[0]["datasets"][-1]
-#         level["path"] = str(n)
-#
-#         # With a moving window, the scaling factor is exactly 2, and
-#         # the edges of the top-left voxel are aligned
-#         level["coordinateTransformations"] = [
-#             {
-#                 "type": "scale",
-#                 "scale": [
-#                     (1 if no_pool == 0 else 2 ** n) * vx[0],
-#                     (1 if no_pool == 1 else 2 ** n) * vx[1],
-#                     (1 if no_pool == 2 else 2 ** n) * vx[2],
-#                 ],
-#             },
-#             {
-#                 "type": "translation",
-#                 "translation": [
-#                     (0 if no_pool == 0 else (2 ** n - 1)) * vx[0] * 0.5,
-#                     (0 if no_pool == 1 else (2 ** n - 1)) * vx[1] * 0.5,
-#                     (0 if no_pool == 2 else (2 ** n - 1)) * vx[2] * 0.5,
-#                 ],
-#             },
-#         ]
-#     multiscales[0]["coordinateTransformations"] = [
-#         {"scale": 
[1.0] * 3, "type": "scale"} -# ] -# omz.attrs["multiscales"] = multiscales - - -# -# def generate_pyramid(inp, inp_chunk, nblevels, ni, nj, nk, no_pool, omz): -# for i, j, k in product(range(ni), range(nj), range(nk)): -# level_chunk = inp_chunk -# loaded_chunk = inp[ -# k * level_chunk[0]: (k + 1) * level_chunk[0], -# j * level_chunk[1]: (j + 1) * level_chunk[1], -# i * level_chunk[2]: (i + 1) * level_chunk[2], -# ] -# -# out_level = omz["0"] -# -# print( -# f"[{i + 1:03d}, {j + 1:03d}, {k + 1:03d}]", -# "/", -# f"[{ni:03d}, {nj:03d}, {nk:03d}]", -# # f"({1 + level}/{nblevels})", -# end="\r", -# ) -# -# # save current chunk -# out_level[ -# k * level_chunk[0]: k * level_chunk[0] + loaded_chunk.shape[0], -# j * level_chunk[1]: j * level_chunk[1] + loaded_chunk.shape[1], -# i * level_chunk[2]: i * level_chunk[2] + loaded_chunk.shape[2], -# ] = loaded_chunk -# -# for level in range(nblevels): -# out_level = omz[str(level)] -# -# print( -# f"[{i + 1:03d}, {j + 1:03d}, {k + 1:03d}]", -# "/", -# f"[{ni:03d}, {nj:03d}, {nk:03d}]", -# f"({1 + level}/{nblevels})", -# end="\r", -# ) -# -# # save current chunk -# out_level[ -# k * level_chunk[0]: k * level_chunk[0] + loaded_chunk.shape[0], -# j * level_chunk[1]: j * level_chunk[1] + loaded_chunk.shape[1], -# i * level_chunk[2]: i * level_chunk[2] + loaded_chunk.shape[2], -# ] = loaded_chunk -# # ensure divisible by 2 -# loaded_chunk = loaded_chunk[ -# slice(2 * (loaded_chunk.shape[0] // 2) if 0 != no_pool else None), -# slice(2 * (loaded_chunk.shape[1] // 2) if 1 != no_pool else None), -# slice(2 * (loaded_chunk.shape[2] // 2) if 2 != no_pool else None), -# ] -# # mean pyramid (average each 2x2x2 patch) -# if no_pool == 0: -# loaded_chunk = ( -# loaded_chunk[:, 0::2, 0::2] -# + loaded_chunk[:, 0::2, 1::2] -# + loaded_chunk[:, 1::2, 0::2] -# + loaded_chunk[:, 1::2, 1::2] -# ) / 4 -# elif no_pool == 1: -# loaded_chunk = ( -# loaded_chunk[0::2, :, 0::2] -# + loaded_chunk[0::2, :, 1::2] -# + loaded_chunk[1::2, :, 0::2] -# + loaded_chunk[1::2, :, 1::2] -# ) / 4 -# elif no_pool == 2: -# loaded_chunk = ( -# loaded_chunk[0::2, 0::2, :] -# + loaded_chunk[0::2, 1::2, :] -# + loaded_chunk[1::2, 0::2, :] -# + loaded_chunk[1::2, 1::2, :] -# ) / 4 -# else: -# loaded_chunk = ( -# loaded_chunk[0::2, 0::2, 0::2] -# + loaded_chunk[0::2, 0::2, 1::2] -# + loaded_chunk[0::2, 1::2, 0::2] -# + loaded_chunk[0::2, 1::2, 1::2] -# + loaded_chunk[1::2, 0::2, 0::2] -# + loaded_chunk[1::2, 0::2, 1::2] -# + loaded_chunk[1::2, 1::2, 0::2] -# + loaded_chunk[1::2, 1::2, 1::2] -# ) / 8 -# level_chunk = [ -# x if i == no_pool else x // 2 for i, x in enumerate(level_chunk) -# ] -# - -# def generate_pyramid(i, j, k, level_chunk, loaded_chunk, nblevels, ni, nj, nk, no_pool, -# omz): -# for level in range(nblevels): -# out_level = omz[str(level)] -# -# print( -# f"[{i + 1:03d}, {j + 1:03d}, {k + 1:03d}]", -# "/", -# f"[{ni:03d}, {nj:03d}, {nk:03d}]", -# f"({1 + level}/{nblevels})", -# end="\r", -# ) -# -# # save current chunk -# out_level[ -# k * level_chunk[0]: k * level_chunk[0] + loaded_chunk.shape[0], -# j * level_chunk[1]: j * level_chunk[1] + loaded_chunk.shape[1], -# i * level_chunk[2]: i * level_chunk[2] + loaded_chunk.shape[2], -# ] = loaded_chunk -# # ensure divisible by 2 -# loaded_chunk = loaded_chunk[ -# slice(2 * (loaded_chunk.shape[0] // 2) if 0 != no_pool else None), -# slice(2 * (loaded_chunk.shape[1] // 2) if 1 != no_pool else None), -# slice(2 * (loaded_chunk.shape[2] // 2) if 2 != no_pool else None), -# ] -# # mean pyramid (average each 2x2x2 patch) -# if no_pool == 0: 
-# loaded_chunk = ( -# loaded_chunk[:, 0::2, 0::2] -# + loaded_chunk[:, 0::2, 1::2] -# + loaded_chunk[:, 1::2, 0::2] -# + loaded_chunk[:, 1::2, 1::2] -# ) / 4 -# elif no_pool == 1: -# loaded_chunk = ( -# loaded_chunk[0::2, :, 0::2] -# + loaded_chunk[0::2, :, 1::2] -# + loaded_chunk[1::2, :, 0::2] -# + loaded_chunk[1::2, :, 1::2] -# ) / 4 -# elif no_pool == 2: -# loaded_chunk = ( -# loaded_chunk[0::2, 0::2, :] -# + loaded_chunk[0::2, 1::2, :] -# + loaded_chunk[1::2, 0::2, :] -# + loaded_chunk[1::2, 1::2, :] -# ) / 4 -# else: -# loaded_chunk = ( -# loaded_chunk[0::2, 0::2, 0::2] -# + loaded_chunk[0::2, 0::2, 1::2] -# + loaded_chunk[0::2, 1::2, 0::2] -# + loaded_chunk[0::2, 1::2, 1::2] -# + loaded_chunk[1::2, 0::2, 0::2] -# + loaded_chunk[1::2, 0::2, 1::2] -# + loaded_chunk[1::2, 1::2, 0::2] -# + loaded_chunk[1::2, 1::2, 1::2] -# ) / 8 -# level_chunk = [ -# x if i == no_pool else x // 2 for i, x in enumerate(level_chunk) -# ] - -# -# def create_level(chunk, inp, nblevels, no_pool, omz, opt): -# shape_level = inp.shape -# for level in range(1,nblevels,1): -# opt["chunks"] = [min(x, chunk) for x in shape_level] -# omz.create_dataset(str(level), shape=shape_level, **opt) -# shape_level = [x if i == no_pool else x // 2 for i, x in enumerate(shape_level)] -# diff --git a/linc_convert/modalities/psoct/multi_slice.py b/linc_convert/modalities/psoct/multi_slice.py deleted file mode 100644 index fddf988..0000000 --- a/linc_convert/modalities/psoct/multi_slice.py +++ /dev/null @@ -1,291 +0,0 @@ -""" -Converts Matlab files generated by the MGH in-house OCT pipeline -into a OME-ZARR pyramid. -""" - -import ast -import json -import math -import os -from contextlib import contextmanager -from functools import wraps -from itertools import product -from typing import Optional, List -from warnings import warn - -import cyclopts -import h5py -import numpy as np -import zarr -from scipy.io import loadmat - -from linc_convert.modalities.psoct.cli import psoct -from linc_convert.modalities.psoct.utils import make_json, generate_pyramid, \ - niftizarr_write_header -from linc_convert.utils.math import ceildiv -from linc_convert.utils.orientation import orientation_to_affine, center_affine -from linc_convert.utils.unit import to_ome_unit, to_nifti_unit -from linc_convert.utils.zarr import make_compressor - -multi_slice = cyclopts.App(name="multi_slice", help_format="markdown") -psoct.command(multi_slice) - - -def automap(func): - """Decorator to automatically map the array in the mat file""" - - @wraps(func) - def wrapper(inp, out=None, **kwargs): - if out is None: - out = os.path.splitext(inp[0])[0] - out += ".nii.zarr" if kwargs.get("nii", False) else ".ome.zarr" - kwargs["nii"] = kwargs.get("nii", False) or out.endswith(".nii.zarr") - with mapmat(inp, kwargs.get("key", None)) as dat: - return func(dat, out, **kwargs) - - return wrapper - - -@multi_slice.default -@automap -def convert( - inp: List[str], - out: Optional[str] = None, - *, - key: Optional[str] = None, - meta: str = None, - chunk: int = 128, - compressor: str = "blosc", - compressor_opt: str = "{}", - max_load: int = 128, - max_levels: int = 5, - no_pool: Optional[int] = None, - nii: bool = False, - orientation: str = "RAS", - center: bool = True, -) -> None: - """ - This command converts OCT volumes stored in raw matlab files - into a pyramidal OME-ZARR (or NIfTI-Zarr) hierarchy. 
- - Parameters - ---------- - inp - Path to the input mat file - out - Path to the output Zarr directory [.ome.zarr] - key - Key of the array to be extracted, default to first key found - meta - Path to the metadata file - chunk - Output chunk size - compressor : {blosc, zlib, raw} - Compression method - compressor_opt - Compression options - max_load - Maximum input chunk size - max_levels - Maximum number of pyramid levels - no_pool - Index of dimension to not pool when building pyramid - nii - Convert to nifti-zarr. True if path ends in ".nii.zarr" - orientation - Orientation of the volume - center - Set RAS[0, 0, 0] at FOV center - """ - - if isinstance(compressor_opt, str): - compressor_opt = ast.literal_eval(compressor_opt) - - # Write OME-Zarr multiscale metadata - if meta: - print("Write JSON") - with open(meta, "r") as f: - meta_txt = f.read() - meta_json = make_json(meta_txt) - path_json = ".".join(out.split(".")[:-2]) + ".json" - with open(path_json, "w") as f: - json.dump(meta_json, f, indent=4) - vx = meta_json["PixelSize"] - unit = meta_json["PixelSizeUnits"] - else: - vx = [1] * 3 - unit = "um" - - # Prepare Zarr group - omz = zarr.storage.DirectoryStore(out) - omz = zarr.group(store=omz, overwrite=True) - - if not hasattr(inp, "dtype"): - raise Exception("Input is not a numpy array. This is likely unexpected") - if len(inp.shape) < 3: - raise Exception("Input array is not 3d") - # Prepare chunking options - opt = { - "dimension_separator": r"/", - "order": "F", - "dtype": np.dtype(inp.dtype).str, - "fill_value": None, - "compressor": make_compressor(compressor, **compressor_opt), - } - - inp_chunk = [min(x, max_load) for x in inp.shape] - nk = ceildiv(inp.shape[0], inp_chunk[0]) - nj = ceildiv(inp.shape[1], inp_chunk[1]) - ni = ceildiv(inp.shape[2], inp_chunk[2]) - - nblevels = min( - [int(math.ceil(math.log2(x))) for i, x in enumerate(inp.shape) if i != no_pool] - ) - nblevels = min(nblevels, int(math.ceil(math.log2(max_load)))) - nblevels = min(nblevels, max_levels) - - opt["chunks"] = [min(x, chunk) for x in inp.shape] - - omz.create_dataset(str(0), shape=inp.shape, **opt) - - # iterate across input chunks - for i, j, k in product(range(ni), range(nj), range(nk)): - loaded_chunk = inp[ - k * inp_chunk[0]: (k + 1) * inp_chunk[0], - j * inp_chunk[1]: (j + 1) * inp_chunk[1], - i * inp_chunk[2]: (i + 1) * inp_chunk[2], - ] - - print( - f"[{i + 1:03d}, {j + 1:03d}, {k + 1:03d}]", - "/", - f"[{ni:03d}, {nj:03d}, {nk:03d}]", - # f"({1 + level}/{nblevels})", - end="\r", - ) - - # save current chunk - omz["0"][ - k * inp_chunk[0]: k * inp_chunk[0] + loaded_chunk.shape[0], - j * inp_chunk[1]: j * inp_chunk[1] + loaded_chunk.shape[1], - i * inp_chunk[2]: i * inp_chunk[2] + loaded_chunk.shape[2], - ] = loaded_chunk - # TODO: no_pool is ignored for now, should add back - generate_pyramid(omz, nblevels - 1, mode="mean") - - print("") - - # Write OME-Zarr multiscale metadata - print("Write metadata") - print(unit) - ome_unit = to_ome_unit(unit) - multiscales = [ - { - "version": "0.4", - "axes": [ - {"name": "z", "type": "space", "unit": ome_unit}, - {"name": "y", "type": "space", "unit": ome_unit}, - {"name": "x", "type": "space", "unit": ome_unit}, - ], - "datasets": [], - "type": ("2x2x2" if no_pool is None else "2x2") + "mean window", - "name": "", - } - ] - - for n in range(nblevels): - multiscales[0]["datasets"].append({}) - level = multiscales[0]["datasets"][-1] - level["path"] = str(n) - - # With a moving window, the scaling factor is exactly 2, and - # the edges of the 
top-left voxel are aligned - level["coordinateTransformations"] = [ - { - "type": "scale", - "scale": [ - (1 if no_pool == 0 else 2 ** n) * vx[0], - (1 if no_pool == 1 else 2 ** n) * vx[1], - (1 if no_pool == 2 else 2 ** n) * vx[2], - ], - }, - { - "type": "translation", - "translation": [ - (0 if no_pool == 0 else (2 ** n - 1)) * vx[0] * 0.5, - (0 if no_pool == 1 else (2 ** n - 1)) * vx[1] * 0.5, - (0 if no_pool == 2 else (2 ** n - 1)) * vx[2] * 0.5, - ], - }, - ] - multiscales[0]["coordinateTransformations"] = [ - {"scale": [1.0] * 3, "type": "scale"} - ] - omz.attrs["multiscales"] = multiscales - - if not nii: - print("done.") - return - - # Write NIfTI-Zarr header - # NOTE: we use nifti2 because dimensions typically do not fit in a short - # TODO: we do not write the json zattrs, but it should be added in - # once the nifti-zarr package is released - shape = list(reversed(omz["0"].shape)) - affine = orientation_to_affine(orientation, *vx[::-1]) - if center: - affine = center_affine(affine, shape[:3]) - niftizarr_write_header(omz, shape, affine, omz["0"].dtype, to_nifti_unit(unit), nifti_version=2) - # header = nib.Nifti2Header() - # header.set_data_shape(shape) - # header.set_data_dtype(omz["0"].dtype) - # header.set_qform(affine) - # header.set_sform(affine) - # header.set_xyzt_units(nib.nifti1.unit_codes.code[to_nifti_unit(unit)]) - # header.structarr["magic"] = b"nz2\0" - # header = np.frombuffer(header.structarr.tobytes(), dtype="u1") - # opt = { - # "chunks": [len(header)], - # "dimension_separator": r"/", - # "order": "F", - # "dtype": "|u1", - # "fill_value": None, - # "compressor": None, - # } - # omz.create_dataset("nifti", data=header, shape=len(header), **opt) - # print("done.") - - -@contextmanager -def mapmat(fnames, key=None): - """Load or memory-map an array stored in a .mat file""" - loaded_data = [] - - for fname in fnames: - try: - # "New" .mat file - f = h5py.File(fname, "r") - except Exception: - # "Old" .mat file - f = loadmat(fname) - - if key is None: - if not len(f.keys()): - raise Exception(f"{fname} is empty") - key = list(f.keys())[0] - if len(f.keys()) > 1: - warn( - f'More than one key in .mat file {fname}, arbitrarily loading "{key}"' - ) - - if key not in f.keys(): - raise Exception(f"Key {key} not found in file {fname}") - - if len(fnames) == 1: - yield f.get(key) - if hasattr(f, "close"): - f.close() - break - loaded_data.append(f.get(key)) - - yield np.stack(loaded_data, axis=-1) diff --git a/linc_convert/modalities/psoct/single_volume.py b/linc_convert/modalities/psoct/single_volume.py index 5a590f9..6bf651b 100644 --- a/linc_convert/modalities/psoct/single_volume.py +++ b/linc_convert/modalities/psoct/single_volume.py @@ -1,4 +1,6 @@ """ +Matlab to OME-Zarr. + Converts Matlab files generated by the MGH in-house OCT pipeline into a OME-ZARR pyramid. 
""" @@ -10,7 +12,7 @@ from contextlib import contextmanager from functools import wraps from itertools import product -from typing import Optional +from typing import Any, Callable, Optional from warnings import warn import cyclopts @@ -19,13 +21,13 @@ import zarr from scipy.io import loadmat -from linc_convert.modalities.psoct.cli import psoct -from linc_convert.modalities.psoct.utils import ( +from linc_convert.modalities.psoct._utils import ( generate_pyramid, make_json, niftizarr_write_header, write_ome_metadata, ) +from linc_convert.modalities.psoct.cli import psoct from linc_convert.utils.math import ceildiv from linc_convert.utils.orientation import center_affine, orientation_to_affine from linc_convert.utils.unit import to_nifti_unit, to_ome_unit @@ -35,11 +37,11 @@ psoct.command(single_volume) -def _automap(func): - """Decorator to automatically map the array in the mat file.""" +def _automap(func: Callable) -> Callable: + """Decorator to automatically map the array in the mat file.""" # noqa: D401 @wraps(func) - def wrapper(inp:str, out:str=None, **kwargs:dict): + def wrapper(inp: str, out: str = None, **kwargs: dict) -> Any: # noqa: ANN401 if out is None: out = os.path.splitext(inp[0])[0] out += ".nii.zarr" if kwargs.get("nii", False) else ".ome.zarr" @@ -51,7 +53,7 @@ def wrapper(inp:str, out:str=None, **kwargs:dict): @contextmanager -def _mapmat(fname: str, key:str=None)->None: +def _mapmat(fname: str, key: str = None) -> None: """Load or memory-map an array stored in a .mat file.""" try: # "New" .mat file @@ -94,7 +96,9 @@ def convert( center: bool = True, ) -> None: """ - This command converts OCT volumes stored in raw matlab files + Matlab to OME-Zarr. + + Convert OCT volumes in raw matlab files into a pyramidal OME-ZARR (or NIfTI-Zarr) hierarchy. 
Parameters @@ -209,7 +213,7 @@ def convert( ome_unit = to_ome_unit(unit) write_ome_metadata( omz, - axes=("z","y","x"), + axes=["z", "y", "x"], no_pool=no_pool, space_unit=ome_unit, space_scale=vx, diff --git a/linc_convert/utils/unit.py b/linc_convert/utils/unit.py index 51afb52..04be0f2 100644 --- a/linc_convert/utils/unit.py +++ b/linc_convert/utils/unit.py @@ -1,6 +1,4 @@ -""" -Converts units between zarr and other specifications -""" +"""Converts units between zarr and other specifications.""" ome_valid_units = { "space": [ @@ -200,12 +198,14 @@ def convert_unit(value: float, src: str, dst: str) -> float: + """Convert unit for a value.""" src = unit_to_scale(src) dst = unit_to_scale(dst) return value * (src / dst) def to_ome_unit(unit: str) -> str: + """Convert unit to ome-zarr spec.""" if unit in unit_space_short2long: unit = unit_space_short2long[unit] elif unit in unit_time_short2long: @@ -218,6 +218,7 @@ def to_ome_unit(unit: str) -> str: def to_nifti_unit(unit: str) -> str: + """Convert unit to nifti spec.""" unit = to_ome_unit(unit) return { "meter": "meter", @@ -230,6 +231,7 @@ def to_nifti_unit(unit: str) -> str: def unit_to_scale(unit: str) -> float: + """Convert unit to scale.""" if unit in unit_space_long2short: unit = unit_space_long2short[unit] elif unit in unit_time_long2short: From 854ee38a72284d572ab4608ee24030a17662d5c2 Mon Sep 17 00:00:00 2001 From: Kaidong Chai Date: Tue, 19 Nov 2024 09:34:04 -0500 Subject: [PATCH 42/51] fix: add dependency for psoct pipeline --- pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 79b5610..8a1568d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,6 +35,9 @@ glymur = "*" [tool.poetry.group.lsm.dependencies] tifffile = "*" +[tool.poetry.group.psoct.dependencies] +h5py = "*" + [tool.poetry.group.dev] optional = true [tool.poetry.group.dev.dependencies] From 1e1c29fbbe0c57ac4bf29c8bce203b8c2b2ab36f Mon Sep 17 00:00:00 2001 From: Kaidong Chai Date: Tue, 19 Nov 2024 09:36:34 -0500 Subject: [PATCH 43/51] fix: add dependency for psoct pipeline --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 8a1568d..6727a8f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,6 +37,7 @@ tifffile = "*" [tool.poetry.group.psoct.dependencies] h5py = "*" +scipy = "*" [tool.poetry.group.dev] optional = true From b3e8c5850a0d7d3f794dc324971ed3cb4f415b0d Mon Sep 17 00:00:00 2001 From: Kaidong Chai Date: Tue, 19 Nov 2024 09:40:37 -0500 Subject: [PATCH 44/51] feat: add multislice for psoct --- linc_convert/modalities/psoct/multi_slice.py | 247 +++++++++++++++++++ 1 file changed, 247 insertions(+) create mode 100644 linc_convert/modalities/psoct/multi_slice.py diff --git a/linc_convert/modalities/psoct/multi_slice.py b/linc_convert/modalities/psoct/multi_slice.py new file mode 100644 index 0000000..1f61fa6 --- /dev/null +++ b/linc_convert/modalities/psoct/multi_slice.py @@ -0,0 +1,247 @@ +""" +Matlab to OME-Zarr. + +Converts Matlab files generated by the MGH in-house OCT pipeline +into a OME-ZARR pyramid. 
+""" + +import ast +import json +import math +import os +from contextlib import contextmanager +from functools import wraps +from itertools import product +from typing import Any, Callable, Optional +from warnings import warn + +import cyclopts +import h5py +import numpy as np +import zarr +from scipy.io import loadmat + +from linc_convert.modalities.psoct._utils import ( + generate_pyramid, + make_json, + niftizarr_write_header, + write_ome_metadata, +) +from linc_convert.modalities.psoct.cli import psoct +from linc_convert.utils.math import ceildiv +from linc_convert.utils.orientation import center_affine, orientation_to_affine +from linc_convert.utils.unit import to_nifti_unit, to_ome_unit +from linc_convert.utils.zarr import make_compressor + +multi_slice = cyclopts.App(name="multi_slice", help_format="markdown") +psoct.command(multi_slice) + + +def _automap(func: Callable) -> Callable: + """Decorator to automatically map the array in the mat file.""" # noqa: D401 + + @wraps(func) + def wrapper(inp: str, out: str = None, **kwargs: dict) -> Any: # noqa: ANN401 + if out is None: + out = os.path.splitext(inp[0])[0] + out += ".nii.zarr" if kwargs.get("nii", False) else ".ome.zarr" + kwargs["nii"] = kwargs.get("nii", False) or out.endswith(".nii.zarr") + with _mapmat(inp, kwargs.get("key", None)) as dat: + return func(dat, out, **kwargs) + + return wrapper + +@contextmanager +def _mapmat(fnames, key=None): + """Load or memory-map an array stored in a .mat file""" + loaded_data = [] + + for fname in fnames: + try: + # "New" .mat file + f = h5py.File(fname, "r") + except Exception: + # "Old" .mat file + f = loadmat(fname) + + if key is None: + if not len(f.keys()): + raise Exception(f"{fname} is empty") + key = list(f.keys())[0] + if len(f.keys()) > 1: + warn( + f'More than one key in .mat file {fname}, arbitrarily loading "{key}"' + ) + + if key not in f.keys(): + raise Exception(f"Key {key} not found in file {fname}") + + if len(fnames) == 1: + yield f.get(key) + if hasattr(f, "close"): + f.close() + break + loaded_data.append(f.get(key)) + + yield np.stack(loaded_data, axis=-1) + + +@multi_slice.default +@_automap +def convert( + inp: str, + out: Optional[str] = None, + *, + key: Optional[str] = None, + meta: str = None, + chunk: int = 128, + compressor: str = "blosc", + compressor_opt: str = "{}", + max_load: int = 128, + max_levels: int = 5, + no_pool: Optional[int] = None, + nii: bool = False, + orientation: str = "RAS", + center: bool = True, +) -> None: + """ + Matlab to OME-Zarr. + + Convert OCT volumes in raw matlab files + into a pyramidal OME-ZARR (or NIfTI-Zarr) hierarchy. + + Parameters + ---------- + inp + Path to the input mat file + out + Path to the output Zarr directory [.ome.zarr] + key + Key of the array to be extracted, default to first key found + meta + Path to the metadata file + chunk + Output chunk size + compressor : {blosc, zlib, raw} + Compression method + compressor_opt + Compression options + max_load + Maximum input chunk size + max_levels + Maximum number of pyramid levels + no_pool + Index of dimension to not pool when building pyramid + nii + Convert to nifti-zarr. 
True if path ends in ".nii.zarr" + orientation + Orientation of the volume + center + Set RAS[0, 0, 0] at FOV center + """ + if isinstance(compressor_opt, str): + compressor_opt = ast.literal_eval(compressor_opt) + + # Write OME-Zarr multiscale metadata + if meta: + print("Write JSON") + with open(meta, "r") as f: + meta_txt = f.read() + meta_json = make_json(meta_txt) + path_json = ".".join(out.split(".")[:-2]) + ".json" + with open(path_json, "w") as f: + json.dump(meta_json, f, indent=4) + vx = meta_json["PixelSize"] + unit = meta_json["PixelSizeUnits"] + else: + vx = [1] * 3 + unit = "um" + + # Prepare Zarr group + omz = zarr.storage.DirectoryStore(out) + omz = zarr.group(store=omz, overwrite=True) + + if not hasattr(inp, "dtype"): + raise Exception("Input is not a numpy array. This is likely unexpected") + if len(inp.shape) < 3: + raise Exception("Input array is not 3d") + # Prepare chunking options + opt = { + "dimension_separator": r"/", + "order": "F", + "dtype": np.dtype(inp.dtype).str, + "fill_value": None, + "compressor": make_compressor(compressor, **compressor_opt), + } + + inp_chunk = [min(x, max_load) for x in inp.shape] + nk = ceildiv(inp.shape[0], inp_chunk[0]) + nj = ceildiv(inp.shape[1], inp_chunk[1]) + ni = ceildiv(inp.shape[2], inp_chunk[2]) + + nblevels = min( + [int(math.ceil(math.log2(x))) for i, x in enumerate(inp.shape) if i != no_pool] + ) + nblevels = min(nblevels, int(math.ceil(math.log2(max_load)))) + nblevels = min(nblevels, max_levels) + + opt["chunks"] = [min(x, chunk) for x in inp.shape] + + omz.create_dataset(str(0), shape=inp.shape, **opt) + + # iterate across input chunks + for i, j, k in product(range(ni), range(nj), range(nk)): + loaded_chunk = inp[ + k * inp_chunk[0] : (k + 1) * inp_chunk[0], + j * inp_chunk[1] : (j + 1) * inp_chunk[1], + i * inp_chunk[2] : (i + 1) * inp_chunk[2], + ] + + print( + f"[{i + 1:03d}, {j + 1:03d}, {k + 1:03d}]", + "/", + f"[{ni:03d}, {nj:03d}, {nk:03d}]", + # f"({1 + level}/{nblevels})", + end="\r", + ) + + # save current chunk + omz["0"][ + k * inp_chunk[0] : k * inp_chunk[0] + loaded_chunk.shape[0], + j * inp_chunk[1] : j * inp_chunk[1] + loaded_chunk.shape[1], + i * inp_chunk[2] : i * inp_chunk[2] + loaded_chunk.shape[2], + ] = loaded_chunk + + generate_pyramid(omz, nblevels - 1, mode="mean") + + print("") + + # Write OME-Zarr multiscale metadata + print("Write metadata") + print(unit) + ome_unit = to_ome_unit(unit) + write_ome_metadata( + omz, + axes=["z", "y", "x"], + no_pool=no_pool, + space_unit=ome_unit, + space_scale=vx, + multiscales_type=("2x2x2" if no_pool is None else "2x2") + "mean window", + ) + + if not nii: + print("done.") + return + + # Write NIfTI-Zarr header + # NOTE: we use nifti2 because dimensions typically do not fit in a short + # TODO: we do not write the json zattrs, but it should be added in + # once the nifti-zarr package is released + shape = list(reversed(omz["0"].shape)) + affine = orientation_to_affine(orientation, *vx[::-1]) + if center: + affine = center_affine(affine, shape[:3]) + niftizarr_write_header( + omz, shape, affine, omz["0"].dtype, to_nifti_unit(unit), nifti_version=2 + ) + From f0a76c4a53400c59ca9558088d3fc7da8c5365ae Mon Sep 17 00:00:00 2001 From: Kaidong Chai Date: Tue, 19 Nov 2024 09:44:47 -0500 Subject: [PATCH 45/51] docs --- linc_convert/modalities/psoct/multi_slice.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/linc_convert/modalities/psoct/multi_slice.py b/linc_convert/modalities/psoct/multi_slice.py index 1f61fa6..2154a33 100644 --- 
a/linc_convert/modalities/psoct/multi_slice.py +++ b/linc_convert/modalities/psoct/multi_slice.py @@ -51,9 +51,10 @@ def wrapper(inp: str, out: str = None, **kwargs: dict) -> Any: # noqa: ANN401 return wrapper + @contextmanager -def _mapmat(fnames, key=None): - """Load or memory-map an array stored in a .mat file""" +def _mapmat(fnames: list[str], key: str = None) -> None: + """Load or memory-map an array stored in a .mat file.""" loaded_data = [] for fname in fnames: @@ -70,7 +71,8 @@ def _mapmat(fnames, key=None): key = list(f.keys())[0] if len(f.keys()) > 1: warn( - f'More than one key in .mat file {fname}, arbitrarily loading "{key}"' + f"More than one key in .mat file {fname}, " + f'arbitrarily loading "{key}"' ) if key not in f.keys(): @@ -89,7 +91,7 @@ def _mapmat(fnames, key=None): @multi_slice.default @_automap def convert( - inp: str, + inp: list[str], out: Optional[str] = None, *, key: Optional[str] = None, @@ -244,4 +246,3 @@ def convert( niftizarr_write_header( omz, shape, affine, omz["0"].dtype, to_nifti_unit(unit), nifti_version=2 ) - From 81cdcc2f3d6d20f6aa17e190d21abcebc187cf36 Mon Sep 17 00:00:00 2001 From: Kaidong Chai Date: Tue, 19 Nov 2024 10:53:18 -0500 Subject: [PATCH 46/51] feat: lazy load input instead of loading all at the beginning --- linc_convert/modalities/psoct/multi_slice.py | 46 +++++++++++--------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/linc_convert/modalities/psoct/multi_slice.py b/linc_convert/modalities/psoct/multi_slice.py index 2154a33..1c55be4 100644 --- a/linc_convert/modalities/psoct/multi_slice.py +++ b/linc_convert/modalities/psoct/multi_slice.py @@ -84,8 +84,8 @@ def _mapmat(fnames: list[str], key: str = None) -> None: f.close() break loaded_data.append(f.get(key)) - - yield np.stack(loaded_data, axis=-1) + yield loaded_data + # yield np.stack(loaded_data, axis=-1) @multi_slice.default @@ -163,41 +163,47 @@ def convert( omz = zarr.storage.DirectoryStore(out) omz = zarr.group(store=omz, overwrite=True) - if not hasattr(inp, "dtype"): - raise Exception("Input is not a numpy array. This is likely unexpected") - if len(inp.shape) < 3: - raise Exception("Input array is not 3d") + if not hasattr(inp[0], "dtype"): + raise Exception("Input is not numpy array. 
This is likely unexpected") + if len(inp[0].shape) != 2: + raise Exception("Input array is not 2d") # Prepare chunking options opt = { "dimension_separator": r"/", "order": "F", - "dtype": np.dtype(inp.dtype).str, + "dtype": np.dtype(inp[0].dtype).str, "fill_value": None, "compressor": make_compressor(compressor, **compressor_opt), } - - inp_chunk = [min(x, max_load) for x in inp.shape] - nk = ceildiv(inp.shape[0], inp_chunk[0]) - nj = ceildiv(inp.shape[1], inp_chunk[1]) - ni = ceildiv(inp.shape[2], inp_chunk[2]) + inp: list = inp + inp_shape = (*inp[0].shape, len(inp)) + inp_chunk = [min(x, max_load) for x in inp_shape] + nk = ceildiv(inp_shape[0], inp_chunk[0]) + nj = ceildiv(inp_shape[1], inp_chunk[1]) + ni = ceildiv(inp_shape[2], inp_chunk[2]) nblevels = min( - [int(math.ceil(math.log2(x))) for i, x in enumerate(inp.shape) if i != no_pool] + [int(math.ceil(math.log2(x))) for i, x in enumerate(inp_shape) if i != no_pool] ) nblevels = min(nblevels, int(math.ceil(math.log2(max_load)))) nblevels = min(nblevels, max_levels) - opt["chunks"] = [min(x, chunk) for x in inp.shape] + opt["chunks"] = [min(x, chunk) for x in inp_shape] - omz.create_dataset(str(0), shape=inp.shape, **opt) + omz.create_dataset(str(0), shape=inp_shape, **opt) # iterate across input chunks for i, j, k in product(range(ni), range(nj), range(nk)): - loaded_chunk = inp[ - k * inp_chunk[0] : (k + 1) * inp_chunk[0], - j * inp_chunk[1] : (j + 1) * inp_chunk[1], - i * inp_chunk[2] : (i + 1) * inp_chunk[2], - ] + loaded_chunk = np.stack( + [ + inp[index][ + k * inp_chunk[0] : (k + 1) * inp_chunk[0], + j * inp_chunk[1] : (j + 1) * inp_chunk[1], + ] + for index in range(i * inp_chunk[2], (i + 1) * inp_chunk[2]) + ], + axis=-1, + ) print( f"[{i + 1:03d}, {j + 1:03d}, {k + 1:03d}]", From 47d89a3518e64159f71a909952eeed0d6783ed26 Mon Sep 17 00:00:00 2001 From: Calvin Chai Date: Tue, 19 Nov 2024 11:57:55 -0500 Subject: [PATCH 47/51] Update linc_convert/modalities/psoct/__init__.py Co-authored-by: Kabilar Gunalan --- linc_convert/modalities/psoct/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/linc_convert/modalities/psoct/__init__.py b/linc_convert/modalities/psoct/__init__.py index b539c95..b353728 100644 --- a/linc_convert/modalities/psoct/__init__.py +++ b/linc_convert/modalities/psoct/__init__.py @@ -1,4 +1,4 @@ -"""Dark Field microscopy converters.""" +"""Polarization-sensitive optical coherence tomography converters.""" __all__ = ["cli", "multi_slice", "single_volume"] From bc1a7e90bf27173014f08d68e4ca285a54ff8874 Mon Sep 17 00:00:00 2001 From: Calvin Chai Date: Tue, 19 Nov 2024 11:58:02 -0500 Subject: [PATCH 48/51] Update linc_convert/modalities/psoct/cli.py Co-authored-by: Kabilar Gunalan --- linc_convert/modalities/psoct/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/linc_convert/modalities/psoct/cli.py b/linc_convert/modalities/psoct/cli.py index 12d4c06..91d1ccd 100644 --- a/linc_convert/modalities/psoct/cli.py +++ b/linc_convert/modalities/psoct/cli.py @@ -1,4 +1,4 @@ -"""Entry-points for Dark Field microscopy converter.""" +"""Entry-points for polarization-sensitive optical coherence tomography converter.""" from cyclopts import App From 6749f34c5a5104d13b96501aa8d2a7087c1de7be Mon Sep 17 00:00:00 2001 From: balbasty Date: Thu, 21 Nov 2024 18:46:14 +0000 Subject: [PATCH 49/51] MNT(pyproject) make psoct dep optional --- pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 7f265f8..7b86f7f 100644 --- a/pyproject.toml 
+++ b/pyproject.toml
@@ -39,6 +39,8 @@ optional = true
 [tool.poetry.group.lsm.dependencies]
 tifffile = "*"
 
+[tool.poetry.group.psoct]
+optional = true
 [tool.poetry.group.psoct.dependencies]
 h5py = "*"
 scipy = "*"

From 5011edb69852e3f7c9ed1ee69c00e085dbb348e7 Mon Sep 17 00:00:00 2001
From: balbasty
Date: Thu, 21 Nov 2024 19:19:26 +0000
Subject: [PATCH 50/51] FIX: wrap optional imports in try/catch

---
 linc_convert/modalities/df/__init__.py    |  9 ++++++---
 linc_convert/modalities/lsm/__init__.py   |  8 ++++++--
 linc_convert/modalities/psoct/__init__.py | 10 +++++++---
 3 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/linc_convert/modalities/df/__init__.py b/linc_convert/modalities/df/__init__.py
index a6d9d6f..62f5c18 100644
--- a/linc_convert/modalities/df/__init__.py
+++ b/linc_convert/modalities/df/__init__.py
@@ -1,5 +1,8 @@
 """Dark Field microscopy converters."""
-__all__ = ["cli", "multi_slice", "single_slice"]
-
-from . import cli, multi_slice, single_slice
+try:
+    import glymur as _  # noqa: F401
+    __all__ = ["cli", "multi_slice", "single_slice"]
+    from . import cli, multi_slice, single_slice
+except ImportError:
+    pass
diff --git a/linc_convert/modalities/lsm/__init__.py b/linc_convert/modalities/lsm/__init__.py
index 1ce6617..8ce200e 100644
--- a/linc_convert/modalities/lsm/__init__.py
+++ b/linc_convert/modalities/lsm/__init__.py
@@ -1,4 +1,8 @@
 """Light Sheet Microscopy converters."""
-__all__ = ["cli", "mosaic"]
-from . import cli, mosaic
+try:
+    import tifffile as _  # noqa: F401
+    __all__ = ["cli", "mosaic"]
+    from . import cli, mosaic
+except ImportError:
+    pass
diff --git a/linc_convert/modalities/psoct/__init__.py b/linc_convert/modalities/psoct/__init__.py
index b353728..2a9df3a 100644
--- a/linc_convert/modalities/psoct/__init__.py
+++ b/linc_convert/modalities/psoct/__init__.py
@@ -1,5 +1,9 @@
 """Polarization-sensitive optical coherence tomography converters."""
-__all__ = ["cli", "multi_slice", "single_volume"]
-
-from . import cli, multi_slice, single_volume
+try:
+    import h5py as _h5py  # noqa: F401
+    import scipy as _scipy  # noqa: F401
+    __all__ = ["cli", "multi_slice", "single_volume"]
+    from . import cli, multi_slice, single_volume
+except ImportError:
+    pass

From ad90a048f1b1040b6a7e81c6bd819330a9f52cac Mon Sep 17 00:00:00 2001
From: balbasty
Date: Thu, 21 Nov 2024 19:19:51 +0000
Subject: [PATCH 51/51] style fixes by ruff

---
 linc_convert/modalities/df/__init__.py    | 1 +
 linc_convert/modalities/lsm/__init__.py   | 1 +
 linc_convert/modalities/psoct/__init__.py | 1 +
 3 files changed, 3 insertions(+)

diff --git a/linc_convert/modalities/df/__init__.py b/linc_convert/modalities/df/__init__.py
index 62f5c18..1e26307 100644
--- a/linc_convert/modalities/df/__init__.py
+++ b/linc_convert/modalities/df/__init__.py
@@ -2,6 +2,7 @@
 
 try:
     import glymur as _  # noqa: F401
+
     __all__ = ["cli", "multi_slice", "single_slice"]
     from . import cli, multi_slice, single_slice
 except ImportError:
diff --git a/linc_convert/modalities/lsm/__init__.py b/linc_convert/modalities/lsm/__init__.py
index 8ce200e..abf7cee 100644
--- a/linc_convert/modalities/lsm/__init__.py
+++ b/linc_convert/modalities/lsm/__init__.py
@@ -2,6 +2,7 @@
 
 try:
     import tifffile as _  # noqa: F401
+
     __all__ = ["cli", "mosaic"]
     from . import cli, mosaic
 except ImportError:
diff --git a/linc_convert/modalities/psoct/__init__.py b/linc_convert/modalities/psoct/__init__.py
index 2a9df3a..d9bb1a6 100644
--- a/linc_convert/modalities/psoct/__init__.py
+++ b/linc_convert/modalities/psoct/__init__.py
@@ -3,6 +3,7 @@
 try:
     import h5py as _h5py  # noqa: F401
     import scipy as _scipy  # noqa: F401
+
     __all__ = ["cli", "multi_slice", "single_volume"]
     from . import cli, multi_slice, single_volume
 except ImportError:
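
Patches 50 and 51 establish the optional-dependency pattern used across all modality packages: the aliased imports such as `import glymur as _  # noqa: F401` exist only to probe whether a modality's optional dependencies are installed, and the `except ImportError` branch silently leaves that modality unregistered, since the cyclopts subcommands are registered as a side effect of `from . import cli, ...`. A minimal sketch of how the same guard would extend to a further modality package; the `somelib` dependency and the module docstring below are hypothetical placeholders, not files in this repository:

    """Hypothetical new-modality converters."""

    try:
        import somelib as _  # noqa: F401 - probe only; hypothetical optional dependency

        __all__ = ["cli"]
        from . import cli  # importing cli registers this modality's subcommands
    except ImportError:
        # Optional dependency missing: leave the modality unexposed.
        pass

The trade-off is silent degradation: with the extras missing, users see no error, only an absent subcommand. Callers that want a diagnostic can probe availability explicitly with `importlib.util.find_spec("somelib")` and emit a warning before skipping the import.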