Skip to content

Commit

Permalink
REFACTOR: create python lib with CLI
Browse files Browse the repository at this point in the history
  • Loading branch information
balbasty committed Oct 28, 2024
1 parent 1700119 commit fc4b48a
Show file tree
Hide file tree
Showing 17 changed files with 1,335 additions and 1 deletion.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
# linc-convert
Data conversion scripts for the LINC project

Data conversion tools for the LINC project
3 changes: 3 additions & 0 deletions linc_convert/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
"""Data conversion tools for the LINC project."""
__all__ = ["modalities", "utils"]
from . import modalities, utils
5 changes: 5 additions & 0 deletions linc_convert/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
"""Root command line entry point."""
from cyclopts import App

help = "Collection of conversion scripts for LINC datasets"
main = App("linc-convert", help=help)
3 changes: 3 additions & 0 deletions linc_convert/modalities/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
"""Converters for all imaging modalities."""
__all__ = ["df", "lsm"]
from . import df, lsm
4 changes: 4 additions & 0 deletions linc_convert/modalities/df/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
"""Dark Field microscopy converters."""
__all__ = ["cli", "multi_slice", "single_slice"]

from . import cli, multi_slice, single_slice
8 changes: 8 additions & 0 deletions linc_convert/modalities/df/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"""Entry-points for Dark Field microscopy converter."""
from cyclopts import App

from linc_convert.cli import main

help = "Converters for Dark Field microscopy"
df = App(name="df", help=help)
main.command(df)
293 changes: 293 additions & 0 deletions linc_convert/modalities/df/multi_slice.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,293 @@
"""
Convert JPEG2000 files generated by MBF-Neurolucida into a OME-ZARR pyramid.
We do not recompute the image pyramid but instead reuse the JPEG2000
levels (obtained by wavelet transform).
"""
# stdlib
import ast
import json
import os

# externals
import glymur
import nibabel as nib
import numpy as np
import zarr
from cyclopts import App

# internals
from linc_convert.modalities.df.cli import df
from linc_convert.utils.j2k import WrappedJ2K, get_pixelsize
from linc_convert.utils.math import ceildiv, floordiv
from linc_convert.utils.orientation import center_affine, orientation_to_affine
from linc_convert.utils.zarr import make_compressor

HOME = "/space/aspasia/2/users/linc/000003"

# Path to LincBrain dataset
LINCSET = os.path.join(HOME, "sourcedata")
LINCOUT = os.path.join(HOME, "rawdata")


ms = App(name="multislice", help_format="markdown")
df.command(ms)


@ms.default
def convert(
inp: list[str],
out: str | None = None,
*,
chunk: int = 4096,
compressor: str = "blosc",
compressor_opt: str = "{}",
max_load: int = 16384,
nii: bool = False,
orientation: str = "coronal",
center: bool = True,
thickness: float | None = None,
) -> None:
"""
Convert JPEG2000 files generated by MBF-Neurolucida into a Zarr pyramid.
It does not recompute the image pyramid but instead reuse the
JPEG2000 levels (obtained by wavelet transform).
This command converts a batch of slices and stacks them together
into a single 3D Zarr.
Orientation
-----------
The anatomical orientation of the slice is given in terms of RAS axes.
It is a combination of two letters from the set
`{"L", "R", "A", "P", "I", "S"}`, where
* the first letter corresponds to the horizontal dimension and
indicates the anatomical meaning of the _right_ of the jp2 image,
* the second letter corresponds to the vertical dimension and
indicates the anatomical meaning of the _bottom_ of the jp2 image,
* the third letter corresponds to the slice dimension and
indicates the anatomical meaninff of the _end_ of the stack.
We also provide the aliases
* `"coronal"` == `"LI"`
* `"axial"` == `"LP"`
* `"sagittal"` == `"PI"`
The orientation flag is only useful when converting to nifti-zarr.
Parameters
----------
inp
Path to the input slices
out
Path to the output Zarr directory [<dirname(INP)>.ome.zarr]
chunk
Output chunk size
compressor : {blosc, zlib, raw}
Compression method
compressor_opt
Compression options
max_load
Maximum input chunk size
nii
Convert to nifti-zarr. True if path ends in ".nii.zarr"
orientation
Orientation of the slice
center
Set RAS[0, 0, 0] at FOV center
thickness
Slice thickness
"""
# Default output path
if not out:
out = os.path.splitext(inp[0])[0]
out += ".nii.zarr" if nii else ".ome.zarr"
nii = nii or out.endswith(".nii.zarr")

if isinstance(compressor_opt, str):
compressor_opt = ast.literal_eval(compressor_opt)

# Prepare Zarr group
omz = zarr.storage.DirectoryStore(out)
omz = zarr.group(store=omz, overwrite=True)

nblevel, has_channel, dtype_jp2 = float("inf"), float("inf"), ""

# Compute output shape
new_height, new_width = 0, 0
for inp1 in inp:
jp2 = glymur.Jp2k(inp1)
nblevel = min(nblevel, jp2.codestream.segment[2].num_res)
has_channel = min(has_channel, jp2.ndim - 2)
dtype_jp2 = np.dtype(jp2.dtype).str
if jp2.shape[0] > new_height:
new_height = jp2.shape[0]
if jp2.shape[1] > new_width:
new_width = jp2.shape[1]
new_size = (new_height, new_width)
if has_channel:
new_size += (3.)
print(len(inp), new_size, nblevel, has_channel)

# Prepare chunking options
opt = {
"chunks": list(new_size[2:]) + [1] + [chunk, chunk],
"dimension_separator": r"/",
"order": "F",
"dtype": dtype_jp2,
"fill_value": 0,
"compressor": make_compressor(compressor, **compressor_opt),
}
print(opt)

# Write each level
for level in range(nblevel):
shape = [ceildiv(s, 2**level) for s in new_size[:2]]
shape = [new_size[2]] + [len(inp)] + shape

omz.create_dataset(f"{level}", shape=shape, **opt)
array = omz[f"{level}"]

# Write each slice
for idx, inp1 in enumerate(inp):
j2k = glymur.Jp2k(inp1)
vxw, vxh = get_pixelsize(j2k)
subdat = WrappedJ2K(j2k, level=level)
subdat_size = subdat.shape
print("Convert level", level, "with shape", shape,
"for slice", idx, "with size", subdat_size)

# offset while attaching
x = floordiv(shape[-2] - subdat_size[-2], 2)
y = floordiv(shape[-1] - subdat_size[-1], 2)

if max_load is None or (shape[-2] < max_load and shape[-1] < max_load):
array[..., idx, x:x + subdat_size[1], y:y + subdat_size[2]] = subdat[...]

else:
ni = ceildiv(shape[-2], max_load)
nj = ceildiv(shape[-1], max_load)

for i in range(ni):
for j in range(nj):
print(f"\r{i+1}/{ni}, {j+1}/{nj}", end=" ")
start_x, end_x = i*max_load, min((i+1)*max_load, shape[-2])
start_y, end_y = j*max_load, min((j+1)*max_load, shape[-1])
if end_x <= x or end_y <= y:
continue

if start_x >= subdat_size[-2] or start_y >= subdat_size[-1]:
continue

array[
...,
idx,
x + start_x:x + min(end_x, subdat_size[-2]),
y + start_y:y + min(end_y, subdat_size[-1]),
] = subdat[
...,
start_x:min((i+1)*max_load, subdat_size[-2]),
start_y:min((j+1)*max_load, subdat_size[-1]),
]
print("")

# Write OME-Zarr multiscale metadata
print("Write metadata")
multiscales = [{
"version": "0.4",
"axes": [
{"name": "z", "type": "space", "unit": "micrometer"},
{"name": "y", "type": "distance", "unit": "micrometer"},
{"name": "x", "type": "space", "unit": "micrometer"}
],
"datasets": [],
"type": "jpeg2000",
"name": "",
}]
if has_channel:
multiscales[0]["axes"].insert(0, {"name": "c", "type": "channel"})

for n in range(nblevel):
shape0 = omz["0"].shape[-2:]
shape = omz[str(n)].shape[-2:]
multiscales[0]["datasets"].append({})
level = multiscales[0]["datasets"][-1]
level["path"] = str(n)

# I assume that wavelet transforms end up aligning voxel edges
# across levels, so the effective scaling is the shape ratio,
# and there is a half voxel shift wrt to the "center of first voxel"
# frame
level["coordinateTransformations"] = [
{
"type": "scale",
"scale": [1.0] * has_channel + [
1.0,
(shape0[0]/shape[0])*vxh,
(shape0[1]/shape[1])*vxw,
]
},
{
"type": "translation",
"translation": [0.0] * has_channel + [
0.0,
(shape0[0]/shape[0] - 1)*vxh*0.5,
(shape0[1]/shape[1] - 1)*vxw*0.5,
]
}
]
multiscales[0]["coordinateTransformations"] = [
{
"scale": [1.0] * (3 + has_channel),
"type": "scale"
}
]
omz.attrs["multiscales"] = multiscales

# Write NIfTI-Zarr header
# NOTE: we use nifti2 because dimensions typically do not fit in a short
# TODO: we do not write the json zattrs, but it should be added in
# once the nifti-zarr package is released
shape = list(reversed(omz["0"].shape))
if has_channel:
shape = shape[:3] + [1] + shape[3:]
affine = orientation_to_affine(orientation, vxw, vxh, thickness or 1)
if center:
affine = center_affine(affine, shape[:2])
header = nib.Nifti2Header()
header.set_data_shape(shape)
header.set_data_dtype(omz["0"].dtype)
header.set_qform(affine)
header.set_sform(affine)
header.set_xyzt_units(nib.nifti1.unit_codes.code["micron"])
header.structarr["magic"] = b"n+2\0"
header = np.frombuffer(header.structarr.tobytes(), dtype="u1")
opt = {
"chunks": [len(header)],
"dimension_separator": r"/",
"order": "F",
"dtype": "|u1",
"fill_value": None,
"compressor": None,
}
omz.create_dataset("nifti", data=header, shape=shape, **opt)

# Write sidecar .json file
json_name = os.path.splitext(out)(os.path.splitext(out)[0])[0]
json_name += '.json'
dic = {}
dic["PixelSize"] = json.dumps([vxw, vxh])
dic["PixelSizeUnits"] = "um"
dic["SliceThickness"] = 1.2
dic["SliceThicknessUnits"] = "mm"
dic["SampleStaining"] = "LY"

with open(json_name, "w") as outfile:
json.dump(dic, outfile)
outfile.write("\n")

print("done.")
Loading

0 comments on commit fc4b48a

Please sign in to comment.