REFACTOR: create python lib with CLI #8

Merged
merged 10 commits into from
Nov 12, 2024
3 changes: 2 additions & 1 deletion README.md
@@ -1,2 +1,3 @@
 # linc-convert
-Data conversion scripts for the LINC project
+
+Data conversion tools for the LINC project
4 changes: 4 additions & 0 deletions linc_convert/__init__.py
@@ -0,0 +1,4 @@
"""Data conversion tools for the LINC project."""

__all__ = ["modalities", "utils"]
from . import modalities, utils
6 changes: 6 additions & 0 deletions linc_convert/cli.py
@@ -0,0 +1,6 @@
"""Root command line entry point."""

from cyclopts import App

help = "Collection of conversion scripts for LINC datasets"
main = App("linc-convert", help=help)
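
# Modality sub-apps (e.g. `df` in linc_convert/modalities/df/cli.py)
# attach themselves to this root app via `main.command(...)`.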
4 changes: 4 additions & 0 deletions linc_convert/modalities/__init__.py
@@ -0,0 +1,4 @@
"""Converters for all imaging modalities."""

__all__ = ["df", "lsm"]
from . import df, lsm
5 changes: 5 additions & 0 deletions linc_convert/modalities/df/__init__.py
@@ -0,0 +1,5 @@
"""Dark Field microscopy converters."""

__all__ = ["cli", "multi_slice", "single_slice"]

from . import cli, multi_slice, single_slice
9 changes: 9 additions & 0 deletions linc_convert/modalities/df/cli.py
@@ -0,0 +1,9 @@
"""Entry-points for Dark Field microscopy converter."""

from cyclopts import App

from linc_convert.cli import main

help = "Converters for Dark Field microscopy"
df = App(name="df", help=help)
main.command(df)
311 changes: 311 additions & 0 deletions linc_convert/modalities/df/multi_slice.py
@@ -0,0 +1,311 @@
"""
Convert JPEG2000 files generated by MBF-Neurolucida into an OME-Zarr pyramid.

We do not recompute the image pyramid but instead reuse the JPEG2000
levels (obtained by wavelet transform).
"""

# stdlib
import ast
import json
import os

# externals
import glymur
import nibabel as nib
import numpy as np
import zarr
from cyclopts import App

# internals
from linc_convert.modalities.df.cli import df
from linc_convert.utils.j2k import WrappedJ2K, get_pixelsize
from linc_convert.utils.math import ceildiv, floordiv
from linc_convert.utils.orientation import center_affine, orientation_to_affine
from linc_convert.utils.zarr import make_compressor

HOME = "/space/aspasia/2/users/linc/000003"

# Path to LincBrain dataset
LINCSET = os.path.join(HOME, "sourcedata")
LINCOUT = os.path.join(HOME, "rawdata")


ms = App(name="multislice", help_format="markdown")
df.command(ms)


@ms.default
def convert(
inp: list[str],
out: str | None = None,
*,
chunk: int = 4096,
compressor: str = "blosc",
compressor_opt: str = "{}",
max_load: int = 16384,
nii: bool = False,
orientation: str = "coronal",
center: bool = True,
thickness: float | None = None,
) -> None:
"""
Convert JPEG2000 files generated by MBF-Neurolucida into a Zarr pyramid.

    It does not recompute the image pyramid but instead reuses the
    JPEG2000 levels (obtained by wavelet transform).

This command converts a batch of slices and stacks them together
into a single 3D Zarr.

Orientation
-----------
The anatomical orientation of the slice is given in terms of RAS axes.

    It is a combination of two or three letters from the set
    `{"L", "R", "A", "P", "I", "S"}`, where

    * the first letter corresponds to the horizontal dimension and
      indicates the anatomical meaning of the _right_ of the jp2 image,
    * the second letter corresponds to the vertical dimension and
      indicates the anatomical meaning of the _bottom_ of the jp2 image,
    * the third letter corresponds to the slice dimension and
      indicates the anatomical meaning of the _end_ of the stack.

We also provide the aliases

* `"coronal"` == `"LI"`
* `"axial"` == `"LP"`
* `"sagittal"` == `"PI"`

The orientation flag is only useful when converting to nifti-zarr.
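
    For example (hypothetical invocation, assuming the package installs a
    `linc-convert` entry point):

        linc-convert df multislice slice_*.jp2 --orientation LI --nii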

Parameters
----------
inp
Path to the input slices
out
        Path to the output Zarr directory [<INP[0] without extension>.ome.zarr]
chunk
Output chunk size
compressor : {blosc, zlib, raw}
Compression method
compressor_opt
Compression options
max_load
Maximum input chunk size
nii
Convert to nifti-zarr. True if path ends in ".nii.zarr"
orientation
Orientation of the slice
center
Set RAS[0, 0, 0] at FOV center
thickness
Slice thickness
"""
# Default output path
if not out:
out = os.path.splitext(inp[0])[0]
out += ".nii.zarr" if nii else ".ome.zarr"
nii = nii or out.endswith(".nii.zarr")

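    # `compressor_opt` arrives from the CLI as a Python-literal dict,
    # e.g. "{'clevel': 9}" for blosc; the options are forwarded as-is
    # to make_compressor.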
if isinstance(compressor_opt, str):
compressor_opt = ast.literal_eval(compressor_opt)

# Prepare Zarr group
    store = zarr.storage.DirectoryStore(out)
    omz = zarr.group(store=store, overwrite=True)

nblevel, has_channel, dtype_jp2 = float("inf"), float("inf"), ""

# Compute output shape
new_height, new_width = 0, 0
for inp1 in inp:
jp2 = glymur.Jp2k(inp1)
nblevel = min(nblevel, jp2.codestream.segment[2].num_res)
has_channel = min(has_channel, jp2.ndim - 2)
dtype_jp2 = np.dtype(jp2.dtype).str
if jp2.shape[0] > new_height:
new_height = jp2.shape[0]
if jp2.shape[1] > new_width:
new_width = jp2.shape[1]
new_size = (new_height, new_width)
if has_channel:
        new_size += (3,)
print(len(inp), new_size, nblevel, has_channel)

# Prepare chunking options
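    # One chunk spans all channels, a single slice, and a (chunk x chunk)
    # in-plane tile; pyramid levels are stored under keys "0", "1", ...
    # with "/" as the dimension separator.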
opt = {
"chunks": list(new_size[2:]) + [1] + [chunk, chunk],
"dimension_separator": r"/",
"order": "F",
"dtype": dtype_jp2,
"fill_value": 0,
"compressor": make_compressor(compressor, **compressor_opt),
}
print(opt)
print(new_size)
# Write each level
for level in range(nblevel):
        shape = [ceildiv(s, 2**level) for s in new_size[:2]]
        shape = list(new_size[2:]) + [len(inp)] + shape

omz.create_dataset(f"{level}", shape=shape, **opt)
array = omz[f"{level}"]

# Write each slice
for idx, inp1 in enumerate(inp):
j2k = glymur.Jp2k(inp1)
vxw, vxh = get_pixelsize(j2k)
subdat = WrappedJ2K(j2k, level=level)
subdat_size = subdat.shape
print(
"Convert level",
level,
"with shape",
shape,
"for slice",
idx,
"with size",
subdat_size,
)

            # Offset so that each slice is centered in the common output plane
x = floordiv(shape[-2] - subdat_size[-2], 2)
y = floordiv(shape[-1] - subdat_size[-1], 2)

            if max_load is None or (shape[-2] < max_load and shape[-1] < max_load):
                array[..., idx, x : x + subdat_size[-2], y : y + subdat_size[-1]] = (
                    subdat[...]
                )

else:
ni = ceildiv(shape[-2], max_load)
nj = ceildiv(shape[-1], max_load)

for i in range(ni):
for j in range(nj):
print(f"\r{i+1}/{ni}, {j+1}/{nj}", end=" ")
start_x, end_x = (
i * max_load,
min((i + 1) * max_load, shape[-2]),
)
start_y, end_y = (
j * max_load,
min((j + 1) * max_load, shape[-1]),
)
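                        # Skip tiles that fall outside the region covered
                        # by this slice.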
if end_x <= x or end_y <= y:
continue

if start_x >= subdat_size[-2] or start_y >= subdat_size[-1]:
continue

array[
...,
idx,
x + start_x : x + min(end_x, subdat_size[-2]),
y + start_y : y + min(end_y, subdat_size[-1]),
] = subdat[
...,
start_x : min((i + 1) * max_load, subdat_size[-2]),
start_y : min((j + 1) * max_load, subdat_size[-1]),
]
print("")

# Write OME-Zarr multiscale metadata
print("Write metadata")
multiscales = [
{
"version": "0.4",
"axes": [
{"name": "z", "type": "space", "unit": "micrometer"},
{"name": "y", "type": "distance", "unit": "micrometer"},
{"name": "x", "type": "space", "unit": "micrometer"},
],
"datasets": [],
"type": "jpeg2000",
"name": "",
}
]
if has_channel:
multiscales[0]["axes"].insert(0, {"name": "c", "type": "channel"})

for n in range(nblevel):
shape0 = omz["0"].shape[-2:]
shape = omz[str(n)].shape[-2:]
multiscales[0]["datasets"].append({})
level = multiscales[0]["datasets"][-1]
level["path"] = str(n)

        # I assume that wavelet transforms end up aligning voxel edges
        # across levels, so the effective scaling is the shape ratio,
        # and there is a half-voxel shift with respect to the
        # "center of first voxel" frame.
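        # Concretely: the level-n voxel size is s_n = (shape0 / shape_n) * vx,
        # and edge alignment places the center of the first level-n voxel at
        # (s_n - vx) / 2 = (shape0 / shape_n - 1) * vx / 2 in the level-0 frame.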
level["coordinateTransformations"] = [
{
"type": "scale",
"scale": [1.0] * has_channel
+ [
1.0,
(shape0[0] / shape[0]) * vxh,
(shape0[1] / shape[1]) * vxw,
],
},
{
"type": "translation",
"translation": [0.0] * has_channel
+ [
0.0,
(shape0[0] / shape[0] - 1) * vxh * 0.5,
(shape0[1] / shape[1] - 1) * vxw * 0.5,
],
},
]
multiscales[0]["coordinateTransformations"] = [
{"scale": [1.0] * (3 + has_channel), "type": "scale"}
]
omz.attrs["multiscales"] = multiscales

# Write NIfTI-Zarr header
# NOTE: we use nifti2 because dimensions typically do not fit in a short
    # TODO: we do not write the json zattrs, but they should be added
    # once the nifti-zarr package is released
shape = list(reversed(omz["0"].shape))
if has_channel:
shape = shape[:3] + [1] + shape[3:]
affine = orientation_to_affine(orientation, vxw, vxh, thickness or 1)
if center:
affine = center_affine(affine, shape[:2])
header = nib.Nifti2Header()
header.set_data_shape(shape)
header.set_data_dtype(omz["0"].dtype)
header.set_qform(affine)
header.set_sform(affine)
header.set_xyzt_units(nib.nifti1.unit_codes.code["micron"])
header.structarr["magic"] = b"n+2\0"
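    # b"n+2\0" is the single-file NIfTI-2 magic (b"ni2\0" marks .hdr/.img pairs)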
header = np.frombuffer(header.structarr.tobytes(), dtype="u1")
opt = {
"chunks": [len(header)],
"dimension_separator": r"/",
"order": "F",
"dtype": "|u1",
"fill_value": None,
"compressor": None,
}
omz.create_dataset("nifti", data=header, shape=shape, **opt)

# Write sidecar .json file
json_name = os.path.splitext(out)[0]
json_name += ".json"
dic = {}
dic["PixelSize"] = json.dumps([vxw, vxh])
dic["PixelSizeUnits"] = "um"
dic["SliceThickness"] = 1.2
dic["SliceThicknessUnits"] = "mm"
dic["SampleStaining"] = "LY"

with open(json_name, "w") as outfile:
json.dump(dic, outfile)
outfile.write("\n")

print("done.")