Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add webknossos_annotation.py #11

Closed
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
192 changes: 192 additions & 0 deletions scripts/webknossos_annotation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
import os
import cv2
import ast
import wkw
import zarr
import json
import uuid
import time
import math
import shutil
import glymur
import skimage
import cyclopts
import numcodecs
import numpy as np
from glob import glob
import nibabel as nib
from typing import Optional
import matplotlib.pyplot as plt
from PIL import Image, ImageEnhance
from skimage import measure
from skimage.draw import polygon2mask
from scipy.ndimage import binary_fill_holes

"""
This script resave annotations from webknossos in ome.zarr format following czyx direction
which is the same as underlying dataset.

It calculates offset from low-res images and set offset for other resolution accordingly.

Input:
wkw_dir is the path to unzipped manual annotation folder, for example: .../annotation_folder/data_Volume
ome_dir is the path to underlying ome.zarr dataset
dst is the path to saving annotation mask
dic is the dictionary of mapping annotation value to standard value
"""

app = cyclopts.App(help_format="markdown")
@app.default
def convert(
wkw_dir: str = None,
ome_dir: str = None,
dst: str = None,
dic: dict = None,
*,
chunk: int = 1024,
compressor: str = 'blosc',
compressor_opt: str = "{}",
max_load: int = 16384,
):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you document what each input/option does?

# load underlying dataset info to get size info
omz_data = zarr.open_group(ome_dir, mode='r')
nblevel = len([i for i in os.listdir(ome_dir) if i.isdigit()])
wkw_dataset_path = os.path.join(wkw_dir, get_mask_name(nblevel-1))
wkw_dataset = wkw.Dataset.open(wkw_dataset_path)

low_res_offsets = []
omz_res = omz_data[nblevel-1]
n = omz_res.shape[1]
size = omz_res.shape[-2:]
for idx in range(n):
data = wkw_dataset.read(off = (0, 0, idx), shape = [size[1], size[0], 1])
data = data[0, :, :, 0]
data = np.transpose(data, (1, 0))
[t,b,l,r] = find_borders(data)
low_res_offsets.append([t,b,l,r])

# setup save info
basename = os.path.basename(ome_dir)[:-9]
initials = wkw_dir.split('/')[-2][:2]
out = os.path.join(dst, basename + '_dsec_' + initials + '.ome.zarr')
if os.path.exists(out):
shutil.rmtree(out)
os.makedirs(out, exist_ok=True)

if isinstance(compressor_opt, str):
compressor_opt = ast.literal_eval(compressor_opt)

# Prepare Zarr group
store = zarr.storage.DirectoryStore(out)
omz = zarr.group(store=store, overwrite=True)


# Prepare chunking options
opt = {
'chunks': [1, 1] + [chunk, chunk],
'dimension_separator': r'/',
'order': 'F',
'dtype': 'uint8',
'fill_value': None,
'compressor': make_compressor(compressor, **compressor_opt),
}
print(opt)



# Write each level
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we should probably get the number of levels from the data, not hardcode it.

for level in range(nblevel):
omz_res = omz_data[level]
size = omz_res.shape[-2:]
shape = [1, n] + [i for i in size]

wkw_dataset_path = os.path.join(wkw_dir, get_mask_name(level))
wkw_dataset = wkw.Dataset.open(wkw_dataset_path)

omz.create_dataset(f'{level}', shape=shape, **opt)
array = omz[f'{level}']

# Write each slice
for idx in range(n):
if -1 in low_res_offsets[idx]:
continue

t, b, l, r = [k*2**(nblevel-level-1) for k in low_res_offsets[idx]]
height, width = size[0]-t-b, size[1]-l-r

data = wkw_dataset.read(off = (l, t, idx), shape = [width, height, 1])
data = data[0, :, :, 0]
data = np.transpose(data, (1, 0))
if dic:
data = np.array([[dic[data[i][j]] for j in range(data.shape[1])] for i in range(data.shape[0])])
subdat_size = data.shape

print('Convert level', level, 'with shape', shape, 'and slice', idx, 'with size', subdat_size)
if max_load is None or (subdat_size[-2] < max_load and subdat_size[-1] < max_load):
array[0, idx, t: t+subdat_size[-2], l: l+subdat_size[-1]] = data[...]
else:
ni = ceildiv(subdat_size[-2], max_load)
nj = ceildiv(subdat_size[-1], max_load)

for i in range(ni):
for j in range(nj):
print(f'\r{i+1}/{ni}, {j+1}/{nj}', end=' ')
start_x, end_x = i*max_load, min((i+1)*max_load, subdat_size[-2])
start_y, end_y = j*max_load, min((j+1)*max_load, subdat_size[-1])
array[0, idx, t + start_x: t + end_x, l + start_y: l + end_y] = data[start_x: end_x, start_y: end_y]
print('')


# Write OME-Zarr multiscale metadata
print('Write metadata')
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do the annotation have the exact same shape as the annotated zarr file? I feel like we should just copy that info from the input zarr.

omz.attrs["multiscales"] = omz_data.attrs["multiscales"]



def get_mask_name(level):
if level == 0:
return '1'
else:
return f'{2**level}-{2**level}-1'


def ceildiv(x, y):
return int(math.ceil(x / y))


def cal_distance(img):
m = img.shape[0]
for i in range(m):
cnt = np.sum(img[i, :])
if cnt > 0:
return i
return m


def find_borders(img):
if np.max(img) == 0:
return [-1, -1, -1, -1]
t = cal_distance(img)
b = cal_distance(img[::-1])
l = cal_distance(np.rot90(img, k=3))
r = cal_distance(np.rot90(img, k=1))

return [max(0, k-1) for k in [t, b, l, r]]


def make_compressor(name, **prm):
if not isinstance(name, str):
return name
name = name.lower()
if name == 'blosc':
Compressor = numcodecs.Blosc
elif name == 'zlib':
Compressor = numcodecs.Zlib
else:
raise ValueError('Unknown compressor', name)
return Compressor(**prm)



if __name__ == "__main__":
app()
Loading