From af1d4e8ccc2b306ad5c09d84058ece45930c7e00 Mon Sep 17 00:00:00 2001 From: Jingjing Wu Date: Wed, 20 Nov 2024 13:36:18 -0500 Subject: [PATCH 1/9] Refactored webknossos annotation converter --- linc_convert/modalities/__init__.py | 4 +- linc_convert/modalities/wk/__init__.py | 4 + linc_convert/modalities/wk/cli.py | 9 + .../modalities/wk/webknossos_annotation.py | 189 ++++++++++++++++++ tests/data/generate_trusted_result.py | 24 ++- tests/data/wk.zarr.zip | Bin 0 -> 116963 bytes tests/helper.py | 7 +- tests/test_df.py | 2 +- tests/test_wk.py | 104 ++++++++++ 9 files changed, 333 insertions(+), 10 deletions(-) create mode 100644 linc_convert/modalities/wk/__init__.py create mode 100644 linc_convert/modalities/wk/cli.py create mode 100644 linc_convert/modalities/wk/webknossos_annotation.py create mode 100644 tests/data/wk.zarr.zip create mode 100644 tests/test_wk.py diff --git a/linc_convert/modalities/__init__.py b/linc_convert/modalities/__init__.py index 7ae40c8..974d52f 100644 --- a/linc_convert/modalities/__init__.py +++ b/linc_convert/modalities/__init__.py @@ -1,4 +1,4 @@ """Converters for all imaging modalities.""" -__all__ = ["df", "lsm"] -from . import df, lsm +__all__ = ["df", "lsm", "wk"] +from . import df, lsm, wk diff --git a/linc_convert/modalities/wk/__init__.py b/linc_convert/modalities/wk/__init__.py new file mode 100644 index 0000000..100cb33 --- /dev/null +++ b/linc_convert/modalities/wk/__init__.py @@ -0,0 +1,4 @@ +"""Light Sheet Microscopy converters.""" + +__all__ = ["cli", "webknossos_annotation"] +from . import cli, webknossos_annotation diff --git a/linc_convert/modalities/wk/cli.py b/linc_convert/modalities/wk/cli.py new file mode 100644 index 0000000..45a9a7c --- /dev/null +++ b/linc_convert/modalities/wk/cli.py @@ -0,0 +1,9 @@ +"""Entry-points for Webknossos annotation converter.""" + +from cyclopts import App + +from linc_convert.cli import main + +help = "Converters for Webknossos annotation" +wk = App(name="wk", help=help) +main.command(wk) diff --git a/linc_convert/modalities/wk/webknossos_annotation.py b/linc_convert/modalities/wk/webknossos_annotation.py new file mode 100644 index 0000000..60efe3d --- /dev/null +++ b/linc_convert/modalities/wk/webknossos_annotation.py @@ -0,0 +1,189 @@ +""" +Convert annotations downloaded from webknossos into ome.zarr format following czyx direction + +""" + +# stdlib +import os +import ast + +# externals +import wkw +import json +import zarr +import shutil +import cyclopts +import numpy as np +from cyclopts import App + +# internals +from linc_convert.modalities.wk.cli import wk +from linc_convert.utils.math import ceildiv +from linc_convert.utils.zarr import make_compressor + + +webknossos = cyclopts.App(name="webknossos", help_format="markdown") +wk.command(webknossos) + + +@webknossos.default +def convert( + wkw_dir: str = None, + ome_dir: str = None, + dst: str = None, + dic: str = None, + *, + chunk: int = 1024, + compressor: str = 'blosc', + compressor_opt: str = "{}", + max_load: int = 16384, +) -> None: + """ + Converts annotations(in .wkw format) from webknossos to ome.zarr format following czyx direction + which is the same as underlying dataset. + + It calculates offset from low-res images and set offset for other resolution levels accordingly. + + Parameters + ---------- + wkw_dir + Path to unzipped manual annotation folder, for example: .../annotation_folder/data_Volume + ome_dir + Path to underlying ome.zarr dataset + dst + Path to output directory [.ome.zarr] + dic + Dictionary of mapping annotation value to standard value, in case the annotation doesn't follow the standard of + 0: background + 1: Light Bundle + 2: Moderate Bundle + 3: Dense Bundle + 4: Light Terminal + 5: Moderate Terminal + 6: Dense Terminal + 7: Single Fiber + """ + + dic = json.loads(dic) + + # load underlying dataset info to get size info + omz_data = zarr.open_group(ome_dir, mode='r') + nblevel = len([i for i in os.listdir(ome_dir) if i.isdigit()]) + wkw_dataset_path = os.path.join(wkw_dir, get_mask_name(nblevel-1)) + wkw_dataset = wkw.Dataset.open(wkw_dataset_path) + + low_res_offsets = [] + omz_res = omz_data[nblevel-1] + n = omz_res.shape[1] + size = omz_res.shape[-2:] + for idx in range(n): + offset_x, offset_y = 0, 0 + data = wkw_dataset.read(off = (offset_y, offset_x, idx), shape = [size[1], size[0], 1]) + data = data[0, :, :, 0] + data = np.transpose(data, (1, 0)) + [t,b,l,r] = find_borders(data) + low_res_offsets.append([t,b,l,r]) + + # setup save info + basename = os.path.basename(ome_dir)[:-9] + initials = wkw_dir.split('/')[-2][:2] + out = os.path.join(dst, basename + '_dsec_' + initials + '.ome.zarr') + if os.path.exists(out): + shutil.rmtree(out) + os.makedirs(out, exist_ok=True) + + if isinstance(compressor_opt, str): + compressor_opt = ast.literal_eval(compressor_opt) + + # Prepare Zarr group + store = zarr.storage.DirectoryStore(out) + omz = zarr.group(store=store, overwrite=True) + + + # Prepare chunking options + opt = { + 'chunks': [1, 1] + [chunk, chunk], + 'dimension_separator': r'/', + 'order': 'F', + 'dtype': 'uint8', + 'fill_value': None, + 'compressor': make_compressor(compressor, **compressor_opt), + } + print(opt) + + + # Write each level + for level in range(nblevel): + omz_res = omz_data[level] + size = omz_res.shape[-2:] + shape = [1, n] + [i for i in size] + + wkw_dataset_path = os.path.join(wkw_dir, get_mask_name(level)) + wkw_dataset = wkw.Dataset.open(wkw_dataset_path) + + omz.create_dataset(f'{level}', shape=shape, **opt) + array = omz[f'{level}'] + + # Write each slice + for idx in range(n): + if -1 in low_res_offsets[idx]: + array[0, idx, :1, :1] = np.zeros((1, 1), dtype=np.uint8) + continue + + t, b, l, r = [k*2**(nblevel-level-1) for k in low_res_offsets[idx]] + height, width = size[0]-t-b, size[1]-l-r + + data = wkw_dataset.read(off = (l, t, idx), shape = [width, height, 1]) + data = data[0, :, :, 0] + data = np.transpose(data, (1, 0)) + if dic: + data = np.array([[dic[data[i][j]] for j in range(data.shape[1])] for i in range(data.shape[0])]) + subdat_size = data.shape + + print('Convert level', level, 'with shape', shape, 'and slice', idx, 'with size', subdat_size) + if max_load is None or (subdat_size[-2] < max_load and subdat_size[-1] < max_load): + array[0, idx, t: t+subdat_size[-2], l: l+subdat_size[-1]] = data[...] + else: + ni = ceildiv(subdat_size[-2], max_load) + nj = ceildiv(subdat_size[-1], max_load) + + for i in range(ni): + for j in range(nj): + print(f'\r{i+1}/{ni}, {j+1}/{nj}', end=' ') + start_x, end_x = i*max_load, min((i+1)*max_load, subdat_size[-2]) + start_y, end_y = j*max_load, min((j+1)*max_load, subdat_size[-1]) + array[0, idx, t + start_x: t + end_x, l + start_y: l + end_y] = data[start_x: end_x, start_y: end_y] + print('') + + + # Write OME-Zarr multiscale metadata + print('Write metadata') + omz.attrs["multiscales"] = omz_data.attrs["multiscales"] + + + +def get_mask_name(level): + if level == 0: + return '1' + else: + return f'{2**level}-{2**level}-1' + + +def cal_distance(img): + m = img.shape[0] + for i in range(m): + cnt = np.sum(img[i, :]) + if cnt > 0: + return i + return m + + +def find_borders(img): + if np.max(img) == 0: + return [-1, -1, -1, -1] + t = cal_distance(img) + b = cal_distance(img[::-1]) + l = cal_distance(np.rot90(img, k=3)) + r = cal_distance(np.rot90(img, k=1)) + + return [max(0, k-1) for k in [t, b, l, r]] diff --git a/tests/data/generate_trusted_result.py b/tests/data/generate_trusted_result.py index 8efd5ce..f7cc073 100644 --- a/tests/data/generate_trusted_result.py +++ b/tests/data/generate_trusted_result.py @@ -1,14 +1,18 @@ -import glob import os import tempfile import zipfile +from glob import glob +from pathlib import Path import test_df import test_lsm +import test_wk import zarr from linc_convert.modalities.df import multi_slice from linc_convert.modalities.lsm import mosaic +from linc_convert.modalities.wk import webknossos_annotation + if __name__ == "__main__": with tempfile.TemporaryDirectory() as tmp_dir: @@ -22,8 +26,26 @@ multi_slice.convert(files, output_zarr) zarr.copy_all(zarr.open(output_zarr), zarr.open("data/df.zarr.zip", "w")) + with tempfile.TemporaryDirectory() as tmp_dir: test_lsm._write_test_data(tmp_dir) output_zarr = os.path.join(tmp_dir, "output.zarr") mosaic.convert(tmp_dir, output_zarr) zarr.copy_all(zarr.open(output_zarr), zarr.open("data/lsm.zarr.zip", "w")) + + + with tempfile.TemporaryDirectory() as tmp_dir: + test_wk._write_test_data(tmp_dir) + + tmp_dir = Path(tmp_dir) + wkw_dir = str(tmp_dir / "wkw") + ome_dir = str(tmp_dir / "ome") + + basename = os.path.basename(ome_dir)[:-9] + initials = wkw_dir.split('/')[-2][:2] + output_zarr = os.path.join(tmp_dir, + basename + '_dsec_' + initials + '.ome.zarr') + + webknossos_annotation.convert(wkw_dir, ome_dir, tmp_dir, "{}") + zarr.copy_all(zarr.open(output_zarr), zarr.open("data/wk.zarr.zip", "w")) + diff --git a/tests/data/wk.zarr.zip b/tests/data/wk.zarr.zip new file mode 100644 index 0000000000000000000000000000000000000000..74ceee79f758422d7024f3494f159b7ba52c5e9c GIT binary patch literal 116963 zcmeHQQD_{;8D7beod{DLGyng8-^}?w&Hg*MZ`}i1EP6b4yng7>YbWnL!d}yJ1ABH)AFEa- zCr)>6V-I_}P_4dnv{D@_)a@s?^>oe*28`eM{LbdHZI(sPhnR8hiEgqeOnK(T;mPsQ z8nb-S>5jV0e_#4C*VE@a!QMHo4!g(fVr6WiTB_A5Ro0i=x8vR~%9Y}1ZDhK{4(A&7 zcClPKQ7W^8JN*a83uDyKE>HK_zT+cD=pgH=R`egP4No3DS|)?ML1*c1CmV6=P`x|cO z`vVzT0jLApUmf~jlV#D<1uEC={Is`iXV|f$R=aa@@BxM*d-rmuW&Nn#vVOkavflZY zWlcP2S-;w7SsU50F00MDhex(E@Q*>p67V70B=AKKKKPP>m-oO2kiee-@WG!Ayape@ z0e=R-2Y)v38hii;{2B205Y(U6Umn^o${KkRHbhy2Bn^@?NZueRgCq@-Hb~}xtl`gd z#$RLE$v6Ro@WBTmAsK%QK4hE#Lipf=kdTbO1s^g_03m$vK}bl(-+~VrCx8&X$A_S- zaqRVlx+rV(EGKJ_q``M;keop>2T2>)ts$g4t8cOFWEFXN+~9*eFIhbbK4cYndEDTG zJTF;23O-~Nd3oI6gFG);JqkW#6?u8w9v_0T#+#!zdPP~IcR5*u%NQhWkgUOXZa~(M zXFYRAvFv0H@?`@CAC^r|=1zhSnS*@UfWe1l)04TA;6vshUp8RyVcGO#?j-n-Imnj{ z*yBS`)|h{5>xZJO(I=BN=%N$42!&(}UT1@34ce(e5(i|BWs{$slUR1LgLRZh!G|bW z%1#^bAv;({i4=T@lBMjl0Uxr1b(BcKhbURfP8;wcJ6K1F)Z;@?*7)(Cue`^^3X7gg zwO#A=YVEX81a|NI*@G{$BBJ!9hItf$4b*#$xd`m8Mh)0q&6I#0ntOi0FH_~82q^(e z5*5`;AxVnYOCgzxZ~ zV>-gk#Ron#moL*>gAb-7++2L%Lv#5uy*2n?I>ODx2R<~HFVkCt52hpBTznoMg0e=( znY-siStGOrEJ+w7W$+p)ByZ5p4cf5*S)(~LjBSl&$5?1r6&U!isq#6q8O%|DP7=W z#1tgsi35CyCy%D|0zR06WIS`&4)7tKJetxA_+Sc>@x)0z6tz=zaB7;69?AA+*R;|n*sL|H>x0+!#^hGY%COM_$$ zE_XoIh!w2#c7|mqz1St;Irxx7R@2)j_>f-glJFdSNFuB0?Gt=RFLp_I4n8E2)%5lW zKBO1BBs}-{5R^3*`#(D`${NxVuw1?%DT7NJv{QrR4%)c^StAL|QrIV!ofLwWpv&Mx zg2_!`*Wg16K}*nO@FBtErm$=9A%&nN=rZ__U~*H~HTaN1&=Pdn<3mu^cxB+?Wl`3U zmVhNGgX9ZdTZ5N?C20e-H4=m^ZCzv8NgHCx`U^fJo8Pqc4nCv}v1I)PACk>)+Ij~c z(uP>F{(=w5<~MD`DB*;QHsw)o{$e+k%Llz>gL1Z+>E1Z+<; zC14jj-@ogZsd~doz>*Zjr74oENS-3eiq}lR?we$1V`_TG$4F|RYKf!ZLrc<}nnA#a z)Iik|N5O}dq&YQ%fDfsGswIws4=qV^Y6bxxQUg^>9QF7Rl&Lx{JbP1=HKZkA`R*J4 z#YJ4^Ac+IAMoU1{e-JD?`eW2e55b34W?TQUzz6*?YNdzZLo2hb|5)II{us5=L-3)M z+17t7@Iil!TIr$3hoG#n?$W@1QPz-_fF)UjWDNf<4U#yh94ysOfvnL=EHxer%Z|q2 zC~ysYP>^(uhXfxq21kKw;DdssYdj?QpfNZKTmv5zBwgbn!3T}OQQ(@#hoG!+>g1J8 zqO2h;0ZZ})NgH(G2}v6yaqtUJP%A|NoVpH)Wk*+J)c6HHsL8voqk<2*BBRDH@Ig)9 zbsZIa&=naqet{2a@~-Qs;DfHnsPW6=Lr~Uu`r*Y(qO1{G0+ve}v^#@efI^Z6?rft5 zQ7uQsvZEy|%A5ipl%-zFfx!nYVNvE3_@FHHS`G|8XbFolr@#kgsn>E~@IgyhlsV<` zAt-BzC19P(uF?{)fB)ZW{t~c#C;^*f3D|t21Z=*U60p;MeDIE6rs`X%1T0Ba{3gsK zPtmR%)Jjn{N%b2TA0zr9%vIfZ+Byo_m0a-&Ou9}UFWk)kW6uSdH zCd~qO2h;0n6nKUiy{G9JmNY4|KKh0?UpzP-x)@ z_@D(0+Bg9|Xaj{7j(`tZz@Uv2;Da_$XyFL>pal%tH~~Ir1BDijczg)T8n0bA{cBOy z2rU82%f8Y@D0JZoFaJuC2X<>{0kV20u~VVO6}__yr-bu#lpeu$?lIPWyf9W`{nFF>S^Zw0aoy7I&3(FK262sQ?Xi! zEl;PC_bGtp;UJRskwU#tE7gO%B*^Izf^o4@sU8^_FVsu>tA+8}(Mol!P#>v`%R6zq z3hRuYwVmk^m>v9-_?s2C2?ilHp!cyh3S>|mO~|Ntx-ki9{@*{fczekd@UwQE zKqxy1M9N<-O8#PP6ifd6VTN`%BRvxWkv&nU57W|@yB`(_9C8?JV`c-px^m@t#TrRd z%-I-{YJS@6*|2lv{N=iL#*)7TXG0#dfzg%y^=f4_S)*9cbU*w;yOJzMd_u_=C4N@n zXG6X_!farTLW8ClqOK+h&W0Y$1_s2gD6@eziUmyrXG3q4*}xjblD`CJLoa3nn+^H? zDAR#j#e%1S^Wg_k<^yXKOa2m^4}FsP5b#;}1Y6Q%jLxLCrZ^C z`-<&+Tj}VDxl+&MG2udRK{x-Cq5vaH9B*AG2{@p*^a<=U@>nO+Xvm=_VzdW>G zZuTc8qmh|Y$5u2u_WD9yZuXn+xMohB7}4y_(Hp&Tv(Y}+%()IrG@E~G>xXi)r}nsJ z&ORs6OjHgVDcO2}%shcmG}}9W_Mrb(QVp}Gm{~pyfT6kP2jouv^B|eAZ}apcj&^SWBbc>*Gvk);|n*s^#EcSnPUT*fwTdtWDfL8`CUY46Z^B>o&NdUUq+uTf&L@si2Z}H2kasZxTX3jm> zs@1}jXtvn-{$06~U#%mv5CA$ZJbP1a_SY@0nG}F^mj*&suD5r%W>Nr7oxHM1K3cus zHIoAH^uvpnHF!e)SQH@gOv_c61jd)tWsh<8_OnEm$T>`9 literal 0 HcmV?d00001 diff --git a/tests/helper.py b/tests/helper.py index eb865d7..3f5a3f5 100644 --- a/tests/helper.py +++ b/tests/helper.py @@ -21,8 +21,6 @@ def _cmp_zarr_archives(path1: str, path2: str) -> bool: # Compare keys (dataset structure) if zarr1.keys() != zarr2.keys(): - print(list(zarr1.keys())) - print(list(zarr2.keys())) print("keys mismatch") return False if zarr1.attrs != zarr2.attrs: @@ -34,10 +32,7 @@ def _cmp_zarr_archives(path1: str, path2: str) -> bool: array1 = zarr1[key][:] array2 = zarr2[key][:] - # Check for equality of the arrays - if not np.array_equal(array1, array2): - print(f"Mismatch found in dataset: {key}") - return False + np.testing.assert_allclose(array1, array2) if zarr1[key].attrs != zarr2[key].attrs: print("attrs mismatch") return False diff --git a/tests/test_df.py b/tests/test_df.py index bcac807..3fa0317 100644 --- a/tests/test_df.py +++ b/tests/test_df.py @@ -19,7 +19,7 @@ def _write_test_data(directory: str) -> None: def test_df(tmp_path): - # _write_test_data(tmp_path) + _write_test_data(tmp_path) with zipfile.ZipFile("data/df_input.zip", "r") as z: z.extractall(tmp_path) output_zarr = tmp_path / "output.zarr" diff --git a/tests/test_wk.py b/tests/test_wk.py new file mode 100644 index 0000000..f4c3704 --- /dev/null +++ b/tests/test_wk.py @@ -0,0 +1,104 @@ + +import os + +import numpy as np +import wkw +import zarr +from helper import _cmp_zarr_archives + +from linc_convert.modalities.wk import webknossos_annotation + + +def _write_test_data(directory: str) -> None: + wkw_dir = f'{directory}/wkw' + ome_dir = f'{directory}/ome' + + store = zarr.storage.DirectoryStore(ome_dir) + omz = zarr.group(store=store, overwrite=True) + + for level in range(5): + size = 2**(4-level) + wkw_array = np.zeros((size, size, 5), dtype=np.uint8) + ome_array = np.zeros((1, 5, size, size), dtype=np.uint8) + + wkw_filepath = os.path.join(wkw_dir, get_mask_name(level)) + with wkw.Dataset.create(wkw_filepath, wkw.Header(np.uint8)) as dataset: + dataset.write((0, 0, 0), wkw_array) + + omz.create_dataset(f'{level}', shape=[1, 5, size, size]) + array = omz[f'{level}'] + array[...] = ome_array + + multiscales = [{ + 'version': '0.4', + 'axes': [ + {"name": "c", "type": "space", "unit": "millimeter"}, + {"name": "z", "type": "space", "unit": "millimeter"}, + {"name": "y", "type": "space", "unit": "micrometer"}, + {"name": "x", "type": "space", "unit": "micrometer"} + ], + 'datasets': [], + 'type': 'jpeg2000', + 'name': '', + }] + for n in range(5): + multiscales[0]['datasets'].append({}) + level = multiscales[0]['datasets'][-1] + level["path"] = str(n) + + level["coordinateTransformations"] = [ + { + "type": "scale", + "scale": [ + 1.0, + 1.0, + float(2**n), + float(2**n), + ] + }, + { + "type": "translation", + "translation": [ + 0.0, + 0.0, + float(2**n - 1) *0.5, + float(2**n - 1) *0.5, + ] + } + ] + omz.attrs["multiscales"] = multiscales + + + + +def test_wk(tmp_path): + _write_test_data(tmp_path) + + wkw_dir = str(tmp_path / "wkw") + ome_dir = str(tmp_path / "ome") + basename = os.path.basename(ome_dir)[:-9] + initials = wkw_dir.split('/')[-2][:2] + output_zarr = os.path.join(tmp_path, basename + '_dsec_' + initials + '.ome.zarr') + + print("starting the convert process") + webknossos_annotation.convert(wkw_dir, ome_dir, tmp_path, "{}") + + z = zarr.open(output_zarr, mode='r') + for level in range(5): + print("output_zarr has", np.shape(z[level]), np.unique(z[level])) + + z = zarr.open('data/wk.zarr.zip', mode='r') + for level in range(5): + print("trusted result has", np.shape(z[level]), np.unique(z[level])) + + + assert _cmp_zarr_archives(str(output_zarr), "data/wk.zarr.zip") + + + +def get_mask_name(level): + if level == 0: + return '1' + else: + return f'{2**level}-{2**level}-1' + From b5a86344e35bd42bc1329de7a833a5c79ca0d57c Mon Sep 17 00:00:00 2001 From: Jingjing Wu Date: Wed, 20 Nov 2024 14:38:14 -0500 Subject: [PATCH 2/9] Refactor webknossos annotation converter --- linc_convert/modalities/wk/__init__.py | 2 +- .../modalities/wk/webknossos_annotation.py | 144 +++++++++++++----- tests/data/generate_trusted_result.py | 3 +- 3 files changed, 110 insertions(+), 39 deletions(-) diff --git a/linc_convert/modalities/wk/__init__.py b/linc_convert/modalities/wk/__init__.py index 100cb33..85d95ec 100644 --- a/linc_convert/modalities/wk/__init__.py +++ b/linc_convert/modalities/wk/__init__.py @@ -1,4 +1,4 @@ -"""Light Sheet Microscopy converters.""" +"""Webknossos annotation converters.""" __all__ = ["cli", "webknossos_annotation"] from . import cli, webknossos_annotation diff --git a/linc_convert/modalities/wk/webknossos_annotation.py b/linc_convert/modalities/wk/webknossos_annotation.py index 60efe3d..785f54f 100644 --- a/linc_convert/modalities/wk/webknossos_annotation.py +++ b/linc_convert/modalities/wk/webknossos_annotation.py @@ -1,27 +1,27 @@ """ -Convert annotations downloaded from webknossos into ome.zarr format following czyx direction +Convert annotation downloaded from webknossos into ome.zarr format. +following czyx direction. """ # stdlib -import os import ast - -# externals -import wkw import json -import zarr +import os import shutil + import cyclopts import numpy as np -from cyclopts import App + +# externals +import wkw +import zarr # internals from linc_convert.modalities.wk.cli import wk from linc_convert.utils.math import ceildiv from linc_convert.utils.zarr import make_compressor - webknossos = cyclopts.App(name="webknossos", help_format="markdown") wk.command(webknossos) @@ -39,21 +39,25 @@ def convert( max_load: int = 16384, ) -> None: """ - Converts annotations(in .wkw format) from webknossos to ome.zarr format following czyx direction - which is the same as underlying dataset. + Convert annotations(in .wkw format) from webknossos to ome.zarr format. + + Following czyx direction which is the same as underlying dataset. - It calculates offset from low-res images and set offset for other resolution levels accordingly. + It calculates offset from low-res images + and set offset for other resolution levels accordingly. Parameters ---------- wkw_dir - Path to unzipped manual annotation folder, for example: .../annotation_folder/data_Volume + Path to unzipped manual annotation folder, + for example: .../annotation_folder/data_Volume ome_dir Path to underlying ome.zarr dataset dst - Path to output directory [.ome.zarr] + Path to output directory [.ome.zarr] dic - Dictionary of mapping annotation value to standard value, in case the annotation doesn't follow the standard of + Dictionary of mapping given annotation value to following standard value, + in case the annotation doesn't match the following standard 0: background 1: Light Bundle 2: Moderate Bundle @@ -62,8 +66,8 @@ def convert( 5: Moderate Terminal 6: Dense Terminal 7: Single Fiber - """ + """ dic = json.loads(dic) # load underlying dataset info to get size info @@ -78,11 +82,12 @@ def convert( size = omz_res.shape[-2:] for idx in range(n): offset_x, offset_y = 0, 0 - data = wkw_dataset.read(off = (offset_y, offset_x, idx), shape = [size[1], size[0], 1]) + data = wkw_dataset.read(off = (offset_y, offset_x, idx), + shape = [size[1], size[0], 1]) data = data[0, :, :, 0] data = np.transpose(data, (1, 0)) - [t,b,l,r] = find_borders(data) - low_res_offsets.append([t,b,l,r]) + [t0,b0,l0,r0] = find_borders(data) + low_res_offsets.append([t0,b0,l0,r0]) # setup save info basename = os.path.basename(ome_dir)[:-9] @@ -130,19 +135,34 @@ def convert( array[0, idx, :1, :1] = np.zeros((1, 1), dtype=np.uint8) continue - t, b, l, r = [k*2**(nblevel-level-1) for k in low_res_offsets[idx]] - height, width = size[0]-t-b, size[1]-l-r + top, bottom, left, right = [k*2**(nblevel-level-1) + for k in low_res_offsets[idx]] + height, width = size[0]-top-bottom, size[1]-left-right - data = wkw_dataset.read(off = (l, t, idx), shape = [width, height, 1]) + data = wkw_dataset.read(off = (left, top, idx), shape = [width, height, 1]) data = data[0, :, :, 0] data = np.transpose(data, (1, 0)) if dic: - data = np.array([[dic[data[i][j]] for j in range(data.shape[1])] for i in range(data.shape[0])]) + data = np.array([[dic[data[i][j]] + for j in range(data.shape[1])] + for i in range(data.shape[0])]) subdat_size = data.shape - print('Convert level', level, 'with shape', shape, 'and slice', idx, 'with size', subdat_size) - if max_load is None or (subdat_size[-2] < max_load and subdat_size[-1] < max_load): - array[0, idx, t: t+subdat_size[-2], l: l+subdat_size[-1]] = data[...] + print('Convert level', + level, + 'with shape', + shape, + 'and slice', + idx, + 'with size', + subdat_size) + if max_load is None or ( + subdat_size[-2] < max_load and subdat_size[-1] < max_load + ): + array[0, + idx, + top: top+subdat_size[-2], + left: left+subdat_size[-1]] = data[...] else: ni = ceildiv(subdat_size[-2], max_load) nj = ceildiv(subdat_size[-1], max_load) @@ -150,9 +170,22 @@ def convert( for i in range(ni): for j in range(nj): print(f'\r{i+1}/{ni}, {j+1}/{nj}', end=' ') - start_x, end_x = i*max_load, min((i+1)*max_load, subdat_size[-2]) - start_y, end_y = j*max_load, min((j+1)*max_load, subdat_size[-1]) - array[0, idx, t + start_x: t + end_x, l + start_y: l + end_y] = data[start_x: end_x, start_y: end_y] + start_x, end_x = i*max_load, + min( + (i+1)*max_load, + subdat_size[-2]) + + start_y, end_y = j*max_load, + min( + (j+1)*max_load, + subdat_size[-1]) + array[ + 0, + idx, + top + start_x: top + end_x, + left + start_y: left + end_y] = data[ + start_x: end_x, + start_y: end_y] print('') @@ -162,14 +195,40 @@ def convert( -def get_mask_name(level): +def get_mask_name(level: int) -> str: + """ + Return the name of the mask for a given resolution level. + + Parameters + ---------- + level : int + The resolution level for which to return the mask name. + + Returns + ------- + str + The name of the mask for the given level. + """ if level == 0: return '1' else: return f'{2**level}-{2**level}-1' -def cal_distance(img): +def cal_distance(img: np.ndarray) -> int: + """ + Return the distance of non-zero values to the top border. + + Parameters + ---------- + img : np.ndarray + The array to calculate distance of object inside to border + + Returns + ------- + int + The distance of non-zero to the top border + """ m = img.shape[0] for i in range(m): cnt = np.sum(img[i, :]) @@ -178,12 +237,25 @@ def cal_distance(img): return m -def find_borders(img): +def find_borders(img: np.ndarray) -> np.ndarray: + """ + Return the distances of non-zero values to four borders. + + Parameters + ---------- + img : np.ndarray + The array to calculate distance of object inside to border + + Returns + ------- + int + The distance of non-zero values to four borders + """ if np.max(img) == 0: return [-1, -1, -1, -1] - t = cal_distance(img) - b = cal_distance(img[::-1]) - l = cal_distance(np.rot90(img, k=3)) - r = cal_distance(np.rot90(img, k=1)) + top = cal_distance(img) + bottom = cal_distance(img[::-1]) + left = cal_distance(np.rot90(img, k=3)) + right = cal_distance(np.rot90(img, k=1)) - return [max(0, k-1) for k in [t, b, l, r]] + return [max(0, k-1) for k in [top, bottom, left, right]] diff --git a/tests/data/generate_trusted_result.py b/tests/data/generate_trusted_result.py index f7cc073..8c3b131 100644 --- a/tests/data/generate_trusted_result.py +++ b/tests/data/generate_trusted_result.py @@ -1,7 +1,7 @@ +import glob import os import tempfile import zipfile -from glob import glob from pathlib import Path import test_df @@ -13,7 +13,6 @@ from linc_convert.modalities.lsm import mosaic from linc_convert.modalities.wk import webknossos_annotation - if __name__ == "__main__": with tempfile.TemporaryDirectory() as tmp_dir: test_df._write_test_data(tmp_dir) From 1680f63f9cf879207afcf6bdb1810c0340239f7a Mon Sep 17 00:00:00 2001 From: jingjingwu1225 Date: Wed, 20 Nov 2024 19:38:37 +0000 Subject: [PATCH 3/9] style fixes by ruff --- .../modalities/wk/webknossos_annotation.py | 177 +++++++++--------- tests/data/generate_trusted_result.py | 12 +- 2 files changed, 92 insertions(+), 97 deletions(-) diff --git a/linc_convert/modalities/wk/webknossos_annotation.py b/linc_convert/modalities/wk/webknossos_annotation.py index 785f54f..773423b 100644 --- a/linc_convert/modalities/wk/webknossos_annotation.py +++ b/linc_convert/modalities/wk/webknossos_annotation.py @@ -1,5 +1,5 @@ """ -Convert annotation downloaded from webknossos into ome.zarr format. +Convert annotation downloaded from webknossos into ome.zarr format. following czyx direction. """ @@ -34,29 +34,29 @@ def convert( dic: str = None, *, chunk: int = 1024, - compressor: str = 'blosc', + compressor: str = "blosc", compressor_opt: str = "{}", max_load: int = 16384, ) -> None: """ - Convert annotations(in .wkw format) from webknossos to ome.zarr format. + Convert annotations(in .wkw format) from webknossos to ome.zarr format. - Following czyx direction which is the same as underlying dataset. + Following czyx direction which is the same as underlying dataset. - It calculates offset from low-res images - and set offset for other resolution levels accordingly. + It calculates offset from low-res images + and set offset for other resolution levels accordingly. Parameters ---------- - wkw_dir - Path to unzipped manual annotation folder, + wkw_dir + Path to unzipped manual annotation folder, for example: .../annotation_folder/data_Volume - ome_dir + ome_dir Path to underlying ome.zarr dataset - dst + dst Path to output directory [.ome.zarr] - dic - Dictionary of mapping given annotation value to following standard value, + dic + Dictionary of mapping given annotation value to following standard value, in case the annotation doesn't match the following standard 0: background 1: Light Bundle @@ -65,34 +65,35 @@ def convert( 4: Light Terminal 5: Moderate Terminal 6: Dense Terminal - 7: Single Fiber + 7: Single Fiber """ dic = json.loads(dic) - # load underlying dataset info to get size info - omz_data = zarr.open_group(ome_dir, mode='r') + # load underlying dataset info to get size info + omz_data = zarr.open_group(ome_dir, mode="r") nblevel = len([i for i in os.listdir(ome_dir) if i.isdigit()]) - wkw_dataset_path = os.path.join(wkw_dir, get_mask_name(nblevel-1)) + wkw_dataset_path = os.path.join(wkw_dir, get_mask_name(nblevel - 1)) wkw_dataset = wkw.Dataset.open(wkw_dataset_path) low_res_offsets = [] - omz_res = omz_data[nblevel-1] - n = omz_res.shape[1] + omz_res = omz_data[nblevel - 1] + n = omz_res.shape[1] size = omz_res.shape[-2:] for idx in range(n): offset_x, offset_y = 0, 0 - data = wkw_dataset.read(off = (offset_y, offset_x, idx), - shape = [size[1], size[0], 1]) + data = wkw_dataset.read( + off=(offset_y, offset_x, idx), shape=[size[1], size[0], 1] + ) data = data[0, :, :, 0] data = np.transpose(data, (1, 0)) - [t0,b0,l0,r0] = find_borders(data) - low_res_offsets.append([t0,b0,l0,r0]) + [t0, b0, l0, r0] = find_borders(data) + low_res_offsets.append([t0, b0, l0, r0]) - # setup save info - basename = os.path.basename(ome_dir)[:-9] - initials = wkw_dir.split('/')[-2][:2] - out = os.path.join(dst, basename + '_dsec_' + initials + '.ome.zarr') + # setup save info + basename = os.path.basename(ome_dir)[:-9] + initials = wkw_dir.split("/")[-2][:2] + out = os.path.join(dst, basename + "_dsec_" + initials + ".ome.zarr") if os.path.exists(out): shutil.rmtree(out) os.makedirs(out, exist_ok=True) @@ -104,97 +105,93 @@ def convert( store = zarr.storage.DirectoryStore(out) omz = zarr.group(store=store, overwrite=True) - # Prepare chunking options opt = { - 'chunks': [1, 1] + [chunk, chunk], - 'dimension_separator': r'/', - 'order': 'F', - 'dtype': 'uint8', - 'fill_value': None, - 'compressor': make_compressor(compressor, **compressor_opt), + "chunks": [1, 1] + [chunk, chunk], + "dimension_separator": r"/", + "order": "F", + "dtype": "uint8", + "fill_value": None, + "compressor": make_compressor(compressor, **compressor_opt), } print(opt) - # Write each level for level in range(nblevel): omz_res = omz_data[level] size = omz_res.shape[-2:] - shape = [1, n] + [i for i in size] - + shape = [1, n] + [i for i in size] + wkw_dataset_path = os.path.join(wkw_dir, get_mask_name(level)) wkw_dataset = wkw.Dataset.open(wkw_dataset_path) - omz.create_dataset(f'{level}', shape=shape, **opt) - array = omz[f'{level}'] + omz.create_dataset(f"{level}", shape=shape, **opt) + array = omz[f"{level}"] # Write each slice for idx in range(n): if -1 in low_res_offsets[idx]: array[0, idx, :1, :1] = np.zeros((1, 1), dtype=np.uint8) - continue - - top, bottom, left, right = [k*2**(nblevel-level-1) - for k in low_res_offsets[idx]] - height, width = size[0]-top-bottom, size[1]-left-right + continue - data = wkw_dataset.read(off = (left, top, idx), shape = [width, height, 1]) + top, bottom, left, right = [ + k * 2 ** (nblevel - level - 1) for k in low_res_offsets[idx] + ] + height, width = size[0] - top - bottom, size[1] - left - right + + data = wkw_dataset.read(off=(left, top, idx), shape=[width, height, 1]) data = data[0, :, :, 0] data = np.transpose(data, (1, 0)) if dic: - data = np.array([[dic[data[i][j]] - for j in range(data.shape[1])] - for i in range(data.shape[0])]) - subdat_size = data.shape - - print('Convert level', - level, - 'with shape', - shape, - 'and slice', - idx, - 'with size', - subdat_size) + data = np.array( + [ + [dic[data[i][j]] for j in range(data.shape[1])] + for i in range(data.shape[0]) + ] + ) + subdat_size = data.shape + + print( + "Convert level", + level, + "with shape", + shape, + "and slice", + idx, + "with size", + subdat_size, + ) if max_load is None or ( subdat_size[-2] < max_load and subdat_size[-1] < max_load - ): - array[0, - idx, - top: top+subdat_size[-2], - left: left+subdat_size[-1]] = data[...] + ): + array[ + 0, idx, top : top + subdat_size[-2], left : left + subdat_size[-1] + ] = data[...] else: ni = ceildiv(subdat_size[-2], max_load) nj = ceildiv(subdat_size[-1], max_load) - + for i in range(ni): for j in range(nj): - print(f'\r{i+1}/{ni}, {j+1}/{nj}', end=' ') - start_x, end_x = i*max_load, - min( - (i+1)*max_load, - subdat_size[-2]) - - start_y, end_y = j*max_load, - min( - (j+1)*max_load, - subdat_size[-1]) + print(f"\r{i+1}/{ni}, {j+1}/{nj}", end=" ") + start_x, end_x = (i * max_load,) + min((i + 1) * max_load, subdat_size[-2]) + + start_y, end_y = (j * max_load,) + min((j + 1) * max_load, subdat_size[-1]) array[ - 0, - idx, - top + start_x: top + end_x, - left + start_y: left + end_y] = data[ - start_x: end_x, - start_y: end_y] - print('') - - + 0, + idx, + top + start_x : top + end_x, + left + start_y : left + end_y, + ] = data[start_x:end_x, start_y:end_y] + print("") + # Write OME-Zarr multiscale metadata - print('Write metadata') + print("Write metadata") omz.attrs["multiscales"] = omz_data.attrs["multiscales"] - def get_mask_name(level: int) -> str: """ Return the name of the mask for a given resolution level. @@ -210,9 +207,9 @@ def get_mask_name(level: int) -> str: The name of the mask for the given level. """ if level == 0: - return '1' + return "1" else: - return f'{2**level}-{2**level}-1' + return f"{2**level}-{2**level}-1" def cal_distance(img: np.ndarray) -> int: @@ -234,7 +231,7 @@ def cal_distance(img: np.ndarray) -> int: cnt = np.sum(img[i, :]) if cnt > 0: return i - return m + return m def find_borders(img: np.ndarray) -> np.ndarray: @@ -254,8 +251,8 @@ def find_borders(img: np.ndarray) -> np.ndarray: if np.max(img) == 0: return [-1, -1, -1, -1] top = cal_distance(img) - bottom = cal_distance(img[::-1]) - left = cal_distance(np.rot90(img, k=3)) + bottom = cal_distance(img[::-1]) + left = cal_distance(np.rot90(img, k=3)) right = cal_distance(np.rot90(img, k=1)) - return [max(0, k-1) for k in [top, bottom, left, right]] + return [max(0, k - 1) for k in [top, bottom, left, right]] diff --git a/tests/data/generate_trusted_result.py b/tests/data/generate_trusted_result.py index 8c3b131..3d76f37 100644 --- a/tests/data/generate_trusted_result.py +++ b/tests/data/generate_trusted_result.py @@ -25,14 +25,12 @@ multi_slice.convert(files, output_zarr) zarr.copy_all(zarr.open(output_zarr), zarr.open("data/df.zarr.zip", "w")) - with tempfile.TemporaryDirectory() as tmp_dir: test_lsm._write_test_data(tmp_dir) output_zarr = os.path.join(tmp_dir, "output.zarr") mosaic.convert(tmp_dir, output_zarr) zarr.copy_all(zarr.open(output_zarr), zarr.open("data/lsm.zarr.zip", "w")) - with tempfile.TemporaryDirectory() as tmp_dir: test_wk._write_test_data(tmp_dir) @@ -40,11 +38,11 @@ wkw_dir = str(tmp_dir / "wkw") ome_dir = str(tmp_dir / "ome") - basename = os.path.basename(ome_dir)[:-9] - initials = wkw_dir.split('/')[-2][:2] - output_zarr = os.path.join(tmp_dir, - basename + '_dsec_' + initials + '.ome.zarr') + basename = os.path.basename(ome_dir)[:-9] + initials = wkw_dir.split("/")[-2][:2] + output_zarr = os.path.join( + tmp_dir, basename + "_dsec_" + initials + ".ome.zarr" + ) webknossos_annotation.convert(wkw_dir, ome_dir, tmp_dir, "{}") zarr.copy_all(zarr.open(output_zarr), zarr.open("data/wk.zarr.zip", "w")) - From cb134c734b0a1fd20066469303204f4801abc362 Mon Sep 17 00:00:00 2001 From: Jingjing Wu <49615883+jingjingwu1225@users.noreply.github.com> Date: Wed, 20 Nov 2024 15:21:46 -0500 Subject: [PATCH 4/9] Update webknossos_annotation.py --- .../modalities/wk/webknossos_annotation.py | 211 +++++++++--------- 1 file changed, 108 insertions(+), 103 deletions(-) diff --git a/linc_convert/modalities/wk/webknossos_annotation.py b/linc_convert/modalities/wk/webknossos_annotation.py index 773423b..af0bf7d 100644 --- a/linc_convert/modalities/wk/webknossos_annotation.py +++ b/linc_convert/modalities/wk/webknossos_annotation.py @@ -1,8 +1,5 @@ -""" -Convert annotation downloaded from webknossos into ome.zarr format. +"""Convert annotation downloaded from webknossos into ome.zarr format.""" -following czyx direction. -""" # stdlib import ast @@ -34,66 +31,70 @@ def convert( dic: str = None, *, chunk: int = 1024, - compressor: str = "blosc", + compressor: str = 'blosc', compressor_opt: str = "{}", max_load: int = 16384, ) -> None: """ - Convert annotations(in .wkw format) from webknossos to ome.zarr format. + Convert annotations (in .wkw format) from webknossos to ome.zarr format. - Following czyx direction which is the same as underlying dataset. - - It calculates offset from low-res images - and set offset for other resolution levels accordingly. + This script converts annotations from webknossos, following the czyx direction, + to the ome.zarr format. + The conversion ensures that the annotations match the underlying dataset. Parameters ---------- - wkw_dir - Path to unzipped manual annotation folder, - for example: .../annotation_folder/data_Volume - ome_dir - Path to underlying ome.zarr dataset - dst - Path to output directory [.ome.zarr] - dic - Dictionary of mapping given annotation value to following standard value, - in case the annotation doesn't match the following standard - 0: background - 1: Light Bundle - 2: Moderate Bundle - 3: Dense Bundle - 4: Light Terminal - 5: Moderate Terminal - 6: Dense Terminal - 7: Single Fiber - + wkw_dir : str + Path to the unzipped manual annotation folder downloaded from webknossos + in .wkw format. For example: .../annotation_folder/data_Volume. + ome_dir : str + Path to the underlying ome.zarr dataset, following the BIDS naming standard. + dst : str + Path to the output directory for saving the converted ome.zarr. + The ome.zarr file name is generated automatically based on ome_dir + and the initials of the annotator. + dic : dict + A dictionary mapping annotation values to the following standard values + if annotation doesn't match the standard. + The dictionary should be in single quotes, with keys in double quotes, + for example: dic = '{"2": 1, "4": 2}'. + The standard values are: + - 0: background + - 1: Light Bundle + - 2: Moderate Bundle + - 3: Dense Bundle + - 4: Light Terminal + - 5: Moderate Terminal + - 6: Dense Terminal + - 7: Single Fiber """ dic = json.loads(dic) + dic = {int(key): int(value) for key,value in dic.items()} + - # load underlying dataset info to get size info - omz_data = zarr.open_group(ome_dir, mode="r") + # load underlying dataset info to get size info + omz_data = zarr.open_group(ome_dir, mode='r') nblevel = len([i for i in os.listdir(ome_dir) if i.isdigit()]) - wkw_dataset_path = os.path.join(wkw_dir, get_mask_name(nblevel - 1)) + wkw_dataset_path = os.path.join(wkw_dir, get_mask_name(nblevel-1)) wkw_dataset = wkw.Dataset.open(wkw_dataset_path) low_res_offsets = [] - omz_res = omz_data[nblevel - 1] - n = omz_res.shape[1] + omz_res = omz_data[nblevel-1] + n = omz_res.shape[1] size = omz_res.shape[-2:] for idx in range(n): offset_x, offset_y = 0, 0 - data = wkw_dataset.read( - off=(offset_y, offset_x, idx), shape=[size[1], size[0], 1] - ) + data = wkw_dataset.read(off = (offset_y, offset_x, idx), + shape = [size[1], size[0], 1]) data = data[0, :, :, 0] data = np.transpose(data, (1, 0)) - [t0, b0, l0, r0] = find_borders(data) - low_res_offsets.append([t0, b0, l0, r0]) + [t0,b0,l0,r0] = find_borders(data) + low_res_offsets.append([t0,b0,l0,r0]) - # setup save info - basename = os.path.basename(ome_dir)[:-9] - initials = wkw_dir.split("/")[-2][:2] - out = os.path.join(dst, basename + "_dsec_" + initials + ".ome.zarr") + # setup save info + basename = os.path.basename(ome_dir)[:-9] + initials = wkw_dir.split('/')[-2][:2] + out = os.path.join(dst, basename + '_dsec_' + initials + '.ome.zarr') if os.path.exists(out): shutil.rmtree(out) os.makedirs(out, exist_ok=True) @@ -105,93 +106,97 @@ def convert( store = zarr.storage.DirectoryStore(out) omz = zarr.group(store=store, overwrite=True) + # Prepare chunking options opt = { - "chunks": [1, 1] + [chunk, chunk], - "dimension_separator": r"/", - "order": "F", - "dtype": "uint8", - "fill_value": None, - "compressor": make_compressor(compressor, **compressor_opt), + 'chunks': [1, 1] + [chunk, chunk], + 'dimension_separator': r'/', + 'order': 'F', + 'dtype': 'uint8', + 'fill_value': None, + 'compressor': make_compressor(compressor, **compressor_opt), } print(opt) + # Write each level for level in range(nblevel): omz_res = omz_data[level] size = omz_res.shape[-2:] - shape = [1, n] + [i for i in size] - + shape = [1, n] + [i for i in size] + wkw_dataset_path = os.path.join(wkw_dir, get_mask_name(level)) wkw_dataset = wkw.Dataset.open(wkw_dataset_path) - omz.create_dataset(f"{level}", shape=shape, **opt) - array = omz[f"{level}"] + omz.create_dataset(f'{level}', shape=shape, **opt) + array = omz[f'{level}'] # Write each slice for idx in range(n): if -1 in low_res_offsets[idx]: array[0, idx, :1, :1] = np.zeros((1, 1), dtype=np.uint8) - continue + continue + + top, bottom, left, right = [k*2**(nblevel-level-1) + for k in low_res_offsets[idx]] + height, width = size[0]-top-bottom, size[1]-left-right - top, bottom, left, right = [ - k * 2 ** (nblevel - level - 1) for k in low_res_offsets[idx] - ] - height, width = size[0] - top - bottom, size[1] - left - right - - data = wkw_dataset.read(off=(left, top, idx), shape=[width, height, 1]) + data = wkw_dataset.read(off = (left, top, idx), shape = [width, height, 1]) data = data[0, :, :, 0] data = np.transpose(data, (1, 0)) if dic: - data = np.array( - [ - [dic[data[i][j]] for j in range(data.shape[1])] - for i in range(data.shape[0]) - ] - ) - subdat_size = data.shape - - print( - "Convert level", - level, - "with shape", - shape, - "and slice", - idx, - "with size", - subdat_size, - ) + data = np.array([[dic[data[i][j]] + for j in range(data.shape[1])] + for i in range(data.shape[0])]) + subdat_size = data.shape + + print('Convert level', + level, + 'with shape', + shape, + 'and slice', + idx, + 'with size', + subdat_size) if max_load is None or ( subdat_size[-2] < max_load and subdat_size[-1] < max_load - ): - array[ - 0, idx, top : top + subdat_size[-2], left : left + subdat_size[-1] - ] = data[...] + ): + array[0, + idx, + top: top+subdat_size[-2], + left: left+subdat_size[-1]] = data[...] else: ni = ceildiv(subdat_size[-2], max_load) nj = ceildiv(subdat_size[-1], max_load) - + for i in range(ni): for j in range(nj): - print(f"\r{i+1}/{ni}, {j+1}/{nj}", end=" ") - start_x, end_x = (i * max_load,) - min((i + 1) * max_load, subdat_size[-2]) - - start_y, end_y = (j * max_load,) - min((j + 1) * max_load, subdat_size[-1]) + print(f'\r{i+1}/{ni}, {j+1}/{nj}', end=' ') + start_x, end_x = i*max_load, + min( + (i+1)*max_load, + subdat_size[-2]) + + start_y, end_y = j*max_load, + min( + (j+1)*max_load, + subdat_size[-1]) array[ - 0, - idx, - top + start_x : top + end_x, - left + start_y : left + end_y, - ] = data[start_x:end_x, start_y:end_y] - print("") - + 0, + idx, + top + start_x: top + end_x, + left + start_y: left + end_y] = data[ + start_x: end_x, + start_y: end_y] + print('') + + # Write OME-Zarr multiscale metadata - print("Write metadata") + print('Write metadata') omz.attrs["multiscales"] = omz_data.attrs["multiscales"] + def get_mask_name(level: int) -> str: """ Return the name of the mask for a given resolution level. @@ -207,9 +212,9 @@ def get_mask_name(level: int) -> str: The name of the mask for the given level. """ if level == 0: - return "1" + return '1' else: - return f"{2**level}-{2**level}-1" + return f'{2**level}-{2**level}-1' def cal_distance(img: np.ndarray) -> int: @@ -231,7 +236,7 @@ def cal_distance(img: np.ndarray) -> int: cnt = np.sum(img[i, :]) if cnt > 0: return i - return m + return m def find_borders(img: np.ndarray) -> np.ndarray: @@ -251,8 +256,8 @@ def find_borders(img: np.ndarray) -> np.ndarray: if np.max(img) == 0: return [-1, -1, -1, -1] top = cal_distance(img) - bottom = cal_distance(img[::-1]) - left = cal_distance(np.rot90(img, k=3)) + bottom = cal_distance(img[::-1]) + left = cal_distance(np.rot90(img, k=3)) right = cal_distance(np.rot90(img, k=1)) - return [max(0, k - 1) for k in [top, bottom, left, right]] + return [max(0, k-1) for k in [top, bottom, left, right]] From f1c4534f16d7f1a3a6b48f151e885645cf244b28 Mon Sep 17 00:00:00 2001 From: jingjingwu1225 Date: Wed, 20 Nov 2024 20:22:03 +0000 Subject: [PATCH 5/9] style fixes by ruff --- .../modalities/wk/webknossos_annotation.py | 171 +++++++++--------- 1 file changed, 83 insertions(+), 88 deletions(-) diff --git a/linc_convert/modalities/wk/webknossos_annotation.py b/linc_convert/modalities/wk/webknossos_annotation.py index af0bf7d..ffb5f5b 100644 --- a/linc_convert/modalities/wk/webknossos_annotation.py +++ b/linc_convert/modalities/wk/webknossos_annotation.py @@ -1,6 +1,5 @@ """Convert annotation downloaded from webknossos into ome.zarr format.""" - # stdlib import ast import json @@ -31,32 +30,32 @@ def convert( dic: str = None, *, chunk: int = 1024, - compressor: str = 'blosc', + compressor: str = "blosc", compressor_opt: str = "{}", max_load: int = 16384, ) -> None: """ Convert annotations (in .wkw format) from webknossos to ome.zarr format. - This script converts annotations from webknossos, following the czyx direction, + This script converts annotations from webknossos, following the czyx direction, to the ome.zarr format. The conversion ensures that the annotations match the underlying dataset. Parameters ---------- wkw_dir : str - Path to the unzipped manual annotation folder downloaded from webknossos + Path to the unzipped manual annotation folder downloaded from webknossos in .wkw format. For example: .../annotation_folder/data_Volume. ome_dir : str Path to the underlying ome.zarr dataset, following the BIDS naming standard. dst : str - Path to the output directory for saving the converted ome.zarr. - The ome.zarr file name is generated automatically based on ome_dir + Path to the output directory for saving the converted ome.zarr. + The ome.zarr file name is generated automatically based on ome_dir and the initials of the annotator. dic : dict - A dictionary mapping annotation values to the following standard values - if annotation doesn't match the standard. - The dictionary should be in single quotes, with keys in double quotes, + A dictionary mapping annotation values to the following standard values + if annotation doesn't match the standard. + The dictionary should be in single quotes, with keys in double quotes, for example: dic = '{"2": 1, "4": 2}'. The standard values are: - 0: background @@ -69,32 +68,32 @@ def convert( - 7: Single Fiber """ dic = json.loads(dic) - dic = {int(key): int(value) for key,value in dic.items()} - + dic = {int(key): int(value) for key, value in dic.items()} - # load underlying dataset info to get size info - omz_data = zarr.open_group(ome_dir, mode='r') + # load underlying dataset info to get size info + omz_data = zarr.open_group(ome_dir, mode="r") nblevel = len([i for i in os.listdir(ome_dir) if i.isdigit()]) - wkw_dataset_path = os.path.join(wkw_dir, get_mask_name(nblevel-1)) + wkw_dataset_path = os.path.join(wkw_dir, get_mask_name(nblevel - 1)) wkw_dataset = wkw.Dataset.open(wkw_dataset_path) low_res_offsets = [] - omz_res = omz_data[nblevel-1] - n = omz_res.shape[1] + omz_res = omz_data[nblevel - 1] + n = omz_res.shape[1] size = omz_res.shape[-2:] for idx in range(n): offset_x, offset_y = 0, 0 - data = wkw_dataset.read(off = (offset_y, offset_x, idx), - shape = [size[1], size[0], 1]) + data = wkw_dataset.read( + off=(offset_y, offset_x, idx), shape=[size[1], size[0], 1] + ) data = data[0, :, :, 0] data = np.transpose(data, (1, 0)) - [t0,b0,l0,r0] = find_borders(data) - low_res_offsets.append([t0,b0,l0,r0]) + [t0, b0, l0, r0] = find_borders(data) + low_res_offsets.append([t0, b0, l0, r0]) - # setup save info - basename = os.path.basename(ome_dir)[:-9] - initials = wkw_dir.split('/')[-2][:2] - out = os.path.join(dst, basename + '_dsec_' + initials + '.ome.zarr') + # setup save info + basename = os.path.basename(ome_dir)[:-9] + initials = wkw_dir.split("/")[-2][:2] + out = os.path.join(dst, basename + "_dsec_" + initials + ".ome.zarr") if os.path.exists(out): shutil.rmtree(out) os.makedirs(out, exist_ok=True) @@ -106,97 +105,93 @@ def convert( store = zarr.storage.DirectoryStore(out) omz = zarr.group(store=store, overwrite=True) - # Prepare chunking options opt = { - 'chunks': [1, 1] + [chunk, chunk], - 'dimension_separator': r'/', - 'order': 'F', - 'dtype': 'uint8', - 'fill_value': None, - 'compressor': make_compressor(compressor, **compressor_opt), + "chunks": [1, 1] + [chunk, chunk], + "dimension_separator": r"/", + "order": "F", + "dtype": "uint8", + "fill_value": None, + "compressor": make_compressor(compressor, **compressor_opt), } print(opt) - # Write each level for level in range(nblevel): omz_res = omz_data[level] size = omz_res.shape[-2:] - shape = [1, n] + [i for i in size] - + shape = [1, n] + [i for i in size] + wkw_dataset_path = os.path.join(wkw_dir, get_mask_name(level)) wkw_dataset = wkw.Dataset.open(wkw_dataset_path) - omz.create_dataset(f'{level}', shape=shape, **opt) - array = omz[f'{level}'] + omz.create_dataset(f"{level}", shape=shape, **opt) + array = omz[f"{level}"] # Write each slice for idx in range(n): if -1 in low_res_offsets[idx]: array[0, idx, :1, :1] = np.zeros((1, 1), dtype=np.uint8) - continue - - top, bottom, left, right = [k*2**(nblevel-level-1) - for k in low_res_offsets[idx]] - height, width = size[0]-top-bottom, size[1]-left-right + continue + + top, bottom, left, right = [ + k * 2 ** (nblevel - level - 1) for k in low_res_offsets[idx] + ] + height, width = size[0] - top - bottom, size[1] - left - right - data = wkw_dataset.read(off = (left, top, idx), shape = [width, height, 1]) + data = wkw_dataset.read(off=(left, top, idx), shape=[width, height, 1]) data = data[0, :, :, 0] data = np.transpose(data, (1, 0)) if dic: - data = np.array([[dic[data[i][j]] - for j in range(data.shape[1])] - for i in range(data.shape[0])]) - subdat_size = data.shape - - print('Convert level', - level, - 'with shape', - shape, - 'and slice', - idx, - 'with size', - subdat_size) + data = np.array( + [ + [dic[data[i][j]] for j in range(data.shape[1])] + for i in range(data.shape[0]) + ] + ) + subdat_size = data.shape + + print( + "Convert level", + level, + "with shape", + shape, + "and slice", + idx, + "with size", + subdat_size, + ) if max_load is None or ( subdat_size[-2] < max_load and subdat_size[-1] < max_load - ): - array[0, - idx, - top: top+subdat_size[-2], - left: left+subdat_size[-1]] = data[...] + ): + array[ + 0, idx, top : top + subdat_size[-2], left : left + subdat_size[-1] + ] = data[...] else: ni = ceildiv(subdat_size[-2], max_load) nj = ceildiv(subdat_size[-1], max_load) - + for i in range(ni): for j in range(nj): - print(f'\r{i+1}/{ni}, {j+1}/{nj}', end=' ') - start_x, end_x = i*max_load, - min( - (i+1)*max_load, - subdat_size[-2]) - - start_y, end_y = j*max_load, - min( - (j+1)*max_load, - subdat_size[-1]) + print(f"\r{i+1}/{ni}, {j+1}/{nj}", end=" ") + start_x, end_x = (i * max_load,) + min((i + 1) * max_load, subdat_size[-2]) + + start_y, end_y = (j * max_load,) + min((j + 1) * max_load, subdat_size[-1]) array[ - 0, - idx, - top + start_x: top + end_x, - left + start_y: left + end_y] = data[ - start_x: end_x, - start_y: end_y] - print('') - - + 0, + idx, + top + start_x : top + end_x, + left + start_y : left + end_y, + ] = data[start_x:end_x, start_y:end_y] + print("") + # Write OME-Zarr multiscale metadata - print('Write metadata') + print("Write metadata") omz.attrs["multiscales"] = omz_data.attrs["multiscales"] - def get_mask_name(level: int) -> str: """ Return the name of the mask for a given resolution level. @@ -212,9 +207,9 @@ def get_mask_name(level: int) -> str: The name of the mask for the given level. """ if level == 0: - return '1' + return "1" else: - return f'{2**level}-{2**level}-1' + return f"{2**level}-{2**level}-1" def cal_distance(img: np.ndarray) -> int: @@ -236,7 +231,7 @@ def cal_distance(img: np.ndarray) -> int: cnt = np.sum(img[i, :]) if cnt > 0: return i - return m + return m def find_borders(img: np.ndarray) -> np.ndarray: @@ -256,8 +251,8 @@ def find_borders(img: np.ndarray) -> np.ndarray: if np.max(img) == 0: return [-1, -1, -1, -1] top = cal_distance(img) - bottom = cal_distance(img[::-1]) - left = cal_distance(np.rot90(img, k=3)) + bottom = cal_distance(img[::-1]) + left = cal_distance(np.rot90(img, k=3)) right = cal_distance(np.rot90(img, k=1)) - return [max(0, k-1) for k in [top, bottom, left, right]] + return [max(0, k - 1) for k in [top, bottom, left, right]] From 622047794a99d3df934f4c24115ebe476892ebdb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=90=B4=E6=99=B6=E6=99=B6?= Date: Thu, 21 Nov 2024 11:24:30 -0500 Subject: [PATCH 6/9] Added wkw into package list --- conda.yaml | 1 + pyproject.toml | 1 + 2 files changed, 2 insertions(+) diff --git a/conda.yaml b/conda.yaml index 152eb45..edbc97d 100644 --- a/conda.yaml +++ b/conda.yaml @@ -10,5 +10,6 @@ dependencies: - zarr - nibabel - tifffile + - wkw - pip: - cyclopts diff --git a/pyproject.toml b/pyproject.toml index 79b5610..2143e59 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,6 +28,7 @@ cyclopts = "^2.0.0" numpy = "*" nibabel = "*" zarr = "^2.0.0" +wkw = "*" [tool.poetry.group.df.dependencies] glymur = "*" From f08623fcfa8539d7082f4a7844be2f2ae2aaf5fd Mon Sep 17 00:00:00 2001 From: Jingjing Wu <49615883+jingjingwu1225@users.noreply.github.com> Date: Fri, 22 Nov 2024 09:21:40 -0500 Subject: [PATCH 7/9] Update webknossos_annotation.py change parameter of dst to out --- linc_convert/modalities/wk/webknossos_annotation.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/linc_convert/modalities/wk/webknossos_annotation.py b/linc_convert/modalities/wk/webknossos_annotation.py index ffb5f5b..33f7b0d 100644 --- a/linc_convert/modalities/wk/webknossos_annotation.py +++ b/linc_convert/modalities/wk/webknossos_annotation.py @@ -26,7 +26,7 @@ def convert( wkw_dir: str = None, ome_dir: str = None, - dst: str = None, + out: str = None, dic: str = None, *, chunk: int = 1024, @@ -48,7 +48,7 @@ def convert( in .wkw format. For example: .../annotation_folder/data_Volume. ome_dir : str Path to the underlying ome.zarr dataset, following the BIDS naming standard. - dst : str + out : str Path to the output directory for saving the converted ome.zarr. The ome.zarr file name is generated automatically based on ome_dir and the initials of the annotator. @@ -93,7 +93,7 @@ def convert( # setup save info basename = os.path.basename(ome_dir)[:-9] initials = wkw_dir.split("/")[-2][:2] - out = os.path.join(dst, basename + "_dsec_" + initials + ".ome.zarr") + out = os.path.join(out, basename + "_dsec_" + initials + ".ome.zarr") if os.path.exists(out): shutil.rmtree(out) os.makedirs(out, exist_ok=True) From a97d919ca6334f52a377efdac54c3ff4743c470b Mon Sep 17 00:00:00 2001 From: Calvin Chai Date: Mon, 25 Nov 2024 11:25:57 -0500 Subject: [PATCH 8/9] Update pyproject.toml --- pyproject.toml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e0b8ecf..977e584 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,7 +28,7 @@ cyclopts = "^2.0.0" numpy = "*" nibabel = "*" zarr = "^2.0.0" -wkw = "*" + [tool.poetry.group.df] optional = true @@ -46,6 +46,11 @@ optional = true h5py = "*" scipy = "*" +[tool.poetry.group.wk] +optional = true +[tool.poetry.group.wk.dependencies] +wkw = "*" + [tool.poetry.group.dev] optional = true [tool.poetry.group.dev.dependencies] From d9377f6bd38ea7c65bd9226f21d4d9c013028335 Mon Sep 17 00:00:00 2001 From: Calvin Chai Date: Mon, 25 Nov 2024 11:27:03 -0500 Subject: [PATCH 9/9] Update test_df.py --- tests/test_df.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_df.py b/tests/test_df.py index 3fa0317..bcac807 100644 --- a/tests/test_df.py +++ b/tests/test_df.py @@ -19,7 +19,7 @@ def _write_test_data(directory: str) -> None: def test_df(tmp_path): - _write_test_data(tmp_path) + # _write_test_data(tmp_path) with zipfile.ZipFile("data/df_input.zip", "r") as z: z.extractall(tmp_path) output_zarr = tmp_path / "output.zarr"