From dcfb77d8cf54b1783524131664cea709ee31bef0 Mon Sep 17 00:00:00 2001
From: Ali Khan
Date: Sun, 12 Jan 2025 22:56:48 -0500
Subject: [PATCH] refactored, and added new metadata extraction

---
 workflow/Snakefile                     |  1 +
 workflow/rules/imaris.smk              | 74 ++++++++++++++++++++++++++
 workflow/rules/import.smk              |  2 +-
 workflow/rules/ome_zarr.smk            | 36 -------------
 workflow/scripts/imaris_to_metadata.py | 31 +++++++++++
 workflow/scripts/imaris_to_ome_zarr.py |  2 +-
 6 files changed, 108 insertions(+), 38 deletions(-)
 create mode 100644 workflow/rules/imaris.smk
 create mode 100644 workflow/scripts/imaris_to_metadata.py

diff --git a/workflow/Snakefile b/workflow/Snakefile
index 30834a5..fb91b72 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -43,3 +43,4 @@ include: "rules/bigstitcher.smk"
 include: "rules/ome_zarr.smk"
 include: "rules/bids.smk"
 include: "rules/qc.smk"
+include: "rules/imaris.smk"
diff --git a/workflow/rules/imaris.smk b/workflow/rules/imaris.smk
new file mode 100644
index 0000000..eaaf4bd
--- /dev/null
+++ b/workflow/rules/imaris.smk
@@ -0,0 +1,74 @@
+rule imaris_to_metadata:
+    """Write the SPIM.json sidecar for an imaris acquisition via imaris_to_metadata.py."""
+    input:
+        ims=get_input_sample,
+    output:
+        metadata_json=bids(
+            root=root,
+            subject="{subject}",
+            datatype="micr",
+            sample="{sample}",
+            acq="{acq,[a-zA-Z0-9]*imaris[a-zA-Z0-9]*}",
+            suffix="SPIM.json",
+        ),
+    benchmark:
+        bids(
+            root="benchmarks",
+            datatype="imaris_to_metadata",
+            subject="{subject}",
+            sample="{sample}",
+            acq="{acq}",
+            suffix="benchmark.tsv",
+        ),
+    log:
+        bids(
+            root="logs",
+            datatype="imaris_to_metadata",
+            subject="{subject}",
+            sample="{sample}",
+            acq="{acq}",
+            suffix="log.txt",
+        ),
+    group:
+        "preproc"
+    container:
+        config["containers"]["spimprep"]
+    script:
+        "../scripts/imaris_to_metadata.py"
+
+
+rule imaris_to_ome_zarr:
+    """Run imaris_to_ome_zarr.py on the .ims input and the imaris_to_metadata sidecar."""
+    input:
+        ims=get_input_sample,
+        metadata_json=rules.imaris_to_metadata.output.metadata_json,
+    params:
+        max_downsampling_layers=config["ome_zarr"]["max_downsampling_layers"],
+        rechunk_size=config["ome_zarr"]["rechunk_size"],
+        scaling_method=config["ome_zarr"]["scaling_method"],
+        downsampling=config["bigstitcher"]["fuse_dataset"]["downsampling"],
+        stains=get_stains,
+        uri=get_output_ome_zarr_uri(),
+        storage_provider_settings=workflow.storage_provider_settings,
+    output:
+        **get_output_ome_zarr("imaris"),
+    log:
+        bids(
+            root="logs",
+            subject="{subject}",
+            datatype="imaris_to_ome_zarr",
+            sample="{sample}",
+            acq="{acq}",
+            suffix="log.txt",
+        ),
+    container:
+        config["containers"]["spimprep"]
+    group:
+        "preproc"
+    threads: config["total_cores"]
+    resources:
+        runtime=360,
+        mem_mb=config["total_mem_mb"],
+    shadow: 'minimal'
+    script:
+        "../scripts/imaris_to_ome_zarr.py"
diff --git a/workflow/rules/import.smk b/workflow/rules/import.smk
index 8d3d0a0..d3bb230 100644
--- a/workflow/rules/import.smk
+++ b/workflow/rules/import.smk
@@ -155,7 +155,7 @@ rule prestitched_to_metadata:
             subject="{subject}",
             datatype="micr",
             sample="{sample}",
-            acq="{acq,[a-zA-Z0-9]*(prestitched|imaris)[a-zA-Z0-9]*}",
+            acq="{acq,[a-zA-Z0-9]*prestitched[a-zA-Z0-9]*}",
             suffix="SPIM.json",
         ),
     benchmark:
diff --git a/workflow/rules/ome_zarr.smk b/workflow/rules/ome_zarr.smk
index d61715f..4879b8d 100644
--- a/workflow/rules/ome_zarr.smk
+++ b/workflow/rules/ome_zarr.smk
@@ -173,39 +173,3 @@ rule ome_zarr_to_nii:
     script:
         "../scripts/ome_zarr_to_nii.py"
 
-rule imaris_to_ome_zarr:
-    input:
-        ims=get_input_sample,
-        metadata_json=rules.prestitched_to_metadata.output.metadata_json,
-    params:
-        max_downsampling_layers=config["ome_zarr"]["max_downsampling_layers"],
-        rechunk_size=config["ome_zarr"]["rechunk_size"],
-        scaling_method=config["ome_zarr"]["scaling_method"],
-        downsampling=config["bigstitcher"]["fuse_dataset"]["downsampling"],
-        stains=get_stains,
-        uri=get_output_ome_zarr_uri(),
-        storage_provider_settings=workflow.storage_provider_settings,
-    output:
-        **get_output_ome_zarr("imaris"),
-    log:
-        bids(
-            root="logs",
-            subject="{subject}",
-            datatype="imaris_to_ome_zarr",
-            sample="{sample}",
-            acq="{acq}",
-            suffix="log.txt",
-        ),
-    container:
-        config["containers"]["spimprep"]
-    group:
-        "preproc"
-    threads: config["total_cores"]
-    resources:
-        runtime=360,
-        mem_mb=config["total_mem_mb"],
-    shadow: 'minimal'
-    script:
-        "../scripts/imaris_to_ome_zarr.py"
-
-
diff --git a/workflow/scripts/imaris_to_metadata.py b/workflow/scripts/imaris_to_metadata.py
new file mode 100644
index 0000000..99062bd
--- /dev/null
+++ b/workflow/scripts/imaris_to_metadata.py
@@ -0,0 +1,31 @@
+"""Extract voxel sizes from an Imaris (.ims) file into a JSON sidecar.
+
+Reads the XML stored in 'DataSetInfo/OME Image Tags/Image 0' of the file.
+"""
+import h5py
+import xmltodict
+import json
+
+with h5py.File(snakemake.input.ims, "r") as hdf5_file:
+    xml_data = hdf5_file['DataSetInfo/OME Image Tags/Image 0'][:]
+
+# Convert byte array to string and then to a dictionary
+xml_str = bytes(xml_data).decode('utf-8', errors='ignore')  # Decode byte array to string
+
+# Wrap in a single <root> element: the lookups below start at xml_dict['root']
+try:
+    xml_dict = xmltodict.parse(f"<root>{xml_str}</root>", namespace_separator=':')
+except Exception as e:
+    # Fail here rather than hitting a NameError on xml_dict further down
+    raise ValueError(f"Error parsing XML: {e}") from e
+
+metadata={}
+metadata['physical_size_x'] = float(xml_dict['root']['ca:CustomAttributes']['DataAxis0']['@PhysicalUnit'])
+metadata['physical_size_y'] = float(xml_dict['root']['ca:CustomAttributes']['DataAxis1']['@PhysicalUnit'])
+metadata['physical_size_z'] = abs(float(xml_dict['root']['ca:CustomAttributes']['DataAxis3']['@PhysicalUnit']))
+metadata['PixelSize'] = [ metadata['physical_size_z']/1000.0, metadata['physical_size_y']/1000.0, metadata['physical_size_x']/1000.0] #zyx since OME-Zarr is ZYX
+metadata['PixelSizeUnits'] = 'mm'
+
+#write metadata to json
+with open(snakemake.output.metadata_json, 'w') as fp:
+    json.dump(metadata, fp,indent=4)
diff --git a/workflow/scripts/imaris_to_ome_zarr.py b/workflow/scripts/imaris_to_ome_zarr.py
index e98fd11..a83d143 100644
--- a/workflow/scripts/imaris_to_ome_zarr.py
+++ b/workflow/scripts/imaris_to_ome_zarr.py
@@ -34,7 +34,7 @@ def copy_group(hdf5_group, zarr_group):
                 data=item[()],
                 chunks=item.chunks,
                 dtype=item.dtype,
-                compression="gzip"  # Optional compression
+                compression="blosc"  # Optional compression
             )
             print(f"Copied dataset: {key}")
 