From ee43d44fd685bee4370892c6ee16d9cf9fc6df9c Mon Sep 17 00:00:00 2001 From: Ali Khan Date: Tue, 8 Oct 2024 07:44:57 -0500 Subject: [PATCH] updates for compatibility with input zstacks (#48) Blaze microscope now outputs data with one tif file per zstack. This adds compatibility for it. - Includes a number of other fixes and cleaning up too (removes unused rules/scripts). Tested locally and on gcs. - Some refactoring of global config params for resources (adds total_cores, total_mem_mb) - currently disables qc reports (was taking long for gcs runs - need to revisit that in another PR) --- README.md | 5 +- config/config.yml | 19 ++- poetry.lock | 141 ++++++++++------- pyproject.toml | 9 ++ resources/qc/ff_html_temp.html | 10 +- testing/dryrun_tests/datasets_gcs.tsv | 2 + testing/dryrun_tests/datasets_local.tsv | 2 + workflow/Snakefile | 2 +- workflow/macros/AutostitchMacro.ijm | 56 ------- workflow/macros/FuseImageMacroZarr.ijm | 47 ------ workflow/rules/bigstitcher.smk | 182 ++-------------------- workflow/rules/common.smk | 178 +++++++++++---------- workflow/rules/flatfield_corr.smk | 2 +- workflow/rules/import.smk | 19 +-- workflow/rules/ome_zarr.smk | 66 ++++---- workflow/scripts/blaze_to_metadata.py | 154 ++++++++++++------ workflow/scripts/blaze_to_metadata_gcs.py | 150 +++++++++++++----- workflow/scripts/tif_to_zarr.py | 98 +++++++++--- workflow/scripts/tif_to_zarr_gcs.py | 131 ++++++++++++---- 19 files changed, 656 insertions(+), 617 deletions(-) create mode 100644 testing/dryrun_tests/datasets_gcs.tsv create mode 100644 testing/dryrun_tests/datasets_local.tsv delete mode 100644 workflow/macros/AutostitchMacro.ijm delete mode 100644 workflow/macros/FuseImageMacroZarr.ijm diff --git a/README.md b/README.md index e5d5b8d..018def7 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,7 @@ Takes TIF images (tiled or prestitched) and outputs a validated BIDS Microscopy - Python >= 3.11 - Lightsheet data: - Raw Ultramicroscope Blaze OME TIFF files (include `blaze` in the acquisition tag) + - can be 2D or 3D TIFF files - Prestitched TIFF files (include `prestitched` in the acquisition tag) @@ -63,10 +64,8 @@ or for snakemake<8.0, use: snakemake -c all --use-singularity ``` -Note: if you run the workflow on a system with large memory, you will need to set the heap size for the stitching and fusion rules. This can be done with e.g.: `--set-resources bigstitcher_spark_stitching:mem_mb=60000 bigstitcher_spark_fusion:mem_mb=100000` +Note: if you run the workflow on a system with large memory, you will need to set the heap size for the stitching and fusion rules. This can be done with e.g.: `--set-resources bigstitcher_stitching:mem_mb=60000 bigstitcher_fusion:mem_mb=100000` 7. If you want to run the workflow using a batch job submission server, please see the executor plugins here: https://snakemake.github.io/snakemake-plugin-catalog/ - -Alternate usage of this workflow (making use of conda) is described in the [Snakemake Workflow Catalog](https://snakemake.github.io/snakemake-workflow-catalog?repo=khanlab/SPIMprep). diff --git a/config/config.yml b/config/config.yml index edc16ba..cf69864 100644 --- a/config/config.yml +++ b/config/config.yml @@ -1,18 +1,20 @@ datasets: 'config/datasets.tsv' - root: 'bids' # can use a s3:// or gcs:// prefix to write output to cloud storage work: 'work' remote_creds: '~/.config/gcloud/application_default_credentials.json' #this is needed so we can pass creds to container -write_ome_zarr_direct: True #use this to skip writing the final zarr output to work first and copying afterwards -- useful when work is not a fast local disk +use_zipstore: False #if True, produce SPIM.ome.zarr.zip instead of SPIM.ome.zarr -cores_per_rule: 32 +#total resources available, used to set rule resources +total_cores: 32 +total_mem_mb: 128000 #import wildcards: tilex, tiley, channel, zslice (and prefix - unused) import_blaze: raw_tif_pattern: "{prefix}_Blaze[{tilex} x {tiley}]_C{channel}_xyz-Table Z{zslice}.ome.tif" + raw_tif_pattern_zstack: "{prefix}_Blaze[{tilex} x {tiley}]_C{channel}.ome.tif" intensity_rescaling: 0.5 #raw images seem to be at the upper end of uint16 (over-saturated) -- causes wrapping issues when adjusting with flatfield correction etc. this rescales the raw data as it imports it.. import_prestitched: @@ -36,16 +38,14 @@ bigstitcher: downsample_in_x: 4 downsample_in_y: 4 downsample_in_z: 1 - method: "phase_corr" #unused - methods: #unused + methods: #unused, only for reference phase_corr: "Phase Correlation" optical_flow: "Lucas-Kanade" filter_pairwise_shifts: - enabled: 1 #unused min_r: 0.7 max_shift_total: 50 global_optimization: - enabled: 1 + enabled: 1 method: TWO_ROUND_ITERATIVE methods: #unused, only for reference ONE_ROUND_SIMPLE: "One-Round" @@ -64,7 +64,7 @@ bigstitcher: block_size_factor_z: 32 ome_zarr: - desc: sparkstitchedflatcorr + desc: stitchedflatcorr max_downsampling_layers: 5 # e.g. 4 levels: { 0: orig, 1: ds2, 2: ds4, 3: ds8, 4: ds16} rechunk_size: #z, y, x - 1 @@ -100,7 +100,6 @@ ome_zarr: id: 0 name: spim version: "0.4" - use_zipstore: False #if True, produce SPIM.ome.zarr.zip instead of SPIM.ome.zarr nifti: levels: #cannot be higher than max_downsampling_layers in ome_zarr @@ -155,5 +154,5 @@ report: containers: - spimprep: 'docker://khanlab/spimprep-deps:main' + spimprep: 'docker://khanlab/spimprep-deps:v0.1.0' diff --git a/poetry.lock b/poetry.lock index 84632eb..567aa71 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2100,6 +2100,18 @@ sql-other = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-d test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"] xml = ["lxml (>=4.9.2)"] +[[package]] +name = "pastel" +version = "0.2.1" +description = "Bring colors to your terminal." +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "pastel-0.2.1-py2.py3-none-any.whl", hash = "sha256:4349225fcdf6c2bb34d483e523475de5bb04a5c10ef711263452cb37d7dd4364"}, + {file = "pastel-0.2.1.tar.gz", hash = "sha256:e6581ac04e973cac858828c6202c1e1e81fee1dc7de7683f3e1ffe0bfd8a573d"}, +] + [[package]] name = "pathspec" version = "0.12.1" @@ -2177,6 +2189,25 @@ dev = ["paramiko", "psutil", "pytest (>=6.0)", "pytest-cov", "pytest-mock", "pyt docs = ["sphinx (>=4.0.0)", "sphinx-rtd-theme (>=1.0.0)"] ssh = ["paramiko"] +[[package]] +name = "poethepoet" +version = "0.29.0" +description = "A task runner that works well with poetry." +category = "dev" +optional = false +python-versions = ">=3.8" +files = [ + {file = "poethepoet-0.29.0-py3-none-any.whl", hash = "sha256:f8dfe55006dcfb5cf31bcb1904e1262e1c642a4502fee3688cbf1bddfe5c7601"}, + {file = "poethepoet-0.29.0.tar.gz", hash = "sha256:676842302f2304a86b31ac56398dd672fae8471128d2086896393384dbafc095"}, +] + +[package.dependencies] +pastel = ">=0.2.1,<0.3.0" +pyyaml = ">=6.0.2,<7.0.0" + +[package.extras] +poetry-plugin = ["poetry (>=1.0,<2.0)"] + [[package]] name = "prompt-toolkit" version = "3.0.36" @@ -2571,63 +2602,65 @@ files = [ [[package]] name = "pyyaml" -version = "6.0.1" +version = "6.0.2" description = "YAML parser and emitter for Python" category = "main" optional = false -python-versions = ">=3.6" +python-versions = ">=3.8" files = [ - {file = "PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"}, - {file = "PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f"}, - {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, - {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, - {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, - {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, - {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, - {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, - {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, - {file = "PyYAML-6.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab"}, - {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, - {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, - {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, - {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, - {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, - {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, - {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, - {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, - {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, - {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, - {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, - {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, - {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd"}, - {file = "PyYAML-6.0.1-cp36-cp36m-win32.whl", hash = "sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585"}, - {file = "PyYAML-6.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa"}, - {file = "PyYAML-6.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3"}, - {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27"}, - {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3"}, - {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baa90d3f661d43131ca170712d903e6295d1f7a0f595074f151c0aed377c9b9c"}, - {file = "PyYAML-6.0.1-cp37-cp37m-win32.whl", hash = "sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba"}, - {file = "PyYAML-6.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867"}, - {file = "PyYAML-6.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1d4c7e777c441b20e32f52bd377e0c409713e8bb1386e1099c2415f26e479595"}, - {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, - {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, - {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, - {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, - {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, - {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, - {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, - {file = "PyYAML-6.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859"}, - {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, - {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, - {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, - {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, - {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, - {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, - {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, + {file = "PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086"}, + {file = "PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf"}, + {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8824b5a04a04a047e72eea5cec3bc266db09e35de6bdfe34c9436ac5ee27d237"}, + {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7c36280e6fb8385e520936c3cb3b8042851904eba0e58d277dca80a5cfed590b"}, + {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec031d5d2feb36d1d1a24380e4db6d43695f3748343d99434e6f5f9156aaa2ed"}, + {file = "PyYAML-6.0.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:936d68689298c36b53b29f23c6dbb74de12b4ac12ca6cfe0e047bedceea56180"}, + {file = "PyYAML-6.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:23502f431948090f597378482b4812b0caae32c22213aecf3b55325e049a6c68"}, + {file = "PyYAML-6.0.2-cp310-cp310-win32.whl", hash = "sha256:2e99c6826ffa974fe6e27cdb5ed0021786b03fc98e5ee3c5bfe1fd5015f42b99"}, + {file = "PyYAML-6.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:a4d3091415f010369ae4ed1fc6b79def9416358877534caf6a0fdd2146c87a3e"}, + {file = "PyYAML-6.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cc1c1159b3d456576af7a3e4d1ba7e6924cb39de8f67111c735f6fc832082774"}, + {file = "PyYAML-6.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1e2120ef853f59c7419231f3bf4e7021f1b936f6ebd222406c3b60212205d2ee"}, + {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d225db5a45f21e78dd9358e58a98702a0302f2659a3c6cd320564b75b86f47c"}, + {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ac9328ec4831237bec75defaf839f7d4564be1e6b25ac710bd1a96321cc8317"}, + {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ad2a3decf9aaba3d29c8f537ac4b243e36bef957511b4766cb0057d32b0be85"}, + {file = "PyYAML-6.0.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ff3824dc5261f50c9b0dfb3be22b4567a6f938ccce4587b38952d85fd9e9afe4"}, + {file = "PyYAML-6.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:797b4f722ffa07cc8d62053e4cff1486fa6dc094105d13fea7b1de7d8bf71c9e"}, + {file = "PyYAML-6.0.2-cp311-cp311-win32.whl", hash = "sha256:11d8f3dd2b9c1207dcaf2ee0bbbfd5991f571186ec9cc78427ba5bd32afae4b5"}, + {file = "PyYAML-6.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:e10ce637b18caea04431ce14fabcf5c64a1c61ec9c56b071a4b7ca131ca52d44"}, + {file = "PyYAML-6.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab"}, + {file = "PyYAML-6.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725"}, + {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5"}, + {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b22676e8097e9e22e36d6b7bda33190d0d400f345f23d4065d48f4ca7ae0425"}, + {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476"}, + {file = "PyYAML-6.0.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48"}, + {file = "PyYAML-6.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8b9c7197f7cb2738065c481a0461e50ad02f18c78cd75775628afb4d7137fb3b"}, + {file = "PyYAML-6.0.2-cp312-cp312-win32.whl", hash = "sha256:ef6107725bd54b262d6dedcc2af448a266975032bc85ef0172c5f059da6325b4"}, + {file = "PyYAML-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:7e7401d0de89a9a855c839bc697c079a4af81cf878373abd7dc625847d25cbd8"}, + {file = "PyYAML-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:efdca5630322a10774e8e98e1af481aad470dd62c3170801852d752aa7a783ba"}, + {file = "PyYAML-6.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:50187695423ffe49e2deacb8cd10510bc361faac997de9efef88badc3bb9e2d1"}, + {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ffe8360bab4910ef1b9e87fb812d8bc0a308b0d0eef8c8f44e0254ab3b07133"}, + {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:17e311b6c678207928d649faa7cb0d7b4c26a0ba73d41e99c4fff6b6c3276484"}, + {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b189594dbe54f75ab3a1acec5f1e3faa7e8cf2f1e08d9b561cb41b845f69d5"}, + {file = "PyYAML-6.0.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:41e4e3953a79407c794916fa277a82531dd93aad34e29c2a514c2c0c5fe971cc"}, + {file = "PyYAML-6.0.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652"}, + {file = "PyYAML-6.0.2-cp313-cp313-win32.whl", hash = "sha256:bc2fa7c6b47d6bc618dd7fb02ef6fdedb1090ec036abab80d4681424b84c1183"}, + {file = "PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563"}, + {file = "PyYAML-6.0.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:24471b829b3bf607e04e88d79542a9d48bb037c2267d7927a874e6c205ca7e9a"}, + {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7fded462629cfa4b685c5416b949ebad6cec74af5e2d42905d41e257e0869f5"}, + {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d84a1718ee396f54f3a086ea0a66d8e552b2ab2017ef8b420e92edbc841c352d"}, + {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9056c1ecd25795207ad294bcf39f2db3d845767be0ea6e6a34d856f006006083"}, + {file = "PyYAML-6.0.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:82d09873e40955485746739bcb8b4586983670466c23382c19cffecbf1fd8706"}, + {file = "PyYAML-6.0.2-cp38-cp38-win32.whl", hash = "sha256:43fa96a3ca0d6b1812e01ced1044a003533c47f6ee8aca31724f78e93ccc089a"}, + {file = "PyYAML-6.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:01179a4a8559ab5de078078f37e5c1a30d76bb88519906844fd7bdea1b7729ff"}, + {file = "PyYAML-6.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:688ba32a1cffef67fd2e9398a2efebaea461578b0923624778664cc1c914db5d"}, + {file = "PyYAML-6.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a8786accb172bd8afb8be14490a16625cbc387036876ab6ba70912730faf8e1f"}, + {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8e03406cac8513435335dbab54c0d385e4a49e4945d2909a581c83647ca0290"}, + {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f753120cb8181e736c57ef7636e83f31b9c0d1722c516f7e86cf15b7aa57ff12"}, + {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b1fdb9dc17f5a7677423d508ab4f243a726dea51fa5e70992e59a7411c89d19"}, + {file = "PyYAML-6.0.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0b69e4ce7a131fe56b7e4d770c67429700908fc0752af059838b1cfb41960e4e"}, + {file = "PyYAML-6.0.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a9f8c2e67970f13b16084e04f134610fd1d374bf477b17ec1599185cf611d725"}, + {file = "PyYAML-6.0.2-cp39-cp39-win32.whl", hash = "sha256:6395c297d42274772abc367baaa79683958044e5d3835486c16da75d2a694631"}, + {file = "PyYAML-6.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:39693e1f8320ae4f43943590b49779ffb98acb81f788220ea932a6b6c51004d8"}, + {file = "pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e"}, ] [[package]] @@ -4073,4 +4106,4 @@ pyyaml = ">=6.0,<7.0" [metadata] lock-version = "2.0" python-versions = ">=3.11,<3.13" -content-hash = "36156af32b44ffacce51d65da14d67a6dbb8d7dfc6d16986c0bcbc373bff18c3" +content-hash = "73bced7c37411f578085de72da605901463feb8cbc8b9df7e94ea03cdb8f1c98" diff --git a/pyproject.toml b/pyproject.toml index b8f1014..54f6961 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,7 @@ universal-pathlib = "^0.2.2" snakefmt = "^0.10.0" mkdocs-material = "^9.5.20" mkdocs-include-markdown-plugin = "^6.0.6" +poethepoet = "^0.29.0" [tool.poetry.group.dataset_creation.dependencies] @@ -27,6 +28,14 @@ typer = "^0.12.3" xmltodict = "^0.13.0" tifffile = "^2024.5.10" +[tool.poe.tasks] +test_localin_gcsout = "snakemake --dry-run --config datasets=testing/dryrun_tests/datasets_local.tsv root='gcs://khanlab-lightsheet/data/test_bids'" +test_localin_localout = "snakemake --dry-run --config datasets=testing/dryrun_tests/datasets_local.tsv root=bids" +test_gcsin_gcsout = "snakemake --dry-run --config datasets=testing/dryrun_tests/datasets_gcs.tsv root='gcs://khanlab-lightsheet/data/test_bids'" +test_gcsin_localout = "snakemake --dry-run --config datasets=testing/dryrun_tests/datasets_gcs.tsv root=bids" +test_localin_localout_zipstore = "snakemake --dry-run --config datasets=testing/dryrun_tests/datasets_local.tsv root=bids use_zipstore=True" +test_localin_gcsout_zipstore = "snakemake --dry-run --config datasets=testing/dryrun_tests/datasets_local.tsv root='gcs://khanlab-lightsheet/data/test_bids' use_zipstore=True" + [build-system] requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" diff --git a/resources/qc/ff_html_temp.html b/resources/qc/ff_html_temp.html index 658b26d..ba6ee7a 100644 --- a/resources/qc/ff_html_temp.html +++ b/resources/qc/ff_html_temp.html @@ -43,13 +43,13 @@

Chunk - {{ chunk_num }} Channel - {{ loop.index0 }}

{%- for image in channel %} - -

Corrected

+ +

Uncorrected

Slice-{{ image.slice }}

- -

Uncorrected

+ +

Corrected

Slice-{{ image.slice }}

{%- endfor %} @@ -117,4 +117,4 @@

Uncorrected

} }) - \ No newline at end of file + diff --git a/testing/dryrun_tests/datasets_gcs.tsv b/testing/dryrun_tests/datasets_gcs.tsv new file mode 100644 index 0000000..98e634e --- /dev/null +++ b/testing/dryrun_tests/datasets_gcs.tsv @@ -0,0 +1,2 @@ +subject sample acq stain_0 stain_1 stain_2 dataset_path +mouse1 brain blaze Lectin PI Abeta gcs://my-bucket/test_data diff --git a/testing/dryrun_tests/datasets_local.tsv b/testing/dryrun_tests/datasets_local.tsv new file mode 100644 index 0000000..49d6b29 --- /dev/null +++ b/testing/dryrun_tests/datasets_local.tsv @@ -0,0 +1,2 @@ +subject sample acq stain_0 stain_1 stain_2 dataset_path +mouse1 brain blaze Lectin PI Abeta .test/dryrun/data diff --git a/workflow/Snakefile b/workflow/Snakefile index cd19ab3..70b5ced 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -33,7 +33,7 @@ rule all: input: get_all_targets(), get_bids_toplevel_targets(), - get_qc_targets(), #need to skip this if using prestitched + # get_qc_targets(), #need to skip this if using prestitched localrule: True diff --git a/workflow/macros/AutostitchMacro.ijm b/workflow/macros/AutostitchMacro.ijm deleted file mode 100644 index e9df4df..0000000 --- a/workflow/macros/AutostitchMacro.ijm +++ /dev/null @@ -1,56 +0,0 @@ -args = getArgument() -args = split(args, " "); - -dataset_xml = args[0] -pairwise_method = args[1] -ds_x = args[2] -ds_y = args[3] -ds_z = args[4] -do_filter = args[5] -min_r = args[6] -do_global = args[7] -global_strategy = args[8] - -run("Calculate pairwise shifts ...", - "select=" + dataset_xml + - " process_angle=[All angles] " + - " process_channel=[All channels] " + - " process_illumination=[All illuminations] " + - " process_tile=[All tiles] " + - " process_timepoint=[All Timepoints] " + - " method=["+ pairwise_method +"] " + - " channels=[Average Channels] " + - " downsample_in_x=" + ds_x + - " downsample_in_y=" + ds_y + - " downsample_in_z=" + ds_z ); - - -if ( do_filter == 1 ){ - -run("Filter pairwise shifts ...", - "select=" + dataset_xml + - " min_r=" + min_r + - " max_r=1 " + - " max_shift_in_x=0 " + - " max_shift_in_y=0 " + - " max_shift_in_z=0 " + - " max_displacement=0"); -} - -if ( do_global == 1 ){ - -run("Optimize globally and apply shifts ...", - "select=" + dataset_xml + - " process_angle=[All angles] " + - " process_channel=[All channels] " + - " process_illumination=[All illuminations] " + - " process_tile=[All tiles] " + - " process_timepoint=[All Timepoints] " + - " relative=2.500 " + - " absolute=3.500 " + - " global_optimization_strategy=["+global_strategy+"] " + - " fix_group_0-0,"); -} - -// quit after we are finished -eval("script", "System.exit(0);"); diff --git a/workflow/macros/FuseImageMacroZarr.ijm b/workflow/macros/FuseImageMacroZarr.ijm deleted file mode 100644 index 9d49b06..0000000 --- a/workflow/macros/FuseImageMacroZarr.ijm +++ /dev/null @@ -1,47 +0,0 @@ -args = getArgument() -args = split(args, " "); - -in_dataset_xml = args[0] -downsampling = args[1] -channel = args[2] -output_zarr = args[3] -block_size_x = args[4] -block_size_y = args[5] -block_size_z = args[6] -block_size_factor_x = args[7] -block_size_factor_y = args[8] -block_size_factor_z = args[9] - -run("Fuse dataset ...", - "select=" + in_dataset_xml + - " process_angle=[All angles] " + - " process_channel=[Single channel (Select from List)] " + - " processing_channel=[channel " + channel + "]" + - " process_illumination=[All illuminations] " + - " process_tile=[All tiles] " + - " process_timepoint=[All Timepoints] " + - " bounding_box=[Currently Selected Views] " + - " preserve_original " + - " downsampling=" + downsampling + - " interpolation=[Linear Interpolation] " + - " pixel_type=[16-bit unsigned integer] " + - " interest_points_for_non_rigid=[-= Disable Non-Rigid =-] " + - " blend produce=[Each timepoint & channel] " + - " fused_image=[ZARR/N5/HDF5 export using N5-API] " + - " define_input=[Auto-load from input data (values shown below)] " + - " export=ZARR create_0 " + - " zarr_dataset_path=" + output_zarr + - " zarr_base_dataset=/ " + - " zarr_dataset_extension=/s0 " + - " show_advanced_block_size_options " + - " block_size_x=" + block_size_x + - " block_size_y=" + block_size_y + - " block_size_z=" + block_size_z + - " block_size_factor_x=" + block_size_factor_x + - " block_size_factor_y=" + block_size_factor_y + - " block_size_factor_z=" + block_size_factor_z + - " subsampling_factors=[{ {1,1,1}}]"); - - -// quit after we are finished -eval("script", "System.exit(0);"); diff --git a/workflow/rules/bigstitcher.smk b/workflow/rules/bigstitcher.smk index 4256e3b..f280d9a 100644 --- a/workflow/rules/bigstitcher.smk +++ b/workflow/rules/bigstitcher.smk @@ -85,7 +85,7 @@ rule zarr_to_bdv: desc="{desc}", suffix="log.txt", ), - threads: config["cores_per_rule"] + threads: config["total_cores"] group: "preproc" container: @@ -94,74 +94,7 @@ rule zarr_to_bdv: "../scripts/zarr_to_n5_bdv.py" -rule bigstitcher: - input: - dataset_n5=rules.zarr_to_bdv.output.bdv_n5, - dataset_xml=rules.zarr_to_bdv.output.bdv_xml, - ijm=Path(workflow.basedir) / "macros" / "AutostitchMacro.ijm", - params: - fiji_launcher_cmd=get_fiji_launcher_cmd, - macro_args=get_macro_args_bigstitcher, - rm_old_xml=lambda wildcards, output: f"rm -f {output.dataset_xml}~?", - output: - launcher=temp( - bids( - root=work, - subject="{subject}", - datatype="micr", - sample="{sample}", - acq="{acq}", - desc="{desc}", - suffix="bigstitcherfiji.sh", - ) - ), - dataset_xml=temp( - bids( - root=work, - subject="{subject}", - datatype="micr", - sample="{sample}", - acq="{acq}", - desc="{desc}", - suffix="bigstitcherfiji.xml", - ) - ), - benchmark: - bids( - root="benchmarks", - datatype="bigstitcherfiji", - subject="{subject}", - sample="{sample}", - acq="{acq}", - desc="{desc}", - suffix="benchmark.tsv", - ) - log: - bids( - root="logs", - datatype="bigstitcherfiji", - subject="{subject}", - sample="{sample}", - acq="{acq}", - desc="{desc}", - suffix="log.txt", - ), - container: - config["containers"]["spimprep"] - resources: - runtime=30, - mem_mb=10000, - threads: config["cores_per_rule"] - group: - "preproc" - shell: - "cp {input.dataset_xml} {output.dataset_xml} && " - " {params.fiji_launcher_cmd} && " - " echo ' -macro {input.ijm} \"{params.macro_args}\"' >> {output.launcher} " - " && {output.launcher} |& tee {log} && {params.rm_old_xml}" - - -rule bigstitcher_spark_stitching: +rule bigstitcher_stitching: input: dataset_n5=rules.zarr_to_bdv.output.bdv_n5, dataset_xml=rules.zarr_to_bdv.output.bdv_xml, @@ -219,8 +152,8 @@ rule bigstitcher_spark_stitching: config["containers"]["spimprep"] resources: runtime=30, - mem_mb=40000, - threads: config["cores_per_rule"] + mem_mb=int(config["total_mem_mb"] * 0.9), + threads: config["total_cores"] group: "preproc" shell: @@ -230,10 +163,10 @@ rule bigstitcher_spark_stitching: "{params.rm_old_xml}" -rule bigstitcher_spark_solver: +rule bigstitcher_solver: input: dataset_n5=rules.zarr_to_bdv.output.bdv_n5, - dataset_xml=rules.bigstitcher_spark_stitching.output.dataset_xml, + dataset_xml=rules.bigstitcher_stitching.output.dataset_xml, params: downsampling="--downsampling={dsx},{dsy},{dsz}".format( dsx=config["bigstitcher"]["calc_pairwise_shifts"]["downsample_in_x"], @@ -283,8 +216,8 @@ rule bigstitcher_spark_solver: config["containers"]["spimprep"] resources: runtime=30, - mem_mb=40000, - threads: config["cores_per_rule"] + mem_mb=int(config["total_mem_mb"] * 0.9), + threads: config["total_cores"] group: "preproc" shell: @@ -293,7 +226,6 @@ rule bigstitcher_spark_solver: " -s STITCHING --lambda 0.1 " " {params.method} && " "{params.rm_old_xml}" - #lambda 0.1 is default (can expose this if needed) rule bigstitcher_fusion: @@ -320,98 +252,6 @@ rule bigstitcher_fusion: else "stitching" ), ), - ijm=Path(workflow.basedir) / "macros" / "FuseImageMacroZarr.ijm", - params: - fiji_launcher_cmd=get_fiji_launcher_cmd, - macro_args=get_macro_args_zarr_fusion, - output: - launcher=temp( - bids( - root=work, - subject="{subject}", - datatype="micr", - sample="{sample}", - acq="{acq}", - desc="stitched{desc}", - stain="{stain}", - suffix="fuseimagen5.sh", - ) - ), - zarr=temp( - directory( - bids( - root=work, - subject="{subject}", - datatype="micr", - sample="{sample}", - acq="{acq}", - desc="stitched{desc}", - stain="{stain}", - suffix="SPIM.zarr", - ) - ) - ), - benchmark: - bids( - root="benchmarks", - datatype="fuse_dataset", - subject="{subject}", - sample="{sample}", - acq="{acq}", - desc="stitched{desc}", - stain="{stain}", - suffix="benchmark.tsv", - ) - log: - bids( - root="logs", - datatype="fuse_dataset", - subject="{subject}", - sample="{sample}", - acq="{acq}", - desc="stitched{desc}", - stain="{stain}", - suffix="log.txt", - ), - container: - config["containers"]["spimprep"] - resources: - runtime=30, - mem_mb=40000, - threads: config["cores_per_rule"] - group: - "preproc" - shell: - " {params.fiji_launcher_cmd} && " - " echo ' -macro {input.ijm} \"{params.macro_args}\"' >> {output.launcher} " - " && {output.launcher} |& tee {log}" - - -rule bigstitcher_spark_fusion: - input: - dataset_n5=bids( - root=work, - subject="{subject}", - datatype="micr", - sample="{sample}", - acq="{acq}", - desc="{desc}", - suffix="bdv.n5", - ), - dataset_xml=bids( - root=work, - subject="{subject}", - datatype="micr", - sample="{sample}", - acq="{acq}", - desc="{desc}", - suffix="bigstitcher{}.xml".format( - "solver" - if config["bigstitcher"]["global_optimization"]["enabled"] - else "stitching" - ), - ), - ijm=Path(workflow.basedir) / "macros" / "FuseImageMacroZarr.ijm", params: channel=lambda wildcards: "--channelId={channel}".format( channel=get_stains(wildcards).index(wildcards.stain) @@ -438,7 +278,7 @@ rule bigstitcher_spark_fusion: datatype="micr", sample="{sample}", acq="{acq}", - desc="sparkstitched{desc}", + desc="stitched{desc}", stain="{stain}", suffix="SPIM.zarr", ) @@ -470,8 +310,8 @@ rule bigstitcher_spark_fusion: config["containers"]["spimprep"] resources: runtime=30, - mem_mb=30000, - threads: config["cores_per_rule"] + mem_mb=int(config["total_mem_mb"] * 0.9), + threads: config["total_cores"] group: "preproc" shell: diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index 88d40e7..1c2495f 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -73,11 +73,11 @@ def get_extension_ome_zarr(): (e.g. the touch file is used instead). Also appends a .zip if the zipstore option is enabled.""" - if is_remote(config["root"]): - return "ome.zarr/.snakemake_touch" + if config["use_zipstore"]: + return "ome.zarr.zip" else: - if config["ome_zarr"]["use_zipstore"]: - return "ome.zarr.zip" + if is_remote(config["root"]): + return "ome.zarr/.snakemake_touch" else: return "ome.zarr" @@ -128,7 +128,6 @@ def get_all_subj_html(wildcards): htmls = [] for i in range(len(datasets)): - html = "{root}/qc/sub-{subject}_sample-{sample}_acq-{acq}/subject.html".format( root=root, subject=datasets.loc[i, "subject"], @@ -184,7 +183,6 @@ def get_input_dataset(wildcards): def get_metadata_json(wildcards): """returns path to metadata, extracted from local or gcs""" dataset_path = Path(get_dataset_path(wildcards)) - suffix = dataset_path.suffix if is_remote_gcs(dataset_path): return rules.blaze_to_metadata_gcs.output.metadata_json.format(**wildcards) @@ -257,95 +255,55 @@ def get_stains(wildcards): return df.iloc[0][stain_columns].dropna().tolist() -# bigstitcher -def get_fiji_launcher_cmd(wildcards, output, threads, resources): - launcher_opts_find = "-Xincgc" - launcher_opts_replace = f"-XX:+UseG1GC -verbose:gc -XX:+PrintGCDateStamps -XX:ActiveProcessorCount={threads}" - pipe_cmds = [] - pipe_cmds.append("ImageJ-linux64 --dry-run --headless --console") - pipe_cmds.append(f"sed 's/{launcher_opts_find}/{launcher_opts_replace}'/") - pipe_cmds.append( - rf"sed 's/-Xmx[0-9a-z]\+/-Xmx{resources.mem_mb}m -Xms{resources.mem_mb}m/'" - ) - pipe_cmds.append("tr --delete '\\n'") - return "|".join(pipe_cmds) + f" > {output.launcher} && chmod a+x {output.launcher} " - - -def get_macro_args_bigstitcher(wildcards, input, output): - return "{dataset_xml} {pairwise_method} {ds_x} {ds_y} {ds_z} {do_filter} {min_r} {do_global} {global_strategy}".format( - dataset_xml=output.dataset_xml, - pairwise_method=config["bigstitcher"]["calc_pairwise_shifts"]["methods"][ - config["bigstitcher"]["calc_pairwise_shifts"]["method"] - ], - ds_x=config["bigstitcher"]["calc_pairwise_shifts"]["downsample_in_x"], - ds_y=config["bigstitcher"]["calc_pairwise_shifts"]["downsample_in_y"], - ds_z=config["bigstitcher"]["calc_pairwise_shifts"]["downsample_in_z"], - do_filter=config["bigstitcher"]["filter_pairwise_shifts"]["enabled"], - min_r=config["bigstitcher"]["filter_pairwise_shifts"]["min_r"], - do_global=config["bigstitcher"]["global_optimization"]["enabled"], - global_strategy=config["bigstitcher"]["global_optimization"]["strategies"][ - config["bigstitcher"]["global_optimization"]["strategy"] - ], - ) - - -def get_macro_args_zarr_fusion(wildcards, input, output): - return "{dataset_xml} {downsampling} {channel:02d} {output_zarr} {bsx} {bsy} {bsz} {bsfx} {bsfy} {bsfz}".format( - dataset_xml=input.dataset_xml, - downsampling=config["bigstitcher"]["fuse_dataset"]["downsampling"], - channel=get_stains(wildcards).index(wildcards.stain), - output_zarr=output.zarr, - bsx=config["bigstitcher"]["fuse_dataset"]["block_size_x"], - bsy=config["bigstitcher"]["fuse_dataset"]["block_size_y"], - bsz=config["bigstitcher"]["fuse_dataset"]["block_size_z"], - bsfx=config["bigstitcher"]["fuse_dataset"]["block_size_factor_x"], - bsfy=config["bigstitcher"]["fuse_dataset"]["block_size_factor_y"], - bsfz=config["bigstitcher"]["fuse_dataset"]["block_size_factor_z"], - ) - - def get_output_ome_zarr_uri(): + uri = _bids( + root=root, + subject="{subject}", + datatype="micr", + sample="{sample}", + acq="{acq}", + suffix="SPIM.{ext}".format( + ext="ome.zarr.zip" if config["use_zipstore"] else "ome.zarr" + ), + ) if is_remote(config["root"]): - return _bids( - root=root, - subject="{subject}", - datatype="micr", - sample="{sample}", - acq="{acq}", - suffix="SPIM.{ext}".format(ext=get_extension_ome_zarr()), - ) + return uri else: - return "local://" + _bids( - root=root, - subject="{subject}", - datatype="micr", - sample="{sample}", - acq="{acq}", - suffix="SPIM.{ext}".format(ext=get_extension_ome_zarr()), - ) + return "local://" + uri def get_output_ome_zarr(acq_type): if is_remote(config["root"]): - return { - "zarr": touch( - bids( - root=root, + + if config["use_zipstore"]: + return { + "zarr": bids( + root=work, subject="{subject}", datatype="micr", sample="{sample}", acq=f"{{acq,[a-zA-Z0-9]*{acq_type}[a-zA-Z0-9]*}}", - suffix="SPIM.{extension}".format( - extension=get_extension_ome_zarr() - ), + suffix="SPIM.ome.zarr", ) - ) - } + } + else: + return { + "zarr": touch( + bids( + root=root, + subject="{subject}", + datatype="micr", + sample="{sample}", + acq=f"{{acq,[a-zA-Z0-9]*{acq_type}[a-zA-Z0-9]*}}", + suffix="SPIM.ome.zarr/.snakemake_touch", + ) + ) + } else: - if config["write_ome_zarr_direct"]: + if config["use_zipstore"]: return { - "zarr": directory_bids( - root=root, + "zarr": bids( + root=work, subject="{subject}", datatype="micr", sample="{sample}", @@ -355,19 +313,59 @@ def get_output_ome_zarr(acq_type): } else: return { - "zarr": temp( - directory_bids( - root=work, - subject="{subject}", - datatype="micr", - sample="{sample}", - acq=f"{{acq,[a-zA-Z0-9]*{acq_type}[a-zA-Z0-9]*}}", - suffix="SPIM.ome.zarr", - ) + "zarr": directory_bids( + root=root, + subject="{subject}", + datatype="micr", + sample="{sample}", + acq=f"{{acq,[a-zA-Z0-9]*{acq_type}[a-zA-Z0-9]*}}", + suffix="SPIM.ome.zarr", ) } +def get_input_ome_zarr_to_nii(wildcards): + """input function for ome_zarr_to_nii""" + if is_remote(root): + if config["use_zipstore"]: + return bids( + root=work, + subject="{subject}", + datatype="micr", + sample="{sample}", + acq="{acq}", + suffix="SPIM.ome.zarr", + ).format(**wildcards) + else: + return bids( + root=root, + subject="{subject}", + datatype="micr", + sample="{sample}", + acq="{acq}", + suffix="SPIM.ome.zarr/.snakemake_touch", + ).format(**wildcards) + else: + if config["use_zipstore"]: + return bids( + root=root, + subject="{subject}", + datatype="micr", + sample="{sample}", + acq="{acq}", + suffix="SPIM.ome.zarr.zip", + ).format(**wildcards) + else: + return bids( + root=root, + subject="{subject}", + datatype="micr", + sample="{sample}", + acq="{acq}", + suffix="SPIM.ome.zarr", + ).format(**wildcards) + + def get_storage_creds(): """for rules that deal with remote storage directly""" protocol = Path(config["root"]).protocol diff --git a/workflow/rules/flatfield_corr.smk b/workflow/rules/flatfield_corr.smk index 82470ec..95bcde7 100644 --- a/workflow/rules/flatfield_corr.smk +++ b/workflow/rules/flatfield_corr.smk @@ -115,7 +115,7 @@ rule apply_basic_flatfield_corr: resources: runtime=60, mem_mb=32000, - threads: config["cores_per_rule"] + threads: config["total_cores"] group: "preproc" script: diff --git a/workflow/rules/import.smk b/workflow/rules/import.smk index 8a9074a..1c6902a 100644 --- a/workflow/rules/import.smk +++ b/workflow/rules/import.smk @@ -42,7 +42,7 @@ rule blaze_to_metadata_gcs: storage_provider_settings=workflow.storage_provider_settings, output: metadata_json=bids( - root=root, + root=work, desc="gcs", subject="{subject}", datatype="micr", @@ -79,11 +79,6 @@ rule blaze_to_metadata_gcs: rule blaze_to_metadata: input: ome_dir=get_input_dataset, - params: - in_tif_pattern=lambda wildcards, input: os.path.join( - input.ome_dir, - config["import_blaze"]["raw_tif_pattern"], - ), output: metadata_json=temp( bids( @@ -197,10 +192,6 @@ rule tif_to_zarr: ome_dir=get_input_dataset, metadata_json=rules.copy_blaze_metadata.output.metadata_json, params: - in_tif_pattern=lambda wildcards, input: os.path.join( - input.ome_dir, - config["import_blaze"]["raw_tif_pattern"], - ), intensity_rescaling=config["import_blaze"]["intensity_rescaling"], output: zarr=temp( @@ -236,7 +227,9 @@ rule tif_to_zarr: ), group: "preproc" - threads: config["cores_per_rule"] + resources: + mem_mb=config["total_mem_mb"], + threads: int(config["total_mem_mb"] / 8000) #this is memory-limited -- seems to need ~8000mb for each thread, so threads=total_mem_mb / 8000 container: config["containers"]["spimprep"] script: @@ -289,7 +282,9 @@ rule tif_to_zarr_gcs: ), group: "preproc" - threads: config["cores_per_rule"] + resources: + mem_mb=config["total_mem_mb"], + threads: int(config["total_mem_mb"] / 8000) #this is memory-limited -- seems to need ~8000mb for each thread, so threads=total_mem_mb / 8000 container: config["containers"]["spimprep"] script: diff --git a/workflow/rules/ome_zarr.smk b/workflow/rules/ome_zarr.smk index 0ea5854..d3a859b 100644 --- a/workflow/rules/ome_zarr.smk +++ b/workflow/rules/ome_zarr.smk @@ -1,3 +1,5 @@ + + rule zarr_to_ome_zarr: input: **get_storage_creds(), @@ -27,7 +29,9 @@ rule zarr_to_ome_zarr: storage_provider_settings=workflow.storage_provider_settings, output: **get_output_ome_zarr("blaze"), - threads: config["cores_per_rule"] + resources: + mem_mb=config["total_mem_mb"], + threads: config["total_cores"] log: bids( root="logs", @@ -77,38 +81,43 @@ rule tif_stacks_to_ome_zarr: config["containers"]["spimprep"] group: "preproc" - threads: config["cores_per_rule"] + threads: config["total_cores"] resources: runtime=360, - mem_mb=32000, + mem_mb=config["total_mem_mb"], script: "../scripts/tif_stacks_to_ome_zarr.py" -if config["write_ome_zarr_direct"] == False: - - rule ome_zarr_from_work: - """ generic rule to copy any ome.zarr from work """ - input: - zarr=f"{work}/{{prefix}}.ome.zarr", - output: - zarr=directory(f"{root}/{{prefix}}.ome.zarr"), - log: - "logs/ome_zarr_to_from_work/{prefix}.log", - group: - "preproc" - shell: - "cp -R {input.zarr} {output.zarr} &> {log}" - - rule ome_zarr_to_zipstore: - """ generic rule to process any ome.zarr from work """ + """ use 7zip to create a zipstore """ input: - zarr=f"{work}/{{prefix}}.ome.zarr", + zarr=bids( + root=work, + subject="{subject}", + datatype="micr", + sample="{sample}", + acq="{acq}", + suffix="SPIM.ome.zarr", + ), output: - zarr_zip=f"{root}/{{prefix}}.ome.zarr.zip", + zarr_zip=bids( + root=root, + subject="{subject}", + datatype="micr", + sample="{sample}", + acq="{acq}", + suffix="SPIM.ome.zarr.zip", + ), log: - "logs/ome_zarr_to_zipstore/{prefix}.log", + bids( + root="logs", + subject="{subject}", + datatype="micr", + sample="{sample}", + acq="{acq}", + suffix="log.txt", + ), group: "preproc" shell: @@ -118,14 +127,7 @@ rule ome_zarr_to_zipstore: rule ome_zarr_to_nii: input: **get_storage_creds(), - zarr=bids( - root=root, - subject="{subject}", - datatype="micr", - sample="{sample}", - acq="{acq}", - suffix="SPIM.{ext}".format(ext=get_extension_ome_zarr()), - ), + zarr=get_input_ome_zarr_to_nii, params: channel_index=lambda wildcards: get_stains(wildcards).index(wildcards.stain), uri=get_output_ome_zarr_uri(), @@ -165,7 +167,7 @@ rule ome_zarr_to_nii: ), group: "preproc" - threads: config["cores_per_rule"] + threads: config["total_cores"] container: config["containers"]["spimprep"] script: diff --git a/workflow/scripts/blaze_to_metadata.py b/workflow/scripts/blaze_to_metadata.py index 608158e..25a27e3 100644 --- a/workflow/scripts/blaze_to_metadata.py +++ b/workflow/scripts/blaze_to_metadata.py @@ -4,25 +4,67 @@ import re from itertools import product from snakemake.io import glob_wildcards - -in_tif_pattern = snakemake.params.in_tif_pattern +from glob import glob +from pathlib import Path +import os + +tif_files = glob(f"{snakemake.input.ome_dir}/*.tif") + +#check first tif file to see if it is zstack or not: +if 'xyz-Table Z' in Path(tif_files[0]).name: + is_zstack=False +else: + is_zstack=True + +#now check to see if there is just single tile +if ' x ' in Path(tif_files[0]).name: + is_tiled=True +else: + is_tiled=False + +if is_tiled: + if is_zstack: + in_tif_pattern = os.path.join(snakemake.input.ome_dir,snakemake.config["import_blaze"]["raw_tif_pattern_zstack"]) + else: + in_tif_pattern = os.path.join(snakemake.input.ome_dir,snakemake.config["import_blaze"]["raw_tif_pattern"]) +else: + in_tif_pattern = os.path.join(snakemake.input.ome_dir,snakemake.config["import_blaze"]["raw_tif_pattern_notile"]) #add a wildcard constraint to ensure no #subfolders get parsed (ie don't match anything with / in it): prefix_constraint=r'[^/]+' in_tif_pattern_constrained = in_tif_pattern.replace('{prefix}',f'{{prefix,{prefix_constraint}}}') + #parse the filenames to get number of channels, tiles etc.. -prefix, tilex, tiley, channel, zslice = glob_wildcards(in_tif_pattern_constrained) +if is_tiled: + if is_zstack: + prefix, tilex, tiley, channel = glob_wildcards(in_tif_pattern_constrained,tif_files) + else: + prefix, tilex, tiley, channel, zslice = glob_wildcards(in_tif_pattern_constrained,tif_files) +else: + prefix, channel, zslice = glob_wildcards(in_tif_pattern_constrained,tif_files) + +if is_tiled: + tiles_x = sorted(list(set(tilex))) + tiles_y = sorted(list(set(tiley))) -tiles_x = sorted(list(set(tilex))) -tiles_y = sorted(list(set(tiley))) channels = sorted(list(set(channel))) -zslices = sorted(list(set(zslice))) prefixes = sorted(list(set(prefix))) -#read in series metadata from first file -in_tif = in_tif_pattern.format(tilex=tiles_x[0],tiley=tiles_y[0],prefix=prefixes[0],channel=channels[0],zslice=zslices[0]) + +if not is_zstack: + zslices = sorted(list(set(zslice))) + +if is_tiled: + if is_zstack: + in_tif = in_tif_pattern.format(tilex=tiles_x[0],tiley=tiles_y[0],prefix=prefixes[0],channel=channels[0]) + else: + #read in series metadata from first file + in_tif = in_tif_pattern.format(tilex=tiles_x[0],tiley=tiles_y[0],prefix=prefixes[0],channel=channels[0],zslice=zslices[0]) +else: + in_tif = in_tif_pattern.format(prefix=prefixes[0],channel=channels[0],zslice=zslices[0]) + raw_tif = tifffile.TiffFile(in_tif,mode='r') @@ -37,57 +79,81 @@ custom_metadata = ome_dict['OME']['Image']['ca:CustomAttributes'] - #read tile configuration from the microscope metadata if axes == 'CZYX': - tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+)_xyz-Table Z(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+),(?P\S+), (?P\S+)\)" + if is_tiled: + if is_zstack: + tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+),(?P\S+)\)" + else: + tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+)_xyz-Table Z(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+),(?P\S+), (?P\S+)\)" + else: + print('single tile, axes=CZYX') + elif axes == 'ZYX': - tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+)_xyz-Table Z(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+), (?P\S+)\)" - -tile_pattern = re.compile(tile_config_pattern) - -#put it in 3 maps, one for each coord, indexed by tilex, tiley, channel, and aslice -map_x=dict() -map_y=dict() -map_z=dict() - -map_tiles_to_chunk=dict() -chunks = [] -for chunk,(tilex,tiley) in enumerate(product(tiles_x,tiles_y)): - map_tiles_to_chunk[tilex+tiley] = chunk - chunks.append(chunk) - -for line in custom_metadata['TileConfiguration']['@TileConfiguration'].split(' ')[1:]: - - d = re.search(tile_pattern,line).groupdict() - chunk = map_tiles_to_chunk[d['tilex']+d['tiley']] # want the key to have chunk instad of tilex,tiley, so map to that first - - #key is: tile-{chunk}_chan-{channel}_z-{zslice} - key = f"tile-{chunk}_chan-{d['channel']}_z-{d['zslice']}" - - map_x[key] = float(d['x']) - map_y[key] = float(d['y']) - map_z[key] = float(d['z']) - + if is_tiled: + if is_zstack: + tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+)\)" + else: + tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+)_xyz-Table Z(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+), (?P\S+)\)" + else: + print('single tile, axes=ZYX') + +if is_tiled: + tile_pattern = re.compile(tile_config_pattern) + + #put it in 3 maps, one for each coord, indexed by tilex, tiley, channel, and aslice + map_x=dict() + map_y=dict() + map_z=dict() + + map_tiles_to_chunk=dict() + chunks = [] + for chunk,(tilex,tiley) in enumerate(product(tiles_x,tiles_y)): + map_tiles_to_chunk[tilex+tiley] = chunk + chunks.append(chunk) + + for line in custom_metadata['TileConfiguration']['@TileConfiguration'].split(' ')[1:]: + d = re.search(tile_pattern,line).groupdict() + chunk = map_tiles_to_chunk[d['tilex']+d['tiley']] # want the key to have chunk instad of tilex,tiley, so map to that first + + if is_zstack: + key = f"tile-{chunk}_chan-{d['channel']}_z-0000" + else: + #key is: tile-{chunk}_chan-{channel}_z-{zslice} + key = f"tile-{chunk}_chan-{d['channel']}_z-{d['zslice']}" + + map_x[key] = float(d['x']) + map_y[key] = float(d['y']) + if is_zstack: + map_z[key] = float(0) + else: + map_z[key] = float(d['z']) + metadata={} -metadata['tiles_x'] = tiles_x -metadata['tiles_y'] = tiles_y +if is_tiled: + metadata['tiles_x'] = tiles_x + metadata['tiles_y'] = tiles_y + metadata['chunks'] = chunks + metadata['lookup_tile_offset_x'] = map_x + metadata['lookup_tile_offset_y'] = map_y + metadata['lookup_tile_offset_z'] = map_z + metadata['channels'] = channels -metadata['zslices'] = zslices + +if not is_zstack: + metadata['zslices'] = zslices metadata['prefixes'] = prefixes -metadata['chunks'] = chunks metadata['axes'] = axes metadata['shape'] = shape metadata['physical_size_x'] = float(physical_size_x) metadata['physical_size_y'] = float(physical_size_y) metadata['physical_size_z'] = float(physical_size_z) -metadata['lookup_tile_offset_x'] = map_x -metadata['lookup_tile_offset_y'] = map_y -metadata['lookup_tile_offset_z'] = map_z metadata['ome_full_metadata'] = ome_dict metadata['PixelSize'] = [ metadata['physical_size_z']/1000.0, metadata['physical_size_y']/1000.0, metadata['physical_size_x']/1000.0 ] #zyx since OME-Zarr is ZYX metadata['PixelSizeUnits'] = 'mm' +metadata['is_zstack'] = is_zstack +metadata['is_tiled'] = is_tiled #write metadata to json with open(snakemake.output.metadata_json, 'w') as fp: diff --git a/workflow/scripts/blaze_to_metadata_gcs.py b/workflow/scripts/blaze_to_metadata_gcs.py index 7817d87..b652170 100644 --- a/workflow/scripts/blaze_to_metadata_gcs.py +++ b/workflow/scripts/blaze_to_metadata_gcs.py @@ -5,12 +5,11 @@ import os from itertools import product from snakemake.io import glob_wildcards +from pathlib import Path import gcsfs from lib.cloud_io import get_fsspec dataset_uri = snakemake.params.dataset_path -in_tif_pattern = snakemake.params.in_tif_pattern - gcsfs_opts={'project': snakemake.params.storage_provider_settings['gcs'].get_settings().project, 'token': snakemake.input.creds} @@ -18,18 +17,57 @@ tifs = fs.glob(f"{dataset_uri}/*.tif") +#check first tif file to see if it is zstack or not: +if 'xyz-Table Z' in Path(tifs[0]).name: + is_zstack=False +else: + is_zstack=True + +#now check to see if there is just single tile +if ' x ' in Path(tifs[0]).name: + is_tiled=True +else: + is_tiled=False + + + +if is_tiled: + if is_zstack: + in_tif_pattern = snakemake.config["import_blaze"]["raw_tif_pattern_zstack"] + else: + in_tif_pattern = snakemake.config["import_blaze"]["raw_tif_pattern"] +else: + in_tif_pattern = snakemake.config["import_blaze"]["raw_tif_pattern_notile"] + + + #parse the filenames to get number of channels, tiles etc.. -prefix, tilex, tiley, channel, zslice = glob_wildcards(in_tif_pattern,files=tifs) +if is_tiled: + if is_zstack: + prefix, tilex, tiley, channel = glob_wildcards(in_tif_pattern,files=tifs) + else: + prefix, tilex, tiley, channel, zslice = glob_wildcards(in_tif_pattern,files=tifs) +else: + prefix, channel, zslice = glob_wildcards(in_tif_pattern,files=tifs) + +if is_tiled: + tiles_x = sorted(list(set(tilex))) + tiles_y = sorted(list(set(tiley))) -tiles_x = sorted(list(set(tilex))) -tiles_y = sorted(list(set(tiley))) channels = sorted(list(set(channel))) -zslices = sorted(list(set(zslice))) prefixes = sorted(list(set(prefix))) +if not is_zstack: + zslices = sorted(list(set(zslice))) + + +if not is_zstack: + zslices = sorted(list(set(zslice))) -#read in series metadata from first file -in_tif = in_tif_pattern.format(tilex=tiles_x[0],tiley=tiles_y[0],prefix=prefixes[0],channel=channels[0],zslice=zslices[0]) + #read in series metadata from first file + in_tif = in_tif_pattern.format(tilex=tiles_x[0],tiley=tiles_y[0],prefix=prefixes[0],channel=channels[0],zslice=zslices[0]) +else: + in_tif = in_tif_pattern.format(tilex=tiles_x[0],tiley=tiles_y[0],prefix=prefixes[0],channel=channels[0]) with fs.open(f"gcs://{in_tif}", 'rb') as tif_file: raw_tif = tifffile.TiffFile(tif_file,mode='r') @@ -49,54 +87,80 @@ #read tile configuration from the microscope metadata if axes == 'CZYX': - tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+)_xyz-Table Z(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+),(?P\S+), (?P\S+)\)" + if is_tiled: + if is_zstack: + tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+),(?P\S+)\)" + else: + tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+)_xyz-Table Z(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+),(?P\S+), (?P\S+)\)" + else: + print('single tile, axes=CZYX') + elif axes == 'ZYX': - tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+)_xyz-Table Z(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+), (?P\S+)\)" - -tile_pattern = re.compile(tile_config_pattern) - -#put it in 3 maps, one for each coord, indexed by tilex, tiley, channel, and aslice -map_x=dict() -map_y=dict() -map_z=dict() - -map_tiles_to_chunk=dict() -chunks = [] -for chunk,(tilex,tiley) in enumerate(product(tiles_x,tiles_y)): - map_tiles_to_chunk[tilex+tiley] = chunk - chunks.append(chunk) - -for line in custom_metadata['TileConfiguration']['@TileConfiguration'].split(' ')[1:]: - - d = re.search(tile_pattern,line).groupdict() - chunk = map_tiles_to_chunk[d['tilex']+d['tiley']] # want the key to have chunk instad of tilex,tiley, so map to that first - - #key is: tile-{chunk}_chan-{channel}_z-{zslice} - key = f"tile-{chunk}_chan-{d['channel']}_z-{d['zslice']}" - - map_x[key] = float(d['x']) - map_y[key] = float(d['y']) - map_z[key] = float(d['z']) - + if is_tiled: + if is_zstack: + tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+)\)" + else: + tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+)_xyz-Table Z(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+), (?P\S+)\)" + else: + print('single tile, axes=ZYX') + +if is_tiled: + tile_pattern = re.compile(tile_config_pattern) + + #put it in 3 maps, one for each coord, indexed by tilex, tiley, channel, and aslice + map_x=dict() + map_y=dict() + map_z=dict() + + map_tiles_to_chunk=dict() + chunks = [] + for chunk,(tilex,tiley) in enumerate(product(tiles_x,tiles_y)): + map_tiles_to_chunk[tilex+tiley] = chunk + chunks.append(chunk) + + for line in custom_metadata['TileConfiguration']['@TileConfiguration'].split(' ')[1:]: + + d = re.search(tile_pattern,line).groupdict() + chunk = map_tiles_to_chunk[d['tilex']+d['tiley']] # want the key to have chunk instad of tilex,tiley, so map to that first + + if is_zstack: + key = f"tile-{chunk}_chan-{d['channel']}_z-0000" + else: + #key is: tile-{chunk}_chan-{channel}_z-{zslice} + key = f"tile-{chunk}_chan-{d['channel']}_z-{d['zslice']}" + + map_x[key] = float(d['x']) + map_y[key] = float(d['y']) + if is_zstack: + map_z[key] = float(0) + else: + map_z[key] = float(d['z']) + metadata={} -metadata['tiles_x'] = tiles_x -metadata['tiles_y'] = tiles_y +if is_tiled: + metadata['tiles_x'] = tiles_x + metadata['tiles_y'] = tiles_y + metadata['chunks'] = chunks + metadata['lookup_tile_offset_x'] = map_x + metadata['lookup_tile_offset_y'] = map_y + metadata['lookup_tile_offset_z'] = map_z + metadata['channels'] = channels -metadata['zslices'] = zslices + +if not is_zstack: + metadata['zslices'] = zslices metadata['prefixes'] = prefixes -metadata['chunks'] = chunks metadata['axes'] = axes metadata['shape'] = shape metadata['physical_size_x'] = float(physical_size_x) metadata['physical_size_y'] = float(physical_size_y) metadata['physical_size_z'] = float(physical_size_z) -metadata['lookup_tile_offset_x'] = map_x -metadata['lookup_tile_offset_y'] = map_y -metadata['lookup_tile_offset_z'] = map_z metadata['ome_full_metadata'] = ome_dict metadata['PixelSize'] = [ metadata['physical_size_z']/1000.0, metadata['physical_size_y']/1000.0, metadata['physical_size_x']/1000.0 ] #zyx since OME-Zarr is ZYX metadata['PixelSizeUnits'] = 'mm' +metadata['is_zstack'] = is_zstack +metadata['is_tiled'] = is_tiled #write metadata to json with open(snakemake.output.metadata_json, 'w') as fp: diff --git a/workflow/scripts/tif_to_zarr.py b/workflow/scripts/tif_to_zarr.py index bfd4a86..8787c9f 100644 --- a/workflow/scripts/tif_to_zarr.py +++ b/workflow/scripts/tif_to_zarr.py @@ -1,7 +1,10 @@ import tifffile +import os import json +import pyvips import dask.array as da -import dask.array.image +from dask.delayed import delayed +from dask.array.image import imread as imread_tifs from itertools import product from dask.diagnostics import ProgressBar @@ -21,46 +24,97 @@ def single_imread(*args): return tifffile.imread(*args,key=0) -#use tif pattern but replace the [ and ] with [[] and []] so glob doesn't choke -in_tif_glob = replace_square_brackets(str(snakemake.params.in_tif_pattern)) +def read_page_as_numpy(tif_file,page): + """gets a single page (i.e. 2d image) from a tif file zstack""" + return pyvips.Image.new_from_file(tif_file, page=page).numpy() #read metadata json with open(snakemake.input.metadata_json) as fp: metadata = json.load(fp) + +is_zstack = metadata['is_zstack'] +is_tiled = metadata['is_tiled'] + +if is_tiled: + if is_zstack: + in_tif_pattern = os.path.join(snakemake.input.ome_dir,snakemake.config["import_blaze"]["raw_tif_pattern_zstack"]) + else: + in_tif_pattern = os.path.join(snakemake.input.ome_dir,snakemake.config["import_blaze"]["raw_tif_pattern"]) +else: + in_tif_pattern = os.path.join(snakemake.input.ome_dir,snakemake.config["import_blaze"]["raw_tif_pattern_notile"]) + + + +#use tif pattern but replace the [ and ] with [[] and []] so glob doesn't choke +in_tif_glob = replace_square_brackets(str(in_tif_pattern)) + + #TODO: put these in top-level metadata for easier access.. -size_x=metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeX'] -size_y=metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeY'] -size_z=metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeZ'] -size_c=metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeC'] -size_tiles=len(metadata['tiles_x'])*len(metadata['tiles_y']) +size_x=int(metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeX']) +size_y=int(metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeY']) +size_z=int(metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeZ']) +size_c=int(metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeC']) + + -#now get the first channel and first zslice tif -tiles=[] -for i_tile,(tilex,tiley) in enumerate(product(metadata['tiles_x'],metadata['tiles_y'])): - +if is_tiled: + + size_tiles=len(metadata['tiles_x'])*len(metadata['tiles_y']) + + #now get the first channel and first zslice tif + tiles=[] + for i_tile,(tilex,tiley) in enumerate(product(metadata['tiles_x'],metadata['tiles_y'])): + print(f'tile {tilex}x{tiley}, {i_tile}') + zstacks=[] + for i_chan,channel in enumerate(metadata['channels']): + + print(f'channel {i_chan}') + + if is_zstack: + tif_file = in_tif_pattern.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel) + + pages=[] + #read each page + for i_z in range(size_z): + pages.append(da.from_delayed(delayed(read_page_as_numpy)(tif_file,i_z),shape=(size_y,size_x),dtype='uint16')) + + zstacks.append(da.stack(pages)) + print(zstacks[-1].shape) + else: + zstacks.append(imread_tifs(in_tif_glob.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel,zslice='*'), imread=single_imread)) + + #have list of zstack dask arrays for the tile, one for each channel + #stack them up and append to list of tiles + tiles.append(da.stack(zstacks)) + + + + #now we have list of tiles, each a dask array + #stack them up to get our final array + darr = da.stack(tiles) + +else: + #single tile, zslices: zstacks=[] for i_chan,channel in enumerate(metadata['channels']): - - zstacks.append(dask.array.image.imread(in_tif_glob.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel,zslice='*'), imread=single_imread)) - + print(f'channel {i_chan}') + zstacks.append(imread_tifs(in_tif_glob.format(prefix=metadata['prefixes'][0],channel=channel,zslice='*'), imread=single_imread)) - #have list of zstack dask arrays for the tile, one for each channel - #stack them up and append to list of tiles - tiles.append(da.stack(zstacks)) + #stack the channels up + darr = da.stack(zstacks) -#now we have list of tiles, each a dask array -#stack them up to get our final array -darr = da.stack(tiles) + +print(darr.shape) +print(darr.chunksize) #rescale intensities, and recast darr = darr * snakemake.params.intensity_rescaling darr = darr.astype('uint16') - #now we can do the computation itself, storing to zarr print('writing images to zarr with dask') with ProgressBar(): diff --git a/workflow/scripts/tif_to_zarr_gcs.py b/workflow/scripts/tif_to_zarr_gcs.py index 01a9af1..4e1d642 100644 --- a/workflow/scripts/tif_to_zarr_gcs.py +++ b/workflow/scripts/tif_to_zarr_gcs.py @@ -1,15 +1,22 @@ +import tempfile import tifffile import json import dask.array as da +import numpy as np +import dask +from dask.delayed import delayed import dask.array.image from itertools import product from dask.diagnostics import ProgressBar import gcsfs +import pyvips +from snakemake.io import expand gcsfs_opts={'project': snakemake.params.storage_provider_settings['gcs'].get_settings().project, 'token': snakemake.input.creds} fs = gcsfs.GCSFileSystem(**gcsfs_opts) +dask.config.set(scheduler='threads', num_workers=snakemake.threads) def replace_square_brackets(pattern): """replace all [ and ] in the string (have to use @@ -20,60 +27,132 @@ def replace_square_brackets(pattern): pattern = pattern.replace('##RIGHTBRACKET##','[]]') return pattern + +def get_tiff_num_pages(fs,gcs_uri): + with fs.open(gcs_uri, 'rb') as file: + return len(tifffile.TiffFile(file).pages) + def read_tiff_slice(fs,gcs_uri, key=0): """Read a single TIFF slice from GCS.""" with fs.open(gcs_uri, 'rb') as file: return tifffile.imread(file, key=key) -def build_zstack(gcs_uris,fs): +def read_stack_as_numpy(tif_file_uri, fs, Nz,Ny,Nx): + """Gets the full stack (i.e., 3D image) from a tif file z-stack stored in a cloud URI.""" + + #init array + vol = np.zeros((Nz,Ny,Nx),dtype='uint16') + + # Create a temporary file with a .tif extension + with tempfile.NamedTemporaryFile(suffix=".tif", delete=True) as temp_file: + temp_file_path = temp_file.name + + # Open the remote file and write it to the temporary file + with fs.open(tif_file_uri, 'rb') as remote_file: + temp_file.write(remote_file.read()) + + + # Use pyvips to read from the file + for i in range(Nz): + vol[i,:,:] = pyvips.Image.new_from_file(temp_file_path, page=i).numpy() + + return vol + + + +def build_zstack(gcs_uris,fs,size_z,size_y,size_x,dtype): """Build a z-stack from a list of GCS URIs.""" + lazy_arrays = [ dask.delayed(read_tiff_slice)(fs,uri) for uri in gcs_uris ] - sample_array = read_tiff_slice(fs,gcs_uris[0]) # Read a sample to get shape and dtype - shape = (len(gcs_uris),) + sample_array.shape - dtype = sample_array.dtype # Convert the list of delayed objects into a Dask array - return da.stack([da.from_delayed(lazy_array, shape=sample_array.shape, dtype=dtype) for lazy_array in lazy_arrays], axis=0) + return da.stack([da.from_delayed(lazy_array, shape=(size_y,size_x), dtype=dtype) for lazy_array in lazy_arrays], axis=0) +def build_zstack_from_single(gcs_uri,zstack_metadata,fs): + """Build a z-stack from a single GCS URI """ + pages = [i for i in range(zstack_metadata['n_z'])] + lazy_arrays = [ + dask.delayed(read_tiff_slice)(fs,gcs_uri,page) for page in pages + ] + + # Convert the list of delayed objects into a Dask array + return da.stack([da.from_delayed(lazy_array, shape=zstack_metadata['shape'], dtype=zstack_metadata['dtype']) for lazy_array in lazy_arrays], axis=0) -#use tif pattern but replace the [ and ] with [[] and []] so glob doesn't choke -in_tif_glob = replace_square_brackets(str(snakemake.params.in_tif_pattern)) #read metadata json with open(snakemake.input.metadata_json) as fp: metadata = json.load(fp) + +is_zstack = metadata['is_zstack'] +is_tiled = metadata['is_tiled'] + +if is_tiled: + if is_zstack: + in_tif_pattern = snakemake.config["import_blaze"]["raw_tif_pattern_zstack"] + else: + in_tif_pattern = snakemake.config["import_blaze"]["raw_tif_pattern"] +else: + in_tif_pattern = snakemake.config["import_blaze"]["raw_tif_pattern_notile"] + + + + +#use tif pattern but replace the [ and ] with [[] and []] so glob doesn't choke +in_tif_glob = replace_square_brackets(str(in_tif_pattern)) + + + #TODO: put these in top-level metadata for easier access.. -size_x=metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeX'] -size_y=metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeY'] -size_z=metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeZ'] -size_c=metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeC'] -size_tiles=len(metadata['tiles_x'])*len(metadata['tiles_y']) +size_x=int(metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeX']) +size_y=int(metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeY']) +size_z=int(metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeZ']) +size_c=int(metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeC']) + + +if is_tiled: -#now get the first channel and first zslice tif -tiles=[] -for i_tile,(tilex,tiley) in enumerate(product(metadata['tiles_x'],metadata['tiles_y'])): + size_tiles=len(metadata['tiles_x'])*len(metadata['tiles_y']) + + #now get the first channel and first zslice tif + + + tiles=[] + for i_tile,(tilex,tiley) in enumerate(product(metadata['tiles_x'],metadata['tiles_y'])): + #print(f'tile {tilex}x{tiley}, {i_tile}') + zstacks=[] + for i_chan,channel in enumerate(metadata['channels']): - zstacks=[] - for i_chan,channel in enumerate(metadata['channels']): + #print(f'channel {i_chan}') + if is_zstack: + + tif_file = in_tif_pattern.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel) + + zstacks.append(da.from_delayed(delayed(read_stack_as_numpy)('gcs://'+tif_file,fs,size_z,size_y,size_x),shape=(size_z,size_y,size_x),dtype='uint16').rechunk((1,size_y,size_x))) + else: + uris = expand('gcs://'+in_tif_pattern,tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel,zslice=metadata['zslices']) + + zstacks.append(build_zstack(uris,fs=fs,size_z=size_z,size_y=size_y,size_x=size_x,dtype='uint16')) - zstacks.append(build_zstack(fs.glob('gcs://'+in_tif_glob.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel,zslice='*')),fs=fs)) - - #have list of zstack dask arrays for the tile, one for each channel - #stack them up and append to list of tiles - tiles.append(da.stack(zstacks)) + #have list of zstack dask arrays for the tile, one for each channel + #stack them up and append to list of tiles + tiles.append(da.stack(zstacks)) + + + #now we have list of tiles, each a dask array + #stack them up to get our final array + darr = da.stack(tiles) +else: + print("single tile data not supported for tif_to_zarr_gcs") -#now we have list of tiles, each a dask array -#stack them up to get our final array -darr = da.stack(tiles) #rescale intensities, and recast darr = darr * snakemake.params.intensity_rescaling @@ -81,7 +160,7 @@ def build_zstack(gcs_uris,fs): #now we can do the computation itself, storing to zarr print('writing images to zarr with dask') -with ProgressBar(): +with ProgressBar(dt=10): da.to_zarr(darr,snakemake.output.zarr,overwrite=True,dimension_separator='/')