From 6d365b0330839680d6c16ba018f5b19866a21772 Mon Sep 17 00:00:00 2001
From: Karl5766
Date: Mon, 30 Sep 2024 16:35:35 -0400
Subject: [PATCH 1/2] negatively masked counting

---
 .gitignore                              |  3 +-
 docs/getting_started/installation.md    |  6 +-
 poetry.lock                             | 20 ++--
 pyproject.toml                          |  2 +-
 spimquant/run.py                        |  3 -
 spimquant/workflow/rules/blobdetect.smk | 18 ++++
 .../negatively_masked_counting.py       | 91 +++++++++++++++++++
 7 files changed, 126 insertions(+), 17 deletions(-)
 create mode 100644 spimquant/workflow/scripts/contour_counting/negatively_masked_counting.py

diff --git a/.gitignore b/.gitignore
index 0fe6b96..6d18104 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,4 +2,5 @@
 *.snakemake/
 *__pycache__/
 spimquant/results/
-spimquant/workflow/scripts/experiment/
\ No newline at end of file
+spimquant/workflow/scripts/experiment/
+
diff --git a/docs/getting_started/installation.md b/docs/getting_started/installation.md
index 2da3010..0a00a6f 100644
--- a/docs/getting_started/installation.md
+++ b/docs/getting_started/installation.md
@@ -1,9 +1,11 @@
 ## SPIMquant
 
-SPIMquant is a BIDS App for processing SPIM (lightsheet) microscopy datasets, performing registration to a template, and quantifying microscopic features from the SPIM data.
+SPIMquant is a parallel processing library designed to process SPIM scans
+and quantify the number of cells within them.
 
 Hardware requirements: If run locally, make sure you have sufficient memory
-(at least quite a bit more than 16G of memory in total), as the `greedy` diffeormorphic registration we rely on can consume a significant amount of memory during the template registration process.
+(well over 16G of memory in total), as the `greedy` command-line tool
+we rely on consumes a large amount of memory during template registration.
 
 Software requirements: A linux machine with Singularity or Apptainer installed
 is recommended. Other-wise with a Windows machine, you want to have the following libraries
diff --git a/poetry.lock b/poetry.lock
index 0eebf5c..1a7d137 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -996,13 +996,13 @@ questionary = ">=1.8.1"
 
 [[package]]
 name = "cvpl-tools"
-version = "0.6.3"
+version = "0.6.11"
 description = "A Python package for utilities and classes related to the file I/O, dataset record keeping and visualization for image processing and computer vision."
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "cvpl_tools-0.6.3-py3-none-any.whl", hash = "sha256:6d62e1b4072de467529c8e035f653469becac924d2573db6373ca7be1853a277"},
-    {file = "cvpl_tools-0.6.3.tar.gz", hash = "sha256:d929cbec99801d5b39755782da23a63fbd6157e3d6478024bb5223bb20b8885a"},
+    {file = "cvpl_tools-0.6.11-py3-none-any.whl", hash = "sha256:1b1836fa51f3838b6511c549369dcc787f918c4e3ad152fc769807e841de27be"},
+    {file = "cvpl_tools-0.6.11.tar.gz", hash = "sha256:2e565934261ae62835c56bef2c42a0db2db7b2b2ee489a3dd1408191dda96baa"},
 ]
 
 [package.dependencies]
@@ -2037,13 +2037,13 @@ test = ["flaky", "ipyparallel", "pre-commit", "pytest (>=7.0)", "pytest-asyncio
 
 [[package]]
 name = "ipython"
-version = "8.27.0"
+version = "8.28.0"
 description = "IPython: Productive Interactive Computing"
 optional = false
 python-versions = ">=3.10"
 files = [
-    {file = "ipython-8.27.0-py3-none-any.whl", hash = "sha256:f68b3cb8bde357a5d7adc9598d57e22a45dfbea19eb6b98286fa3b288c9cd55c"},
-    {file = "ipython-8.27.0.tar.gz", hash = "sha256:0b99a2dc9f15fd68692e898e5568725c6d49c527d36a9fb5960ffbdeaa82ff7e"},
+    {file = "ipython-8.28.0-py3-none-any.whl", hash = "sha256:530ef1e7bb693724d3cdc37287c80b07ad9b25986c007a53aa1857272dac3f35"},
+    {file = "ipython-8.28.0.tar.gz", hash = "sha256:0d0d15ca1e01faeb868ef56bc7ee5a0de5bd66885735682e8a322ae289a13d1a"},
 ]
 
 [package.dependencies]
@@ -6173,13 +6173,13 @@ typing-extensions = ">=3.7.4.3"
 
 [[package]]
 name = "types-python-dateutil"
-version = "2.9.0.20240906"
+version = "2.9.0.20241003"
 description = "Typing stubs for python-dateutil"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "types-python-dateutil-2.9.0.20240906.tar.gz", hash = "sha256:9706c3b68284c25adffc47319ecc7947e5bb86b3773f843c73906fd598bc176e"},
-    {file = "types_python_dateutil-2.9.0.20240906-py3-none-any.whl", hash = "sha256:27c8cc2d058ccb14946eebcaaa503088f4f6dbc4fb6093d3d456a49aef2753f6"},
+    {file = "types-python-dateutil-2.9.0.20241003.tar.gz", hash = "sha256:58cb85449b2a56d6684e41aeefb4c4280631246a0da1a719bdbe6f3fb0317446"},
+    {file = "types_python_dateutil-2.9.0.20241003-py3-none-any.whl", hash = "sha256:250e1d8e80e7bbc3a6c99b907762711d1a1cdd00e978ad39cb5940f6f0a87f3d"},
 ]
 
 [[package]]
@@ -6625,4 +6625,4 @@ type = ["pytest-mypy"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.11,<3.12"
-content-hash = "b4e67b79254ffdc9f43d3aa11533910a333d1865eddfb3bb3bfdff48720f0062"
+content-hash = "f7d47f94c6fba12aa02a87786bfdacada8f7bb02cb16f06888f27a4911648af2"
diff --git a/pyproject.toml b/pyproject.toml
index f1a63d3..657c1b9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -32,7 +32,7 @@ pybids = "^0.16.5"
 sparse = "^0.15.1"
 bokeh = "^3.4.1"
 zarrnii = "0.1.3a1"
-cvpl_tools = "^0.6.3"
+cvpl_tools = "^0.6.11"
 
 [tool.poetry.scripts]
 spimquant = "spimquant.run:app.run"
diff --git a/spimquant/run.py b/spimquant/run.py
index 9303414..7d07ff3 100755
--- a/spimquant/run.py
+++ b/spimquant/run.py
@@ -2,9 +2,6 @@
 from pathlib import Path
 
 from snakebids import bidsapp, plugins
-import os
-import shutil
-
 
 app = bidsapp.app(
     [
diff --git a/spimquant/workflow/rules/blobdetect.smk b/spimquant/workflow/rules/blobdetect.smk
index 0fbdc77..fd40273 100644
--- a/spimquant/workflow/rules/blobdetect.smk
+++ b/spimquant/workflow/rules/blobdetect.smk
@@ -303,3 +303,21 @@ rule map_volume_tsv_dseg_to_template_nii:
         ),
     script:
         "../scripts/map_tsv_dseg_to_nii.py"
+
+# ---------------------------- Part 2: Negatively Masked Counting -------------------------------
+
+rule negatively_masked_counting:
+    """
+    Work in progress
+    """
+    input:
+        zarr='/path_to_bids_root/bids/sub-onuska21/micr/sub-onuska21_sample_brain_acq-prestitched_SPIM.ome.zarr?slices=[1]',
+        neg_mask='/path_to_bids_root/resources/onuska21_patched.tiff',
+    params:
+        neg_mask_scale=(1, 1, 1),
+        tmp_path=f'{config["output_dir"]}/tmp'
+    output:
+        found_lc=directory(f'{config["output_dir"]}/found_lc')
+    threads: 32
+    script:
+        "../scripts/contour_counting/negatively_masked_counting.py"
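The `?slices=[1]` query attached to the `zarr` path above carries the channel selection (input 4 in the new script's docstring below) along with the path, and the script's loader call consumes it. A minimal sketch of that loading step, reusing the script's own loader on a hypothetical path; that the query picks out channel 1 is an assumption based on the docstring:

```python
# Sketch: load a single channel from an ome-zarr path with a `?slices=[...]` query.
# The path below is hypothetical; the loader call mirrors the one in the new script.
import cvpl_tools.ome_zarr.io as ome_io

path = '/data/bids/sub-01/micr/sub-01_SPIM.ome.zarr?slices=[1]'
src_im = ome_io.load_dask_array_from_path(path, mode='r', level=0)
print(src_im.shape)  # expected to be 3d once the channel axis is sliced away
```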
diff --git a/spimquant/workflow/scripts/contour_counting/negatively_masked_counting.py b/spimquant/workflow/scripts/contour_counting/negatively_masked_counting.py
new file mode 100644
index 0000000..019b22d
--- /dev/null
+++ b/spimquant/workflow/scripts/contour_counting/negatively_masked_counting.py
@@ -0,0 +1,91 @@
+"""
+This file finds the sizes and locations of all contours globally in a 3d brightness image.
+
+Inputs to this file:
+1. Path to an ome-zarr 3d single-channel brightness image
+2. Path to a tiff 3d binary mask that masks off regions of the image in 1) containing false positives
+3. A scaling factor tuple that, when multiplied from the left, transforms pixel locations of 2) to those of 1)
+4. An integer (starting from 0) specifying the index of the input channel to use
+
+The output of this file is an NDBlock list of centroids saved in a local file, which can be loaded
+and read as a global list of all contours' sizes and locations.
+"""
+
+
+if __name__ == '__main__':
+    import cvpl_tools.im.process.qsetup as qsetup
+    # IMPORT YOUR LIBRARIES HERE
+    import cvpl_tools.im.seg_process as seg_process
+    import cvpl_tools.im.process.bs_to_os as sp_bs_to_os
+    import cvpl_tools.im.process.os_to_cc as sp_os_to_cc
+    import cvpl_tools.im.process.any_to_any as sp_any_to_any
+    import cvpl_tools.ome_zarr.io as ome_io
+    import cvpl_tools.im.ndblock as ndblock
+    import dask.array as da
+    import numcodecs
+    import tifffile
+    import shutil
+
+    class Pipeline(seg_process.SegProcess):
+        def __init__(self):
+            super().__init__()
+            self.in_to_bs = seg_process.SimpleThreshold(.45)
+            self.bs_to_os = sp_bs_to_os.DirectBSToOS(is_global=True)
+            self.os_to_cc = sp_os_to_cc.CountOSBySize(
+                size_threshold=200.,
+                volume_weight=5.15e-3,
+                border_params=(3., -.5, 2.3),
+                min_size=8,
+                reduce=False,
+                is_global=True
+            )
+
+        def forward(self, im, cptr, viewer_args: dict = None):
+            cdir = cptr.subdir()
+            bs = self.in_to_bs.forward(im, cptr=cdir.cache(cid='in_to_bs'), viewer_args=viewer_args)
+            os = self.bs_to_os.forward(bs, cptr=cdir.cache(cid='bs_to_os'), viewer_args=viewer_args)
+            cc = self.os_to_cc.forward(os, cptr=cdir.cache(cid='os_to_cc'), viewer_args=viewer_args)
+            return cc
+
+    TMP_PATH = snakemake.params.tmp_path
+    with qsetup.PLComponents(TMP_PATH, 'CacheDirectoryNegMaskedCounting',
+                             client_args=dict(threads_per_worker=12, n_workers=1),
+                             viewer_args=dict(use_viewer=False)) as plc:
+        # DO DASK COMPUTATION, AND SHOW RESULTS IN plc.viewer
+        src_im = ome_io.load_dask_array_from_path(snakemake.input.zarr, mode='r', level=0)
+        pipeline = Pipeline()
+        src_im = da.clip(src_im / 1000, 0., 1.)
+        assert src_im.ndim == 3
+        print(f'Computing centroids size and location. '
+              f'Masking the image, imshape={src_im.shape}.')
+        src_im = src_im.rechunk(chunks=(128, 256, 256))
+        storage_options = dict(
+            dimension_separator='/',
+            preferred_chunksize=None,  # don't re-chunk when saving and loading
+            multiscale=0,
+            compressor=numcodecs.Blosc(cname='lz4', clevel=9, shuffle=numcodecs.Blosc.BITSHUFFLE)
+        )
+        viewer_args = dict(
+            viewer=None,
+            display_points=False,
+            display_checkerboard=False,
+            client=plc.dask_client,
+            storage_options=storage_options
+        )
+
+        up_sampler = sp_any_to_any.UpsamplingByIntFactor(factor=snakemake.params.neg_mask_scale, order=0)
+
+        def compute_masking():
+            neg_mask = da.from_array(tifffile.imread(snakemake.input.neg_mask), chunks=(64, 64, 64))
+            neg_mask = up_sampler.forward(neg_mask, cptr=plc.cache_root.cache('neg_mask_upsampling'),
+                                          viewer_args=viewer_args | dict(is_label=True))
+            return src_im * (1 - neg_mask)
+        src_im = plc.cache_root.cache_im(compute_masking, cid='masked_src_im', viewer_args=viewer_args)
+        cc = pipeline.forward(src_im, plc.cache_root.cache(cid='global_label'), viewer_args=viewer_args)
+        object_scores = cc.reduce(force_numpy=True)
+        print(f'Total object score (not n_object nor volume) estimated: {object_scores.sum().item()}')
+
+        tgt_folder = snakemake.output.found_lc
+        shutil.move(f'{plc.cache_root.abs_path}/dir_cache_global_label/dir_cache_os_to_cc/dir_cache_os_to_lc/file_cache_lc_ndblock',
+                    f'{tgt_folder}')
+        lc_snapshot = ndblock.NDBlock.load(tgt_folder).reduce(force_numpy=True)[:20]
+        print('Snapshot of list of centroids\n', lc_snapshot)
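After a run, the centroid list saved to `found_lc` can be reloaded with the same `NDBlock` calls the script itself ends with. A minimal sketch, assuming a completed run and a hypothetical output path:

```python
# Sketch: reload the centroid list that the rule above moved to `found_lc`.
import cvpl_tools.im.ndblock as ndblock

found_lc = 'results/found_lc'  # hypothetical; whatever output_dir/found_lc resolved to
lc = ndblock.NDBlock.load(found_lc).reduce(force_numpy=True)
print(lc.shape)  # one row per contour, carrying its size and location
print(lc[:20])   # the same snapshot the script prints at the end
```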
From c7227ad2fa83547bfc69b645a8f0443cec4b1e8d Mon Sep 17 00:00:00 2001
From: Karl5766
Date: Fri, 4 Oct 2024 17:06:18 -0400
Subject: [PATCH 2/2] patch

---
 docs/getting_started/installation.md    |  7 ----
 docs/usage/cli.md                       | 37 ++++++++++++++++++-
 spimquant/workflow/rules/blobdetect.smk |  5 +--
 .../negatively_masked_counting.py       | 14 ++++---
 4 files changed, 47 insertions(+), 16 deletions(-)

diff --git a/docs/getting_started/installation.md b/docs/getting_started/installation.md
index 0a00a6f..3fc489f 100644
--- a/docs/getting_started/installation.md
+++ b/docs/getting_started/installation.md
@@ -30,13 +30,6 @@ pip install -e git+https://github.com/khanlab/spimquant#egg=spimquant
 
 Note: you can re-run this command to re-install with the latest version
 
-Before running the app, you need to specify a config file to use. "SPIMquant/examples/snakebids_template.yml"
-provides a starting point for specifying a config. If you are using the example dataset provided in the
-above section, then a config file is also included in the zip file.
-
-To specify the config, copy the config file into SPIMquant/spimquant/config/snakebids.yml, and change the
-properties in the config file to ensure paths to the directory are properly set.
-
 ## Running the app
 
 Do a dry-run first (`-n`) and simply print (`-p`) what would be run:
diff --git a/docs/usage/cli.md b/docs/usage/cli.md
index 6054c65..45a6997 100644
--- a/docs/usage/cli.md
+++ b/docs/usage/cli.md
@@ -1,8 +1,43 @@
-## Core command-line interface
+## The `spimquant` Interface
 
 The spimquant tool command-line interface is a composition of the core
 (app-based) arguments and options, and the options related to Snakemake.
 
+For end users, the command-line interface (CLI) is the recommended way to
+specify input/output data paths and configuration settings. Arguments passed
+this way overwrite some of the default options defined in the config file
+`spimquant/config/snakebids.yml`, adapting the workflow to different datasets.
+
+Under the hood, SPIMquant uses `snakemake` to manage its workflow and config file.
+`spimquant` is a program that writes to the config file, and it supports
+all Snakemake arguments.
+
+A typical job looks like the following:
+```bash
+spimquant bids_dir output_dir {participant} [snakemake_args]
+# running spimquant has now updated the contents of config/snakebids.yml,
+# so we can run snakemake with the updated configuration
+snakemake result_file_path
+```
+All three arguments to `spimquant` above are required.
+The first command will do the following:
+1. Log the configuration used to run snakemake in output_dir,
+   including the snakebids.yml file and logs.
+2. Pass bids_dir over to overwrite the bids_dir defined in the default
+   snakebids.yml.
+3. Start snakemake with the snakemake_args provided.
+
+A more concrete example:
+```bash
+spimquant test_bids_dir test_out participant
+snakemake results/result_image.nii -n
+```
+Here `-n` makes snakemake print an execution plan without generating
+any files. In an actual run, you can pass `--cores all` to use all cores
+and `--use-apptainer` to download and run an existing SPIMquant container
+image with all dependencies pre-installed, without having to set up the
+environment yourself.
+
 The core BIDS App arguments and app-specific options are listed below.
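Since the docs above describe `spimquant` as writing its settings into `spimquant/config/snakebids.yml` before a bare `snakemake` call reads them, one quick sanity check is to dump that file between the two commands. A minimal sketch; the key names (`bids_dir`, `output_dir`) are assumptions based on the docs, not a confirmed schema:

```python
# Sketch: inspect the config that `spimquant` wrote and `snakemake` will pick up.
import yaml

with open('spimquant/config/snakebids.yml') as f:
    cfg = yaml.safe_load(f)

# Key names are assumed from the docs above; adjust to the real config schema.
print(cfg.get('bids_dir'), cfg.get('output_dir'))
```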
diff --git a/spimquant/workflow/rules/blobdetect.smk b/spimquant/workflow/rules/blobdetect.smk
index fd40273..78d854d 100644
--- a/spimquant/workflow/rules/blobdetect.smk
+++ b/spimquant/workflow/rules/blobdetect.smk
@@ -311,10 +311,9 @@ rule negatively_masked_counting:
     """
     Work in progress
     """
     input:
-        zarr='/path_to_bids_root/bids/sub-onuska21/micr/sub-onuska21_sample_brain_acq-prestitched_SPIM.ome.zarr?slices=[1]',
-        neg_mask='/path_to_bids_root/resources/onuska21_patched.tiff',
+        neg_mask='C:/ProgrammingTools/ComputerVision/RobartsResearch/data/lightsheet/spimquant_ubuntu/resources/onuska21_patched.tiff',
     params:
-        neg_mask_scale=(1, 1, 1),
+        zarr='C:/ProgrammingTools/ComputerVision/RobartsResearch/data/lightsheet/spimquant_ubuntu/bids/sub-onuska21/micr/sub-onuska21_sample_brain_acq-prestitched_SPIM.ome.zarr?slices=[1]',
         tmp_path=f'{config["output_dir"]}/tmp'
     output:
         found_lc=directory(f'{config["output_dir"]}/found_lc')
diff --git a/spimquant/workflow/scripts/contour_counting/negatively_masked_counting.py b/spimquant/workflow/scripts/contour_counting/negatively_masked_counting.py
index 019b22d..0de35e1 100644
--- a/spimquant/workflow/scripts/contour_counting/negatively_masked_counting.py
+++ b/spimquant/workflow/scripts/contour_counting/negatively_masked_counting.py
@@ -19,6 +19,7 @@
     import cvpl_tools.im.process.bs_to_os as sp_bs_to_os
     import cvpl_tools.im.process.os_to_cc as sp_os_to_cc
     import cvpl_tools.im.process.any_to_any as sp_any_to_any
+    import cvpl_tools.im.algs.dask_resize as im_resize
     import cvpl_tools.ome_zarr.io as ome_io
     import cvpl_tools.im.ndblock as ndblock
     import dask.array as da
@@ -52,10 +53,11 @@ def forward(self, im, cptr, viewer_args: dict = None):
                                  client_args=dict(threads_per_worker=12, n_workers=1),
                                  viewer_args=dict(use_viewer=False)) as plc:
         # DO DASK COMPUTATION, AND SHOW RESULTS IN plc.viewer
-        src_im = ome_io.load_dask_array_from_path(snakemake.input.zarr, mode='r', level=0)
+        src_im = ome_io.load_dask_array_from_path(snakemake.params.zarr, mode='r', level=0)
         pipeline = Pipeline()
         src_im = da.clip(src_im / 1000, 0., 1.)
         assert src_im.ndim == 3
+        print(f'Saving results in {plc.cache_root.abs_path}')
         print(f'Computing centroids size and location. '
               f'Masking the image, imshape={src_im.shape}.')
         src_im = src_im.rechunk(chunks=(128, 256, 256))
@@ -72,12 +74,14 @@ def forward(self, im, cptr, viewer_args: dict = None):
             storage_options=storage_options
         )
 
-        up_sampler = sp_any_to_any.UpsamplingByIntFactor(factor=snakemake.params.neg_mask_scale, order=0)
-
         def compute_masking():
             neg_mask = da.from_array(tifffile.imread(snakemake.input.neg_mask), chunks=(64, 64, 64))
-            neg_mask = up_sampler.forward(neg_mask, cptr=plc.cache_root.cache('neg_mask_upsampling'),
-                                          viewer_args=viewer_args | dict(is_label=True))
+            neg_mask = im_resize.upsample_pad_crop_fit(
+                src_arr=neg_mask,
+                tgt_arr=src_im,
+                cptr=plc.cache_root.cache('neg_mask_upsampling'),
+                viewer_args=viewer_args | dict(is_label=True),
+            )
             return src_im * (1 - neg_mask)
         src_im = plc.cache_root.cache_im(compute_masking, cid='masked_src_im', viewer_args=viewer_args)
         cc = pipeline.forward(src_im, plc.cache_root.cache(cid='global_label'), viewer_args=viewer_args)
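The masking step both commits share reduces to `src_im * (1 - neg_mask)`: voxels under the negative mask are zeroed before counting so they cannot contribute false positives. A minimal self-contained sketch of that arithmetic on toy dask volumes (shapes and chunk sizes are made up for illustration):

```python
# Sketch: negative masking on toy volumes, mirroring `src_im * (1 - neg_mask)`.
import numpy as np
import dask.array as da

src = da.from_array(np.random.rand(64, 128, 128), chunks=(32, 64, 64))

neg_np = np.zeros((64, 128, 128), dtype=np.uint8)
neg_np[:, :64, :] = 1  # region known to contain false positives
neg = da.from_array(neg_np, chunks=(32, 64, 64))

masked = src * (1 - neg)  # zero out masked voxels, keep everything else
print(float(masked[:, :64, :].sum().compute()))   # 0.0: masked region is gone
print(float(masked[:, 64:, :].mean().compute()))  # ~0.5: rest is untouched
```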