ensure trace IDs are available with the nuclei designations (includin…

…g non-nuclear), and harmonize programs accordingly
gerlichlab · Nov 19, 2024 · b208ecf · b208ecf
1 parent 9ba5c70
commit b208ecf
Show file tree

Hide file tree

Showing 6 changed files with 75 additions and 33 deletions.
diff --git a/bin/cli/assign_spots_to_nucs.py b/bin/cli/assign_spots_to_nucs.py
@@ -155,8 +155,6 @@ def query_table_for_fov(table: pd.DataFrame) -> Callable[[str], pd.DataFrame]:
         all_rois = pd.concat(all_rois).sort_values(["fieldOfView", "timepoint"])
         logger.info(f"Writing nuclei-labeled spots file: {H.nuclei_labeled_spots_file_path}")
         all_rois.to_csv(H.nuclei_labeled_spots_file_path, index=False)
-        logger.info(f"Writing nuclei-filtered spots file: {H.nuclei_filtered_spots_file_path}")
-        all_rois[all_rois[NUC_LABEL_COL] != 0].to_csv(H.nuclei_filtered_spots_file_path, index=False)
 
     logger.info("Done!")
 

diff --git a/bin/cli/run_processing_pipeline.py b/bin/cli/run_processing_pipeline.py
@@ -9,11 +9,12 @@
 import subprocess
 import sys
 from typing import *
-import yaml
 
 from expression import Result
 from gertils import ExtantFile, ExtantFolder
+import pandas as pd
 import pypiper
+import yaml
 
 from looptrace import *
 from looptrace.Drifter import coarse_correction_workflow, fine_correction_workflow as run_fine_drift_correction
@@ -33,7 +34,7 @@
 from decon import workflow as run_deconvolution
 from drift_correct_accuracy_analysis import workflow as run_drift_correction_analysis, run_visualisation as run_drift_correction_accuracy_visualisation
 from detect_spots import workflow as run_spot_detection
-from assign_spots_to_nucs import workflow as run_spot_nucleus_filtration
+from assign_spots_to_nucs import NUC_LABEL_COL, workflow as run_spot_nucleus_assignment
 from partition_regional_spots_by_field_of_view import workflow as prep_regional_spots_visualisation
 from extract_spots_table import workflow as run_spot_bounding
 from extract_spots import workflow as run_spot_extraction
@@ -207,9 +208,9 @@ def assign_trace_ids(rounds_config: ExtantFile, params_config: ExtantFile) -> Pa
         "--pixels",
         build_pixels_config(H),
         "--roisFile",
-        str(H.nuclei_filtered_spots_file_path if H.spot_in_nuc else H.proximity_accepted_spots_file_path),
+        str(H.nuclei_labeled_spots_file_path if H.spot_in_nuc else H.proximity_accepted_spots_file_path),
         "--outputFile", 
-        str(H.spots_for_voxels_definition_file),
+        str(H.rois_with_trace_ids_file),
     ]
     logging.info(f"Running distance computation command: {' '.join(cmd_parts)}")
     return subprocess.check_call(cmd_parts)
@@ -414,6 +415,22 @@ def run_spot_merge_execution(rounds_config: ExtantFile, params_config: ExtantFil
     subprocess.check_call(cmd_parts)
 
 
+def filter_spots_for_nuclei(rounds_config: ExtantFile, params_config: ExtantFile) -> None:
+    H = ImageHandler(rounds_config=rounds_config, params_config=params_config)
+    if not H.spot_in_nuc:
+        logging.info("spot_in_nuc is False, nothing to do, skipping nuclei-based filtration")
+        return
+    rois_file: Path = H.rois_with_trace_ids_file
+    logging.info(f"Reading ROIs file for filtration: {rois_file}")
+    rois: pd.DataFrame = pd.read_csv(rois_file, index_col=False)
+    logging.debug(f"Initial ROI count: {rois.shape[0]}")
+    rois = rois[rois[NUC_LABEL_COL] != 0]
+    logging.debug(f"ROIs remaining after filtration for nuclei: {rois.shape[0]}")
+    logging.debug(f"Writing ROIs: {H.nuclei_filtered_spots_file_path}")
+    rois.to_csv(H.nuclei_filtered_spots_file_path, index=False)
+    logging.info("Done with nuclei-based spot filtration")
+
+
 class LooptracePipeline(pypiper.Pipeline):
     """Main looptrace processing pipeline"""
 
@@ -460,25 +477,30 @@ def stages(self) -> list[pypiper.Stage]:
             pypiper.Stage(name="spot_merge_execution", func=run_spot_merge_execution, f_kwargs=rounds_params),
             pypiper.Stage(name="spot_proximity_filtration", func=run_spot_proximity_filtration, f_kwargs=rounds_params_images),
             pypiper.Stage(
-                name="spot_nucleus_filtration", 
-                func=run_spot_nucleus_filtration, 
+                name="spot_nucleus_assignment", 
+                func=run_spot_nucleus_assignment, 
                 f_kwargs=rounds_params_images, # images are needed since H.image_lists is iterated in workflow.
             ), 
             pypiper.Stage(
                 name="trace_id_assignment",
                 func=assign_trace_ids,
-                f_kwargs={"rounds_config": self.rounds_config, "params_config": self.params_config},
+                f_kwargs=rounds_params,
+            ),
+            pypiper.Stage(
+                name="spot_nuclei_filtration", 
+                func=filter_spots_for_nuclei,
+                f_kwargs=rounds_params,
             ),
             pypiper.Stage(
                 name="regional_spots_visualisation_data_prep", 
                 func=run_regional_spot_viewing_prep, 
-                f_kwargs={"rounds_config": self.rounds_config, "params_config": self.params_config},
+                f_kwargs=rounds_params,
                 nofail=True,
             ),
             pypiper.Stage(
                 name="spot_counts_visualisation__regional", 
                 func=plot_spot_counts, 
-                f_kwargs={"rounds_config": self.rounds_config, "params_config": self.params_config, "spot_type": SpotType.REGIONAL},
+                f_kwargs={**rounds_params, "spot_type": SpotType.REGIONAL},
             ), 
             # computes pad_x_min, etc.; writes *_dc_rois.csv (much bigger, since regional spots x timepoints)
             pypiper.Stage(name="spot_bounding", func=run_spot_bounding, f_kwargs=rounds_params_images),
@@ -500,20 +522,28 @@ def stages(self) -> list[pypiper.Stage]:
             pypiper.Stage(
                 name="spot_counts_visualisation__locus_specific", 
                 func=plot_spot_counts, 
-                f_kwargs={"rounds_config": self.rounds_config, "params_config": self.params_config, "spot_type": SpotType.LOCUS_SPECIFIC},
+                f_kwargs={**rounds_params, "spot_type": SpotType.LOCUS_SPECIFIC},
             ), 
             pypiper.Stage(
                 name="pairwise_distances__locus_specific", 
                 func=compute_locus_pairwise_distances, 
-                f_kwargs={"rounds_config": self.rounds_config, "params_config": self.params_config},
+                f_kwargs=rounds_params,
             ),
             pypiper.Stage(
                 name="pairwise_distances__regional", 
                 func=compute_region_pairwise_distances, 
-                f_kwargs={"rounds_config": self.rounds_config, "params_config": self.params_config},
+                f_kwargs=rounds_params,
+            ),
+            pypiper.Stage(
+                name="locus_specific_spots_visualisation_data_prep", 
+                func=prep_locus_specific_spots_visualisation, 
+                f_kwargs=rounds_params_images,
+            ),
+            pypiper.Stage(
+                name="locus_spot_viewing_prep", 
+                func=run_locus_spot_viewing_prep, 
+                f_kwargs=rounds_params,
             ),
-            pypiper.Stage(name="locus_specific_spots_visualisation_data_prep", func=prep_locus_specific_spots_visualisation, f_kwargs=rounds_params_images),
-            pypiper.Stage(name="locus_spot_viewing_prep", func=run_locus_spot_viewing_prep, f_kwargs={"rounds_config": self.rounds_config, "params_config": self.params_config}),
         ]
 
 

diff --git a/docs/pipeline-execution-control-and-rerun.md b/docs/pipeline-execution-control-and-rerun.md
@@ -62,8 +62,9 @@ Below are the sequential pipeline stage names.
 * spot_merge_determination
 * spot_merge_execution
 * spot_proximity_filtration
-* spot_nucleus_filtration
+* spot_nucleus_assignment
 * trace_id_assignment
+* spot_nuclei_filtration
 * regional_spots_visualisation_data_prep
 * spot_counts_visualisation__regional
 * spot_bounding

diff --git a/docs/pipeline-outputs.md b/docs/pipeline-outputs.md
@@ -27,7 +27,7 @@ This should operate directly on the raw (though possibly deconvoluted) image dat
 The labeled file has a column for which rows of the file are related as neighbors (per the configuration options in the [parameters config](./parameters-configuration-file.md) and the [imaging rounds config](./imaging-rounds-configuration-file.md)). 
 This uses the output of the spot detection.
 The filtered file is the same as the labeled, except for the exclusion of the rows in which the neighbors column was nonempty, and the subsequent dropping of that column.
-* `spot_nucleus_filtration`: produces the `*.nuclei_labeled.csv` and `*.nuclei_filtered.csv` files, which represent association of a nucleus label to each spot (`0` representing that the spot's center isn't found within the border of a nuclear mask), and the filtration of the rows to which that applies. More or less analogous in this respect to the proximity-labeled and -filtered files.
+* `spot_nucleus_assignment`: produces the `*.nuclei_labeled.csv` and `*.nuclei_filtered.csv` files, which represent association of a nucleus label to each spot (`0` representing that the spot's center isn't found within the border of a nuclear mask), and the filtration of the rows to which that applies. More or less analogous in this respect to the proximity-labeled and -filtered files.
 This uses the output of the spot proximity filtration.
 * `spot_counts_visualisation__regional`: produces the charts related to the counts of (_regional_ barcode) spots. This uses the output from the spot detection and filtration by proximity and nuclei.
 * `spot_bounding`: this computes the drift-corrected bounding boxes from which to extract pixel data that will be used for tracing. This uses the filtered spot detection output (either just the proximity-filtered or proximity- and nuclei-filtered, depending on the `spot_in_nuc` setting of the [parameters configuration file](./parameters-configuration-file.md)), as well as the coarse and fine drift correction outputs. 
@@ -56,4 +56,4 @@ The only direct dependence should be on the coordinates from the enriched, filte
 This pair of files is then typically also dragged-and-dropped into `napari` for visualisation. 
 For more about the use of these outputs, refer to the [visualisation doc](./visualisation.md).
 * `nuclear_masks_visualisation_data_prep` produces data similar to the locus-specific spots visualisation step, aimed at facilitating visualistation of the nuclear regions used for spot filtration; this ends up being relocated to the `_` subfolder of the main images folder, e.g. `images_all/_nuclear_masks_visualisation`. The underscore prefix prevents this from being repeatedly read (or at least attempted to be re-read) by the image handler component of `looptrace`.
-* `regional_spots_visualisation_data_prep` uses the 3 regional spots files produced by `spot_detection`, `spot_proximity_filtration`, and `spot_nucleus_filtration` to produce `regional_spots_visualisation`, with per-FOV data to drag-and-drop into Napari for visualisation.
+* `regional_spots_visualisation_data_prep` uses the 3 regional spots files produced by `spot_detection`, `spot_proximity_filtration`, and `spot_nucleus_assignment` to produce `regional_spots_visualisation`, with per-FOV data to drag-and-drop into Napari for visualisation.
diff --git a/looptrace/ImageHandler.py b/looptrace/ImageHandler.py
@@ -402,6 +402,10 @@ def roi_image_size(self) -> RoiImageSize:
         z, y, x = tuple(self.config["roi_image_size"])
         return RoiImageSize(z=z, y=y, x=x)
 
+    @property
+    def rois_with_trace_ids_file(self) -> Path:
+        return self.raw_spots_file.with_suffix(".with_trace_ids.csv")
+
     @property
     def spot_in_nuc(self) -> bool:
         return self.config.get("spot_in_nuc", False)
@@ -436,7 +440,7 @@ def spot_merge_results_file(self) -> Path:
     @property
     def spots_for_voxels_definition_file(self) -> Path:
         """Path to the file to use for defining the voxels for tracing"""
-        return self.raw_spots_file.with_suffix(".for_voxels_definition.csv")
+        return self.nuclei_filtered_spots_file_path if self.spot_in_nuc else self.rois_with_trace_ids_file
 
     @property
     def traces_path(self) -> Path:

diff --git a/src/main/scala/AssignTraceIds.scala b/src/main/scala/AssignTraceIds.scala
@@ -12,7 +12,7 @@ import squants.space.{ Length, Nanometers }
 
 import com.typesafe.scalalogging.StrictLogging
 
-import at.ac.oeaw.imba.gerlich.gerlib.cell.NucleusNumber
+import at.ac.oeaw.imba.gerlich.gerlib.cell.NuclearDesignation
 import at.ac.oeaw.imba.gerlich.gerlib.collections.AtLeast2
 import at.ac.oeaw.imba.gerlich.gerlib.geometry.{ Centroid, DistanceThreshold, EuclideanDistance, ProximityComparable }
 import at.ac.oeaw.imba.gerlich.gerlib.geometry.PiecewiseDistance.ConjunctiveThreshold
@@ -34,6 +34,7 @@ import at.ac.oeaw.imba.gerlich.gerlib.io.csv.{
 import at.ac.oeaw.imba.gerlich.gerlib.numeric.*
 import at.ac.oeaw.imba.gerlich.gerlib.numeric.instances.all.given
 import at.ac.oeaw.imba.gerlich.gerlib.syntax.all.*
+
 import at.ac.oeaw.imba.gerlich.looptrace.ImagingRoundsConfiguration.{
     RoiPartnersRequirementType,
     TraceIdDefinitionAndFiltrationRule,
@@ -51,7 +52,6 @@ import at.ac.oeaw.imba.gerlich.looptrace.instances.all.given
 import at.ac.oeaw.imba.gerlich.looptrace.internal.BuildInfo
 import at.ac.oeaw.imba.gerlich.looptrace.roi.MergeAndSplitRoiTools.IndexedDetectedSpot
 import at.ac.oeaw.imba.gerlich.looptrace.space.{ BoundingBox, Pixels3D }
-import at.ac.oeaw.imba.gerlich.looptrace.ImagingRoundsConfiguration.ProximityGroup
 
 /** Assign trace IDs to regional spots, considering the potential to group some together for downstream analytical purposes. */
 object AssignTraceIds extends ScoptCliReaders, StrictLogging:
@@ -339,6 +339,7 @@ object AssignTraceIds extends ScoptCliReaders, StrictLogging:
             encContext: CsvRowEncoder[ImagingContext, String],
             encCentroid: CsvRowEncoder[Centroid[Double], String],
             encBox: CsvRowEncoder[BoundingBox, String], 
+            encNuc: CellEncoder[NuclearDesignation],
             encTid: CellEncoder[TraceId],
         ): CsvRowEncoder[OutputRecord, String] with
             override def apply(elem: OutputRecord): RowF[Some, String] = 
@@ -350,10 +351,15 @@ object AssignTraceIds extends ScoptCliReaders, StrictLogging:
                 val tidRow = TraceIdColumnName.write(tid)
                 val partnersRow = 
                     TracePartnersColumName.write(maybePartners.fold(Set())(_.toSortedSet.toSet))
-                val nucRow = RowF(
-                    values = NonEmptyList.one(inrec.maybeNucleusNumber.fold("")(_.get.show_)), 
-                    headers = Some(NonEmptyList.one(NucleusDesignationColumnName.value)),
-                )
+                val nucRow = 
+                    elem._1.maybeNucleusDesignation match {
+                        case None => RowF(
+                            values = NonEmptyList.one(""), 
+                            headers = Some(NonEmptyList.one(NucleusDesignationColumnName.value)),
+                        )
+                        case Some(nuclearDesignation) => 
+                            NucleusDesignationColumnName.write(nuclearDesignation)
+                    }
                 idRow |+| ctxRow |+| centerRow |+| boxRow |+| nucRow |+| tidRow |+| partnersRow
     end OutputRecord
 
@@ -363,7 +369,7 @@ object AssignTraceIds extends ScoptCliReaders, StrictLogging:
         centroid: Centroid[Double], 
         box: BoundingBox, 
         maybeMergeInputs: Set[RoiIndex],  // may be empty, as the input collection is possibly a mix of singletons and merge results
-        maybeNucleusNumber: Option[NucleusNumber], // Allow the program to operate on non-nuclei-filtered ROIs.
+        maybeNucleusDesignation: Option[NuclearDesignation], // Allow the program to operate on non-nuclei-filtered ROIs.
     ):
         final def timepoint: ImagingTimepoint = context.timepoint
 
@@ -374,23 +380,26 @@ object AssignTraceIds extends ScoptCliReaders, StrictLogging:
             decContext: CsvRowDecoder[ImagingContext, String], 
             decCentroid: CsvRowDecoder[Centroid[Double], String],
             decBox: CsvRowDecoder[BoundingBox, String],
+            decNuclus: CellDecoder[NuclearDesignation],
         ): CsvRowDecoder[InputRecord, String] = new:
             override def apply(row: RowF[Some, String]): DecoderResult[InputRecord] = 
                 val spotNel = summon[CsvRowDecoder[IndexedDetectedSpot, String]](row)
-                    .leftMap(e => e.getMessage)
+                    .leftMap(e => s"Cannot decode spot from row ($row): ${e.getMessage}")
                     .toValidatedNel
-                val mergeInputsNel: ValidatedNel[String, Set[RoiIndex]] = 
+                val mergeInputsNel = 
                     MergeContributorsColumnNameForAssessedRecord.from(row)
-                val nucNel: ValidatedNel[String, Option[NucleusNumber]] = 
+                val nucNel = 
                     val key = NucleusDesignationColumnName.value
                     row.apply(key) match {
                         // Allow the program to operate on non-nuclei-filtered ROIs.
                         case None | Some("") => Option.empty.validNel
-                        case Some(s) => NucleusNumber.parse(s).map(_.some).toValidatedNel
+                        case Some(s) => decNuclus(s)
+                            .bimap(e => s"Cannot decode spot from row ($row): ${e.getMessage}", _.some)
+                            .toValidatedNel
                     }
                 (spotNel, mergeInputsNel, nucNel)
-                    .mapN{ (spot, maybeMergeIndices, maybeNucNum) => 
-                        InputRecord(spot.index, spot.context, spot.centroid, spot.box, maybeMergeIndices, maybeNucNum)
+                    .mapN{ (spot, maybeMergeIndices, maybeNucleus) => 
+                        InputRecord(spot.index, spot.context, spot.centroid, spot.box, maybeMergeIndices, maybeNucleus)
                     }
                     .toEither
                     .leftMap{ messages =>