diff --git a/bin/cli/assign_spots_to_nucs.py b/bin/cli/assign_spots_to_nucs.py index 1419fe09..1a20c139 100644 --- a/bin/cli/assign_spots_to_nucs.py +++ b/bin/cli/assign_spots_to_nucs.py @@ -155,8 +155,6 @@ def query_table_for_fov(table: pd.DataFrame) -> Callable[[str], pd.DataFrame]: all_rois = pd.concat(all_rois).sort_values(["fieldOfView", "timepoint"]) logger.info(f"Writing nuclei-labeled spots file: {H.nuclei_labeled_spots_file_path}") all_rois.to_csv(H.nuclei_labeled_spots_file_path, index=False) - logger.info(f"Writing nuclei-filtered spots file: {H.nuclei_filtered_spots_file_path}") - all_rois[all_rois[NUC_LABEL_COL] != 0].to_csv(H.nuclei_filtered_spots_file_path, index=False) logger.info("Done!") diff --git a/bin/cli/run_processing_pipeline.py b/bin/cli/run_processing_pipeline.py index 8eb85145..8ad21765 100644 --- a/bin/cli/run_processing_pipeline.py +++ b/bin/cli/run_processing_pipeline.py @@ -9,11 +9,12 @@ import subprocess import sys from typing import * -import yaml from expression import Result from gertils import ExtantFile, ExtantFolder +import pandas as pd import pypiper +import yaml from looptrace import * from looptrace.Drifter import coarse_correction_workflow, fine_correction_workflow as run_fine_drift_correction @@ -33,7 +34,7 @@ from decon import workflow as run_deconvolution from drift_correct_accuracy_analysis import workflow as run_drift_correction_analysis, run_visualisation as run_drift_correction_accuracy_visualisation from detect_spots import workflow as run_spot_detection -from assign_spots_to_nucs import workflow as run_spot_nucleus_filtration +from assign_spots_to_nucs import NUC_LABEL_COL, workflow as run_spot_nucleus_assignment from partition_regional_spots_by_field_of_view import workflow as prep_regional_spots_visualisation from extract_spots_table import workflow as run_spot_bounding from extract_spots import workflow as run_spot_extraction @@ -207,9 +208,9 @@ def assign_trace_ids(rounds_config: ExtantFile, params_config: ExtantFile) -> Pa "--pixels", build_pixels_config(H), "--roisFile", - str(H.nuclei_filtered_spots_file_path if H.spot_in_nuc else H.proximity_accepted_spots_file_path), + str(H.nuclei_labeled_spots_file_path if H.spot_in_nuc else H.proximity_accepted_spots_file_path), "--outputFile", - str(H.spots_for_voxels_definition_file), + str(H.rois_with_trace_ids_file), ] logging.info(f"Running distance computation command: {' '.join(cmd_parts)}") return subprocess.check_call(cmd_parts) @@ -414,6 +415,22 @@ def run_spot_merge_execution(rounds_config: ExtantFile, params_config: ExtantFil subprocess.check_call(cmd_parts) +def filter_spots_for_nuclei(rounds_config: ExtantFile, params_config: ExtantFile) -> None: + H = ImageHandler(rounds_config=rounds_config, params_config=params_config) + if not H.spot_in_nuc: + logging.info("spot_in_nuc is False, nothing to do, skipping nuclei-based filtration") + return + rois_file: Path = H.rois_with_trace_ids_file + logging.info(f"Reading ROIs file for filtration: {rois_file}") + rois: pd.DataFrame = pd.read_csv(rois_file, index_col=False) + logging.debug(f"Initial ROI count: {rois.shape[0]}") + rois = rois[rois[NUC_LABEL_COL] != 0] + logging.debug(f"ROIs remaining after filtration for nuclei: {rois.shape[0]}") + logging.debug(f"Writing ROIs: {H.nuclei_filtered_spots_file_path}") + rois.to_csv(H.nuclei_filtered_spots_file_path, index=False) + logging.info("Done with nuclei-based spot filtration") + + class LooptracePipeline(pypiper.Pipeline): """Main looptrace processing pipeline""" @@ -460,25 +477,30 @@ def stages(self) -> list[pypiper.Stage]: pypiper.Stage(name="spot_merge_execution", func=run_spot_merge_execution, f_kwargs=rounds_params), pypiper.Stage(name="spot_proximity_filtration", func=run_spot_proximity_filtration, f_kwargs=rounds_params_images), pypiper.Stage( - name="spot_nucleus_filtration", - func=run_spot_nucleus_filtration, + name="spot_nucleus_assignment", + func=run_spot_nucleus_assignment, f_kwargs=rounds_params_images, # images are needed since H.image_lists is iterated in workflow. ), pypiper.Stage( name="trace_id_assignment", func=assign_trace_ids, - f_kwargs={"rounds_config": self.rounds_config, "params_config": self.params_config}, + f_kwargs=rounds_params, + ), + pypiper.Stage( + name="spot_nuclei_filtration", + func=filter_spots_for_nuclei, + f_kwargs=rounds_params, ), pypiper.Stage( name="regional_spots_visualisation_data_prep", func=run_regional_spot_viewing_prep, - f_kwargs={"rounds_config": self.rounds_config, "params_config": self.params_config}, + f_kwargs=rounds_params, nofail=True, ), pypiper.Stage( name="spot_counts_visualisation__regional", func=plot_spot_counts, - f_kwargs={"rounds_config": self.rounds_config, "params_config": self.params_config, "spot_type": SpotType.REGIONAL}, + f_kwargs={**rounds_params, "spot_type": SpotType.REGIONAL}, ), # computes pad_x_min, etc.; writes *_dc_rois.csv (much bigger, since regional spots x timepoints) pypiper.Stage(name="spot_bounding", func=run_spot_bounding, f_kwargs=rounds_params_images), @@ -500,20 +522,28 @@ def stages(self) -> list[pypiper.Stage]: pypiper.Stage( name="spot_counts_visualisation__locus_specific", func=plot_spot_counts, - f_kwargs={"rounds_config": self.rounds_config, "params_config": self.params_config, "spot_type": SpotType.LOCUS_SPECIFIC}, + f_kwargs={**rounds_params, "spot_type": SpotType.LOCUS_SPECIFIC}, ), pypiper.Stage( name="pairwise_distances__locus_specific", func=compute_locus_pairwise_distances, - f_kwargs={"rounds_config": self.rounds_config, "params_config": self.params_config}, + f_kwargs=rounds_params, ), pypiper.Stage( name="pairwise_distances__regional", func=compute_region_pairwise_distances, - f_kwargs={"rounds_config": self.rounds_config, "params_config": self.params_config}, + f_kwargs=rounds_params, + ), + pypiper.Stage( + name="locus_specific_spots_visualisation_data_prep", + func=prep_locus_specific_spots_visualisation, + f_kwargs=rounds_params_images, + ), + pypiper.Stage( + name="locus_spot_viewing_prep", + func=run_locus_spot_viewing_prep, + f_kwargs=rounds_params, ), - pypiper.Stage(name="locus_specific_spots_visualisation_data_prep", func=prep_locus_specific_spots_visualisation, f_kwargs=rounds_params_images), - pypiper.Stage(name="locus_spot_viewing_prep", func=run_locus_spot_viewing_prep, f_kwargs={"rounds_config": self.rounds_config, "params_config": self.params_config}), ] diff --git a/docs/pipeline-execution-control-and-rerun.md b/docs/pipeline-execution-control-and-rerun.md index e5c6da22..1d15bde0 100644 --- a/docs/pipeline-execution-control-and-rerun.md +++ b/docs/pipeline-execution-control-and-rerun.md @@ -62,8 +62,9 @@ Below are the sequential pipeline stage names. * spot_merge_determination * spot_merge_execution * spot_proximity_filtration -* spot_nucleus_filtration +* spot_nucleus_assignment * trace_id_assignment +* spot_nuclei_filtration * regional_spots_visualisation_data_prep * spot_counts_visualisation__regional * spot_bounding diff --git a/docs/pipeline-outputs.md b/docs/pipeline-outputs.md index 4c105dcc..e38418c3 100644 --- a/docs/pipeline-outputs.md +++ b/docs/pipeline-outputs.md @@ -27,7 +27,7 @@ This should operate directly on the raw (though possibly deconvoluted) image dat The labeled file has a column for which rows of the file are related as neighbors (per the configuration options in the [parameters config](./parameters-configuration-file.md) and the [imaging rounds config](./imaging-rounds-configuration-file.md)). This uses the output of the spot detection. The filtered file is the same as the labeled, except for the exclusion of the rows in which the neighbors column was nonempty, and the subsequent dropping of that column. -* `spot_nucleus_filtration`: produces the `*.nuclei_labeled.csv` and `*.nuclei_filtered.csv` files, which represent association of a nucleus label to each spot (`0` representing that the spot's center isn't found within the border of a nuclear mask), and the filtration of the rows to which that applies. More or less analogous in this respect to the proximity-labeled and -filtered files. +* `spot_nucleus_assignment`: produces the `*.nuclei_labeled.csv` and `*.nuclei_filtered.csv` files, which represent association of a nucleus label to each spot (`0` representing that the spot's center isn't found within the border of a nuclear mask), and the filtration of the rows to which that applies. More or less analogous in this respect to the proximity-labeled and -filtered files. This uses the output of the spot proximity filtration. * `spot_counts_visualisation__regional`: produces the charts related to the counts of (_regional_ barcode) spots. This uses the output from the spot detection and filtration by proximity and nuclei. * `spot_bounding`: this computes the drift-corrected bounding boxes from which to extract pixel data that will be used for tracing. This uses the filtered spot detection output (either just the proximity-filtered or proximity- and nuclei-filtered, depending on the `spot_in_nuc` setting of the [parameters configuration file](./parameters-configuration-file.md)), as well as the coarse and fine drift correction outputs. @@ -56,4 +56,4 @@ The only direct dependence should be on the coordinates from the enriched, filte This pair of files is then typically also dragged-and-dropped into `napari` for visualisation. For more about the use of these outputs, refer to the [visualisation doc](./visualisation.md). * `nuclear_masks_visualisation_data_prep` produces data similar to the locus-specific spots visualisation step, aimed at facilitating visualistation of the nuclear regions used for spot filtration; this ends up being relocated to the `_` subfolder of the main images folder, e.g. `images_all/_nuclear_masks_visualisation`. The underscore prefix prevents this from being repeatedly read (or at least attempted to be re-read) by the image handler component of `looptrace`. -* `regional_spots_visualisation_data_prep` uses the 3 regional spots files produced by `spot_detection`, `spot_proximity_filtration`, and `spot_nucleus_filtration` to produce `regional_spots_visualisation`, with per-FOV data to drag-and-drop into Napari for visualisation. +* `regional_spots_visualisation_data_prep` uses the 3 regional spots files produced by `spot_detection`, `spot_proximity_filtration`, and `spot_nucleus_assignment` to produce `regional_spots_visualisation`, with per-FOV data to drag-and-drop into Napari for visualisation. diff --git a/looptrace/ImageHandler.py b/looptrace/ImageHandler.py index 15a455e7..50698992 100644 --- a/looptrace/ImageHandler.py +++ b/looptrace/ImageHandler.py @@ -402,6 +402,10 @@ def roi_image_size(self) -> RoiImageSize: z, y, x = tuple(self.config["roi_image_size"]) return RoiImageSize(z=z, y=y, x=x) + @property + def rois_with_trace_ids_file(self) -> Path: + return self.raw_spots_file.with_suffix(".with_trace_ids.csv") + @property def spot_in_nuc(self) -> bool: return self.config.get("spot_in_nuc", False) @@ -436,7 +440,7 @@ def spot_merge_results_file(self) -> Path: @property def spots_for_voxels_definition_file(self) -> Path: """Path to the file to use for defining the voxels for tracing""" - return self.raw_spots_file.with_suffix(".for_voxels_definition.csv") + return self.nuclei_filtered_spots_file_path if self.spot_in_nuc else self.rois_with_trace_ids_file @property def traces_path(self) -> Path: diff --git a/src/main/scala/AssignTraceIds.scala b/src/main/scala/AssignTraceIds.scala index 9e4e7ab9..cfa47eae 100644 --- a/src/main/scala/AssignTraceIds.scala +++ b/src/main/scala/AssignTraceIds.scala @@ -12,7 +12,7 @@ import squants.space.{ Length, Nanometers } import com.typesafe.scalalogging.StrictLogging -import at.ac.oeaw.imba.gerlich.gerlib.cell.NucleusNumber +import at.ac.oeaw.imba.gerlich.gerlib.cell.NuclearDesignation import at.ac.oeaw.imba.gerlich.gerlib.collections.AtLeast2 import at.ac.oeaw.imba.gerlich.gerlib.geometry.{ Centroid, DistanceThreshold, EuclideanDistance, ProximityComparable } import at.ac.oeaw.imba.gerlich.gerlib.geometry.PiecewiseDistance.ConjunctiveThreshold @@ -34,6 +34,7 @@ import at.ac.oeaw.imba.gerlich.gerlib.io.csv.{ import at.ac.oeaw.imba.gerlich.gerlib.numeric.* import at.ac.oeaw.imba.gerlich.gerlib.numeric.instances.all.given import at.ac.oeaw.imba.gerlich.gerlib.syntax.all.* + import at.ac.oeaw.imba.gerlich.looptrace.ImagingRoundsConfiguration.{ RoiPartnersRequirementType, TraceIdDefinitionAndFiltrationRule, @@ -51,7 +52,6 @@ import at.ac.oeaw.imba.gerlich.looptrace.instances.all.given import at.ac.oeaw.imba.gerlich.looptrace.internal.BuildInfo import at.ac.oeaw.imba.gerlich.looptrace.roi.MergeAndSplitRoiTools.IndexedDetectedSpot import at.ac.oeaw.imba.gerlich.looptrace.space.{ BoundingBox, Pixels3D } -import at.ac.oeaw.imba.gerlich.looptrace.ImagingRoundsConfiguration.ProximityGroup /** Assign trace IDs to regional spots, considering the potential to group some together for downstream analytical purposes. */ object AssignTraceIds extends ScoptCliReaders, StrictLogging: @@ -339,6 +339,7 @@ object AssignTraceIds extends ScoptCliReaders, StrictLogging: encContext: CsvRowEncoder[ImagingContext, String], encCentroid: CsvRowEncoder[Centroid[Double], String], encBox: CsvRowEncoder[BoundingBox, String], + encNuc: CellEncoder[NuclearDesignation], encTid: CellEncoder[TraceId], ): CsvRowEncoder[OutputRecord, String] with override def apply(elem: OutputRecord): RowF[Some, String] = @@ -350,10 +351,15 @@ object AssignTraceIds extends ScoptCliReaders, StrictLogging: val tidRow = TraceIdColumnName.write(tid) val partnersRow = TracePartnersColumName.write(maybePartners.fold(Set())(_.toSortedSet.toSet)) - val nucRow = RowF( - values = NonEmptyList.one(inrec.maybeNucleusNumber.fold("")(_.get.show_)), - headers = Some(NonEmptyList.one(NucleusDesignationColumnName.value)), - ) + val nucRow = + elem._1.maybeNucleusDesignation match { + case None => RowF( + values = NonEmptyList.one(""), + headers = Some(NonEmptyList.one(NucleusDesignationColumnName.value)), + ) + case Some(nuclearDesignation) => + NucleusDesignationColumnName.write(nuclearDesignation) + } idRow |+| ctxRow |+| centerRow |+| boxRow |+| nucRow |+| tidRow |+| partnersRow end OutputRecord @@ -363,7 +369,7 @@ object AssignTraceIds extends ScoptCliReaders, StrictLogging: centroid: Centroid[Double], box: BoundingBox, maybeMergeInputs: Set[RoiIndex], // may be empty, as the input collection is possibly a mix of singletons and merge results - maybeNucleusNumber: Option[NucleusNumber], // Allow the program to operate on non-nuclei-filtered ROIs. + maybeNucleusDesignation: Option[NuclearDesignation], // Allow the program to operate on non-nuclei-filtered ROIs. ): final def timepoint: ImagingTimepoint = context.timepoint @@ -374,23 +380,26 @@ object AssignTraceIds extends ScoptCliReaders, StrictLogging: decContext: CsvRowDecoder[ImagingContext, String], decCentroid: CsvRowDecoder[Centroid[Double], String], decBox: CsvRowDecoder[BoundingBox, String], + decNuclus: CellDecoder[NuclearDesignation], ): CsvRowDecoder[InputRecord, String] = new: override def apply(row: RowF[Some, String]): DecoderResult[InputRecord] = val spotNel = summon[CsvRowDecoder[IndexedDetectedSpot, String]](row) - .leftMap(e => e.getMessage) + .leftMap(e => s"Cannot decode spot from row ($row): ${e.getMessage}") .toValidatedNel - val mergeInputsNel: ValidatedNel[String, Set[RoiIndex]] = + val mergeInputsNel = MergeContributorsColumnNameForAssessedRecord.from(row) - val nucNel: ValidatedNel[String, Option[NucleusNumber]] = + val nucNel = val key = NucleusDesignationColumnName.value row.apply(key) match { // Allow the program to operate on non-nuclei-filtered ROIs. case None | Some("") => Option.empty.validNel - case Some(s) => NucleusNumber.parse(s).map(_.some).toValidatedNel + case Some(s) => decNuclus(s) + .bimap(e => s"Cannot decode spot from row ($row): ${e.getMessage}", _.some) + .toValidatedNel } (spotNel, mergeInputsNel, nucNel) - .mapN{ (spot, maybeMergeIndices, maybeNucNum) => - InputRecord(spot.index, spot.context, spot.centroid, spot.box, maybeMergeIndices, maybeNucNum) + .mapN{ (spot, maybeMergeIndices, maybeNucleus) => + InputRecord(spot.index, spot.context, spot.centroid, spot.box, maybeMergeIndices, maybeNucleus) } .toEither .leftMap{ messages =>