implement basic ops debug (#151)

BiomedSciAI · Aug 2, 2022 · bdd907a · bdd907a
1 parent 711bdf2
commit bdd907a
Show file tree

Hide file tree

Showing 19 changed files with 379 additions and 660 deletions.
diff --git a/examples/fuse_examples/imaging/classification/mnist/run_mnist.py b/examples/fuse_examples/imaging/classification/mnist/run_mnist.py
@@ -17,7 +17,7 @@
 
 ===============================
 
-MNIST classfier implementation that demonstrate end to end training, inference and evaluation using FuseMedML
+MNIST classifier implementation that demonstrate end to end training, inference and evaluation using FuseMedML
 """
 
 import copy
@@ -116,7 +116,7 @@ def create_model() -> torch.nn.Module:
         model=torch_model,
         model_inputs=["data.image"],
         post_forward_processing_function=perform_softmax,
-        model_outputs=["logits.classification", "output.classification"],
+        model_outputs=["model.logits.classification", "model.output.classification"],
     )
     return model
 

diff --git a/examples/fuse_examples/imaging/classification/mnist/run_mnist_custom_pl_imp.py b/examples/fuse_examples/imaging/classification/mnist/run_mnist_custom_pl_imp.py
@@ -92,7 +92,7 @@ def __init__(self, model_dir: str, opt_lr: float, opt_weight_decay: float, **kwa
             model=torch_model,
             model_inputs=["data.image"],
             post_forward_processing_function=perform_softmax,
-            model_outputs=["logits.classification", "output.classification"],
+            model_outputs=["model.logits.classification", "model.output.classification"],
         )
 
         # losses
@@ -119,7 +119,7 @@ def forward(self, batch_dict: NDict) -> NDict:
     ## Step
     def training_step(self, batch_dict: NDict, batch_idx: int) -> dict:
         # run forward function and store the outputs in batch_dict["model"]
-        batch_dict["model"] = self.forward(batch_dict)
+        batch_dict = self.forward(batch_dict)
         # given the batch_dict and FuseMedML style losses - compute the losses, return the total loss and save losses values in batch_dict["losses"]
         total_loss = fuse_pl.step_losses(self._losses, batch_dict)
         # given the batch_dict and FuseMedML style losses - collect the required values to compute the metrics on epoch_end
@@ -130,7 +130,7 @@ def training_step(self, batch_dict: NDict, batch_idx: int) -> dict:
 
     def validation_step(self, batch_dict: NDict, batch_idx: int) -> dict:
         # run forward function and store the outputs in batch_dict["model"]
-        batch_dict["model"] = self.forward(batch_dict)
+        batch_dict = self.forward(batch_dict)
         # given the batch_dict and FuseMedML style losses - compute the losses, return the total loss (ignored) and save losses values in batch_dict["losses"]
         _ = fuse_pl.step_losses(self._losses, batch_dict)
         # given the batch_dict and FuseMedML style losses - collect the required values to compute the metrics on epoch_end
@@ -145,7 +145,7 @@ def predict_step(self, batch_dict: NDict, batch_idx: int) -> dict:
                 "Error: predict_step expectes list of prediction keys to extract from batch_dict. Please specify it using set_predictions_keys() method "
             )
         # run forward function and store the outputs in batch_dict["model"]
-        batch_dict["model"] = self.forward(batch_dict)
+        batch_dict = self.forward(batch_dict)
         # extract the requried keys - defined in self.set_predictions_keys()
         return fuse_pl.step_extract_predictions(self._prediction_keys, batch_dict)
 

diff --git a/fuse/data/README.md b/fuse/data/README.md
@@ -309,6 +309,13 @@ The following operators are useful when implementing a common pipeline:
 * OpToTensor - convert many different types to PyTorch tensor
 * OpOneHotToNumber - convert one-hot encoding vectors into numbers
 
+[**Debug operators**](ops/ops_debug.py)
+
+* OpPrintKeys - print the keys available at this point in the pipeline. Use OpDebugBase constructor arguments to limit the samples to debug.
+* OpPrintShapes - print the shapes of all tensors, numpy arrays and sequences. Use OpDebugBase constructor arguments to limit the samples to debug.
+* OpPrintTypes - print the types of all keys. Use OpDebugBase constructor arguments to limit the samples to debug.
+
+
 **Imaging operators**
 See [fuseimg package](../../fuseimg/data/README.md)
 
diff --git a/fuse/data/ops/ops_debug.py b/fuse/data/ops/ops_debug.py
@@ -0,0 +1,125 @@
+from abc import abstractmethod
+from typing import Hashable, List, Sequence, Optional
+from fuse.data.utils.sample import get_sample_id
+from fuse.utils import NDict
+from fuse.data import OpBase
+import numpy
+import torch
+
+
+class OpDebugBase(OpBase):
+    """
+    Base class for debug operations.
+    Provides the ability to limit samples to debug (will debug the first k samples).
+    Inherits and implements self.call_debug instead of self.__call__.
+    """
+
+    def __init__(
+        self, name: Optional[str] = None, sample_ids: Optional[List[Hashable]] = None, num_samples: bool = False
+    ):
+        """
+        :param name: string identifier - might be useful when the debug op display or save information into a file
+        :param sample_ids: apply for the specified sample ids. To apply for all set to None.
+        :param num_samples: apply for the first num_samples (per process). if None, will apply for all.
+        """
+        super().__init__()
+        self._name = name
+        self._sample_ids = sample_ids
+        self._num_samples = num_samples
+        self._num_samples_done = 0
+
+    def reset(self, name: Optional[str] = None):
+        """Reset operation state"""
+        self._num_samples_done = 0
+        self._name = name
+
+    def should_debug_sample(self, sample_dict: NDict) -> bool:
+        if self._num_samples and self._num_samples_done >= self._num_samples:
+            return False
+
+        if self._sample_ids is not None:
+            sid = get_sample_id(sample_dict)
+            if sid not in self._sample_ids:
+                return False
+
+        self._num_samples_done += True
+        return True
+
+    def __call__(self, sample_dict: NDict, **kwargs) -> NDict:
+        if self.should_debug_sample(sample_dict):
+            self.call_debug(sample_dict, **kwargs)
+        return sample_dict
+
+    @abstractmethod
+    def call_debug(self, sample_dict: NDict, **kwargs) -> None:
+        """The actual debug op implementation"""
+        raise NotImplementedError
+
+
+class OpPrintKeys(OpDebugBase):
+    """
+    Print list of available keys at a given point in the data pipeline
+    It's recommended, but not a must, to run it in a single process.
+    ```
+    from fuse.utils.utils_debug import FuseDebug
+    FuseDebug("debug")
+    ```
+
+    Example:
+    ```
+    (OpPrintKeys(num_samples=1), dict()),
+    ```
+    """
+
+    def call_debug(self, sample_dict: NDict) -> None:
+        print(f"Sample {get_sample_id(sample_dict)} keys:")
+        for key in sample_dict.keypaths():
+            print(f"{key}")
+
+
+class OpPrintShapes(OpDebugBase):
+    """
+    Print the shapes/length of every torch tensor / numpy array / sequence
+    Add at the top your script to force single process:
+    ```
+    from fuse.utils.utils_debug import FuseDebug
+    FuseDebug("debug")
+    ```
+    Example:
+    ```
+    (OpPrintShapes(num_samples=1), dict()),
+    ```
+    """
+
+    def call_debug(self, sample_dict: NDict) -> None:
+        print(f"Sample {get_sample_id(sample_dict)} shapes:")
+        for key in sample_dict.keypaths():
+            value = sample_dict[key]
+            if isinstance(value, torch.Tensor):
+                print(f"{key} is tensor with shape: {value.shape}")
+            elif isinstance(value, numpy.ndarray):
+                print(f"{key} is numpy array with shape: {value.shape}")
+            elif not isinstance(value, str) and isinstance(value, Sequence):
+                print(f"{key} is sequence with length: {len(value)}")
+
+
+class OpPrintTypes(OpDebugBase):
+    """
+    Print the the type of each key
+
+    Add at the top your script to force single process:
+    ```
+    from fuse.utils.utils_debug import FuseDebug
+    FuseDebug("debug")
+    ```
+    Example:
+    ```
+    (OpPrintTypes(num_samples=1), dict()),
+    ```
+    """
+
+    def call_debug(self, sample_dict: NDict) -> None:
+        print(f"Sample {get_sample_id(sample_dict)} types:")
+        for key in sample_dict.keypaths():
+            value = sample_dict[key]
+            print(f"{key} - {type(value).__name__}")
diff --git a/fuse/data/ops/ops_visprobe.py b/fuse/data/ops/ops_visprobe.py