From e4585d8c5832e48feeaa4dc54e4443e6e523b76b Mon Sep 17 00:00:00 2001
From: Tjark Miener <tjarkmiener@Tjarks-MacBook-Air.local>
Date: Mon, 29 Apr 2024 08:51:19 +0200
Subject: [PATCH 01/61] added stats extractor parent component

added PlainExtractor based on numpy and scipy functions
---
 src/ctapipe/calib/camera/extractor.py | 86 +++++++++++++++++++++++++++
 src/ctapipe/containers.py             |  3 +
 2 files changed, 89 insertions(+)
 create mode 100644 src/ctapipe/calib/camera/extractor.py

diff --git a/src/ctapipe/calib/camera/extractor.py b/src/ctapipe/calib/camera/extractor.py
new file mode 100644
index 00000000000..0140ed5bcb4
--- /dev/null
+++ b/src/ctapipe/calib/camera/extractor.py
@@ -0,0 +1,86 @@
+"""
+Extraction algorithms to compute the statistics from a sequence of images
+"""
+
+__all__ = [
+    "StatisticsExtractor",
+    "PlainExtractor",
+]
+
+
+from abc import abstractmethod
+
+import numpy as np
+import scipy.stats
+from traitlets import Int
+
+from ctapipe.core import TelescopeComponent
+from ctapipe.containers import StatisticsContainer
+
+
+class StatisticsExtractor(TelescopeComponent):
+    def __init__(self, subarray, config=None, parent=None, **kwargs):
+        """
+        Base component to handle the extraction of the statistics
+        from a sequence of charges and pulse times (images).
+
+        Parameters
+        ----------
+        kwargs
+        """
+        super().__init__(subarray=subarray, config=config, parent=parent, **kwargs)
+
+    @abstractmethod
+    def __call__(self, images, trigger_times) -> list:
+        """
+        Call the relevant functions to extract the statistics
+        for the particular extractor.
+
+        Parameters
+        ----------
+        images : ndarray
+            images stored in a numpy array of shape
+            (n_images, n_channels, n_pix).
+        trigger_times : ndarray
+            images stored in a numpy array of shape
+            (n_images, )
+
+        Returns
+        -------
+        List StatisticsContainer:
+            List of extracted statistics and validity ranges
+        """
+
+class PlainExtractor(StatisticsExtractor):
+    """
+    Extractor the statistics from a sequence of images
+    using numpy and scipy functions
+    """
+
+    sample_size = Int(2500, help="sample size").tag(config=True)
+
+    def __call__(self, dl1_table, col_name="image") -> list:
+       
+        # in python 3.12 itertools.batched can be used
+        image_chunks = (dl1_table[col_name].data[i:i + self.sample_size] for i in range(0, len(dl1_table[col_name].data), self.sample_size))
+        time_chunks = (dl1_table["time"][i:i + self.sample_size] for i in range(0, len(dl1_table["time"]), self.sample_size))
+
+        # Calculate the statistics from a sequence of images
+        stats_list = []
+        for img, time in zip(image_chunks,time_chunks):
+            stats_list.append(self._plain_extraction(img, time))
+            
+        return stats_list
+
+    def _plain_extraction(self, images, trigger_times) -> StatisticsContainer:
+        return StatisticsContainer(
+            validity_start=trigger_times[0],
+            validity_stop=trigger_times[-1],
+            max=np.max(images, axis=0),
+            min=np.min(images, axis=0),
+            mean=np.nanmean(images, axis=0),
+            median=np.nanmedian(images, axis=0),
+            std=np.nanstd(images, axis=0),
+            skewness=scipy.stats.skew(images, axis=0),
+            kurtosis=scipy.stats.kurtosis(images, axis=0),
+        )
diff --git a/src/ctapipe/containers.py b/src/ctapipe/containers.py
index 9a5b39f4da8..1f2334ba386 100644
--- a/src/ctapipe/containers.py
+++ b/src/ctapipe/containers.py
@@ -414,9 +414,12 @@ class MorphologyContainer(Container):
 class StatisticsContainer(Container):
     """Store descriptive statistics"""
 
+    validity_start = Field(np.float32(nan), "start")
+    validity_stop = Field(np.float32(nan), "stop")
     max = Field(np.float32(nan), "value of pixel with maximum intensity")
     min = Field(np.float32(nan), "value of pixel with minimum intensity")
     mean = Field(np.float32(nan), "mean intensity")
+    median = Field(np.float32(nan), "median intensity")
     std = Field(np.float32(nan), "standard deviation of intensity")
     skewness = Field(nan, "skewness of intensity")
     kurtosis = Field(nan, "kurtosis of intensity")

From 1e1afcadaeb53c1f33f36b8796e9aa5131937f5e Mon Sep 17 00:00:00 2001
From: Tjark Miener <tjarkmiener@Tjarks-MacBook-Air.local>
Date: Mon, 29 Apr 2024 15:51:47 +0200
Subject: [PATCH 02/61] added stats extractor based on sigma clipping

---
 src/ctapipe/calib/camera/extractor.py | 67 ++++++++++++++++++++++++++-
 1 file changed, 66 insertions(+), 1 deletion(-)

diff --git a/src/ctapipe/calib/camera/extractor.py b/src/ctapipe/calib/camera/extractor.py
index 0140ed5bcb4..654be103f8b 100644
--- a/src/ctapipe/calib/camera/extractor.py
+++ b/src/ctapipe/calib/camera/extractor.py
@@ -5,6 +5,7 @@
 __all__ = [
     "StatisticsExtractor",
     "PlainExtractor",
+    "SigmaClippingExtractor",
 ]
 
 
@@ -12,10 +13,14 @@
 
 import numpy as np
 import scipy.stats
-from traitlets import Int
+from astropy.stats import sigma_clipped_stats
 
 from ctapipe.core import TelescopeComponent
 from ctapipe.containers import StatisticsContainer
+from ctapipe.core.traits import (
+    Int,
+    List,
+)
 
 
 class StatisticsExtractor(TelescopeComponent):
@@ -84,3 +89,63 @@ def _plain_extraction(self, images, trigger_times) -> StatisticsContainer:
             skewness=scipy.stats.skew(images, axis=0),
             kurtosis=scipy.stats.kurtosis(images, axis=0),
         )
+
+class SigmaClippingExtractor(StatisticsExtractor):
+    """
+    Extractor the statistics from a sequence of images
+    using astropy's sigma clipping functions
+    """
+
+    sample_size = Int(2500, help="sample size").tag(config=True)
+
+    sigma_clipping_max_sigma = Int(
+        default_value=4,
+        help="Maximal value for the sigma clipping outlier removal",
+    ).tag(config=True)
+    sigma_clipping_iterations = Int(
+        default_value=5,
+        help="Number of iterations for the sigma clipping outlier removal",
+    ).tag(config=True)
+
+
+    def __call__(self, dl1_table, col_name="image") -> list:
+
+        # in python 3.12 itertools.batched can be used
+        image_chunks = (dl1_table[col_name].data[i:i + self.sample_size] for i in range(0, len(dl1_table[col_name].data), self.sample_size))
+        time_chunks = (dl1_table["time"][i:i + self.sample_size] for i in range(0, len(dl1_table["time"]), self.sample_size))
+
+        # Calculate the statistics from a sequence of images
+        stats_list = []
+        for img, time in zip(image_chunks,time_chunks):
+            stats_list.append(self._sigmaclipping_extraction(img, time))
+
+        return stats_list
+
+    def _sigmaclipping_extraction(self, images, trigger_times) -> StatisticsContainer:
+
+        # mean, median, and std over the sample per pixel
+        pixel_mean, pixel_median, pixel_std = sigma_clipped_stats(
+            images,
+            sigma=self.sigma_clipping_max_sigma,
+            maxiters=self.sigma_clipping_iterations,
+            cenfunc="mean",
+            axis=0,
+        )
+
+        # mask pixels without defined statistical values
+        pixel_mean = np.ma.array(pixel_mean, mask=np.isnan(pixel_mean))
+        pixel_median = np.ma.array(pixel_median, mask=np.isnan(pixel_median))
+        pixel_std = np.ma.array(pixel_std, mask=np.isnan(pixel_std))
+
+        return StatisticsContainer(
+            validity_start=trigger_times[0],
+            validity_stop=trigger_times[-1],
+            max=np.max(images, axis=0),
+            min=np.min(images, axis=0),
+            mean=pixel_mean.filled(np.nan),
+            median=pixel_median.filled(np.nan),
+            std=pixel_std.filled(np.nan),
+            skewness=scipy.stats.skew(images, axis=0),
+            kurtosis=scipy.stats.kurtosis(images, axis=0),
+        )
+

From d54aedab032f1ed4f0f90a877f1e5be386586b28 Mon Sep 17 00:00:00 2001
From: Tjark Miener <tjarkmiener@Tjarks-MacBook-Air.local>
Date: Tue, 30 Apr 2024 16:34:57 +0200
Subject: [PATCH 03/61] added cut of outliers

restructured the stats containers
---
 src/ctapipe/calib/camera/extractor.py      | 139 +++++++++++++++------
 src/ctapipe/containers.py                  |  17 ++-
 src/ctapipe/image/statistics.py            |   6 +-
 src/ctapipe/image/tests/test_statistics.py |   4 +-
 4 files changed, 119 insertions(+), 47 deletions(-)

diff --git a/src/ctapipe/calib/camera/extractor.py b/src/ctapipe/calib/camera/extractor.py
index 654be103f8b..6e7ca6aa634 100644
--- a/src/ctapipe/calib/camera/extractor.py
+++ b/src/ctapipe/calib/camera/extractor.py
@@ -24,6 +24,17 @@
 
 
 class StatisticsExtractor(TelescopeComponent):
+
+    sample_size = Int(2500, help="sample size").tag(config=True)
+    image_median_cut_outliers = List(
+        [-0.3, 0.3],
+        help="Interval of accepted image values (fraction with respect to camera median value)",
+    ).tag(config=True)
+    image_std_cut_outliers = List(
+        [-3, 3],
+        help="Interval (number of std) of accepted image standard deviation around camera median value",
+    ).tag(config=True)
+
     def __init__(self, subarray, config=None, parent=None, **kwargs):
         """
         Base component to handle the extraction of the statistics
@@ -36,19 +47,18 @@ def __init__(self, subarray, config=None, parent=None, **kwargs):
         super().__init__(subarray=subarray, config=config, parent=parent, **kwargs)
 
     @abstractmethod
-    def __call__(self, images, trigger_times) -> list:
+    def __call__(self, dl1_table, masked_pixels_of_sample=None, col_name="image") -> list:
         """
         Call the relevant functions to extract the statistics
         for the particular extractor.
 
         Parameters
         ----------
-        images : ndarray
-            images stored in a numpy array of shape
+        dl1_table : ndarray
+            dl1 table with images and times stored in a numpy array of shape
             (n_images, n_channels, n_pix).
-        trigger_times : ndarray
-            images stored in a numpy array of shape
-            (n_images, )
+        col_name : string
+            column name in the dl1 table
 
         Returns
         -------
@@ -62,42 +72,60 @@ class PlainExtractor(StatisticsExtractor):
     using numpy and scipy functions
     """
 
-    sample_size = Int(2500, help="sample size").tag(config=True)
+    def __call__(self, dl1_table, masked_pixels_of_sample=None, col_name="image") -> list:
 
-    def __call__(self, dl1_table, col_name="image") -> list:
-       
         # in python 3.12 itertools.batched can be used
         image_chunks = (dl1_table[col_name].data[i:i + self.sample_size] for i in range(0, len(dl1_table[col_name].data), self.sample_size))
         time_chunks = (dl1_table["time"][i:i + self.sample_size] for i in range(0, len(dl1_table["time"]), self.sample_size))
 
         # Calculate the statistics from a sequence of images
         stats_list = []
-        for img, time in zip(image_chunks,time_chunks):
-            stats_list.append(self._plain_extraction(img, time))
-            
+        for images, times in zip(image_chunks,time_chunks):
+            stats_list.append(self._plain_extraction(images, times, masked_pixels_of_sample))
         return stats_list
 
-    def _plain_extraction(self, images, trigger_times) -> StatisticsContainer:
+    def _plain_extraction(self, images, times, masked_pixels_of_sample) -> StatisticsContainer:
+
+        # ensure numpy array
+        masked_images = np.ma.array(
+            images,
+            mask=masked_pixels_of_sample
+        )
+
+        # median over the sample per pixel
+        pixel_median = np.ma.median(masked_images, axis=0)
+
+        # mean over the sample per pixel
+        pixel_mean = np.ma.mean(masked_images, axis=0)
+
+        # std over the sample per pixel
+        pixel_std = np.ma.std(masked_images, axis=0)
+
+        # median of the median over the camera
+        median_of_pixel_median = np.ma.median(pixel_median, axis=1)
+
+        # outliers from median
+        image_median_outliers = np.logical_or(
+            pixel_median < self.image_median_cut_outliers[0],
+            pixel_median > self.image_median_cut_outliers[1],
+        )
+
         return StatisticsContainer(
-            validity_start=trigger_times[0],
-            validity_stop=trigger_times[-1],
-            max=np.max(images, axis=0),
-            min=np.min(images, axis=0),
-            mean=np.nanmean(images, axis=0),
-            median=np.nanmedian(images, axis=0),
-            std=np.nanstd(images, axis=0),
-            skewness=scipy.stats.skew(images, axis=0),
-            kurtosis=scipy.stats.kurtosis(images, axis=0),
+            validity_start=times[0],
+            validity_stop=times[-1],
+            mean=pixel_mean.filled(np.nan),
+            median=pixel_median.filled(np.nan),
+            median_outliers=image_median_outliers.filled(True),
+            std=pixel_std.filled(np.nan),
         )
 
+
 class SigmaClippingExtractor(StatisticsExtractor):
     """
     Extractor the statistics from a sequence of images
     using astropy's sigma clipping functions
     """
 
-    sample_size = Int(2500, help="sample size").tag(config=True)
-
     sigma_clipping_max_sigma = Int(
         default_value=4,
         help="Maximal value for the sigma clipping outlier removal",
@@ -107,8 +135,7 @@ class SigmaClippingExtractor(StatisticsExtractor):
         help="Number of iterations for the sigma clipping outlier removal",
     ).tag(config=True)
 
-
-    def __call__(self, dl1_table, col_name="image") -> list:
+    def __call__(self, dl1_table, masked_pixels_of_sample=None, col_name="image") -> list:
 
         # in python 3.12 itertools.batched can be used
         image_chunks = (dl1_table[col_name].data[i:i + self.sample_size] for i in range(0, len(dl1_table[col_name].data), self.sample_size))
@@ -116,17 +143,26 @@ def __call__(self, dl1_table, col_name="image") -> list:
 
         # Calculate the statistics from a sequence of images
         stats_list = []
-        for img, time in zip(image_chunks,time_chunks):
-            stats_list.append(self._sigmaclipping_extraction(img, time))
-
+        for images, times in zip(image_chunks,time_chunks):
+            stats_list.append(self._sigmaclipping_extraction(images, times, masked_pixels_of_sample))
         return stats_list
 
-    def _sigmaclipping_extraction(self, images, trigger_times) -> StatisticsContainer:
+    def _sigmaclipping_extraction(self, images, times, masked_pixels_of_sample) -> StatisticsContainer:
+
+        # ensure numpy array
+        masked_images = np.ma.array(
+            images,
+            mask=masked_pixels_of_sample
+        )
+
+        # median of the event images
+        image_median = np.ma.median(masked_images, axis=-1)
 
         # mean, median, and std over the sample per pixel
+        max_sigma = self.sigma_clipping_max_sigma
         pixel_mean, pixel_median, pixel_std = sigma_clipped_stats(
-            images,
-            sigma=self.sigma_clipping_max_sigma,
+            masked_images,
+            sigma=max_sigma,
             maxiters=self.sigma_clipping_iterations,
             cenfunc="mean",
             axis=0,
@@ -137,15 +173,42 @@ def _sigmaclipping_extraction(self, images, trigger_times) -> StatisticsContaine
         pixel_median = np.ma.array(pixel_median, mask=np.isnan(pixel_median))
         pixel_std = np.ma.array(pixel_std, mask=np.isnan(pixel_std))
 
+        unused_values = np.abs(masked_images - pixel_mean) > (max_sigma * pixel_std)
+
+        # only warn for values discard in the sigma clipping, not those from before
+        outliers = unused_values & (~masked_images.mask)
+
+        # add outliers identified by sigma clipping for following operations
+        masked_images.mask |= unused_values
+
+        # median of the median over the camera
+        median_of_pixel_median = np.ma.median(pixel_median, axis=1)
+
+        # median of the std over the camera
+        median_of_pixel_std = np.ma.median(pixel_std, axis=1)
+
+        # std of the std over camera
+        std_of_pixel_std = np.ma.std(pixel_std, axis=1)
+
+        # outliers from median
+        image_deviation = pixel_median - median_of_pixel_median[:, np.newaxis]
+        image_median_outliers = (
+            np.logical_or(image_deviation < self.image_median_cut_outliers[0] * median_of_pixel_median[:,np.newaxis],
+                          image_deviation > self.image_median_cut_outliers[1] * median_of_pixel_median[:,np.newaxis]))
+
+        # outliers from standard deviation
+        deviation = pixel_std - median_of_pixel_std[:, np.newaxis]
+        image_std_outliers = (
+            np.logical_or(deviation < self.image_std_cut_outliers[0] * std_of_pixel_std[:, np.newaxis],
+                          deviation > self.image_std_cut_outliers[1] * std_of_pixel_std[:, np.newaxis]))
+
         return StatisticsContainer(
-            validity_start=trigger_times[0],
-            validity_stop=trigger_times[-1],
-            max=np.max(images, axis=0),
-            min=np.min(images, axis=0),
+            validity_start=times[0],
+            validity_stop=times[-1],
             mean=pixel_mean.filled(np.nan),
             median=pixel_median.filled(np.nan),
+            median_outliers=image_median_outliers.filled(True),
             std=pixel_std.filled(np.nan),
-            skewness=scipy.stats.skew(images, axis=0),
-            kurtosis=scipy.stats.kurtosis(images, axis=0),
+            std_outliers=image_std_outliers.filled(True),
         )
 
diff --git a/src/ctapipe/containers.py b/src/ctapipe/containers.py
index 1f2334ba386..b8ce24e3973 100644
--- a/src/ctapipe/containers.py
+++ b/src/ctapipe/containers.py
@@ -57,6 +57,7 @@
     "TriggerContainer",
     "WaveformCalibrationContainer",
     "StatisticsContainer",
+    "ImageStatisticsContainer",
     "IntensityStatisticsContainer",
     "PeakTimeStatisticsContainer",
     "SchedulingBlockContainer",
@@ -412,24 +413,32 @@ class MorphologyContainer(Container):
 
 
 class StatisticsContainer(Container):
-    """Store descriptive statistics"""
+    """Store descriptive statistics of a sequence of images"""
 
     validity_start = Field(np.float32(nan), "start")
     validity_stop = Field(np.float32(nan), "stop")
+    mean = Field(np.float32(nan), "mean intensity")
+    median = Field(np.float32(nan), "median intensity")
+    median_outliers = Field(np.float32(nan), "median intensity")
+    std = Field(np.float32(nan), "standard deviation of intensity")
+    std_outliers = Field(np.float32(nan), "standard deviation intensity")
+
+class ImageStatisticsContainer(Container):
+    """Store descriptive image statistics"""
+
     max = Field(np.float32(nan), "value of pixel with maximum intensity")
     min = Field(np.float32(nan), "value of pixel with minimum intensity")
     mean = Field(np.float32(nan), "mean intensity")
-    median = Field(np.float32(nan), "median intensity")
     std = Field(np.float32(nan), "standard deviation of intensity")
     skewness = Field(nan, "skewness of intensity")
     kurtosis = Field(nan, "kurtosis of intensity")
 
 
-class IntensityStatisticsContainer(StatisticsContainer):
+class IntensityStatisticsContainer(ImageStatisticsContainer):
     default_prefix = "intensity"
 
 
-class PeakTimeStatisticsContainer(StatisticsContainer):
+class PeakTimeStatisticsContainer(ImageStatisticsContainer):
     default_prefix = "peak_time"
 
 
diff --git a/src/ctapipe/image/statistics.py b/src/ctapipe/image/statistics.py
index bd38f02a377..deb209efd08 100644
--- a/src/ctapipe/image/statistics.py
+++ b/src/ctapipe/image/statistics.py
@@ -3,7 +3,7 @@
 import numpy as np
 from numba import float32, float64, guvectorize, int64, njit
 
-from ..containers import StatisticsContainer
+from ..containers import ImageStatisticsContainer
 
 __all__ = [
     "arg_n_largest",
@@ -88,8 +88,8 @@ def kurtosis(data, mean=None, std=None, fisher=True):
 
 
 def descriptive_statistics(
-    values, container_class=StatisticsContainer
-) -> StatisticsContainer:
+    values, container_class=ImageStatisticsContainer
+) -> ImageStatisticsContainer:
     """compute intensity statistics of an image"""
     mean = values.mean()
     std = values.std()
diff --git a/src/ctapipe/image/tests/test_statistics.py b/src/ctapipe/image/tests/test_statistics.py
index 006c9c0dbce..23806705787 100644
--- a/src/ctapipe/image/tests/test_statistics.py
+++ b/src/ctapipe/image/tests/test_statistics.py
@@ -49,14 +49,14 @@ def test_kurtosis():
 
 
 def test_return_type():
-    from ctapipe.containers import PeakTimeStatisticsContainer, StatisticsContainer
+    from ctapipe.containers import PeakTimeStatisticsContainer, ImageStatisticsContainer
     from ctapipe.image import descriptive_statistics
 
     rng = np.random.default_rng(0)
     data = rng.normal(5, 2, 1000)
 
     stats = descriptive_statistics(data)
-    assert isinstance(stats, StatisticsContainer)
+    assert isinstance(stats, ImageStatisticsContainer)
 
     stats = descriptive_statistics(data, container_class=PeakTimeStatisticsContainer)
     assert isinstance(stats, PeakTimeStatisticsContainer)

From 1ab0f1ae06bca3826014fb94935be093dd09f561 Mon Sep 17 00:00:00 2001
From: Tjark Miener <tjarkmiener@vpn-221-141.vpn.ucm.es>
Date: Thu, 2 May 2024 10:10:59 +0200
Subject: [PATCH 04/61] update docs

---
 src/ctapipe/calib/camera/extractor.py |  4 +++-
 src/ctapipe/containers.py             | 14 +++++++-------
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/src/ctapipe/calib/camera/extractor.py b/src/ctapipe/calib/camera/extractor.py
index 6e7ca6aa634..56b8a7f2219 100644
--- a/src/ctapipe/calib/camera/extractor.py
+++ b/src/ctapipe/calib/camera/extractor.py
@@ -25,7 +25,7 @@
 
 class StatisticsExtractor(TelescopeComponent):
 
-    sample_size = Int(2500, help="sample size").tag(config=True)
+    sample_size = Int(2500, help="Size of the sample used for the calculation of the statistical values").tag(config=True)
     image_median_cut_outliers = List(
         [-0.3, 0.3],
         help="Interval of accepted image values (fraction with respect to camera median value)",
@@ -57,6 +57,8 @@ def __call__(self, dl1_table, masked_pixels_of_sample=None, col_name="image") ->
         dl1_table : ndarray
             dl1 table with images and times stored in a numpy array of shape
             (n_images, n_channels, n_pix).
+        masked_pixels_of_sample : ndarray
+            boolean array of masked pixels that are not available for processing
         col_name : string
             column name in the dl1 table
 
diff --git a/src/ctapipe/containers.py b/src/ctapipe/containers.py
index b8ce24e3973..5be78775580 100644
--- a/src/ctapipe/containers.py
+++ b/src/ctapipe/containers.py
@@ -415,13 +415,13 @@ class MorphologyContainer(Container):
 class StatisticsContainer(Container):
     """Store descriptive statistics of a sequence of images"""
 
-    validity_start = Field(np.float32(nan), "start")
-    validity_stop = Field(np.float32(nan), "stop")
-    mean = Field(np.float32(nan), "mean intensity")
-    median = Field(np.float32(nan), "median intensity")
-    median_outliers = Field(np.float32(nan), "median intensity")
-    std = Field(np.float32(nan), "standard deviation of intensity")
-    std_outliers = Field(np.float32(nan), "standard deviation intensity")
+    validity_start = Field(np.float32(nan), "start of the validity range")
+    validity_stop = Field(np.float32(nan), "stop of the validity range")
+    mean = Field(np.float32(nan), "Channel-wise and pixel-wise mean")
+    median = Field(np.float32(nan), "Channel-wise and pixel-wise median")
+    median_outliers = Field(np.float32(nan), "Channel-wise and pixel-wise median outliers")
+    std = Field(np.float32(nan), "Channel-wise and pixel-wise standard deviation")
+    std_outliers = Field(np.float32(nan), "Channel-wise and pixel-wise standard deviation outliers")
 
 class ImageStatisticsContainer(Container):
     """Store descriptive image statistics"""

From 074bd459ef3eb1fdfb148c5c4925ed8188423b0b Mon Sep 17 00:00:00 2001
From: Tjark Miener <tjarkmiener@vpn-221-141.vpn.ucm.es>
Date: Thu, 2 May 2024 10:12:33 +0200
Subject: [PATCH 05/61] formatted with black

---
 src/ctapipe/calib/camera/extractor.py | 87 ++++++++++++++++++---------
 1 file changed, 58 insertions(+), 29 deletions(-)

diff --git a/src/ctapipe/calib/camera/extractor.py b/src/ctapipe/calib/camera/extractor.py
index 56b8a7f2219..907f22923d2 100644
--- a/src/ctapipe/calib/camera/extractor.py
+++ b/src/ctapipe/calib/camera/extractor.py
@@ -25,7 +25,10 @@
 
 class StatisticsExtractor(TelescopeComponent):
 
-    sample_size = Int(2500, help="Size of the sample used for the calculation of the statistical values").tag(config=True)
+    sample_size = Int(
+        2500,
+        help="Size of the sample used for the calculation of the statistical values",
+    ).tag(config=True)
     image_median_cut_outliers = List(
         [-0.3, 0.3],
         help="Interval of accepted image values (fraction with respect to camera median value)",
@@ -47,7 +50,9 @@ def __init__(self, subarray, config=None, parent=None, **kwargs):
         super().__init__(subarray=subarray, config=config, parent=parent, **kwargs)
 
     @abstractmethod
-    def __call__(self, dl1_table, masked_pixels_of_sample=None, col_name="image") -> list:
+    def __call__(
+        self, dl1_table, masked_pixels_of_sample=None, col_name="image"
+    ) -> list:
         """
         Call the relevant functions to extract the statistics
         for the particular extractor.
@@ -68,31 +73,41 @@ def __call__(self, dl1_table, masked_pixels_of_sample=None, col_name="image") ->
             List of extracted statistics and validity ranges
         """
 
+
 class PlainExtractor(StatisticsExtractor):
     """
     Extractor the statistics from a sequence of images
     using numpy and scipy functions
     """
 
-    def __call__(self, dl1_table, masked_pixels_of_sample=None, col_name="image") -> list:
+    def __call__(
+        self, dl1_table, masked_pixels_of_sample=None, col_name="image"
+    ) -> list:
 
         # in python 3.12 itertools.batched can be used
-        image_chunks = (dl1_table[col_name].data[i:i + self.sample_size] for i in range(0, len(dl1_table[col_name].data), self.sample_size))
-        time_chunks = (dl1_table["time"][i:i + self.sample_size] for i in range(0, len(dl1_table["time"]), self.sample_size))
+        image_chunks = (
+            dl1_table[col_name].data[i : i + self.sample_size]
+            for i in range(0, len(dl1_table[col_name].data), self.sample_size)
+        )
+        time_chunks = (
+            dl1_table["time"][i : i + self.sample_size]
+            for i in range(0, len(dl1_table["time"]), self.sample_size)
+        )
 
         # Calculate the statistics from a sequence of images
         stats_list = []
-        for images, times in zip(image_chunks,time_chunks):
-            stats_list.append(self._plain_extraction(images, times, masked_pixels_of_sample))
+        for images, times in zip(image_chunks, time_chunks):
+            stats_list.append(
+                self._plain_extraction(images, times, masked_pixels_of_sample)
+            )
         return stats_list
 
-    def _plain_extraction(self, images, times, masked_pixels_of_sample) -> StatisticsContainer:
+    def _plain_extraction(
+        self, images, times, masked_pixels_of_sample
+    ) -> StatisticsContainer:
 
         # ensure numpy array
-        masked_images = np.ma.array(
-            images,
-            mask=masked_pixels_of_sample
-        )
+        masked_images = np.ma.array(images, mask=masked_pixels_of_sample)
 
         # median over the sample per pixel
         pixel_median = np.ma.median(masked_images, axis=0)
@@ -137,25 +152,34 @@ class SigmaClippingExtractor(StatisticsExtractor):
         help="Number of iterations for the sigma clipping outlier removal",
     ).tag(config=True)
 
-    def __call__(self, dl1_table, masked_pixels_of_sample=None, col_name="image") -> list:
+    def __call__(
+        self, dl1_table, masked_pixels_of_sample=None, col_name="image"
+    ) -> list:
 
         # in python 3.12 itertools.batched can be used
-        image_chunks = (dl1_table[col_name].data[i:i + self.sample_size] for i in range(0, len(dl1_table[col_name].data), self.sample_size))
-        time_chunks = (dl1_table["time"][i:i + self.sample_size] for i in range(0, len(dl1_table["time"]), self.sample_size))
+        image_chunks = (
+            dl1_table[col_name].data[i : i + self.sample_size]
+            for i in range(0, len(dl1_table[col_name].data), self.sample_size)
+        )
+        time_chunks = (
+            dl1_table["time"][i : i + self.sample_size]
+            for i in range(0, len(dl1_table["time"]), self.sample_size)
+        )
 
         # Calculate the statistics from a sequence of images
         stats_list = []
-        for images, times in zip(image_chunks,time_chunks):
-            stats_list.append(self._sigmaclipping_extraction(images, times, masked_pixels_of_sample))
+        for images, times in zip(image_chunks, time_chunks):
+            stats_list.append(
+                self._sigmaclipping_extraction(images, times, masked_pixels_of_sample)
+            )
         return stats_list
 
-    def _sigmaclipping_extraction(self, images, times, masked_pixels_of_sample) -> StatisticsContainer:
+    def _sigmaclipping_extraction(
+        self, images, times, masked_pixels_of_sample
+    ) -> StatisticsContainer:
 
         # ensure numpy array
-        masked_images = np.ma.array(
-            images,
-            mask=masked_pixels_of_sample
-        )
+        masked_images = np.ma.array(images, mask=masked_pixels_of_sample)
 
         # median of the event images
         image_median = np.ma.median(masked_images, axis=-1)
@@ -194,15 +218,21 @@ def _sigmaclipping_extraction(self, images, times, masked_pixels_of_sample) -> S
 
         # outliers from median
         image_deviation = pixel_median - median_of_pixel_median[:, np.newaxis]
-        image_median_outliers = (
-            np.logical_or(image_deviation < self.image_median_cut_outliers[0] * median_of_pixel_median[:,np.newaxis],
-                          image_deviation > self.image_median_cut_outliers[1] * median_of_pixel_median[:,np.newaxis]))
+        image_median_outliers = np.logical_or(
+            image_deviation
+            < self.image_median_cut_outliers[0] * median_of_pixel_median[:, np.newaxis],
+            image_deviation
+            > self.image_median_cut_outliers[1] * median_of_pixel_median[:, np.newaxis],
+        )
 
         # outliers from standard deviation
         deviation = pixel_std - median_of_pixel_std[:, np.newaxis]
-        image_std_outliers = (
-            np.logical_or(deviation < self.image_std_cut_outliers[0] * std_of_pixel_std[:, np.newaxis],
-                          deviation > self.image_std_cut_outliers[1] * std_of_pixel_std[:, np.newaxis]))
+        image_std_outliers = np.logical_or(
+            deviation
+            < self.image_std_cut_outliers[0] * std_of_pixel_std[:, np.newaxis],
+            deviation
+            > self.image_std_cut_outliers[1] * std_of_pixel_std[:, np.newaxis],
+        )
 
         return StatisticsContainer(
             validity_start=times[0],
@@ -213,4 +243,3 @@ def _sigmaclipping_extraction(self, images, times, masked_pixels_of_sample) -> S
             std=pixel_std.filled(np.nan),
             std_outliers=image_std_outliers.filled(True),
         )
-

From d1611ecae8e2d5d6b74a203446a392ef70493e20 Mon Sep 17 00:00:00 2001
From: Tjark Miener <tjarkmiener@vpn-221-141.vpn.ucm.es>
Date: Thu, 2 May 2024 10:29:10 +0200
Subject: [PATCH 06/61] added pass for __call__ function

---
 src/ctapipe/calib/camera/extractor.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/ctapipe/calib/camera/extractor.py b/src/ctapipe/calib/camera/extractor.py
index 907f22923d2..e4ed0342e1b 100644
--- a/src/ctapipe/calib/camera/extractor.py
+++ b/src/ctapipe/calib/camera/extractor.py
@@ -72,6 +72,7 @@ def __call__(
         List StatisticsContainer:
             List of extracted statistics and validity ranges
         """
+        pass
 
 
 class PlainExtractor(StatisticsExtractor):

From fdbb7963aec3dcf5461aead2c964ca32f4cb0c8d Mon Sep 17 00:00:00 2001
From: Christoph Toennis <christoph.toennis1988@gmail.com>
Date: Tue, 7 May 2024 09:30:29 +0200
Subject: [PATCH 07/61] Small commit for prototyping

---
 src/ctapipe/calib/camera/extractor.py | 46 ++++++++++++++++++++++++++-
 1 file changed, 45 insertions(+), 1 deletion(-)

diff --git a/src/ctapipe/calib/camera/extractor.py b/src/ctapipe/calib/camera/extractor.py
index e4ed0342e1b..718aa48494b 100644
--- a/src/ctapipe/calib/camera/extractor.py
+++ b/src/ctapipe/calib/camera/extractor.py
@@ -6,6 +6,7 @@
     "StatisticsExtractor",
     "PlainExtractor",
     "SigmaClippingExtractor",
+    "StarExtractor",
 ]
 
 
@@ -14,6 +15,8 @@
 import numpy as np
 import scipy.stats
 from astropy.stats import sigma_clipped_stats
+from astropy.coordinates import EarthLocation, SkyCoord, Angle
+from astroquery.vizier import Vizier
 
 from ctapipe.core import TelescopeComponent
 from ctapipe.containers import StatisticsContainer
@@ -21,6 +24,7 @@
     Int,
     List,
 )
+from ctapipe.coordinates import EngineeringCameraFrame
 
 
 class StatisticsExtractor(TelescopeComponent):
@@ -138,9 +142,49 @@ def _plain_extraction(
         )
 
 
+class StarExtractor(StatisticsExtractor):
+    """
+    Extracts pointing information from a series of variance images
+    using the startracker functions  
+    """
+
+    min_star_magnitude = Float(
+        0.1,
+        help="Minimum magnitude of stars to be used. Set to appropriate value to avoid ",
+    ).tag(config=True)
+
+    def __init__():
+
+    def __call__(
+        self, variance_table, initial_pointing, PSF_model
+    ):
+
+    def _stars_in_FOV(
+        self, pointing
+    ):
+
+        stars = Vizier.query_region(pointing, radius=Angle(2.0, "deg"),catalog='NOMAD')[0]
+
+        for star in stars:
+
+            star_coords = SkyCoord(star['RAJ2000'], star['DEJ2000'], unit='deg', frame='icrs')
+            star_coords = star_coords.transform_to(camera_frame)
+            central_pixel = self.camera_geometry.transform_to(camera_frame).position_to_pix_index(
+
+    def _star_extraction(
+        self,
+    ):
+        camera_frame = EngineeringCameraFrame(
+            telescope_pointing=current_pointing,
+            focal_length=self.focal_length,
+            obstime=time.utc,
+
+        
+
+
 class SigmaClippingExtractor(StatisticsExtractor):
     """
-    Extractor the statistics from a sequence of images
+    Extracts the statistics from a sequence of images
     using astropy's sigma clipping functions
     """
 

From 76bb812078fe31c430ceefe1e6f2ce68c4c8ff0b Mon Sep 17 00:00:00 2001
From: Christoph Toennis <christoph.toennis1988@gmail.com>
Date: Fri, 10 May 2024 09:05:01 +0200
Subject: [PATCH 08/61] Removed unneeded functions

---
 src/ctapipe/calib/camera/extractor.py | 28 +++------------------------
 1 file changed, 3 insertions(+), 25 deletions(-)

diff --git a/src/ctapipe/calib/camera/extractor.py b/src/ctapipe/calib/camera/extractor.py
index 718aa48494b..eb245447527 100644
--- a/src/ctapipe/calib/camera/extractor.py
+++ b/src/ctapipe/calib/camera/extractor.py
@@ -5,8 +5,8 @@
 __all__ = [
     "StatisticsExtractor",
     "PlainExtractor",
+    "StarVarianceExtractor",
     "SigmaClippingExtractor",
-    "StarExtractor",
 ]
 
 
@@ -15,8 +15,6 @@
 import numpy as np
 import scipy.stats
 from astropy.stats import sigma_clipped_stats
-from astropy.coordinates import EarthLocation, SkyCoord, Angle
-from astroquery.vizier import Vizier
 
 from ctapipe.core import TelescopeComponent
 from ctapipe.containers import StatisticsContainer
@@ -142,7 +140,7 @@ def _plain_extraction(
         )
 
 
-class StarExtractor(StatisticsExtractor):
+class StarVarianceExtractor(StatisticsExtractor):
     """
     Extracts pointing information from a series of variance images
     using the startracker functions  
@@ -156,29 +154,9 @@ class StarExtractor(StatisticsExtractor):
     def __init__():
 
     def __call__(
-        self, variance_table, initial_pointing, PSF_model
+        self, variance_table, trigger_table, initial_pointing, PSF_model
     ):
 
-    def _stars_in_FOV(
-        self, pointing
-    ):
-
-        stars = Vizier.query_region(pointing, radius=Angle(2.0, "deg"),catalog='NOMAD')[0]
-
-        for star in stars:
-
-            star_coords = SkyCoord(star['RAJ2000'], star['DEJ2000'], unit='deg', frame='icrs')
-            star_coords = star_coords.transform_to(camera_frame)
-            central_pixel = self.camera_geometry.transform_to(camera_frame).position_to_pix_index(
-
-    def _star_extraction(
-        self,
-    ):
-        camera_frame = EngineeringCameraFrame(
-            telescope_pointing=current_pointing,
-            focal_length=self.focal_length,
-            obstime=time.utc,
-
         
 
 

From 34902c4c0b8b35df51a1eea98ca85a0918eb04ed Mon Sep 17 00:00:00 2001
From: Tjark Miener <tjarkmiener@Tjarks-MacBook-Air.local>
Date: Fri, 3 May 2024 11:41:44 +0200
Subject: [PATCH 09/61] added unit tests

---
 .../calib/camera/tests/test_extractors.py     | 51 +++++++++++++++++++
 1 file changed, 51 insertions(+)
 create mode 100644 src/ctapipe/calib/camera/tests/test_extractors.py

diff --git a/src/ctapipe/calib/camera/tests/test_extractors.py b/src/ctapipe/calib/camera/tests/test_extractors.py
new file mode 100644
index 00000000000..5e5f7a617fc
--- /dev/null
+++ b/src/ctapipe/calib/camera/tests/test_extractors.py
@@ -0,0 +1,51 @@
+"""
+Tests for StatisticsExtractor and related functions
+"""
+
+import astropy.units as u
+from astropy.table import QTable
+import numpy as np
+
+from ctapipe.calib.camera.extractor import PlainExtractor, SigmaClippingExtractor
+
+
+def test_extractors(example_subarray):
+    plain_extractor = PlainExtractor(subarray=example_subarray, sample_size=2500)
+    sigmaclipping_extractor = SigmaClippingExtractor(subarray=example_subarray, sample_size=2500)
+    times = np.linspace(60117.911, 60117.9258, num=5000)
+    pedestal_dl1_data = np.random.normal(2.0, 5.0, size=(5000, 2, 1855))
+    flatfield_dl1_data = np.random.normal(77.0, 10.0, size=(5000, 2, 1855))
+
+    pedestal_dl1_table = QTable([times, pedestal_dl1_data], names=('time', 'image'))
+    flatfield_dl1_table = QTable([times, flatfield_dl1_data], names=('time', 'image'))
+    
+    plain_stats_list = plain_extractor(dl1_table=pedestal_dl1_table)
+    sigmaclipping_stats_list = sigmaclipping_extractor(dl1_table=flatfield_dl1_table)
+    
+    assert plain_stats_list[0].mean - 2.0) > 1.5) == False
+    assert np.any(np.abs(sigmaclipping_stats_list[0].mean - 77.0) > 1.5) == False
+                
+    assert np.any(np.abs(plain_stats_list[0].mean - 2.0) > 1.5) == False
+    assert np.any(np.abs(sigmaclipping_stats_list[0].mean - 77.0) > 1.5) == False
+    
+    assert np.any(np.abs(plain_stats_list[1].median - 2.0) > 1.5) == False
+    assert np.any(np.abs(sigmaclipping_stats_list[1].median - 77.0) > 1.5) == False
+    
+    assert np.any(np.abs(plain_stats_list[0].std - 5.0) > 1.5) == False
+    assert np.any(np.abs(sigmaclipping_stats_list[0].std - 10.0) > 1.5) == False
+    
+def test_check_outliers(example_subarray):
+    sigmaclipping_extractor = SigmaClippingExtractor(subarray=example_subarray, sample_size=2500)
+    times = np.linspace(60117.911, 60117.9258, num=5000)
+    flatfield_dl1_data = np.random.normal(77.0, 10.0, size=(5000, 2, 1855))
+    # insert outliers
+    flatfield_dl1_data[:,0,120] = 120.0
+    flatfield_dl1_data[:,1,67] = 120.0
+    flatfield_dl1_table = QTable([times, flatfield_dl1_data], names=('time', 'image'))
+    sigmaclipping_stats_list = sigmaclipping_extractor(dl1_table=flatfield_dl1_table)
+    
+    #check if outliers where detected correctly
+    assert sigmaclipping_stats_list[0].median_outliers[0][120] == True
+    assert sigmaclipping_stats_list[0].median_outliers[1][67] == True
+    assert sigmaclipping_stats_list[1].median_outliers[0][120] == True
+    assert sigmaclipping_stats_list[1].median_outliers[1][67] == True

From d25f85da5cffcf1c0894f0ebfa7ab1edad70d51e Mon Sep 17 00:00:00 2001
From: Tjark Miener <tjarkmiener@Tjarks-MacBook-Air.local>
Date: Fri, 3 May 2024 11:50:48 +0200
Subject: [PATCH 10/61] added changelog

---
 docs/changes/2554.feature.rst | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 docs/changes/2554.feature.rst

diff --git a/docs/changes/2554.feature.rst b/docs/changes/2554.feature.rst
new file mode 100644
index 00000000000..2e6a6356b3a
--- /dev/null
+++ b/docs/changes/2554.feature.rst
@@ -0,0 +1 @@
+Add API to extract the statistics from a sequence of images.

From b5847f7bdc8628151c3699ef0db6810dfa003dd2 Mon Sep 17 00:00:00 2001
From: Tjark Miener <tjarkmiener@Tjarks-MacBook-Air.local>
Date: Fri, 3 May 2024 13:54:45 +0200
Subject: [PATCH 11/61] fix lint

---
 src/ctapipe/calib/camera/extractor.py             | 9 ---------
 src/ctapipe/calib/camera/tests/test_extractors.py | 2 +-
 2 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/src/ctapipe/calib/camera/extractor.py b/src/ctapipe/calib/camera/extractor.py
index eb245447527..c8a1b3158ce 100644
--- a/src/ctapipe/calib/camera/extractor.py
+++ b/src/ctapipe/calib/camera/extractor.py
@@ -121,9 +121,6 @@ def _plain_extraction(
         # std over the sample per pixel
         pixel_std = np.ma.std(masked_images, axis=0)
 
-        # median of the median over the camera
-        median_of_pixel_median = np.ma.median(pixel_median, axis=1)
-
         # outliers from median
         image_median_outliers = np.logical_or(
             pixel_median < self.image_median_cut_outliers[0],
@@ -204,9 +201,6 @@ def _sigmaclipping_extraction(
         # ensure numpy array
         masked_images = np.ma.array(images, mask=masked_pixels_of_sample)
 
-        # median of the event images
-        image_median = np.ma.median(masked_images, axis=-1)
-
         # mean, median, and std over the sample per pixel
         max_sigma = self.sigma_clipping_max_sigma
         pixel_mean, pixel_median, pixel_std = sigma_clipped_stats(
@@ -224,9 +218,6 @@ def _sigmaclipping_extraction(
 
         unused_values = np.abs(masked_images - pixel_mean) > (max_sigma * pixel_std)
 
-        # only warn for values discard in the sigma clipping, not those from before
-        outliers = unused_values & (~masked_images.mask)
-
         # add outliers identified by sigma clipping for following operations
         masked_images.mask |= unused_values
 
diff --git a/src/ctapipe/calib/camera/tests/test_extractors.py b/src/ctapipe/calib/camera/tests/test_extractors.py
index 5e5f7a617fc..19b04c017fe 100644
--- a/src/ctapipe/calib/camera/tests/test_extractors.py
+++ b/src/ctapipe/calib/camera/tests/test_extractors.py
@@ -22,7 +22,7 @@ def test_extractors(example_subarray):
     plain_stats_list = plain_extractor(dl1_table=pedestal_dl1_table)
     sigmaclipping_stats_list = sigmaclipping_extractor(dl1_table=flatfield_dl1_table)
     
-    assert plain_stats_list[0].mean - 2.0) > 1.5) == False
+    assert np.any(np.abs(plain_stats_list[0].mean - 2.0) > 1.5) == False
     assert np.any(np.abs(sigmaclipping_stats_list[0].mean - 77.0) > 1.5) == False
                 
     assert np.any(np.abs(plain_stats_list[0].mean - 2.0) > 1.5) == False

From 620ada6db5032cbf2b8a0412c7351a5ec06fa858 Mon Sep 17 00:00:00 2001
From: Christoph Toennis <christoph.toennis1988@gmail.com>
Date: Thu, 23 May 2024 11:37:46 +0200
Subject: [PATCH 12/61] I altered the class variables to th evariance
 statistics extractor

---
 src/ctapipe/calib/camera/extractor.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/ctapipe/calib/camera/extractor.py b/src/ctapipe/calib/camera/extractor.py
index c8a1b3158ce..70f4a2eb561 100644
--- a/src/ctapipe/calib/camera/extractor.py
+++ b/src/ctapipe/calib/camera/extractor.py
@@ -143,20 +143,23 @@ class StarVarianceExtractor(StatisticsExtractor):
     using the startracker functions  
     """
 
-    min_star_magnitude = Float(
-        0.1,
-        help="Minimum magnitude of stars to be used. Set to appropriate value to avoid ",
+    sigma_clipping_max_sigma = Int(
+        default_value=4,
+        help="Maximal value for the sigma clipping outlier removal",
+    ).tag(config=True)
+    sigma_clipping_iterations = Int(
+        default_value=5,
+        help="Number of iterations for the sigma clipping outlier removal",
     ).tag(config=True)
 
     def __init__():
 
     def __call__(
-        self, variance_table, trigger_table, initial_pointing, PSF_model
+        self, variance_table
     ):
 
         
 
-
 class SigmaClippingExtractor(StatisticsExtractor):
     """
     Extracts the statistics from a sequence of images

From 4e275e7ed88b4f4660c967cbc9e38876dd2ae5d0 Mon Sep 17 00:00:00 2001
From: Christoph Toennis <christoph.toennis1988@gmail.com>
Date: Fri, 24 May 2024 13:38:36 +0200
Subject: [PATCH 13/61] added a container for mean variance images and fixed
 docustring

---
 src/ctapipe/calib/camera/extractor.py | 43 +++++++++++++++++++++++++--
 src/ctapipe/containers.py             |  9 +++++-
 2 files changed, 48 insertions(+), 4 deletions(-)

diff --git a/src/ctapipe/calib/camera/extractor.py b/src/ctapipe/calib/camera/extractor.py
index 70f4a2eb561..3cc51de40cb 100644
--- a/src/ctapipe/calib/camera/extractor.py
+++ b/src/ctapipe/calib/camera/extractor.py
@@ -139,8 +139,9 @@ def _plain_extraction(
 
 class StarVarianceExtractor(StatisticsExtractor):
     """
-    Extracts pointing information from a series of variance images
-    using the startracker functions  
+    Generating average variance images from a set 
+    of variance images for the startracker 
+    pointing calibration  
     """
 
     sigma_clipping_max_sigma = Int(
@@ -158,7 +159,43 @@ def __call__(
         self, variance_table
     ):
 
-        
+        image_chunks = (
+            variance_table["image"].data[i : i + self.sample_size] 
+            for i in range(0, len(variance_table["image"].data), self.sample_size)
+            )
+
+        time_chunks = (
+            variance_table["trigger_times"].data[i : i + self.sample_size]
+            for i in range(0, len(variance_table["trigger_times"].data), self.sample_size)
+            )
+
+        stats_list = []
+
+        for images, times in zip(image_chunks, time_chunks):
+
+            stats_list.append(
+                self._sigmaclipping_extraction(images, times)
+            )
+        return stats_list
+
+    def _sigmaclipping_extraction(
+    self, images, times
+    )->StatisticsContainer:
+
+        pixel_mean, pixel_median, pixel_std = sigma_clipped_stats(
+            images,
+            sigma=self.sigma_clipping_max_sigma,
+            maxiters=self.sigma_clipping_iterations,
+            cenfunc="mean",
+            axis=0,
+        )
+
+        return StatisticsContainer(
+            validity_start=times[0],
+            validity_stop=times[-1],
+            mean=pixel_mean.filled(np.nan),
+            median=pixel_median.filled(np.nan)
+        )
 
 class SigmaClippingExtractor(StatisticsExtractor):
     """
diff --git a/src/ctapipe/containers.py b/src/ctapipe/containers.py
index 5be78775580..0aeff5a97eb 100644
--- a/src/ctapipe/containers.py
+++ b/src/ctapipe/containers.py
@@ -57,6 +57,7 @@
     "TriggerContainer",
     "WaveformCalibrationContainer",
     "StatisticsContainer",
+    "VarianceStatisticsContainer",
     "ImageStatisticsContainer",
     "IntensityStatisticsContainer",
     "PeakTimeStatisticsContainer",
@@ -423,6 +424,13 @@ class StatisticsContainer(Container):
     std = Field(np.float32(nan), "Channel-wise and pixel-wise standard deviation")
     std_outliers = Field(np.float32(nan), "Channel-wise and pixel-wise standard deviation outliers")
 
+class VarianceStatisticsContainer(Container):
+     """Store descriptive statistics of a sequence of variance images"""
+
+     validity_start = Field(np.float32(nan), "start of the validity range")
+     validity_stop = Field(np.float32(nan), "stop of the validity range")
+     mean = Field(np.float32(nan), "Channel-wise and pixel-wise mean")
+
 class ImageStatisticsContainer(Container):
     """Store descriptive image statistics"""
 
@@ -433,7 +441,6 @@ class ImageStatisticsContainer(Container):
     skewness = Field(nan, "skewness of intensity")
     kurtosis = Field(nan, "kurtosis of intensity")
 
-
 class IntensityStatisticsContainer(ImageStatisticsContainer):
     default_prefix = "intensity"
 

From 9e6823470585829814db948e808246a03b58037f Mon Sep 17 00:00:00 2001
From: Christoph Toennis <christoph.toennis1988@gmail.com>
Date: Mon, 27 May 2024 15:20:53 +0200
Subject: [PATCH 14/61] I changed the container type for the
 StarVarianceExtractor

---
 src/ctapipe/calib/camera/extractor.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/ctapipe/calib/camera/extractor.py b/src/ctapipe/calib/camera/extractor.py
index 3cc51de40cb..ffcd561aa87 100644
--- a/src/ctapipe/calib/camera/extractor.py
+++ b/src/ctapipe/calib/camera/extractor.py
@@ -180,7 +180,7 @@ def __call__(
 
     def _sigmaclipping_extraction(
     self, images, times
-    )->StatisticsContainer:
+    )->VarianceStatisticsContainer:
 
         pixel_mean, pixel_median, pixel_std = sigma_clipped_stats(
             images,
@@ -190,11 +190,10 @@ def _sigmaclipping_extraction(
             axis=0,
         )
 
-        return StatisticsContainer(
+        return VarianceStatisticsContainer(
             validity_start=times[0],
             validity_stop=times[-1],
-            mean=pixel_mean.filled(np.nan),
-            median=pixel_median.filled(np.nan)
+            mean=pixel_mean.filled(np.nan)
         )
 
 class SigmaClippingExtractor(StatisticsExtractor):

From cda269eec05f093ee562cbba3b3eccad5cf9e64a Mon Sep 17 00:00:00 2001
From: Tjark Miener <tjarkmiener@Tjarks-MacBook-Air.local>
Date: Fri, 31 May 2024 17:42:35 +0200
Subject: [PATCH 15/61] fix pylint

Remove StarVarianceExtractor since is functionality is featured in the existing Extractors
---
 src/ctapipe/calib/camera/extractor.py         | 89 ++++---------------
 .../calib/camera/tests/test_extractors.py     | 64 +++++++------
 src/ctapipe/containers.py                     |  7 --
 3 files changed, 53 insertions(+), 107 deletions(-)

diff --git a/src/ctapipe/calib/camera/extractor.py b/src/ctapipe/calib/camera/extractor.py
index ffcd561aa87..9e9e9947462 100644
--- a/src/ctapipe/calib/camera/extractor.py
+++ b/src/ctapipe/calib/camera/extractor.py
@@ -5,15 +5,12 @@
 __all__ = [
     "StatisticsExtractor",
     "PlainExtractor",
-    "StarVarianceExtractor",
     "SigmaClippingExtractor",
 ]
 
-
 from abc import abstractmethod
 
 import numpy as np
-import scipy.stats
 from astropy.stats import sigma_clipped_stats
 
 from ctapipe.core import TelescopeComponent
@@ -22,10 +19,9 @@
     Int,
     List,
 )
-from ctapipe.coordinates import EngineeringCameraFrame
-
 
 class StatisticsExtractor(TelescopeComponent):
+    """Base StatisticsExtractor component"""
 
     sample_size = Int(
         2500,
@@ -33,11 +29,13 @@ class StatisticsExtractor(TelescopeComponent):
     ).tag(config=True)
     image_median_cut_outliers = List(
         [-0.3, 0.3],
-        help="Interval of accepted image values (fraction with respect to camera median value)",
+        help="""Interval of accepted image values \\
+                (fraction with respect to camera median value)""",
     ).tag(config=True)
     image_std_cut_outliers = List(
         [-3, 3],
-        help="Interval (number of std) of accepted image standard deviation around camera median value",
+        help="""Interval (number of std) of accepted image standard deviation \\
+                around camera median value""",
     ).tag(config=True)
 
     def __init__(self, subarray, config=None, parent=None, **kwargs):
@@ -74,7 +72,6 @@ def __call__(
         List StatisticsContainer:
             List of extracted statistics and validity ranges
         """
-        pass
 
 
 class PlainExtractor(StatisticsExtractor):
@@ -136,66 +133,6 @@ def _plain_extraction(
             std=pixel_std.filled(np.nan),
         )
 
-
-class StarVarianceExtractor(StatisticsExtractor):
-    """
-    Generating average variance images from a set 
-    of variance images for the startracker 
-    pointing calibration  
-    """
-
-    sigma_clipping_max_sigma = Int(
-        default_value=4,
-        help="Maximal value for the sigma clipping outlier removal",
-    ).tag(config=True)
-    sigma_clipping_iterations = Int(
-        default_value=5,
-        help="Number of iterations for the sigma clipping outlier removal",
-    ).tag(config=True)
-
-    def __init__():
-
-    def __call__(
-        self, variance_table
-    ):
-
-        image_chunks = (
-            variance_table["image"].data[i : i + self.sample_size] 
-            for i in range(0, len(variance_table["image"].data), self.sample_size)
-            )
-
-        time_chunks = (
-            variance_table["trigger_times"].data[i : i + self.sample_size]
-            for i in range(0, len(variance_table["trigger_times"].data), self.sample_size)
-            )
-
-        stats_list = []
-
-        for images, times in zip(image_chunks, time_chunks):
-
-            stats_list.append(
-                self._sigmaclipping_extraction(images, times)
-            )
-        return stats_list
-
-    def _sigmaclipping_extraction(
-    self, images, times
-    )->VarianceStatisticsContainer:
-
-        pixel_mean, pixel_median, pixel_std = sigma_clipped_stats(
-            images,
-            sigma=self.sigma_clipping_max_sigma,
-            maxiters=self.sigma_clipping_iterations,
-            cenfunc="mean",
-            axis=0,
-        )
-
-        return VarianceStatisticsContainer(
-            validity_start=times[0],
-            validity_stop=times[-1],
-            mean=pixel_mean.filled(np.nan)
-        )
-
 class SigmaClippingExtractor(StatisticsExtractor):
     """
     Extracts the statistics from a sequence of images
@@ -273,18 +210,26 @@ def _sigmaclipping_extraction(
         image_deviation = pixel_median - median_of_pixel_median[:, np.newaxis]
         image_median_outliers = np.logical_or(
             image_deviation
-            < self.image_median_cut_outliers[0] * median_of_pixel_median[:, np.newaxis],
+            < self.image_median_cut_outliers[0] # pylint: disable=unsubscriptable-object
+            * median_of_pixel_median[
+                :, np.newaxis
+            ],
             image_deviation
-            > self.image_median_cut_outliers[1] * median_of_pixel_median[:, np.newaxis],
+            > self.image_median_cut_outliers[1] # pylint: disable=unsubscriptable-object
+            * median_of_pixel_median[
+                :, np.newaxis
+            ],
         )
 
         # outliers from standard deviation
         deviation = pixel_std - median_of_pixel_std[:, np.newaxis]
         image_std_outliers = np.logical_or(
             deviation
-            < self.image_std_cut_outliers[0] * std_of_pixel_std[:, np.newaxis],
+            < self.image_std_cut_outliers[0] # pylint: disable=unsubscriptable-object
+            * std_of_pixel_std[:, np.newaxis],
             deviation
-            > self.image_std_cut_outliers[1] * std_of_pixel_std[:, np.newaxis],
+            > self.image_std_cut_outliers[1] # pylint: disable=unsubscriptable-object
+            * std_of_pixel_std[:, np.newaxis],
         )
 
         return StatisticsContainer(
diff --git a/src/ctapipe/calib/camera/tests/test_extractors.py b/src/ctapipe/calib/camera/tests/test_extractors.py
index 19b04c017fe..89c375387e3 100644
--- a/src/ctapipe/calib/camera/tests/test_extractors.py
+++ b/src/ctapipe/calib/camera/tests/test_extractors.py
@@ -2,7 +2,6 @@
 Tests for StatisticsExtractor and related functions
 """
 
-import astropy.units as u
 from astropy.table import QTable
 import numpy as np
 
@@ -10,42 +9,51 @@
 
 
 def test_extractors(example_subarray):
+    """test basic functionality of the StatisticsExtractors"""
+
     plain_extractor = PlainExtractor(subarray=example_subarray, sample_size=2500)
-    sigmaclipping_extractor = SigmaClippingExtractor(subarray=example_subarray, sample_size=2500)
+    sigmaclipping_extractor = SigmaClippingExtractor(
+        subarray=example_subarray, sample_size=2500
+    )
     times = np.linspace(60117.911, 60117.9258, num=5000)
     pedestal_dl1_data = np.random.normal(2.0, 5.0, size=(5000, 2, 1855))
     flatfield_dl1_data = np.random.normal(77.0, 10.0, size=(5000, 2, 1855))
 
-    pedestal_dl1_table = QTable([times, pedestal_dl1_data], names=('time', 'image'))
-    flatfield_dl1_table = QTable([times, flatfield_dl1_data], names=('time', 'image'))
-    
+    pedestal_dl1_table = QTable([times, pedestal_dl1_data], names=("time", "image"))
+    flatfield_dl1_table = QTable([times, flatfield_dl1_data], names=("time", "image"))
+
     plain_stats_list = plain_extractor(dl1_table=pedestal_dl1_table)
     sigmaclipping_stats_list = sigmaclipping_extractor(dl1_table=flatfield_dl1_table)
-    
-    assert np.any(np.abs(plain_stats_list[0].mean - 2.0) > 1.5) == False
-    assert np.any(np.abs(sigmaclipping_stats_list[0].mean - 77.0) > 1.5) == False
-                
-    assert np.any(np.abs(plain_stats_list[0].mean - 2.0) > 1.5) == False
-    assert np.any(np.abs(sigmaclipping_stats_list[0].mean - 77.0) > 1.5) == False
-    
-    assert np.any(np.abs(plain_stats_list[1].median - 2.0) > 1.5) == False
-    assert np.any(np.abs(sigmaclipping_stats_list[1].median - 77.0) > 1.5) == False
-    
-    assert np.any(np.abs(plain_stats_list[0].std - 5.0) > 1.5) == False
-    assert np.any(np.abs(sigmaclipping_stats_list[0].std - 10.0) > 1.5) == False
-    
+
+    assert np.any(np.abs(plain_stats_list[0].mean - 2.0) > 1.5) is False
+    assert np.any(np.abs(sigmaclipping_stats_list[0].mean - 77.0) > 1.5) is False
+
+    assert np.any(np.abs(plain_stats_list[0].mean - 2.0) > 1.5) is False
+    assert np.any(np.abs(sigmaclipping_stats_list[0].mean - 77.0) > 1.5) is False
+
+    assert np.any(np.abs(plain_stats_list[1].median - 2.0) > 1.5) is False
+    assert np.any(np.abs(sigmaclipping_stats_list[1].median - 77.0) > 1.5) is False
+
+    assert np.any(np.abs(plain_stats_list[0].std - 5.0) > 1.5) is False
+    assert np.any(np.abs(sigmaclipping_stats_list[0].std - 10.0) > 1.5) is False
+
+
 def test_check_outliers(example_subarray):
-    sigmaclipping_extractor = SigmaClippingExtractor(subarray=example_subarray, sample_size=2500)
+    """test detection ability of outliers"""
+
+    sigmaclipping_extractor = SigmaClippingExtractor(
+        subarray=example_subarray, sample_size=2500
+    )
     times = np.linspace(60117.911, 60117.9258, num=5000)
     flatfield_dl1_data = np.random.normal(77.0, 10.0, size=(5000, 2, 1855))
     # insert outliers
-    flatfield_dl1_data[:,0,120] = 120.0
-    flatfield_dl1_data[:,1,67] = 120.0
-    flatfield_dl1_table = QTable([times, flatfield_dl1_data], names=('time', 'image'))
+    flatfield_dl1_data[:, 0, 120] = 120.0
+    flatfield_dl1_data[:, 1, 67] = 120.0
+    flatfield_dl1_table = QTable([times, flatfield_dl1_data], names=("time", "image"))
     sigmaclipping_stats_list = sigmaclipping_extractor(dl1_table=flatfield_dl1_table)
-    
-    #check if outliers where detected correctly
-    assert sigmaclipping_stats_list[0].median_outliers[0][120] == True
-    assert sigmaclipping_stats_list[0].median_outliers[1][67] == True
-    assert sigmaclipping_stats_list[1].median_outliers[0][120] == True
-    assert sigmaclipping_stats_list[1].median_outliers[1][67] == True
+
+    # check if outliers where detected correctly
+    assert sigmaclipping_stats_list[0].median_outliers[0][120] is True
+    assert sigmaclipping_stats_list[0].median_outliers[1][67] is True
+    assert sigmaclipping_stats_list[1].median_outliers[0][120] is True
+    assert sigmaclipping_stats_list[1].median_outliers[1][67] is True
diff --git a/src/ctapipe/containers.py b/src/ctapipe/containers.py
index 0aeff5a97eb..68685a26623 100644
--- a/src/ctapipe/containers.py
+++ b/src/ctapipe/containers.py
@@ -57,7 +57,6 @@
     "TriggerContainer",
     "WaveformCalibrationContainer",
     "StatisticsContainer",
-    "VarianceStatisticsContainer",
     "ImageStatisticsContainer",
     "IntensityStatisticsContainer",
     "PeakTimeStatisticsContainer",
@@ -424,12 +423,6 @@ class StatisticsContainer(Container):
     std = Field(np.float32(nan), "Channel-wise and pixel-wise standard deviation")
     std_outliers = Field(np.float32(nan), "Channel-wise and pixel-wise standard deviation outliers")
 
-class VarianceStatisticsContainer(Container):
-     """Store descriptive statistics of a sequence of variance images"""
-
-     validity_start = Field(np.float32(nan), "start of the validity range")
-     validity_stop = Field(np.float32(nan), "stop of the validity range")
-     mean = Field(np.float32(nan), "Channel-wise and pixel-wise mean")
 
 class ImageStatisticsContainer(Container):
     """Store descriptive image statistics"""

From 23c0536593bb84e0af756501996e98c64c07adaa Mon Sep 17 00:00:00 2001
From: Tjark Miener <tjarkmiener@Tjarks-MacBook-Air.local>
Date: Fri, 31 May 2024 17:45:29 +0200
Subject: [PATCH 16/61] change __call__() to _extract()

---
 src/ctapipe/calib/camera/extractor.py             | 6 +++---
 src/ctapipe/calib/camera/tests/test_extractors.py | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/ctapipe/calib/camera/extractor.py b/src/ctapipe/calib/camera/extractor.py
index 9e9e9947462..eaff6c714c5 100644
--- a/src/ctapipe/calib/camera/extractor.py
+++ b/src/ctapipe/calib/camera/extractor.py
@@ -50,7 +50,7 @@ def __init__(self, subarray, config=None, parent=None, **kwargs):
         super().__init__(subarray=subarray, config=config, parent=parent, **kwargs)
 
     @abstractmethod
-    def __call__(
+    def _extract(
         self, dl1_table, masked_pixels_of_sample=None, col_name="image"
     ) -> list:
         """
@@ -80,7 +80,7 @@ class PlainExtractor(StatisticsExtractor):
     using numpy and scipy functions
     """
 
-    def __call__(
+    def _extract(
         self, dl1_table, masked_pixels_of_sample=None, col_name="image"
     ) -> list:
 
@@ -148,7 +148,7 @@ class SigmaClippingExtractor(StatisticsExtractor):
         help="Number of iterations for the sigma clipping outlier removal",
     ).tag(config=True)
 
-    def __call__(
+    def _extract(
         self, dl1_table, masked_pixels_of_sample=None, col_name="image"
     ) -> list:
 
diff --git a/src/ctapipe/calib/camera/tests/test_extractors.py b/src/ctapipe/calib/camera/tests/test_extractors.py
index 89c375387e3..d5f082762ed 100644
--- a/src/ctapipe/calib/camera/tests/test_extractors.py
+++ b/src/ctapipe/calib/camera/tests/test_extractors.py
@@ -22,8 +22,8 @@ def test_extractors(example_subarray):
     pedestal_dl1_table = QTable([times, pedestal_dl1_data], names=("time", "image"))
     flatfield_dl1_table = QTable([times, flatfield_dl1_data], names=("time", "image"))
 
-    plain_stats_list = plain_extractor(dl1_table=pedestal_dl1_table)
-    sigmaclipping_stats_list = sigmaclipping_extractor(dl1_table=flatfield_dl1_table)
+    plain_stats_list = plain_extractor._extract(dl1_table=pedestal_dl1_table)
+    sigmaclipping_stats_list = sigmaclipping_extractor._extract(dl1_table=flatfield_dl1_table)
 
     assert np.any(np.abs(plain_stats_list[0].mean - 2.0) > 1.5) is False
     assert np.any(np.abs(sigmaclipping_stats_list[0].mean - 77.0) > 1.5) is False
@@ -50,7 +50,7 @@ def test_check_outliers(example_subarray):
     flatfield_dl1_data[:, 0, 120] = 120.0
     flatfield_dl1_data[:, 1, 67] = 120.0
     flatfield_dl1_table = QTable([times, flatfield_dl1_data], names=("time", "image"))
-    sigmaclipping_stats_list = sigmaclipping_extractor(dl1_table=flatfield_dl1_table)
+    sigmaclipping_stats_list = sigmaclipping_extractor._extract(dl1_table=flatfield_dl1_table)
 
     # check if outliers where detected correctly
     assert sigmaclipping_stats_list[0].median_outliers[0][120] is True

From 5bd7cd9fc8ce18a2a69c38e9a7db59a0b2cf392d Mon Sep 17 00:00:00 2001
From: Tjark Miener <tjarkmiener@Tjarks-MacBook-Air.local>
Date: Fri, 31 May 2024 17:59:36 +0200
Subject: [PATCH 17/61] minor renaming

---
 src/ctapipe/calib/camera/extractor.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/src/ctapipe/calib/camera/extractor.py b/src/ctapipe/calib/camera/extractor.py
index eaff6c714c5..0c23caebb00 100644
--- a/src/ctapipe/calib/camera/extractor.py
+++ b/src/ctapipe/calib/camera/extractor.py
@@ -60,7 +60,7 @@ def _extract(
         Parameters
         ----------
         dl1_table : ndarray
-            dl1 table with images and times stored in a numpy array of shape
+            dl1 table with images and timestamps stored in a numpy array of shape
             (n_images, n_channels, n_pix).
         masked_pixels_of_sample : ndarray
             boolean array of masked pixels that are not available for processing
@@ -139,11 +139,11 @@ class SigmaClippingExtractor(StatisticsExtractor):
     using astropy's sigma clipping functions
     """
 
-    sigma_clipping_max_sigma = Int(
+    max_sigma = Int(
         default_value=4,
         help="Maximal value for the sigma clipping outlier removal",
     ).tag(config=True)
-    sigma_clipping_iterations = Int(
+    iterations = Int(
         default_value=5,
         help="Number of iterations for the sigma clipping outlier removal",
     ).tag(config=True)
@@ -178,11 +178,10 @@ def _sigmaclipping_extraction(
         masked_images = np.ma.array(images, mask=masked_pixels_of_sample)
 
         # mean, median, and std over the sample per pixel
-        max_sigma = self.sigma_clipping_max_sigma
         pixel_mean, pixel_median, pixel_std = sigma_clipped_stats(
             masked_images,
-            sigma=max_sigma,
-            maxiters=self.sigma_clipping_iterations,
+            sigma=self.max_sigma,
+            maxiters=self.iterations,
             cenfunc="mean",
             axis=0,
         )
@@ -192,7 +191,7 @@ def _sigmaclipping_extraction(
         pixel_median = np.ma.array(pixel_median, mask=np.isnan(pixel_median))
         pixel_std = np.ma.array(pixel_std, mask=np.isnan(pixel_std))
 
-        unused_values = np.abs(masked_images - pixel_mean) > (max_sigma * pixel_std)
+        unused_values = np.abs(masked_images - pixel_mean) > (self.max_sigma * pixel_std)
 
         # add outliers identified by sigma clipping for following operations
         masked_images.mask |= unused_values

From ea3331bb95d78a34adc5b3b166b151ab145814e6 Mon Sep 17 00:00:00 2001
From: Tjark Miener <tjarkmiener@Tjarks-MacBook-Air.local>
Date: Fri, 31 May 2024 18:15:53 +0200
Subject: [PATCH 18/61] use pytest.fixture for Extractors

---
 .../calib/camera/tests/test_extractors.py     | 32 ++++++++++++-------
 1 file changed, 20 insertions(+), 12 deletions(-)

diff --git a/src/ctapipe/calib/camera/tests/test_extractors.py b/src/ctapipe/calib/camera/tests/test_extractors.py
index d5f082762ed..d363ee24ff0 100644
--- a/src/ctapipe/calib/camera/tests/test_extractors.py
+++ b/src/ctapipe/calib/camera/tests/test_extractors.py
@@ -7,14 +7,21 @@
 
 from ctapipe.calib.camera.extractor import PlainExtractor, SigmaClippingExtractor
 
+@pytest.fixture
+    def test_plainextractor(example_subarray):
+        return PlainExtractor(
+            subarray=example_subarray, sample_size=2500
+        )
 
-def test_extractors(example_subarray):
+@pytest.fixture
+    def test_sigmaclippingextractor(example_subarray):
+        return SigmaClippingExtractor(
+            subarray=example_subarray, sample_size=2500
+        )
+
+def test_extractors(test_plainextractor, test_sigmaclippingextractor):
     """test basic functionality of the StatisticsExtractors"""
 
-    plain_extractor = PlainExtractor(subarray=example_subarray, sample_size=2500)
-    sigmaclipping_extractor = SigmaClippingExtractor(
-        subarray=example_subarray, sample_size=2500
-    )
     times = np.linspace(60117.911, 60117.9258, num=5000)
     pedestal_dl1_data = np.random.normal(2.0, 5.0, size=(5000, 2, 1855))
     flatfield_dl1_data = np.random.normal(77.0, 10.0, size=(5000, 2, 1855))
@@ -22,8 +29,10 @@ def test_extractors(example_subarray):
     pedestal_dl1_table = QTable([times, pedestal_dl1_data], names=("time", "image"))
     flatfield_dl1_table = QTable([times, flatfield_dl1_data], names=("time", "image"))
 
-    plain_stats_list = plain_extractor._extract(dl1_table=pedestal_dl1_table)
-    sigmaclipping_stats_list = sigmaclipping_extractor._extract(dl1_table=flatfield_dl1_table)
+    plain_stats_list = test_plainextractor._extract(dl1_table=pedestal_dl1_table)
+    sigmaclipping_stats_list = test_sigmaclippingextractor._extract(
+        dl1_table=flatfield_dl1_table
+    )
 
     assert np.any(np.abs(plain_stats_list[0].mean - 2.0) > 1.5) is False
     assert np.any(np.abs(sigmaclipping_stats_list[0].mean - 77.0) > 1.5) is False
@@ -38,19 +47,18 @@ def test_extractors(example_subarray):
     assert np.any(np.abs(sigmaclipping_stats_list[0].std - 10.0) > 1.5) is False
 
 
-def test_check_outliers(example_subarray):
+def test_check_outliers(test_sigmaclippingextractor):
     """test detection ability of outliers"""
 
-    sigmaclipping_extractor = SigmaClippingExtractor(
-        subarray=example_subarray, sample_size=2500
-    )
     times = np.linspace(60117.911, 60117.9258, num=5000)
     flatfield_dl1_data = np.random.normal(77.0, 10.0, size=(5000, 2, 1855))
     # insert outliers
     flatfield_dl1_data[:, 0, 120] = 120.0
     flatfield_dl1_data[:, 1, 67] = 120.0
     flatfield_dl1_table = QTable([times, flatfield_dl1_data], names=("time", "image"))
-    sigmaclipping_stats_list = sigmaclipping_extractor._extract(dl1_table=flatfield_dl1_table)
+    sigmaclipping_stats_list = sigmaclipping_extractor._extract(
+        dl1_table=flatfield_dl1_table
+    )
 
     # check if outliers where detected correctly
     assert sigmaclipping_stats_list[0].median_outliers[0][120] is True

From 9f61c5608744797c2cc932456f27b76b0b72dfec Mon Sep 17 00:00:00 2001
From: Tjark Miener <tjarkmiener@Tjarks-MacBook-Air.local>
Date: Fri, 31 May 2024 18:59:57 +0200
Subject: [PATCH 19/61] reduce duplicated code of the call function

---
 src/ctapipe/calib/camera/extractor.py         | 52 ++++++-------------
 .../calib/camera/tests/test_extractors.py     | 30 ++++++-----
 2 files changed, 31 insertions(+), 51 deletions(-)

diff --git a/src/ctapipe/calib/camera/extractor.py b/src/ctapipe/calib/camera/extractor.py
index 0c23caebb00..d060d620508 100644
--- a/src/ctapipe/calib/camera/extractor.py
+++ b/src/ctapipe/calib/camera/extractor.py
@@ -49,8 +49,7 @@ def __init__(self, subarray, config=None, parent=None, **kwargs):
         """
         super().__init__(subarray=subarray, config=config, parent=parent, **kwargs)
 
-    @abstractmethod
-    def _extract(
+    def __call__(
         self, dl1_table, masked_pixels_of_sample=None, col_name="image"
     ) -> list:
         """
@@ -73,17 +72,6 @@ def _extract(
             List of extracted statistics and validity ranges
         """
 
-
-class PlainExtractor(StatisticsExtractor):
-    """
-    Extractor the statistics from a sequence of images
-    using numpy and scipy functions
-    """
-
-    def _extract(
-        self, dl1_table, masked_pixels_of_sample=None, col_name="image"
-    ) -> list:
-
         # in python 3.12 itertools.batched can be used
         image_chunks = (
             dl1_table[col_name].data[i : i + self.sample_size]
@@ -98,11 +86,23 @@ def _extract(
         stats_list = []
         for images, times in zip(image_chunks, time_chunks):
             stats_list.append(
-                self._plain_extraction(images, times, masked_pixels_of_sample)
+                self._extract(images, times, masked_pixels_of_sample)
             )
         return stats_list
 
-    def _plain_extraction(
+    @abstractmethod
+    def _extract(
+        self, images, times, masked_pixels_of_sample
+    ) -> StatisticsContainer:
+        pass
+
+class PlainExtractor(StatisticsExtractor):
+    """
+    Extractor the statistics from a sequence of images
+    using numpy and scipy functions
+    """
+
+    def _extract(
         self, images, times, masked_pixels_of_sample
     ) -> StatisticsContainer:
 
@@ -149,28 +149,6 @@ class SigmaClippingExtractor(StatisticsExtractor):
     ).tag(config=True)
 
     def _extract(
-        self, dl1_table, masked_pixels_of_sample=None, col_name="image"
-    ) -> list:
-
-        # in python 3.12 itertools.batched can be used
-        image_chunks = (
-            dl1_table[col_name].data[i : i + self.sample_size]
-            for i in range(0, len(dl1_table[col_name].data), self.sample_size)
-        )
-        time_chunks = (
-            dl1_table["time"][i : i + self.sample_size]
-            for i in range(0, len(dl1_table["time"]), self.sample_size)
-        )
-
-        # Calculate the statistics from a sequence of images
-        stats_list = []
-        for images, times in zip(image_chunks, time_chunks):
-            stats_list.append(
-                self._sigmaclipping_extraction(images, times, masked_pixels_of_sample)
-            )
-        return stats_list
-
-    def _sigmaclipping_extraction(
         self, images, times, masked_pixels_of_sample
     ) -> StatisticsContainer:
 
diff --git a/src/ctapipe/calib/camera/tests/test_extractors.py b/src/ctapipe/calib/camera/tests/test_extractors.py
index d363ee24ff0..06107a6e7b7 100644
--- a/src/ctapipe/calib/camera/tests/test_extractors.py
+++ b/src/ctapipe/calib/camera/tests/test_extractors.py
@@ -4,20 +4,22 @@
 
 from astropy.table import QTable
 import numpy as np
-
+import pytest
 from ctapipe.calib.camera.extractor import PlainExtractor, SigmaClippingExtractor
 
-@pytest.fixture
-    def test_plainextractor(example_subarray):
-        return PlainExtractor(
-            subarray=example_subarray, sample_size=2500
-        )
+@pytest.fixture(name="test_plainextractor")
+def fixture_test_plainextractor(example_subarray):
+    """test the PlainExtractor"""
+    return PlainExtractor(
+        subarray=example_subarray, sample_size=2500
+    )
 
-@pytest.fixture
-    def test_sigmaclippingextractor(example_subarray):
-        return SigmaClippingExtractor(
-            subarray=example_subarray, sample_size=2500
-        )
+@pytest.fixture(name="test_sigmaclippingextractor")
+def fixture_test_sigmaclippingextractor(example_subarray):
+    """test the SigmaClippingExtractor"""
+    return SigmaClippingExtractor(
+        subarray=example_subarray, sample_size=2500
+    )
 
 def test_extractors(test_plainextractor, test_sigmaclippingextractor):
     """test basic functionality of the StatisticsExtractors"""
@@ -29,8 +31,8 @@ def test_extractors(test_plainextractor, test_sigmaclippingextractor):
     pedestal_dl1_table = QTable([times, pedestal_dl1_data], names=("time", "image"))
     flatfield_dl1_table = QTable([times, flatfield_dl1_data], names=("time", "image"))
 
-    plain_stats_list = test_plainextractor._extract(dl1_table=pedestal_dl1_table)
-    sigmaclipping_stats_list = test_sigmaclippingextractor._extract(
+    plain_stats_list = test_plainextractor(dl1_table=pedestal_dl1_table)
+    sigmaclipping_stats_list = test_sigmaclippingextractor(
         dl1_table=flatfield_dl1_table
     )
 
@@ -56,7 +58,7 @@ def test_check_outliers(test_sigmaclippingextractor):
     flatfield_dl1_data[:, 0, 120] = 120.0
     flatfield_dl1_data[:, 1, 67] = 120.0
     flatfield_dl1_table = QTable([times, flatfield_dl1_data], names=("time", "image"))
-    sigmaclipping_stats_list = sigmaclipping_extractor._extract(
+    sigmaclipping_stats_list = test_sigmaclippingextractor(
         dl1_table=flatfield_dl1_table
     )
 

From 36ffc88d5fc83e6524687e1dad96a71070385cca Mon Sep 17 00:00:00 2001
From: TjarkMiener <tjark.miener4692@googlemail.com>
Date: Wed, 12 Jun 2024 12:02:41 +0200
Subject: [PATCH 20/61] edit description of StatisticsContainer

---
 src/ctapipe/containers.py | 35 ++++++++++++++++++++++++++++-------
 1 file changed, 28 insertions(+), 7 deletions(-)

diff --git a/src/ctapipe/containers.py b/src/ctapipe/containers.py
index 68685a26623..f61e429417d 100644
--- a/src/ctapipe/containers.py
+++ b/src/ctapipe/containers.py
@@ -415,13 +415,33 @@ class MorphologyContainer(Container):
 class StatisticsContainer(Container):
     """Store descriptive statistics of a sequence of images"""
 
-    validity_start = Field(np.float32(nan), "start of the validity range")
-    validity_stop = Field(np.float32(nan), "stop of the validity range")
-    mean = Field(np.float32(nan), "Channel-wise and pixel-wise mean")
-    median = Field(np.float32(nan), "Channel-wise and pixel-wise median")
-    median_outliers = Field(np.float32(nan), "Channel-wise and pixel-wise median outliers")
-    std = Field(np.float32(nan), "Channel-wise and pixel-wise standard deviation")
-    std_outliers = Field(np.float32(nan), "Channel-wise and pixel-wise standard deviation outliers")
+    extraction_start = Field(np.float32(nan), "start of the extraction sequence")
+    extraction_stop = Field(np.float32(nan), "stop of the extraction sequence")
+    mean = Field(
+        None,
+        "mean of a pixel-wise quantity for each channel"
+        "Type: float; Shape: (n_channels, n_pixel)",
+    )
+    median = Field(
+        None,
+        "median of a pixel-wise quantity for each channel"
+        "Type: float; Shape: (n_channels, n_pixel)",
+    )
+    median_outliers = Field(
+        None,
+        "outliers from the median distribution of a pixel-wise quantity for each channel"
+        "Type: binary mask; Shape: (n_channels, n_pixel)",
+    )
+    std = Field(
+        None,
+        "standard deviation of a pixel-wise quantity for each channel"
+        "Type: float; Shape: (n_channels, n_pixel)",
+    )
+    std_outliers = Field(
+        None,
+        "outliers from the standard deviation distribution of a pixel-wise quantity for each channel"
+        "Type: binary mask; Shape: (n_channels, n_pixel)",
+    )
 
 
 class ImageStatisticsContainer(Container):
@@ -434,6 +454,7 @@ class ImageStatisticsContainer(Container):
     skewness = Field(nan, "skewness of intensity")
     kurtosis = Field(nan, "kurtosis of intensity")
 
+
 class IntensityStatisticsContainer(ImageStatisticsContainer):
     default_prefix = "intensity"
 

From a5caed45c4d35d5fd4adcfe6387943a148156a80 Mon Sep 17 00:00:00 2001
From: TjarkMiener <tjark.miener4692@googlemail.com>
Date: Wed, 12 Jun 2024 13:12:54 +0200
Subject: [PATCH 21/61] added feature to shift the extraction sequence

allow overlapping extraction sequences
---
 src/ctapipe/calib/camera/extractor.py | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/src/ctapipe/calib/camera/extractor.py b/src/ctapipe/calib/camera/extractor.py
index d060d620508..f4ddb03d4bd 100644
--- a/src/ctapipe/calib/camera/extractor.py
+++ b/src/ctapipe/calib/camera/extractor.py
@@ -50,7 +50,11 @@ def __init__(self, subarray, config=None, parent=None, **kwargs):
         super().__init__(subarray=subarray, config=config, parent=parent, **kwargs)
 
     def __call__(
-        self, dl1_table, masked_pixels_of_sample=None, col_name="image"
+        self,
+        dl1_table,
+        masked_pixels_of_sample=None,
+        sample_shift=None,
+        col_name="image",
     ) -> list:
         """
         Call the relevant functions to extract the statistics
@@ -63,6 +67,8 @@ def __call__(
             (n_images, n_channels, n_pix).
         masked_pixels_of_sample : ndarray
             boolean array of masked pixels that are not available for processing
+        sample_shift : int
+            number of samples to shift the extraction sequence
         col_name : string
             column name in the dl1 table
 
@@ -72,14 +78,19 @@ def __call__(
             List of extracted statistics and validity ranges
         """
 
+        # If no sample_shift is provided, the sample_shift is set to self.sample_size
+        # meaning that the samples are not overlapping.
+        if sample_shift is None:
+            sample_shift = self.sample_size
+
         # in python 3.12 itertools.batched can be used
         image_chunks = (
             dl1_table[col_name].data[i : i + self.sample_size]
-            for i in range(0, len(dl1_table[col_name].data), self.sample_size)
+            for i in range(0, len(dl1_table[col_name].data), sample_shift)
         )
         time_chunks = (
             dl1_table["time"][i : i + self.sample_size]
-            for i in range(0, len(dl1_table["time"]), self.sample_size)
+            for i in range(0, len(dl1_table["time"]), sample_shift)
         )
 
         # Calculate the statistics from a sequence of images
@@ -169,7 +180,9 @@ def _extract(
         pixel_median = np.ma.array(pixel_median, mask=np.isnan(pixel_median))
         pixel_std = np.ma.array(pixel_std, mask=np.isnan(pixel_std))
 
-        unused_values = np.abs(masked_images - pixel_mean) > (self.max_sigma * pixel_std)
+        unused_values = np.abs(masked_images - pixel_mean) > (
+            self.max_sigma * pixel_std
+        )
 
         # add outliers identified by sigma clipping for following operations
         masked_images.mask |= unused_values

From 51d98f967c9d519e777e7d511ce5eb40242c59c6 Mon Sep 17 00:00:00 2001
From: TjarkMiener <tjark.miener4692@googlemail.com>
Date: Wed, 12 Jun 2024 14:23:53 +0200
Subject: [PATCH 22/61] fix boundary case for the last chunk

renaming to chunk(s) and chunk_size and _shift

added test for chunk_shift and boundary case
---
 src/ctapipe/calib/camera/extractor.py         | 72 ++++++++++---------
 .../calib/camera/tests/test_extractors.py     | 21 +++++-
 src/ctapipe/containers.py                     |  6 +-
 3 files changed, 61 insertions(+), 38 deletions(-)

diff --git a/src/ctapipe/calib/camera/extractor.py b/src/ctapipe/calib/camera/extractor.py
index f4ddb03d4bd..86d9c1345fd 100644
--- a/src/ctapipe/calib/camera/extractor.py
+++ b/src/ctapipe/calib/camera/extractor.py
@@ -1,5 +1,5 @@
 """
-Extraction algorithms to compute the statistics from a sequence of images
+Extraction algorithms to compute the statistics from a chunk of images
 """
 
 __all__ = [
@@ -23,9 +23,9 @@
 class StatisticsExtractor(TelescopeComponent):
     """Base StatisticsExtractor component"""
 
-    sample_size = Int(
+    chunk_size = Int(
         2500,
-        help="Size of the sample used for the calculation of the statistical values",
+        help="Size of the chunk used for the calculation of the statistical values",
     ).tag(config=True)
     image_median_cut_outliers = List(
         [-0.3, 0.3],
@@ -41,7 +41,7 @@ class StatisticsExtractor(TelescopeComponent):
     def __init__(self, subarray, config=None, parent=None, **kwargs):
         """
         Base component to handle the extraction of the statistics
-        from a sequence of charges and pulse times (images).
+        from a chunk of charges and pulse times (images).
 
         Parameters
         ----------
@@ -53,7 +53,7 @@ def __call__(
         self,
         dl1_table,
         masked_pixels_of_sample=None,
-        sample_shift=None,
+        chunk_shift=None,
         col_name="image",
     ) -> list:
         """
@@ -67,38 +67,44 @@ def __call__(
             (n_images, n_channels, n_pix).
         masked_pixels_of_sample : ndarray
             boolean array of masked pixels that are not available for processing
-        sample_shift : int
-            number of samples to shift the extraction sequence
+        chunk_shift : int
+            number of samples to shift the extraction chunk
         col_name : string
             column name in the dl1 table
 
         Returns
         -------
         List StatisticsContainer:
-            List of extracted statistics and validity ranges
+            List of extracted statistics and extraction chunks
         """
 
-        # If no sample_shift is provided, the sample_shift is set to self.sample_size
-        # meaning that the samples are not overlapping.
-        if sample_shift is None:
-            sample_shift = self.sample_size
-
-        # in python 3.12 itertools.batched can be used
-        image_chunks = (
-            dl1_table[col_name].data[i : i + self.sample_size]
-            for i in range(0, len(dl1_table[col_name].data), sample_shift)
-        )
-        time_chunks = (
-            dl1_table["time"][i : i + self.sample_size]
-            for i in range(0, len(dl1_table["time"]), sample_shift)
-        )
-
-        # Calculate the statistics from a sequence of images
+        # If no chunk_shift is provided, the chunk_shift is set to self.chunk_size
+        # meaning that the extraction chunks are not overlapping.
+        if chunk_shift is None:
+            chunk_shift = self.chunk_size
+
+        # Function to split table data into appropriated chunks
+        def _get_chunks(col_name):
+            return [
+                (
+                    dl1_table[col_name].data[i : i + self.chunk_size]
+                    if i + self.chunk_size <= len(dl1_table[col_name])
+                    else dl1_table[col_name].data[
+                        len(dl1_table[col_name].data)
+                        - self.chunk_size : len(dl1_table[col_name].data)
+                    ]
+                )
+                for i in range(0, len(dl1_table[col_name].data), chunk_shift)
+            ]
+
+        # Get the chunks for the timestamps and selected column name
+        time_chunks = _get_chunks("time")
+        image_chunks = _get_chunks(col_name)
+
+        # Calculate the statistics from a chunk of images
         stats_list = []
         for images, times in zip(image_chunks, time_chunks):
-            stats_list.append(
-                self._extract(images, times, masked_pixels_of_sample)
-            )
+            stats_list.append(self._extract(images, times, masked_pixels_of_sample))
         return stats_list
 
     @abstractmethod
@@ -109,7 +115,7 @@ def _extract(
 
 class PlainExtractor(StatisticsExtractor):
     """
-    Extractor the statistics from a sequence of images
+    Extractor the statistics from a chunk of images
     using numpy and scipy functions
     """
 
@@ -136,8 +142,8 @@ def _extract(
         )
 
         return StatisticsContainer(
-            validity_start=times[0],
-            validity_stop=times[-1],
+            extraction_start=times[0],
+            extraction_stop=times[-1],
             mean=pixel_mean.filled(np.nan),
             median=pixel_median.filled(np.nan),
             median_outliers=image_median_outliers.filled(True),
@@ -146,7 +152,7 @@ def _extract(
 
 class SigmaClippingExtractor(StatisticsExtractor):
     """
-    Extracts the statistics from a sequence of images
+    Extracts the statistics from a chunk of images
     using astropy's sigma clipping functions
     """
 
@@ -223,8 +229,8 @@ def _extract(
         )
 
         return StatisticsContainer(
-            validity_start=times[0],
-            validity_stop=times[-1],
+            extraction_start=times[0],
+            extraction_stop=times[-1],
             mean=pixel_mean.filled(np.nan),
             median=pixel_median.filled(np.nan),
             median_outliers=image_median_outliers.filled(True),
diff --git a/src/ctapipe/calib/camera/tests/test_extractors.py b/src/ctapipe/calib/camera/tests/test_extractors.py
index 06107a6e7b7..40efd4f2fc3 100644
--- a/src/ctapipe/calib/camera/tests/test_extractors.py
+++ b/src/ctapipe/calib/camera/tests/test_extractors.py
@@ -11,14 +11,14 @@
 def fixture_test_plainextractor(example_subarray):
     """test the PlainExtractor"""
     return PlainExtractor(
-        subarray=example_subarray, sample_size=2500
+        subarray=example_subarray, chunk_size=2500
     )
 
 @pytest.fixture(name="test_sigmaclippingextractor")
 def fixture_test_sigmaclippingextractor(example_subarray):
     """test the SigmaClippingExtractor"""
     return SigmaClippingExtractor(
-        subarray=example_subarray, sample_size=2500
+        subarray=example_subarray, chunk_size=2500
     )
 
 def test_extractors(test_plainextractor, test_sigmaclippingextractor):
@@ -67,3 +67,20 @@ def test_check_outliers(test_sigmaclippingextractor):
     assert sigmaclipping_stats_list[0].median_outliers[1][67] is True
     assert sigmaclipping_stats_list[1].median_outliers[0][120] is True
     assert sigmaclipping_stats_list[1].median_outliers[1][67] is True
+    
+
+def test_check_chunk_shift(test_sigmaclippingextractor):
+    """test the chunk shift option and the boundary case for the last chunk"""
+
+    times = np.linspace(60117.911, 60117.9258, num=5000)
+    flatfield_dl1_data = np.random.normal(77.0, 10.0, size=(5000, 2, 1855))
+    # insert outliers
+    flatfield_dl1_table = QTable([times, flatfield_dl1_data], names=("time", "image"))
+    sigmaclipping_stats_list = test_sigmaclippingextractor(
+        dl1_table=flatfield_dl1_table,
+        chunk_shift=2000
+    )
+
+    # check if three chunks are used for the extraction
+    assert len(sigmaclipping_stats_list) == 3
+
diff --git a/src/ctapipe/containers.py b/src/ctapipe/containers.py
index f61e429417d..dcd70255519 100644
--- a/src/ctapipe/containers.py
+++ b/src/ctapipe/containers.py
@@ -413,10 +413,10 @@ class MorphologyContainer(Container):
 
 
 class StatisticsContainer(Container):
-    """Store descriptive statistics of a sequence of images"""
+    """Store descriptive statistics of a chunk of images"""
 
-    extraction_start = Field(np.float32(nan), "start of the extraction sequence")
-    extraction_stop = Field(np.float32(nan), "stop of the extraction sequence")
+    extraction_start = Field(np.float32(nan), "start of the extraction chunk")
+    extraction_stop = Field(np.float32(nan), "stop of the extraction chunk")
     mean = Field(
         None,
         "mean of a pixel-wise quantity for each channel"

From 5b1539742725af2916cdd3318907e7d3b6d473d3 Mon Sep 17 00:00:00 2001
From: TjarkMiener <tjark.miener4692@googlemail.com>
Date: Wed, 12 Jun 2024 15:13:27 +0200
Subject: [PATCH 23/61] fix tests

---
 .../calib/camera/tests/test_extractors.py     | 44 +++++++++----------
 1 file changed, 21 insertions(+), 23 deletions(-)

diff --git a/src/ctapipe/calib/camera/tests/test_extractors.py b/src/ctapipe/calib/camera/tests/test_extractors.py
index 40efd4f2fc3..a83c93fd1c0 100644
--- a/src/ctapipe/calib/camera/tests/test_extractors.py
+++ b/src/ctapipe/calib/camera/tests/test_extractors.py
@@ -2,24 +2,24 @@
 Tests for StatisticsExtractor and related functions
 """
 
-from astropy.table import QTable
 import numpy as np
 import pytest
+from astropy.table import QTable
+
 from ctapipe.calib.camera.extractor import PlainExtractor, SigmaClippingExtractor
 
+
 @pytest.fixture(name="test_plainextractor")
 def fixture_test_plainextractor(example_subarray):
     """test the PlainExtractor"""
-    return PlainExtractor(
-        subarray=example_subarray, chunk_size=2500
-    )
+    return PlainExtractor(subarray=example_subarray, chunk_size=2500)
+
 
 @pytest.fixture(name="test_sigmaclippingextractor")
 def fixture_test_sigmaclippingextractor(example_subarray):
     """test the SigmaClippingExtractor"""
-    return SigmaClippingExtractor(
-        subarray=example_subarray, chunk_size=2500
-    )
+    return SigmaClippingExtractor(subarray=example_subarray, chunk_size=2500)
+
 
 def test_extractors(test_plainextractor, test_sigmaclippingextractor):
     """test basic functionality of the StatisticsExtractors"""
@@ -36,17 +36,17 @@ def test_extractors(test_plainextractor, test_sigmaclippingextractor):
         dl1_table=flatfield_dl1_table
     )
 
-    assert np.any(np.abs(plain_stats_list[0].mean - 2.0) > 1.5) is False
-    assert np.any(np.abs(sigmaclipping_stats_list[0].mean - 77.0) > 1.5) is False
+    assert not np.any(np.abs(plain_stats_list[0].mean - 2.0) > 1.5)
+    assert not np.any(np.abs(sigmaclipping_stats_list[0].mean - 77.0) > 1.5)
 
-    assert np.any(np.abs(plain_stats_list[0].mean - 2.0) > 1.5) is False
-    assert np.any(np.abs(sigmaclipping_stats_list[0].mean - 77.0) > 1.5) is False
+    assert not np.any(np.abs(plain_stats_list[0].mean - 2.0) > 1.5)
+    assert not np.any(np.abs(sigmaclipping_stats_list[0].mean - 77.0) > 1.5)
 
-    assert np.any(np.abs(plain_stats_list[1].median - 2.0) > 1.5) is False
-    assert np.any(np.abs(sigmaclipping_stats_list[1].median - 77.0) > 1.5) is False
+    assert not np.any(np.abs(plain_stats_list[1].median - 2.0) > 1.5)
+    assert not np.any(np.abs(sigmaclipping_stats_list[1].median - 77.0) > 1.5)
 
-    assert np.any(np.abs(plain_stats_list[0].std - 5.0) > 1.5) is False
-    assert np.any(np.abs(sigmaclipping_stats_list[0].std - 10.0) > 1.5) is False
+    assert not np.any(np.abs(plain_stats_list[0].std - 5.0) > 1.5)
+    assert not np.any(np.abs(sigmaclipping_stats_list[0].std - 10.0) > 1.5)
 
 
 def test_check_outliers(test_sigmaclippingextractor):
@@ -63,11 +63,11 @@ def test_check_outliers(test_sigmaclippingextractor):
     )
 
     # check if outliers where detected correctly
-    assert sigmaclipping_stats_list[0].median_outliers[0][120] is True
-    assert sigmaclipping_stats_list[0].median_outliers[1][67] is True
-    assert sigmaclipping_stats_list[1].median_outliers[0][120] is True
-    assert sigmaclipping_stats_list[1].median_outliers[1][67] is True
-    
+    assert sigmaclipping_stats_list[0].median_outliers[0][120]
+    assert sigmaclipping_stats_list[0].median_outliers[1][67]
+    assert sigmaclipping_stats_list[1].median_outliers[0][120]
+    assert sigmaclipping_stats_list[1].median_outliers[1][67]
+
 
 def test_check_chunk_shift(test_sigmaclippingextractor):
     """test the chunk shift option and the boundary case for the last chunk"""
@@ -77,10 +77,8 @@ def test_check_chunk_shift(test_sigmaclippingextractor):
     # insert outliers
     flatfield_dl1_table = QTable([times, flatfield_dl1_data], names=("time", "image"))
     sigmaclipping_stats_list = test_sigmaclippingextractor(
-        dl1_table=flatfield_dl1_table,
-        chunk_shift=2000
+        dl1_table=flatfield_dl1_table, chunk_shift=2000
     )
 
     # check if three chunks are used for the extraction
     assert len(sigmaclipping_stats_list) == 3
-

From cb2a6fb319ce28e399c0837fbc9eb5bd9e7b093f Mon Sep 17 00:00:00 2001
From: TjarkMiener <tjark.miener4692@googlemail.com>
Date: Wed, 12 Jun 2024 15:26:03 +0200
Subject: [PATCH 24/61] fix ruff

---
 src/ctapipe/calib/camera/extractor.py      | 40 +++++++++-------------
 src/ctapipe/image/tests/test_statistics.py |  2 +-
 2 files changed, 17 insertions(+), 25 deletions(-)

diff --git a/src/ctapipe/calib/camera/extractor.py b/src/ctapipe/calib/camera/extractor.py
index 86d9c1345fd..4c8f49d1f38 100644
--- a/src/ctapipe/calib/camera/extractor.py
+++ b/src/ctapipe/calib/camera/extractor.py
@@ -13,13 +13,14 @@
 import numpy as np
 from astropy.stats import sigma_clipped_stats
 
-from ctapipe.core import TelescopeComponent
 from ctapipe.containers import StatisticsContainer
+from ctapipe.core import TelescopeComponent
 from ctapipe.core.traits import (
     Int,
     List,
 )
 
+
 class StatisticsExtractor(TelescopeComponent):
     """Base StatisticsExtractor component"""
 
@@ -90,8 +91,9 @@ def _get_chunks(col_name):
                     dl1_table[col_name].data[i : i + self.chunk_size]
                     if i + self.chunk_size <= len(dl1_table[col_name])
                     else dl1_table[col_name].data[
-                        len(dl1_table[col_name].data)
-                        - self.chunk_size : len(dl1_table[col_name].data)
+                        len(dl1_table[col_name].data) - self.chunk_size : len(
+                            dl1_table[col_name].data
+                        )
                     ]
                 )
                 for i in range(0, len(dl1_table[col_name].data), chunk_shift)
@@ -108,21 +110,17 @@ def _get_chunks(col_name):
         return stats_list
 
     @abstractmethod
-    def _extract(
-        self, images, times, masked_pixels_of_sample
-    ) -> StatisticsContainer:
+    def _extract(self, images, times, masked_pixels_of_sample) -> StatisticsContainer:
         pass
 
+
 class PlainExtractor(StatisticsExtractor):
     """
     Extractor the statistics from a chunk of images
     using numpy and scipy functions
     """
 
-    def _extract(
-        self, images, times, masked_pixels_of_sample
-    ) -> StatisticsContainer:
-
+    def _extract(self, images, times, masked_pixels_of_sample) -> StatisticsContainer:
         # ensure numpy array
         masked_images = np.ma.array(images, mask=masked_pixels_of_sample)
 
@@ -150,6 +148,7 @@ def _extract(
             std=pixel_std.filled(np.nan),
         )
 
+
 class SigmaClippingExtractor(StatisticsExtractor):
     """
     Extracts the statistics from a chunk of images
@@ -165,10 +164,7 @@ class SigmaClippingExtractor(StatisticsExtractor):
         help="Number of iterations for the sigma clipping outlier removal",
     ).tag(config=True)
 
-    def _extract(
-        self, images, times, masked_pixels_of_sample
-    ) -> StatisticsContainer:
-
+    def _extract(self, images, times, masked_pixels_of_sample) -> StatisticsContainer:
         # ensure numpy array
         masked_images = np.ma.array(images, mask=masked_pixels_of_sample)
 
@@ -206,25 +202,21 @@ def _extract(
         image_deviation = pixel_median - median_of_pixel_median[:, np.newaxis]
         image_median_outliers = np.logical_or(
             image_deviation
-            < self.image_median_cut_outliers[0] # pylint: disable=unsubscriptable-object
-            * median_of_pixel_median[
-                :, np.newaxis
-            ],
+            < self.image_median_cut_outliers[0]  # pylint: disable=unsubscriptable-object
+            * median_of_pixel_median[:, np.newaxis],
             image_deviation
-            > self.image_median_cut_outliers[1] # pylint: disable=unsubscriptable-object
-            * median_of_pixel_median[
-                :, np.newaxis
-            ],
+            > self.image_median_cut_outliers[1]  # pylint: disable=unsubscriptable-object
+            * median_of_pixel_median[:, np.newaxis],
         )
 
         # outliers from standard deviation
         deviation = pixel_std - median_of_pixel_std[:, np.newaxis]
         image_std_outliers = np.logical_or(
             deviation
-            < self.image_std_cut_outliers[0] # pylint: disable=unsubscriptable-object
+            < self.image_std_cut_outliers[0]  # pylint: disable=unsubscriptable-object
             * std_of_pixel_std[:, np.newaxis],
             deviation
-            > self.image_std_cut_outliers[1] # pylint: disable=unsubscriptable-object
+            > self.image_std_cut_outliers[1]  # pylint: disable=unsubscriptable-object
             * std_of_pixel_std[:, np.newaxis],
         )
 
diff --git a/src/ctapipe/image/tests/test_statistics.py b/src/ctapipe/image/tests/test_statistics.py
index 23806705787..4403e05ca0a 100644
--- a/src/ctapipe/image/tests/test_statistics.py
+++ b/src/ctapipe/image/tests/test_statistics.py
@@ -49,7 +49,7 @@ def test_kurtosis():
 
 
 def test_return_type():
-    from ctapipe.containers import PeakTimeStatisticsContainer, ImageStatisticsContainer
+    from ctapipe.containers import ImageStatisticsContainer, PeakTimeStatisticsContainer
     from ctapipe.image import descriptive_statistics
 
     rng = np.random.default_rng(0)

From a396d13bdd81920ad4e5f9a2404c2fff4c472591 Mon Sep 17 00:00:00 2001
From: TjarkMiener <tjark.miener4692@googlemail.com>
Date: Mon, 17 Jun 2024 10:49:44 +0200
Subject: [PATCH 25/61] edit docstring

dl1 table is a Qtable from astropy and not a ndarray
---
 src/ctapipe/calib/camera/extractor.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/ctapipe/calib/camera/extractor.py b/src/ctapipe/calib/camera/extractor.py
index 4c8f49d1f38..0896562424f 100644
--- a/src/ctapipe/calib/camera/extractor.py
+++ b/src/ctapipe/calib/camera/extractor.py
@@ -63,7 +63,7 @@ def __call__(
 
         Parameters
         ----------
-        dl1_table : ndarray
+        dl1_table : astropy.table.QTable
             dl1 table with images and timestamps stored in a numpy array of shape
             (n_images, n_channels, n_pix).
         masked_pixels_of_sample : ndarray

From a08b831ff6191d9db91423abf72d93f16bf60f5d Mon Sep 17 00:00:00 2001
From: TjarkMiener <tjark.miener4692@googlemail.com>
Date: Mon, 17 Jun 2024 11:31:00 +0200
Subject: [PATCH 26/61] added check for length of dl1 table is greater than
 chunk size

---
 src/ctapipe/calib/camera/extractor.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/ctapipe/calib/camera/extractor.py b/src/ctapipe/calib/camera/extractor.py
index 0896562424f..79a55a978b3 100644
--- a/src/ctapipe/calib/camera/extractor.py
+++ b/src/ctapipe/calib/camera/extractor.py
@@ -64,14 +64,13 @@ def __call__(
         Parameters
         ----------
         dl1_table : astropy.table.QTable
-            dl1 table with images and timestamps stored in a numpy array of shape
-            (n_images, n_channels, n_pix).
+            DL1 table with images of shape (n_images, n_channels, n_pix) and timestamps of shape (n_images, ) stored in an astropy QTable
         masked_pixels_of_sample : ndarray
-            boolean array of masked pixels that are not available for processing
+            boolean array of masked pixels of shape (n_pix, ) that are not available for processing
         chunk_shift : int
             number of samples to shift the extraction chunk
         col_name : string
-            column name in the dl1 table
+            column name in the DL1 table
 
         Returns
         -------
@@ -79,6 +78,11 @@ def __call__(
             List of extracted statistics and extraction chunks
         """
 
+        # Check if the length of the dl1 table is greater than the size of the chunk.
+        if len(dl1_table[col_name]) < self.chunk_size:
+            raise ValueError(
+                f"The length of the DL1 table '{len(dl1_table[col_name])}' must be greater than the size of the chunk '{self.chunk_size}'."
+            )
         # If no chunk_shift is provided, the chunk_shift is set to self.chunk_size
         # meaning that the extraction chunks are not overlapping.
         if chunk_shift is None:

From f20f1e3f59588369686d1640de68bd09e5233139 Mon Sep 17 00:00:00 2001
From: TjarkMiener <tjark.miener4692@googlemail.com>
Date: Mon, 17 Jun 2024 11:33:09 +0200
Subject: [PATCH 27/61] fix typo

---
 src/ctapipe/calib/camera/extractor.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/ctapipe/calib/camera/extractor.py b/src/ctapipe/calib/camera/extractor.py
index 79a55a978b3..221403729e4 100644
--- a/src/ctapipe/calib/camera/extractor.py
+++ b/src/ctapipe/calib/camera/extractor.py
@@ -120,7 +120,7 @@ def _extract(self, images, times, masked_pixels_of_sample) -> StatisticsContaine
 
 class PlainExtractor(StatisticsExtractor):
     """
-    Extractor the statistics from a chunk of images
+    Extract the statistics from a chunk of images
     using numpy and scipy functions
     """
 
@@ -155,7 +155,7 @@ def _extract(self, images, times, masked_pixels_of_sample) -> StatisticsContaine
 
 class SigmaClippingExtractor(StatisticsExtractor):
     """
-    Extracts the statistics from a chunk of images
+    Extract the statistics from a chunk of images
     using astropy's sigma clipping functions
     """
 

From 1be72cc2e894de0c131e09e12a8cdbbdc7bd56bf Mon Sep 17 00:00:00 2001
From: TjarkMiener <tjark.miener4692@googlemail.com>
Date: Mon, 17 Jun 2024 12:21:03 +0200
Subject: [PATCH 28/61] edit docstring

Length of table should be greater OR EQUAL than the size of chunk.
---
 src/ctapipe/calib/camera/extractor.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/ctapipe/calib/camera/extractor.py b/src/ctapipe/calib/camera/extractor.py
index 221403729e4..7dbded7dc7a 100644
--- a/src/ctapipe/calib/camera/extractor.py
+++ b/src/ctapipe/calib/camera/extractor.py
@@ -78,10 +78,10 @@ def __call__(
             List of extracted statistics and extraction chunks
         """
 
-        # Check if the length of the dl1 table is greater than the size of the chunk.
+        # Check if the length of the dl1 table is greater or equal than the size of the chunk.
         if len(dl1_table[col_name]) < self.chunk_size:
             raise ValueError(
-                f"The length of the DL1 table '{len(dl1_table[col_name])}' must be greater than the size of the chunk '{self.chunk_size}'."
+                f"The length of the DL1 table '{len(dl1_table[col_name])}' must be greater or equal than the size of the chunk '{self.chunk_size}'."
             )
         # If no chunk_shift is provided, the chunk_shift is set to self.chunk_size
         # meaning that the extraction chunks are not overlapping.

From a0723861d6cff327e5d65c3b9aab1687fb77e29d Mon Sep 17 00:00:00 2001
From: TjarkMiener <tjark.miener4692@googlemail.com>
Date: Tue, 18 Jun 2024 10:32:16 +0200
Subject: [PATCH 29/61] removed lstchain relic

The mask is needed in the lstchain flatfield class to calculate the relative gain, which we will do outside the StatisticsExtractor.
---
 src/ctapipe/calib/camera/extractor.py | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/src/ctapipe/calib/camera/extractor.py b/src/ctapipe/calib/camera/extractor.py
index 7dbded7dc7a..28977b7ad2e 100644
--- a/src/ctapipe/calib/camera/extractor.py
+++ b/src/ctapipe/calib/camera/extractor.py
@@ -186,13 +186,6 @@ def _extract(self, images, times, masked_pixels_of_sample) -> StatisticsContaine
         pixel_median = np.ma.array(pixel_median, mask=np.isnan(pixel_median))
         pixel_std = np.ma.array(pixel_std, mask=np.isnan(pixel_std))
 
-        unused_values = np.abs(masked_images - pixel_mean) > (
-            self.max_sigma * pixel_std
-        )
-
-        # add outliers identified by sigma clipping for following operations
-        masked_images.mask |= unused_values
-
         # median of the median over the camera
         median_of_pixel_median = np.ma.median(pixel_median, axis=1)
 

From 9c8090e9a1802491e5d33db22bd2efad35b0c085 Mon Sep 17 00:00:00 2001
From: TjarkMiener <tjark.miener4692@googlemail.com>
Date: Thu, 27 Jun 2024 11:20:18 +0200
Subject: [PATCH 30/61] resolved remaining issues

Check for outliers depends if one process flatfield (signal) or pedestal (noise) events.

Change time to time_mono in order to read the telescope trigger and not the subarray.

Fix tests to have the correct types.
---
 src/ctapipe/calib/camera/extractor.py         | 83 +++++++++++++------
 .../calib/camera/tests/test_extractors.py     | 39 +++++++--
 2 files changed, 89 insertions(+), 33 deletions(-)

diff --git a/src/ctapipe/calib/camera/extractor.py b/src/ctapipe/calib/camera/extractor.py
index 28977b7ad2e..a644b34f347 100644
--- a/src/ctapipe/calib/camera/extractor.py
+++ b/src/ctapipe/calib/camera/extractor.py
@@ -63,8 +63,10 @@ def __call__(
 
         Parameters
         ----------
-        dl1_table : astropy.table.QTable
-            DL1 table with images of shape (n_images, n_channels, n_pix) and timestamps of shape (n_images, ) stored in an astropy QTable
+        dl1_table : astropy.table.Table
+            DL1 table with images of shape (n_images, n_channels, n_pix),
+            timestamps of shape (n_images, ), and event type (n_images, )
+            stored in an astropy Table
         masked_pixels_of_sample : ndarray
             boolean array of masked pixels of shape (n_pix, ) that are not available for processing
         chunk_shift : int
@@ -78,10 +80,25 @@ def __call__(
             List of extracted statistics and extraction chunks
         """
 
+        # Check if the provided data contains an unique type of calibration events
+        # and if the events contain signal or noise.
+        if np.all(dl1_table["event_type"] == 0) or np.all(dl1_table["event_type"] == 1):
+            outlier_check = "SIGNAL"
+        elif (
+            np.all(dl1_table["event_type"] == 2)
+            or np.all(dl1_table["event_type"] == 3)
+            or np.all(dl1_table["event_type"] == 4)
+        ):
+            outlier_check = "NOISE"
+        else:
+            raise Exception(
+                "Invalid input data. Only dl1-like images of claibration events are accepted!"
+            )
+
         # Check if the length of the dl1 table is greater or equal than the size of the chunk.
         if len(dl1_table[col_name]) < self.chunk_size:
             raise ValueError(
-                f"The length of the DL1 table '{len(dl1_table[col_name])}' must be greater or equal than the size of the chunk '{self.chunk_size}'."
+                f"The length of the DL1 table ({len(dl1_table[col_name])}) must be greater or equal than the size of the chunk ({self.chunk_size})."
             )
         # If no chunk_shift is provided, the chunk_shift is set to self.chunk_size
         # meaning that the extraction chunks are not overlapping.
@@ -89,32 +106,34 @@ def __call__(
             chunk_shift = self.chunk_size
 
         # Function to split table data into appropriated chunks
-        def _get_chunks(col_name):
+        def _get_chunks(dl1_table_data):
             return [
                 (
-                    dl1_table[col_name].data[i : i + self.chunk_size]
-                    if i + self.chunk_size <= len(dl1_table[col_name])
-                    else dl1_table[col_name].data[
-                        len(dl1_table[col_name].data) - self.chunk_size : len(
-                            dl1_table[col_name].data
-                        )
+                    dl1_table_data[i : i + self.chunk_size]
+                    if i + self.chunk_size <= len(dl1_table_data)
+                    else dl1_table_data[
+                        len(dl1_table_data) - self.chunk_size : len(dl1_table_data)
                     ]
                 )
-                for i in range(0, len(dl1_table[col_name].data), chunk_shift)
+                for i in range(0, len(dl1_table_data), chunk_shift)
             ]
 
         # Get the chunks for the timestamps and selected column name
-        time_chunks = _get_chunks("time")
-        image_chunks = _get_chunks(col_name)
+        time_chunks = _get_chunks(dl1_table["time_mono"])
+        image_chunks = _get_chunks(dl1_table[col_name].data)
 
         # Calculate the statistics from a chunk of images
         stats_list = []
         for images, times in zip(image_chunks, time_chunks):
-            stats_list.append(self._extract(images, times, masked_pixels_of_sample))
+            stats_list.append(
+                self._extract(images, times, masked_pixels_of_sample, outlier_check)
+            )
         return stats_list
 
     @abstractmethod
-    def _extract(self, images, times, masked_pixels_of_sample) -> StatisticsContainer:
+    def _extract(
+        self, images, times, masked_pixels_of_sample, outlier_check
+    ) -> StatisticsContainer:
         pass
 
 
@@ -124,7 +143,9 @@ class PlainExtractor(StatisticsExtractor):
     using numpy and scipy functions
     """
 
-    def _extract(self, images, times, masked_pixels_of_sample) -> StatisticsContainer:
+    def _extract(
+        self, images, times, masked_pixels_of_sample, outlier_check
+    ) -> StatisticsContainer:
         # ensure numpy array
         masked_images = np.ma.array(images, mask=masked_pixels_of_sample)
 
@@ -168,7 +189,9 @@ class SigmaClippingExtractor(StatisticsExtractor):
         help="Number of iterations for the sigma clipping outlier removal",
     ).tag(config=True)
 
-    def _extract(self, images, times, masked_pixels_of_sample) -> StatisticsContainer:
+    def _extract(
+        self, images, times, masked_pixels_of_sample, outlier_check
+    ) -> StatisticsContainer:
         # ensure numpy array
         masked_images = np.ma.array(images, mask=masked_pixels_of_sample)
 
@@ -197,14 +220,24 @@ def _extract(self, images, times, masked_pixels_of_sample) -> StatisticsContaine
 
         # outliers from median
         image_deviation = pixel_median - median_of_pixel_median[:, np.newaxis]
-        image_median_outliers = np.logical_or(
-            image_deviation
-            < self.image_median_cut_outliers[0]  # pylint: disable=unsubscriptable-object
-            * median_of_pixel_median[:, np.newaxis],
-            image_deviation
-            > self.image_median_cut_outliers[1]  # pylint: disable=unsubscriptable-object
-            * median_of_pixel_median[:, np.newaxis],
-        )
+        if outlier_check == "SIGNAL":
+            image_median_outliers = np.logical_or(
+                image_deviation
+                < self.image_median_cut_outliers[0]  # pylint: disable=unsubscriptable-object
+                * median_of_pixel_median[:, np.newaxis],
+                image_deviation
+                > self.image_median_cut_outliers[1]  # pylint: disable=unsubscriptable-object
+                * median_of_pixel_median[:, np.newaxis],
+            )
+        elif outlier_check == "NOISE":
+            image_median_outliers = np.logical_or(
+                image_deviation
+                < self.image_median_cut_outliers[0]  # pylint: disable=unsubscriptable-object
+                * std_of_pixel_std[:, np.newaxis],
+                image_deviation
+                > self.image_median_cut_outliers[1]  # pylint: disable=unsubscriptable-object
+                * std_of_pixel_std[:, np.newaxis],
+            )
 
         # outliers from standard deviation
         deviation = pixel_std - median_of_pixel_std[:, np.newaxis]
diff --git a/src/ctapipe/calib/camera/tests/test_extractors.py b/src/ctapipe/calib/camera/tests/test_extractors.py
index a83c93fd1c0..801c896ef31 100644
--- a/src/ctapipe/calib/camera/tests/test_extractors.py
+++ b/src/ctapipe/calib/camera/tests/test_extractors.py
@@ -4,7 +4,8 @@
 
 import numpy as np
 import pytest
-from astropy.table import QTable
+from astropy.table import Table
+from astropy.time import Time
 
 from ctapipe.calib.camera.extractor import PlainExtractor, SigmaClippingExtractor
 
@@ -24,12 +25,22 @@ def fixture_test_sigmaclippingextractor(example_subarray):
 def test_extractors(test_plainextractor, test_sigmaclippingextractor):
     """test basic functionality of the StatisticsExtractors"""
 
-    times = np.linspace(60117.911, 60117.9258, num=5000)
+    times = Time(
+        np.linspace(60117.911, 60117.9258, num=5000), scale="tai", format="mjd"
+    )
     pedestal_dl1_data = np.random.normal(2.0, 5.0, size=(5000, 2, 1855))
+    pedestal_event_type = np.full((5000,), 2)
     flatfield_dl1_data = np.random.normal(77.0, 10.0, size=(5000, 2, 1855))
+    flatfield_event_type = np.full((5000,), 0)
 
-    pedestal_dl1_table = QTable([times, pedestal_dl1_data], names=("time", "image"))
-    flatfield_dl1_table = QTable([times, flatfield_dl1_data], names=("time", "image"))
+    pedestal_dl1_table = Table(
+        [times, pedestal_dl1_data, pedestal_event_type],
+        names=("time_mono", "image", "event_type"),
+    )
+    flatfield_dl1_table = Table(
+        [times, flatfield_dl1_data, flatfield_event_type],
+        names=("time_mono", "image", "event_type"),
+    )
 
     plain_stats_list = test_plainextractor(dl1_table=pedestal_dl1_table)
     sigmaclipping_stats_list = test_sigmaclippingextractor(
@@ -52,12 +63,18 @@ def test_extractors(test_plainextractor, test_sigmaclippingextractor):
 def test_check_outliers(test_sigmaclippingextractor):
     """test detection ability of outliers"""
 
-    times = np.linspace(60117.911, 60117.9258, num=5000)
+    times = Time(
+        np.linspace(60117.911, 60117.9258, num=5000), scale="tai", format="mjd"
+    )
     flatfield_dl1_data = np.random.normal(77.0, 10.0, size=(5000, 2, 1855))
+    flatfield_event_type = np.full((5000,), 0)
     # insert outliers
     flatfield_dl1_data[:, 0, 120] = 120.0
     flatfield_dl1_data[:, 1, 67] = 120.0
-    flatfield_dl1_table = QTable([times, flatfield_dl1_data], names=("time", "image"))
+    flatfield_dl1_table = Table(
+        [times, flatfield_dl1_data, flatfield_event_type],
+        names=("time_mono", "image", "event_type"),
+    )
     sigmaclipping_stats_list = test_sigmaclippingextractor(
         dl1_table=flatfield_dl1_table
     )
@@ -72,10 +89,16 @@ def test_check_outliers(test_sigmaclippingextractor):
 def test_check_chunk_shift(test_sigmaclippingextractor):
     """test the chunk shift option and the boundary case for the last chunk"""
 
-    times = np.linspace(60117.911, 60117.9258, num=5000)
+    times = Time(
+        np.linspace(60117.911, 60117.9258, num=5000), scale="tai", format="mjd"
+    )
     flatfield_dl1_data = np.random.normal(77.0, 10.0, size=(5000, 2, 1855))
+    flatfield_event_type = np.full((5000,), 0)
     # insert outliers
-    flatfield_dl1_table = QTable([times, flatfield_dl1_data], names=("time", "image"))
+    flatfield_dl1_table = Table(
+        [times, flatfield_dl1_data, flatfield_event_type],
+        names=("time_mono", "image", "event_type"),
+    )
     sigmaclipping_stats_list = test_sigmaclippingextractor(
         dl1_table=flatfield_dl1_table, chunk_shift=2000
     )

From 7ef82a5a63cd2768808900a8444dbf72d3d64943 Mon Sep 17 00:00:00 2001
From: TjarkMiener <tjark.miener4692@googlemail.com>
Date: Thu, 27 Jun 2024 12:21:14 +0200
Subject: [PATCH 31/61] added test for the input data

---
 src/ctapipe/calib/camera/extractor.py         |  2 +-
 .../calib/camera/tests/test_extractors.py     | 27 +++++++++++++++++++
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/src/ctapipe/calib/camera/extractor.py b/src/ctapipe/calib/camera/extractor.py
index a644b34f347..aa17b008316 100644
--- a/src/ctapipe/calib/camera/extractor.py
+++ b/src/ctapipe/calib/camera/extractor.py
@@ -91,7 +91,7 @@ def __call__(
         ):
             outlier_check = "NOISE"
         else:
-            raise Exception(
+            raise ValueError(
                 "Invalid input data. Only dl1-like images of claibration events are accepted!"
             )
 
diff --git a/src/ctapipe/calib/camera/tests/test_extractors.py b/src/ctapipe/calib/camera/tests/test_extractors.py
index 801c896ef31..814211da576 100644
--- a/src/ctapipe/calib/camera/tests/test_extractors.py
+++ b/src/ctapipe/calib/camera/tests/test_extractors.py
@@ -105,3 +105,30 @@ def test_check_chunk_shift(test_sigmaclippingextractor):
 
     # check if three chunks are used for the extraction
     assert len(sigmaclipping_stats_list) == 3
+
+
+def test_check_input(test_sigmaclippingextractor):
+    """test the input dl1 data"""
+
+    times = Time(
+        np.linspace(60117.911, 60117.9258, num=5000), scale="tai", format="mjd"
+    )
+    flatfield_dl1_data = np.random.normal(77.0, 10.0, size=(5000, 2, 1855))
+    # Insert one event with wrong event type
+    flatfield_event_type = np.full((5000,), 0)
+    flatfield_event_type[0] = 2
+    flatfield_dl1_table = Table(
+        [times, flatfield_dl1_data, flatfield_event_type],
+        names=("time_mono", "image", "event_type"),
+    )
+    with pytest.raises(ValueError):
+        _ = test_sigmaclippingextractor(dl1_table=flatfield_dl1_table, chunk_shift=2000)
+
+    # Construct event_type column for cosmic events
+    cosmic_event_type = np.full((5000,), 32)
+    cosmic_dl1_table = Table(
+        [times, flatfield_dl1_data, cosmic_event_type],
+        names=("time_mono", "image", "event_type"),
+    )
+    with pytest.raises(ValueError):
+        _ = test_sigmaclippingextractor(dl1_table=cosmic_dl1_table, chunk_shift=2000)

From 09ed8dcbc803e264150a92fdeb5ce367458bbac3 Mon Sep 17 00:00:00 2001
From: TjarkMiener <tjark.miener4692@googlemail.com>
Date: Thu, 27 Jun 2024 14:47:07 +0200
Subject: [PATCH 32/61] fill std_outliers also for the PlainExtractor

---
 src/ctapipe/calib/camera/extractor.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/ctapipe/calib/camera/extractor.py b/src/ctapipe/calib/camera/extractor.py
index aa17b008316..00d73155392 100644
--- a/src/ctapipe/calib/camera/extractor.py
+++ b/src/ctapipe/calib/camera/extractor.py
@@ -171,6 +171,7 @@ def _extract(
             median=pixel_median.filled(np.nan),
             median_outliers=image_median_outliers.filled(True),
             std=pixel_std.filled(np.nan),
+            std_outliers=np.full(np.shape(image_median_outliers), False),
         )
 
 

From 3052f69cdd01a85a07db8933b3c1aecc25e9ea6c Mon Sep 17 00:00:00 2001
From: TjarkMiener <tjark.miener4692@googlemail.com>
Date: Thu, 27 Jun 2024 15:24:39 +0200
Subject: [PATCH 33/61] renamed the traitlets for outliers

---
 src/ctapipe/calib/camera/extractor.py | 75 ++++++++++++++-------------
 1 file changed, 38 insertions(+), 37 deletions(-)

diff --git a/src/ctapipe/calib/camera/extractor.py b/src/ctapipe/calib/camera/extractor.py
index 00d73155392..57a8f1fd54c 100644
--- a/src/ctapipe/calib/camera/extractor.py
+++ b/src/ctapipe/calib/camera/extractor.py
@@ -28,16 +28,11 @@ class StatisticsExtractor(TelescopeComponent):
         2500,
         help="Size of the chunk used for the calculation of the statistical values",
     ).tag(config=True)
-    image_median_cut_outliers = List(
+    median_cut_outliers = List(
         [-0.3, 0.3],
-        help="""Interval of accepted image values \\
+        help="""Interval of accepted values \\
                 (fraction with respect to camera median value)""",
     ).tag(config=True)
-    image_std_cut_outliers = List(
-        [-3, 3],
-        help="""Interval (number of std) of accepted image standard deviation \\
-                around camera median value""",
-    ).tag(config=True)
 
     def __init__(self, subarray, config=None, parent=None, **kwargs):
         """
@@ -107,7 +102,7 @@ def __call__(
 
         # Function to split table data into appropriated chunks
         def _get_chunks(dl1_table_data):
-            return [
+            return (
                 (
                     dl1_table_data[i : i + self.chunk_size]
                     if i + self.chunk_size <= len(dl1_table_data)
@@ -116,7 +111,7 @@ def _get_chunks(dl1_table_data):
                     ]
                 )
                 for i in range(0, len(dl1_table_data), chunk_shift)
-            ]
+            )
 
         # Get the chunks for the timestamps and selected column name
         time_chunks = _get_chunks(dl1_table["time_mono"])
@@ -126,12 +121,12 @@ def _get_chunks(dl1_table_data):
         stats_list = []
         for images, times in zip(image_chunks, time_chunks):
             stats_list.append(
-                self._extract(images, times, masked_pixels_of_sample, outlier_check)
+                self.extract(images, times, masked_pixels_of_sample, outlier_check)
             )
         return stats_list
 
     @abstractmethod
-    def _extract(
+    def extract(
         self, images, times, masked_pixels_of_sample, outlier_check
     ) -> StatisticsContainer:
         pass
@@ -143,7 +138,7 @@ class PlainExtractor(StatisticsExtractor):
     using numpy and scipy functions
     """
 
-    def _extract(
+    def extract(
         self, images, times, masked_pixels_of_sample, outlier_check
     ) -> StatisticsContainer:
         # ensure numpy array
@@ -159,9 +154,9 @@ def _extract(
         pixel_std = np.ma.std(masked_images, axis=0)
 
         # outliers from median
-        image_median_outliers = np.logical_or(
-            pixel_median < self.image_median_cut_outliers[0],
-            pixel_median > self.image_median_cut_outliers[1],
+        median_outliers = np.logical_or(
+            pixel_median < self.median_cut_outliers[0],
+            pixel_median > self.median_cut_outliers[1],
         )
 
         return StatisticsContainer(
@@ -169,9 +164,9 @@ def _extract(
             extraction_stop=times[-1],
             mean=pixel_mean.filled(np.nan),
             median=pixel_median.filled(np.nan),
-            median_outliers=image_median_outliers.filled(True),
+            median_outliers=median_outliers.filled(True),
             std=pixel_std.filled(np.nan),
-            std_outliers=np.full(np.shape(image_median_outliers), False),
+            std_outliers=np.full(np.shape(median_outliers), False),
         )
 
 
@@ -181,6 +176,12 @@ class SigmaClippingExtractor(StatisticsExtractor):
     using astropy's sigma clipping functions
     """
 
+    std_cut_outliers = List(
+        [-3, 3],
+        help="""Interval of accepted standard deviation \\
+                around camera median value""",
+    ).tag(config=True)
+
     max_sigma = Int(
         default_value=4,
         help="Maximal value for the sigma clipping outlier removal",
@@ -190,7 +191,7 @@ class SigmaClippingExtractor(StatisticsExtractor):
         help="Number of iterations for the sigma clipping outlier removal",
     ).tag(config=True)
 
-    def _extract(
+    def extract(
         self, images, times, masked_pixels_of_sample, outlier_check
     ) -> StatisticsContainer:
         # ensure numpy array
@@ -220,34 +221,34 @@ def _extract(
         std_of_pixel_std = np.ma.std(pixel_std, axis=1)
 
         # outliers from median
-        image_deviation = pixel_median - median_of_pixel_median[:, np.newaxis]
+        median_deviation = pixel_median - median_of_pixel_median[:, np.newaxis]
         if outlier_check == "SIGNAL":
-            image_median_outliers = np.logical_or(
-                image_deviation
-                < self.image_median_cut_outliers[0]  # pylint: disable=unsubscriptable-object
+            median_outliers = np.logical_or(
+                median_deviation
+                < self.median_cut_outliers[0]  # pylint: disable=unsubscriptable-object
                 * median_of_pixel_median[:, np.newaxis],
-                image_deviation
-                > self.image_median_cut_outliers[1]  # pylint: disable=unsubscriptable-object
+                median_deviation
+                > self.median_cut_outliers[1]  # pylint: disable=unsubscriptable-object
                 * median_of_pixel_median[:, np.newaxis],
             )
         elif outlier_check == "NOISE":
-            image_median_outliers = np.logical_or(
-                image_deviation
-                < self.image_median_cut_outliers[0]  # pylint: disable=unsubscriptable-object
+            median_outliers = np.logical_or(
+                median_deviation
+                < self.median_cut_outliers[0]  # pylint: disable=unsubscriptable-object
                 * std_of_pixel_std[:, np.newaxis],
-                image_deviation
-                > self.image_median_cut_outliers[1]  # pylint: disable=unsubscriptable-object
+                median_deviation
+                > self.median_cut_outliers[1]  # pylint: disable=unsubscriptable-object
                 * std_of_pixel_std[:, np.newaxis],
             )
 
         # outliers from standard deviation
-        deviation = pixel_std - median_of_pixel_std[:, np.newaxis]
-        image_std_outliers = np.logical_or(
-            deviation
-            < self.image_std_cut_outliers[0]  # pylint: disable=unsubscriptable-object
+        std_deviation = pixel_std - median_of_pixel_std[:, np.newaxis]
+        std_outliers = np.logical_or(
+            std_deviation
+            < self.std_cut_outliers[0]  # pylint: disable=unsubscriptable-object
             * std_of_pixel_std[:, np.newaxis],
-            deviation
-            > self.image_std_cut_outliers[1]  # pylint: disable=unsubscriptable-object
+            std_deviation
+            > self.std_cut_outliers[1]  # pylint: disable=unsubscriptable-object
             * std_of_pixel_std[:, np.newaxis],
         )
 
@@ -256,7 +257,7 @@ def _extract(
             extraction_stop=times[-1],
             mean=pixel_mean.filled(np.nan),
             median=pixel_median.filled(np.nan),
-            median_outliers=image_median_outliers.filled(True),
+            median_outliers=median_outliers.filled(True),
             std=pixel_std.filled(np.nan),
-            std_outliers=image_std_outliers.filled(True),
+            std_outliers=std_outliers.filled(True),
         )

From dbf632266bdaf0d7dd937953f726441b43002306 Mon Sep 17 00:00:00 2001
From: TjarkMiener <tjark.miener4692@googlemail.com>
Date: Fri, 28 Jun 2024 08:20:23 +0200
Subject: [PATCH 34/61] set field for extraction start and stop to NAN_TIME

---
 src/ctapipe/containers.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/ctapipe/containers.py b/src/ctapipe/containers.py
index dcd70255519..bf2da98da68 100644
--- a/src/ctapipe/containers.py
+++ b/src/ctapipe/containers.py
@@ -415,8 +415,8 @@ class MorphologyContainer(Container):
 class StatisticsContainer(Container):
     """Store descriptive statistics of a chunk of images"""
 
-    extraction_start = Field(np.float32(nan), "start of the extraction chunk")
-    extraction_stop = Field(np.float32(nan), "stop of the extraction chunk")
+    extraction_start = Field(NAN_TIME, "start of the extraction chunk")
+    extraction_stop = Field(NAN_TIME, "stop of the extraction chunk")
     mean = Field(
         None,
         "mean of a pixel-wise quantity for each channel"

From 643da4c68e717713de417ace91744ee6f8970623 Mon Sep 17 00:00:00 2001
From: TjarkMiener <tjark.miener4692@googlemail.com>
Date: Fri, 28 Jun 2024 10:54:17 +0200
Subject: [PATCH 35/61] bug fix for chunks creation

The for loop needs only run till length of the table minus the size of a chunk. Before the last chunks were created multiple times at the end.
---
 src/ctapipe/calib/camera/extractor.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/ctapipe/calib/camera/extractor.py b/src/ctapipe/calib/camera/extractor.py
index 57a8f1fd54c..ec73b2ec30c 100644
--- a/src/ctapipe/calib/camera/extractor.py
+++ b/src/ctapipe/calib/camera/extractor.py
@@ -110,7 +110,7 @@ def _get_chunks(dl1_table_data):
                         len(dl1_table_data) - self.chunk_size : len(dl1_table_data)
                     ]
                 )
-                for i in range(0, len(dl1_table_data), chunk_shift)
+                for i in range(0, len(dl1_table_data) - self.chunk_size, chunk_shift)
             )
 
         # Get the chunks for the timestamps and selected column name

From 6d7a9e58a5668b5cecf73ec35aa9f3725e912a35 Mon Sep 17 00:00:00 2001
From: TjarkMiener <tjark.miener4692@googlemail.com>
Date: Fri, 28 Jun 2024 11:52:35 +0200
Subject: [PATCH 36/61] bug fix treatment of last chunk and fix tests

When the chunks are shifted (in the second pass) we want to drop overflow chunks, while for the first pass we want to keep the last overflow chunk and extract the stats for the last possible chunk.
---
 src/ctapipe/calib/camera/extractor.py         | 34 +++++++++++--------
 .../calib/camera/tests/test_extractors.py     | 17 ++++++----
 2 files changed, 29 insertions(+), 22 deletions(-)

diff --git a/src/ctapipe/calib/camera/extractor.py b/src/ctapipe/calib/camera/extractor.py
index ec73b2ec30c..52d5f39e0a7 100644
--- a/src/ctapipe/calib/camera/extractor.py
+++ b/src/ctapipe/calib/camera/extractor.py
@@ -95,27 +95,31 @@ def __call__(
             raise ValueError(
                 f"The length of the DL1 table ({len(dl1_table[col_name])}) must be greater or equal than the size of the chunk ({self.chunk_size})."
             )
-        # If no chunk_shift is provided, the chunk_shift is set to self.chunk_size
-        # meaning that the extraction chunks are not overlapping.
-        if chunk_shift is None:
-            chunk_shift = self.chunk_size
 
         # Function to split table data into appropriated chunks
-        def _get_chunks(dl1_table_data):
-            return (
-                (
+        def _get_chunks(dl1_table_data, chunk_shift):
+            if chunk_shift is None:
+                return (
+                    (
+                        dl1_table_data[i : i + self.chunk_size]
+                        if i + self.chunk_size <= len(dl1_table_data)
+                        else dl1_table_data[
+                            len(dl1_table_data) - self.chunk_size : len(dl1_table_data)
+                        ]
+                    )
+                    for i in range(0, len(dl1_table_data), self.chunk_size)
+                )
+            else:
+                return (
                     dl1_table_data[i : i + self.chunk_size]
-                    if i + self.chunk_size <= len(dl1_table_data)
-                    else dl1_table_data[
-                        len(dl1_table_data) - self.chunk_size : len(dl1_table_data)
-                    ]
+                    for i in range(
+                        0, len(dl1_table_data) - self.chunk_size, chunk_shift
+                    )
                 )
-                for i in range(0, len(dl1_table_data) - self.chunk_size, chunk_shift)
-            )
 
         # Get the chunks for the timestamps and selected column name
-        time_chunks = _get_chunks(dl1_table["time_mono"])
-        image_chunks = _get_chunks(dl1_table[col_name].data)
+        time_chunks = _get_chunks(dl1_table["time_mono"], chunk_shift)
+        image_chunks = _get_chunks(dl1_table[col_name].data, chunk_shift)
 
         # Calculate the statistics from a chunk of images
         stats_list = []
diff --git a/src/ctapipe/calib/camera/tests/test_extractors.py b/src/ctapipe/calib/camera/tests/test_extractors.py
index 814211da576..a72931d48e3 100644
--- a/src/ctapipe/calib/camera/tests/test_extractors.py
+++ b/src/ctapipe/calib/camera/tests/test_extractors.py
@@ -90,21 +90,24 @@ def test_check_chunk_shift(test_sigmaclippingextractor):
     """test the chunk shift option and the boundary case for the last chunk"""
 
     times = Time(
-        np.linspace(60117.911, 60117.9258, num=5000), scale="tai", format="mjd"
+        np.linspace(60117.911, 60117.9258, num=5500), scale="tai", format="mjd"
     )
-    flatfield_dl1_data = np.random.normal(77.0, 10.0, size=(5000, 2, 1855))
-    flatfield_event_type = np.full((5000,), 0)
+    flatfield_dl1_data = np.random.normal(77.0, 10.0, size=(5500, 2, 1855))
+    flatfield_event_type = np.full((5500,), 0)
     # insert outliers
     flatfield_dl1_table = Table(
         [times, flatfield_dl1_data, flatfield_event_type],
         names=("time_mono", "image", "event_type"),
     )
-    sigmaclipping_stats_list = test_sigmaclippingextractor(
+
+    stats_list = test_sigmaclippingextractor(dl1_table=flatfield_dl1_table)
+    stats_list_chunk_shift = test_sigmaclippingextractor(
         dl1_table=flatfield_dl1_table, chunk_shift=2000
     )
 
     # check if three chunks are used for the extraction
-    assert len(sigmaclipping_stats_list) == 3
+    assert len(stats_list) == 3
+    assert len(stats_list_chunk_shift) == 2
 
 
 def test_check_input(test_sigmaclippingextractor):
@@ -122,7 +125,7 @@ def test_check_input(test_sigmaclippingextractor):
         names=("time_mono", "image", "event_type"),
     )
     with pytest.raises(ValueError):
-        _ = test_sigmaclippingextractor(dl1_table=flatfield_dl1_table, chunk_shift=2000)
+        _ = test_sigmaclippingextractor(dl1_table=flatfield_dl1_table)
 
     # Construct event_type column for cosmic events
     cosmic_event_type = np.full((5000,), 32)
@@ -131,4 +134,4 @@ def test_check_input(test_sigmaclippingextractor):
         names=("time_mono", "image", "event_type"),
     )
     with pytest.raises(ValueError):
-        _ = test_sigmaclippingextractor(dl1_table=cosmic_dl1_table, chunk_shift=2000)
+        _ = test_sigmaclippingextractor(dl1_table=cosmic_dl1_table)

From 3087ff31faacfdce917f46e3060b7bc4fc022864 Mon Sep 17 00:00:00 2001
From: TjarkMiener <tjark.miener4692@googlemail.com>
Date: Fri, 28 Jun 2024 13:24:50 +0200
Subject: [PATCH 37/61] edit comments

---
 src/ctapipe/calib/camera/tests/test_extractors.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/ctapipe/calib/camera/tests/test_extractors.py b/src/ctapipe/calib/camera/tests/test_extractors.py
index a72931d48e3..a72a11ed55a 100644
--- a/src/ctapipe/calib/camera/tests/test_extractors.py
+++ b/src/ctapipe/calib/camera/tests/test_extractors.py
@@ -94,7 +94,6 @@ def test_check_chunk_shift(test_sigmaclippingextractor):
     )
     flatfield_dl1_data = np.random.normal(77.0, 10.0, size=(5500, 2, 1855))
     flatfield_event_type = np.full((5500,), 0)
-    # insert outliers
     flatfield_dl1_table = Table(
         [times, flatfield_dl1_data, flatfield_event_type],
         names=("time_mono", "image", "event_type"),
@@ -105,8 +104,9 @@ def test_check_chunk_shift(test_sigmaclippingextractor):
         dl1_table=flatfield_dl1_table, chunk_shift=2000
     )
 
-    # check if three chunks are used for the extraction
+    # check if three chunks are used for the extraction as the last chunk overflows
     assert len(stats_list) == 3
+    # check if two chunks are used for the extraction as the last chunk is dropped
     assert len(stats_list_chunk_shift) == 2
 
 

From 6147c0e3405f57d8e100323f349075c3a83309e4 Mon Sep 17 00:00:00 2001
From: TjarkMiener <tjark.miener4692@googlemail.com>
Date: Mon, 1 Jul 2024 15:48:23 +0200
Subject: [PATCH 38/61] updated docs and tests

---
 src/ctapipe/calib/camera/extractor.py         | 58 ++++++++----------
 .../calib/camera/tests/test_extractors.py     | 60 ++++++++++++-------
 2 files changed, 65 insertions(+), 53 deletions(-)

diff --git a/src/ctapipe/calib/camera/extractor.py b/src/ctapipe/calib/camera/extractor.py
index 52d5f39e0a7..79d7b34293d 100644
--- a/src/ctapipe/calib/camera/extractor.py
+++ b/src/ctapipe/calib/camera/extractor.py
@@ -22,28 +22,15 @@
 
 
 class StatisticsExtractor(TelescopeComponent):
-    """Base StatisticsExtractor component"""
+    """
+    Base component to handle the extraction of the statistics from a dl1 table
+    containing charges, peak times and/or charge variances (images).
+    """
 
     chunk_size = Int(
         2500,
         help="Size of the chunk used for the calculation of the statistical values",
     ).tag(config=True)
-    median_cut_outliers = List(
-        [-0.3, 0.3],
-        help="""Interval of accepted values \\
-                (fraction with respect to camera median value)""",
-    ).tag(config=True)
-
-    def __init__(self, subarray, config=None, parent=None, **kwargs):
-        """
-        Base component to handle the extraction of the statistics
-        from a chunk of charges and pulse times (images).
-
-        Parameters
-        ----------
-        kwargs
-        """
-        super().__init__(subarray=subarray, config=config, parent=parent, **kwargs)
 
     def __call__(
         self,
@@ -53,8 +40,8 @@ def __call__(
         col_name="image",
     ) -> list:
         """
-        Call the relevant functions to extract the statistics
-        for the particular extractor.
+        Prepare the extraction chunks and call the relevant function of the particular extractor
+        to extract the statistical values.
 
         Parameters
         ----------
@@ -75,7 +62,7 @@ def __call__(
             List of extracted statistics and extraction chunks
         """
 
-        # Check if the provided data contains an unique type of calibration events
+        # Check if the provided data contains a unique type of calibration events
         # and if the events contain signal or noise.
         if np.all(dl1_table["event_type"] == 0) or np.all(dl1_table["event_type"] == 1):
             outlier_check = "SIGNAL"
@@ -87,7 +74,7 @@ def __call__(
             outlier_check = "NOISE"
         else:
             raise ValueError(
-                "Invalid input data. Only dl1-like images of claibration events are accepted!"
+                "Invalid input data. Only dl1-like images of calibration events are accepted!"
             )
 
         # Check if the length of the dl1 table is greater or equal than the size of the chunk.
@@ -138,29 +125,34 @@ def extract(
 
 class PlainExtractor(StatisticsExtractor):
     """
-    Extract the statistics from a chunk of images
+    Extract the statistics from a chunk of peak time images
     using numpy and scipy functions
     """
 
+    time_median_cut_outliers = List(
+        [0, 60],
+        help="Interval (in waveform samples) of accepted median peak time values",
+    ).tag(config=True)
+
     def extract(
         self, images, times, masked_pixels_of_sample, outlier_check
     ) -> StatisticsContainer:
         # ensure numpy array
         masked_images = np.ma.array(images, mask=masked_pixels_of_sample)
 
-        # median over the sample per pixel
+        # median over the chunk per pixel
         pixel_median = np.ma.median(masked_images, axis=0)
 
-        # mean over the sample per pixel
+        # mean over the chunk per pixel
         pixel_mean = np.ma.mean(masked_images, axis=0)
 
-        # std over the sample per pixel
+        # std over the chunk per pixel
         pixel_std = np.ma.std(masked_images, axis=0)
 
         # outliers from median
         median_outliers = np.logical_or(
-            pixel_median < self.median_cut_outliers[0],
-            pixel_median > self.median_cut_outliers[1],
+            pixel_median < self.time_median_cut_outliers[0],
+            pixel_median > self.time_median_cut_outliers[1],
         )
 
         return StatisticsContainer(
@@ -176,16 +168,18 @@ def extract(
 
 class SigmaClippingExtractor(StatisticsExtractor):
     """
-    Extract the statistics from a chunk of images
+    Extract the statistics from a chunk of charge or variance images
     using astropy's sigma clipping functions
     """
 
+    median_cut_outliers = List(
+        [-0.3, 0.3],
+        help="Interval of accepted median values with respect to the camera median value of the pixel medians",
+    ).tag(config=True)
     std_cut_outliers = List(
         [-3, 3],
-        help="""Interval of accepted standard deviation \\
-                around camera median value""",
+        help="Interval of accepted standard deviations with respect to the camera median value of the pixel standard deviations",
     ).tag(config=True)
-
     max_sigma = Int(
         default_value=4,
         help="Maximal value for the sigma clipping outlier removal",
@@ -201,7 +195,7 @@ def extract(
         # ensure numpy array
         masked_images = np.ma.array(images, mask=masked_pixels_of_sample)
 
-        # mean, median, and std over the sample per pixel
+        # mean, median, and std over the chunk per pixel
         pixel_mean, pixel_median, pixel_std = sigma_clipped_stats(
             masked_images,
             sigma=self.max_sigma,
diff --git a/src/ctapipe/calib/camera/tests/test_extractors.py b/src/ctapipe/calib/camera/tests/test_extractors.py
index a72a11ed55a..b3a02064c03 100644
--- a/src/ctapipe/calib/camera/tests/test_extractors.py
+++ b/src/ctapipe/calib/camera/tests/test_extractors.py
@@ -25,44 +25,55 @@ def fixture_test_sigmaclippingextractor(example_subarray):
 def test_extractors(test_plainextractor, test_sigmaclippingextractor):
     """test basic functionality of the StatisticsExtractors"""
 
+    # Create dummy data for testing
     times = Time(
         np.linspace(60117.911, 60117.9258, num=5000), scale="tai", format="mjd"
     )
     pedestal_dl1_data = np.random.normal(2.0, 5.0, size=(5000, 2, 1855))
     pedestal_event_type = np.full((5000,), 2)
-    flatfield_dl1_data = np.random.normal(77.0, 10.0, size=(5000, 2, 1855))
+    flatfield_charge_dl1_data = np.random.normal(77.0, 10.0, size=(5000, 2, 1855))
+    flatfield_time_dl1_data = np.random.normal(18.0, 5.0, size=(5000, 2, 1855))
     flatfield_event_type = np.full((5000,), 0)
-
+    # Create dl1 tables
     pedestal_dl1_table = Table(
         [times, pedestal_dl1_data, pedestal_event_type],
         names=("time_mono", "image", "event_type"),
     )
-    flatfield_dl1_table = Table(
-        [times, flatfield_dl1_data, flatfield_event_type],
+    flatfield_charge_dl1_table = Table(
+        [times, flatfield_charge_dl1_data, flatfield_event_type],
         names=("time_mono", "image", "event_type"),
     )
-
-    plain_stats_list = test_plainextractor(dl1_table=pedestal_dl1_table)
-    sigmaclipping_stats_list = test_sigmaclippingextractor(
-        dl1_table=flatfield_dl1_table
+    flatfield_time_dl1_table = Table(
+        [times, flatfield_time_dl1_data, flatfield_event_type],
+        names=("time_mono", "peak_time", "event_type"),
     )
+    # Extract the statistical values
+    pedestal_stats_list = test_sigmaclippingextractor(dl1_table=pedestal_dl1_table)
+    flatfield_charge_stats_list = test_sigmaclippingextractor(
+        dl1_table=flatfield_charge_dl1_table
+    )
+    flatfield_time_stats_list = test_plainextractor(
+        dl1_table=flatfield_time_dl1_table, col_name="peak_time"
+    )
+    # check if the calculated statistical values are reasonable
+    # for a camera with two gain channels
+    assert not np.any(np.abs(pedestal_stats_list[0].mean - 2.0) > 1.5)
+    assert not np.any(np.abs(flatfield_charge_stats_list[0].mean - 77.0) > 1.5)
+    assert not np.any(np.abs(flatfield_time_stats_list[0].mean - 18.0) > 1.5)
 
-    assert not np.any(np.abs(plain_stats_list[0].mean - 2.0) > 1.5)
-    assert not np.any(np.abs(sigmaclipping_stats_list[0].mean - 77.0) > 1.5)
-
-    assert not np.any(np.abs(plain_stats_list[0].mean - 2.0) > 1.5)
-    assert not np.any(np.abs(sigmaclipping_stats_list[0].mean - 77.0) > 1.5)
-
-    assert not np.any(np.abs(plain_stats_list[1].median - 2.0) > 1.5)
-    assert not np.any(np.abs(sigmaclipping_stats_list[1].median - 77.0) > 1.5)
+    assert not np.any(np.abs(pedestal_stats_list[1].median - 2.0) > 1.5)
+    assert not np.any(np.abs(flatfield_charge_stats_list[1].median - 77.0) > 1.5)
+    assert not np.any(np.abs(flatfield_time_stats_list[1].median - 18.0) > 1.5)
 
-    assert not np.any(np.abs(plain_stats_list[0].std - 5.0) > 1.5)
-    assert not np.any(np.abs(sigmaclipping_stats_list[0].std - 10.0) > 1.5)
+    assert not np.any(np.abs(pedestal_stats_list[0].std - 5.0) > 1.5)
+    assert not np.any(np.abs(flatfield_charge_stats_list[0].std - 10.0) > 1.5)
+    assert not np.any(np.abs(flatfield_time_stats_list[0].std - 5.0) > 1.5)
 
 
 def test_check_outliers(test_sigmaclippingextractor):
     """test detection ability of outliers"""
 
+    # Create dummy data for testing
     times = Time(
         np.linspace(60117.911, 60117.9258, num=5000), scale="tai", format="mjd"
     )
@@ -71,14 +82,15 @@ def test_check_outliers(test_sigmaclippingextractor):
     # insert outliers
     flatfield_dl1_data[:, 0, 120] = 120.0
     flatfield_dl1_data[:, 1, 67] = 120.0
+    # Create dl1 table
     flatfield_dl1_table = Table(
         [times, flatfield_dl1_data, flatfield_event_type],
         names=("time_mono", "image", "event_type"),
     )
+    # Extract the statistical values
     sigmaclipping_stats_list = test_sigmaclippingextractor(
         dl1_table=flatfield_dl1_table
     )
-
     # check if outliers where detected correctly
     assert sigmaclipping_stats_list[0].median_outliers[0][120]
     assert sigmaclipping_stats_list[0].median_outliers[1][67]
@@ -89,21 +101,22 @@ def test_check_outliers(test_sigmaclippingextractor):
 def test_check_chunk_shift(test_sigmaclippingextractor):
     """test the chunk shift option and the boundary case for the last chunk"""
 
+    # Create dummy data for testing
     times = Time(
         np.linspace(60117.911, 60117.9258, num=5500), scale="tai", format="mjd"
     )
     flatfield_dl1_data = np.random.normal(77.0, 10.0, size=(5500, 2, 1855))
     flatfield_event_type = np.full((5500,), 0)
+    # Create dl1 table
     flatfield_dl1_table = Table(
         [times, flatfield_dl1_data, flatfield_event_type],
         names=("time_mono", "image", "event_type"),
     )
-
+    # Extract the statistical values
     stats_list = test_sigmaclippingextractor(dl1_table=flatfield_dl1_table)
     stats_list_chunk_shift = test_sigmaclippingextractor(
         dl1_table=flatfield_dl1_table, chunk_shift=2000
     )
-
     # check if three chunks are used for the extraction as the last chunk overflows
     assert len(stats_list) == 3
     # check if two chunks are used for the extraction as the last chunk is dropped
@@ -113,6 +126,7 @@ def test_check_chunk_shift(test_sigmaclippingextractor):
 def test_check_input(test_sigmaclippingextractor):
     """test the input dl1 data"""
 
+    # Create dummy data for testing
     times = Time(
         np.linspace(60117.911, 60117.9258, num=5000), scale="tai", format="mjd"
     )
@@ -120,18 +134,22 @@ def test_check_input(test_sigmaclippingextractor):
     # Insert one event with wrong event type
     flatfield_event_type = np.full((5000,), 0)
     flatfield_event_type[0] = 2
+    # Create dl1 table
     flatfield_dl1_table = Table(
         [times, flatfield_dl1_data, flatfield_event_type],
         names=("time_mono", "image", "event_type"),
     )
+    # Try to extract the statistical values, which results in a ValueError
     with pytest.raises(ValueError):
         _ = test_sigmaclippingextractor(dl1_table=flatfield_dl1_table)
 
     # Construct event_type column for cosmic events
     cosmic_event_type = np.full((5000,), 32)
+    # Create dl1 table
     cosmic_dl1_table = Table(
         [times, flatfield_dl1_data, cosmic_event_type],
         names=("time_mono", "image", "event_type"),
     )
+    # Try to extract the statistical values, which results in a ValueError
     with pytest.raises(ValueError):
         _ = test_sigmaclippingextractor(dl1_table=cosmic_dl1_table)

From 537b09214d57e8cd3ec55cfe855507fdd37d2546 Mon Sep 17 00:00:00 2001
From: TjarkMiener <tjark.miener4692@googlemail.com>
Date: Thu, 4 Jul 2024 08:06:18 +0200
Subject: [PATCH 39/61] properly name the fixture

---
 .../calib/camera/tests/test_extractors.py     | 34 +++++++++----------
 1 file changed, 16 insertions(+), 18 deletions(-)

diff --git a/src/ctapipe/calib/camera/tests/test_extractors.py b/src/ctapipe/calib/camera/tests/test_extractors.py
index b3a02064c03..795621df04c 100644
--- a/src/ctapipe/calib/camera/tests/test_extractors.py
+++ b/src/ctapipe/calib/camera/tests/test_extractors.py
@@ -10,19 +10,19 @@
 from ctapipe.calib.camera.extractor import PlainExtractor, SigmaClippingExtractor
 
 
-@pytest.fixture(name="test_plainextractor")
-def fixture_test_plainextractor(example_subarray):
+@pytest.fixture()
+def plain_extractor(example_subarray):
     """test the PlainExtractor"""
     return PlainExtractor(subarray=example_subarray, chunk_size=2500)
 
 
-@pytest.fixture(name="test_sigmaclippingextractor")
-def fixture_test_sigmaclippingextractor(example_subarray):
+@pytest.fixture()
+def sigmaclipping_extractor(example_subarray):
     """test the SigmaClippingExtractor"""
     return SigmaClippingExtractor(subarray=example_subarray, chunk_size=2500)
 
 
-def test_extractors(test_plainextractor, test_sigmaclippingextractor):
+def test_extractors(plain_extractor, sigmaclipping_extractor):
     """test basic functionality of the StatisticsExtractors"""
 
     # Create dummy data for testing
@@ -48,11 +48,11 @@ def test_extractors(test_plainextractor, test_sigmaclippingextractor):
         names=("time_mono", "peak_time", "event_type"),
     )
     # Extract the statistical values
-    pedestal_stats_list = test_sigmaclippingextractor(dl1_table=pedestal_dl1_table)
-    flatfield_charge_stats_list = test_sigmaclippingextractor(
+    pedestal_stats_list = sigmaclipping_extractor(dl1_table=pedestal_dl1_table)
+    flatfield_charge_stats_list = sigmaclipping_extractor(
         dl1_table=flatfield_charge_dl1_table
     )
-    flatfield_time_stats_list = test_plainextractor(
+    flatfield_time_stats_list = plain_extractor(
         dl1_table=flatfield_time_dl1_table, col_name="peak_time"
     )
     # check if the calculated statistical values are reasonable
@@ -70,7 +70,7 @@ def test_extractors(test_plainextractor, test_sigmaclippingextractor):
     assert not np.any(np.abs(flatfield_time_stats_list[0].std - 5.0) > 1.5)
 
 
-def test_check_outliers(test_sigmaclippingextractor):
+def test_check_outliers(sigmaclipping_extractor):
     """test detection ability of outliers"""
 
     # Create dummy data for testing
@@ -88,9 +88,7 @@ def test_check_outliers(test_sigmaclippingextractor):
         names=("time_mono", "image", "event_type"),
     )
     # Extract the statistical values
-    sigmaclipping_stats_list = test_sigmaclippingextractor(
-        dl1_table=flatfield_dl1_table
-    )
+    sigmaclipping_stats_list = sigmaclipping_extractor(dl1_table=flatfield_dl1_table)
     # check if outliers where detected correctly
     assert sigmaclipping_stats_list[0].median_outliers[0][120]
     assert sigmaclipping_stats_list[0].median_outliers[1][67]
@@ -98,7 +96,7 @@ def test_check_outliers(test_sigmaclippingextractor):
     assert sigmaclipping_stats_list[1].median_outliers[1][67]
 
 
-def test_check_chunk_shift(test_sigmaclippingextractor):
+def test_check_chunk_shift(sigmaclipping_extractor):
     """test the chunk shift option and the boundary case for the last chunk"""
 
     # Create dummy data for testing
@@ -113,8 +111,8 @@ def test_check_chunk_shift(test_sigmaclippingextractor):
         names=("time_mono", "image", "event_type"),
     )
     # Extract the statistical values
-    stats_list = test_sigmaclippingextractor(dl1_table=flatfield_dl1_table)
-    stats_list_chunk_shift = test_sigmaclippingextractor(
+    stats_list = sigmaclipping_extractor(dl1_table=flatfield_dl1_table)
+    stats_list_chunk_shift = sigmaclipping_extractor(
         dl1_table=flatfield_dl1_table, chunk_shift=2000
     )
     # check if three chunks are used for the extraction as the last chunk overflows
@@ -123,7 +121,7 @@ def test_check_chunk_shift(test_sigmaclippingextractor):
     assert len(stats_list_chunk_shift) == 2
 
 
-def test_check_input(test_sigmaclippingextractor):
+def test_check_input(sigmaclipping_extractor):
     """test the input dl1 data"""
 
     # Create dummy data for testing
@@ -141,7 +139,7 @@ def test_check_input(test_sigmaclippingextractor):
     )
     # Try to extract the statistical values, which results in a ValueError
     with pytest.raises(ValueError):
-        _ = test_sigmaclippingextractor(dl1_table=flatfield_dl1_table)
+        _ = sigmaclipping_extractor(dl1_table=flatfield_dl1_table)
 
     # Construct event_type column for cosmic events
     cosmic_event_type = np.full((5000,), 32)
@@ -152,4 +150,4 @@ def test_check_input(test_sigmaclippingextractor):
     )
     # Try to extract the statistical values, which results in a ValueError
     with pytest.raises(ValueError):
-        _ = test_sigmaclippingextractor(dl1_table=cosmic_dl1_table)
+        _ = sigmaclipping_extractor(dl1_table=cosmic_dl1_table)

From 362d33618cdd8d9ac9870eab13620578ad4ea51d Mon Sep 17 00:00:00 2001
From: TjarkMiener <tjark.miener4692@googlemail.com>
Date: Thu, 4 Jul 2024 13:13:19 +0200
Subject: [PATCH 40/61] update doc strings and variable names

move outlier_check to traitlet; and remove redundant test on the input verification

init extractor classes without fixture inside the test itself
---
 src/ctapipe/calib/camera/extractor.py         | 131 ++++++++-------
 .../calib/camera/tests/test_extractor.py      | 119 ++++++++++++++
 .../calib/camera/tests/test_extractors.py     | 153 ------------------
 3 files changed, 182 insertions(+), 221 deletions(-)
 create mode 100644 src/ctapipe/calib/camera/tests/test_extractor.py
 delete mode 100644 src/ctapipe/calib/camera/tests/test_extractors.py

diff --git a/src/ctapipe/calib/camera/extractor.py b/src/ctapipe/calib/camera/extractor.py
index 79d7b34293d..ac9f1178cc0 100644
--- a/src/ctapipe/calib/camera/extractor.py
+++ b/src/ctapipe/calib/camera/extractor.py
@@ -16,6 +16,7 @@
 from ctapipe.containers import StatisticsContainer
 from ctapipe.core import TelescopeComponent
 from ctapipe.core.traits import (
+    CaselessStrEnum,
     Int,
     List,
 )
@@ -46,9 +47,8 @@ def __call__(
         Parameters
         ----------
         dl1_table : astropy.table.Table
-            DL1 table with images of shape (n_images, n_channels, n_pix),
-            timestamps of shape (n_images, ), and event type (n_images, )
-            stored in an astropy Table
+            DL1 table with images of shape (n_images, n_channels, n_pix)
+            and timestamps of shape (n_images, ) stored in an astropy Table
         masked_pixels_of_sample : ndarray
             boolean array of masked pixels of shape (n_pix, ) that are not available for processing
         chunk_shift : int
@@ -62,21 +62,6 @@ def __call__(
             List of extracted statistics and extraction chunks
         """
 
-        # Check if the provided data contains a unique type of calibration events
-        # and if the events contain signal or noise.
-        if np.all(dl1_table["event_type"] == 0) or np.all(dl1_table["event_type"] == 1):
-            outlier_check = "SIGNAL"
-        elif (
-            np.all(dl1_table["event_type"] == 2)
-            or np.all(dl1_table["event_type"] == 3)
-            or np.all(dl1_table["event_type"] == 4)
-        ):
-            outlier_check = "NOISE"
-        else:
-            raise ValueError(
-                "Invalid input data. Only dl1-like images of calibration events are accepted!"
-            )
-
         # Check if the length of the dl1 table is greater or equal than the size of the chunk.
         if len(dl1_table[col_name]) < self.chunk_size:
             raise ValueError(
@@ -111,15 +96,11 @@ def _get_chunks(dl1_table_data, chunk_shift):
         # Calculate the statistics from a chunk of images
         stats_list = []
         for images, times in zip(image_chunks, time_chunks):
-            stats_list.append(
-                self.extract(images, times, masked_pixels_of_sample, outlier_check)
-            )
+            stats_list.append(self.extract(images, times, masked_pixels_of_sample))
         return stats_list
 
     @abstractmethod
-    def extract(
-        self, images, times, masked_pixels_of_sample, outlier_check
-    ) -> StatisticsContainer:
+    def extract(self, images, times, masked_pixels_of_sample) -> StatisticsContainer:
         pass
 
 
@@ -134,9 +115,7 @@ class PlainExtractor(StatisticsExtractor):
         help="Interval (in waveform samples) of accepted median peak time values",
     ).tag(config=True)
 
-    def extract(
-        self, images, times, masked_pixels_of_sample, outlier_check
-    ) -> StatisticsContainer:
+    def extract(self, images, times, masked_pixels_of_sample) -> StatisticsContainer:
         # ensure numpy array
         masked_images = np.ma.array(images, mask=masked_pixels_of_sample)
 
@@ -172,13 +151,29 @@ class SigmaClippingExtractor(StatisticsExtractor):
     using astropy's sigma clipping functions
     """
 
-    median_cut_outliers = List(
+    median_outliers_interval = List(
         [-0.3, 0.3],
-        help="Interval of accepted median values with respect to the camera median value of the pixel medians",
+        help=(
+            "Interval of the multiplicative factor for detecting outliers based on"
+            "the deviation of the median distribution."
+            "- If `outlier_method` is `median`, the factors are multiplied by"
+            "  the camera median of pixel medians to set the thresholds for identifying outliers."
+            "- If `outlier_method` is `standard_deviation`, the factors are multiplied by"
+            "  the camera standard deviation of pixel medians to set the thresholds for identifying outliers."
+        ),
+    ).tag(config=True)
+    outlier_method = CaselessStrEnum(
+        values=["median", "standard_deviation"],
+        help="Method used for detecting outliers based on the deviation of the median distribution",
     ).tag(config=True)
-    std_cut_outliers = List(
+    std_outliers_interval = List(
         [-3, 3],
-        help="Interval of accepted standard deviations with respect to the camera median value of the pixel standard deviations",
+        help=(
+            "Interval of the multiplicative factor for detecting outliers based on"
+            "the deviation of the standard deviation distribution."
+            "The factors are multiplied by the camera standard deviation of pixel standard deviations"
+            "to set the thresholds for identifying outliers."
+        ),
     ).tag(config=True)
     max_sigma = Int(
         default_value=4,
@@ -189,14 +184,12 @@ class SigmaClippingExtractor(StatisticsExtractor):
         help="Number of iterations for the sigma clipping outlier removal",
     ).tag(config=True)
 
-    def extract(
-        self, images, times, masked_pixels_of_sample, outlier_check
-    ) -> StatisticsContainer:
-        # ensure numpy array
+    def extract(self, images, times, masked_pixels_of_sample) -> StatisticsContainer:
+        # Mask broken pixels
         masked_images = np.ma.array(images, mask=masked_pixels_of_sample)
 
-        # mean, median, and std over the chunk per pixel
-        pixel_mean, pixel_median, pixel_std = sigma_clipped_stats(
+        # Calculate the mean, median, and std over the chunk per pixel
+        pix_mean, pix_median, pix_std = sigma_clipped_stats(
             masked_images,
             sigma=self.max_sigma,
             maxiters=self.iterations,
@@ -204,58 +197,60 @@ def extract(
             axis=0,
         )
 
-        # mask pixels without defined statistical values
-        pixel_mean = np.ma.array(pixel_mean, mask=np.isnan(pixel_mean))
-        pixel_median = np.ma.array(pixel_median, mask=np.isnan(pixel_median))
-        pixel_std = np.ma.array(pixel_std, mask=np.isnan(pixel_std))
+        # Mask pixels without defined statistical values
+        pix_mean = np.ma.array(pix_mean, mask=np.isnan(pix_mean))
+        pix_median = np.ma.array(pix_median, mask=np.isnan(pix_median))
+        pix_std = np.ma.array(pix_std, mask=np.isnan(pix_std))
 
-        # median of the median over the camera
-        median_of_pixel_median = np.ma.median(pixel_median, axis=1)
+        # Camera median of the pixel medians
+        cam_median_of_pix_median = np.ma.median(pix_median, axis=1)
 
-        # median of the std over the camera
-        median_of_pixel_std = np.ma.median(pixel_std, axis=1)
+        # Camera median of the pixel stds
+        cam_median_of_pix_std = np.ma.median(pix_std, axis=1)
 
-        # std of the std over camera
-        std_of_pixel_std = np.ma.std(pixel_std, axis=1)
+        # Camera std of the pixel stds
+        cam_std_of_pix_std = np.ma.std(pix_std, axis=1)
 
-        # outliers from median
-        median_deviation = pixel_median - median_of_pixel_median[:, np.newaxis]
-        if outlier_check == "SIGNAL":
+        # Detect outliers based on the deviation of the median distribution
+        median_deviation = pix_median - cam_median_of_pix_median[:, np.newaxis]
+        if self.outlier_method == "median":
             median_outliers = np.logical_or(
                 median_deviation
-                < self.median_cut_outliers[0]  # pylint: disable=unsubscriptable-object
-                * median_of_pixel_median[:, np.newaxis],
+                < self.median_outliers_interval[0]  # pylint: disable=unsubscriptable-object
+                * cam_median_of_pix_median[:, np.newaxis],
                 median_deviation
-                > self.median_cut_outliers[1]  # pylint: disable=unsubscriptable-object
-                * median_of_pixel_median[:, np.newaxis],
+                > self.median_outliers_interval[1]  # pylint: disable=unsubscriptable-object
+                * cam_median_of_pix_median[:, np.newaxis],
             )
-        elif outlier_check == "NOISE":
+        elif self.outlier_method == "standard_deviation":
+            # Camera std of pixel medians
+            cam_std_of_pix_median = np.ma.std(pix_median, axis=1)
             median_outliers = np.logical_or(
                 median_deviation
-                < self.median_cut_outliers[0]  # pylint: disable=unsubscriptable-object
-                * std_of_pixel_std[:, np.newaxis],
+                < self.median_outliers_interval[0]  # pylint: disable=unsubscriptable-object
+                * cam_std_of_pix_median[:, np.newaxis],
                 median_deviation
-                > self.median_cut_outliers[1]  # pylint: disable=unsubscriptable-object
-                * std_of_pixel_std[:, np.newaxis],
+                > self.median_outliers_interval[1]  # pylint: disable=unsubscriptable-object
+                * cam_std_of_pix_median[:, np.newaxis],
             )
 
-        # outliers from standard deviation
-        std_deviation = pixel_std - median_of_pixel_std[:, np.newaxis]
+        # Detect outliers based on the deviation of the standard deviation distribution
+        std_deviation = pix_std - cam_median_of_pix_std[:, np.newaxis]
         std_outliers = np.logical_or(
             std_deviation
-            < self.std_cut_outliers[0]  # pylint: disable=unsubscriptable-object
-            * std_of_pixel_std[:, np.newaxis],
+            < self.std_outliers_interval[0]  # pylint: disable=unsubscriptable-object
+            * cam_std_of_pix_std[:, np.newaxis],
             std_deviation
-            > self.std_cut_outliers[1]  # pylint: disable=unsubscriptable-object
-            * std_of_pixel_std[:, np.newaxis],
+            > self.std_outliers_interval[1]  # pylint: disable=unsubscriptable-object
+            * cam_std_of_pix_std[:, np.newaxis],
         )
 
         return StatisticsContainer(
             extraction_start=times[0],
             extraction_stop=times[-1],
-            mean=pixel_mean.filled(np.nan),
-            median=pixel_median.filled(np.nan),
+            mean=pix_mean.filled(np.nan),
+            median=pix_median.filled(np.nan),
             median_outliers=median_outliers.filled(True),
-            std=pixel_std.filled(np.nan),
+            std=pix_std.filled(np.nan),
             std_outliers=std_outliers.filled(True),
         )
diff --git a/src/ctapipe/calib/camera/tests/test_extractor.py b/src/ctapipe/calib/camera/tests/test_extractor.py
new file mode 100644
index 00000000000..568729e6ad8
--- /dev/null
+++ b/src/ctapipe/calib/camera/tests/test_extractor.py
@@ -0,0 +1,119 @@
+"""
+Tests for StatisticsExtractor and related functions
+"""
+
+import numpy as np
+from astropy.table import Table
+from astropy.time import Time
+
+from ctapipe.calib.camera.extractor import PlainExtractor, SigmaClippingExtractor
+
+
+def test_extractors(example_subarray):
+    """test basic functionality of the StatisticsExtractors"""
+
+    # Create dummy data for testing
+    times = Time(
+        np.linspace(60117.911, 60117.9258, num=5000), scale="tai", format="mjd"
+    )
+    ped_dl1_data = np.random.normal(2.0, 5.0, size=(5000, 2, 1855))
+    ff_charge_dl1_data = np.random.normal(77.0, 10.0, size=(5000, 2, 1855))
+    ff_time_dl1_data = np.random.normal(18.0, 5.0, size=(5000, 2, 1855))
+    # Create dl1 tables
+    ped_dl1_table = Table(
+        [times, ped_dl1_data],
+        names=("time_mono", "image"),
+    )
+    ff_charge_dl1_table = Table(
+        [times, ff_charge_dl1_data],
+        names=("time_mono", "image"),
+    )
+    ff_time_dl1_table = Table(
+        [times, ff_time_dl1_data],
+        names=("time_mono", "peak_time"),
+    )
+    # Initialize the extractors
+    ped_extractor = SigmaClippingExtractor(
+        subarray=example_subarray, chunk_size=2500, outlier_method="standard_deviation"
+    )
+    ff_charge_extractor = SigmaClippingExtractor(
+        subarray=example_subarray, chunk_size=2500, outlier_method="median"
+    )
+    ff_time_extractor = PlainExtractor(subarray=example_subarray, chunk_size=2500)
+
+    # Extract the statistical values
+    ped_stats_list = ped_extractor(dl1_table=ped_dl1_table)
+    ff_charge_stats_list = ff_charge_extractor(dl1_table=ff_charge_dl1_table)
+    ff_time_stats_list = ff_time_extractor(
+        dl1_table=ff_time_dl1_table, col_name="peak_time"
+    )
+    # Check if the calculated statistical values are reasonable
+    # for a camera with two gain channels
+    assert not np.any(np.abs(ped_stats_list[0].mean - 2.0) > 1.5)
+    assert not np.any(np.abs(ff_charge_stats_list[0].mean - 77.0) > 1.5)
+    assert not np.any(np.abs(ff_time_stats_list[0].mean - 18.0) > 1.5)
+
+    assert not np.any(np.abs(ped_stats_list[1].median - 2.0) > 1.5)
+    assert not np.any(np.abs(ff_charge_stats_list[1].median - 77.0) > 1.5)
+    assert not np.any(np.abs(ff_time_stats_list[1].median - 18.0) > 1.5)
+
+    assert not np.any(np.abs(ped_stats_list[0].std - 5.0) > 1.5)
+    assert not np.any(np.abs(ff_charge_stats_list[0].std - 10.0) > 1.5)
+    assert not np.any(np.abs(ff_time_stats_list[0].std - 5.0) > 1.5)
+
+
+def test_check_outliers(example_subarray):
+    """test detection ability of outliers"""
+
+    # Create dummy data for testing
+    times = Time(
+        np.linspace(60117.911, 60117.9258, num=5000), scale="tai", format="mjd"
+    )
+    ff_dl1_data = np.random.normal(77.0, 10.0, size=(5000, 2, 1855))
+    # Insert outliers
+    ff_dl1_data[:, 0, 120] = 120.0
+    ff_dl1_data[:, 1, 67] = 120.0
+    # Create dl1 table
+    ff_dl1_table = Table(
+        [times, ff_dl1_data],
+        names=("time_mono", "image"),
+    )
+    # Initialize the extractor
+    ff_charge_extractor = SigmaClippingExtractor(
+        subarray=example_subarray, chunk_size=2500, outlier_method="median"
+    )
+    # Extract the statistical values
+    ff_charge_stats_list = ff_charge_extractor(dl1_table=ff_dl1_table)
+    # Check if outliers where detected correctly
+    assert ff_charge_stats_list[0].median_outliers[0][120]
+    assert ff_charge_stats_list[0].median_outliers[1][67]
+    assert ff_charge_stats_list[1].median_outliers[0][120]
+    assert ff_charge_stats_list[1].median_outliers[1][67]
+
+
+def test_check_chunk_shift(example_subarray):
+    """test the chunk shift option and the boundary case for the last chunk"""
+
+    # Create dummy data for testing
+    times = Time(
+        np.linspace(60117.911, 60117.9258, num=5500), scale="tai", format="mjd"
+    )
+    ff_dl1_data = np.random.normal(77.0, 10.0, size=(5500, 2, 1855))
+    # Create dl1 table
+    ff_dl1_table = Table(
+        [times, ff_dl1_data],
+        names=("time_mono", "image"),
+    )
+    # Initialize the extractor
+    ff_charge_extractor = SigmaClippingExtractor(
+        subarray=example_subarray, chunk_size=2500, outlier_method="median"
+    )
+    # Extract the statistical values
+    stats_list = ff_charge_extractor(dl1_table=ff_dl1_table)
+    stats_list_chunk_shift = ff_charge_extractor(
+        dl1_table=ff_dl1_table, chunk_shift=2000
+    )
+    # Check if three chunks are used for the extraction as the last chunk overflows
+    assert len(stats_list) == 3
+    # Check if two chunks are used for the extraction as the last chunk is dropped
+    assert len(stats_list_chunk_shift) == 2
diff --git a/src/ctapipe/calib/camera/tests/test_extractors.py b/src/ctapipe/calib/camera/tests/test_extractors.py
deleted file mode 100644
index 795621df04c..00000000000
--- a/src/ctapipe/calib/camera/tests/test_extractors.py
+++ /dev/null
@@ -1,153 +0,0 @@
-"""
-Tests for StatisticsExtractor and related functions
-"""
-
-import numpy as np
-import pytest
-from astropy.table import Table
-from astropy.time import Time
-
-from ctapipe.calib.camera.extractor import PlainExtractor, SigmaClippingExtractor
-
-
-@pytest.fixture()
-def plain_extractor(example_subarray):
-    """test the PlainExtractor"""
-    return PlainExtractor(subarray=example_subarray, chunk_size=2500)
-
-
-@pytest.fixture()
-def sigmaclipping_extractor(example_subarray):
-    """test the SigmaClippingExtractor"""
-    return SigmaClippingExtractor(subarray=example_subarray, chunk_size=2500)
-
-
-def test_extractors(plain_extractor, sigmaclipping_extractor):
-    """test basic functionality of the StatisticsExtractors"""
-
-    # Create dummy data for testing
-    times = Time(
-        np.linspace(60117.911, 60117.9258, num=5000), scale="tai", format="mjd"
-    )
-    pedestal_dl1_data = np.random.normal(2.0, 5.0, size=(5000, 2, 1855))
-    pedestal_event_type = np.full((5000,), 2)
-    flatfield_charge_dl1_data = np.random.normal(77.0, 10.0, size=(5000, 2, 1855))
-    flatfield_time_dl1_data = np.random.normal(18.0, 5.0, size=(5000, 2, 1855))
-    flatfield_event_type = np.full((5000,), 0)
-    # Create dl1 tables
-    pedestal_dl1_table = Table(
-        [times, pedestal_dl1_data, pedestal_event_type],
-        names=("time_mono", "image", "event_type"),
-    )
-    flatfield_charge_dl1_table = Table(
-        [times, flatfield_charge_dl1_data, flatfield_event_type],
-        names=("time_mono", "image", "event_type"),
-    )
-    flatfield_time_dl1_table = Table(
-        [times, flatfield_time_dl1_data, flatfield_event_type],
-        names=("time_mono", "peak_time", "event_type"),
-    )
-    # Extract the statistical values
-    pedestal_stats_list = sigmaclipping_extractor(dl1_table=pedestal_dl1_table)
-    flatfield_charge_stats_list = sigmaclipping_extractor(
-        dl1_table=flatfield_charge_dl1_table
-    )
-    flatfield_time_stats_list = plain_extractor(
-        dl1_table=flatfield_time_dl1_table, col_name="peak_time"
-    )
-    # check if the calculated statistical values are reasonable
-    # for a camera with two gain channels
-    assert not np.any(np.abs(pedestal_stats_list[0].mean - 2.0) > 1.5)
-    assert not np.any(np.abs(flatfield_charge_stats_list[0].mean - 77.0) > 1.5)
-    assert not np.any(np.abs(flatfield_time_stats_list[0].mean - 18.0) > 1.5)
-
-    assert not np.any(np.abs(pedestal_stats_list[1].median - 2.0) > 1.5)
-    assert not np.any(np.abs(flatfield_charge_stats_list[1].median - 77.0) > 1.5)
-    assert not np.any(np.abs(flatfield_time_stats_list[1].median - 18.0) > 1.5)
-
-    assert not np.any(np.abs(pedestal_stats_list[0].std - 5.0) > 1.5)
-    assert not np.any(np.abs(flatfield_charge_stats_list[0].std - 10.0) > 1.5)
-    assert not np.any(np.abs(flatfield_time_stats_list[0].std - 5.0) > 1.5)
-
-
-def test_check_outliers(sigmaclipping_extractor):
-    """test detection ability of outliers"""
-
-    # Create dummy data for testing
-    times = Time(
-        np.linspace(60117.911, 60117.9258, num=5000), scale="tai", format="mjd"
-    )
-    flatfield_dl1_data = np.random.normal(77.0, 10.0, size=(5000, 2, 1855))
-    flatfield_event_type = np.full((5000,), 0)
-    # insert outliers
-    flatfield_dl1_data[:, 0, 120] = 120.0
-    flatfield_dl1_data[:, 1, 67] = 120.0
-    # Create dl1 table
-    flatfield_dl1_table = Table(
-        [times, flatfield_dl1_data, flatfield_event_type],
-        names=("time_mono", "image", "event_type"),
-    )
-    # Extract the statistical values
-    sigmaclipping_stats_list = sigmaclipping_extractor(dl1_table=flatfield_dl1_table)
-    # check if outliers where detected correctly
-    assert sigmaclipping_stats_list[0].median_outliers[0][120]
-    assert sigmaclipping_stats_list[0].median_outliers[1][67]
-    assert sigmaclipping_stats_list[1].median_outliers[0][120]
-    assert sigmaclipping_stats_list[1].median_outliers[1][67]
-
-
-def test_check_chunk_shift(sigmaclipping_extractor):
-    """test the chunk shift option and the boundary case for the last chunk"""
-
-    # Create dummy data for testing
-    times = Time(
-        np.linspace(60117.911, 60117.9258, num=5500), scale="tai", format="mjd"
-    )
-    flatfield_dl1_data = np.random.normal(77.0, 10.0, size=(5500, 2, 1855))
-    flatfield_event_type = np.full((5500,), 0)
-    # Create dl1 table
-    flatfield_dl1_table = Table(
-        [times, flatfield_dl1_data, flatfield_event_type],
-        names=("time_mono", "image", "event_type"),
-    )
-    # Extract the statistical values
-    stats_list = sigmaclipping_extractor(dl1_table=flatfield_dl1_table)
-    stats_list_chunk_shift = sigmaclipping_extractor(
-        dl1_table=flatfield_dl1_table, chunk_shift=2000
-    )
-    # check if three chunks are used for the extraction as the last chunk overflows
-    assert len(stats_list) == 3
-    # check if two chunks are used for the extraction as the last chunk is dropped
-    assert len(stats_list_chunk_shift) == 2
-
-
-def test_check_input(sigmaclipping_extractor):
-    """test the input dl1 data"""
-
-    # Create dummy data for testing
-    times = Time(
-        np.linspace(60117.911, 60117.9258, num=5000), scale="tai", format="mjd"
-    )
-    flatfield_dl1_data = np.random.normal(77.0, 10.0, size=(5000, 2, 1855))
-    # Insert one event with wrong event type
-    flatfield_event_type = np.full((5000,), 0)
-    flatfield_event_type[0] = 2
-    # Create dl1 table
-    flatfield_dl1_table = Table(
-        [times, flatfield_dl1_data, flatfield_event_type],
-        names=("time_mono", "image", "event_type"),
-    )
-    # Try to extract the statistical values, which results in a ValueError
-    with pytest.raises(ValueError):
-        _ = sigmaclipping_extractor(dl1_table=flatfield_dl1_table)
-
-    # Construct event_type column for cosmic events
-    cosmic_event_type = np.full((5000,), 32)
-    # Create dl1 table
-    cosmic_dl1_table = Table(
-        [times, flatfield_dl1_data, cosmic_event_type],
-        names=("time_mono", "image", "event_type"),
-    )
-    # Try to extract the statistical values, which results in a ValueError
-    with pytest.raises(ValueError):
-        _ = sigmaclipping_extractor(dl1_table=cosmic_dl1_table)

From 864a061366d129b083938744d8fd104b9b72080c Mon Sep 17 00:00:00 2001
From: TjarkMiener <tjark.miener4692@googlemail.com>
Date: Tue, 9 Jul 2024 12:58:59 +0200
Subject: [PATCH 41/61] use only one chunk iterator on the whole table

before two chunk iterations were used for looping over two columns of the same table

remove pylint command since not used in CI system of ctapipe
---
 src/ctapipe/calib/camera/extractor.py | 49 +++++++++++++--------------
 1 file changed, 24 insertions(+), 25 deletions(-)

diff --git a/src/ctapipe/calib/camera/extractor.py b/src/ctapipe/calib/camera/extractor.py
index ac9f1178cc0..68159c28ab1 100644
--- a/src/ctapipe/calib/camera/extractor.py
+++ b/src/ctapipe/calib/camera/extractor.py
@@ -68,35 +68,36 @@ def __call__(
                 f"The length of the DL1 table ({len(dl1_table[col_name])}) must be greater or equal than the size of the chunk ({self.chunk_size})."
             )
 
-        # Function to split table data into appropriated chunks
-        def _get_chunks(dl1_table_data, chunk_shift):
+        # Function to split the dl1 table into appropriated chunks
+        def _get_chunks(dl1_table, chunk_shift):
             if chunk_shift is None:
                 return (
                     (
-                        dl1_table_data[i : i + self.chunk_size]
-                        if i + self.chunk_size <= len(dl1_table_data)
-                        else dl1_table_data[
-                            len(dl1_table_data) - self.chunk_size : len(dl1_table_data)
+                        dl1_table[i : i + self.chunk_size]
+                        if i + self.chunk_size <= len(dl1_table)
+                        else dl1_table[
+                            len(dl1_table) - self.chunk_size : len(dl1_table)
                         ]
                     )
-                    for i in range(0, len(dl1_table_data), self.chunk_size)
+                    for i in range(0, len(dl1_table), self.chunk_size)
                 )
             else:
                 return (
-                    dl1_table_data[i : i + self.chunk_size]
-                    for i in range(
-                        0, len(dl1_table_data) - self.chunk_size, chunk_shift
-                    )
+                    dl1_table[i : i + self.chunk_size]
+                    for i in range(0, len(dl1_table) - self.chunk_size, chunk_shift)
                 )
 
-        # Get the chunks for the timestamps and selected column name
-        time_chunks = _get_chunks(dl1_table["time_mono"], chunk_shift)
-        image_chunks = _get_chunks(dl1_table[col_name].data, chunk_shift)
+        # Get the chunks of the dl1 table
+        dl1_chunks = _get_chunks(dl1_table, chunk_shift)
 
         # Calculate the statistics from a chunk of images
-        stats_list = []
-        for images, times in zip(image_chunks, time_chunks):
-            stats_list.append(self.extract(images, times, masked_pixels_of_sample))
+        stats_list = [
+            self.extract(
+                chunk[col_name].data, chunk["time_mono"], masked_pixels_of_sample
+            )
+            for chunk in dl1_chunks
+        ]
+
         return stats_list
 
     @abstractmethod
@@ -216,10 +217,10 @@ def extract(self, images, times, masked_pixels_of_sample) -> StatisticsContainer
         if self.outlier_method == "median":
             median_outliers = np.logical_or(
                 median_deviation
-                < self.median_outliers_interval[0]  # pylint: disable=unsubscriptable-object
+                < self.median_outliers_interval[0]
                 * cam_median_of_pix_median[:, np.newaxis],
                 median_deviation
-                > self.median_outliers_interval[1]  # pylint: disable=unsubscriptable-object
+                > self.median_outliers_interval[1]
                 * cam_median_of_pix_median[:, np.newaxis],
             )
         elif self.outlier_method == "standard_deviation":
@@ -227,10 +228,10 @@ def extract(self, images, times, masked_pixels_of_sample) -> StatisticsContainer
             cam_std_of_pix_median = np.ma.std(pix_median, axis=1)
             median_outliers = np.logical_or(
                 median_deviation
-                < self.median_outliers_interval[0]  # pylint: disable=unsubscriptable-object
+                < self.median_outliers_interval[0]
                 * cam_std_of_pix_median[:, np.newaxis],
                 median_deviation
-                > self.median_outliers_interval[1]  # pylint: disable=unsubscriptable-object
+                > self.median_outliers_interval[1]
                 * cam_std_of_pix_median[:, np.newaxis],
             )
 
@@ -238,11 +239,9 @@ def extract(self, images, times, masked_pixels_of_sample) -> StatisticsContainer
         std_deviation = pix_std - cam_median_of_pix_std[:, np.newaxis]
         std_outliers = np.logical_or(
             std_deviation
-            < self.std_outliers_interval[0]  # pylint: disable=unsubscriptable-object
-            * cam_std_of_pix_std[:, np.newaxis],
+            < self.std_outliers_interval[0] * cam_std_of_pix_std[:, np.newaxis],
             std_deviation
-            > self.std_outliers_interval[1]  # pylint: disable=unsubscriptable-object
-            * cam_std_of_pix_std[:, np.newaxis],
+            > self.std_outliers_interval[1] * cam_std_of_pix_std[:, np.newaxis],
         )
 
         return StatisticsContainer(

From 2d0d37d4c5a648eb672a82657a79bd19064130ae Mon Sep 17 00:00:00 2001
From: TjarkMiener <tjark.miener4692@googlemail.com>
Date: Tue, 9 Jul 2024 13:45:26 +0200
Subject: [PATCH 42/61] reformulate error messages for chunk size greater than
 dl1 table length

---
 src/ctapipe/calib/camera/extractor.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/ctapipe/calib/camera/extractor.py b/src/ctapipe/calib/camera/extractor.py
index 68159c28ab1..05539ca87a9 100644
--- a/src/ctapipe/calib/camera/extractor.py
+++ b/src/ctapipe/calib/camera/extractor.py
@@ -62,10 +62,10 @@ def __call__(
             List of extracted statistics and extraction chunks
         """
 
-        # Check if the length of the dl1 table is greater or equal than the size of the chunk.
-        if len(dl1_table[col_name]) < self.chunk_size:
+        # Check if the statistics of the dl1 table is sufficient to extract at least one chunk.
+        if len(dl1_table) < self.chunk_size:
             raise ValueError(
-                f"The length of the DL1 table ({len(dl1_table[col_name])}) must be greater or equal than the size of the chunk ({self.chunk_size})."
+                f"The length of the provided DL1 table ({len(dl1_table)}) is insufficient to meet the required statistics for a single extraction chunk of size ({self.chunk_size})."
             )
 
         # Function to split the dl1 table into appropriated chunks

From 8f4f58c25cb52421729f72cc0fa228718dcce4a3 Mon Sep 17 00:00:00 2001
From: TjarkMiener <tjark.miener4692@googlemail.com>
Date: Tue, 9 Jul 2024 15:41:41 +0200
Subject: [PATCH 43/61] refactor _get_chunks for a better readability

---
 src/ctapipe/calib/camera/extractor.py | 27 +++++++++++----------------
 1 file changed, 11 insertions(+), 16 deletions(-)

diff --git a/src/ctapipe/calib/camera/extractor.py b/src/ctapipe/calib/camera/extractor.py
index 05539ca87a9..3e7847772dc 100644
--- a/src/ctapipe/calib/camera/extractor.py
+++ b/src/ctapipe/calib/camera/extractor.py
@@ -70,22 +70,17 @@ def __call__(
 
         # Function to split the dl1 table into appropriated chunks
         def _get_chunks(dl1_table, chunk_shift):
-            if chunk_shift is None:
-                return (
-                    (
-                        dl1_table[i : i + self.chunk_size]
-                        if i + self.chunk_size <= len(dl1_table)
-                        else dl1_table[
-                            len(dl1_table) - self.chunk_size : len(dl1_table)
-                        ]
-                    )
-                    for i in range(0, len(dl1_table), self.chunk_size)
-                )
-            else:
-                return (
-                    dl1_table[i : i + self.chunk_size]
-                    for i in range(0, len(dl1_table) - self.chunk_size, chunk_shift)
-                )
+            # Calculate the range step: Use chunk_shift if provided, otherwise use chunk_size
+            step = chunk_shift if chunk_shift is not None else self.chunk_size
+
+            # Generate chunks that do not overflow
+            for i in range(0, len(dl1_table), step):
+                if i + self.chunk_size <= len(dl1_table):
+                    yield dl1_table[i : i + self.chunk_size]
+
+            # If chunk_shift is None, ensure the last chunk is of size chunk_size, if needed
+            if chunk_shift is None and len(dl1_table) % self.chunk_size != 0:
+                yield dl1_table[-self.chunk_size :]
 
         # Get the chunks of the dl1 table
         dl1_chunks = _get_chunks(dl1_table, chunk_shift)

From d09dc8559dbf08504d6d0fc88c957401f5f4e488 Mon Sep 17 00:00:00 2001
From: TjarkMiener <tjark.miener4692@googlemail.com>
Date: Tue, 16 Jul 2024 16:07:43 +0200
Subject: [PATCH 44/61] remove outlier detection from the stats extraction

---
 src/ctapipe/calib/camera/extractor.py         | 115 ++----------------
 .../calib/camera/tests/test_extractor.py      |  37 +-----
 src/ctapipe/containers.py                     |  10 --
 3 files changed, 13 insertions(+), 149 deletions(-)

diff --git a/src/ctapipe/calib/camera/extractor.py b/src/ctapipe/calib/camera/extractor.py
index 3e7847772dc..b1bb22cd1a3 100644
--- a/src/ctapipe/calib/camera/extractor.py
+++ b/src/ctapipe/calib/camera/extractor.py
@@ -15,11 +15,7 @@
 
 from ctapipe.containers import StatisticsContainer
 from ctapipe.core import TelescopeComponent
-from ctapipe.core.traits import (
-    CaselessStrEnum,
-    Int,
-    List,
-)
+from ctapipe.core.traits import Int
 
 
 class StatisticsExtractor(TelescopeComponent):
@@ -102,75 +98,32 @@ def extract(self, images, times, masked_pixels_of_sample) -> StatisticsContainer
 
 class PlainExtractor(StatisticsExtractor):
     """
-    Extract the statistics from a chunk of peak time images
-    using numpy and scipy functions
+    Extract the statistics from a chunk of images using numpy functions
     """
 
-    time_median_cut_outliers = List(
-        [0, 60],
-        help="Interval (in waveform samples) of accepted median peak time values",
-    ).tag(config=True)
-
     def extract(self, images, times, masked_pixels_of_sample) -> StatisticsContainer:
-        # ensure numpy array
+        # Mask broken pixels
         masked_images = np.ma.array(images, mask=masked_pixels_of_sample)
 
-        # median over the chunk per pixel
-        pixel_median = np.ma.median(masked_images, axis=0)
-
-        # mean over the chunk per pixel
+        # Calculate the mean, median, and std over the chunk per pixel
         pixel_mean = np.ma.mean(masked_images, axis=0)
-
-        # std over the chunk per pixel
+        pixel_median = np.ma.median(masked_images, axis=0)
         pixel_std = np.ma.std(masked_images, axis=0)
 
-        # outliers from median
-        median_outliers = np.logical_or(
-            pixel_median < self.time_median_cut_outliers[0],
-            pixel_median > self.time_median_cut_outliers[1],
-        )
-
         return StatisticsContainer(
             extraction_start=times[0],
             extraction_stop=times[-1],
             mean=pixel_mean.filled(np.nan),
             median=pixel_median.filled(np.nan),
-            median_outliers=median_outliers.filled(True),
             std=pixel_std.filled(np.nan),
-            std_outliers=np.full(np.shape(median_outliers), False),
         )
 
 
 class SigmaClippingExtractor(StatisticsExtractor):
     """
-    Extract the statistics from a chunk of charge or variance images
-    using astropy's sigma clipping functions
+    Extract the statistics from a chunk of images using astropy's sigma clipping functions
     """
 
-    median_outliers_interval = List(
-        [-0.3, 0.3],
-        help=(
-            "Interval of the multiplicative factor for detecting outliers based on"
-            "the deviation of the median distribution."
-            "- If `outlier_method` is `median`, the factors are multiplied by"
-            "  the camera median of pixel medians to set the thresholds for identifying outliers."
-            "- If `outlier_method` is `standard_deviation`, the factors are multiplied by"
-            "  the camera standard deviation of pixel medians to set the thresholds for identifying outliers."
-        ),
-    ).tag(config=True)
-    outlier_method = CaselessStrEnum(
-        values=["median", "standard_deviation"],
-        help="Method used for detecting outliers based on the deviation of the median distribution",
-    ).tag(config=True)
-    std_outliers_interval = List(
-        [-3, 3],
-        help=(
-            "Interval of the multiplicative factor for detecting outliers based on"
-            "the deviation of the standard deviation distribution."
-            "The factors are multiplied by the camera standard deviation of pixel standard deviations"
-            "to set the thresholds for identifying outliers."
-        ),
-    ).tag(config=True)
     max_sigma = Int(
         default_value=4,
         help="Maximal value for the sigma clipping outlier removal",
@@ -185,7 +138,7 @@ def extract(self, images, times, masked_pixels_of_sample) -> StatisticsContainer
         masked_images = np.ma.array(images, mask=masked_pixels_of_sample)
 
         # Calculate the mean, median, and std over the chunk per pixel
-        pix_mean, pix_median, pix_std = sigma_clipped_stats(
+        pixel_mean, pixel_median, pixel_std = sigma_clipped_stats(
             masked_images,
             sigma=self.max_sigma,
             maxiters=self.iterations,
@@ -193,58 +146,10 @@ def extract(self, images, times, masked_pixels_of_sample) -> StatisticsContainer
             axis=0,
         )
 
-        # Mask pixels without defined statistical values
-        pix_mean = np.ma.array(pix_mean, mask=np.isnan(pix_mean))
-        pix_median = np.ma.array(pix_median, mask=np.isnan(pix_median))
-        pix_std = np.ma.array(pix_std, mask=np.isnan(pix_std))
-
-        # Camera median of the pixel medians
-        cam_median_of_pix_median = np.ma.median(pix_median, axis=1)
-
-        # Camera median of the pixel stds
-        cam_median_of_pix_std = np.ma.median(pix_std, axis=1)
-
-        # Camera std of the pixel stds
-        cam_std_of_pix_std = np.ma.std(pix_std, axis=1)
-
-        # Detect outliers based on the deviation of the median distribution
-        median_deviation = pix_median - cam_median_of_pix_median[:, np.newaxis]
-        if self.outlier_method == "median":
-            median_outliers = np.logical_or(
-                median_deviation
-                < self.median_outliers_interval[0]
-                * cam_median_of_pix_median[:, np.newaxis],
-                median_deviation
-                > self.median_outliers_interval[1]
-                * cam_median_of_pix_median[:, np.newaxis],
-            )
-        elif self.outlier_method == "standard_deviation":
-            # Camera std of pixel medians
-            cam_std_of_pix_median = np.ma.std(pix_median, axis=1)
-            median_outliers = np.logical_or(
-                median_deviation
-                < self.median_outliers_interval[0]
-                * cam_std_of_pix_median[:, np.newaxis],
-                median_deviation
-                > self.median_outliers_interval[1]
-                * cam_std_of_pix_median[:, np.newaxis],
-            )
-
-        # Detect outliers based on the deviation of the standard deviation distribution
-        std_deviation = pix_std - cam_median_of_pix_std[:, np.newaxis]
-        std_outliers = np.logical_or(
-            std_deviation
-            < self.std_outliers_interval[0] * cam_std_of_pix_std[:, np.newaxis],
-            std_deviation
-            > self.std_outliers_interval[1] * cam_std_of_pix_std[:, np.newaxis],
-        )
-
         return StatisticsContainer(
             extraction_start=times[0],
             extraction_stop=times[-1],
-            mean=pix_mean.filled(np.nan),
-            median=pix_median.filled(np.nan),
-            median_outliers=median_outliers.filled(True),
-            std=pix_std.filled(np.nan),
-            std_outliers=std_outliers.filled(True),
+            mean=pixel_mean,
+            median=pixel_median,
+            std=pixel_std,
         )
diff --git a/src/ctapipe/calib/camera/tests/test_extractor.py b/src/ctapipe/calib/camera/tests/test_extractor.py
index 568729e6ad8..d7ef1892a06 100644
--- a/src/ctapipe/calib/camera/tests/test_extractor.py
+++ b/src/ctapipe/calib/camera/tests/test_extractor.py
@@ -33,11 +33,9 @@ def test_extractors(example_subarray):
         names=("time_mono", "peak_time"),
     )
     # Initialize the extractors
-    ped_extractor = SigmaClippingExtractor(
-        subarray=example_subarray, chunk_size=2500, outlier_method="standard_deviation"
-    )
+    ped_extractor = SigmaClippingExtractor(subarray=example_subarray, chunk_size=2500)
     ff_charge_extractor = SigmaClippingExtractor(
-        subarray=example_subarray, chunk_size=2500, outlier_method="median"
+        subarray=example_subarray, chunk_size=2500
     )
     ff_time_extractor = PlainExtractor(subarray=example_subarray, chunk_size=2500)
 
@@ -62,35 +60,6 @@ def test_extractors(example_subarray):
     assert not np.any(np.abs(ff_time_stats_list[0].std - 5.0) > 1.5)
 
 
-def test_check_outliers(example_subarray):
-    """test detection ability of outliers"""
-
-    # Create dummy data for testing
-    times = Time(
-        np.linspace(60117.911, 60117.9258, num=5000), scale="tai", format="mjd"
-    )
-    ff_dl1_data = np.random.normal(77.0, 10.0, size=(5000, 2, 1855))
-    # Insert outliers
-    ff_dl1_data[:, 0, 120] = 120.0
-    ff_dl1_data[:, 1, 67] = 120.0
-    # Create dl1 table
-    ff_dl1_table = Table(
-        [times, ff_dl1_data],
-        names=("time_mono", "image"),
-    )
-    # Initialize the extractor
-    ff_charge_extractor = SigmaClippingExtractor(
-        subarray=example_subarray, chunk_size=2500, outlier_method="median"
-    )
-    # Extract the statistical values
-    ff_charge_stats_list = ff_charge_extractor(dl1_table=ff_dl1_table)
-    # Check if outliers where detected correctly
-    assert ff_charge_stats_list[0].median_outliers[0][120]
-    assert ff_charge_stats_list[0].median_outliers[1][67]
-    assert ff_charge_stats_list[1].median_outliers[0][120]
-    assert ff_charge_stats_list[1].median_outliers[1][67]
-
-
 def test_check_chunk_shift(example_subarray):
     """test the chunk shift option and the boundary case for the last chunk"""
 
@@ -106,7 +75,7 @@ def test_check_chunk_shift(example_subarray):
     )
     # Initialize the extractor
     ff_charge_extractor = SigmaClippingExtractor(
-        subarray=example_subarray, chunk_size=2500, outlier_method="median"
+        subarray=example_subarray, chunk_size=2500
     )
     # Extract the statistical values
     stats_list = ff_charge_extractor(dl1_table=ff_dl1_table)
diff --git a/src/ctapipe/containers.py b/src/ctapipe/containers.py
index bf2da98da68..9d8f146fb4a 100644
--- a/src/ctapipe/containers.py
+++ b/src/ctapipe/containers.py
@@ -427,21 +427,11 @@ class StatisticsContainer(Container):
         "median of a pixel-wise quantity for each channel"
         "Type: float; Shape: (n_channels, n_pixel)",
     )
-    median_outliers = Field(
-        None,
-        "outliers from the median distribution of a pixel-wise quantity for each channel"
-        "Type: binary mask; Shape: (n_channels, n_pixel)",
-    )
     std = Field(
         None,
         "standard deviation of a pixel-wise quantity for each channel"
         "Type: float; Shape: (n_channels, n_pixel)",
     )
-    std_outliers = Field(
-        None,
-        "outliers from the standard deviation distribution of a pixel-wise quantity for each channel"
-        "Type: binary mask; Shape: (n_channels, n_pixel)",
-    )
 
 
 class ImageStatisticsContainer(Container):

From 71c051e075d00822214579029ffb95173bb80830 Mon Sep 17 00:00:00 2001
From: TjarkMiener <tjark.miener4692@googlemail.com>
Date: Wed, 17 Jul 2024 11:10:37 +0200
Subject: [PATCH 45/61] simplify _get_chunks

polish doc string of __call__ function with explanation of overlapping and non-overlapping chunks as well as the overflow of the bounds of the dl1 table
---
 src/ctapipe/calib/camera/extractor.py | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/src/ctapipe/calib/camera/extractor.py b/src/ctapipe/calib/camera/extractor.py
index b1bb22cd1a3..cdb67a485e7 100644
--- a/src/ctapipe/calib/camera/extractor.py
+++ b/src/ctapipe/calib/camera/extractor.py
@@ -37,18 +37,24 @@ def __call__(
         col_name="image",
     ) -> list:
         """
-        Prepare the extraction chunks and call the relevant function of the particular extractor
-        to extract the statistical values.
+        Divides the input DL1 table into overlapping or non-overlapping chunks of size `chunk_size`
+        and call the relevant function of the particular extractor to extract the statistical values.
+        The chunks are generated in a way that ensures they do not overflow the bounds of the table.
+        - If `chunk_shift` is None, extraction chunks will not overlap, but the last chunk is ensured to be
+        of size `chunk_size`, even if it means the last two chunks will overlap.
+        - If `chunk_shift` is provided, it will determine the number of samples to shift between the start
+        of consecutive chunks resulting in an overlap of extraction chunks. Chunks that overflows the bounds
+        of the table are not considered.
 
         Parameters
         ----------
         dl1_table : astropy.table.Table
             DL1 table with images of shape (n_images, n_channels, n_pix)
             and timestamps of shape (n_images, ) stored in an astropy Table
-        masked_pixels_of_sample : ndarray
+        masked_pixels_of_sample : ndarray, optional
             boolean array of masked pixels of shape (n_pix, ) that are not available for processing
-        chunk_shift : int
-            number of samples to shift the extraction chunk
+        chunk_shift : int, optional
+            number of samples to shift between the start of consecutive extraction chunks
         col_name : string
             column name in the DL1 table
 
@@ -67,12 +73,11 @@ def __call__(
         # Function to split the dl1 table into appropriated chunks
         def _get_chunks(dl1_table, chunk_shift):
             # Calculate the range step: Use chunk_shift if provided, otherwise use chunk_size
-            step = chunk_shift if chunk_shift is not None else self.chunk_size
+            step = chunk_shift or self.chunk_size
 
             # Generate chunks that do not overflow
-            for i in range(0, len(dl1_table), step):
-                if i + self.chunk_size <= len(dl1_table):
-                    yield dl1_table[i : i + self.chunk_size]
+            for i in range(0, len(dl1_table) - self.chunk_size + 1, step):
+                yield dl1_table[i : i + self.chunk_size]
 
             # If chunk_shift is None, ensure the last chunk is of size chunk_size, if needed
             if chunk_shift is None and len(dl1_table) % self.chunk_size != 0:

From 44ed0dc187e8f05d83f7590b844b9e6bdc4442c6 Mon Sep 17 00:00:00 2001
From: TjarkMiener <tjark.miener4692@googlemail.com>
Date: Wed, 17 Jul 2024 13:29:27 +0200
Subject: [PATCH 46/61] generalized extract()

moved extraction start and stop time outside stats container and only provide the extraction start, i.e. validity start. The returning of the caller is a dict with key the validity start and items the StatisticsContainer.
---
 src/ctapipe/calib/camera/extractor.py         | 65 ++++++++--------
 .../calib/camera/tests/test_extractor.py      | 77 +++++++++----------
 src/ctapipe/containers.py                     |  2 -
 3 files changed, 71 insertions(+), 73 deletions(-)

diff --git a/src/ctapipe/calib/camera/extractor.py b/src/ctapipe/calib/camera/extractor.py
index cdb67a485e7..f669df82e34 100644
--- a/src/ctapipe/calib/camera/extractor.py
+++ b/src/ctapipe/calib/camera/extractor.py
@@ -20,8 +20,8 @@
 
 class StatisticsExtractor(TelescopeComponent):
     """
-    Base component to handle the extraction of the statistics from a dl1 table
-    containing charges, peak times and/or charge variances (images).
+    Base component to handle the extraction of the statistics from a table
+    containing e.g. charges, peak times and/or charge variances (images).
     """
 
     chunk_size = Int(
@@ -31,13 +31,15 @@ class StatisticsExtractor(TelescopeComponent):
 
     def __call__(
         self,
-        dl1_table,
+        table,
         masked_pixels_of_sample=None,
         chunk_shift=None,
         col_name="image",
     ) -> list:
         """
-        Divides the input DL1 table into overlapping or non-overlapping chunks of size `chunk_size`
+        Divide table into chunks and extract the statistical values.
+
+        This function divides the input table into overlapping or non-overlapping chunks of size `chunk_size`
         and call the relevant function of the particular extractor to extract the statistical values.
         The chunks are generated in a way that ensures they do not overflow the bounds of the table.
         - If `chunk_shift` is None, extraction chunks will not overlap, but the last chunk is ensured to be
@@ -48,15 +50,15 @@ def __call__(
 
         Parameters
         ----------
-        dl1_table : astropy.table.Table
-            DL1 table with images of shape (n_images, n_channels, n_pix)
+        table : astropy.table.Table
+            table with images of shape (n_images, n_channels, n_pix)
             and timestamps of shape (n_images, ) stored in an astropy Table
         masked_pixels_of_sample : ndarray, optional
             boolean array of masked pixels of shape (n_pix, ) that are not available for processing
         chunk_shift : int, optional
             number of samples to shift between the start of consecutive extraction chunks
         col_name : string
-            column name in the DL1 table
+            column name in the table
 
         Returns
         -------
@@ -64,40 +66,45 @@ def __call__(
             List of extracted statistics and extraction chunks
         """
 
-        # Check if the statistics of the dl1 table is sufficient to extract at least one chunk.
-        if len(dl1_table) < self.chunk_size:
+        # Check if the statistics of the table is sufficient to extract at least one chunk.
+        if len(table) < self.chunk_size:
+            raise ValueError(
+                f"The length of the provided table ({len(table)}) is insufficient to meet the required statistics for a single extraction chunk of size ({self.chunk_size})."
+            )
+        # Check if the chunk_shift is smaller than the chunk_size
+        if chunk_shift is None and chunk_shift > self.chunk_size:
             raise ValueError(
-                f"The length of the provided DL1 table ({len(dl1_table)}) is insufficient to meet the required statistics for a single extraction chunk of size ({self.chunk_size})."
+                f"The chunk_shift ({chunk_shift}) must be smaller than the chunk_size ({self.chunk_size})."
             )
 
-        # Function to split the dl1 table into appropriated chunks
-        def _get_chunks(dl1_table, chunk_shift):
+        # Function to split the table into appropriated chunks
+        def _get_chunks(table, chunk_shift):
             # Calculate the range step: Use chunk_shift if provided, otherwise use chunk_size
             step = chunk_shift or self.chunk_size
 
             # Generate chunks that do not overflow
-            for i in range(0, len(dl1_table) - self.chunk_size + 1, step):
-                yield dl1_table[i : i + self.chunk_size]
+            for i in range(0, len(table) - self.chunk_size + 1, step):
+                yield table[i : i + self.chunk_size]
 
             # If chunk_shift is None, ensure the last chunk is of size chunk_size, if needed
-            if chunk_shift is None and len(dl1_table) % self.chunk_size != 0:
-                yield dl1_table[-self.chunk_size :]
+            if chunk_shift is None and len(table) % self.chunk_size != 0:
+                yield table[-self.chunk_size :]
 
-        # Get the chunks of the dl1 table
-        dl1_chunks = _get_chunks(dl1_table, chunk_shift)
+        # Get the chunks of the table
+        chunks = _get_chunks(table, chunk_shift)
 
         # Calculate the statistics from a chunk of images
-        stats_list = [
-            self.extract(
-                chunk[col_name].data, chunk["time_mono"], masked_pixels_of_sample
+        chunk_stats = {
+            chunk["time_mono"][0]: self.extract(
+                chunk[col_name].data, masked_pixels_of_sample
             )
-            for chunk in dl1_chunks
-        ]
+            for chunk in chunks
+        }
 
-        return stats_list
+        return chunk_stats
 
     @abstractmethod
-    def extract(self, images, times, masked_pixels_of_sample) -> StatisticsContainer:
+    def extract(self, images, masked_pixels_of_sample) -> StatisticsContainer:
         pass
 
 
@@ -106,7 +113,7 @@ class PlainExtractor(StatisticsExtractor):
     Extract the statistics from a chunk of images using numpy functions
     """
 
-    def extract(self, images, times, masked_pixels_of_sample) -> StatisticsContainer:
+    def extract(self, images, masked_pixels_of_sample) -> StatisticsContainer:
         # Mask broken pixels
         masked_images = np.ma.array(images, mask=masked_pixels_of_sample)
 
@@ -116,8 +123,6 @@ def extract(self, images, times, masked_pixels_of_sample) -> StatisticsContainer
         pixel_std = np.ma.std(masked_images, axis=0)
 
         return StatisticsContainer(
-            extraction_start=times[0],
-            extraction_stop=times[-1],
             mean=pixel_mean.filled(np.nan),
             median=pixel_median.filled(np.nan),
             std=pixel_std.filled(np.nan),
@@ -138,7 +143,7 @@ class SigmaClippingExtractor(StatisticsExtractor):
         help="Number of iterations for the sigma clipping outlier removal",
     ).tag(config=True)
 
-    def extract(self, images, times, masked_pixels_of_sample) -> StatisticsContainer:
+    def extract(self, images, masked_pixels_of_sample) -> StatisticsContainer:
         # Mask broken pixels
         masked_images = np.ma.array(images, mask=masked_pixels_of_sample)
 
@@ -152,8 +157,6 @@ def extract(self, images, times, masked_pixels_of_sample) -> StatisticsContainer
         )
 
         return StatisticsContainer(
-            extraction_start=times[0],
-            extraction_stop=times[-1],
             mean=pixel_mean,
             median=pixel_median,
             std=pixel_std,
diff --git a/src/ctapipe/calib/camera/tests/test_extractor.py b/src/ctapipe/calib/camera/tests/test_extractor.py
index d7ef1892a06..2c777934088 100644
--- a/src/ctapipe/calib/camera/tests/test_extractor.py
+++ b/src/ctapipe/calib/camera/tests/test_extractor.py
@@ -16,48 +16,49 @@ def test_extractors(example_subarray):
     times = Time(
         np.linspace(60117.911, 60117.9258, num=5000), scale="tai", format="mjd"
     )
-    ped_dl1_data = np.random.normal(2.0, 5.0, size=(5000, 2, 1855))
-    ff_charge_dl1_data = np.random.normal(77.0, 10.0, size=(5000, 2, 1855))
-    ff_time_dl1_data = np.random.normal(18.0, 5.0, size=(5000, 2, 1855))
-    # Create dl1 tables
-    ped_dl1_table = Table(
-        [times, ped_dl1_data],
+    ped_data = np.random.normal(2.0, 5.0, size=(5000, 2, 1855))
+    charge_data = np.random.normal(77.0, 10.0, size=(5000, 2, 1855))
+    time_data = np.random.normal(18.0, 5.0, size=(5000, 2, 1855))
+    # Create tables
+    ped_table = Table(
+        [times, ped_data],
         names=("time_mono", "image"),
     )
-    ff_charge_dl1_table = Table(
-        [times, ff_charge_dl1_data],
+    charge_table = Table(
+        [times, charge_data],
         names=("time_mono", "image"),
     )
-    ff_time_dl1_table = Table(
-        [times, ff_time_dl1_data],
+    time_table = Table(
+        [times, time_data],
         names=("time_mono", "peak_time"),
     )
     # Initialize the extractors
-    ped_extractor = SigmaClippingExtractor(subarray=example_subarray, chunk_size=2500)
+    chunk_size = 2500
+    ped_extractor = SigmaClippingExtractor(
+        subarray=example_subarray, chunk_size=chunk_size
+    )
     ff_charge_extractor = SigmaClippingExtractor(
-        subarray=example_subarray, chunk_size=2500
+        subarray=example_subarray, chunk_size=chunk_size
     )
-    ff_time_extractor = PlainExtractor(subarray=example_subarray, chunk_size=2500)
+    ff_time_extractor = PlainExtractor(subarray=example_subarray, chunk_size=chunk_size)
 
     # Extract the statistical values
-    ped_stats_list = ped_extractor(dl1_table=ped_dl1_table)
-    ff_charge_stats_list = ff_charge_extractor(dl1_table=ff_charge_dl1_table)
-    ff_time_stats_list = ff_time_extractor(
-        dl1_table=ff_time_dl1_table, col_name="peak_time"
-    )
+    ped_stats = ped_extractor(table=ped_table)
+    charge_stats = ff_charge_extractor(table=charge_table)
+    time_stats = ff_time_extractor(table=time_table, col_name="peak_time")
     # Check if the calculated statistical values are reasonable
     # for a camera with two gain channels
-    assert not np.any(np.abs(ped_stats_list[0].mean - 2.0) > 1.5)
-    assert not np.any(np.abs(ff_charge_stats_list[0].mean - 77.0) > 1.5)
-    assert not np.any(np.abs(ff_time_stats_list[0].mean - 18.0) > 1.5)
+    assert not np.any(np.abs(ped_stats[times[0]].mean - 2.0) > 1.5)
+    assert not np.any(np.abs(charge_stats[times[0]].mean - 77.0) > 1.5)
+    assert not np.any(np.abs(time_stats[times[0]].mean - 18.0) > 1.5)
 
-    assert not np.any(np.abs(ped_stats_list[1].median - 2.0) > 1.5)
-    assert not np.any(np.abs(ff_charge_stats_list[1].median - 77.0) > 1.5)
-    assert not np.any(np.abs(ff_time_stats_list[1].median - 18.0) > 1.5)
+    assert not np.any(np.abs(ped_stats[times[chunk_size]].median - 2.0) > 1.5)
+    assert not np.any(np.abs(charge_stats[times[chunk_size]].median - 77.0) > 1.5)
+    assert not np.any(np.abs(time_stats[times[chunk_size]].median - 18.0) > 1.5)
 
-    assert not np.any(np.abs(ped_stats_list[0].std - 5.0) > 1.5)
-    assert not np.any(np.abs(ff_charge_stats_list[0].std - 10.0) > 1.5)
-    assert not np.any(np.abs(ff_time_stats_list[0].std - 5.0) > 1.5)
+    assert not np.any(np.abs(ped_stats[times[0]].std - 5.0) > 1.5)
+    assert not np.any(np.abs(charge_stats[times[0]].std - 10.0) > 1.5)
+    assert not np.any(np.abs(time_stats[times[0]].std - 5.0) > 1.5)
 
 
 def test_check_chunk_shift(example_subarray):
@@ -67,22 +68,18 @@ def test_check_chunk_shift(example_subarray):
     times = Time(
         np.linspace(60117.911, 60117.9258, num=5500), scale="tai", format="mjd"
     )
-    ff_dl1_data = np.random.normal(77.0, 10.0, size=(5500, 2, 1855))
-    # Create dl1 table
-    ff_dl1_table = Table(
-        [times, ff_dl1_data],
+    charge_data = np.random.normal(77.0, 10.0, size=(5500, 2, 1855))
+    # Create table
+    charge_table = Table(
+        [times, charge_data],
         names=("time_mono", "image"),
     )
     # Initialize the extractor
-    ff_charge_extractor = SigmaClippingExtractor(
-        subarray=example_subarray, chunk_size=2500
-    )
+    extractor = SigmaClippingExtractor(subarray=example_subarray, chunk_size=2500)
     # Extract the statistical values
-    stats_list = ff_charge_extractor(dl1_table=ff_dl1_table)
-    stats_list_chunk_shift = ff_charge_extractor(
-        dl1_table=ff_dl1_table, chunk_shift=2000
-    )
+    chunk_stats = extractor(table=charge_table)
+    chunk_stats_shift = extractor(table=charge_table, chunk_shift=2000)
     # Check if three chunks are used for the extraction as the last chunk overflows
-    assert len(stats_list) == 3
+    assert len(chunk_stats) == 3
     # Check if two chunks are used for the extraction as the last chunk is dropped
-    assert len(stats_list_chunk_shift) == 2
+    assert len(chunk_stats_shift) == 2
diff --git a/src/ctapipe/containers.py b/src/ctapipe/containers.py
index 9d8f146fb4a..5e943dba4f9 100644
--- a/src/ctapipe/containers.py
+++ b/src/ctapipe/containers.py
@@ -415,8 +415,6 @@ class MorphologyContainer(Container):
 class StatisticsContainer(Container):
     """Store descriptive statistics of a chunk of images"""
 
-    extraction_start = Field(NAN_TIME, "start of the extraction chunk")
-    extraction_stop = Field(NAN_TIME, "stop of the extraction chunk")
     mean = Field(
         None,
         "mean of a pixel-wise quantity for each channel"

From f2541f6093ec6e93d6e5ced39eafbd4b4e15b3dd Mon Sep 17 00:00:00 2001
From: TjarkMiener <tjark.miener4692@googlemail.com>
Date: Wed, 17 Jul 2024 13:57:45 +0200
Subject: [PATCH 47/61] fix tests

added check for exceptions
---
 src/ctapipe/calib/camera/extractor.py            | 2 +-
 src/ctapipe/calib/camera/tests/test_extractor.py | 7 +++++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/ctapipe/calib/camera/extractor.py b/src/ctapipe/calib/camera/extractor.py
index f669df82e34..6462fc31b70 100644
--- a/src/ctapipe/calib/camera/extractor.py
+++ b/src/ctapipe/calib/camera/extractor.py
@@ -72,7 +72,7 @@ def __call__(
                 f"The length of the provided table ({len(table)}) is insufficient to meet the required statistics for a single extraction chunk of size ({self.chunk_size})."
             )
         # Check if the chunk_shift is smaller than the chunk_size
-        if chunk_shift is None and chunk_shift > self.chunk_size:
+        if chunk_shift is not None and chunk_shift > self.chunk_size:
             raise ValueError(
                 f"The chunk_shift ({chunk_shift}) must be smaller than the chunk_size ({self.chunk_size})."
             )
diff --git a/src/ctapipe/calib/camera/tests/test_extractor.py b/src/ctapipe/calib/camera/tests/test_extractor.py
index 2c777934088..63df2d5bab6 100644
--- a/src/ctapipe/calib/camera/tests/test_extractor.py
+++ b/src/ctapipe/calib/camera/tests/test_extractor.py
@@ -3,6 +3,7 @@
 """
 
 import numpy as np
+import pytest
 from astropy.table import Table
 from astropy.time import Time
 
@@ -83,3 +84,9 @@ def test_check_chunk_shift(example_subarray):
     assert len(chunk_stats) == 3
     # Check if two chunks are used for the extraction as the last chunk is dropped
     assert len(chunk_stats_shift) == 2
+    # Check if ValueError is raised when the chunk_size is larger than the length of table
+    with pytest.raises(ValueError):
+        _ = extractor(table=charge_table[1000:1500])
+    # Check if ValueError is raised when the chunk_shift is smaller than the chunk_size
+    with pytest.raises(ValueError):
+        _ = extractor(table=charge_table, chunk_shift=3000)

From b772dff9cd8c6ba3c2368acc94ef2f0ac4aeaec4 Mon Sep 17 00:00:00 2001
From: TjarkMiener <tjark.miener4692@googlemail.com>
Date: Wed, 17 Jul 2024 17:27:48 +0200
Subject: [PATCH 48/61] add test with inserting outliers

---
 .../calib/camera/tests/test_extractor.py      | 36 ++++++++++++++++++-
 1 file changed, 35 insertions(+), 1 deletion(-)

diff --git a/src/ctapipe/calib/camera/tests/test_extractor.py b/src/ctapipe/calib/camera/tests/test_extractor.py
index 63df2d5bab6..cf9f8beb545 100644
--- a/src/ctapipe/calib/camera/tests/test_extractor.py
+++ b/src/ctapipe/calib/camera/tests/test_extractor.py
@@ -62,7 +62,7 @@ def test_extractors(example_subarray):
     assert not np.any(np.abs(time_stats[times[0]].std - 5.0) > 1.5)
 
 
-def test_check_chunk_shift(example_subarray):
+def test_chunk_shift(example_subarray):
     """test the chunk shift option and the boundary case for the last chunk"""
 
     # Create dummy data for testing
@@ -90,3 +90,37 @@ def test_check_chunk_shift(example_subarray):
     # Check if ValueError is raised when the chunk_shift is smaller than the chunk_size
     with pytest.raises(ValueError):
         _ = extractor(table=charge_table, chunk_shift=3000)
+
+
+def test_with_outliers(example_subarray):
+    """test the robustness of the extractors in the presence of outliers"""
+
+    # Create dummy data for testing
+    times = Time(
+        np.linspace(60117.911, 60117.9258, num=5000), scale="tai", format="mjd"
+    )
+    ped_data = np.random.normal(2.0, 5.0, size=(5000, 2, 1855))
+    # Insert fake outliers that will skrew the mean value
+    ped_data[12, 0, :] = 100000.0
+    ped_data[16, 0, :] = 100000.0
+    ped_data[18, 1, :] = 100000.0
+    ped_data[28, 1, :] = 100000.0
+    # Create table
+    ped_table = Table(
+        [times, ped_data],
+        names=("time_mono", "image"),
+    )
+    # Initialize the extractors
+    sigmaclipping_extractor = SigmaClippingExtractor(
+        subarray=example_subarray, chunk_size=2500
+    )
+    plain_extractor = PlainExtractor(subarray=example_subarray, chunk_size=2500)
+
+    # Extract the statistical values
+    sigmaclipping_chunk_stats = sigmaclipping_extractor(table=ped_table)
+    plain_chunk_stats = plain_extractor(table=ped_table)
+
+    # Check if SigmaClippingExtractor is robust to a few fake outliers as expected
+    assert not np.any(np.abs(sigmaclipping_chunk_stats[times[0]].mean - 2.0) > 1.5)
+    # Check if PlainExtractor is not robust to a few fake outliers as expected
+    assert np.any(np.abs(plain_chunk_stats[times[0]].mean - 2.0) > 1.5)

From 16914b69b54245fac5c00b2517b4dec259c8509d Mon Sep 17 00:00:00 2001
From: TjarkMiener <tjark.miener4692@googlemail.com>
Date: Thu, 18 Jul 2024 09:01:03 +0200
Subject: [PATCH 49/61] update docstring

before we returned a list, but now a dict

also update description of the returning dict
---
 src/ctapipe/calib/camera/extractor.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/ctapipe/calib/camera/extractor.py b/src/ctapipe/calib/camera/extractor.py
index 6462fc31b70..95fa61b6737 100644
--- a/src/ctapipe/calib/camera/extractor.py
+++ b/src/ctapipe/calib/camera/extractor.py
@@ -35,7 +35,7 @@ def __call__(
         masked_pixels_of_sample=None,
         chunk_shift=None,
         col_name="image",
-    ) -> list:
+    ) -> dict:
         """
         Divide table into chunks and extract the statistical values.
 
@@ -62,8 +62,9 @@ def __call__(
 
         Returns
         -------
-        List StatisticsContainer:
-            List of extracted statistics and extraction chunks
+        dict
+            dictionary where keys are the first timestamp of each chunk and values are `StatisticsContainer`
+            instances containing the extracted statistics (mean, median, std) for that chunk.
         """
 
         # Check if the statistics of the table is sufficient to extract at least one chunk.

From 2a62ab36e3914c4217794d1865325467222ac90c Mon Sep 17 00:00:00 2001
From: TjarkMiener <tjark.miener4692@googlemail.com>
Date: Thu, 18 Jul 2024 13:29:00 +0200
Subject: [PATCH 50/61] return Table instead of dict

added end of the chunk as well

added also event IDs for the range
---
 src/ctapipe/calib/camera/extractor.py         | 45 ++++++++--------
 .../calib/camera/tests/test_extractor.py      | 52 +++++++++++--------
 2 files changed, 55 insertions(+), 42 deletions(-)

diff --git a/src/ctapipe/calib/camera/extractor.py b/src/ctapipe/calib/camera/extractor.py
index 95fa61b6737..09e1be1e421 100644
--- a/src/ctapipe/calib/camera/extractor.py
+++ b/src/ctapipe/calib/camera/extractor.py
@@ -9,9 +9,11 @@
 ]
 
 from abc import abstractmethod
+from collections import defaultdict
 
 import numpy as np
 from astropy.stats import sigma_clipped_stats
+from astropy.table import Table
 
 from ctapipe.containers import StatisticsContainer
 from ctapipe.core import TelescopeComponent
@@ -35,7 +37,7 @@ def __call__(
         masked_pixels_of_sample=None,
         chunk_shift=None,
         col_name="image",
-    ) -> dict:
+    ) -> Table:
         """
         Divide table into chunks and extract the statistical values.
 
@@ -62,9 +64,9 @@ def __call__(
 
         Returns
         -------
-        dict
-            dictionary where keys are the first timestamp of each chunk and values are `StatisticsContainer`
-            instances containing the extracted statistics (mean, median, std) for that chunk.
+        astropy.table.Table
+            table containing the start and end values as timestamps and event IDs
+            as well as the extracted statistics (mean, median, std) for each chunk
         """
 
         # Check if the statistics of the table is sufficient to extract at least one chunk.
@@ -91,18 +93,19 @@ def _get_chunks(table, chunk_shift):
             if chunk_shift is None and len(table) % self.chunk_size != 0:
                 yield table[-self.chunk_size :]
 
-        # Get the chunks of the table
-        chunks = _get_chunks(table, chunk_shift)
+        # Calculate the statistics for each chunk of images
+        data = defaultdict(list)
+        for chunk in _get_chunks(table, chunk_shift):
+            stats = self.extract(chunk[col_name].data, masked_pixels_of_sample)
+            data["time_start"].append(chunk["time_mono"][0])
+            data["time_end"].append(chunk["time_mono"][-1])
+            data["event_id_start"].append(chunk["event_id"][0])
+            data["event_id_end"].append(chunk["event_id"][-1])
+            data["mean"].append(stats.mean)
+            data["median"].append(stats.median)
+            data["std"].append(stats.std)
 
-        # Calculate the statistics from a chunk of images
-        chunk_stats = {
-            chunk["time_mono"][0]: self.extract(
-                chunk[col_name].data, masked_pixels_of_sample
-            )
-            for chunk in chunks
-        }
-
-        return chunk_stats
+        return Table(data)
 
     @abstractmethod
     def extract(self, images, masked_pixels_of_sample) -> StatisticsContainer:
@@ -119,14 +122,14 @@ def extract(self, images, masked_pixels_of_sample) -> StatisticsContainer:
         masked_images = np.ma.array(images, mask=masked_pixels_of_sample)
 
         # Calculate the mean, median, and std over the chunk per pixel
-        pixel_mean = np.ma.mean(masked_images, axis=0)
-        pixel_median = np.ma.median(masked_images, axis=0)
-        pixel_std = np.ma.std(masked_images, axis=0)
+        pixel_mean = np.ma.mean(masked_images, axis=0).filled(np.nan)
+        pixel_median = np.ma.median(masked_images, axis=0).filled(np.nan)
+        pixel_std = np.ma.std(masked_images, axis=0).filled(np.nan)
 
         return StatisticsContainer(
-            mean=pixel_mean.filled(np.nan),
-            median=pixel_median.filled(np.nan),
-            std=pixel_std.filled(np.nan),
+            mean=pixel_mean,
+            median=pixel_median,
+            std=pixel_std,
         )
 
 
diff --git a/src/ctapipe/calib/camera/tests/test_extractor.py b/src/ctapipe/calib/camera/tests/test_extractor.py
index cf9f8beb545..d5ac3693cda 100644
--- a/src/ctapipe/calib/camera/tests/test_extractor.py
+++ b/src/ctapipe/calib/camera/tests/test_extractor.py
@@ -17,21 +17,22 @@ def test_extractors(example_subarray):
     times = Time(
         np.linspace(60117.911, 60117.9258, num=5000), scale="tai", format="mjd"
     )
+    event_ids = np.linspace(35, 725000, num=5000, dtype=int)
     ped_data = np.random.normal(2.0, 5.0, size=(5000, 2, 1855))
     charge_data = np.random.normal(77.0, 10.0, size=(5000, 2, 1855))
     time_data = np.random.normal(18.0, 5.0, size=(5000, 2, 1855))
     # Create tables
     ped_table = Table(
-        [times, ped_data],
-        names=("time_mono", "image"),
+        [times, event_ids, ped_data],
+        names=("time_mono", "event_id", "image"),
     )
     charge_table = Table(
-        [times, charge_data],
-        names=("time_mono", "image"),
+        [times, event_ids, charge_data],
+        names=("time_mono", "event_id", "image"),
     )
     time_table = Table(
-        [times, time_data],
-        names=("time_mono", "peak_time"),
+        [times, event_ids, time_data],
+        names=("time_mono", "event_id", "peak_time"),
     )
     # Initialize the extractors
     chunk_size = 2500
@@ -47,19 +48,26 @@ def test_extractors(example_subarray):
     ped_stats = ped_extractor(table=ped_table)
     charge_stats = ff_charge_extractor(table=charge_table)
     time_stats = ff_time_extractor(table=time_table, col_name="peak_time")
+
+    # Check if the start and end values are properly set for the timestamps and event IDs
+    assert ped_stats[0]["time_start"] == times[0]
+    assert time_stats[0]["event_id_start"] == event_ids[0]
+    assert ped_stats[1]["time_end"] == times[-1]
+    assert time_stats[1]["event_id_end"] == event_ids[-1]
+
     # Check if the calculated statistical values are reasonable
     # for a camera with two gain channels
-    assert not np.any(np.abs(ped_stats[times[0]].mean - 2.0) > 1.5)
-    assert not np.any(np.abs(charge_stats[times[0]].mean - 77.0) > 1.5)
-    assert not np.any(np.abs(time_stats[times[0]].mean - 18.0) > 1.5)
+    assert not np.any(np.abs(ped_stats[0]["mean"] - 2.0) > 1.5)
+    assert not np.any(np.abs(charge_stats[0]["mean"] - 77.0) > 1.5)
+    assert not np.any(np.abs(time_stats[0]["mean"] - 18.0) > 1.5)
 
-    assert not np.any(np.abs(ped_stats[times[chunk_size]].median - 2.0) > 1.5)
-    assert not np.any(np.abs(charge_stats[times[chunk_size]].median - 77.0) > 1.5)
-    assert not np.any(np.abs(time_stats[times[chunk_size]].median - 18.0) > 1.5)
+    assert not np.any(np.abs(ped_stats[1]["median"] - 2.0) > 1.5)
+    assert not np.any(np.abs(charge_stats[1]["median"] - 77.0) > 1.5)
+    assert not np.any(np.abs(time_stats[1]["median"] - 18.0) > 1.5)
 
-    assert not np.any(np.abs(ped_stats[times[0]].std - 5.0) > 1.5)
-    assert not np.any(np.abs(charge_stats[times[0]].std - 10.0) > 1.5)
-    assert not np.any(np.abs(time_stats[times[0]].std - 5.0) > 1.5)
+    assert not np.any(np.abs(ped_stats[0]["std"] - 5.0) > 1.5)
+    assert not np.any(np.abs(charge_stats[0]["std"] - 10.0) > 1.5)
+    assert not np.any(np.abs(time_stats[0]["std"] - 5.0) > 1.5)
 
 
 def test_chunk_shift(example_subarray):
@@ -69,11 +77,12 @@ def test_chunk_shift(example_subarray):
     times = Time(
         np.linspace(60117.911, 60117.9258, num=5500), scale="tai", format="mjd"
     )
+    event_ids = np.linspace(35, 725000, num=5500, dtype=int)
     charge_data = np.random.normal(77.0, 10.0, size=(5500, 2, 1855))
     # Create table
     charge_table = Table(
-        [times, charge_data],
-        names=("time_mono", "image"),
+        [times, event_ids, charge_data],
+        names=("time_mono", "event_id", "image"),
     )
     # Initialize the extractor
     extractor = SigmaClippingExtractor(subarray=example_subarray, chunk_size=2500)
@@ -99,6 +108,7 @@ def test_with_outliers(example_subarray):
     times = Time(
         np.linspace(60117.911, 60117.9258, num=5000), scale="tai", format="mjd"
     )
+    event_ids = np.linspace(35, 725000, num=5000, dtype=int)
     ped_data = np.random.normal(2.0, 5.0, size=(5000, 2, 1855))
     # Insert fake outliers that will skrew the mean value
     ped_data[12, 0, :] = 100000.0
@@ -107,8 +117,8 @@ def test_with_outliers(example_subarray):
     ped_data[28, 1, :] = 100000.0
     # Create table
     ped_table = Table(
-        [times, ped_data],
-        names=("time_mono", "image"),
+        [times, event_ids, ped_data],
+        names=("time_mono", "event_id", "image"),
     )
     # Initialize the extractors
     sigmaclipping_extractor = SigmaClippingExtractor(
@@ -121,6 +131,6 @@ def test_with_outliers(example_subarray):
     plain_chunk_stats = plain_extractor(table=ped_table)
 
     # Check if SigmaClippingExtractor is robust to a few fake outliers as expected
-    assert not np.any(np.abs(sigmaclipping_chunk_stats[times[0]].mean - 2.0) > 1.5)
+    assert not np.any(np.abs(sigmaclipping_chunk_stats[0]["mean"] - 2.0) > 1.5)
     # Check if PlainExtractor is not robust to a few fake outliers as expected
-    assert np.any(np.abs(plain_chunk_stats[times[0]].mean - 2.0) > 1.5)
+    assert np.any(np.abs(plain_chunk_stats[0]["mean"] - 2.0) > 1.5)

From 0ef25c6861f0f53c937e5fc8c9708c486350ba2d Mon Sep 17 00:00:00 2001
From: TjarkMiener <tjark.miener4692@googlemail.com>
Date: Mon, 22 Jul 2024 12:20:40 +0200
Subject: [PATCH 51/61] attach units (if any) to the stats table

---
 src/ctapipe/calib/camera/extractor.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/ctapipe/calib/camera/extractor.py b/src/ctapipe/calib/camera/extractor.py
index 09e1be1e421..9a8c6ca8332 100644
--- a/src/ctapipe/calib/camera/extractor.py
+++ b/src/ctapipe/calib/camera/extractor.py
@@ -94,6 +94,7 @@ def _get_chunks(table, chunk_shift):
                 yield table[-self.chunk_size :]
 
         # Calculate the statistics for each chunk of images
+        units = {col: table[col_name].unit for col in ("mean", "median", "std")}
         data = defaultdict(list)
         for chunk in _get_chunks(table, chunk_shift):
             stats = self.extract(chunk[col_name].data, masked_pixels_of_sample)
@@ -105,7 +106,7 @@ def _get_chunks(table, chunk_shift):
             data["median"].append(stats.median)
             data["std"].append(stats.std)
 
-        return Table(data)
+        return Table(data, units=units)
 
     @abstractmethod
     def extract(self, images, masked_pixels_of_sample) -> StatisticsContainer:

From 051e66d8088edfb1142856ffea93da2fcbc631f2 Mon Sep 17 00:00:00 2001
From: TjarkMiener <tjark.miener4692@googlemail.com>
Date: Thu, 25 Jul 2024 13:31:03 +0200
Subject: [PATCH 52/61] fix random seed in tests

---
 src/ctapipe/calib/camera/tests/test_extractor.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/ctapipe/calib/camera/tests/test_extractor.py b/src/ctapipe/calib/camera/tests/test_extractor.py
index d5ac3693cda..09d1100c88f 100644
--- a/src/ctapipe/calib/camera/tests/test_extractor.py
+++ b/src/ctapipe/calib/camera/tests/test_extractor.py
@@ -18,9 +18,10 @@ def test_extractors(example_subarray):
         np.linspace(60117.911, 60117.9258, num=5000), scale="tai", format="mjd"
     )
     event_ids = np.linspace(35, 725000, num=5000, dtype=int)
-    ped_data = np.random.normal(2.0, 5.0, size=(5000, 2, 1855))
-    charge_data = np.random.normal(77.0, 10.0, size=(5000, 2, 1855))
-    time_data = np.random.normal(18.0, 5.0, size=(5000, 2, 1855))
+    rng = np.random.default_rng(0)
+    ped_data = rng.normal(2.0, 5.0, size=(5000, 2, 1855))
+    charge_data = rng.normal(77.0, 10.0, size=(5000, 2, 1855))
+    time_data = rng.normal(18.0, 5.0, size=(5000, 2, 1855))
     # Create tables
     ped_table = Table(
         [times, event_ids, ped_data],
@@ -78,7 +79,8 @@ def test_chunk_shift(example_subarray):
         np.linspace(60117.911, 60117.9258, num=5500), scale="tai", format="mjd"
     )
     event_ids = np.linspace(35, 725000, num=5500, dtype=int)
-    charge_data = np.random.normal(77.0, 10.0, size=(5500, 2, 1855))
+    rng = np.random.default_rng(0)
+    charge_data = rng.normal(77.0, 10.0, size=(5500, 2, 1855))
     # Create table
     charge_table = Table(
         [times, event_ids, charge_data],
@@ -109,7 +111,8 @@ def test_with_outliers(example_subarray):
         np.linspace(60117.911, 60117.9258, num=5000), scale="tai", format="mjd"
     )
     event_ids = np.linspace(35, 725000, num=5000, dtype=int)
-    ped_data = np.random.normal(2.0, 5.0, size=(5000, 2, 1855))
+    rng = np.random.default_rng(0)
+    ped_data = rng.normal(2.0, 5.0, size=(5000, 2, 1855))
     # Insert fake outliers that will skrew the mean value
     ped_data[12, 0, :] = 100000.0
     ped_data[16, 0, :] = 100000.0

From 3b6b76b1d0f55765850034839edf4cb672cdcdad Mon Sep 17 00:00:00 2001
From: TjarkMiener <tjark.miener4692@googlemail.com>
Date: Thu, 25 Jul 2024 14:05:58 +0200
Subject: [PATCH 53/61] use np.testing.assert_allclose for tests

---
 .../calib/camera/tests/test_extractor.py      | 24 ++++++++++---------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/src/ctapipe/calib/camera/tests/test_extractor.py b/src/ctapipe/calib/camera/tests/test_extractor.py
index 09d1100c88f..c3001620ac3 100644
--- a/src/ctapipe/calib/camera/tests/test_extractor.py
+++ b/src/ctapipe/calib/camera/tests/test_extractor.py
@@ -58,17 +58,17 @@ def test_extractors(example_subarray):
 
     # Check if the calculated statistical values are reasonable
     # for a camera with two gain channels
-    assert not np.any(np.abs(ped_stats[0]["mean"] - 2.0) > 1.5)
-    assert not np.any(np.abs(charge_stats[0]["mean"] - 77.0) > 1.5)
-    assert not np.any(np.abs(time_stats[0]["mean"] - 18.0) > 1.5)
+    np.testing.assert_allclose(ped_stats[0]["mean"], 2.0, atol=1.5)
+    np.testing.assert_allclose(charge_stats[0]["mean"], 77.0, atol=1.5)
+    np.testing.assert_allclose(time_stats[0]["mean"], 18.0, atol=1.5)
 
-    assert not np.any(np.abs(ped_stats[1]["median"] - 2.0) > 1.5)
-    assert not np.any(np.abs(charge_stats[1]["median"] - 77.0) > 1.5)
-    assert not np.any(np.abs(time_stats[1]["median"] - 18.0) > 1.5)
+    np.testing.assert_allclose(ped_stats[1]["median"], 2.0, atol=1.5)
+    np.testing.assert_allclose(charge_stats[1]["median"], 77.0, atol=1.5)
+    np.testing.assert_allclose(time_stats[1]["median"], 18.0, atol=1.5)
 
-    assert not np.any(np.abs(ped_stats[0]["std"] - 5.0) > 1.5)
-    assert not np.any(np.abs(charge_stats[0]["std"] - 10.0) > 1.5)
-    assert not np.any(np.abs(time_stats[0]["std"] - 5.0) > 1.5)
+    np.testing.assert_allclose(ped_stats[0]["std"], 5.0, atol=1.5)
+    np.testing.assert_allclose(charge_stats[0]["std"], 10.0, atol=1.5)
+    np.testing.assert_allclose(time_stats[0]["std"], 5.0, atol=1.5)
 
 
 def test_chunk_shift(example_subarray):
@@ -134,6 +134,8 @@ def test_with_outliers(example_subarray):
     plain_chunk_stats = plain_extractor(table=ped_table)
 
     # Check if SigmaClippingExtractor is robust to a few fake outliers as expected
-    assert not np.any(np.abs(sigmaclipping_chunk_stats[0]["mean"] - 2.0) > 1.5)
+    np.testing.assert_allclose(sigmaclipping_chunk_stats[0]["mean"], 2.0, atol=1.5)
+
     # Check if PlainExtractor is not robust to a few fake outliers as expected
-    assert np.any(np.abs(plain_chunk_stats[0]["mean"] - 2.0) > 1.5)
+    with pytest.raises(AssertionError):
+        np.testing.assert_allclose(plain_chunk_stats[0]["mean"], 2.0, atol=1.5)

From bc4156b4122019acea32c5dd1fcca72d8e15afac Mon Sep 17 00:00:00 2001
From: TjarkMiener <tjark.miener4692@googlemail.com>
Date: Wed, 31 Jul 2024 16:25:29 +0200
Subject: [PATCH 54/61] added number of events for the stats extraction

---
 src/ctapipe/calib/camera/extractor.py            | 3 +++
 src/ctapipe/calib/camera/tests/test_extractor.py | 2 ++
 src/ctapipe/containers.py                        | 1 +
 3 files changed, 6 insertions(+)

diff --git a/src/ctapipe/calib/camera/extractor.py b/src/ctapipe/calib/camera/extractor.py
index 9a8c6ca8332..8cfa9fb1efe 100644
--- a/src/ctapipe/calib/camera/extractor.py
+++ b/src/ctapipe/calib/camera/extractor.py
@@ -102,6 +102,7 @@ def _get_chunks(table, chunk_shift):
             data["time_end"].append(chunk["time_mono"][-1])
             data["event_id_start"].append(chunk["event_id"][0])
             data["event_id_end"].append(chunk["event_id"][-1])
+            data["n_events"].append(stats.n_events)
             data["mean"].append(stats.mean)
             data["median"].append(stats.median)
             data["std"].append(stats.std)
@@ -128,6 +129,7 @@ def extract(self, images, masked_pixels_of_sample) -> StatisticsContainer:
         pixel_std = np.ma.std(masked_images, axis=0).filled(np.nan)
 
         return StatisticsContainer(
+            n_events=masked_images.shape[0],
             mean=pixel_mean,
             median=pixel_median,
             std=pixel_std,
@@ -162,6 +164,7 @@ def extract(self, images, masked_pixels_of_sample) -> StatisticsContainer:
         )
 
         return StatisticsContainer(
+            n_events=masked_images.shape[0],
             mean=pixel_mean,
             median=pixel_median,
             std=pixel_std,
diff --git a/src/ctapipe/calib/camera/tests/test_extractor.py b/src/ctapipe/calib/camera/tests/test_extractor.py
index c3001620ac3..4eda6e04289 100644
--- a/src/ctapipe/calib/camera/tests/test_extractor.py
+++ b/src/ctapipe/calib/camera/tests/test_extractor.py
@@ -51,10 +51,12 @@ def test_extractors(example_subarray):
     time_stats = ff_time_extractor(table=time_table, col_name="peak_time")
 
     # Check if the start and end values are properly set for the timestamps and event IDs
+    # and if the number of events used for the extraction of the statistics is equal the size of the chunk
     assert ped_stats[0]["time_start"] == times[0]
     assert time_stats[0]["event_id_start"] == event_ids[0]
     assert ped_stats[1]["time_end"] == times[-1]
     assert time_stats[1]["event_id_end"] == event_ids[-1]
+    np.testing.assert_allclose(ped_stats["n_events"], chunk_size)
 
     # Check if the calculated statistical values are reasonable
     # for a camera with two gain channels
diff --git a/src/ctapipe/containers.py b/src/ctapipe/containers.py
index 5e943dba4f9..36c42933b77 100644
--- a/src/ctapipe/containers.py
+++ b/src/ctapipe/containers.py
@@ -415,6 +415,7 @@ class MorphologyContainer(Container):
 class StatisticsContainer(Container):
     """Store descriptive statistics of a chunk of images"""
 
+    n_events = Field(-1, "number of events used for the extraction of the statistics")
     mean = Field(
         None,
         "mean of a pixel-wise quantity for each channel"

From 5a143dd56e3101007755a2a223a583ebdf1d0a4e Mon Sep 17 00:00:00 2001
From: TjarkMiener <tjark.miener4692@googlemail.com>
Date: Wed, 31 Jul 2024 16:31:30 +0200
Subject: [PATCH 55/61] moved extractor to monitoring module

---
 src/ctapipe/{calib/camera => monitoring}/extractor.py           | 0
 src/ctapipe/monitoring/tests/__init__.py                        | 0
 .../{calib/camera => monitoring}/tests/test_extractor.py        | 2 +-
 3 files changed, 1 insertion(+), 1 deletion(-)
 rename src/ctapipe/{calib/camera => monitoring}/extractor.py (100%)
 create mode 100644 src/ctapipe/monitoring/tests/__init__.py
 rename src/ctapipe/{calib/camera => monitoring}/tests/test_extractor.py (98%)

diff --git a/src/ctapipe/calib/camera/extractor.py b/src/ctapipe/monitoring/extractor.py
similarity index 100%
rename from src/ctapipe/calib/camera/extractor.py
rename to src/ctapipe/monitoring/extractor.py
diff --git a/src/ctapipe/monitoring/tests/__init__.py b/src/ctapipe/monitoring/tests/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/src/ctapipe/calib/camera/tests/test_extractor.py b/src/ctapipe/monitoring/tests/test_extractor.py
similarity index 98%
rename from src/ctapipe/calib/camera/tests/test_extractor.py
rename to src/ctapipe/monitoring/tests/test_extractor.py
index 4eda6e04289..6a3a441cf12 100644
--- a/src/ctapipe/calib/camera/tests/test_extractor.py
+++ b/src/ctapipe/monitoring/tests/test_extractor.py
@@ -7,7 +7,7 @@
 from astropy.table import Table
 from astropy.time import Time
 
-from ctapipe.calib.camera.extractor import PlainExtractor, SigmaClippingExtractor
+from ctapipe.monitoring.extractor import PlainExtractor, SigmaClippingExtractor
 
 
 def test_extractors(example_subarray):

From cde68571a8f39aa78683551c594bd5ed258c29d5 Mon Sep 17 00:00:00 2001
From: TjarkMiener <tjark.miener4692@googlemail.com>
Date: Thu, 1 Aug 2024 10:57:54 +0200
Subject: [PATCH 56/61] update docs for monitoring module

---
 docs/api-reference/monitoring/extractor.rst | 11 ++++++++
 docs/api-reference/monitoring/index.rst     | 28 +++++++++++++++++++++
 2 files changed, 39 insertions(+)
 create mode 100644 docs/api-reference/monitoring/extractor.rst
 create mode 100644 docs/api-reference/monitoring/index.rst

diff --git a/docs/api-reference/monitoring/extractor.rst b/docs/api-reference/monitoring/extractor.rst
new file mode 100644
index 00000000000..1806b1479b1
--- /dev/null
+++ b/docs/api-reference/monitoring/extractor.rst
@@ -0,0 +1,11 @@
+.. _stats_extractor:
+
+********************
+Statistics Extractor
+********************
+
+
+Reference/API
+=============
+
+.. automodapi:: ctapipe.monitoring.extractor
diff --git a/docs/api-reference/monitoring/index.rst b/docs/api-reference/monitoring/index.rst
new file mode 100644
index 00000000000..ed5472bf246
--- /dev/null
+++ b/docs/api-reference/monitoring/index.rst
@@ -0,0 +1,28 @@
+.. _monitoring:
+
+**********************************
+Monitoring (`~ctapipe.monitoring`)
+**********************************
+
+.. currentmodule:: ctapipe.monitoring
+
+This module include all the functions and classes needed for the Monitoring of CTA data.
+
+Currently, only code related to :ref:`_stats_extractor` is implemented here.
+
+
+Submodules
+==========
+
+.. toctree::
+  :maxdepth: 1
+  :glob:
+
+  extractor
+
+
+Reference/API
+=============
+
+.. automodapi:: ctapipe.monitoring
+    :no-inheritance-diagram:

From 8fe3507c20a10e4839ea2e6251d9f2d455cbeb81 Mon Sep 17 00:00:00 2001
From: TjarkMiener <tjark.miener4692@googlemail.com>
Date: Thu, 1 Aug 2024 11:10:20 +0200
Subject: [PATCH 57/61] fix docs

---
 docs/api-reference/monitoring/index.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/api-reference/monitoring/index.rst b/docs/api-reference/monitoring/index.rst
index ed5472bf246..7143b6b8b20 100644
--- a/docs/api-reference/monitoring/index.rst
+++ b/docs/api-reference/monitoring/index.rst
@@ -8,7 +8,7 @@ Monitoring (`~ctapipe.monitoring`)
 
 This module include all the functions and classes needed for the Monitoring of CTA data.
 
-Currently, only code related to :ref:`_stats_extractor` is implemented here.
+Currently, only code related to :ref:`stats_extractor` is implemented here.
 
 
 Submodules

From 52757083d40e9036f8c88b3c03171b8523583d08 Mon Sep 17 00:00:00 2001
From: TjarkMiener <tjark.miener4692@googlemail.com>
Date: Thu, 1 Aug 2024 11:39:50 +0200
Subject: [PATCH 58/61] fix docstring with double back-ticks

---
 src/ctapipe/monitoring/extractor.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/ctapipe/monitoring/extractor.py b/src/ctapipe/monitoring/extractor.py
index 8cfa9fb1efe..190d3c9cff9 100644
--- a/src/ctapipe/monitoring/extractor.py
+++ b/src/ctapipe/monitoring/extractor.py
@@ -41,12 +41,12 @@ def __call__(
         """
         Divide table into chunks and extract the statistical values.
 
-        This function divides the input table into overlapping or non-overlapping chunks of size `chunk_size`
+        This function divides the input table into overlapping or non-overlapping chunks of size ``chunk_size``
         and call the relevant function of the particular extractor to extract the statistical values.
         The chunks are generated in a way that ensures they do not overflow the bounds of the table.
-        - If `chunk_shift` is None, extraction chunks will not overlap, but the last chunk is ensured to be
+        - If ``chunk_shift`` is None, extraction chunks will not overlap, but the last chunk is ensured to be
         of size `chunk_size`, even if it means the last two chunks will overlap.
-        - If `chunk_shift` is provided, it will determine the number of samples to shift between the start
+        - If ``chunk_shift`` is provided, it will determine the number of samples to shift between the start
         of consecutive chunks resulting in an overlap of extraction chunks. Chunks that overflows the bounds
         of the table are not considered.
 

From fbab2d1c37c042d9cf481bea00f02e42ec2a8056 Mon Sep 17 00:00:00 2001
From: TjarkMiener <tjark.miener4692@googlemail.com>
Date: Fri, 2 Aug 2024 16:08:58 +0200
Subject: [PATCH 59/61] update docs and rename extractor to aggregator

---
 docs/api-reference/monitoring/aggregator.rst  | 11 ++++
 docs/api-reference/monitoring/extractor.rst   | 11 ----
 docs/api-reference/monitoring/index.rst       |  8 ++-
 .../{extractor.py => aggregator.py}           | 60 +++++++++--------
 .../{test_extractor.py => test_aggregator.py} | 64 ++++++++++---------
 5 files changed, 82 insertions(+), 72 deletions(-)
 create mode 100644 docs/api-reference/monitoring/aggregator.rst
 delete mode 100644 docs/api-reference/monitoring/extractor.rst
 rename src/ctapipe/monitoring/{extractor.py => aggregator.py} (68%)
 rename src/ctapipe/monitoring/tests/{test_extractor.py => test_aggregator.py} (66%)

diff --git a/docs/api-reference/monitoring/aggregator.rst b/docs/api-reference/monitoring/aggregator.rst
new file mode 100644
index 00000000000..0420d543afa
--- /dev/null
+++ b/docs/api-reference/monitoring/aggregator.rst
@@ -0,0 +1,11 @@
+.. _stats_aggregator:
+
+*********************
+Statistics Aggregator
+*********************
+
+
+Reference/API
+=============
+
+.. automodapi:: ctapipe.monitoring.aggregator
diff --git a/docs/api-reference/monitoring/extractor.rst b/docs/api-reference/monitoring/extractor.rst
deleted file mode 100644
index 1806b1479b1..00000000000
--- a/docs/api-reference/monitoring/extractor.rst
+++ /dev/null
@@ -1,11 +0,0 @@
-.. _stats_extractor:
-
-********************
-Statistics Extractor
-********************
-
-
-Reference/API
-=============
-
-.. automodapi:: ctapipe.monitoring.extractor
diff --git a/docs/api-reference/monitoring/index.rst b/docs/api-reference/monitoring/index.rst
index 7143b6b8b20..9d1fbc3890e 100644
--- a/docs/api-reference/monitoring/index.rst
+++ b/docs/api-reference/monitoring/index.rst
@@ -6,9 +6,11 @@ Monitoring (`~ctapipe.monitoring`)
 
 .. currentmodule:: ctapipe.monitoring
 
-This module include all the functions and classes needed for the Monitoring of CTA data.
+Monitoring data are time-series used to monitor the status or quality of hardware, software algorithms, the environment, or other data products. These contain values recorded periodically at different rates, and can be thought of as a set of tables with rows identified by a time-stamp. They are potentially acquired during the day or nighttime operation of the array and during subsequent data processing, but ataverage rates much slower than Event data and faster than the length of a typical observation block. Examples include telescope tracking positions, trigger rates, camera sensor conditions, weather conditions, and the status or quality-control data of a particular hardware or software component.
 
-Currently, only code related to :ref:`stats_extractor` is implemented here.
+This module provides some code to help to generate monitoring data from processed event data, particularly for the purposes of calibration and data quality assessment.
+
+Currently, only code related to :ref:`stats_aggregator` is implemented here.
 
 
 Submodules
@@ -18,7 +20,7 @@ Submodules
   :maxdepth: 1
   :glob:
 
-  extractor
+  aggregator
 
 
 Reference/API
diff --git a/src/ctapipe/monitoring/extractor.py b/src/ctapipe/monitoring/aggregator.py
similarity index 68%
rename from src/ctapipe/monitoring/extractor.py
rename to src/ctapipe/monitoring/aggregator.py
index 190d3c9cff9..b3f442f6e27 100644
--- a/src/ctapipe/monitoring/extractor.py
+++ b/src/ctapipe/monitoring/aggregator.py
@@ -1,11 +1,17 @@
 """
-Extraction algorithms to compute the statistics from a chunk of images
+Algorithms to compute aggregated time-series statistics from columns of an event table.
+
+These classes take as input an events table, divide it into time chunks, which
+may optionally overlap, and compute various aggregated statistics for each
+chunk.  The statistics include the count, mean, median, and standard deviation. The result
+is a monitoring table with columns describing the start and stop time of the chunk
+and the aggregated statistic values.
 """
 
 __all__ = [
-    "StatisticsExtractor",
-    "PlainExtractor",
-    "SigmaClippingExtractor",
+    "StatisticsAggregator",
+    "PlainAggregator",
+    "SigmaClippingAggregator",
 ]
 
 from abc import abstractmethod
@@ -20,15 +26,15 @@
 from ctapipe.core.traits import Int
 
 
-class StatisticsExtractor(TelescopeComponent):
+class StatisticsAggregator(TelescopeComponent):
     """
-    Base component to handle the extraction of the statistics from a table
+    Base component to handle the computation of aggregated statistic values from a table
     containing e.g. charges, peak times and/or charge variances (images).
     """
 
     chunk_size = Int(
         2500,
-        help="Size of the chunk used for the calculation of the statistical values",
+        help="Size of the chunk used for the computation of aggregated statistic values",
     ).tag(config=True)
 
     def __call__(
@@ -39,26 +45,26 @@ def __call__(
         col_name="image",
     ) -> Table:
         """
-        Divide table into chunks and extract the statistical values.
+        Divide table into chunks and compute aggregated statistic values.
 
         This function divides the input table into overlapping or non-overlapping chunks of size ``chunk_size``
-        and call the relevant function of the particular extractor to extract the statistical values.
+        and call the relevant function of the particular aggregator to compute aggregated statistic values.
         The chunks are generated in a way that ensures they do not overflow the bounds of the table.
-        - If ``chunk_shift`` is None, extraction chunks will not overlap, but the last chunk is ensured to be
+        - If ``chunk_shift`` is None, chunks will not overlap, but the last chunk is ensured to be
         of size `chunk_size`, even if it means the last two chunks will overlap.
         - If ``chunk_shift`` is provided, it will determine the number of samples to shift between the start
-        of consecutive chunks resulting in an overlap of extraction chunks. Chunks that overflows the bounds
+        of consecutive chunks resulting in an overlap of chunks. Chunks that overflows the bounds
         of the table are not considered.
 
         Parameters
         ----------
         table : astropy.table.Table
             table with images of shape (n_images, n_channels, n_pix)
-            and timestamps of shape (n_images, ) stored in an astropy Table
+            and timestamps of shape (n_images, )
         masked_pixels_of_sample : ndarray, optional
             boolean array of masked pixels of shape (n_pix, ) that are not available for processing
         chunk_shift : int, optional
-            number of samples to shift between the start of consecutive extraction chunks
+            number of samples to shift between the start of consecutive chunks
         col_name : string
             column name in the table
 
@@ -66,13 +72,13 @@ def __call__(
         -------
         astropy.table.Table
             table containing the start and end values as timestamps and event IDs
-            as well as the extracted statistics (mean, median, std) for each chunk
+            as well as the aggregated statistic values (mean, median, std) for each chunk
         """
 
-        # Check if the statistics of the table is sufficient to extract at least one chunk.
+        # Check if the statistics of the table is sufficient to compute at least one complete chunk.
         if len(table) < self.chunk_size:
             raise ValueError(
-                f"The length of the provided table ({len(table)}) is insufficient to meet the required statistics for a single extraction chunk of size ({self.chunk_size})."
+                f"The length of the provided table ({len(table)}) is insufficient to meet the required statistics for a single chunk of size ({self.chunk_size})."
             )
         # Check if the chunk_shift is smaller than the chunk_size
         if chunk_shift is not None and chunk_shift > self.chunk_size:
@@ -93,11 +99,11 @@ def _get_chunks(table, chunk_shift):
             if chunk_shift is None and len(table) % self.chunk_size != 0:
                 yield table[-self.chunk_size :]
 
-        # Calculate the statistics for each chunk of images
+        # Compute aggregated statistic values for each chunk of images
         units = {col: table[col_name].unit for col in ("mean", "median", "std")}
         data = defaultdict(list)
         for chunk in _get_chunks(table, chunk_shift):
-            stats = self.extract(chunk[col_name].data, masked_pixels_of_sample)
+            stats = self.compute_stats(chunk[col_name].data, masked_pixels_of_sample)
             data["time_start"].append(chunk["time_mono"][0])
             data["time_end"].append(chunk["time_mono"][-1])
             data["event_id_start"].append(chunk["event_id"][0])
@@ -110,20 +116,20 @@ def _get_chunks(table, chunk_shift):
         return Table(data, units=units)
 
     @abstractmethod
-    def extract(self, images, masked_pixels_of_sample) -> StatisticsContainer:
+    def compute_stats(self, images, masked_pixels_of_sample) -> StatisticsContainer:
         pass
 
 
-class PlainExtractor(StatisticsExtractor):
+class PlainAggregator(StatisticsAggregator):
     """
-    Extract the statistics from a chunk of images using numpy functions
+    Compute aggregated statistic values from a chunk of images using numpy functions
     """
 
-    def extract(self, images, masked_pixels_of_sample) -> StatisticsContainer:
+    def compute_stats(self, images, masked_pixels_of_sample) -> StatisticsContainer:
         # Mask broken pixels
         masked_images = np.ma.array(images, mask=masked_pixels_of_sample)
 
-        # Calculate the mean, median, and std over the chunk per pixel
+        # Compute the mean, median, and std over the chunk per pixel
         pixel_mean = np.ma.mean(masked_images, axis=0).filled(np.nan)
         pixel_median = np.ma.median(masked_images, axis=0).filled(np.nan)
         pixel_std = np.ma.std(masked_images, axis=0).filled(np.nan)
@@ -136,9 +142,9 @@ def extract(self, images, masked_pixels_of_sample) -> StatisticsContainer:
         )
 
 
-class SigmaClippingExtractor(StatisticsExtractor):
+class SigmaClippingAggregator(StatisticsAggregator):
     """
-    Extract the statistics from a chunk of images using astropy's sigma clipping functions
+    Compute aggregated statistic values from a chunk of images using astropy's sigma clipping functions
     """
 
     max_sigma = Int(
@@ -150,11 +156,11 @@ class SigmaClippingExtractor(StatisticsExtractor):
         help="Number of iterations for the sigma clipping outlier removal",
     ).tag(config=True)
 
-    def extract(self, images, masked_pixels_of_sample) -> StatisticsContainer:
+    def compute_stats(self, images, masked_pixels_of_sample) -> StatisticsContainer:
         # Mask broken pixels
         masked_images = np.ma.array(images, mask=masked_pixels_of_sample)
 
-        # Calculate the mean, median, and std over the chunk per pixel
+        # Compute the mean, median, and std over the chunk per pixel
         pixel_mean, pixel_median, pixel_std = sigma_clipped_stats(
             masked_images,
             sigma=self.max_sigma,
diff --git a/src/ctapipe/monitoring/tests/test_extractor.py b/src/ctapipe/monitoring/tests/test_aggregator.py
similarity index 66%
rename from src/ctapipe/monitoring/tests/test_extractor.py
rename to src/ctapipe/monitoring/tests/test_aggregator.py
index 6a3a441cf12..8d15b2a7c8a 100644
--- a/src/ctapipe/monitoring/tests/test_extractor.py
+++ b/src/ctapipe/monitoring/tests/test_aggregator.py
@@ -1,5 +1,5 @@
 """
-Tests for StatisticsExtractor and related functions
+Tests for StatisticsAggregator and related functions
 """
 
 import numpy as np
@@ -7,11 +7,11 @@
 from astropy.table import Table
 from astropy.time import Time
 
-from ctapipe.monitoring.extractor import PlainExtractor, SigmaClippingExtractor
+from ctapipe.monitoring.aggregator import PlainAggregator, SigmaClippingAggregator
 
 
-def test_extractors(example_subarray):
-    """test basic functionality of the StatisticsExtractors"""
+def test_aggregators(example_subarray):
+    """test basic functionality of the StatisticsAggregators"""
 
     # Create dummy data for testing
     times = Time(
@@ -35,23 +35,25 @@ def test_extractors(example_subarray):
         [times, event_ids, time_data],
         names=("time_mono", "event_id", "peak_time"),
     )
-    # Initialize the extractors
+    # Initialize the aggregators
     chunk_size = 2500
-    ped_extractor = SigmaClippingExtractor(
+    ped_aggregator = SigmaClippingAggregator(
         subarray=example_subarray, chunk_size=chunk_size
     )
-    ff_charge_extractor = SigmaClippingExtractor(
+    ff_charge_aggregator = SigmaClippingAggregator(
+        subarray=example_subarray, chunk_size=chunk_size
+    )
+    ff_time_aggregator = PlainAggregator(
         subarray=example_subarray, chunk_size=chunk_size
     )
-    ff_time_extractor = PlainExtractor(subarray=example_subarray, chunk_size=chunk_size)
 
-    # Extract the statistical values
-    ped_stats = ped_extractor(table=ped_table)
-    charge_stats = ff_charge_extractor(table=charge_table)
-    time_stats = ff_time_extractor(table=time_table, col_name="peak_time")
+    # Compute the statistical values
+    ped_stats = ped_aggregator(table=ped_table)
+    charge_stats = ff_charge_aggregator(table=charge_table)
+    time_stats = ff_time_aggregator(table=time_table, col_name="peak_time")
 
     # Check if the start and end values are properly set for the timestamps and event IDs
-    # and if the number of events used for the extraction of the statistics is equal the size of the chunk
+    # and if the number of events used for the computation of aggregated statistic values is equal the size of the chunk
     assert ped_stats[0]["time_start"] == times[0]
     assert time_stats[0]["event_id_start"] == event_ids[0]
     assert ped_stats[1]["time_end"] == times[-1]
@@ -88,25 +90,25 @@ def test_chunk_shift(example_subarray):
         [times, event_ids, charge_data],
         names=("time_mono", "event_id", "image"),
     )
-    # Initialize the extractor
-    extractor = SigmaClippingExtractor(subarray=example_subarray, chunk_size=2500)
-    # Extract the statistical values
-    chunk_stats = extractor(table=charge_table)
-    chunk_stats_shift = extractor(table=charge_table, chunk_shift=2000)
-    # Check if three chunks are used for the extraction as the last chunk overflows
+    # Initialize the aggregator
+    aggregator = SigmaClippingAggregator(subarray=example_subarray, chunk_size=2500)
+    # Compute aggregated statistic values
+    chunk_stats = aggregator(table=charge_table)
+    chunk_stats_shift = aggregator(table=charge_table, chunk_shift=2000)
+    # Check if three chunks are used for the computation of aggregated statistic values as the last chunk overflows
     assert len(chunk_stats) == 3
-    # Check if two chunks are used for the extraction as the last chunk is dropped
+    # Check if two chunks are used for the computation of aggregated statistic values as the last chunk is dropped
     assert len(chunk_stats_shift) == 2
     # Check if ValueError is raised when the chunk_size is larger than the length of table
     with pytest.raises(ValueError):
-        _ = extractor(table=charge_table[1000:1500])
+        _ = aggregator(table=charge_table[1000:1500])
     # Check if ValueError is raised when the chunk_shift is smaller than the chunk_size
     with pytest.raises(ValueError):
-        _ = extractor(table=charge_table, chunk_shift=3000)
+        _ = aggregator(table=charge_table, chunk_shift=3000)
 
 
 def test_with_outliers(example_subarray):
-    """test the robustness of the extractors in the presence of outliers"""
+    """test the robustness of the aggregators in the presence of outliers"""
 
     # Create dummy data for testing
     times = Time(
@@ -125,19 +127,19 @@ def test_with_outliers(example_subarray):
         [times, event_ids, ped_data],
         names=("time_mono", "event_id", "image"),
     )
-    # Initialize the extractors
-    sigmaclipping_extractor = SigmaClippingExtractor(
+    # Initialize the aggregators
+    sigmaclipping_aggregator = SigmaClippingAggregator(
         subarray=example_subarray, chunk_size=2500
     )
-    plain_extractor = PlainExtractor(subarray=example_subarray, chunk_size=2500)
+    plain_aggregator = PlainAggregator(subarray=example_subarray, chunk_size=2500)
 
-    # Extract the statistical values
-    sigmaclipping_chunk_stats = sigmaclipping_extractor(table=ped_table)
-    plain_chunk_stats = plain_extractor(table=ped_table)
+    # Compute aggregated statistic values
+    sigmaclipping_chunk_stats = sigmaclipping_aggregator(table=ped_table)
+    plain_chunk_stats = plain_aggregator(table=ped_table)
 
-    # Check if SigmaClippingExtractor is robust to a few fake outliers as expected
+    # Check if SigmaClippingAggregator is robust to a few fake outliers as expected
     np.testing.assert_allclose(sigmaclipping_chunk_stats[0]["mean"], 2.0, atol=1.5)
 
-    # Check if PlainExtractor is not robust to a few fake outliers as expected
+    # Check if PlainAggregator is not robust to a few fake outliers as expected
     with pytest.raises(AssertionError):
         np.testing.assert_allclose(plain_chunk_stats[0]["mean"], 2.0, atol=1.5)

From 98daeed743575af363841817f49b9a7baa046b5e Mon Sep 17 00:00:00 2001
From: TjarkMiener <tjark.miener4692@googlemail.com>
Date: Fri, 2 Aug 2024 16:51:57 +0200
Subject: [PATCH 60/61] fix typo

remove second space
---
 src/ctapipe/monitoring/aggregator.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/ctapipe/monitoring/aggregator.py b/src/ctapipe/monitoring/aggregator.py
index b3f442f6e27..5f2e5933bbf 100644
--- a/src/ctapipe/monitoring/aggregator.py
+++ b/src/ctapipe/monitoring/aggregator.py
@@ -3,7 +3,7 @@
 
 These classes take as input an events table, divide it into time chunks, which
 may optionally overlap, and compute various aggregated statistics for each
-chunk.  The statistics include the count, mean, median, and standard deviation. The result
+chunk. The statistics include the count, mean, median, and standard deviation. The result
 is a monitoring table with columns describing the start and stop time of the chunk
 and the aggregated statistic values.
 """

From ea058a0bda1bbcdd43a710374b8638fffbcdae16 Mon Sep 17 00:00:00 2001
From: TjarkMiener <tjark.miener4692@googlemail.com>
Date: Fri, 2 Aug 2024 16:57:55 +0200
Subject: [PATCH 61/61] removed last slash for intersphinx mapping

---
 docs/conf.py | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/docs/conf.py b/docs/conf.py
index e302a8c3e0c..8941db319e4 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -392,24 +392,24 @@ def setup(app):
 
 # Configuration for intersphinx
 intersphinx_mapping = {
-    "astropy": ("https://docs.astropy.org/en/stable/", None),
-    "bokeh": ("https://docs.bokeh.org/en/latest/", None),
-    "cython": ("https://docs.cython.org/en/stable/", None),
-    "iminuit": ("https://scikit-hep.org/iminuit/", None),
-    "ipywidgets": ("https://ipywidgets.readthedocs.io/en/stable/", None),
-    "joblib": ("https://joblib.readthedocs.io/en/stable/", None),
-    "matplotlib": ("https://matplotlib.org/stable/", None),
-    "numba": ("https://numba.readthedocs.io/en/stable/", None),
-    "numpy": ("https://numpy.org/doc/stable/", None),
-    "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None),
-    "psutil": ("https://psutil.readthedocs.io/en/stable/", None),
-    "pytables": ("https://www.pytables.org/", None),
-    "pytest": ("https://docs.pytest.org/en/stable/", None),
-    "python": ("https://docs.python.org/3/", None),
-    "scipy": ("https://docs.scipy.org/doc/scipy/", None),
-    "setuptools": ("https://setuptools.pypa.io/en/stable/", None),
-    "sklearn": ("https://scikit-learn.org/stable/", None),
-    "traitlets": ("https://traitlets.readthedocs.io/en/stable/", None),
+    "astropy": ("https://docs.astropy.org/en/stable", None),
+    "bokeh": ("https://docs.bokeh.org/en/latest", None),
+    "cython": ("https://docs.cython.org/en/stable", None),
+    "iminuit": ("https://scikit-hep.org/iminuit", None),
+    "ipywidgets": ("https://ipywidgets.readthedocs.io/en/stable", None),
+    "joblib": ("https://joblib.readthedocs.io/en/stable", None),
+    "matplotlib": ("https://matplotlib.org/stable", None),
+    "numba": ("https://numba.readthedocs.io/en/stable", None),
+    "numpy": ("https://numpy.org/doc/stable", None),
+    "pandas": ("https://pandas.pydata.org/pandas-docs/stable", None),
+    "psutil": ("https://psutil.readthedocs.io/en/stable", None),
+    "pytables": ("https://www.pytables.org", None),
+    "pytest": ("https://docs.pytest.org/en/stable", None),
+    "python": ("https://docs.python.org/3", None),
+    "scipy": ("https://docs.scipy.org/doc/scipy", None),
+    "setuptools": ("https://setuptools.pypa.io/en/stable", None),
+    "sklearn": ("https://scikit-learn.org/stable", None),
+    "traitlets": ("https://traitlets.readthedocs.io/en/stable", None),
 }