updated docs and tests

cta-observatory · Jul 1, 2024 · d826e9d · d826e9d
1 parent 2ea4561
commit d826e9d
Show file tree

Hide file tree

Showing 2 changed files with 65 additions and 53 deletions.
diff --git a/src/ctapipe/calib/camera/extractor.py b/src/ctapipe/calib/camera/extractor.py
@@ -22,28 +22,15 @@
 
 
 class StatisticsExtractor(TelescopeComponent):
-    """Base StatisticsExtractor component"""
+    """
+    Base component to handle the extraction of the statistics from a dl1 table
+    containing charges, peak times and/or charge variances (images).
+    """
 
     chunk_size = Int(
         2500,
         help="Size of the chunk used for the calculation of the statistical values",
     ).tag(config=True)
-    median_cut_outliers = List(
-        [-0.3, 0.3],
-        help="""Interval of accepted values \\
-                (fraction with respect to camera median value)""",
-    ).tag(config=True)
-
-    def __init__(self, subarray, config=None, parent=None, **kwargs):
-        """
-        Base component to handle the extraction of the statistics
-        from a chunk of charges and pulse times (images).
-
-        Parameters
-        ----------
-        kwargs
-        """
-        super().__init__(subarray=subarray, config=config, parent=parent, **kwargs)
 
     def __call__(
         self,
@@ -53,8 +40,8 @@ def __call__(
         col_name="image",
     ) -> list:
         """
-        Call the relevant functions to extract the statistics
-        for the particular extractor.
+        Prepare the extraction chunks and call the relevant function of the particular extractor
+        to extract the statistical values.
 
         Parameters
         ----------
@@ -75,7 +62,7 @@ def __call__(
             List of extracted statistics and extraction chunks
         """
 
-        # Check if the provided data contains an unique type of calibration events
+        # Check if the provided data contains a unique type of calibration events
         # and if the events contain signal or noise.
         if np.all(dl1_table["event_type"] == 0) or np.all(dl1_table["event_type"] == 1):
             outlier_check = "SIGNAL"
@@ -87,7 +74,7 @@ def __call__(
             outlier_check = "NOISE"
         else:
             raise ValueError(
-                "Invalid input data. Only dl1-like images of claibration events are accepted!"
+                "Invalid input data. Only dl1-like images of calibration events are accepted!"
             )
 
         # Check if the length of the dl1 table is greater or equal than the size of the chunk.
@@ -138,29 +125,34 @@ def extract(
 
 class PlainExtractor(StatisticsExtractor):
     """
-    Extract the statistics from a chunk of images
+    Extract the statistics from a chunk of peak time images
     using numpy and scipy functions
     """
 
+    time_median_cut_outliers = List(
+        [0, 60],
+        help="Interval (in waveform samples) of accepted median peak time values",
+    ).tag(config=True)
+
     def extract(
         self, images, times, masked_pixels_of_sample, outlier_check
     ) -> StatisticsContainer:
         # ensure numpy array
         masked_images = np.ma.array(images, mask=masked_pixels_of_sample)
 
-        # median over the sample per pixel
+        # median over the chunk per pixel
         pixel_median = np.ma.median(masked_images, axis=0)
 
-        # mean over the sample per pixel
+        # mean over the chunk per pixel
         pixel_mean = np.ma.mean(masked_images, axis=0)
 
-        # std over the sample per pixel
+        # std over the chunk per pixel
         pixel_std = np.ma.std(masked_images, axis=0)
 
         # outliers from median
         median_outliers = np.logical_or(
-            pixel_median < self.median_cut_outliers[0],
-            pixel_median > self.median_cut_outliers[1],
+            pixel_median < self.time_median_cut_outliers[0],
+            pixel_median > self.time_median_cut_outliers[1],
         )
 
         return StatisticsContainer(
@@ -176,16 +168,18 @@ def extract(
 
 class SigmaClippingExtractor(StatisticsExtractor):
     """
-    Extract the statistics from a chunk of images
+    Extract the statistics from a chunk of charge or variance images
     using astropy's sigma clipping functions
     """
 
+    median_cut_outliers = List(
+        [-0.3, 0.3],
+        help="Interval of accepted median values with respect to the camera median value of the pixel medians",
+    ).tag(config=True)
     std_cut_outliers = List(
         [-3, 3],
-        help="""Interval of accepted standard deviation \\
-                around camera median value""",
+        help="Interval of accepted standard deviations with respect to the camera median value of the pixel standard deviations",
     ).tag(config=True)
-
     max_sigma = Int(
         default_value=4,
         help="Maximal value for the sigma clipping outlier removal",
@@ -201,7 +195,7 @@ def extract(
         # ensure numpy array
         masked_images = np.ma.array(images, mask=masked_pixels_of_sample)
 
-        # mean, median, and std over the sample per pixel
+        # mean, median, and std over the chunk per pixel
         pixel_mean, pixel_median, pixel_std = sigma_clipped_stats(
             masked_images,
             sigma=self.max_sigma,

diff --git a/src/ctapipe/calib/camera/tests/test_extractors.py b/src/ctapipe/calib/camera/tests/test_extractors.py
@@ -25,44 +25,55 @@ def fixture_test_sigmaclippingextractor(example_subarray):
 def test_extractors(test_plainextractor, test_sigmaclippingextractor):
     """test basic functionality of the StatisticsExtractors"""
 
+    # Create dummy data for testing
     times = Time(
         np.linspace(60117.911, 60117.9258, num=5000), scale="tai", format="mjd"
     )
     pedestal_dl1_data = np.random.normal(2.0, 5.0, size=(5000, 2, 1855))
     pedestal_event_type = np.full((5000,), 2)
-    flatfield_dl1_data = np.random.normal(77.0, 10.0, size=(5000, 2, 1855))
+    flatfield_charge_dl1_data = np.random.normal(77.0, 10.0, size=(5000, 2, 1855))
+    flatfield_time_dl1_data = np.random.normal(18.0, 5.0, size=(5000, 2, 1855))
     flatfield_event_type = np.full((5000,), 0)
-
+    # Create dl1 tables
     pedestal_dl1_table = Table(
         [times, pedestal_dl1_data, pedestal_event_type],
         names=("time_mono", "image", "event_type"),
     )
-    flatfield_dl1_table = Table(
-        [times, flatfield_dl1_data, flatfield_event_type],
+    flatfield_charge_dl1_table = Table(
+        [times, flatfield_charge_dl1_data, flatfield_event_type],
         names=("time_mono", "image", "event_type"),
     )
-
-    plain_stats_list = test_plainextractor(dl1_table=pedestal_dl1_table)
-    sigmaclipping_stats_list = test_sigmaclippingextractor(
-        dl1_table=flatfield_dl1_table
+    flatfield_time_dl1_table = Table(
+        [times, flatfield_time_dl1_data, flatfield_event_type],
+        names=("time_mono", "peak_time", "event_type"),
     )
+    # Extract the statistical values
+    pedestal_stats_list = test_sigmaclippingextractor(dl1_table=pedestal_dl1_table)
+    flatfield_charge_stats_list = test_sigmaclippingextractor(
+        dl1_table=flatfield_charge_dl1_table
+    )
+    flatfield_time_stats_list = test_plainextractor(
+        dl1_table=flatfield_time_dl1_table, col_name="peak_time"
+    )
+    # check if the calculated statistical values are reasonable
+    # for a camera with two gain channels
+    assert not np.any(np.abs(pedestal_stats_list[0].mean - 2.0) > 1.5)
+    assert not np.any(np.abs(flatfield_charge_stats_list[0].mean - 77.0) > 1.5)
+    assert not np.any(np.abs(flatfield_time_stats_list[0].mean - 18.0) > 1.5)
 
-    assert not np.any(np.abs(plain_stats_list[0].mean - 2.0) > 1.5)
-    assert not np.any(np.abs(sigmaclipping_stats_list[0].mean - 77.0) > 1.5)
-
-    assert not np.any(np.abs(plain_stats_list[0].mean - 2.0) > 1.5)
-    assert not np.any(np.abs(sigmaclipping_stats_list[0].mean - 77.0) > 1.5)
-
-    assert not np.any(np.abs(plain_stats_list[1].median - 2.0) > 1.5)
-    assert not np.any(np.abs(sigmaclipping_stats_list[1].median - 77.0) > 1.5)
+    assert not np.any(np.abs(pedestal_stats_list[1].median - 2.0) > 1.5)
+    assert not np.any(np.abs(flatfield_charge_stats_list[1].median - 77.0) > 1.5)
+    assert not np.any(np.abs(flatfield_time_stats_list[1].median - 18.0) > 1.5)
 
-    assert not np.any(np.abs(plain_stats_list[0].std - 5.0) > 1.5)
-    assert not np.any(np.abs(sigmaclipping_stats_list[0].std - 10.0) > 1.5)
+    assert not np.any(np.abs(pedestal_stats_list[0].std - 5.0) > 1.5)
+    assert not np.any(np.abs(flatfield_charge_stats_list[0].std - 10.0) > 1.5)
+    assert not np.any(np.abs(flatfield_time_stats_list[0].std - 5.0) > 1.5)
 
 
 def test_check_outliers(test_sigmaclippingextractor):
     """test detection ability of outliers"""
 
+    # Create dummy data for testing
     times = Time(
         np.linspace(60117.911, 60117.9258, num=5000), scale="tai", format="mjd"
     )
@@ -71,14 +82,15 @@ def test_check_outliers(test_sigmaclippingextractor):
     # insert outliers
     flatfield_dl1_data[:, 0, 120] = 120.0
     flatfield_dl1_data[:, 1, 67] = 120.0
+    # Create dl1 table
     flatfield_dl1_table = Table(
         [times, flatfield_dl1_data, flatfield_event_type],
         names=("time_mono", "image", "event_type"),
     )
+    # Extract the statistical values
     sigmaclipping_stats_list = test_sigmaclippingextractor(
         dl1_table=flatfield_dl1_table
     )
-
     # check if outliers where detected correctly
     assert sigmaclipping_stats_list[0].median_outliers[0][120]
     assert sigmaclipping_stats_list[0].median_outliers[1][67]
@@ -89,21 +101,22 @@ def test_check_outliers(test_sigmaclippingextractor):
 def test_check_chunk_shift(test_sigmaclippingextractor):
     """test the chunk shift option and the boundary case for the last chunk"""
 
+    # Create dummy data for testing
     times = Time(
         np.linspace(60117.911, 60117.9258, num=5500), scale="tai", format="mjd"
     )
     flatfield_dl1_data = np.random.normal(77.0, 10.0, size=(5500, 2, 1855))
     flatfield_event_type = np.full((5500,), 0)
+    # Create dl1 table
     flatfield_dl1_table = Table(
         [times, flatfield_dl1_data, flatfield_event_type],
         names=("time_mono", "image", "event_type"),
     )
-
+    # Extract the statistical values
     stats_list = test_sigmaclippingextractor(dl1_table=flatfield_dl1_table)
     stats_list_chunk_shift = test_sigmaclippingextractor(
         dl1_table=flatfield_dl1_table, chunk_shift=2000
     )
-
     # check if three chunks are used for the extraction as the last chunk overflows
     assert len(stats_list) == 3
     # check if two chunks are used for the extraction as the last chunk is dropped
@@ -113,25 +126,30 @@ def test_check_chunk_shift(test_sigmaclippingextractor):
 def test_check_input(test_sigmaclippingextractor):
     """test the input dl1 data"""
 
+    # Create dummy data for testing
     times = Time(
         np.linspace(60117.911, 60117.9258, num=5000), scale="tai", format="mjd"
     )
     flatfield_dl1_data = np.random.normal(77.0, 10.0, size=(5000, 2, 1855))
     # Insert one event with wrong event type
     flatfield_event_type = np.full((5000,), 0)
     flatfield_event_type[0] = 2
+    # Create dl1 table
     flatfield_dl1_table = Table(
         [times, flatfield_dl1_data, flatfield_event_type],
         names=("time_mono", "image", "event_type"),
     )
+    # Try to extract the statistical values, which results in a ValueError
     with pytest.raises(ValueError):
         _ = test_sigmaclippingextractor(dl1_table=flatfield_dl1_table)
 
     # Construct event_type column for cosmic events
     cosmic_event_type = np.full((5000,), 32)
+    # Create dl1 table
     cosmic_dl1_table = Table(
         [times, flatfield_dl1_data, cosmic_event_type],
         names=("time_mono", "image", "event_type"),
     )
+    # Try to extract the statistical values, which results in a ValueError
     with pytest.raises(ValueError):
         _ = test_sigmaclippingextractor(dl1_table=cosmic_dl1_table)