diff --git a/src/ctapipe/calib/camera/extractor.py b/src/ctapipe/calib/camera/extractor.py index 52d5f39e0a7..79d7b34293d 100644 --- a/src/ctapipe/calib/camera/extractor.py +++ b/src/ctapipe/calib/camera/extractor.py @@ -22,28 +22,15 @@ class StatisticsExtractor(TelescopeComponent): - """Base StatisticsExtractor component""" + """ + Base component to handle the extraction of the statistics from a dl1 table + containing charges, peak times and/or charge variances (images). + """ chunk_size = Int( 2500, help="Size of the chunk used for the calculation of the statistical values", ).tag(config=True) - median_cut_outliers = List( - [-0.3, 0.3], - help="""Interval of accepted values \\ - (fraction with respect to camera median value)""", - ).tag(config=True) - - def __init__(self, subarray, config=None, parent=None, **kwargs): - """ - Base component to handle the extraction of the statistics - from a chunk of charges and pulse times (images). - - Parameters - ---------- - kwargs - """ - super().__init__(subarray=subarray, config=config, parent=parent, **kwargs) def __call__( self, @@ -53,8 +40,8 @@ def __call__( col_name="image", ) -> list: """ - Call the relevant functions to extract the statistics - for the particular extractor. + Prepare the extraction chunks and call the relevant function of the particular extractor + to extract the statistical values. Parameters ---------- @@ -75,7 +62,7 @@ def __call__( List of extracted statistics and extraction chunks """ - # Check if the provided data contains an unique type of calibration events + # Check if the provided data contains a unique type of calibration events # and if the events contain signal or noise. if np.all(dl1_table["event_type"] == 0) or np.all(dl1_table["event_type"] == 1): outlier_check = "SIGNAL" @@ -87,7 +74,7 @@ def __call__( outlier_check = "NOISE" else: raise ValueError( - "Invalid input data. Only dl1-like images of claibration events are accepted!" + "Invalid input data. Only dl1-like images of calibration events are accepted!" ) # Check if the length of the dl1 table is greater or equal than the size of the chunk. @@ -138,29 +125,34 @@ def extract( class PlainExtractor(StatisticsExtractor): """ - Extract the statistics from a chunk of images + Extract the statistics from a chunk of peak time images using numpy and scipy functions """ + time_median_cut_outliers = List( + [0, 60], + help="Interval (in waveform samples) of accepted median peak time values", + ).tag(config=True) + def extract( self, images, times, masked_pixels_of_sample, outlier_check ) -> StatisticsContainer: # ensure numpy array masked_images = np.ma.array(images, mask=masked_pixels_of_sample) - # median over the sample per pixel + # median over the chunk per pixel pixel_median = np.ma.median(masked_images, axis=0) - # mean over the sample per pixel + # mean over the chunk per pixel pixel_mean = np.ma.mean(masked_images, axis=0) - # std over the sample per pixel + # std over the chunk per pixel pixel_std = np.ma.std(masked_images, axis=0) # outliers from median median_outliers = np.logical_or( - pixel_median < self.median_cut_outliers[0], - pixel_median > self.median_cut_outliers[1], + pixel_median < self.time_median_cut_outliers[0], + pixel_median > self.time_median_cut_outliers[1], ) return StatisticsContainer( @@ -176,16 +168,18 @@ def extract( class SigmaClippingExtractor(StatisticsExtractor): """ - Extract the statistics from a chunk of images + Extract the statistics from a chunk of charge or variance images using astropy's sigma clipping functions """ + median_cut_outliers = List( + [-0.3, 0.3], + help="Interval of accepted median values with respect to the camera median value of the pixel medians", + ).tag(config=True) std_cut_outliers = List( [-3, 3], - help="""Interval of accepted standard deviation \\ - around camera median value""", + help="Interval of accepted standard deviations with respect to the camera median value of the pixel standard deviations", ).tag(config=True) - max_sigma = Int( default_value=4, help="Maximal value for the sigma clipping outlier removal", @@ -201,7 +195,7 @@ def extract( # ensure numpy array masked_images = np.ma.array(images, mask=masked_pixels_of_sample) - # mean, median, and std over the sample per pixel + # mean, median, and std over the chunk per pixel pixel_mean, pixel_median, pixel_std = sigma_clipped_stats( masked_images, sigma=self.max_sigma, diff --git a/src/ctapipe/calib/camera/tests/test_extractors.py b/src/ctapipe/calib/camera/tests/test_extractors.py index a72a11ed55a..b3a02064c03 100644 --- a/src/ctapipe/calib/camera/tests/test_extractors.py +++ b/src/ctapipe/calib/camera/tests/test_extractors.py @@ -25,44 +25,55 @@ def fixture_test_sigmaclippingextractor(example_subarray): def test_extractors(test_plainextractor, test_sigmaclippingextractor): """test basic functionality of the StatisticsExtractors""" + # Create dummy data for testing times = Time( np.linspace(60117.911, 60117.9258, num=5000), scale="tai", format="mjd" ) pedestal_dl1_data = np.random.normal(2.0, 5.0, size=(5000, 2, 1855)) pedestal_event_type = np.full((5000,), 2) - flatfield_dl1_data = np.random.normal(77.0, 10.0, size=(5000, 2, 1855)) + flatfield_charge_dl1_data = np.random.normal(77.0, 10.0, size=(5000, 2, 1855)) + flatfield_time_dl1_data = np.random.normal(18.0, 5.0, size=(5000, 2, 1855)) flatfield_event_type = np.full((5000,), 0) - + # Create dl1 tables pedestal_dl1_table = Table( [times, pedestal_dl1_data, pedestal_event_type], names=("time_mono", "image", "event_type"), ) - flatfield_dl1_table = Table( - [times, flatfield_dl1_data, flatfield_event_type], + flatfield_charge_dl1_table = Table( + [times, flatfield_charge_dl1_data, flatfield_event_type], names=("time_mono", "image", "event_type"), ) - - plain_stats_list = test_plainextractor(dl1_table=pedestal_dl1_table) - sigmaclipping_stats_list = test_sigmaclippingextractor( - dl1_table=flatfield_dl1_table + flatfield_time_dl1_table = Table( + [times, flatfield_time_dl1_data, flatfield_event_type], + names=("time_mono", "peak_time", "event_type"), ) + # Extract the statistical values + pedestal_stats_list = test_sigmaclippingextractor(dl1_table=pedestal_dl1_table) + flatfield_charge_stats_list = test_sigmaclippingextractor( + dl1_table=flatfield_charge_dl1_table + ) + flatfield_time_stats_list = test_plainextractor( + dl1_table=flatfield_time_dl1_table, col_name="peak_time" + ) + # check if the calculated statistical values are reasonable + # for a camera with two gain channels + assert not np.any(np.abs(pedestal_stats_list[0].mean - 2.0) > 1.5) + assert not np.any(np.abs(flatfield_charge_stats_list[0].mean - 77.0) > 1.5) + assert not np.any(np.abs(flatfield_time_stats_list[0].mean - 18.0) > 1.5) - assert not np.any(np.abs(plain_stats_list[0].mean - 2.0) > 1.5) - assert not np.any(np.abs(sigmaclipping_stats_list[0].mean - 77.0) > 1.5) - - assert not np.any(np.abs(plain_stats_list[0].mean - 2.0) > 1.5) - assert not np.any(np.abs(sigmaclipping_stats_list[0].mean - 77.0) > 1.5) - - assert not np.any(np.abs(plain_stats_list[1].median - 2.0) > 1.5) - assert not np.any(np.abs(sigmaclipping_stats_list[1].median - 77.0) > 1.5) + assert not np.any(np.abs(pedestal_stats_list[1].median - 2.0) > 1.5) + assert not np.any(np.abs(flatfield_charge_stats_list[1].median - 77.0) > 1.5) + assert not np.any(np.abs(flatfield_time_stats_list[1].median - 18.0) > 1.5) - assert not np.any(np.abs(plain_stats_list[0].std - 5.0) > 1.5) - assert not np.any(np.abs(sigmaclipping_stats_list[0].std - 10.0) > 1.5) + assert not np.any(np.abs(pedestal_stats_list[0].std - 5.0) > 1.5) + assert not np.any(np.abs(flatfield_charge_stats_list[0].std - 10.0) > 1.5) + assert not np.any(np.abs(flatfield_time_stats_list[0].std - 5.0) > 1.5) def test_check_outliers(test_sigmaclippingextractor): """test detection ability of outliers""" + # Create dummy data for testing times = Time( np.linspace(60117.911, 60117.9258, num=5000), scale="tai", format="mjd" ) @@ -71,14 +82,15 @@ def test_check_outliers(test_sigmaclippingextractor): # insert outliers flatfield_dl1_data[:, 0, 120] = 120.0 flatfield_dl1_data[:, 1, 67] = 120.0 + # Create dl1 table flatfield_dl1_table = Table( [times, flatfield_dl1_data, flatfield_event_type], names=("time_mono", "image", "event_type"), ) + # Extract the statistical values sigmaclipping_stats_list = test_sigmaclippingextractor( dl1_table=flatfield_dl1_table ) - # check if outliers where detected correctly assert sigmaclipping_stats_list[0].median_outliers[0][120] assert sigmaclipping_stats_list[0].median_outliers[1][67] @@ -89,21 +101,22 @@ def test_check_outliers(test_sigmaclippingextractor): def test_check_chunk_shift(test_sigmaclippingextractor): """test the chunk shift option and the boundary case for the last chunk""" + # Create dummy data for testing times = Time( np.linspace(60117.911, 60117.9258, num=5500), scale="tai", format="mjd" ) flatfield_dl1_data = np.random.normal(77.0, 10.0, size=(5500, 2, 1855)) flatfield_event_type = np.full((5500,), 0) + # Create dl1 table flatfield_dl1_table = Table( [times, flatfield_dl1_data, flatfield_event_type], names=("time_mono", "image", "event_type"), ) - + # Extract the statistical values stats_list = test_sigmaclippingextractor(dl1_table=flatfield_dl1_table) stats_list_chunk_shift = test_sigmaclippingextractor( dl1_table=flatfield_dl1_table, chunk_shift=2000 ) - # check if three chunks are used for the extraction as the last chunk overflows assert len(stats_list) == 3 # check if two chunks are used for the extraction as the last chunk is dropped @@ -113,6 +126,7 @@ def test_check_chunk_shift(test_sigmaclippingextractor): def test_check_input(test_sigmaclippingextractor): """test the input dl1 data""" + # Create dummy data for testing times = Time( np.linspace(60117.911, 60117.9258, num=5000), scale="tai", format="mjd" ) @@ -120,18 +134,22 @@ def test_check_input(test_sigmaclippingextractor): # Insert one event with wrong event type flatfield_event_type = np.full((5000,), 0) flatfield_event_type[0] = 2 + # Create dl1 table flatfield_dl1_table = Table( [times, flatfield_dl1_data, flatfield_event_type], names=("time_mono", "image", "event_type"), ) + # Try to extract the statistical values, which results in a ValueError with pytest.raises(ValueError): _ = test_sigmaclippingextractor(dl1_table=flatfield_dl1_table) # Construct event_type column for cosmic events cosmic_event_type = np.full((5000,), 32) + # Create dl1 table cosmic_dl1_table = Table( [times, flatfield_dl1_data, cosmic_event_type], names=("time_mono", "image", "event_type"), ) + # Try to extract the statistical values, which results in a ValueError with pytest.raises(ValueError): _ = test_sigmaclippingextractor(dl1_table=cosmic_dl1_table)