Skip to content

Commit

Permalink
updated docs and tests
Browse files Browse the repository at this point in the history
  • Loading branch information
TjarkMiener committed Jul 1, 2024
1 parent 2ea4561 commit d826e9d
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 53 deletions.
58 changes: 26 additions & 32 deletions src/ctapipe/calib/camera/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,28 +22,15 @@


class StatisticsExtractor(TelescopeComponent):
"""Base StatisticsExtractor component"""
"""
Base component to handle the extraction of the statistics from a dl1 table
containing charges, peak times and/or charge variances (images).
"""

chunk_size = Int(
2500,
help="Size of the chunk used for the calculation of the statistical values",
).tag(config=True)
median_cut_outliers = List(
[-0.3, 0.3],
help="""Interval of accepted values \\
(fraction with respect to camera median value)""",
).tag(config=True)

def __init__(self, subarray, config=None, parent=None, **kwargs):
"""
Base component to handle the extraction of the statistics
from a chunk of charges and pulse times (images).
Parameters
----------
kwargs
"""
super().__init__(subarray=subarray, config=config, parent=parent, **kwargs)

def __call__(
self,
Expand All @@ -53,8 +40,8 @@ def __call__(
col_name="image",
) -> list:
"""
Call the relevant functions to extract the statistics
for the particular extractor.
Prepare the extraction chunks and call the relevant function of the particular extractor
to extract the statistical values.
Parameters
----------
Expand All @@ -75,7 +62,7 @@ def __call__(
List of extracted statistics and extraction chunks
"""

# Check if the provided data contains an unique type of calibration events
# Check if the provided data contains a unique type of calibration events
# and if the events contain signal or noise.
if np.all(dl1_table["event_type"] == 0) or np.all(dl1_table["event_type"] == 1):
outlier_check = "SIGNAL"
Expand All @@ -87,7 +74,7 @@ def __call__(
outlier_check = "NOISE"
else:
raise ValueError(
"Invalid input data. Only dl1-like images of claibration events are accepted!"
"Invalid input data. Only dl1-like images of calibration events are accepted!"
)

# Check if the length of the dl1 table is greater or equal than the size of the chunk.
Expand Down Expand Up @@ -138,29 +125,34 @@ def extract(

class PlainExtractor(StatisticsExtractor):
"""
Extract the statistics from a chunk of images
Extract the statistics from a chunk of peak time images
using numpy and scipy functions
"""

time_median_cut_outliers = List(
[0, 60],
help="Interval (in waveform samples) of accepted median peak time values",
).tag(config=True)

def extract(
self, images, times, masked_pixels_of_sample, outlier_check
) -> StatisticsContainer:
# ensure numpy array
masked_images = np.ma.array(images, mask=masked_pixels_of_sample)

# median over the sample per pixel
# median over the chunk per pixel
pixel_median = np.ma.median(masked_images, axis=0)

# mean over the sample per pixel
# mean over the chunk per pixel
pixel_mean = np.ma.mean(masked_images, axis=0)

# std over the sample per pixel
# std over the chunk per pixel
pixel_std = np.ma.std(masked_images, axis=0)

# outliers from median
median_outliers = np.logical_or(
pixel_median < self.median_cut_outliers[0],
pixel_median > self.median_cut_outliers[1],
pixel_median < self.time_median_cut_outliers[0],
pixel_median > self.time_median_cut_outliers[1],
)

return StatisticsContainer(
Expand All @@ -176,16 +168,18 @@ def extract(

class SigmaClippingExtractor(StatisticsExtractor):
"""
Extract the statistics from a chunk of images
Extract the statistics from a chunk of charge or variance images
using astropy's sigma clipping functions
"""

median_cut_outliers = List(
[-0.3, 0.3],
help="Interval of accepted median values with respect to the camera median value of the pixel medians",
).tag(config=True)
std_cut_outliers = List(
[-3, 3],
help="""Interval of accepted standard deviation \\
around camera median value""",
help="Interval of accepted standard deviations with respect to the camera median value of the pixel standard deviations",
).tag(config=True)

max_sigma = Int(
default_value=4,
help="Maximal value for the sigma clipping outlier removal",
Expand All @@ -201,7 +195,7 @@ def extract(
# ensure numpy array
masked_images = np.ma.array(images, mask=masked_pixels_of_sample)

# mean, median, and std over the sample per pixel
# mean, median, and std over the chunk per pixel
pixel_mean, pixel_median, pixel_std = sigma_clipped_stats(
masked_images,
sigma=self.max_sigma,
Expand Down
60 changes: 39 additions & 21 deletions src/ctapipe/calib/camera/tests/test_extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,44 +25,55 @@ def fixture_test_sigmaclippingextractor(example_subarray):
def test_extractors(test_plainextractor, test_sigmaclippingextractor):
"""test basic functionality of the StatisticsExtractors"""

# Create dummy data for testing
times = Time(
np.linspace(60117.911, 60117.9258, num=5000), scale="tai", format="mjd"
)
pedestal_dl1_data = np.random.normal(2.0, 5.0, size=(5000, 2, 1855))
pedestal_event_type = np.full((5000,), 2)
flatfield_dl1_data = np.random.normal(77.0, 10.0, size=(5000, 2, 1855))
flatfield_charge_dl1_data = np.random.normal(77.0, 10.0, size=(5000, 2, 1855))
flatfield_time_dl1_data = np.random.normal(18.0, 5.0, size=(5000, 2, 1855))
flatfield_event_type = np.full((5000,), 0)

# Create dl1 tables
pedestal_dl1_table = Table(
[times, pedestal_dl1_data, pedestal_event_type],
names=("time_mono", "image", "event_type"),
)
flatfield_dl1_table = Table(
[times, flatfield_dl1_data, flatfield_event_type],
flatfield_charge_dl1_table = Table(
[times, flatfield_charge_dl1_data, flatfield_event_type],
names=("time_mono", "image", "event_type"),
)

plain_stats_list = test_plainextractor(dl1_table=pedestal_dl1_table)
sigmaclipping_stats_list = test_sigmaclippingextractor(
dl1_table=flatfield_dl1_table
flatfield_time_dl1_table = Table(
[times, flatfield_time_dl1_data, flatfield_event_type],
names=("time_mono", "peak_time", "event_type"),
)
# Extract the statistical values
pedestal_stats_list = test_sigmaclippingextractor(dl1_table=pedestal_dl1_table)
flatfield_charge_stats_list = test_sigmaclippingextractor(
dl1_table=flatfield_charge_dl1_table
)
flatfield_time_stats_list = test_plainextractor(
dl1_table=flatfield_time_dl1_table, col_name="peak_time"
)
# check if the calculated statistical values are reasonable
# for a camera with two gain channels
assert not np.any(np.abs(pedestal_stats_list[0].mean - 2.0) > 1.5)
assert not np.any(np.abs(flatfield_charge_stats_list[0].mean - 77.0) > 1.5)
assert not np.any(np.abs(flatfield_time_stats_list[0].mean - 18.0) > 1.5)

assert not np.any(np.abs(plain_stats_list[0].mean - 2.0) > 1.5)
assert not np.any(np.abs(sigmaclipping_stats_list[0].mean - 77.0) > 1.5)

assert not np.any(np.abs(plain_stats_list[0].mean - 2.0) > 1.5)
assert not np.any(np.abs(sigmaclipping_stats_list[0].mean - 77.0) > 1.5)

assert not np.any(np.abs(plain_stats_list[1].median - 2.0) > 1.5)
assert not np.any(np.abs(sigmaclipping_stats_list[1].median - 77.0) > 1.5)
assert not np.any(np.abs(pedestal_stats_list[1].median - 2.0) > 1.5)
assert not np.any(np.abs(flatfield_charge_stats_list[1].median - 77.0) > 1.5)
assert not np.any(np.abs(flatfield_time_stats_list[1].median - 18.0) > 1.5)

assert not np.any(np.abs(plain_stats_list[0].std - 5.0) > 1.5)
assert not np.any(np.abs(sigmaclipping_stats_list[0].std - 10.0) > 1.5)
assert not np.any(np.abs(pedestal_stats_list[0].std - 5.0) > 1.5)
assert not np.any(np.abs(flatfield_charge_stats_list[0].std - 10.0) > 1.5)
assert not np.any(np.abs(flatfield_time_stats_list[0].std - 5.0) > 1.5)


def test_check_outliers(test_sigmaclippingextractor):
"""test detection ability of outliers"""

# Create dummy data for testing
times = Time(
np.linspace(60117.911, 60117.9258, num=5000), scale="tai", format="mjd"
)
Expand All @@ -71,14 +82,15 @@ def test_check_outliers(test_sigmaclippingextractor):
# insert outliers
flatfield_dl1_data[:, 0, 120] = 120.0
flatfield_dl1_data[:, 1, 67] = 120.0
# Create dl1 table
flatfield_dl1_table = Table(
[times, flatfield_dl1_data, flatfield_event_type],
names=("time_mono", "image", "event_type"),
)
# Extract the statistical values
sigmaclipping_stats_list = test_sigmaclippingextractor(
dl1_table=flatfield_dl1_table
)

# check if outliers where detected correctly
assert sigmaclipping_stats_list[0].median_outliers[0][120]
assert sigmaclipping_stats_list[0].median_outliers[1][67]
Expand All @@ -89,21 +101,22 @@ def test_check_outliers(test_sigmaclippingextractor):
def test_check_chunk_shift(test_sigmaclippingextractor):
"""test the chunk shift option and the boundary case for the last chunk"""

# Create dummy data for testing
times = Time(
np.linspace(60117.911, 60117.9258, num=5500), scale="tai", format="mjd"
)
flatfield_dl1_data = np.random.normal(77.0, 10.0, size=(5500, 2, 1855))
flatfield_event_type = np.full((5500,), 0)
# Create dl1 table
flatfield_dl1_table = Table(
[times, flatfield_dl1_data, flatfield_event_type],
names=("time_mono", "image", "event_type"),
)

# Extract the statistical values
stats_list = test_sigmaclippingextractor(dl1_table=flatfield_dl1_table)
stats_list_chunk_shift = test_sigmaclippingextractor(
dl1_table=flatfield_dl1_table, chunk_shift=2000
)

# check if three chunks are used for the extraction as the last chunk overflows
assert len(stats_list) == 3
# check if two chunks are used for the extraction as the last chunk is dropped
Expand All @@ -113,25 +126,30 @@ def test_check_chunk_shift(test_sigmaclippingextractor):
def test_check_input(test_sigmaclippingextractor):
"""test the input dl1 data"""

# Create dummy data for testing
times = Time(
np.linspace(60117.911, 60117.9258, num=5000), scale="tai", format="mjd"
)
flatfield_dl1_data = np.random.normal(77.0, 10.0, size=(5000, 2, 1855))
# Insert one event with wrong event type
flatfield_event_type = np.full((5000,), 0)
flatfield_event_type[0] = 2
# Create dl1 table
flatfield_dl1_table = Table(
[times, flatfield_dl1_data, flatfield_event_type],
names=("time_mono", "image", "event_type"),
)
# Try to extract the statistical values, which results in a ValueError
with pytest.raises(ValueError):
_ = test_sigmaclippingextractor(dl1_table=flatfield_dl1_table)

# Construct event_type column for cosmic events
cosmic_event_type = np.full((5000,), 32)
# Create dl1 table
cosmic_dl1_table = Table(
[times, flatfield_dl1_data, cosmic_event_type],
names=("time_mono", "image", "event_type"),
)
# Try to extract the statistical values, which results in a ValueError
with pytest.raises(ValueError):
_ = test_sigmaclippingextractor(dl1_table=cosmic_dl1_table)

0 comments on commit d826e9d

Please sign in to comment.