Skip to content

Commit

Permalink
Update and add missing docstrings
Browse files Browse the repository at this point in the history
  • Loading branch information
LukasBeiske committed Dec 1, 2023
1 parent fbe8f05 commit ac0ea27
Show file tree
Hide file tree
Showing 6 changed files with 71 additions and 52 deletions.
95 changes: 55 additions & 40 deletions ctapipe/reco/sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,41 +66,47 @@ class MLQualityQuery(QualityQuery):


class SKLearnReconstructor(Reconstructor):
"""Base Class for a Machine Learning Based Reconstructor.
"""
Base Class for a Machine Learning Based Reconstructor.
Keeps a dictionary of sklearn models; the current tools are designed
to train one model per telescope type.
"""

#: Name of the target column in training table
#: Name of the target table column for training.
target: str = ""

#: property predicted, overridden in baseclass
#: Property predicted, overridden in subclass.
property = None

prefix = traits.Unicode(
default_value=None,
allow_none=True,
help="Prefix for the output of this model. If None, ``model_cls`` is used.",
).tag(config=True)
features = traits.List(traits.Unicode(), help="Features to use for this model").tag(
features = traits.List(
traits.Unicode(), help="Features to use for this model."
).tag(config=True)
model_config = traits.Dict({}, help="kwargs for the sklearn model.").tag(
config=True
)
model_config = traits.Dict({}, help="kwargs for the sklearn model").tag(config=True)
model_cls = traits.Enum(
SUPPORTED_MODELS.keys(), default_value=None, allow_none=True
SUPPORTED_MODELS.keys(),
default_value=None,
allow_none=True,
help="Which scikit-learn model to use.",
).tag(config=True)

stereo_combiner_cls = traits.ComponentName(
StereoCombiner,
default_value="StereoMeanCombiner",
help="Which stereo combination method to use",
help="Which stereo combination method to use.",
).tag(config=True)

load_path = traits.Path(
default_value=None,
allow_none=True,
help="If given, load serialized model from this path",
help="If given, load serialized model from this path.",
).tag(config=True)

def __init__(self, subarray=None, models=None, n_jobs=None, **kwargs):
Expand Down Expand Up @@ -155,7 +161,8 @@ def __init__(self, subarray=None, models=None, n_jobs=None, **kwargs):

@abstractmethod
def __call__(self, event: ArrayEventContainer) -> None:
"""Event-wise prediction for the EventSource-Loop.
"""
Event-wise prediction for the EventSource-Loop.
Fills the event.dl2.<your-feature>[name] container.
Expand All @@ -167,7 +174,7 @@ def __call__(self, event: ArrayEventContainer) -> None:
@abstractmethod
def predict_table(self, key, table: Table) -> Table:
"""
Predict on a table of events
Predict on a table of events.
Parameters
----------
Expand Down Expand Up @@ -206,7 +213,7 @@ def _new_model(self):

def _table_to_y(self, table, mask=None):
"""
Extract target values as numpy array from input table
Extract target values as numpy array from input table.
"""
# make sure we use the unit that was used during training
if self.unit is not None:
Expand Down Expand Up @@ -236,9 +243,7 @@ def _set_n_jobs(self, n_jobs):


class SKLearnRegressionReconstructor(SKLearnReconstructor):
"""
Base class for regression tasks
"""
"""Base class for regression tasks."""

model_cls = traits.Enum(
SUPPORTED_REGRESSORS.keys(),
Expand Down Expand Up @@ -292,9 +297,7 @@ def _table_to_y(self, table, mask=None):


class SKLearnClassificationReconstructor(SKLearnReconstructor):
"""
Base class for classification tasks
"""
"""Base class for classification tasks."""

model_cls = traits.Enum(
SUPPORTED_CLASSIFIERS.keys(),
Expand All @@ -304,7 +307,7 @@ class SKLearnClassificationReconstructor(SKLearnReconstructor):
).tag(config=True)

invalid_class = traits.Integer(
default_value=-1, help="The label to fill in case no prediction could be made"
default_value=-1, help="The label to fill in case no prediction could be made."
).tag(config=True)

positive_class = traits.Integer(
Expand Down Expand Up @@ -369,16 +372,15 @@ def _get_positive_index(self, key):

class EnergyRegressor(SKLearnRegressionReconstructor):
"""
Use a scikit-learn regression model per telescope type to predict primary energy
Use a scikit-learn regression model per telescope type to predict primary energy.
"""

#: Name of the target table column for training
target = "true_energy"
property = ReconstructionProperty.ENERGY

def __call__(self, event: ArrayEventContainer) -> None:
"""
Apply model for a single event and fill result into the event container
Apply model for a single event and fill result into the event container.
"""
for tel_id in event.trigger.tels_with_trigger:
table = collect_features(event, tel_id, self.instrument_table)
Expand Down Expand Up @@ -408,7 +410,7 @@ def __call__(self, event: ArrayEventContainer) -> None:
self.stereo_combiner(event)

def predict_table(self, key, table: Table) -> Dict[ReconstructionProperty, Table]:
"""Predict on a table of events"""
"""Predict on a table of events."""
table = self.feature_generator(table, subarray=self.subarray)

n_rows = len(table)
Expand All @@ -434,11 +436,8 @@ def predict_table(self, key, table: Table) -> Dict[ReconstructionProperty, Table


class ParticleClassifier(SKLearnClassificationReconstructor):
"""
Predict dl2 particle classification
"""
"""Predict dl2 particle classification."""

#: Name of the target table column for training
target = "true_shower_primary_id"

positive_class = traits.Integer(
Expand All @@ -449,6 +448,9 @@ class ParticleClassifier(SKLearnClassificationReconstructor):
property = ReconstructionProperty.PARTICLE_TYPE

def __call__(self, event: ArrayEventContainer) -> None:
"""
Apply model for a single event and fill result into the event container.
"""
for tel_id in event.trigger.tels_with_trigger:
table = collect_features(event, tel_id, self.instrument_table)
table = self.feature_generator(table, subarray=self.subarray)
Expand All @@ -475,7 +477,7 @@ def __call__(self, event: ArrayEventContainer) -> None:
self.stereo_combiner(event)

def predict_table(self, key, table: Table) -> Dict[ReconstructionProperty, Table]:
"""Predict on a table of events"""
"""Predict on a table of events."""
table = self.feature_generator(table, subarray=self.subarray)

n_rows = len(table)
Expand Down Expand Up @@ -507,18 +509,26 @@ class DispReconstructor(Reconstructor):

target = "true_disp"

prefix = traits.Unicode(default_value="disp", allow_none=False).tag(config=True)
prefix = traits.Unicode(
default_value="disp",
allow_none=False,
help="Prefix for the output of this model. Defaults to ``disp``.",
).tag(config=True)

features = traits.List(
traits.Unicode(), help="Features to use for both models"
traits.Unicode(), help="Features to use for both models."
).tag(config=True)

log_target = traits.Bool(
default_value=False,
help="If True, the model is trained to predict the natural logarithm of the absolute value.",
help=(
"If True, the norm(disp) model is trained to predict ln(norm(disp))"
" and the output is"
" ``prefix_parameter`` = ``sign_prediction`` * ``exp(norm_prediction)``."
),
).tag(config=True)

norm_config = traits.Dict({}, help="kwargs for the sklearn regressor").tag(
norm_config = traits.Dict({}, help="kwargs for the sklearn regressor.").tag(
config=True
)

Expand All @@ -529,7 +539,7 @@ class DispReconstructor(Reconstructor):
help="Which scikit-learn regression model to use.",
).tag(config=True)

sign_config = traits.Dict({}, help="kwargs for the sklearn classifier").tag(
sign_config = traits.Dict({}, help="kwargs for the sklearn classifier.").tag(
config=True
)

Expand All @@ -543,13 +553,13 @@ class DispReconstructor(Reconstructor):
stereo_combiner_cls = traits.ComponentName(
StereoCombiner,
default_value="StereoMeanCombiner",
help="Which stereo combination method to use",
help="Which stereo combination method to use.",
).tag(config=True)

load_path = traits.Path(
default_value=None,
allow_none=True,
help="If given, load serialized model from this path",
help="If given, load serialized model from this path.",
).tag(config=True)

def __init__(self, subarray=None, models=None, **kwargs):
Expand Down Expand Up @@ -606,7 +616,7 @@ def _new_models(self):

def _table_to_y(self, table, mask=None):
"""
Extract target values as numpy array from input table
Extract target values as numpy array from input table.
"""
# make sure we use the unit that was used during training
if self.unit is not None:
Expand Down Expand Up @@ -689,7 +699,8 @@ def _predict(self, key, table):
return prediction, valid

def __call__(self, event: ArrayEventContainer) -> None:
"""Event-wise prediction for the EventSource-Loop.
"""
Event-wise prediction for the EventSource-Loop.
Fills the event.dl2.tel[tel_id].disp[prefix] container
and event.dl2.tel[tel_id].geometry[prefix] container.
Expand Down Expand Up @@ -755,7 +766,8 @@ def __call__(self, event: ArrayEventContainer) -> None:
self.stereo_combiner(event)

def predict_table(self, key, table: Table) -> Dict[ReconstructionProperty, Table]:
"""Predict on a table of events
"""
Predict on a table of events.
Parameters
----------
Expand Down Expand Up @@ -831,9 +843,11 @@ def _set_n_jobs(self, n_jobs):


class CrossValidator(Component):
"""Class to train sklearn based reconstructors in a cross validation"""
"""Class to train sklearn based reconstructors in a cross validation."""

n_cross_validations = traits.Int(5).tag(config=True)
n_cross_validations = traits.Int(
default_value=5, help="Number of cross validation iterations."
).tag(config=True)

output_path = traits.Path(
default_value=None,
Expand All @@ -848,7 +862,7 @@ class CrossValidator(Component):
).tag(config=True)

rng_seed = traits.Int(
default_value=1337, help="Seed for the random number generator"
default_value=1337, help="Random seed for splitting the training data."
).tag(config=True)

def __init__(self, model_component, **kwargs):
Expand All @@ -872,6 +886,7 @@ def __init__(self, model_component, **kwargs):
)

def __call__(self, telescope_type, table):
"""Perform cross validation for the given model."""
if self.n_cross_validations == 0:
return

Expand Down
20 changes: 12 additions & 8 deletions ctapipe/reco/stereo_combination.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,22 +63,24 @@ def _weighted_mean_ufunc(tel_values, weights, n_array_events, indices):


class StereoCombiner(Component):
"""Base Class for algorithms combining telescope-wise predictions to common prediction"""
"""
Base class for algorithms combining telescope-wise predictions into a common prediction.
"""

prefix = Unicode(
default_value="",
help="Prefix to be added to the output container / column names",
help="Prefix to be added to the output container / column names.",
).tag(config=True)

property = UseEnum(
ReconstructionProperty,
help="Which property is being combined",
help="Which property is being combined.",
).tag(config=True)

@abstractmethod
def __call__(self, event: ArrayEventContainer) -> None:
"""
Fill event container with stereo predictions
Fill event container with stereo predictions.
"""

@abstractmethod
Expand All @@ -91,17 +93,21 @@ def predict_table(self, mono_predictions: Table) -> Table:

class StereoMeanCombiner(StereoCombiner):
"""
Calculate array-event prediction as (weighted) mean of telescope-wise predictions
Calculate array-event prediction as (weighted) mean of telescope-wise predictions.
"""

weights = CaselessStrEnum(
["none", "intensity", "konrad"],
default_value="none",
help=(
"What kind of weights to use."
" Options: ``none``, ``intensity``, ``konrad``."
),
).tag(config=True)

log_target = Bool(
False,
help="If true, calculate exp(mean(log(values)))",
help="If true, calculate exp(mean(log(values))).",
).tag(config=True)

def __init__(self, *args, **kwargs):
Expand All @@ -118,8 +124,6 @@ def __init__(self, *args, **kwargs):
)

def _calculate_weights(self, data):
""""""

if isinstance(data, Container):
if self.weights == "intensity":
return data.hillas.intensity
Expand Down
2 changes: 1 addition & 1 deletion ctapipe/tools/train_disp_reconstructor.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ class TrainDispReconstructor(Tool):
).tag(config=True)

random_seed = Int(
default_value=0, help="Random seed for sampling and cross validation"
default_value=0, help="Random seed for sampling training events."
).tag(config=True)

n_jobs = Int(
Expand Down
2 changes: 1 addition & 1 deletion ctapipe/tools/train_energy_regressor.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ class TrainEnergyRegressor(Tool):
).tag(config=True)

random_seed = Int(
default_value=0, help="Random seed for sampling and cross validation"
default_value=0, help="Random seed for sampling training events."
).tag(config=True)

n_jobs = Int(
Expand Down
3 changes: 1 addition & 2 deletions ctapipe/tools/train_particle_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,7 @@ class TrainParticleClassifier(Tool):
).tag(config=True)

random_seed = Int(
default_value=0,
help="Random number seed for sampling and the cross validation splitting",
default_value=0, help="Random seed for sampling training events."
).tag(config=True)

n_jobs = Int(
Expand Down
1 change: 1 addition & 0 deletions docs/changes/2456.optimization.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Update and add missing docstrings related to the ML functionality.

0 comments on commit ac0ea27

Please sign in to comment.