diff --git a/ctapipe/reco/sklearn.py b/ctapipe/reco/sklearn.py index 2b08b891e54..bf85ad366e4 100644 --- a/ctapipe/reco/sklearn.py +++ b/ctapipe/reco/sklearn.py @@ -66,16 +66,17 @@ class MLQualityQuery(QualityQuery): class SKLearnReconstructor(Reconstructor): - """Base Class for a Machine Learning Based Reconstructor. + """ + Base Class for a Machine Learning Based Reconstructor. Keeps a dictionary of sklearn models, the current tools are designed to train one model per telescope type. """ - #: Name of the target column in training table + #: Name of the target table column for training. target: str = "" - #: property predicted, overridden in baseclass + #: Property predicted, overridden in subclass. property = None prefix = traits.Unicode( @@ -83,24 +84,29 @@ class SKLearnReconstructor(Reconstructor): allow_none=True, help="Prefix for the output of this model. If None, ``model_cls`` is used.", ).tag(config=True) - features = traits.List(traits.Unicode(), help="Features to use for this model").tag( + features = traits.List( + traits.Unicode(), help="Features to use for this model." 
+ ).tag(config=True) + model_config = traits.Dict({}, help="kwargs for the sklearn model.").tag( config=True ) - model_config = traits.Dict({}, help="kwargs for the sklearn model").tag(config=True) model_cls = traits.Enum( - SUPPORTED_MODELS.keys(), default_value=None, allow_none=True + SUPPORTED_MODELS.keys(), + default_value=None, + allow_none=True, + help="Which scikit-learn model to use.", ).tag(config=True) stereo_combiner_cls = traits.ComponentName( StereoCombiner, default_value="StereoMeanCombiner", - help="Which stereo combination method to use", + help="Which stereo combination method to use.", ).tag(config=True) load_path = traits.Path( default_value=None, allow_none=True, - help="If given, load serialized model from this path", + help="If given, load serialized model from this path.", ).tag(config=True) def __init__(self, subarray=None, models=None, n_jobs=None, **kwargs): @@ -155,7 +161,8 @@ def __init__(self, subarray=None, models=None, n_jobs=None, **kwargs): @abstractmethod def __call__(self, event: ArrayEventContainer) -> None: - """Event-wise prediction for the EventSource-Loop. + """ + Event-wise prediction for the EventSource-Loop. Fills the event.dl2.[name] container. @@ -167,7 +174,7 @@ def __call__(self, event: ArrayEventContainer) -> None: @abstractmethod def predict_table(self, key, table: Table) -> Table: """ - Predict on a table of events + Predict on a table of events. Parameters ---------- @@ -206,7 +213,7 @@ def _new_model(self): def _table_to_y(self, table, mask=None): """ - Extract target values as numpy array from input table + Extract target values as numpy array from input table. 
""" # make sure we use the unit that was used during training if self.unit is not None: @@ -236,9 +243,7 @@ def _set_n_jobs(self, n_jobs): class SKLearnRegressionReconstructor(SKLearnReconstructor): - """ - Base class for regression tasks - """ + """Base class for regression tasks.""" model_cls = traits.Enum( SUPPORTED_REGRESSORS.keys(), @@ -292,9 +297,7 @@ def _table_to_y(self, table, mask=None): class SKLearnClassificationReconstructor(SKLearnReconstructor): - """ - Base class for classification tasks - """ + """Base class for classification tasks.""" model_cls = traits.Enum( SUPPORTED_CLASSIFIERS.keys(), @@ -304,7 +307,8 @@ class SKLearnClassificationReconstructor(SKLearnReconstructor): ).tag(config=True) invalid_class = traits.Integer( - default_value=-1, help="The label to fill in case no prediction could be made" + default_value=-1, + help="The label value to fill in case no prediction could be made.", ).tag(config=True) positive_class = traits.Integer( @@ -369,17 +373,13 @@ def _get_positive_index(self, key): class EnergyRegressor(SKLearnRegressionReconstructor): """ - Use a scikit-learn regression model per telescope type to predict primary energy + Use a scikit-learn regression model per telescope type to predict primary energy. 
""" - #: Name of the target table column for training target = "true_energy" property = ReconstructionProperty.ENERGY def __call__(self, event: ArrayEventContainer) -> None: - """ - Apply model for a single event and fill result into the event container - """ for tel_id in event.trigger.tels_with_trigger: table = collect_features(event, tel_id, self.instrument_table) table = self.feature_generator(table, subarray=self.subarray) @@ -408,7 +408,6 @@ def __call__(self, event: ArrayEventContainer) -> None: self.stereo_combiner(event) def predict_table(self, key, table: Table) -> Dict[ReconstructionProperty, Table]: - """Predict on a table of events""" table = self.feature_generator(table, subarray=self.subarray) n_rows = len(table) @@ -434,11 +433,8 @@ def predict_table(self, key, table: Table) -> Dict[ReconstructionProperty, Table class ParticleClassifier(SKLearnClassificationReconstructor): - """ - Predict dl2 particle classification - """ + """Predict dl2 particle classification.""" - #: Name of the target table column for training target = "true_shower_primary_id" positive_class = traits.Integer( @@ -475,7 +471,6 @@ def __call__(self, event: ArrayEventContainer) -> None: self.stereo_combiner(event) def predict_table(self, key, table: Table) -> Dict[ReconstructionProperty, Table]: - """Predict on a table of events""" table = self.feature_generator(table, subarray=self.subarray) n_rows = len(table) @@ -502,23 +497,32 @@ def predict_table(self, key, table: Table) -> Dict[ReconstructionProperty, Table class DispReconstructor(Reconstructor): """ - Predict absolute value and sign for disp origin reconstruction for each telescope. + Predict absolute value and sign for disp origin reconstruction and + convert to altitude and azimuth prediction for each telescope. 
""" target = "true_disp" - prefix = traits.Unicode(default_value="disp", allow_none=False).tag(config=True) + prefix = traits.Unicode( + default_value="disp", + allow_none=False, + help="Prefix for the output of this model. If None, ``disp`` is used.", + ).tag(config=True) features = traits.List( - traits.Unicode(), help="Features to use for both models" + traits.Unicode(), help="Features to use for both models." ).tag(config=True) log_target = traits.Bool( default_value=False, - help="If True, the model is trained to predict the natural logarithm of the absolute value.", + help=( + "If True, the norm(disp) model is trained to predict ln(norm(disp))" + " and the output is" + " ``prefix_parameter`` = ``sign_prediction`` * ``exp(norm_prediction)``." + ), ).tag(config=True) - norm_config = traits.Dict({}, help="kwargs for the sklearn regressor").tag( + norm_config = traits.Dict({}, help="kwargs for the sklearn regressor.").tag( config=True ) @@ -529,7 +533,7 @@ class DispReconstructor(Reconstructor): help="Which scikit-learn regression model to use.", ).tag(config=True) - sign_config = traits.Dict({}, help="kwargs for the sklearn classifier").tag( + sign_config = traits.Dict({}, help="kwargs for the sklearn classifier.").tag( config=True ) @@ -543,13 +547,13 @@ class DispReconstructor(Reconstructor): stereo_combiner_cls = traits.ComponentName( StereoCombiner, default_value="StereoMeanCombiner", - help="Which stereo combination method to use", + help="Which stereo combination method to use.", ).tag(config=True) load_path = traits.Path( default_value=None, allow_none=True, - help="If given, load serialized model from this path", + help="If given, load serialized model from this path.", ).tag(config=True) def __init__(self, subarray=None, models=None, **kwargs): @@ -606,7 +610,7 @@ def _new_models(self): def _table_to_y(self, table, mask=None): """ - Extract target values as numpy array from input table + Extract target values as numpy array from input table. 
""" # make sure we use the unit that was used during training if self.unit is not None: @@ -695,7 +699,8 @@ def _predict(self, key, table): return prediction, score, valid def __call__(self, event: ArrayEventContainer) -> None: - """Event-wise prediction for the EventSource-Loop. + """ + Event-wise prediction for the EventSource-Loop. Fills the event.dl2.tel[tel_id].disp[prefix] container and event.dl2.tel[tel_id].geometry[prefix] container. @@ -766,7 +771,8 @@ def __call__(self, event: ArrayEventContainer) -> None: self.stereo_combiner(event) def predict_table(self, key, table: Table) -> Dict[ReconstructionProperty, Table]: - """Predict on a table of events + """ + Predict on a table of events. Parameters ---------- @@ -850,9 +856,11 @@ def _set_n_jobs(self, n_jobs): class CrossValidator(Component): - """Class to train sklearn based reconstructors in a cross validation""" + """Class to train sklearn based reconstructors in a cross validation.""" - n_cross_validations = traits.Int(5).tag(config=True) + n_cross_validations = traits.Int( + default_value=5, help="Number of cross validation iterations." + ).tag(config=True) output_path = traits.Path( default_value=None, @@ -867,7 +875,7 @@ class CrossValidator(Component): ).tag(config=True) rng_seed = traits.Int( - default_value=1337, help="Seed for the random number generator" + default_value=1337, help="Random seed for splitting the training data." 
).tag(config=True) def __init__(self, model_component, **kwargs): @@ -891,6 +899,7 @@ def __init__(self, model_component, **kwargs): ) def __call__(self, telescope_type, table): + """Perform cross validation for the given model.""" if self.n_cross_validations == 0: return diff --git a/ctapipe/reco/stereo_combination.py b/ctapipe/reco/stereo_combination.py index f36e1230891..c7dbd00b910 100644 --- a/ctapipe/reco/stereo_combination.py +++ b/ctapipe/reco/stereo_combination.py @@ -63,22 +63,24 @@ def _weighted_mean_ufunc(tel_values, weights, n_array_events, indices): class StereoCombiner(Component): - """Base Class for algorithms combining telescope-wise predictions to common prediction""" + """ + Base Class for algorithms combining telescope-wise predictions to common prediction. + """ prefix = Unicode( default_value="", - help="Prefix to be added to the output container / column names", + help="Prefix to be added to the output container / column names.", ).tag(config=True) property = UseEnum( ReconstructionProperty, - help="Which property is being combined", + help="Which property is being combined.", ).tag(config=True) @abstractmethod def __call__(self, event: ArrayEventContainer) -> None: """ - Fill event container with stereo predictions + Fill event container with stereo predictions. """ @abstractmethod @@ -91,17 +93,21 @@ def predict_table(self, mono_predictions: Table) -> Table: class StereoMeanCombiner(StereoCombiner): """ - Calculate array-event prediction as (weighted) mean of telescope-wise predictions + Calculate array-event prediction as (weighted) mean of telescope-wise predictions. """ weights = CaselessStrEnum( ["none", "intensity", "konrad"], default_value="none", + help=( + "What kind of weights to use." + " Options: ``none``, ``intensity``, ``konrad``." 
+ ), ).tag(config=True) log_target = Bool( False, - help="If true, calculate exp(mean(log(values)))", + help="If true, calculate exp(mean(log(values))).", ).tag(config=True) def __init__(self, *args, **kwargs): @@ -118,8 +124,6 @@ def __init__(self, *args, **kwargs): ) def _calculate_weights(self, data): - """""" - if isinstance(data, Container): if self.weights == "intensity": return data.hillas.intensity diff --git a/ctapipe/tools/train_disp_reconstructor.py b/ctapipe/tools/train_disp_reconstructor.py index 89ac2eeb8c3..405eb9f9fe9 100644 --- a/ctapipe/tools/train_disp_reconstructor.py +++ b/ctapipe/tools/train_disp_reconstructor.py @@ -63,7 +63,7 @@ class TrainDispReconstructor(Tool): ).tag(config=True) random_seed = Int( - default_value=0, help="Random seed for sampling and cross validation" + default_value=0, help="Random seed for sampling training events." ).tag(config=True) n_jobs = Int( diff --git a/ctapipe/tools/train_energy_regressor.py b/ctapipe/tools/train_energy_regressor.py index 383dedc7416..faf7c12d4c9 100644 --- a/ctapipe/tools/train_energy_regressor.py +++ b/ctapipe/tools/train_energy_regressor.py @@ -60,7 +60,7 @@ class TrainEnergyRegressor(Tool): ).tag(config=True) random_seed = Int( - default_value=0, help="Random seed for sampling and cross validation" + default_value=0, help="Random seed for sampling training events." ).tag(config=True) n_jobs = Int( diff --git a/ctapipe/tools/train_particle_classifier.py b/ctapipe/tools/train_particle_classifier.py index caf4a455764..14ca31831d6 100644 --- a/ctapipe/tools/train_particle_classifier.py +++ b/ctapipe/tools/train_particle_classifier.py @@ -88,8 +88,7 @@ class TrainParticleClassifier(Tool): ).tag(config=True) random_seed = Int( - default_value=0, - help="Random number seed for sampling and the cross validation splitting", + default_value=0, help="Random seed for sampling training events." 
 ).tag(config=True) n_jobs = Int( diff --git a/docs/changes/2456.optimization.rst b/docs/changes/2456.optimization.rst new file mode 100644 index 00000000000..8b15128334f --- /dev/null +++ b/docs/changes/2456.optimization.rst @@ -0,0 +1 @@ +Update and add missing docstrings and configuration help texts for the ML functionality.