Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update and add missing docstrings related to ML functionalities #2456

Merged
merged 3 commits into from
Dec 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 52 additions & 43 deletions ctapipe/reco/sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,41 +66,47 @@ class MLQualityQuery(QualityQuery):


class SKLearnReconstructor(Reconstructor):
"""Base Class for a Machine Learning Based Reconstructor.
"""
Base Class for a Machine Learning Based Reconstructor.

Keeps a dictionary of sklearn models, the current tools are designed
to train one model per telescope type.
"""

#: Name of the target column in training table
#: Name of the target table column for training.
target: str = ""

#: property predicted, overridden in baseclass
#: Property predicted, overridden in subclass.
property = None

prefix = traits.Unicode(
default_value=None,
allow_none=True,
help="Prefix for the output of this model. If None, ``model_cls`` is used.",
).tag(config=True)
features = traits.List(traits.Unicode(), help="Features to use for this model").tag(
features = traits.List(
traits.Unicode(), help="Features to use for this model."
).tag(config=True)
model_config = traits.Dict({}, help="kwargs for the sklearn model.").tag(
config=True
)
model_config = traits.Dict({}, help="kwargs for the sklearn model").tag(config=True)
model_cls = traits.Enum(
SUPPORTED_MODELS.keys(), default_value=None, allow_none=True
SUPPORTED_MODELS.keys(),
default_value=None,
allow_none=True,
help="Which scikit-learn model to use.",
).tag(config=True)

stereo_combiner_cls = traits.ComponentName(
StereoCombiner,
default_value="StereoMeanCombiner",
help="Which stereo combination method to use",
help="Which stereo combination method to use.",
).tag(config=True)

load_path = traits.Path(
default_value=None,
allow_none=True,
help="If given, load serialized model from this path",
help="If given, load serialized model from this path.",
).tag(config=True)

def __init__(self, subarray=None, models=None, n_jobs=None, **kwargs):
Expand Down Expand Up @@ -155,7 +161,8 @@ def __init__(self, subarray=None, models=None, n_jobs=None, **kwargs):

@abstractmethod
def __call__(self, event: ArrayEventContainer) -> None:
"""Event-wise prediction for the EventSource-Loop.
"""
Event-wise prediction for the EventSource-Loop.

Fills the event.dl2.<your-feature>[name] container.

Expand All @@ -167,7 +174,7 @@ def __call__(self, event: ArrayEventContainer) -> None:
@abstractmethod
def predict_table(self, key, table: Table) -> Table:
"""
Predict on a table of events
Predict on a table of events.

Parameters
----------
Expand Down Expand Up @@ -206,7 +213,7 @@ def _new_model(self):

def _table_to_y(self, table, mask=None):
"""
Extract target values as numpy array from input table
Extract target values as numpy array from input table.
"""
# make sure we use the unit that was used during training
if self.unit is not None:
Expand Down Expand Up @@ -236,9 +243,7 @@ def _set_n_jobs(self, n_jobs):


class SKLearnRegressionReconstructor(SKLearnReconstructor):
"""
Base class for regression tasks
"""
"""Base class for regression tasks."""

model_cls = traits.Enum(
SUPPORTED_REGRESSORS.keys(),
Expand Down Expand Up @@ -292,9 +297,7 @@ def _table_to_y(self, table, mask=None):


class SKLearnClassificationReconstructor(SKLearnReconstructor):
"""
Base class for classification tasks
"""
"""Base class for classification tasks."""

model_cls = traits.Enum(
SUPPORTED_CLASSIFIERS.keys(),
Expand All @@ -304,7 +307,8 @@ class SKLearnClassificationReconstructor(SKLearnReconstructor):
).tag(config=True)

invalid_class = traits.Integer(
default_value=-1, help="The label to fill in case no prediction could be made"
default_value=-1,
help="The label value to fill in case no prediction could be made.",
).tag(config=True)

positive_class = traits.Integer(
Expand Down Expand Up @@ -369,17 +373,13 @@ def _get_positive_index(self, key):

class EnergyRegressor(SKLearnRegressionReconstructor):
"""
Use a scikit-learn regression model per telescope type to predict primary energy
Use a scikit-learn regression model per telescope type to predict primary energy.
"""

#: Name of the target table column for training
target = "true_energy"
property = ReconstructionProperty.ENERGY

def __call__(self, event: ArrayEventContainer) -> None:
"""
Apply model for a single event and fill result into the event container
"""
for tel_id in event.trigger.tels_with_trigger:
table = collect_features(event, tel_id, self.instrument_table)
table = self.feature_generator(table, subarray=self.subarray)
Expand Down Expand Up @@ -408,7 +408,6 @@ def __call__(self, event: ArrayEventContainer) -> None:
self.stereo_combiner(event)

def predict_table(self, key, table: Table) -> Dict[ReconstructionProperty, Table]:
"""Predict on a table of events"""
table = self.feature_generator(table, subarray=self.subarray)

n_rows = len(table)
Expand All @@ -434,11 +433,8 @@ def predict_table(self, key, table: Table) -> Dict[ReconstructionProperty, Table


class ParticleClassifier(SKLearnClassificationReconstructor):
"""
Predict dl2 particle classification
"""
"""Predict dl2 particle classification."""

#: Name of the target table column for training
target = "true_shower_primary_id"

positive_class = traits.Integer(
Expand Down Expand Up @@ -475,7 +471,6 @@ def __call__(self, event: ArrayEventContainer) -> None:
self.stereo_combiner(event)

def predict_table(self, key, table: Table) -> Dict[ReconstructionProperty, Table]:
"""Predict on a table of events"""
table = self.feature_generator(table, subarray=self.subarray)

n_rows = len(table)
Expand All @@ -502,23 +497,32 @@ def predict_table(self, key, table: Table) -> Dict[ReconstructionProperty, Table

class DispReconstructor(Reconstructor):
"""
Predict absolute value and sign for disp origin reconstruction for each telescope.
Predict absolute value and sign for disp origin reconstruction and
convert to altitude and azimuth prediction for each telescope.
"""

target = "true_disp"

prefix = traits.Unicode(default_value="disp", allow_none=False).tag(config=True)
# Prefix for the output of this reconstructor. ``allow_none=False`` means
# None is rejected, so the help text must not describe a None fallback;
# ``disp`` is simply the default value.
prefix = traits.Unicode(
    default_value="disp",
    allow_none=False,
    help="Prefix for the output of this model. Defaults to ``disp``.",
).tag(config=True)

features = traits.List(
traits.Unicode(), help="Features to use for both models"
traits.Unicode(), help="Features to use for both models."
).tag(config=True)

log_target = traits.Bool(
default_value=False,
help="If True, the model is trained to predict the natural logarithm of the absolute value.",
help=(
"If True, the norm(disp) model is trained to predict ln(norm(disp))"
" and the output is"
" ``prefix_parameter`` = ``sign_prediction`` * ``exp(norm_prediction)``."
),
).tag(config=True)

norm_config = traits.Dict({}, help="kwargs for the sklearn regressor").tag(
norm_config = traits.Dict({}, help="kwargs for the sklearn regressor.").tag(
config=True
)

Expand All @@ -529,7 +533,7 @@ class DispReconstructor(Reconstructor):
help="Which scikit-learn regression model to use.",
).tag(config=True)

sign_config = traits.Dict({}, help="kwargs for the sklearn classifier").tag(
sign_config = traits.Dict({}, help="kwargs for the sklearn classifier.").tag(
config=True
)

Expand All @@ -543,13 +547,13 @@ class DispReconstructor(Reconstructor):
stereo_combiner_cls = traits.ComponentName(
StereoCombiner,
default_value="StereoMeanCombiner",
help="Which stereo combination method to use",
help="Which stereo combination method to use.",
).tag(config=True)

load_path = traits.Path(
default_value=None,
allow_none=True,
help="If given, load serialized model from this path",
help="If given, load serialized model from this path.",
).tag(config=True)

def __init__(self, subarray=None, models=None, **kwargs):
Expand Down Expand Up @@ -606,7 +610,7 @@ def _new_models(self):

def _table_to_y(self, table, mask=None):
"""
Extract target values as numpy array from input table
Extract target values as numpy array from input table.
"""
# make sure we use the unit that was used during training
if self.unit is not None:
Expand Down Expand Up @@ -689,7 +693,8 @@ def _predict(self, key, table):
return prediction, valid

def __call__(self, event: ArrayEventContainer) -> None:
"""Event-wise prediction for the EventSource-Loop.
"""
Event-wise prediction for the EventSource-Loop.

Fills the event.dl2.tel[tel_id].disp[prefix] container
and event.dl2.tel[tel_id].geometry[prefix] container.
Expand Down Expand Up @@ -755,7 +760,8 @@ def __call__(self, event: ArrayEventContainer) -> None:
self.stereo_combiner(event)

def predict_table(self, key, table: Table) -> Dict[ReconstructionProperty, Table]:
"""Predict on a table of events
"""
Predict on a table of events.

Parameters
----------
Expand Down Expand Up @@ -831,9 +837,11 @@ def _set_n_jobs(self, n_jobs):


class CrossValidator(Component):
"""Class to train sklearn based reconstructors in a cross validation"""
"""Class to train sklearn based reconstructors in a cross validation."""

n_cross_validations = traits.Int(5).tag(config=True)
n_cross_validations = traits.Int(
default_value=5, help="Number of cross validation iterations."
).tag(config=True)

output_path = traits.Path(
default_value=None,
Expand All @@ -848,7 +856,7 @@ class CrossValidator(Component):
).tag(config=True)

rng_seed = traits.Int(
default_value=1337, help="Seed for the random number generator"
default_value=1337, help="Random seed for splitting the training data."
).tag(config=True)

def __init__(self, model_component, **kwargs):
Expand All @@ -872,6 +880,7 @@ def __init__(self, model_component, **kwargs):
)

def __call__(self, telescope_type, table):
"""Perform cross validation for the given model."""
if self.n_cross_validations == 0:
return

Expand Down
20 changes: 12 additions & 8 deletions ctapipe/reco/stereo_combination.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,22 +63,24 @@ def _weighted_mean_ufunc(tel_values, weights, n_array_events, indices):


class StereoCombiner(Component):
"""Base Class for algorithms combining telescope-wise predictions to common prediction"""
"""
Base class for algorithms combining telescope-wise predictions into a common prediction.
"""

prefix = Unicode(
default_value="",
help="Prefix to be added to the output container / column names",
help="Prefix to be added to the output container / column names.",
).tag(config=True)

property = UseEnum(
ReconstructionProperty,
help="Which property is being combined",
help="Which property is being combined.",
).tag(config=True)

@abstractmethod
def __call__(self, event: ArrayEventContainer) -> None:
"""
Fill event container with stereo predictions
Fill event container with stereo predictions.
"""

@abstractmethod
Expand All @@ -91,17 +93,21 @@ def predict_table(self, mono_predictions: Table) -> Table:

class StereoMeanCombiner(StereoCombiner):
"""
Calculate array-event prediction as (weighted) mean of telescope-wise predictions
Calculate array-event prediction as (weighted) mean of telescope-wise predictions.
"""

weights = CaselessStrEnum(
["none", "intensity", "konrad"],
default_value="none",
help=(
"What kind of weights to use."
" Options: ``none``, ``intensity``, ``konrad``."
),
).tag(config=True)

log_target = Bool(
False,
help="If true, calculate exp(mean(log(values)))",
help="If true, calculate exp(mean(log(values))).",
).tag(config=True)

def __init__(self, *args, **kwargs):
Expand All @@ -118,8 +124,6 @@ def __init__(self, *args, **kwargs):
)

def _calculate_weights(self, data):
""""""

if isinstance(data, Container):
if self.weights == "intensity":
return data.hillas.intensity
Expand Down
2 changes: 1 addition & 1 deletion ctapipe/tools/train_disp_reconstructor.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ class TrainDispReconstructor(Tool):
).tag(config=True)

random_seed = Int(
default_value=0, help="Random seed for sampling and cross validation"
default_value=0, help="Random seed for sampling training events."
).tag(config=True)

n_jobs = Int(
Expand Down
2 changes: 1 addition & 1 deletion ctapipe/tools/train_energy_regressor.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ class TrainEnergyRegressor(Tool):
).tag(config=True)

random_seed = Int(
default_value=0, help="Random seed for sampling and cross validation"
default_value=0, help="Random seed for sampling training events."
).tag(config=True)

n_jobs = Int(
Expand Down
3 changes: 1 addition & 2 deletions ctapipe/tools/train_particle_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,7 @@ class TrainParticleClassifier(Tool):
).tag(config=True)

random_seed = Int(
default_value=0,
help="Random number seed for sampling and the cross validation splitting",
default_value=0, help="Random seed for sampling training events."
).tag(config=True)

n_jobs = Int(
Expand Down
1 change: 1 addition & 0 deletions docs/changes/2456.optimization.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Update existing docstrings and add missing ones for the ML functionality.