Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OutlierDetector components #2604

Merged
merged 13 commits into from
Aug 7, 2024
3 changes: 2 additions & 1 deletion docs/api-reference/monitoring/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ Monitoring data are time-series used to monitor the status or quality of hardwar

This module provides some code to help to generate monitoring data from processed event data, particularly for the purposes of calibration and data quality assessment.

Currently, only code related to :ref:`stats_aggregator` is implemented here.
Currently, only code related to :ref:`stats_aggregator` and :ref:`outlier_detector` is implemented here.


Submodules
Expand All @@ -21,6 +21,7 @@ Submodules
:glob:

aggregator
outlier


Reference/API
Expand Down
11 changes: 11 additions & 0 deletions docs/api-reference/monitoring/outlier.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
.. _outlier_detector:

****************
Outlier Detector
****************


Reference/API
=============

.. automodapi:: ctapipe.monitoring.outlier
1 change: 1 addition & 0 deletions docs/changes/2604.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add outlier detection components to identify faulty pixels.
138 changes: 138 additions & 0 deletions src/ctapipe/monitoring/outlier.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
"""
Outlier detection algorithms to identify faulty pixels
"""

__all__ = [
"OutlierDetector",
"RangeOutlierDetector",
"MedianOutlierDetector",
"StdOutlierDetector",
]

from abc import abstractmethod

import numpy as np

from ctapipe.core import TelescopeComponent, traits
from ctapipe.core.traits import List


class OutlierDetector(TelescopeComponent):
"""
Base class for outlier detection algorithms.
"""

@abstractmethod
def __call__(self, column) -> bool:
"""
Detect outliers in the provided column.

This function should be implemented by subclasses to define the specific
outlier detection approach. The function examines the statistics in the
given column of the table and returns a boolean mask indicating which
entries are considered as outliers.

Parameters
----------
column : astropy.table.Column
column with chunk-wise aggregated statistic values (mean, median, or std)
of shape (n_entries, n_channels, n_pix)

Returns
-------
boolean mask
mask of outliers of shape (n_entries, n_channels, n_pix)
"""
pass


class RangeOutlierDetector(OutlierDetector):
"""
Detect outliers based on a valid range.

The clipping interval to set the thresholds for detecting outliers corresponds to
a configurable range of valid statistic values.
"""

validity_range = List(
trait=traits.Float(),
default_value=[1.0, 2.0],
help=(
"Defines the range of acceptable values (lower, upper) in units of image values. "
"Values outside this range will be flagged as outliers."
),
minlen=2,
maxlen=2,
).tag(config=True)

def __call__(self, column):
# Remove outliers is statistical values out a given range
outliers = np.logical_or(
column < self.validity_range[0],
column > self.validity_range[1],
)
return outliers


class MedianOutlierDetector(OutlierDetector):
"""
Detect outliers based on the deviation from the camera median.

The clipping interval to set the thresholds for detecting outliers is computed by multiplying
the configurable factors and the camera median of the statistic values.
"""

median_range_factors = List(
trait=traits.Float(),
default_value=[-1.0, 1.0],
help=(
"Defines the range of acceptable values (lower, upper) in units of medians. "
"Values whose deviation to the camera median lays outside of this range will be flagged as outliers."
),
TjarkMiener marked this conversation as resolved.
Show resolved Hide resolved
minlen=2,
maxlen=2,
).tag(config=True)

def __call__(self, column):
# Camera median
camera_median = np.ma.median(column, axis=2)
# Detect outliers based on the deviation of the median distribution
deviation = column - camera_median[:, :, np.newaxis]
outliers = np.logical_or(
deviation < self.median_range_factors[0] * camera_median[:, :, np.newaxis],
deviation > self.median_range_factors[1] * camera_median[:, :, np.newaxis],
)
return outliers


class StdOutlierDetector(OutlierDetector):
"""
Detect outliers based on the deviation from the camera standard deviation.

The clipping interval to set the thresholds for detecting outliers is computed by multiplying
the configurable factors and the camera standard deviation of the statistic values.
"""

std_range_factors = List(
trait=traits.Float(),
default_value=[-1.0, 1.0],
help=(
"Defines the range of acceptable values (lower, upper) in units of standard deviations. "
"Values whose deviation to the camera median lays outside of this range will be flagged as outliers."
),
minlen=2,
maxlen=2,
).tag(config=True)

def __call__(self, column):
# Camera median
camera_median = np.ma.median(column, axis=2)
# Camera std
camera_std = np.ma.std(column, axis=2)
# Detect outliers based on the deviation of the standard deviation distribution
deviation = column - camera_median[:, :, np.newaxis]
outliers = np.logical_or(
deviation < self.std_range_factors[0] * camera_std[:, :, np.newaxis],
deviation > self.std_range_factors[1] * camera_std[:, :, np.newaxis],
)
return outliers
104 changes: 104 additions & 0 deletions src/ctapipe/monitoring/tests/test_outlier.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
"""
Tests for OutlierDetector and related functions
"""

import numpy as np
from astropy.table import Table

from ctapipe.monitoring.outlier import (
MedianOutlierDetector,
RangeOutlierDetector,
StdOutlierDetector,
)


def test_range_detection(example_subarray):
"""test the RangeOutlierDetector"""

# Create dummy data for testing
rng = np.random.default_rng(0)
# Distribution mimics the mean values of peak times of flat-field events
mean = rng.normal(18.0, 0.4, size=(50, 2, 1855))
# Insert outliers
mean[12, 0, 120] = 3.0
mean[42, 1, 67] = 35.0
# Create astropy table
table = Table([mean], names=("mean",))
# Initialize the outlier detector based on the range of valid values
# In this test, the interval [15, 25] corresponds to the range (in waveform samples)
# of accepted mean (or median) values of peak times of flat-field events
detector = RangeOutlierDetector(
subarray=example_subarray, validity_range=[15.0, 25.0]
)
# Detect outliers
outliers = detector(table["mean"])
# Construct the expected outliers
expected_outliers = np.zeros((50, 2, 1855), dtype=bool)
expected_outliers[12, 0, 120] = True
expected_outliers[42, 1, 67] = True
# Check if outliers where detected correctly
np.testing.assert_array_equal(outliers, expected_outliers)


def test_median_detection(example_subarray):
"""test the MedianOutlierDetector"""

# Create dummy data for testing
rng = np.random.default_rng(0)
# Distribution mimics the median values of charge images of flat-field events
median = rng.normal(77.0, 0.6, size=(50, 2, 1855))
# Insert outliers
median[12, 0, 120] = 1.2
median[42, 1, 67] = 21045.1
# Create astropy table
table = Table([median], names=("median",))
# Initialize the outlier detector based on the deviation from the camera median
# In this test, the interval [-0.9, 8] corresponds to multiplication factors
# typical used for the median values of charge images of flat-field events
detector = MedianOutlierDetector(
subarray=example_subarray, median_range_factors=[-0.9, 8.0]
)
# Detect outliers
outliers = detector(table["median"])
# Construct the expected outliers
expected_outliers = np.zeros((50, 2, 1855), dtype=bool)
expected_outliers[12, 0, 120] = True
expected_outliers[42, 1, 67] = True
# Check if outliers where detected correctly
np.testing.assert_array_equal(outliers, expected_outliers)


def test_std_detection(example_subarray):
"""test the StdOutlierDetector"""

# Create dummy data for testing
rng = np.random.default_rng(0)
# Distribution mimics the std values of charge images of flat-field events
ff_std = rng.normal(10.0, 2.0, size=(50, 2, 1855))
# Distribution mimics the median values of charge images of pedestal events
ped_median = rng.normal(2.0, 1.5, size=(50, 2, 1855))
# Insert outliers
ff_std[12, 0, 120] = 45.5
ped_median[42, 1, 67] = 77.2
# Create astropy table
ff_table = Table([ff_std], names=("std",))
ped_table = Table([ped_median], names=("median",))

# Initialize the outlier detector based on the deviation from the camera standard deviation
# In this test, the interval [-15, 15] corresponds to multiplication factors
# typical used for the std values of charge images of flat-field events
# and median (and std) values of charge images of pedestal events
detector = StdOutlierDetector(
subarray=example_subarray, std_range_factors=[-15.0, 15.0]
)
ff_outliers = detector(ff_table["std"])
ped_outliers = detector(ped_table["median"])
# Construct the expected outliers
ff_expected_outliers = np.zeros((50, 2, 1855), dtype=bool)
ff_expected_outliers[12, 0, 120] = True
ped_expected_outliers = np.zeros((50, 2, 1855), dtype=bool)
ped_expected_outliers[42, 1, 67] = True

# Check if outliers where detected correctly
np.testing.assert_array_equal(ff_outliers, ff_expected_outliers)
np.testing.assert_array_equal(ped_outliers, ped_expected_outliers)