Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENH] Added IDK² and s-IDK² Anomaly Detector To Aeon #2465

Open
wants to merge 51 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
51 commits
Select commit Hold shift + click to select a range
e9795f1
Added IDK² and s-IDK² anomaly detector to aeon
Ramana-Raja Dec 19, 2024
ff4b576
Added IDK to init
Ramana-Raja Dec 19, 2024
7709a7d
Added IDK to docs
Ramana-Raja Dec 19, 2024
7f2916f
Automatic `pre-commit` fixes
Ramana-Raja Dec 19, 2024
18516df
Update _idk.py to update docs
Ramana-Raja Dec 19, 2024
dd36f8b
Automatic `pre-commit` fixes
Ramana-Raja Dec 19, 2024
6734468
Update _idk.py to add get test param
Ramana-Raja Dec 19, 2024
b46a6fb
Automatic `pre-commit` fixes
Ramana-Raja Dec 19, 2024
ee81313
Update _idk.py to update axis
Ramana-Raja Dec 19, 2024
4de22ff
Update _idk.py to remove univariate
Ramana-Raja Dec 19, 2024
c7f057a
Update _idk.py changed axis
Ramana-Raja Dec 19, 2024
c77f556
Update _idk.py to make test_param small
Ramana-Raja Dec 19, 2024
6d8467d
Update _idk.py change width of test case to 1
Ramana-Raja Dec 19, 2024
4faa551
Update _idk.py changes psi1 and psi2 test values
Ramana-Raja Dec 19, 2024
af6ea04
Update _idk.py to add extra random_state
Ramana-Raja Dec 19, 2024
172fd80
Automatic `pre-commit` fixes
Ramana-Raja Dec 19, 2024
8cef628
Update _idk.py to add random_state for test_param
Ramana-Raja Dec 19, 2024
08d5ae8
Automatic `pre-commit` fixes
Ramana-Raja Dec 19, 2024
f78174e
test cases and changes have been added as requested by the moderators
Ramana-Raja Dec 20, 2024
6d28f5e
Merge remote-tracking branch 'origin/s-idk-and-idk' into s-idk-and-idk
Ramana-Raja Dec 20, 2024
d6b1719
Automatic `pre-commit` fixes
Ramana-Raja Dec 20, 2024
29f3348
added test_case random state
Ramana-Raja Dec 20, 2024
0112c67
Automatic `pre-commit` fixes
Ramana-Raja Dec 20, 2024
4e1ceab
fixed docs
Ramana-Raja Dec 20, 2024
39d9292
Automatic `pre-commit` fixes
Ramana-Raja Dec 20, 2024
4711ede
Updated docs
Ramana-Raja Dec 20, 2024
2c11c68
Automatic `pre-commit` fixes
Ramana-Raja Dec 20, 2024
8383533
Updated docs for test case
Ramana-Raja Dec 20, 2024
fc28ef3
Automatic `pre-commit` fixes
Ramana-Raja Dec 20, 2024
270e0e1
Updated test_idk.py
Ramana-Raja Dec 21, 2024
0319b5e
Automatic `pre-commit` fixes
Ramana-Raja Dec 21, 2024
4bb39a4
Updated test_idk.py to add docs
Ramana-Raja Dec 21, 2024
e4b51d2
Automatic `pre-commit` fixes
Ramana-Raja Dec 21, 2024
e5d9585
updated random_state
Ramana-Raja Dec 30, 2024
316a5d1
Automatic `pre-commit` fixes
Ramana-Raja Dec 30, 2024
cb7992e
Updated test.py
Ramana-Raja Dec 30, 2024
b319624
Automatic `pre-commit` fixes
Ramana-Raja Dec 30, 2024
3cbc709
Updated test_idk.py
Ramana-Raja Dec 31, 2024
cac70c7
Automatic `pre-commit` fixes
Ramana-Raja Dec 31, 2024
a2f4bf0
Updated test_idk.py to make sliding and non sliding into 1
Ramana-Raja Dec 31, 2024
1c4262b
Automatic `pre-commit` fixes
Ramana-Raja Dec 31, 2024
f91793b
Updated test_idk.py
Ramana-Raja Jan 5, 2025
81b0f5b
Automatic `pre-commit` fixes
Ramana-Raja Jan 5, 2025
ce8f7a0
fixed changes as requested
Ramana-Raja Jan 9, 2025
b8a76a5
Automatic `pre-commit` fixes
Ramana-Raja Jan 9, 2025
3c9bee7
updated code as requested by moderators
Ramana-Raja Jan 16, 2025
4ddc460
Automatic `pre-commit` fixes
Ramana-Raja Jan 16, 2025
fa0c74f
removed rng as instance variables
Ramana-Raja Jan 16, 2025
7615487
Automatic `pre-commit` fixes
Ramana-Raja Jan 16, 2025
baacc18
updated code as requested by moderators
Ramana-Raja Jan 17, 2025
b4e7046
Automatic `pre-commit` fixes
Ramana-Raja Jan 17, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions aeon/anomaly_detection/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,13 @@
"PyODAdapter",
"STOMP",
"STRAY",
"IDK",
]

from aeon.anomaly_detection._cblof import CBLOF
from aeon.anomaly_detection._copod import COPOD
from aeon.anomaly_detection._dwt_mlead import DWT_MLEAD
from aeon.anomaly_detection._idk import IDK
from aeon.anomaly_detection._iforest import IsolationForest
from aeon.anomaly_detection._kmeans import KMeansAD
from aeon.anomaly_detection._left_stampi import LeftSTAMPi
Expand Down
213 changes: 213 additions & 0 deletions aeon/anomaly_detection/_idk.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
from typing import Optional

import numpy as np

from aeon.anomaly_detection.base import BaseAnomalyDetector
from aeon.utils.windowing import reverse_windowing


class IDK(BaseAnomalyDetector):
"""IDK² and s-IDK² anomaly detector.

The Isolation Distributional Kernel (IDK) is a data-dependent kernel for efficient
anomaly detection, improving accuracy without explicit learning. Its extension,
IDK², simplifies group anomaly detection, outperforming traditional methods in
speed and effectiveness.

Parameters
----------
psi1 : int, default=8
The number of points drawn without replacement in each iteration to construct
the first-stage feature map. This parameter determines the granularity of the
first-stage feature representation. Higher values allow the model to capture
more detailed data characteristics but increase computational complexity.
psi2 : int, default=2
The number of window-level feature vectors drawn without replacement in each
iteration to construct the second-stage feature map. This parameter
determines the granularity of the second-stage feature representation.
Higher values allow the model to capture more detailed data characteristics
but increase computational complexity.
width : int, default=1
The size of the sliding or fixed-width window used for anomaly detection.
For fixed-width processing, this defines the length of each segment analyzed.
In sliding window mode, it specifies the length of the window moving across
the data. Smaller values lead to more localized anomaly detection, while
larger values capture broader trends.
t : int, default=100
The number of random sampling iterations used to construct the feature maps.
Each iteration draws a fresh set of samples and contributes ``psi1`` (first
stage) or ``psi2`` (second stage) columns to the feature map matrix. Larger
values improve the robustness of the feature maps but increase the runtime.
sliding : bool, default=False
Determines whether a sliding window approach is used for anomaly detection.
If True, the model computes scores for overlapping windows (stride 1) across
the time series and maps them back onto the time points with
``reverse_windowing`` using ``np.nanmean``. If False, the model processes the
data in consecutive fixed-width segments, offering faster computation at the
cost of granularity; only complete segments are scored, so the output holds
``n_timepoints // width`` values and trailing points that do not fill a
segment are ignored.
random_state : int or None, default=None
Seed handed to ``numpy.random.default_rng`` at the start of every prediction.
With an int the sampling, and therefore the scores, are reproducible; with
None a fresh unseeded generator is used.

Notes
-----
This implementation is inspired by the Isolation Distributional Kernel (IDK)
approach as detailed in [1]_.
The code is adapted from the open-source repository [2]_.

References
----------
.. [1] Isolation Distributional Kernel: A New Tool for Kernel-Based Anomaly
Detection. DOI: https://dl.acm.org/doi/10.1145/3394486.3403062

.. [2] GitHub Repository:
IsolationKernel/Codes: IDK Implementation for Time Series Data
URL: https://github.com/IsolationKernel/Codes/tree/main/IDK/TS
"""

# Declared estimator capabilities: univariate series only, no missing values.
_tags = {
"capability:univariate": True,
"capability:multivariate": False,
"capability:missing_values": False,
}

def __init__(
    self,
    psi1: int = 8,
    psi2: int = 2,
    width: int = 1,
    t: int = 100,
    sliding: bool = False,
    random_state: Optional[int] = None,
) -> None:
    """Store the hyper-parameters and initialise the base detector.

    Parameters
    ----------
    psi1 : int, default=8
        Sample size per iteration for the first-stage feature map.
    psi2 : int, default=2
        Sample size per iteration for the second-stage feature map.
    width : int, default=1
        Window length used to group time points.
    t : int, default=100
        Number of sampling iterations per feature map.
    sliding : bool, default=False
        Whether overlapping (sliding) windows are used instead of
        consecutive fixed-width windows.
    random_state : int or None, default=None
        Seed for the random generator created at prediction time.
    """
    # Arguments are kept exactly as passed; nothing is validated or
    # converted here.
    self.random_state = random_state
    self.sliding = sliding
    self.t = t
    self.width = width
    self.psi2 = psi2
    self.psi1 = psi1
    super().__init__(axis=0)

def _compute_point_to_sample(self, X, sample_indices):
    """Measure squared distances between all points and the sampled centres.

    Parameters
    ----------
    X : np.ndarray
        Two-dimensional array (it is indexed as ``X[rows, :]``); every row is
        treated as one point.
    sample_indices : array-like of int
        Row indices of ``X`` that act as centres.

    Returns
    -------
    point2sample : np.ndarray of shape (len(X), len(sample_indices))
        Squared Euclidean distance from each point to each centre.
    radius_list : np.ndarray of shape (len(sample_indices),)
        For each centre, the squared distance to its nearest other centre.
    min_dist_point2sample : np.ndarray of shape (len(X),)
        For each point, the column index of its nearest centre.
    """
    centres_t = X[sample_indices, :].T

    # ||x - c||^2 = ||x||^2 + ||c||^2 - 2 * x.c ; multiplying by all-ones
    # matrices spreads the squared norms over the full distance grid.
    point_norms = np.dot(np.square(X), np.ones(centres_t.shape))
    centre_norms = np.dot(np.ones(X.shape), np.square(centres_t))
    cross_terms = np.dot(X, centres_t)
    point2sample = point_norms + centre_norms - 2 * cross_terms

    # Integer-array indexing yields a copy, so masking the self-distances
    # with NaN does not touch ``point2sample`` itself.
    between_centres = point2sample[sample_indices, :]
    np.fill_diagonal(between_centres, np.nan)
    radius_list = np.nanmin(between_centres, axis=1)

    nearest_centre = np.argmin(point2sample, axis=1)
    return point2sample, radius_list, nearest_centre

def _ik_inne_fm(self, X, psi, t, rng):
    """Build the binary feature map of ``X`` over ``t`` sampling rounds.

    In every round ``psi`` rows of ``X`` are drawn without replacement as
    centres. A point activates the column of its nearest centre when its
    squared distance to that centre is strictly smaller than the centre's
    radius (the squared distance from the centre to its closest fellow
    centre). Points outside every radius leave the round's columns at zero.

    Parameters
    ----------
    X : np.ndarray
        Two-dimensional array with one point per row.
    psi : int
        Number of centres drawn per round.
    t : int
        Number of sampling rounds.
    rng : np.random.Generator
        Generator used for drawing the centres; it is advanced ``t`` times.

    Returns
    -------
    np.ndarray of shape (len(X), t * psi), dtype int
        Round ``r`` occupies columns ``r * psi`` to ``(r + 1) * psi - 1``;
        each row holds at most one ``1`` per round.
    """
    n_points = X.shape[0]
    feature_map = np.zeros((n_points, t * psi), dtype=int)
    rows = np.arange(n_points)  # loop-invariant row index

    for iteration in range(t):
        sample_indices = rng.choice(n_points, size=psi, replace=False)
        # The helper already reports each point's nearest centre, so the
        # argmin is not recomputed here.
        point2sample, radius_list, nearest = self._compute_point_to_sample(
            X, sample_indices
        )

        nearest_dist = point2sample[rows, nearest]
        inside = nearest_dist < radius_list[nearest]
        feature_map[rows[inside], nearest[inside] + iteration * psi] = 1

    return feature_map

def _idk(self, X, psi, t, rng):
    """Score every row of ``X`` against the mean feature map of all rows.

    Parameters
    ----------
    X : np.ndarray
        Two-dimensional array with one item per row.
    psi : int
        Number of centres drawn per round when building the feature map.
    t : int
        Number of sampling rounds; also the divisor of the final scores.
    rng : np.random.Generator
        Generator forwarded to the feature-map construction.

    Returns
    -------
    np.ndarray of shape (len(X),)
        Dot product of each row's feature vector with the column-wise mean
        feature vector, divided by ``t``.
    """
    feature_map = self._ik_inne_fm(X=X, psi=psi, t=t, rng=rng)
    mean_embedding = feature_map.mean(axis=0)
    similarity = np.dot(feature_map, mean_embedding)
    return similarity / t

def _idk_t(self, X, rng):
    """Score consecutive fixed-width windows of ``X``.

    Stage one runs ``self.t`` sampling rounds with ``self.psi1`` centres
    each. In every round a point counts towards the column of its nearest
    centre if it lies strictly inside that centre's radius. The counts are
    accumulated per window of ``self.width`` consecutive points and divided
    by ``self.width``. Stage two passes these window vectors to ``_idk``
    with ``self.psi2`` centres.

    Only complete windows are scored: trailing points that do not fill a
    whole window are ignored.

    Parameters
    ----------
    X : np.ndarray
        Two-dimensional array with one time point per row.
    rng : np.random.Generator
        Generator shared by both stages.

    Returns
    -------
    np.ndarray of shape (len(X) // self.width,)
        One score per complete window.
    """
    n_points = X.shape[0]
    n_windows = n_points // self.width
    window_counts = np.zeros((n_windows, self.t * self.psi1))

    rows = np.arange(n_points)
    window_of_point = rows // self.width
    # Points past the last complete window never contribute to a score.
    in_complete_window = window_of_point < n_windows

    for iteration in range(self.t):
        sample_indices = rng.choice(n_points, size=self.psi1, replace=False)
        point2sample, radius_list, nearest = self._compute_point_to_sample(
            X, sample_indices
        )

        inside = point2sample[rows, nearest] < radius_list[nearest]
        hit = inside & in_complete_window
        # Several points of one window may fall into the same column, so
        # an unbuffered add is needed to count every one of them.
        np.add.at(
            window_counts,
            (window_of_point[hit], nearest[hit] + iteration * self.psi1),
            1,
        )

    window_counts /= self.width

    return self._idk(window_counts, psi=self.psi2, t=self.t, rng=rng)

def _idk_square_sliding(self, X, rng):
    """Score every stride-1 window of ``self.width`` consecutive points.

    Parameters
    ----------
    X : np.ndarray
        Two-dimensional array with one time point per row.
    rng : np.random.Generator
        Generator shared by both stages.

    Returns
    -------
    np.ndarray
        One score per window position, as returned by ``_idk``.
    """
    point_features = self._ik_inne_fm(X=X, psi=self.psi1, t=self.t, rng=rng)

    # Prefix sums with a leading zero row turn every window sum into a
    # difference of two rows.
    prefix = np.cumsum(np.insert(point_features, 0, 0, axis=0), axis=0)
    window_sums = prefix[self.width :] - prefix[: -self.width]
    window_means = window_sums / float(self.width)

    return self._idk(X=window_means, psi=self.psi2, t=self.t, rng=rng)

def _predict(self, X):
    """Compute scores for ``X`` in fixed-width or sliding-window mode.

    A new generator is created from ``self.random_state`` on every call.

    Parameters
    ----------
    X : np.ndarray
        Series to score, as handed over by the base class.

    Returns
    -------
    np.ndarray
        With ``sliding=False``, the per-window scores from ``_idk_t``.
        With ``sliding=True``, the sliding-window scores after
        ``reverse_windowing`` with ``stride=1`` and ``np.nanmean``.
    """
    rng = np.random.default_rng(self.random_state)

    if not self.sliding:
        return self._idk_t(X, rng)

    window_scores = self._idk_square_sliding(X, rng)
    return reverse_windowing(
        y=window_scores,
        window_size=self.width,
        stride=1,
        reduction=np.nanmean,
    )

@classmethod
def _get_test_params(cls, parameter_set="default"):
    """Return testing parameter settings for the estimator.

    Parameters
    ----------
    parameter_set : str, default="default"
        Name of the requested parameter set. The value is not inspected;
        the same settings are returned for every name.

    Returns
    -------
    params : dict
        Keyword arguments such that ``cls(**params)`` builds a valid test
        instance.
    """
    return dict(psi1=8, psi2=2, width=1)
60 changes: 60 additions & 0 deletions aeon/anomaly_detection/tests/test_idk.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
"""Tests for the IDK Class."""

import numpy as np
from numpy.testing import assert_allclose

from aeon.anomaly_detection import IDK


def test_idk_univariate_basic():
    """Check shape, dtype and peak position of IDK scores with ``width=1``."""
    generator = np.random.default_rng(seed=2)
    data = generator.normal(size=(100,))
    # Shift a short run of points well below the rest of the series.
    data[50:58] -= 5

    detector = IDK(psi1=8, psi2=2, width=1, random_state=2)
    scores = detector.fit_predict(data)

    assert scores.shape == (100,)
    assert scores.dtype == np.float64
    peak = np.argmax(scores)
    assert peak >= 50 and peak <= 58


def test_idk_univariate_sliding():
    """Check shape, dtype and peak position of IDK scores in sliding mode."""
    generator = np.random.default_rng(seed=2)
    data = generator.normal(size=(100,))
    # Shift a short run of points well below the rest of the series.
    data[50:58] -= 5

    detector = IDK(psi1=16, psi2=4, width=10, sliding=True, random_state=1)
    scores = detector.fit_predict(data)

    assert scores.shape == (100,)
    assert scores.dtype == np.float64
    peak = np.argmax(scores)
    assert peak >= 60 and peak <= 80


def test_idk_univariate_custom_series():
    """Compare fixed-width IDK scores on a ten-point series with stored values."""
    values = [
        0.18905338,
        -0.52274844,
        -0.41306354,
        -2.44146738,
        1.79970738,
        1.14416587,
        -0.32542284,
        0.77380659,
        0.28121067,
        -0.55382284,
    ]
    data = np.array(values)
    # Ten points with width 3 give three complete windows, hence three scores.
    reference_scores = [0.52333333, 0.19, 0.52333333]

    detector = IDK(psi1=4, psi2=2, width=3, t=10, random_state=2)
    scores = detector.fit_predict(data)

    assert scores.shape == (3,)
    assert scores.dtype == np.float64
    assert_allclose(scores, reference_scores, atol=0.01)
1 change: 1 addition & 0 deletions docs/api_reference/anomaly_detection.rst
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ Detectors
PyODAdapter
STOMP
STRAY
IDK

Base
----
Expand Down