diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectEntireAnomalyBySrCnn.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectEntireAnomalyBySrCnn.cs
new file mode 100644
index 0000000000..d264dd69f4
--- /dev/null
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectEntireAnomalyBySrCnn.cs
@@ -0,0 +1,99 @@
+using System;
+using System.Collections.Generic;
+using Microsoft.ML;
+using Microsoft.ML.Data;
+using Microsoft.ML.TimeSeries;
+
+namespace Samples.Dynamic
+{
+    public static class DetectEntireAnomalyBySrCnn
+    {
+        public static void Example()
+        {
+            // Create a new ML context for ML.NET operations. It can be used for
+            // exception tracking and logging, as well as the source of randomness.
+            var ml = new MLContext();
+
+            // Generate sample series data with an anomaly.
+            var data = new List<TimeSeriesData>();
+            for (int index = 0; index < 20; index++)
+            {
+                data.Add(new TimeSeriesData { Value = 5 });
+            }
+            data.Add(new TimeSeriesData { Value = 10 });
+            for (int index = 0; index < 5; index++)
+            {
+                data.Add(new TimeSeriesData { Value = 5 });
+            }
+
+            // Convert data to IDataView.
+            var dataView = ml.Data.LoadFromEnumerable(data);
+
+            // Set up the detection arguments.
+            string outputColumnName = nameof(SrCnnAnomalyDetection.Prediction);
+            string inputColumnName = nameof(TimeSeriesData.Value);
+
+            // Do batch anomaly detection.
+            var outputDataView = ml.AnomalyDetection.DetectEntireAnomalyBySrCnn(dataView, outputColumnName, inputColumnName,
+                threshold: 0.35, batchSize: 512, sensitivity: 90.0, detectMode: SrCnnDetectMode.AnomalyAndMargin);
+
+            // Get the data of the newly created column as an IEnumerable of
+            // SrCnnAnomalyDetection.
+            var predictionColumn = ml.Data.CreateEnumerable<SrCnnAnomalyDetection>(
+                outputDataView, reuseRowObject: false);
+
+            Console.WriteLine("Index\tData\tAnomaly\tAnomalyScore\tMag\tExpectedValue\tBoundaryUnit\tUpperBoundary\tLowerBoundary");
+
+            int k = 0;
+            foreach (var prediction in predictionColumn)
+            {
+                PrintPrediction(k, data[k].Value, prediction);
+                k++;
+            }
+            //Index Data    Anomaly AnomalyScore    Mag     ExpectedValue   BoundaryUnit    UpperBoundary   LowerBoundary
+            //0     5.00    0       0.00            0.21    5.00            5.00            5.01            4.99
+            //1     5.00    0       0.00            0.11    5.00            5.00            5.01            4.99
+            //2     5.00    0       0.00            0.03    5.00            5.00            5.01            4.99
+            //3     5.00    0       0.00            0.01    5.00            5.00            5.01            4.99
+            //4     5.00    0       0.00            0.03    5.00            5.00            5.01            4.99
+            //5     5.00    0       0.00            0.06    5.00            5.00            5.01            4.99
+            //6     5.00    0       0.00            0.02    5.00            5.00            5.01            4.99
+            //7     5.00    0       0.00            0.01    5.00            5.00            5.01            4.99
+            //8     5.00    0       0.00            0.01    5.00            5.00            5.01            4.99
+            //9     5.00    0       0.00            0.01    5.00            5.00            5.01            4.99
+            //10    5.00    0       0.00            0.00    5.00            5.00            5.01            4.99
+            //11    5.00    0       0.00            0.01    5.00            5.00            5.01            4.99
+            //12    5.00    0       0.00            0.01    5.00            5.00            5.01            4.99
+            //13    5.00    0       0.00            0.02    5.00            5.00            5.01            4.99
+            //14    5.00    0       0.00            0.07    5.00            5.00            5.01            4.99
+            //15    5.00    0       0.00            0.08    5.00            5.00            5.01            4.99
+            //16    5.00    0       0.00            0.02    5.00            5.00            5.01            4.99
+            //17    5.00    0       0.00            0.05    5.00            5.00            5.01            4.99
+            //18    5.00    0       0.00            0.12    5.00            5.00            5.01            4.99
+            //19    5.00    0       0.00            0.17    5.00            5.00            5.01            4.99
+            //20    10.00   1       0.50            0.80    5.00            5.00            5.01            4.99
+            //21    5.00    0       0.00            0.16    5.00            5.00            5.01            4.99
+            //22    5.00    0       0.00            0.11    5.00            5.00            5.01            4.99
+            //23    5.00    0       0.00            0.05    5.00            5.00            5.01            4.99
+            //24    5.00    0       0.00            0.11    5.00            5.00            5.01            4.99
+            //25    5.00    0       0.00            0.19    5.00            5.00            5.01            4.99
+        }
+
+        private static void PrintPrediction(int idx, double value, SrCnnAnomalyDetection prediction) =>
+            Console.WriteLine("{0}\t{1:0.00}\t{2}\t\t{3:0.00}\t{4:0.00}\t\t{5:0.00}\t\t{6:0.00}\t\t{7:0.00}\t\t{8:0.00}",
+                idx, value,
+                prediction.Prediction[0], prediction.Prediction[1], prediction.Prediction[2],
+                prediction.Prediction[3], prediction.Prediction[4], prediction.Prediction[5], prediction.Prediction[6]);
+
+        private class TimeSeriesData
+        {
+            public double Value { get; set; }
+        }
+
+        private class SrCnnAnomalyDetection
+        {
+            [VectorType]
+            public double[] Prediction { get; set; }
+        }
+    }
+}
diff --git a/src/Microsoft.ML.Data/DataView/BatchDataViewMapperBase.cs b/src/Microsoft.ML.Data/DataView/BatchDataViewMapperBase.cs
new file mode 100644
index 0000000000..e1cde83032
--- /dev/null
+++ b/src/Microsoft.ML.Data/DataView/BatchDataViewMapperBase.cs
@@ -0,0 +1,173 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using Microsoft.ML.Runtime;
+
+namespace Microsoft.ML.Data.DataView
+{
+    internal abstract class BatchDataViewMapperBase<TInput, TBatch> : IDataView
+    {
+        public bool CanShuffle => false;
+
+        public DataViewSchema Schema => SchemaBindings.AsSchema;
+
+        private readonly IDataView _source;
+        protected readonly IHost Host;
+
+        protected BatchDataViewMapperBase(IHostEnvironment env, string registrationName, IDataView input)
+        {
+            Contracts.CheckValue(env, nameof(env));
+            Host = env.Register(registrationName);
+            _source = input;
+        }
+
+        public long? GetRowCount() => _source.GetRowCount();
+
+        public DataViewRowCursor GetRowCursor(IEnumerable<DataViewSchema.Column> columnsNeeded, Random rand = null)
+        {
+            Host.CheckValue(columnsNeeded, nameof(columnsNeeded));
+            Host.CheckValueOrNull(rand);
+
+            var predicate = RowCursorUtils.FromColumnsToPredicate(columnsNeeded, SchemaBindings.AsSchema);
+
+            // If we aren't selecting any of the output columns, don't construct our cursor.
+            // Note that because we cannot support random due to the inherently
+            // stratified nature, neither can we allow the base data to be shuffled,
+            // even if it supports shuffling.
+            if (!SchemaBindings.AnyNewColumnsActive(predicate))
+            {
+                var activeInput = SchemaBindings.GetActiveInput(predicate);
+                var inputCursor = _source.GetRowCursor(_source.Schema.Where(c => activeInput[c.Index]), null);
+                return new BindingsWrappedRowCursor(Host, inputCursor, SchemaBindings);
+            }
+            var active = SchemaBindings.GetActive(predicate);
+            Contracts.Assert(active.Length == SchemaBindings.ColumnCount);
+
+            // REVIEW: We can get a different input predicate for the input cursor and for the lookahead
+            // cursor. The lookahead cursor is only used for getting the values from the input column,
+            // so it only needs that column activated. The other cursor is used to get source columns,
+            // so it needs the rest of them activated.
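+            // Note that two cursors are created over the same input columns: the second (look-ahead)
+            // cursor is drained one whole batch ahead of the first, so that a batch can be fully
+            // aggregated before the main cursor serves its rows back to the consumer.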
+            var predInput = GetSchemaBindingDependencies(predicate);
+            var inputCols = _source.Schema.Where(c => predInput(c.Index));
+            return new Cursor(this, _source.GetRowCursor(inputCols), _source.GetRowCursor(inputCols), active);
+        }
+
+        public DataViewRowCursor[] GetRowCursorSet(IEnumerable<DataViewSchema.Column> columnsNeeded, int n, Random rand = null)
+        {
+            return new[] { GetRowCursor(columnsNeeded, rand) };
+        }
+
+        protected abstract ColumnBindingsBase SchemaBindings { get; }
+        protected abstract TBatch CreateBatch(DataViewRowCursor input);
+        protected abstract void ProcessBatch(TBatch currentBatch);
+        protected abstract void ProcessExample(TBatch currentBatch, TInput currentInput);
+        protected abstract Func<bool> GetLastInBatchDelegate(DataViewRowCursor lookAheadCursor);
+        protected abstract Func<bool> GetIsNewBatchDelegate(DataViewRowCursor lookAheadCursor);
+        protected abstract ValueGetter<TInput> GetLookAheadGetter(DataViewRowCursor lookAheadCursor);
+        protected abstract Delegate[] CreateGetters(DataViewRowCursor input, TBatch currentBatch, bool[] active);
+        protected abstract Func<int, bool> GetSchemaBindingDependencies(Func<int, bool> predicate);
+
+        private sealed class Cursor : RootCursorBase
+        {
+            private readonly BatchDataViewMapperBase<TInput, TBatch> _parent;
+            private readonly DataViewRowCursor _lookAheadCursor;
+            private readonly DataViewRowCursor _input;
+
+            private readonly bool[] _active;
+            private readonly Delegate[] _getters;
+
+            private readonly TBatch _currentBatch;
+            private readonly Func<bool> _lastInBatchInLookAheadCursorDel;
+            private readonly Func<bool> _firstInBatchInInputCursorDel;
+            private readonly ValueGetter<TInput> _inputGetterInLookAheadCursor;
+            private TInput _currentInput;
+
+            public override long Batch => 0;
+
+            public override DataViewSchema Schema => _parent.Schema;
+
+            public Cursor(BatchDataViewMapperBase<TInput, TBatch> parent, DataViewRowCursor input, DataViewRowCursor lookAheadCursor, bool[] active)
+                : base(parent.Host)
+            {
+                _parent = parent;
+                _input = input;
+                _lookAheadCursor = lookAheadCursor;
+                _active = active;
+
+                _currentBatch = _parent.CreateBatch(_input);
+
+                _getters = _parent.CreateGetters(_input, _currentBatch, _active);
+
+                _lastInBatchInLookAheadCursorDel = _parent.GetLastInBatchDelegate(_lookAheadCursor);
+                _firstInBatchInInputCursorDel = _parent.GetIsNewBatchDelegate(_input);
+                _inputGetterInLookAheadCursor = _parent.GetLookAheadGetter(_lookAheadCursor);
+            }
+
+            public override ValueGetter<TValue> GetGetter<TValue>(DataViewSchema.Column column)
+            {
+                Contracts.CheckParam(IsColumnActive(column), nameof(column), "requested column is not active");
+
+                var col = _parent.SchemaBindings.MapColumnIndex(out bool isSrc, column.Index);
+                if (isSrc)
+                {
+                    Contracts.AssertValue(_input);
+                    return _input.GetGetter<TValue>(_input.Schema[col]);
+                }
+
+                Ch.AssertValue(_getters);
+                var getter = _getters[col];
+                Ch.Assert(getter != null);
+                var fn = getter as ValueGetter<TValue>;
+                if (fn == null)
+                    throw Ch.Except("Invalid TValue in GetGetter: '{0}'", typeof(TValue));
+                return fn;
+            }
+
+            public override ValueGetter<DataViewRowId> GetIdGetter()
+            {
+                return
+                    (ref DataViewRowId val) =>
+                    {
+                        Ch.Check(IsGood, "Cannot call ID getter in current state");
+                        val = new DataViewRowId((ulong)Position, 0);
+                    };
+            }
+
+            public override bool IsColumnActive(DataViewSchema.Column column)
+            {
+                Ch.Check(column.Index < _parent.SchemaBindings.AsSchema.Count);
+                return _active[column.Index];
+            }
+
+            protected override bool MoveNextCore()
+            {
+                if (!_input.MoveNext())
+                    return false;
+                if (!_firstInBatchInInputCursorDel())
+                    return true;
+
+                // If we are here, this means that _input.MoveNext() has gotten us to the beginning of the next batch,
+                // so now we need to look ahead at the entire next batch in the _lookAheadCursor.
+                // The _lookAheadCursor's position should be on the last row of the previous batch (or -1).
+                Ch.Assert(_lastInBatchInLookAheadCursorDel());
+
+                var good = _lookAheadCursor.MoveNext();
+                // The two cursors should have the same number of elements, so if _input.MoveNext() returned true,
+                // then it must return true here too.
+                Ch.Assert(good);
+
+                do
+                {
+                    _inputGetterInLookAheadCursor(ref _currentInput);
+                    _parent.ProcessExample(_currentBatch, _currentInput);
+                } while (!_lastInBatchInLookAheadCursorDel() && _lookAheadCursor.MoveNext());
+
+                _parent.ProcessBatch(_currentBatch);
+                return true;
+            }
+        }
+    }
+}
diff --git a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs
index 2cc2aa3b0a..1ca2e95cf6 100644
--- a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs
+++ b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs
@@ -2,8 +2,6 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 // See the LICENSE file in the project root for more information.
 
-using System;
-using System.Reflection;
 using Microsoft.ML.Data;
 using Microsoft.ML.Runtime;
 using Microsoft.ML.TimeSeries;
@@ -150,6 +148,35 @@ public static SrCnnAnomalyEstimator DetectAnomalyBySrCnn(this TransformsCatalog
             int windowSize = 64, int backAddWindowSize = 5, int lookaheadWindowSize = 5, int averageingWindowSize = 3,
             int judgementWindowSize = 21, double threshold = 0.3)
             => new SrCnnAnomalyEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, windowSize, backAddWindowSize, lookaheadWindowSize, averageingWindowSize, judgementWindowSize, threshold, inputColumnName);
 
+        /// <summary>
+        /// Create <see cref="SrCnnEntireAnomalyDetector"/>, which detects timeseries anomalies for the entire input using the SRCNN algorithm.
+        /// </summary>
+        /// <param name="catalog">The AnomalyDetectionCatalog.</param>
+        /// <param name="input">Input DataView.</param>
+        /// <param name="outputColumnName">Name of the column resulting from data processing of <paramref name="inputColumnName"/>.
+        /// The column data is a vector of <see cref="System.Double"/>. The length of this vector varies depending on <paramref name="detectMode"/>.</param>
+        /// <param name="inputColumnName">Name of the column to process. The column data must be <see cref="System.Double"/>.</param>
+        /// <param name="threshold">The threshold to determine an anomaly; a score larger than the threshold is considered an anomaly. Must be in [0,1]. Default value is 0.3.</param>
+        /// <param name="batchSize">Divide the input data into batches to fit the srcnn model.
+        /// When set to -1, use the whole input to fit the model instead of batch by batch; when set to a positive integer, use that number as the batch size.
+        /// Must be -1 or a positive integer no less than 12. Default value is 1024.</param>
+        /// <param name="sensitivity">Sensitivity of boundaries, only useful when srCnnDetectMode is AnomalyAndMargin. Must be in [0,100]. Default value is 99.</param>
+        /// <param name="detectMode">An enum type of <see cref="SrCnnDetectMode"/>.
+        /// When set to AnomalyOnly, the output vector is a 3-element Double vector of (IsAnomaly, RawScore, Mag).
+        /// When set to AnomalyAndExpectedValue, the output vector is a 4-element Double vector of (IsAnomaly, RawScore, Mag, ExpectedValue).
+        /// When set to AnomalyAndMargin, the output vector is a 7-element Double vector of (IsAnomaly, AnomalyScore, Mag, ExpectedValue, BoundaryUnit, UpperBoundary, LowerBoundary).
+        /// Default value is AnomalyOnly.</param>
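+        /// <returns>The input data with an additional column, named as specified in <paramref name="outputColumnName"/>, containing the detection results.</returns>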
+        /// <example>
+        /// <format type="text/markdown">
+        /// <![CDATA[
+        /// [!code-csharp[DetectEntireAnomalyBySrCnn](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectEntireAnomalyBySrCnn.cs)]
+        /// ]]>
+        /// </format>
+        /// </example>
+        public static IDataView DetectEntireAnomalyBySrCnn(this AnomalyDetectionCatalog catalog, IDataView input, string outputColumnName, string inputColumnName,
+            double threshold = 0.3, int batchSize = 1024, double sensitivity = 99, SrCnnDetectMode detectMode = SrCnnDetectMode.AnomalyOnly)
+            => new SrCnnEntireAnomalyDetector(CatalogUtils.GetEnvironment(catalog), input, inputColumnName, outputColumnName, threshold, batchSize, sensitivity, detectMode);
+
         /// <summary>
         /// Create <see cref="RootCause"/>, which localizes root causes using decision tree algorithm.
         /// </summary>
diff --git a/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs b/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs
new file mode 100644
index 0000000000..e146ff1ebf
--- /dev/null
+++ b/src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs
@@ -0,0 +1,844 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using Microsoft.ML.Data;
+using Microsoft.ML.Data.DataView;
+using Microsoft.ML.Runtime;
+using Microsoft.ML.Transforms.TimeSeries;
+
+namespace Microsoft.ML.TimeSeries
+{
+    /// <summary>
+    /// The detect modes of SrCnn models.
+    /// </summary>
+    public enum SrCnnDetectMode
+    {
+        /// <summary>
+        /// In this mode, output (IsAnomaly, RawScore, Mag).
+        /// </summary>
+        AnomalyOnly = 0,
+
+        /// <summary>
+        /// In this mode, output (IsAnomaly, AnomalyScore, Mag, ExpectedValue, BoundaryUnit, UpperBoundary, LowerBoundary).
+        /// </summary>
+        AnomalyAndMargin = 1,
+
+        /// <summary>
+        /// In this mode, output (IsAnomaly, RawScore, Mag, ExpectedValue).
+        /// </summary>
+        AnomalyAndExpectedValue = 2
+    }
+
+    /// <summary>
+    /// Detect timeseries anomalies for entire input using Spectral Residual(SR) algorithm.
+    /// </summary>
+    /// <remarks>
+    /// <format type="text/markdown"><![CDATA[
+    /// The SR algorithm transforms the input series into a saliency map, and a simple rule then
+    /// judges whether each point is an anomaly:
+    /// $$O(x_i) = \begin{cases}1, if \frac{S(x_i)-\overline{S(x_i)}}{\overline{S(x_i)}} > \tau\\0,otherwise,\end{cases}$$
+    /// where $x_i$ represents an arbitrary point in sequence $\mathbf{x}$; $S(x_i)$ is the corresponding point in the saliency map;
+    /// and $\overline{S(x_i)}$ is the local average of the preceding points of $S(x_i)$.
+    ///
+    /// * [Link to the KDD 2019 paper](https://dl.acm.org/doi/10.1145/3292500.3330680)
+    /// ]]>
+    /// </format>
+    /// </remarks>
+    internal sealed class SrCnnEntireAnomalyDetector : BatchDataViewMapperBase<double, SrCnnEntireAnomalyDetector.Batch>
+    {
+        private const int MinBatchSize = 12;
+
+        private static readonly int[] _outputLengthArray = {3, 7, 4};
+        private readonly int _batchSize;
+        private readonly string _inputColumnName;
+        private readonly int _outputLength;
+        private readonly Bindings _bindings;
+        private readonly double _threshold;
+        private readonly double _sensitivity;
+        private readonly SrCnnDetectMode _detectMode;
+
+        private class Bindings : ColumnBindingsBase
+        {
+            private readonly VectorDataViewType _outputColumnType;
+            private readonly int _inputColumnIndex;
+
+            public Bindings(DataViewSchema input, string inputColumnName, string outputColumnName, VectorDataViewType outputColumnType)
+                : base(input, true, outputColumnName)
+            {
+                _outputColumnType = outputColumnType;
+                _inputColumnIndex = Input[inputColumnName].Index;
+            }
+
+            protected override DataViewType GetColumnTypeCore(int iinfo)
+            {
+                Contracts.Check(iinfo == 0);
+                return _outputColumnType;
+            }
+
+            // Get a predicate for the input columns.
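+            // A predicate over the output schema is translated into one over the input schema:
+            // pass-through columns map back to their source index via MapColumnIndex, while a
+            // request for the new output column activates the scored input column instead.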
+            public Func<int, bool> GetDependencies(Func<int, bool> predicate)
+            {
+                Contracts.AssertValue(predicate);
+
+                var active = new bool[Input.Count];
+                for (int col = 0; col < ColumnCount; col++)
+                {
+                    if (!predicate(col))
+                        continue;
+
+                    bool isSrc;
+                    int index = MapColumnIndex(out isSrc, col);
+                    if (isSrc)
+                        active[index] = true;
+                    else
+                        active[_inputColumnIndex] = true;
+                }
+
+                return col => 0 <= col && col < active.Length && active[col];
+            }
+        }
+
+        public SrCnnEntireAnomalyDetector(IHostEnvironment env, IDataView input, string inputColumnName, string outputColumnName, double threshold, int batchSize, double sensitivity, SrCnnDetectMode detectMode)
+            : base(env, nameof(SrCnnEntireAnomalyDetector), input)
+        {
+            Host.CheckValue(inputColumnName, nameof(inputColumnName));
+            _inputColumnName = inputColumnName;
+
+            Host.CheckUserArg(batchSize == -1 || batchSize >= MinBatchSize, nameof(batchSize), "BatchSize must be -1 or no less than 12.");
+            _batchSize = batchSize;
+
+            Host.CheckUserArg(threshold >= 0 && threshold <= 1, nameof(threshold), "Must be in [0,1].");
+            Host.CheckUserArg(detectMode == SrCnnDetectMode.AnomalyOnly
+                || detectMode == SrCnnDetectMode.AnomalyAndExpectedValue
+                || detectMode == SrCnnDetectMode.AnomalyAndMargin, nameof(detectMode), "Invalid detectMode");
+
+            Host.CheckUserArg(sensitivity >= 0 && sensitivity <= 100, nameof(sensitivity), "Must be in [0,100].");
+            _outputLength = _outputLengthArray[(int)detectMode];
+            _threshold = threshold;
+            _sensitivity = sensitivity;
+            _detectMode = detectMode;
+
+            _bindings = new Bindings(input.Schema, inputColumnName, outputColumnName, new VectorDataViewType(NumberDataViewType.Double, _outputLength));
+        }
+
+        protected override ColumnBindingsBase SchemaBindings => _bindings;
+
+        protected override Delegate[] CreateGetters(DataViewRowCursor input, Batch currentBatch, bool[] active)
+        {
+            if (!SchemaBindings.AnyNewColumnsActive(x => active[x]))
+                return new Delegate[1];
+            return new[] { currentBatch.CreateGetter(input, _inputColumnName) };
+        }
+
+        protected override Batch CreateBatch(DataViewRowCursor input) => new Batch(_batchSize, _outputLength, _threshold, _sensitivity, _detectMode);
+
+        protected override Func<bool> GetIsNewBatchDelegate(DataViewRowCursor input)
+        {
+            return () => _batchSize == -1 ? input.Position == 0 : input.Position % _batchSize == 0;
+        }
+
+        protected override Func<bool> GetLastInBatchDelegate(DataViewRowCursor input)
+        {
+            return () => _batchSize == -1 ? input.Position == -1 : (input.Position + 1) % _batchSize == 0;
+        }
+
+        protected override ValueGetter<double> GetLookAheadGetter(DataViewRowCursor input)
+        {
+            return input.GetGetter<double>(input.Schema[_inputColumnName]);
+        }
+
+        protected override Func<int, bool> GetSchemaBindingDependencies(Func<int, bool> predicate)
+        {
+            return _bindings.GetDependencies(predicate);
+        }
+
+        protected override void ProcessExample(Batch currentBatch, double currentInput)
+        {
+            currentBatch.AddValue(currentInput);
+        }
+
+        protected override void ProcessBatch(Batch currentBatch)
+        {
+            currentBatch.Process();
+            currentBatch.Reset();
+        }
+
+        internal sealed class Batch
+        {
+            private List<double> _previousBatch;
+            private List<double> _batch;
+            private readonly int _outputLength;
+            private SrCnnEntireModeler _modeler;
+            private int _batchSize;
+            private double[][] _results;
+            private int _bLen;
+
+            public Batch(int batchSize, int outputLength, double threshold, double sensitivity, SrCnnDetectMode detectMode)
+            {
+                _batchSize = batchSize;
+                _outputLength = outputLength;
+                if (batchSize == -1)
+                {
+                    _previousBatch = new List<double>();
+                    _batch = new List<double>();
+                }
+                else
+                {
+                    _previousBatch = new List<double>(batchSize);
+                    _batch = new List<double>(batchSize);
+                }
+                _modeler = new SrCnnEntireModeler(threshold, sensitivity, detectMode);
+            }
+
+            public void AddValue(double value)
+            {
+                _batch.Add(value);
+            }
+
+            public int Count => _batch.Count;
+
+            public void Process()
+            {
+                _batchSize = _batch.Count;
+                if (_batch.Count < MinBatchSize)
+                {
+                    if (_previousBatch.Count == 0)
+                    {
+                        throw Contracts.Except("The input must contain no less than 12 points.");
+                    }
+                    _bLen = _previousBatch.Count - _batch.Count;
+                    _previousBatch = _previousBatch.GetRange(_batch.Count, _bLen);
+                    _previousBatch.AddRange(_batch);
+                    _modeler.Train(_previousBatch.ToArray(), ref _results);
+                }
+                else
+                {
+                    _modeler.Train(_batch.ToArray(), ref _results);
+                }
+            }
+
+            public void Reset()
+            {
+                var tempBatch = _previousBatch;
+                _previousBatch = _batch;
+                _batch = tempBatch;
+                _batch.Clear();
+                _bLen = 0;
+            }
+
+            public ValueGetter<VBuffer<double>> CreateGetter(DataViewRowCursor input, string inputCol)
+            {
+                ValueGetter<double> srcGetter = input.GetGetter<double>(input.Schema[inputCol]);
+                ValueGetter<VBuffer<double>> getter =
+                    (ref VBuffer<double> dst) =>
+                    {
+                        double src = default;
+                        srcGetter(ref src);
+                        var result = VBufferEditor.Create(ref dst, _outputLength);
+                        _results[input.Position % _batchSize + _bLen].CopyTo(result.Values);
+                        dst = result.Commit();
+                    };
+                return getter;
+            }
+        }
+
+        internal sealed class SrCnnEntireModeler
+        {
+            private static readonly int _lookaheadWindowSize = 5;
+            private static readonly int _backAddWindowSize = 5;
+            private static readonly int _averagingWindowSize = 3;
+            private static readonly int _judgementWindowSize = 40;
+            private static readonly double _eps = 1e-8;
+            private static readonly double _deanomalyThreshold = 0.35;
+
+            // A fixed lookup table which returns factor using sensitivity as index.
+            // Since Margin = BoundaryUnit * factor, this factor is calculated to make sure Margin == Boundary when sensitivity is 50,
+            // and increases/decreases exponentially as sensitivity increases/decreases.
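+            // CalculateMargin below reads this table directly for integer sensitivities and
+            // interpolates linearly between adjacent entries for fractional ones.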
+            // The factor array is generated by formula:
+            // f(x)=1, if x=50;
+            // f(x)=f(x+1)*(1.25+0.001*x), if 0<=x<50;
+            // f(x)=f(x+1)/(1.25+0.001*(x-50)), if 50<x<=100.
+            private static readonly double[] _factors = new double[]{
+                // (101 precomputed entries, one per integer sensitivity in [0,100]; table elided)
+            };
+
+            private readonly double _threshold;
+            private readonly double _sensitivity;
+            private readonly SrCnnDetectMode _detectMode;
+
+            private double[] _predictArray;
+            private double[] _backAddArray;
+            private double[] _fftRe;
+            private double[] _fftIm;
+            private double[] _magList;
+            private double[] _magLogList;
+            private double[] _spectralList;
+            private double[] _transRe;
+            private double[] _transIm;
+            private double[] _ifftRe;
+            private double[] _ifftIm;
+            private double[] _ifftMagList;
+            private double[] _cumSumList;
+            private double[] _cumSumShift;
+            private double[] _zeroArray;
+            private double[] _deAnomalyData;
+            private double[] _units;
+            private double[] _val;
+            private double[] _trends;
+            private double[] _curWindow;
+
+            public SrCnnEntireModeler(double threshold, double sensitivity, SrCnnDetectMode detectMode)
+            {
+                _threshold = threshold;
+                _sensitivity = sensitivity;
+                _detectMode = detectMode;
+                _predictArray = new double[_lookaheadWindowSize + 1];
+            }
+
+            public void Train(double[] values, ref double[][] results)
+            {
+                if (results == null)
+                {
+                    results = new double[values.Length][];
+                    for (int i = 0; i < results.Length; ++i)
+                    {
+                        results[i] = new double[_outputLengthArray[(int)_detectMode]];
+                    }
+                }
+                else if (results.Length > values.Length)
+                {
+                    Array.Resize(ref results, values.Length);
+                }
+                SpectralResidual(values, results, _threshold);
+                //Optional Steps
+                if (_detectMode == SrCnnDetectMode.AnomalyAndMargin)
+                {
+                    GetMargin(values, results, _sensitivity);
+                }
+                else if (_detectMode == SrCnnDetectMode.AnomalyAndExpectedValue)
+                {
+                    GetExpectedValue(values, results);
+                }
+            }
+
+            private void AllocateDoubleArray(ref double[] arr, int length)
+            {
+                if (arr == null)
+                {
+                    arr = new double[length];
+                }
+                else if (arr.Length != length)
+                {
+                    Array.Resize(ref arr, length);
+                }
+            }
+
+            private void SpectralResidual(double[] values, double[][] results, double threshold)
+            {
+                // Step 1: Get backadd wave
+                BackAdd(values);
+
+                // Step 2: FFT transformation
+                int length = _backAddArray.Length;
+                AllocateDoubleArray(ref _fftRe, length);
+                AllocateDoubleArray(ref _fftIm, length);
+
+                AllocateDoubleArray(ref _zeroArray, length);
+                FftUtils.ComputeForwardFft(_backAddArray, _zeroArray, _fftRe, _fftIm, length);
+
+                // Step 3: Calculate mags of FFT
+                AllocateDoubleArray(ref _magList, length);
+                AllocateDoubleArray(ref _magLogList, length);
+                for (int i = 0; i < length; ++i)
+                {
+                    _magList[i] = Math.Sqrt((Math.Pow(_fftRe[i], 2) + Math.Pow(_fftIm[i], 2)));
+                    if (_magList[i] > _eps)
+                    {
+                        _magLogList[i] = Math.Log(_magList[i]);
+                    }
+                    else
+                    {
+                        _magLogList[i] = 0;
+                    }
+                }
+
+                // Step 4: Calculate spectral
+                AverageFilter(_magLogList, _averagingWindowSize);
+                AllocateDoubleArray(ref _spectralList, length);
+                for (int i = 0; i < length; ++i)
+                {
+                    _spectralList[i] = Math.Exp(_magLogList[i] - _cumSumList[i]);
+                }
+
+                // Step 5: IFFT transformation
+                AllocateDoubleArray(ref _transRe, length);
+                AllocateDoubleArray(ref _transIm, length);
+                for (int i = 0; i < length; ++i)
+                {
+                    if (_magLogList[i] != 0)
+                    {
+                        _transRe[i] = _fftRe[i] * _spectralList[i] / _magList[i];
+                        _transIm[i] = _fftIm[i] * _spectralList[i] / _magList[i];
+                    }
+                    else
+                    {
+                        _transRe[i] = 0;
+                        _transIm[i] = 0;
+                    }
+                }
+
+                AllocateDoubleArray(ref _ifftRe, length);
+                AllocateDoubleArray(ref _ifftIm, length);
+                FftUtils.ComputeBackwardFft(_transRe, _transIm, _ifftRe, _ifftIm, length);
+
+                // Step 6: Calculate mag and ave_mag of IFFT
+                AllocateDoubleArray(ref _ifftMagList, length);
+                for (int i = 0; i < length; ++i)
+                {
+                    _ifftMagList[i] = Math.Sqrt((Math.Pow(_ifftRe[i], 2) + Math.Pow(_ifftIm[i], 2)));
+                }
+                AverageFilter(_ifftMagList, Math.Min(_ifftMagList.Length, _judgementWindowSize));
+
+                // Step 7: Calculate raw score and set result
+                for (int i = 0; i < results.GetLength(0); ++i)
+                {
+                    var score = CalculateScore(_ifftMagList[i], _cumSumList[i]);
+                    score /= 10.0f;
+                    score = Math.Min(score, 1);
+                    score = Math.Max(score, 0);
+
+                    var detres = score > threshold ? 1 : 0;
+
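+                    // Slot layout shared by all modes: [0] anomaly flag, [1] raw score (overwritten
+                    // with the margin-based anomaly score in AnomalyAndMargin mode), [2] saliency magnitude.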
+                    results[i][0] = detres;
+                    results[i][1] = score;
+                    results[i][2] = _ifftMagList[i];
+                }
+            }
+
+            private void BackAdd(double[] data)
+            {
+                int j = 0;
+                for (int i = data.Length - _lookaheadWindowSize - 2; i < data.Length - 1; ++i)
+                {
+                    _predictArray[j++] = data[i];
+                }
+                var predictedValue = PredictNext(_predictArray);
+                AllocateDoubleArray(ref _backAddArray, data.Length + _backAddWindowSize);
+                for (int i = 0; i < data.Length; ++i)
+                {
+                    _backAddArray[i] = data[i];
+                }
+                for (int i = 0; i < _backAddWindowSize; ++i)
+                {
+                    _backAddArray[data.Length + i] = predictedValue;
+                }
+            }
+
+            private double PredictNext(double[] data)
+            {
+                var n = data.Length;
+                double slopeSum = 0.0f;
+                for (int i = 0; i < n - 1; ++i)
+                {
+                    slopeSum += (data[n - 1] - data[i]) / (n - 1 - i);
+                }
+                return (data[1] + slopeSum);
+            }
+
+            private void AverageFilter(double[] data, int n)
+            {
+                double cumsum = 0.0f;
+                int length = data.Length;
+
+                AllocateDoubleArray(ref _cumSumList, length);
+                AllocateDoubleArray(ref _cumSumShift, length);
+
+                for (int i = 0; i < length; ++i)
+                {
+                    cumsum += data[i];
+                    _cumSumList[i] = cumsum;
+                    _cumSumShift[i] = cumsum;
+                }
+                for (int i = n; i < length; ++i)
+                {
+                    _cumSumList[i] = (_cumSumList[i] - _cumSumShift[i - n]) / n;
+                }
+                for (int i = 1; i < n; ++i)
+                {
+                    _cumSumList[i] /= (i + 1);
+                }
+            }
+
+            private double CalculateScore(double mag, double avgMag)
+            {
+                double safeDivisor = avgMag;
+                if (Math.Abs(safeDivisor) < _eps)
+                {
+                    safeDivisor = _eps;
+                }
+                return (Math.Abs(mag - avgMag) / safeDivisor);
+            }
+
+            private void GetExpectedValue(double[] values, double[][] results)
+            {
+                //Step 8: Calculate Expected Value
+                GetDeanomalyData(values, GetAnomalyIndex(results.Select(x => x[1]).ToArray()));
+                CalculateExpectedValueByFft(_deAnomalyData);
+
+                for (int i = 0; i < results.Length; ++i)
+                {
+                    results[i][3] = _ifftRe[i];
+                }
+            }
+
+            private void GetMargin(double[] values, double[][] results, double sensitivity)
+            {
+                //Step 8: Calculate Expected Value
+                GetDeanomalyData(values, GetAnomalyIndex(results.Select(x => x[1]).ToArray()));
+                CalculateExpectedValueByFft(_deAnomalyData);
+
+                //Step 9: Calculate Boundary Unit
+                CalculateBoundaryUnit(values, results.Select(x => x[0] > 0).ToArray());
+
+                for (int i = 0; i < results.Length; ++i)
+                {
+                    //Step 10: Calculate UpperBound and LowerBound
+                    var margin = CalculateMargin(_units[i], sensitivity);
+                    results[i][3] = _ifftRe[i];
+                    results[i][4] = _units[i];
+                    results[i][5] = _ifftRe[i] + margin;
+                    results[i][6] = _ifftRe[i] - margin;
+                    //Step 11: Update Anomaly Score
+                    results[i][1] = CalculateAnomalyScore(values[i], _ifftRe[i], _units[i], results[i][0] > 0);
+                }
+            }
+
+            private int[] GetAnomalyIndex(double[] scores)
+            {
+                List<int> anomalyIdxList = new List<int>();
+                for (int i = 0; i < scores.Length; ++i)
+                    if (scores[i] > _deanomalyThreshold)
+                    {
+                        anomalyIdxList.Add(i);
+                    }
+
+                return anomalyIdxList.ToArray();
+            }
+
+            private void GetDeanomalyData(double[] data, int[] anomalyIdxList)
+            {
+                AllocateDoubleArray(ref _deAnomalyData, data.Length);
+                Array.Copy(data, _deAnomalyData, data.Length);
+                int minPointsToFit = 4;
+                foreach (var idx in anomalyIdxList)
+                {
+                    int step = 1;
+                    int start = Math.Max(idx - step, 0);
+                    int end = Math.Min(data.Length - 1, idx + step);
+
+                    List<Tuple<int, double>> fitValues = new List<Tuple<int, double>>();
+                    for (int i = start; i <= end; ++i)
+                    {
+                        if (!anomalyIdxList.Contains(i))
+                        {
+                            fitValues.Add(new Tuple<int, double>(i, data[i]));
+                        }
+                    }
+
+                    while (fitValues.Count < minPointsToFit && (start > 0 || end < data.Length - 1))
+                    {
+                        step += 2;
+                        start = Math.Max(idx - step, 0);
+                        end = Math.Min(data.Length - 1, idx + step);
+                        fitValues.Clear();
+                        for (int i = start; i <= end; ++i)
+                        {
+                            if (!anomalyIdxList.Contains(i))
+                            {
+                                fitValues.Add(new Tuple<int, double>(i, data[i]));
+                            }
+                        }
+                    }
+
+                    if (fitValues.Count > 1)
+                    {
+                        _deAnomalyData[idx] = CalculateInterpolate(fitValues, idx);
+                    }
+                }
+            }
+
+            private double CalculateInterpolate(List<Tuple<int, double>> values, int idx)
+            {
+                var n = values.Count;
+                double sumX = values.Sum(item => item.Item1);
+                double sumY = values.Sum(item => item.Item2);
+                double sumXX = values.Sum(item => Math.Pow(item.Item1, 2));
+                double sumXY = values.Sum(item => item.Item1 * item.Item2);
+
+                var a = ((double)n * sumXY - sumX * sumY) / ((double)n * sumXX - sumX * sumX);
+                var b = (sumXX * sumY - sumX * sumXY) / ((double)n * sumXX - sumX * sumX);
+
+                return a * (double)idx + b;
+            }
+
+            private void CalculateExpectedValueByFft(double[] data)
+            {
+                int length = data.Length;
+                AllocateDoubleArray(ref _fftRe, length);
+                AllocateDoubleArray(ref _fftIm, length);
+                AllocateDoubleArray(ref _zeroArray, length);
+                FftUtils.ComputeForwardFft(data, _zeroArray, _fftRe, _fftIm, length);
+
+                for (int i = 0; i < length; ++i)
+                {
+                    if (i > (double)length * 3 / 8 && i < (double)length * 5 / 8)
+                    {
+                        _fftRe[i] = 0.0f;
+                        _fftIm[i] = 0.0f;
+                    }
+                }
+
+                AllocateDoubleArray(ref _ifftRe, length);
+                AllocateDoubleArray(ref _ifftIm, length);
+                FftUtils.ComputeBackwardFft(_fftRe, _fftIm, _ifftRe, _ifftIm, length);
+            }
+
+            private void CalculateBoundaryUnit(double[] data, bool[] isAnomalys)
+            {
+                int window = Math.Min(data.Length / 3, 512);
+                double trendFraction = 0.5;    // mix trend and average of trend
+                double trendSum = 0;
+                int calculationSize = 0;
+
+                MedianFilter(data, window, true);
+                for (int i = 0; i < _trends.Length; ++i)
+                {
+                    if (!isAnomalys[i])
+                    {
+                        trendSum += Math.Abs(_trends[i]);
+                        ++calculationSize;
+                    }
+                }
+
+                double averageTrendPart = 0;
+                if (calculationSize > 0)
+                {
+                    averageTrendPart = trendSum / calculationSize * (1 - trendFraction);
+                }
+                else
+                {
+                    trendFraction = 1.0;
+                }
+
+                AllocateDoubleArray(ref _units, _trends.Length);
+                for (int i = 0; i < _units.Length; ++i)
+                {
+                    _units[i] = Math.Max(1, averageTrendPart + Math.Abs(_trends[i]) * trendFraction);
+                    if (double.IsInfinity(_units[i]))
+                    {
+                        throw new ArithmeticException("Not finite unit value");
+                    }
+                }
+            }
+
+            private void MedianFilter(double[] data, int window, bool needTwoEnd = false)
+            {
+                int wLen = window / 2 * 2 + 1;
+                int tLen = data.Length;
+                AllocateDoubleArray(ref _val, tLen);
+                Array.Copy(data, _val, tLen);
+                AllocateDoubleArray(ref _trends, tLen);
+                Array.Copy(data, _trends, tLen);
+                AllocateDoubleArray(ref _curWindow, wLen);
+
+                if (tLen < wLen)
+                    return;
+
+                for (int i = 0; i < wLen; i++)
+                {
+                    int index = i;
+                    int addId = BisectRight(_curWindow, 0, i, _val[i]);
+                    while (index > addId)
+                    {
+                        _curWindow[index] = _curWindow[index - 1];
+                        index -= 1;
+                    }
+                    _curWindow[addId] = data[i];
+                    if (i >= wLen / 2 && needTwoEnd)
+                        _trends[i - wLen / 2] = SortedMedian(_curWindow, 0, i + 1);
+                }
+
+                _trends[window / 2] = SortedMedian(_curWindow, 0, wLen);
+
+                for (int i = window / 2 + 1; i < tLen - window / 2; i++)
+                {
+                    int deleteId = BisectRight(_curWindow, 0, wLen, _val[i - window / 2 - 1]) - 1;
+                    int index = deleteId;
+                    while (index < wLen - 1)
+                    {
+                        _curWindow[index] = _curWindow[index + 1];
+                        index += 1;
+                    }
+                    int addId = BisectRight(_curWindow, 0, wLen - 1, _val[i + window / 2]);
+                    index = wLen - 1;
+                    while (index > addId)
+                    {
+                        _curWindow[index] = _curWindow[index - 1];
+                        index -= 1;
+                    }
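+                    // Insert the incoming sample at its sorted position so the window median can be
+                    // read directly from the sorted buffer.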
+                    _curWindow[addId] = data[i + window / 2];
+                    _trends[i] = SortedMedian(_curWindow, 0, wLen);
+                }
+
+                if (needTwoEnd)
+                {
+                    for (int i = tLen - window / 2; i < tLen; i++)
+                    {
+                        int deleteId = BisectRight(_curWindow, 0, wLen, data[i - window / 2 - 1]) - 1;
+                        int index = deleteId;
+                        while (index < wLen - 1)
+                        {
+                            _curWindow[index] = _curWindow[index + 1];
+                            index += 1;
+                        }
+                        wLen -= 1;
+                        _trends[i] = SortedMedian(_curWindow, 0, wLen);
+                    }
+                }
+            }
+
+            private int BisectRight(double[] arr, int begin, int end, double tar)
+            {
+                while (begin < end)
+                {
+                    int mid = begin + (end - begin) / 2;
+                    if (arr[mid] <= tar)
+                        begin = mid + 1;
+                    else
+                        end = mid;
+                }
+                return begin;
+            }
+
+            private double SortedMedian(double[] sortedValues, int begin, int end)
+            {
+                int n = end - begin;
+                if (n % 2 == 1)
+                    return sortedValues[begin + n / 2];
+                else
+                {
+                    int mid = begin + n / 2;
+                    return (sortedValues[mid - 1] + sortedValues[mid]) / 2;
+                }
+            }
+
+            private double CalculateMargin(double unit, double sensitivity)
+            {
+                if (Math.Floor(sensitivity) == sensitivity)
+                {
+                    return unit * _factors[(int)sensitivity];
+                }
+                else
+                {
+                    int lb = (int)sensitivity;
+                    return (_factors[lb + 1] + (_factors[lb] - _factors[lb + 1]) * (1 - sensitivity + lb)) * unit;
+                }
+            }
+
+            private double CalculateAnomalyScore(double value, double exp, double unit, bool isAnomaly)
+            {
+                double anomalyScore = 0.0f;
+
+                if (isAnomaly.Equals(false))
+                {
+                    return anomalyScore;
+                }
+
+                double distance = Math.Abs(exp - value);
+                List<double> margins = new List<double>();
+                for (int i = 100; i >= 0; --i)
+                {
+                    margins.Add(CalculateMargin(unit, i));
+                }
+
+                int lb = 0;
+                int ub = 100;
+                while (lb < ub)
+                {
+                    int mid = (lb + ub) / 2;
+                    if (margins[mid] < distance)
+                    {
+                        lb = mid + 1;
+                    }
+                    else
+                    {
+                        ub = mid;
+                    }
+                }
+
+                if (Math.Abs(margins[lb] - distance) < _eps || lb == 0)
+                {
+                    anomalyScore = lb;
+                }
+                else
+                {
+                    double lowerMargin = margins[lb - 1];
+                    double upperMargin = margins[lb];
+                    anomalyScore = lb - 1 + (distance - lowerMargin) / (upperMargin - lowerMargin);
+                }
+
+                return anomalyScore / 100.0f;
+            }
+        }
+    }
+}
diff --git a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs
index f3f548fd7b..205b36fbb2 100644
--- a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs
+++ b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs
@@ -7,8 +7,6 @@
 using System.IO;
 using Microsoft.ML.Data;
 using Microsoft.ML.TestFramework;
-using Microsoft.ML.TestFramework.Attributes;
-using Microsoft.ML.TestFrameworkCommon;
 using Microsoft.ML.TimeSeries;
 using Microsoft.ML.Transforms.TimeSeries;
 using Xunit;
@@ -87,9 +85,15 @@ public TimeSeriesData(float value)
             }
         }
 
+        private sealed class TimeSeriesDataDouble
+        {
+            [LoadColumn(0)]
+            public double Value { get; set; }
+        }
+
         private sealed class SrCnnAnomalyDetection
         {
-            [VectorType(3)]
+            [VectorType]
             public double[] Prediction { get; set; }
         }
 
@@ -571,6 +575,92 @@ public void AnomalyDetectionWithSrCnn(bool loadDataFromFile)
             }
         }
 
+        [Theory, CombinatorialData]
+        public void TestSrCnnBatchAnomalyDetector(
+            [CombinatorialValues(SrCnnDetectMode.AnomalyOnly, SrCnnDetectMode.AnomalyAndExpectedValue, SrCnnDetectMode.AnomalyAndMargin)]SrCnnDetectMode mode,
+            [CombinatorialValues(true, false)]bool loadDataFromFile,
+            [CombinatorialValues(-1, 24, 26, 512)]int batchSize)
+        {
+            var ml = new MLContext(1);
+            IDataView dataView;
+            if (loadDataFromFile)
+            {
+                var dataPath = GetDataPath("Timeseries", "anomaly_detection.csv");
+
+                // Load data from file into the dataView
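+                // (TimeSeriesDataDouble binds column 0 of the file via [LoadColumn(0)]; hasHeader
+                // skips the header row.)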
+                dataView = ml.Data.LoadFromTextFile<TimeSeriesDataDouble>(dataPath, hasHeader: true);
+            }
+            else
+            {
+                // Generate sample series data with an anomaly.
+                var data = new List<TimeSeriesDataDouble>();
+                for (int index = 0; index < 20; index++)
+                {
+                    data.Add(new TimeSeriesDataDouble { Value = 5 });
+                }
+                data.Add(new TimeSeriesDataDouble { Value = 10 });
+                for (int index = 0; index < 5; index++)
+                {
+                    data.Add(new TimeSeriesDataDouble { Value = 5 });
+                }
+
+                // Convert data to IDataView.
+                dataView = ml.Data.LoadFromEnumerable(data);
+            }
+
+            // Set up the detection arguments.
+            string outputColumnName = nameof(SrCnnAnomalyDetection.Prediction);
+            string inputColumnName = nameof(TimeSeriesDataDouble.Value);
+
+            // Do batch anomaly detection.
+            var outputDataView = ml.AnomalyDetection.DetectEntireAnomalyBySrCnn(dataView, outputColumnName, inputColumnName,
+                threshold: 0.35, batchSize: batchSize, sensitivity: 90.0, mode);
+
+            // Get the data of the newly created column as an IEnumerable of
+            // SrCnnAnomalyDetection.
+            var predictionColumn = ml.Data.CreateEnumerable<SrCnnAnomalyDetection>(
+                outputDataView, reuseRowObject: false);
+
+            int k = 0;
+            foreach (var prediction in predictionColumn)
+            {
+                switch (mode)
+                {
+                    case SrCnnDetectMode.AnomalyOnly:
+                        Assert.Equal(3, prediction.Prediction.Length);
+                        if (k == 20)
+                            Assert.Equal(1, prediction.Prediction[0]);
+                        else
+                            Assert.Equal(0, prediction.Prediction[0]);
+                        break;
+                    case SrCnnDetectMode.AnomalyAndExpectedValue:
+                        Assert.Equal(4, prediction.Prediction.Length);
+                        if (k == 20)
+                        {
+                            Assert.Equal(1, prediction.Prediction[0]);
+                            Assert.Equal(5.00, prediction.Prediction[3], 2);
+                        }
+                        else
+                            Assert.Equal(0, prediction.Prediction[0]);
+                        break;
+                    case SrCnnDetectMode.AnomalyAndMargin:
+                        Assert.Equal(7, prediction.Prediction.Length);
+                        if (k == 20)
+                        {
+                            Assert.Equal(1, prediction.Prediction[0]);
+                            Assert.Equal(5.00, prediction.Prediction[3], 2);
+                            Assert.Equal(5.00, prediction.Prediction[4], 2);
+                            Assert.Equal(5.01, prediction.Prediction[5], 2);
+                            Assert.Equal(4.99, prediction.Prediction[6], 2);
+                        }
+                        else
+                            Assert.Equal(0, prediction.Prediction[0]);
+                        break;
+                }
+                k += 1;
+            }
+        }
+
         [Fact]
         public void RootCauseLocalization()
         {