Skip to content

Commit

Permalink
Fix issue in SRCnnEntireAnomalyDetector (#5579)
Browse files Browse the repository at this point in the history
* update

* refine codes

* update comments

* update for nit

Co-authored-by: yuyi@microsoft.com <Yuanxiang.Ying@microsoft.com>
  • Loading branch information
guinao and yuyi@microsoft.com authored Feb 9, 2021
1 parent 335330d commit 3d3d45c
Show file tree
Hide file tree
Showing 3 changed files with 221 additions and 4 deletions.
33 changes: 30 additions & 3 deletions src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,9 @@ internal sealed class SrCnnEntireModeler
private static readonly double _unitForZero = 0.3;
private static readonly double _minimumScore = 0.0;
private static readonly double _maximumScore = 1.0;
// Use this threshold to correct false anomalies
private static readonly double _zscoreThreshold = 1.5;

// If the score window is smaller than this value, the anomaly score tends to be small.
// Proof: For each point, the SR anomaly score is calculated as (w is average window size):
// (mag - avg_mag) / avg_mag
Expand Down Expand Up @@ -426,6 +429,8 @@ internal sealed class SrCnnEntireModeler
//used in all modes
private double _minimumOriginValue;
private double _maximumOriginValue;
private double _std;
private double _mean;
private readonly double[] _predictArray;
private double[] _backAddArray;
private double[] _fftRe;
Expand Down Expand Up @@ -491,14 +496,23 @@ public void Train(double[] values, ref double[][] results)
_minimumOriginValue = Double.MaxValue;
_maximumOriginValue = Double.MinValue;

var sum = 0.0;
var squareSum = 0.0;

Array.Resize(ref _seriesToDetect, values.Length);
for (int i = 0; i < values.Length; ++i)
{
_seriesToDetect[i] = values[i];
_minimumOriginValue = Math.Min(_minimumOriginValue, values[i]);
_maximumOriginValue = Math.Max(_maximumOriginValue, values[i]);
var value = values[i];
_seriesToDetect[i] = value;
_minimumOriginValue = Math.Min(_minimumOriginValue, value);
_maximumOriginValue = Math.Max(_maximumOriginValue, value);
sum += value;
squareSum += value * value;
}

_mean = sum / values.Length;
_std = Math.Sqrt((squareSum - (sum * sum) / values.Length) / values.Length);

if (_period > 0)
{
_deseasonalityFunction.Deseasonality(ref values, _period, ref _seriesToDetect);
Expand Down Expand Up @@ -612,9 +626,22 @@ private void SpectralResidual(double[] values, double[][] results, double thresh

var detres = score > threshold ? 1 : 0;

// Anomalies correction by zscore
if (detres > 0)
{
// Use the z-score to filter out false anomalies that lie within the 1.5-sigma region.
var zscore = Math.Abs(values[i] - _mean) / _std;
if (_std < _eps || zscore < _zscoreThreshold)
{
detres = 0;
score = 0.0;
}
}

results[i][0] = detres;
results[i][1] = score;
results[i][2] = _ifftMagList[i];

}
}

Expand Down
56 changes: 55 additions & 1 deletion test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

using System;
using System.Collections.Generic;
using System.Data;
using System.IO;
using System.Linq;
using Microsoft.ML.Data;
Expand Down Expand Up @@ -717,6 +716,61 @@ public void TestSrCnnAnomalyDetectorWithSeasonalData(
}
}

/// <summary>
/// Runs batch SR-CNN anomaly detection over <c>big_spike_data.csv</c> and checks that the only
/// point flagged as an anomaly is the one at index 26; every other point must be reported as normal.
/// </summary>
/// <param name="mode">
/// Detect mode under test. Each mode is listed exactly once so that the combinatorial theory
/// does not run a duplicate case and <see cref="SrCnnDetectMode.AnomalyAndMargin"/> is covered too.
/// </param>
[Theory, CombinatorialData]
public void TestSrCnnAnomalyDetectorBigSpike(
    [CombinatorialValues(SrCnnDetectMode.AnomalyOnly, SrCnnDetectMode.AnomalyAndMargin, SrCnnDetectMode.AnomalyAndExpectedValue)] SrCnnDetectMode mode
    )
{
    var ml = new MLContext(1);

    var dataPath = GetDataPath("Timeseries", "big_spike_data.csv");

    // Load data from file into the dataView
    IDataView dataView = ml.Data.LoadFromTextFile<TimeSeriesDataDouble>(dataPath, hasHeader: true);

    // Setup the detection arguments
    string outputColumnName = nameof(SrCnnAnomalyDetection.Prediction);
    string inputColumnName = nameof(TimeSeriesDataDouble.Value);

    // Do batch anomaly detection
    var options = new SrCnnEntireAnomalyDetectorOptions()
    {
        Threshold = 0.3,
        BatchSize = -1,
        Sensitivity = 80.0,
        DetectMode = mode,
        Period = 0,
        DeseasonalityMode = SrCnnDeseasonalityMode.Stl
    };

    var outputDataView = ml.AnomalyDetection.DetectEntireAnomalyBySrCnn(dataView, outputColumnName, inputColumnName, options);

    // Getting the data of the newly created column as an IEnumerable of SrCnnAnomalyDetection.
    var predictionColumn = ml.Data.CreateEnumerable<SrCnnAnomalyDetection>(
        outputDataView, reuseRowObject: false);

    // Zero-based row index of the single expected anomaly in the data file.
    var anomalyIndex = 26;

    int k = 0;
    foreach (var prediction in predictionColumn)
    {
        // Prediction[0] is the value the detector writes as the anomaly flag for this row.
        if (anomalyIndex == k)
        {
            Assert.Equal(1, prediction.Prediction[0]);
        }
        else
        {
            Assert.Equal(0, prediction.Prediction[0]);
        }

        ++k;
    }
}

[Theory, CombinatorialData]
public void TestSrCnnAnomalyDetectorWithSeasonalAnomalyData(
[CombinatorialValues(SrCnnDeseasonalityMode.Stl, SrCnnDeseasonalityMode.Mean, SrCnnDeseasonalityMode.Median)] SrCnnDeseasonalityMode mode
Expand Down
136 changes: 136 additions & 0 deletions test/data/Timeseries/big_spike_data.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
Value
0.333061106
2.198203303
1.705836778
1.861708215
1.085050871
0.548409541
0.365537211
0.433823922
0.450379649
0.485662867
0.59162219
0.678494031
0.735315015
0.780228908
0.779309892
0.71637311
0.783369345
0.829129842
0.769519564
0.74230352
0.914116686
0.970162226
0.964537878
0.983059421
1.009637074
1.054769667
48232.24413
4739.675242
4963.982698
8555.732913
75.25537709
11.2742621
4.388301951
2.584960796
2.273629928
1.972334276
1.811987528
1.854365004
1.581860355
1.478895939
1.447799312
1.406460886
1.333295368
1.282260475
1.345933543
1.264431234
1.235222153
1.204307109
1.133533648
1.110515351
1.017397262
1.103902775
1.099039227
1.061479438
1.063725177
1.072777829
1.044107263
0.981847451
1.038324454
1.033883341
1.004416487
1.017918007
0.345233269
1.092365812
1.078005286
1.033142227
1.024832225
1.098672969
1.092767871
1.095272293
1.139357768
1.0711793
1.119012071
1.11906761
1.131538563
1.113967769
1.141610905
1.14317559
1.108130866
1.083645413
1.147460394
1.177086603
1.153490106
1.145660569
1.132464809
1.106364602
1.003350151
1.099011524
1.109557478
1.065336146
1.081590334
1.075768021
0.986278889
1.001219623
1.080312553
1.075076345
1.057146027
1.106862867
1.084433852
0.975639541
0.944182773
1.088712253
1.067152572
1.107507855
1.069142173
1.036247939
0.995907308
0.932153379
1.074865283
1.065780376
1.05063751
1.077263172
1.033459106
0.985960758
0.981842413
1.032862035
1.005063722
0.862145269
0.491629016
0.473904777
0.777874357
0.945595834
1.020180047
1.025171701
1.031632464
1.02571454
0.950313827
0.935412116
0.991591559
1.013279894
0.991734823
1.007466737
1.019160801
0.919227208
0.977617794

0 comments on commit 3d3d45c

Please sign in to comment.