diff --git a/Rust/Cargo.toml b/Rust/Cargo.toml index 98ce6dbb..b30e2420 100644 --- a/Rust/Cargo.toml +++ b/Rust/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "rcf" -version = "3.3.0" +version = "4.0.0" edition = "2021" license = "Apache-2.0" @@ -18,11 +18,11 @@ path = "src/example.rs" [dependencies] -num = "0.4" -rayon = "1.5" +num = "0.4.1" +rayon = "1.7" rand = "*" -rand_chacha = "0.3.0" -rand_core = "0.6" +rand_chacha = "0.3.1" +rand_core = "0.6.2" [dev-dependencies] parameterized_test = "0.1.0" diff --git a/Rust/src/common/anomalydescriptor.rs b/Rust/src/common/anomalydescriptor.rs deleted file mode 100644 index ecafc831..00000000 --- a/Rust/src/common/anomalydescriptor.rs +++ /dev/null @@ -1,123 +0,0 @@ -use crate::common::divector::DiVector; - -/** - * This class maintains a simple discounted statistics. Setters are avoided - * except for discount rate which is useful as initialization from raw scores - */ -#[repr(C)] -#[derive(Clone)] -pub struct AnomalyDescriptor { - - // the current input point; can have missing values - pub current_input: Vec, - - // current timestamp - pub current_timestamp: usize, - - // potential missing values in the current input (ideally None) - pub missing_values: Option>, - - // potentially transformed point used by RCF, can have different dimensions than input - pub rcf_point: Option>, - - pub score: f32, - - pub internal_timestamp: usize, - - pub threshold: f32, - - pub anomaly_grade: f32, - - pub data_confidence: f32, - - pub forecast_reasonable: bool, - - // present only if grade > 0 - pub attribution: Option, - - pub expected_rcf_point: Option>, - - pub relative_index: Option, - - // useful for time augmented forests - pub expected_timestamp: Option, - - pub start_of_anomaly: Option, - - pub in_high_score_region: Option, - - pub relevant_attribution: Option>, - - pub time_attribution: Option, - - // the values being replaced; may correspond to past - pub past_values: Option>, - - pub past_timestamp: Option, - - pub expected_values_list: Option>>, - - pub likelihood_of_values: Option> - -} - -impl AnomalyDescriptor { - pub fn new(point: &[f32], timestamp: usize) -> Self { - AnomalyDescriptor { - current_input: Vec::from(point), - current_timestamp: timestamp, - missing_values: None, - rcf_point: None, - score: 0.0, - internal_timestamp: 0, - threshold: 0.0, - anomaly_grade: 0.0, - data_confidence: 0.0, - forecast_reasonable: false, - attribution: None, - expected_rcf_point: None, - relative_index: None, - expected_timestamp: None, - start_of_anomaly: None, - in_high_score_region: None, - relevant_attribution: None, - time_attribution: None, - past_values: None, - past_timestamp: None, - expected_values_list: None, - likelihood_of_values: None - } - } - - pub fn new_with_missing_values(point: Vec, timestamp: usize, missing_values: Vec) -> Self { - assert!(missing_values.len() <= point.len(), "incorrect input"); - for i in &missing_values { - assert!( *i >=0 && (*i as usize) < point.len(), "incorrect input") - } - AnomalyDescriptor { - current_input: point.clone(), - current_timestamp: timestamp, - missing_values: Some(missing_values.clone()), - rcf_point: None, - score: 0.0, - internal_timestamp: 0, - threshold: 0.0, - anomaly_grade: 0.0, - data_confidence: 0.0, - forecast_reasonable: false, - attribution: None, - expected_rcf_point: None, - relative_index: None, - expected_timestamp: None, - start_of_anomaly: None, - in_high_score_region: None, - relevant_attribution: None, - time_attribution: None, - past_values: None, - past_timestamp: None, - expected_values_list: None, - likelihood_of_values: None - } - } -} - diff --git a/Rust/src/common/cluster.rs b/Rust/src/common/cluster.rs index 4a433dea..e8463bb1 100644 --- a/Rust/src/common/cluster.rs +++ b/Rust/src/common/cluster.rs @@ -1,5 +1,4 @@ use std::cmp::{max, min}; -use std::f32::NAN; use std::ops::{Deref, Index}; use std::slice; use rand::{Rng, SeedableRng}; @@ -42,7 +41,7 @@ pub trait IntermediateCluster { // a function that assigns a point indexed by usize from a list of samples to // the cluster; note the weight used in the function need not be the entire weight of the // sampled point (for example in case of soft assignments) - fn add_point(&mut self, index: usize, weight: f32, dist: f64, representative: usize); + fn add_point(&mut self, index: usize, weight: f32, dist: f64, representative: usize) -> Result<()>; // given a set of previous assignments, recomputes the optimal set of representatives // this is the classic optimization step for k-Means; but the analogue exists for every // clustering; note that it is possible that recompute does nothing @@ -56,11 +55,11 @@ pub trait IntermediateCluster { // a function that indicates cluster quality fn average_radius(&self) -> f64; // a function that absorbs another cluster - fn absorb<'a>(&mut self, dictionary: &'a [Z], get_point: fn (usize,&'a [Z]) -> &'a T, another: &dyn IntermediateCluster, distance: fn(&T, &T) -> f64); + fn absorb<'a>(&mut self, dictionary: &'a [Z], get_point: fn (usize,&'a [Z]) -> &'a T, another: &dyn IntermediateCluster, distance: fn(&T, &T) -> f64) -> Result<()>; // a function to return a list of representatives corresponding to pairs (Q,weight) fn representatives(&self) -> Vec<(Q, f32)>; // a function that helps scale (by multiplication) the cluster weight - fn scale_weight(&mut self, factor: f64); + fn scale_weight(&mut self, factor: f64) -> Result<()>; } @@ -102,22 +101,22 @@ pub struct Center { } impl Center { - pub fn new(representative: usize, point:&[f32], weight: f32, _params:usize) -> Self { - Center { + pub fn new(_representative: usize, point:&[f32], weight: f32, _params:usize) -> Result { + Ok(Center { representative: Vec::from(point), weight: weight as f64, points: Vec::new(), sum_of_radii: 0.0, - } + }) } - pub fn new_as_vec(representative: usize, point:&Vec, weight: f32,_params:usize) -> Self { - Center { + pub fn new_as_vec(_representative: usize, point:&Vec, weight: f32,_params:usize) -> Result { + Ok(Center { representative: point.clone(), weight: weight as f64, points: Vec::new(), sum_of_radii: 0.0, - } + }) } pub fn average_radius(&self) -> f64 { @@ -217,9 +216,10 @@ impl IntermediateCluster, [f32]> for Center { self.weight() } - fn scale_weight(&mut self, factor: f64){ - assert!(!factor.is_nan() && factor>0.0," has to be positive"); - self.weight = (self.weight as f64 * factor); + fn scale_weight(&mut self, factor: f64) -> Result<()>{ + check_argument(!factor.is_nan() && factor>0.0," has to be positive")?; + self.weight = self.weight as f64 * factor; + Ok(()) } fn distance_to_point<'a>(&self, _points:&'a [Z],_get_point: fn(usize,&'a [Z]) ->&'a [f32],point: &[f32], distance: fn(&[f32], &[f32]) -> f64) -> (f64,usize) { @@ -237,10 +237,11 @@ impl IntermediateCluster, [f32]> for Center { (tuple.0,0,tuple.1) } - fn add_point(&mut self, index: usize, weight: f32, dist: f64, representative:usize) { - assert!(representative==0,"can have only one representative"); - assert!(!weight.is_nan() && weight >= 0.0f32, "non-negative weight"); + fn add_point(&mut self, index: usize, weight: f32, dist: f64, representative:usize) -> Result<()> { + check_argument(representative==0,"can have only one representative")?; + check_argument(!weight.is_nan() && weight >= 0.0f32, "non-negative weight")?; self.add_point(index,weight,dist); + Ok(()) } fn recompute<'a>(&mut self, points:&'a [Z],get_point: fn(usize,&'a [Z]) ->&'a [f32], distance: fn(&[f32], &[f32]) -> f64) -> f64 { @@ -266,9 +267,10 @@ impl IntermediateCluster, [f32]> for Center { get_point: fn(usize,&'a [Z]) ->&'a [f32], another: &dyn IntermediateCluster, [f32]>, distance: fn(&[f32], &[f32]) -> f64, - ) { - let closest = another.distance_to_point(points,get_point, &self.representative,distance); - self.absorb_list(another.weight(),&another.representatives(),closest); + ) ->Result<()> { + let closest = another.distance_to_point(points, get_point, &self.representative, distance); + self.absorb_list(another.weight(), &another.representatives(), closest); + Ok(()) } fn representatives(&self) -> Vec<(Vec, f32)> { @@ -281,9 +283,10 @@ impl IntermediateCluster, Vec> for Center { self.weight() } - fn scale_weight(&mut self, factor: f64){ - assert!(!factor.is_nan() && factor>0.0," has to be positive"); - self.weight = (self.weight as f64 * factor); + fn scale_weight(&mut self, factor: f64) -> Result<()>{ + check_argument(!factor.is_nan() && factor>0.0," has to be positive")?; + self.weight = self.weight as f64 * factor; + Ok(()) } fn distance_to_point<'a>(&self, _points:&'a [Z],_get_point: fn(usize,&'a [Z]) ->&'a Vec,point: &Vec, distance: fn(&Vec, &Vec) -> f64) -> (f64,usize) { @@ -301,10 +304,11 @@ impl IntermediateCluster, Vec> for Center { (tuple.0,0,tuple.1) } - fn add_point(&mut self, index: usize, weight: f32, dist: f64, representative:usize) { - assert!(representative==0,"can have only one representative"); - assert!(!weight.is_nan() && weight >= 0.0f32, "non-negative weight"); + fn add_point(&mut self, index: usize, weight: f32, dist: f64, representative:usize) ->Result<()>{ + check_argument(representative==0,"can have only one representative")?; + check_argument(!weight.is_nan() && weight >= 0.0f32, "non-negative weight")?; self.add_point(index,weight,dist); + Ok(()) } fn recompute<'a>(&mut self, points:&'a [Z],get_point: fn(usize,&'a [Z]) ->&'a Vec, distance: fn(&Vec, &Vec) -> f64) -> f64 { @@ -330,9 +334,10 @@ impl IntermediateCluster, Vec> for Center { get_point: fn(usize,&'a [Z]) ->&'a Vec, another: &dyn IntermediateCluster, Vec>, distance: fn(&Vec, &Vec) -> f64, - ) { + ) -> Result<()>{ let closest = another.distance_to_point(points,get_point, &self.representative,distance); self.absorb_list(another.weight(),&another.representatives(),closest); + Ok(()) } fn representatives(&self) -> Vec<(Vec, f32)> { @@ -345,7 +350,7 @@ impl IntermediateCluster, Vec> for Center { fn process_point<'a,Z,U,Q,T :?Sized>(dictionary: &'a [Z], get_point: fn(usize,&'a [Z])->&'a T, index: usize, centers: &mut [U], weight : f32, distance: fn(&T, &T) -> f64) -> Result<()> where U: IntermediateCluster + Send, - T: std::marker::Sync, + T: Sync, { let mut dist = vec![(0.0, 1); centers.len()]; let mut min_distance = (f64::MAX, 1); @@ -356,11 +361,11 @@ fn process_point<'a,Z,U,Q,T :?Sized>(dictionary: &'a [Z], get_point: fn(usize,&' min_distance = dist[j]; } }; - //check_argument(min_distance.0>=0.0," distances cannot be negative")?; + check_argument(min_distance.0>=0.0," distances cannot be negative")?; if min_distance.0 == 0.0 { for j in 0..centers.len() { if dist[j].0 == 0.0 { - centers[j].add_point(index, weight, 0.0, dist[j].1); + centers[j].add_point(index, weight, 0.0, dist[j].1)?; } } } else { @@ -376,7 +381,7 @@ fn process_point<'a,Z,U,Q,T :?Sized>(dictionary: &'a [Z], get_point: fn(usize,&' index, (weight as f64 * min_distance.0 / (sum * dist[j].0)) as f32, dist[j].0, dist[j].1 - ); + )?; } } } @@ -396,14 +401,14 @@ fn assign_and_recompute<'a, Z, Q, U, T: ?Sized>( ) -> Result where U: IntermediateCluster + Send, - T: std::marker::Sync, - Z: std::marker::Sync, + T: Sync, + Z: Sync, { for j in 0..centers.len() { centers[j].reset(); } - if (samples.len() == 0){ + if samples.len() == 0{ for i in 0..dictionary.len() { process_point(dictionary,get_point,i,centers,get_weight(i,dictionary,weights),distance)?; } @@ -487,7 +492,7 @@ pub fn general_iterative_clustering<'a, U, V, Q, Z, T: ?Sized>( approximate_bound: usize, seed: u64, parallel_enabled: bool, - create: fn(usize, &'a T, f32, V) -> U, + create: fn(usize, &'a T, f32, V) -> Result, create_params: V, distance: fn(&T, &T) -> f64, phase_2_reassign: bool, @@ -496,8 +501,8 @@ pub fn general_iterative_clustering<'a, U, V, Q, Z, T: ?Sized>( ) -> Result> where U: IntermediateCluster + Send, - T: std::marker::Sync, - Z: std::marker::Sync, + T: Sync, + Z: Sync, V: Copy, { check_argument(max_allowed < 51, " for large number of clusters, other methods may be better, consider recursively removing clusters")?; @@ -506,7 +511,7 @@ where let mut centers: Vec = Vec::new(); - let mut samples : Vec<(usize,f32)> = if dictionary.len() > approximate_bound { + let samples : Vec<(usize,f32)> = if dictionary.len() > approximate_bound { down_sample(dictionary,weights,get_weight,rng.next_u64(),approximate_bound) } else { Vec::new() @@ -537,7 +542,7 @@ where }; } if min_dist > 0.0 { - centers.push(create(index,get_point(index,dictionary), weight,create_params)); + centers.push(create(index,get_point(index,dictionary), weight,create_params)?); } } @@ -594,10 +599,10 @@ where let inital = centers.len(); if inital > max_allowed || found_merge || (enable_phase_3 && measure > overlap_parameter) { let (small, large) = centers.split_at_mut(second); - large.first_mut().unwrap().absorb(&dictionary,get_point, &small[first], distance); + large.first_mut().unwrap().absorb(&dictionary,get_point, &small[first], distance)?; centers.swap_remove(first); if phase_2_reassign && centers.len() <= PHASE2_THRESHOLD * max_allowed + 1{ - assign_and_recompute(&dictionary, weights,get_point,get_weight, &samples, &mut centers, distance, parallel_enabled); + assign_and_recompute(&dictionary, weights,get_point,get_weight, &samples, &mut centers, distance, parallel_enabled)?; } centers.sort_by(|o1, o2| o1.weight().partial_cmp(&o2.weight()).unwrap()); @@ -618,7 +623,7 @@ where centers.sort_by(|o1, o2| o2.weight().partial_cmp(&o1.weight()).unwrap()); // decreasing order let center_sum: f64 = centers.iter().map(|x| x.weight() as f64).sum(); for i in 0..centers.len() { - centers[i].scale_weight(1.0/center_sum); + centers[i].scale_weight(1.0/center_sum)?; } Ok(centers) } @@ -642,11 +647,11 @@ fn pick_to_slice<'a>(index: usize, entry:&'a [Vec]) -> &'a [f32]{ } -fn pick_tuple_weight(index:usize, entry:&[(T,f32)], weights: &[f32]) -> f32{ +fn pick_tuple_weight(index:usize, entry:&[(T,f32)], _weights: &[f32]) -> f32{ entry[index].1 } -fn pick_weight(index:usize, entry:&[T], weights: &[f32]) -> f32{ +fn pick_weight(index:usize, _entry:&[T], weights: &[f32]) -> f32{ weights[index] } @@ -835,18 +840,18 @@ impl<'b,T :?Sized> MultiCenterRef<'b,T>{ self.representatives.clone() } - pub fn new(representative: usize, point: &'b T, weight: f32, params : (usize,f32,bool)) -> Self { + pub fn new(_representative: usize, point: &'b T, weight: f32, params : (usize,f32,bool)) -> Result { let (number_of_representatives, shrinkage,is_compact) = params; - assert!(number_of_representatives>0,"has to be positive"); - assert!(shrinkage>=0.0 && shrinkage<= 1.0," has to between [0,1]"); - MultiCenterRef { + check_argument(number_of_representatives>0,"has to be positive")?; + check_argument(shrinkage>=0.0 && shrinkage<= 1.0," has to between [0,1]")?; + Ok(MultiCenterRef { representatives: vec![(point, weight as f32);1], number_of_representatives, shrinkage, is_compact, weight: weight as f64, sum_of_radii: 0.0, - } + }) } pub fn average_radius(&self) -> f64 { @@ -900,13 +905,14 @@ impl<'b, Z, T:?Sized> IntermediateCluster for MultiCenterRef<'b,T> { ((closest.0 * (1.0 - self.shrinkage as f64) + self.shrinkage as f64 * original.0), closest.1, closest.2) } - fn add_point(&mut self, index: usize, weight: f32, dist: f64, representative: usize) { + fn add_point(&mut self, _index: usize, weight: f32, dist: f64, representative: usize) -> Result<()>{ self.representatives[representative].1 += weight; self.sum_of_radii += weight as f64 * dist; self.weight += weight as f64; + Ok(()) } - fn recompute<'a>(&mut self, points:&'a [Z],get_point: fn(usize,&'a [Z]) ->&'a T, distance: fn(&T, &T) -> f64) -> f64 { + fn recompute<'a>(&mut self, _points:&'a [Z],_get_point: fn(usize,&'a [Z]) ->&'a T, _distance: fn(&T, &T) -> f64) -> f64 { self.representatives.sort_by(|a,b| a.1.partial_cmp(&b.1).unwrap()); 0.0 } @@ -929,11 +935,11 @@ impl<'b, Z, T:?Sized> IntermediateCluster for MultiCenterRef<'b,T> { fn absorb<'a>( &mut self, - points:&'a [Z], - get_point: fn(usize,&'a [Z]) ->&'a T, + _points:&'a [Z], + _get_point: fn(usize,&'a [Z]) ->&'a T, another: &dyn IntermediateCluster, distance: fn(&T, &T) -> f64, - ) { + ) -> Result<()> { self.sum_of_radii += if self.is_compact { another.average_radius()*another.weight() } else { @@ -962,19 +968,19 @@ impl<'b, Z, T:?Sized> IntermediateCluster for MultiCenterRef<'b,T> { * correspond to a well scattered set. See * https://en.wikipedia.org/wiki/CURE_algorithm */ - while (representatives.len() > 0 && self.representatives.len() < self.number_of_representatives) { + while representatives.len() > 0 && self.representatives.len() < self.number_of_representatives { let mut farthest_weighted_distance = 0.0; let mut farthest_index: usize = usize::MAX; for j in 0..representatives.len() { if representatives[j].1 as f64 > (weight as f64) / (2.0 * self.number_of_representatives as f64) { let mut new_weighted_distance = (distance)(self.representatives[0].0, representatives[j].0) * representatives[j].1 as f64; - assert!(new_weighted_distance >= 0.0, " weights or distances cannot be negative"); + check_argument(new_weighted_distance >= 0.0, " weights or distances cannot be negative")?; for i in 1..self.representatives.len() { let t = (distance)(self.representatives[i].0, representatives[j].0) * representatives[j].1 as f64; - assert!(t >= 0.0, " weights or distances cannot be negative"); - if (t < new_weighted_distance) { + check_argument(t >= 0.0, " weights or distances cannot be negative")?; + if t < new_weighted_distance { new_weighted_distance = t; } } @@ -994,13 +1000,13 @@ impl<'b, Z, T:?Sized> IntermediateCluster for MultiCenterRef<'b,T> { // absorb the remainder into existing representatives for j in 0..representatives.len() { let dist = (distance)(representatives[0].0, self.representatives[0].0); - assert!(dist >= 0.0, "distance cannot be negative"); + check_argument(dist >= 0.0, "distance cannot be negative")?; let mut min_dist = dist; let mut min_index: usize = 0; for i in 1..self.representatives.len() { let new_dist = (distance)(self.representatives[i].0, representatives[j].0); - assert!(new_dist >= 0.0, "distance cannot be negative"); - if (new_dist < min_dist) { + check_argument(new_dist >= 0.0, "distance cannot be negative")?; + if new_dist < min_dist { min_dist = new_dist; min_index = i; } @@ -1009,16 +1015,19 @@ impl<'b, Z, T:?Sized> IntermediateCluster for MultiCenterRef<'b,T> { self.sum_of_radii += representatives[j].1 as f64 * min_dist; } self.representatives.sort_by(|a,b| b.1.partial_cmp(&a.1).unwrap()); + Ok(()) } fn representatives(&self) -> Vec<(&'b T, f32)> { self.representatives() } - fn scale_weight(&mut self, factor: f64) { + fn scale_weight(&mut self, factor: f64) -> Result<()>{ + check_argument(!factor.is_nan() && factor>0.0," has to be positive")?; for i in 0..self.representatives.len() { self.representatives[i].1 = (self.representatives[i].1 as f64 * factor) as f32; } + Ok(()) } } diff --git a/Rust/src/common/conditionalfieldsummarizer.rs b/Rust/src/common/conditionalfieldsummarizer.rs index b01dac78..d4081398 100644 --- a/Rust/src/common/conditionalfieldsummarizer.rs +++ b/Rust/src/common/conditionalfieldsummarizer.rs @@ -1,6 +1,8 @@ +use std::hash::Hash; use crate::{ common::samplesummary::{summarize, SampleSummary}, pointstore::PointStore, + types::Result }; fn project_missing(point: &Vec, position: &[usize]) -> Vec { @@ -52,12 +54,12 @@ impl FieldSummarizer { } } - pub fn summarize_list( + pub fn summarize_list( &self, - pointstore: &dyn PointStore, + pointstore: &dyn PointStore, point_list_with_distance: &[(f64, usize, f64)], missing: &[usize] - ) -> SampleSummary { + ) -> Result { let mut distance_list: Vec = point_list_with_distance.iter().map(|a| a.2).collect(); distance_list.sort_by(|a, b| a.partial_cmp(&b).unwrap()); let mut threshold = 0.0; @@ -78,7 +80,7 @@ impl FieldSummarizer { let total_weight = point_list_with_distance.len() as f64; let dimensions = if !self.project || missing.len() == 0 { - pointstore.get_copy(point_list_with_distance[0].1).len() + pointstore.copy(point_list_with_distance[0].1)?.len() } else { missing.len() }; @@ -89,10 +91,10 @@ impl FieldSummarizer { let mut vec = Vec::new(); for i in 0..point_list_with_distance.len() { let point = if !self.project || missing.len() == 0 { - pointstore.get_copy(point_list_with_distance[i].1) + pointstore.copy(point_list_with_distance[i].1)? } else { project_missing( - &pointstore.get_copy(point_list_with_distance[i].1), + &pointstore.copy(point_list_with_distance[i].1)?, &missing, ) }; @@ -136,7 +138,7 @@ impl FieldSummarizer { } let summary = summarize(&vec, self.distance, self.max_number, false).unwrap(); - SampleSummary { + Ok(SampleSummary { summary_points: summary.summary_points.clone(), relative_weight: summary.relative_weight.clone(), total_weight: summary.total_weight, @@ -145,6 +147,6 @@ impl FieldSummarizer { upper, lower, deviation, - } + }) } } diff --git a/Rust/src/common/descriptor.rs b/Rust/src/common/descriptor.rs new file mode 100644 index 00000000..a9ce7e2a --- /dev/null +++ b/Rust/src/common/descriptor.rs @@ -0,0 +1,113 @@ +use crate::common::divector::DiVector; +use crate::common::rangevector::RangeVector; +use crate::trcf::types::{CorrectionMode, ImputationMethod, ScoringStrategy, TransformMethod}; +use crate::trcf::types::ImputationMethod::USE_RCF; +use crate::trcf::types::ScoringStrategy::EXPECTED_INVERSE_HEIGHT; + +/** + * This class maintains a simple discounted statistics. Setters are avoided + * except for discount rate which is useful as initialization from raw scores + */ +#[repr(C)] +#[derive(Clone)] +pub struct Descriptor { + pub id: u64, + pub current_input: Vec, + pub current_timestamp: u64, + pub missing_values: Option>, + pub rcf_point: Option>, + pub score: f32, + pub correction_mode: CorrectionMode, + pub values_seen: usize, + pub transform_method : TransformMethod, + pub threshold: f32, + pub anomaly_grade: f32, + pub data_confidence: f32, + pub attribution: Option, + pub relative_index : i32, + pub scale : Option>, + pub shift : Option>, + pub difference_deviations: Option>, + pub deviations_post : Option>, + pub time_augmented : bool, + pub expected_rcf_point: Option>, + pub last_anomaly : Option, + pub forecast : Option>, + pub error_information : Option, + pub scoring_strategy : ScoringStrategy, + pub imputation_method : ImputationMethod, +} + +#[repr(C)] +#[derive(Clone)] +pub struct AnomalyInformation { + // we do not explicitly provide a default so that each of these entires are + // considered carefully before declaring an anomaly + pub expected_rcf_point: Vec, + pub anomalous_rcf_point: Vec, + pub relative_index: i32, + pub values_seen: usize, + pub attribution: Option, + pub score: f32, + pub grade: f32, + pub expected_timestamp: u64, + pub relevant_attribution: Option>, + pub time_attribution: f32, + pub past_values: Vec, + pub past_timestamp: u64, + pub expected_values_list: Vec>, + pub likelihood_of_values: Vec +} + +#[repr(C)] +#[derive(Clone)] +pub struct ErrorInformation { + pub interval_precision: Vec, + pub error_distribution : RangeVector, + pub error_rmse : DiVector, + pub error_mean : Vec +} + +impl Default for Descriptor { + fn default() -> Self { + Descriptor{ + id: 0, + current_input: vec![], + current_timestamp: 0, + missing_values: None, + rcf_point: None, + score: 0.0, + correction_mode: CorrectionMode::NONE, + values_seen: 0, + transform_method: TransformMethod::NONE, + threshold: 0.0, + anomaly_grade: 0.0, + data_confidence: 0.0, + attribution: None, + relative_index: 0, + scale: None, + shift: None, + difference_deviations: None, + deviations_post: None, + time_augmented: false, + expected_rcf_point: None, + last_anomaly: None, + forecast: None, + error_information: None, + scoring_strategy: EXPECTED_INVERSE_HEIGHT, + imputation_method: USE_RCF + } + } +} + +impl Descriptor { + pub fn new(id: u64, point: &[f32], current_timestamp: u64,time_augmented: bool, missing_values: Option>) -> Self { + if missing_values.as_ref().is_some(){ + for i in missing_values.as_ref().unwrap() { + assert!( *i < point.len(), "incorrect input") + } + } + Descriptor { id, current_input: Vec::from(point), current_timestamp, time_augmented, missing_values, ..Default::default()} + } +} + diff --git a/Rust/src/common/deviation.rs b/Rust/src/common/deviation.rs index a75bed6c..900f299e 100644 --- a/Rust/src/common/deviation.rs +++ b/Rust/src/common/deviation.rs @@ -1,3 +1,6 @@ +use crate::util::check_argument; +use crate::types::Result; + /** * This class maintains a simple discounted statistics. Setters are avoided * except for discount rate which is useful as initialization from raw scores @@ -13,15 +16,15 @@ pub struct Deviation { } impl Deviation { - pub fn new(discount: f64) -> Self { - assert!(discount>=0.0 && discount < 1.0, "incorrect discount value"); - Deviation { + pub fn new(discount: f64) -> Result { + check_argument(discount>=0.0 && discount < 1.0, "incorrect discount value")?; + Ok(Deviation { discount, weight: 0.0, sum:0.0, sum_squared:0.0, count:0 - } + }) } pub fn default() -> Self { @@ -44,6 +47,13 @@ impl Deviation { } } + pub fn reset(&mut self) { + self.weight = 0.0; + self.count = 0; + self.sum = 0.0; + self.sum_squared = 0.0; + } + pub fn mean(&self) -> f64 { if self.is_empty() { 0.0 diff --git a/Rust/src/common/directionaldensity.rs b/Rust/src/common/directionaldensity.rs index 9004abd4..0b29fa15 100644 --- a/Rust/src/common/directionaldensity.rs +++ b/Rust/src/common/directionaldensity.rs @@ -1,4 +1,6 @@ use crate::{common::divector::DiVector, samplerplustree::boundingbox::BoundingBox}; +use crate::types::Result; +use crate::util::check_argument; #[repr(C)] #[derive(Clone)] @@ -24,21 +26,21 @@ impl InterpolationMeasure { distance: DiVector, prob_mass: DiVector, sample_size: f32, - ) -> Self { - assert!( + ) -> Result { + check_argument( measure.dimensions() == distance.dimensions(), " incorrect lengths" - ); - assert!( + )?; + check_argument( measure.dimensions() == prob_mass.dimensions(), " incorrect lengths" - ); - InterpolationMeasure { + )?; + Ok(InterpolationMeasure { measure: measure, distance: distance, probability_mass: prob_mass, sample_size, - } + }) } pub fn add_to(&self, other: &mut InterpolationMeasure) { @@ -98,13 +100,13 @@ impl InterpolationMeasure { prob } - pub fn directional_measure(&self, threshold: f64, manifold_dimension: f64) -> DiVector { - assert!( + pub fn directional_measure(&self, threshold: f64, manifold_dimension: f64) -> Result { + check_argument( self.sample_size >= 0.0 && self.measure.total() >= 0.0, " cannot have negative samples or measure" - ); + )?; if self.sample_size == 0.0f32 || self.measure.total() == 0.0 { - return DiVector::empty(self.measure.dimensions()); + return Ok(DiVector::empty(self.measure.dimensions())); } let mut sum_of_factors = 0.0; @@ -126,14 +128,14 @@ impl InterpolationMeasure { let density_factor = 1.0 / (threshold + sum_of_factors); let mut answer = self.measure.clone(); answer.scale(density_factor); - answer + Ok(answer) } - pub fn directional_density(&self) -> DiVector { + pub fn directional_density(&self) -> Result { self.directional_measure(1e-3, self.measure.dimensions() as f64) } - pub fn density(&self) -> f64 { - self.directional_density().total() + pub fn density(&self) -> Result { + Ok(self.directional_density()?.total()) } } diff --git a/Rust/src/common/divector.rs b/Rust/src/common/divector.rs index 9583b9d4..6d53270d 100644 --- a/Rust/src/common/divector.rs +++ b/Rust/src/common/divector.rs @@ -1,4 +1,6 @@ use crate::samplerplustree::boundingbox::BoundingBox; +use crate::util::check_argument; +use crate::types::Result; #[repr(C)] #[derive(Clone)] @@ -15,12 +17,12 @@ impl DiVector { } } - pub fn new(high: &[f64], low: &[f64]) -> Self { - assert!(high.len() == low.len(), " incorrect lengths"); - DiVector { + pub fn new(high: &[f64], low: &[f64]) -> Result { + check_argument(high.len() == low.len(), " incorrect lengths")?; + Ok(DiVector { high: Vec::from(high), low: Vec::from(low), - } + }) } pub fn assign_as_probability_of_cut(&mut self, bounding_box: &BoundingBox, point: &[f32]) { @@ -177,13 +179,20 @@ impl DiVector { self.high[index] + self.low[index] } - pub fn max_contribution(&self, base_dimension: usize) -> usize { + pub fn max_contribution(&self, base_dimension: usize) -> Result { + self.max_gap_contribution(base_dimension,self.dimensions()) + } + + pub fn max_gap_contribution(&self, base_dimension: usize, gap: usize) -> Result { + check_argument(gap>0, "incorrect input")?; + check_argument(base_dimension>0, "incorrect input")?; + check_argument(self.dimensions()%base_dimension == 0, "incorrect input")?; let mut val = 0.0; - let mut index = 0; + let mut index = if gap * base_dimension > self.dimensions() {0} else { self.dimensions()/base_dimension - gap}; for i in 0..base_dimension { - val += self.high_low_sum(i); + val += self.high_low_sum(index*base_dimension + i); } - for j in 1..(self.dimensions() / base_dimension) { + for j in (index+1)..(self.dimensions() / base_dimension) { let mut sum = 0.0; for i in 0..base_dimension { sum += self.high_low_sum(j * base_dimension + i); @@ -193,6 +202,6 @@ impl DiVector { index = j; } } - index + Ok(index) } } diff --git a/Rust/src/common/intervalstoremanager.rs b/Rust/src/common/intervalstoremanager.rs index 854cc068..28895c86 100644 --- a/Rust/src/common/intervalstoremanager.rs +++ b/Rust/src/common/intervalstoremanager.rs @@ -1,4 +1,6 @@ use std::fmt::Debug; +use crate::types::Result; +use crate::util::check_argument; #[repr(C)] pub struct IntervalStoreManager { @@ -52,14 +54,11 @@ where self.last_in_use == 0 } - pub fn get(&mut self) -> usize + pub fn get(&mut self) -> Result where >::Error: Debug, { - if self.is_empty() { - println!(" no more indices left"); - panic!(); - } + check_argument(!self.is_empty(),"no more indices left in interval manager")?; let answer = self.free_indices_start[self.last_in_use - 1]; let new_value: usize = answer.into(); if answer == self.free_indices_end[self.last_in_use - 1] { @@ -67,10 +66,10 @@ where } else { self.free_indices_start[self.last_in_use - 1] = (new_value + 1).try_into().unwrap(); } - new_value + Ok(new_value) } - pub fn release(&mut self, index: usize) + pub fn release(&mut self, index: usize) -> Result<()> where >::Error: Debug, { @@ -80,10 +79,10 @@ where let end: usize = self.free_indices_end[self.last_in_use - 1].into(); if start == index + 1 { self.free_indices_start[self.last_in_use - 1] = val; - return; + return Ok(()); } else if end + 1 == index { self.free_indices_end[self.last_in_use - 1] = val; - return; + return Ok(()); } } if self.last_in_use < self.free_indices_start.len() { @@ -94,6 +93,7 @@ where self.free_indices_end.resize(self.last_in_use + 1, val); } self.last_in_use += 1; + Ok(()) } pub fn used(&self) -> usize { diff --git a/Rust/src/common/mod.rs b/Rust/src/common/mod.rs index 10954539..d174034c 100644 --- a/Rust/src/common/mod.rs +++ b/Rust/src/common/mod.rs @@ -6,5 +6,5 @@ pub mod intervalstoremanager; pub mod multidimdatawithkey; pub mod samplesummary; pub mod deviation; -pub mod anomalydescriptor; pub mod rangevector; +pub mod descriptor; diff --git a/Rust/src/common/multidimdatawithkey.rs b/Rust/src/common/multidimdatawithkey.rs index d2aa84a6..d1e13784 100644 --- a/Rust/src/common/multidimdatawithkey.rs +++ b/Rust/src/common/multidimdatawithkey.rs @@ -8,6 +8,8 @@ use rand_chacha::ChaCha20Rng; use rand_core::RngCore; use crate::rand::Rng; +use crate::util::check_argument; +use crate::types::Result; pub struct MultiDimDataWithKey { pub data: Vec>, @@ -24,15 +26,15 @@ impl MultiDimDataWithKey { noise: f32, seed: u64, base_dimension: usize, - ) -> Self { - assert!( + ) -> Result { + check_argument( period.len() == base_dimension, " need a period for each dimension " - ); - assert!( + )?; + check_argument( amplitude.len() == base_dimension, " need an amplitude for each dimension" - ); + )?; let mut rng = ChaCha20Rng::seed_from_u64(seed); let mut noiserng = ChaCha20Rng::seed_from_u64(seed + 1); let mut phase: Vec = Vec::new(); @@ -71,12 +73,12 @@ impl MultiDimDataWithKey { changes.push(new_change); } } - MultiDimDataWithKey { + Ok(MultiDimDataWithKey { data, change_indices, labels: Vec::new(), changes, - } + }) } pub fn mixture( @@ -85,29 +87,29 @@ impl MultiDimDataWithKey { scale: &[Vec], weight: &[f32], seed: u64, - ) -> Self { + ) -> Result { let mut rng = ChaCha20Rng::seed_from_u64(seed); - assert!(num > 0, " number of elements cannot be 0"); - assert!(mean.len() > 0, " cannot be null"); + check_argument(num > 0, " number of elements cannot be 0")?; + check_argument(mean.len() > 0, " cannot be null")?; let base_dimension = mean[0].len(); - assert!( + check_argument( mean.len() == scale.len(), " need scales and means to be 1-1" - ); - assert!( + )?; + check_argument( weight.len() == mean.len(), " need weights and means to be 1-1" - ); + )?; for i in 0..mean.len() { - assert!( + check_argument( mean[i].len() == base_dimension, " must have the same dimensions" - ); - assert!( + )?; + check_argument( scale[i].len() == base_dimension, "sclaes must have the same dimension as the mean" - ); - assert!(weight[i] >= 0.0, " weights cannot be negative"); + )?; + check_argument(weight[i] >= 0.0, " weights cannot be negative")?; } let sum: f32 = weight.iter().sum(); @@ -124,12 +126,12 @@ impl MultiDimDataWithKey { labels.push(i); } - MultiDimDataWithKey { + Ok(MultiDimDataWithKey { data, labels, change_indices: vec![], changes: vec![], - } + }) } } diff --git a/Rust/src/common/rangevector.rs b/Rust/src/common/rangevector.rs index 240cf5be..b370437d 100644 --- a/Rust/src/common/rangevector.rs +++ b/Rust/src/common/rangevector.rs @@ -1,4 +1,5 @@ - +use crate::util::check_argument; +use crate::types::Result; /** * A RangeVector is used when we want to track a quantity and its upper and @@ -6,13 +7,23 @@ */ #[repr(C)] #[derive(Clone)] -pub struct RangeVector { - pub values: Vec, - pub upper: Vec, - pub lower: Vec +pub struct RangeVector { + pub values: Vec, + pub upper: Vec, + pub lower: Vec +} + +impl RangeVector { + pub fn new(dimensions: usize) -> Self { + RangeVector { + values: vec![0.0; dimensions], + upper: vec![0.0; dimensions], + lower: vec![0.0; dimensions] + } + } } -impl RangeVector { +impl RangeVector { pub fn new(dimensions: usize) -> Self { RangeVector { values: vec![0.0; dimensions], @@ -20,8 +31,11 @@ impl RangeVector { lower: vec![0.0; dimensions] } } +} + - pub fn from(values : Vec) -> Self { +impl RangeVector { + pub fn from(values : Vec) -> Self { RangeVector{ values : values.clone(), upper : values.clone(), @@ -29,20 +43,20 @@ impl RangeVector { } } - pub fn create(values: &[f32], upper: &[f32], lower:&[f32]) -> Self { - assert!(values.len() == upper.len() && upper.len() == lower.len(), " incorrect lengths"); + pub fn create(values: &[T], upper: &[T], lower:&[T]) -> Result { + check_argument(values.len() == upper.len() && upper.len() == lower.len(), " incorrect lengths")?; for i in 0..values.len() { - assert!(values[i] <= upper[i], " incorrect upper bound at {}", i); - assert!(lower[i] <= values [i], "incorrect lower bounds at {}",i); + check_argument(values[i] <= upper[i], " incorrect upper bound")?; + check_argument(lower[i] <= values [i], "incorrect lower bounds")?; } - RangeVector{ + Ok(RangeVector{ values :Vec::from(values), upper : Vec::from(upper), lower : Vec::from(lower) - } + }) } - pub fn shift(&mut self, i:usize, shift: f32) { + pub fn shift(&mut self, i:usize, shift: T) { self.values[i] += shift; self.upper[i] += shift; self.lower[i] += shift; @@ -55,7 +69,22 @@ impl RangeVector { } } - pub fn scale(&mut self, i:usize, scale: f32) { + pub fn cascaded_add(&mut self, base: &[T]) -> Result<()>{ + check_argument(base.len() >0 , "must be of positive length")?; + let horizon = self.values.len()/base.len(); + check_argument(horizon * base.len() == self.values.len(), " incorrect function call")?; + for j in 0..base.len() { + self.shift(j,base[j]); + } + for i in 1..horizon { + for j in 0..base.len() { + self.shift(i * base.len() + j, self.values[(i-1)*base.len() + j]); + } + } + Ok(()) + } + + pub fn scale(&mut self, i:usize, scale: T) { self.values[i] *= scale; self.upper[i] *= scale; self.lower[i] *= scale; diff --git a/Rust/src/common/samplesummary.rs b/Rust/src/common/samplesummary.rs index d7be012f..b11852b2 100644 --- a/Rust/src/common/samplesummary.rs +++ b/Rust/src/common/samplesummary.rs @@ -1,11 +1,7 @@ use std::cmp::min; use std::ops::Index; -use std::slice::SliceIndex; use crate::types::Result; -use crate::errors; use crate::util::check_argument; -use rand::{Rng, SeedableRng}; -use rand_chacha::ChaCha20Rng; use rayon::range; use crate::common::cluster::{Center, multi_cluster_as_weighted_obj, multi_cluster_as_weighted_ref, single_centroid_cluster_weighted_vec_with_distance_over_slices}; diff --git a/Rust/src/example.rs b/Rust/src/example.rs index 31eaa15e..75cb1358 100644 --- a/Rust/src/example.rs +++ b/Rust/src/example.rs @@ -5,8 +5,9 @@ use rand::{Rng, SeedableRng}; use rand_chacha::ChaCha20Rng; use rcflib::{ common::multidimdatawithkey, - rcf::{create_rcf, RCF}, + rcf::{RCF}, }; +use rcflib::rcf::{RCFBuilder, RCFOptionsBuilder}; fn main() { let shingle_size = 8; @@ -15,7 +16,6 @@ fn main() { let number_of_trees = 30; let capacity = 256; let initial_accept_fraction = 0.1; - let dimensions = shingle_size * base_dimension; let _point_store_capacity = capacity * number_of_trees + 1; let time_decay = 0.1 / capacity as f64; let bounding_box_cache_fraction = 1.0; @@ -26,20 +26,12 @@ fn main() { let internal_rotation = false; let noise = 5.0; - let mut forest: Box = create_rcf( - dimensions, - shingle_size, - capacity, - number_of_trees, - random_seed, - store_attributes, - parallel_enabled, - internal_shingling, - internal_rotation, - time_decay, - initial_accept_fraction, - bounding_box_cache_fraction, - ); + let mut forest = RCFBuilder::::new(base_dimension,shingle_size) + .tree_capacity(capacity).number_of_trees(number_of_trees).random_seed(random_seed) + .store_attributes(store_attributes).parallel_enabled(parallel_enabled).internal_shingling(internal_shingling) + .time_decay(time_decay).initial_accept_fraction(initial_accept_fraction) + .internal_rotation(internal_rotation) + .bounding_box_cache_fraction(bounding_box_cache_fraction).build_default().unwrap(); let mut rng = ChaCha20Rng::seed_from_u64(42); let mut amplitude = Vec::new(); @@ -54,7 +46,7 @@ fn main() { noise, 0, base_dimension.into(), - ); + ).unwrap(); let mut score: f64 = 0.0; let _next_index = 0; @@ -64,7 +56,7 @@ fn main() { for i in 0..data_with_key.data.len() { if i > 200 { let next_values = forest.extrapolate(1).unwrap().values; - assert!(next_values.len() == base_dimension); + assert_eq!(next_values.len(), base_dimension); error += next_values .iter() .zip(&data_with_key.data[i]) diff --git a/Rust/src/glad.rs b/Rust/src/glad.rs index 6a75d429..bd1dbc5c 100644 --- a/Rust/src/glad.rs +++ b/Rust/src/glad.rs @@ -2,10 +2,9 @@ use rand::{Rng, SeedableRng}; use rand_chacha::ChaCha20Rng; use rand_core::RngCore; -use crate::common::cluster::{multi_cluster_as_object_with_weight_array, multi_cluster_as_weighted_obj, MultiCenter, persist}; +use crate::common::cluster::{multi_cluster_as_weighted_obj, MultiCenter, persist}; use crate::util::check_argument; use crate::trcf::basicthresholder::BasicThresholder; -use crate::common::intervalstoremanager; use crate::common::intervalstoremanager::IntervalStoreManager; use crate::types::Result; @@ -52,13 +51,13 @@ pub struct GlobalLocalAnomalyDetector { } impl GlobalLocalAnomalyDetector { - pub fn new(capacity: usize, random_seed: u64, time_decay: f64, number_of_representatives: usize, shrinkage: f32, is_compact: bool) -> Self{ - let mut basic_thresholder = BasicThresholder::new_adjustible(time_decay as f32,false); + pub fn new(capacity: usize, random_seed: u64, time_decay: f64, number_of_representatives: usize, shrinkage: f32, is_compact: bool) -> Result{ + let mut basic_thresholder = BasicThresholder::new_adjustible(time_decay,false)?; basic_thresholder.set_absolute_threshold(1.2); if !is_compact { basic_thresholder.set_z_factor(2.5); } - GlobalLocalAnomalyDetector{ + Ok(GlobalLocalAnomalyDetector{ capacity, current_size: 0, random_seed, @@ -83,7 +82,7 @@ impl GlobalLocalAnomalyDetector { ignore_below: DEFAULT_IGNORE_SMALL_CLUSTER_REPRESENTATIVE, initial_accept_fraction: 0.125, //global_distance: () - } + }) } fn initial_accept_probability(&self, fill_fraction: f64) -> f64 { @@ -101,7 +100,7 @@ impl GlobalLocalAnomalyDetector { if self.current_size == self.capacity { return 1.0; }; - (self.current_size as f64 / self.capacity as f64) + self.current_size as f64 / self.capacity as f64 } fn compute_weight(&self, random_number: f64, weight: f32) -> f64 { @@ -144,7 +143,7 @@ impl GlobalLocalAnomalyDetector { evicted_point } - fn sample(&mut self, object: &T, weight: f32) -> bool { + fn sample(&mut self, object: &T, weight: f32) -> Result { self.sequence_number += 1; self.entries_seen += 1; let mut initial = false; @@ -160,15 +159,15 @@ impl GlobalLocalAnomalyDetector { if !initial { let old_index = self.evict_max().1; self.evicted = Some(self.object_list[old_index].clone()); - self.interval_manager.release(old_index); + self.interval_manager.release(old_index)?; } - let index = self.interval_manager.get(); + let index = self.interval_manager.get()?; if index < self.object_list.len() { self.object_list[index] = (object.clone(), weight); } else { self.object_list.push((object.clone(), weight)); } - if (self.heap.len() == self.current_size){ + if self.heap.len() == self.current_size { self.heap.push((heap_weight, index)); } else { self.heap[self.current_size] = (heap_weight, index); @@ -185,9 +184,9 @@ impl GlobalLocalAnomalyDetector { break; } } - return true; + return Ok(true); }; - false + Ok(false) } pub fn set_z_factor(&mut self, z_factor : f32){ @@ -195,7 +194,7 @@ impl GlobalLocalAnomalyDetector { } pub fn score(&self, current: &T, local_distance: fn(&T, &T) -> f64, consider_occlusion: bool) -> Result> { - if (self.clusters.len() == 0) { + if self.clusters.len() == 0 { return Ok(Vec::new()); } else { let mut candidate_list: Vec<(usize, (f64, &T), f64)> = Vec::new(); @@ -204,15 +203,16 @@ impl GlobalLocalAnomalyDetector { let close = self.clusters[j].distance_to_point_and_ref(current, self.ignore_below, local_distance)?; candidate_list.push((j, close, rad)); } - candidate_list.sort_by(|a, b| a.1.0.partial_cmp(&b.1.0).unwrap()); + candidate_list.sort_by(|a, b| a.1.0.partial_cmp(&b.1.0) + .expect("should not have NaN/Infinities")); - if (candidate_list[0].1.0 == 0.0) { + if candidate_list[0].1.0 == 0.0 { return Ok(vec![(candidate_list[0].1.1.clone(), 0.0)]); } let mut index = 0; - while (index < candidate_list.len()) { + while index < candidate_list.len() { let head = candidate_list[index]; - if (consider_occlusion) { + if consider_occlusion { for j in index + 1..candidate_list.len() { let occlude = (local_distance)(head.1.1, candidate_list[j].1.1); check_argument(occlude>=0.0, "distances cannot be negative")?; @@ -243,10 +243,10 @@ impl GlobalLocalAnomalyDetector { -> Result> { check_argument(weight >= 0.0, "weight cannot be negative")?; // recompute clusters first; this enables easier merges and deserialization - if (self.sequence_number > self.last_cluster + self.do_not_recluster_within) { + if self.sequence_number > self.last_cluster + self.do_not_recluster_within { let current_mean = self.basic_thresholder.primary_mean() as f32; - if (f32::abs(current_mean - self.last_mean) > 0.1 || current_mean > 1.7f32 - || self.sequence_number > self.last_cluster + 20 * self.do_not_recluster_within) { + if f32::abs(current_mean - self.last_mean) > 0.1 || current_mean > 1.7f32 + || self.sequence_number > self.last_cluster + 20 * self.do_not_recluster_within { self.last_cluster = self.sequence_number; self.last_mean = current_mean; let temp = multi_cluster_as_weighted_obj(&self.object_list, @@ -258,7 +258,9 @@ impl GlobalLocalAnomalyDetector { let threshold = self.basic_thresholder.threshold(); let mut grade: f32 = 0.0; let score: f32 = if score_list.len() == 0 { 0.0 } else { - score_list.iter().map(|a| a.1).min_by(|a,b| a.partial_cmp(b).unwrap()).unwrap() + score_list.iter().map(|a| a.1).min_by(|a, b| a.partial_cmp(b) + .expect("should not contain NaN, corrupt state")) + .expect("should be total order, corrupt state") }; if score_list.len() > 0 { @@ -266,7 +268,7 @@ impl GlobalLocalAnomalyDetector { // an exponential attribution let sum: f64 = score_list.iter().map(|a| if a.1 == SCORE_MAX { 0.0f64 } else { - f64::exp(-( a.1 * a.1) as f64) + f64::exp(-(a.1 * a.1) as f64) } ).sum(); for mut item in &mut score_list { @@ -281,13 +283,13 @@ impl GlobalLocalAnomalyDetector { item.1 = 1.0 / (y as f32); } } - grade = self.basic_thresholder.anomaly_grade(score, false); + grade = self.basic_thresholder.primary_grade(score); let other = self.basic_thresholder.z_factor(); self.basic_thresholder.update_both(score, f32::min(score, other)); } - self.sample(object, weight); + self.sample(object, weight)?; - return Ok(GenericAnomalyDescriptor { + Ok(GenericAnomalyDescriptor { representative_list: score_list, score: score as f64, threshold, diff --git a/Rust/src/pointstore.rs b/Rust/src/pointstore.rs index f6ed3d5b..f0c71f0a 100644 --- a/Rust/src/pointstore.rs +++ b/Rust/src/pointstore.rs @@ -1,49 +1,78 @@ extern crate num; use std::{collections::HashMap, convert::TryFrom, fmt::Debug}; - +use std::hash::Hash; +use std::ptr::hash; +use crate::types::{Result}; use crate::{common::intervalstoremanager::IntervalStoreManager, types::Location}; +use crate::errors::RCFError; +use crate::util::check_argument; + +pub const MAX_ATTRIBUTES: usize = 10; -pub trait PointStore { - fn get_shingled_point(&self, point: &[f32]) -> Vec; - fn get_size(&self) -> usize; - fn get_missing_indices(&self, look_ahead: usize, values: &[usize]) -> Vec; - fn get_next_indices(&self, look_ahead: usize) -> Vec; - fn get_copy(&self, index: usize) -> Vec; - fn is_equal(&self, point: &[f32], index: usize) -> bool; - fn get_reference_and_offset(&self, index: usize) -> (&[f32], usize); - - fn add(&mut self, point: &[f32]) -> usize; - fn inc(&mut self, index: usize); - fn dec(&mut self, index: usize); - fn adjust_count(&mut self, result: &[(usize, usize)]); - fn compact(&mut self); +pub trait PointStore where Label: Copy + Sync, Attributes: Copy + Sync + Hash + Eq + Send { + fn shingled_point(&self, point: &[f32]) -> Result>; + fn size(&self) -> usize; + fn missing_indices(&self, look_ahead: usize, values: &[usize]) -> Result>; + fn next_indices(&self, look_ahead: usize) -> Result>; + fn copy(&self, index: usize) -> Result>; + fn is_equal(&self, point: &[f32], index: usize) -> Result; + fn reference_and_offset(&self, index: usize) -> Result<(&[f32], usize)>; + fn entries_seen(&self) -> u64; + fn add(&mut self, point: &[f32], label:Label) -> Result<(usize,usize,Option>)>; + fn inc(&mut self, index: usize,attribute_index: usize) -> Result<()>; + fn dec(&mut self, index: usize,attribute_index: usize) -> Result<()>; + fn adjust_count(&mut self, result: &[((usize, usize),(usize,usize))]) -> Result<()>; + fn compact(&mut self) -> Result<()>; + fn label(&self, index: usize) -> Result