-
Notifications
You must be signed in to change notification settings - Fork 1
/
prescreening.py
47 lines (41 loc) · 1.61 KB
/
prescreening.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import numpy as np
import warnings
warnings.filterwarnings("ignore")
from pyod.models.xgbod import XGBOD as xgbod
def sp_label(X, Y, thres=None, fraction=0.1):
    """Label each experiment as an inlier (0) or an outlier (1).

    Outliers are the experiments with the lowest measurements: every
    measurement less than or equal to ``thres`` when a threshold is given,
    otherwise the ``int(len(Y) * fraction)`` smallest measurements.

    Parameters
    ----------
    X : ndarray with a shape of (n_training_samples, feature_size)
        Current set of experiments. It is not read by this function; the
        parameter is kept so the call signature stays unchanged.
    Y : ndarray of floats with a shape of (n_training_samples,) or
        (n_training_samples, 1)
        Current measurements using X experimental design.
    thres : float, default=None
        Threshold used to select the inliers and outliers. Measurements
        less than or equal to it are marked as outliers.
    fraction : float, default=0.1
        If there is no threshold provided, the fraction of the current
        experiments that are marked as outliers.

    Returns
    -------
    labels : ndarray of zero or one, shape (n_training_samples,)
        Final vector for experiments marked as inliers (0) and outliers (1).
    """
    # Flatten first: on a column vector (n, 1) argsort would sort along the
    # length-1 last axis and return only zeros instead of a sample ranking.
    y = np.asarray(Y).ravel()
    labels = np.zeros(len(y))
    if thres is None:
        n_outliers = int(len(y) * fraction)
        # Indices of the n_outliers smallest measurements.
        labels[np.argsort(y)[:n_outliers]] = 1
    else:
        labels[y <= thres] = 1
    return labels
class XGBOD(xgbod):
    """XGBOD class (pass to pyOD package).

    Thin subclass of ``pyod.models.xgbod.XGBOD`` that forwards every
    constructor argument to the parent and, when the caller did not supply
    an estimator list, sets ``estimator_list`` to a fixed set of base
    detectors: ``KNN``, ``LOF``, ``OCSVM`` and ``IForest``, each built with
    its default arguments.
    """

    def __init__(self, *args, **kwargs):
        super(XGBOD, self).__init__(*args, **kwargs)
        # Install the defaults only when no estimator list was given at all.
        # NOTE(review): assumes ``estimator_list`` is the first positional
        # parameter of the parent constructor, so any positional argument
        # means the caller supplied one -- confirm against the installed
        # pyod version.
        if not args and 'estimator_list' not in kwargs:
            # Imported lazily so the detectors are only loaded when needed.
            from pyod.models.knn import KNN
            from pyod.models.lof import LOF
            from pyod.models.ocsvm import OCSVM
            from pyod.models.iforest import IForest
            self.estimator_list = [KNN(), LOF(), OCSVM(), IForest()]