Skip to content

Commit

Permalink
Merge pull request #600 from xcanhu/bm-update
Browse files Browse the repository at this point in the history
benchmark-update
  • Loading branch information
yzhao062 authored Dec 20, 2024
2 parents 8109f06 + a363b5f commit 64c7f05
Show file tree
Hide file tree
Showing 4 changed files with 181 additions and 15 deletions.
142 changes: 127 additions & 15 deletions notebooks/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,35 @@
from pyod.models.cof import COF
from pyod.models.sod import SOD


from pyod.models.auto_encoder import AutoEncoder
from pyod.models.cd import CD
from pyod.models.copod import COPOD
from pyod.models.dif import DIF
from pyod.models.ecod import ECOD
from pyod.models.gmm import GMM
from pyod.models.kde import KDE
from pyod.models.lmdd import LMDD
from pyod.models.loci import LOCI #19S
from pyod.models.loda import LODA
from pyod.models.qmcd import QMCD
from pyod.models.sampling import Sampling
from pyod.models.sos import SOS

from pyod.models.alad import ALAD #40s
from pyod.models.anogan import AnoGAN #151s
from pyod.models.inne import INNE
from pyod.models.kpca import KPCA
from pyod.models.lscp import LSCP
from pyod.models.lunar import LUNAR
from pyod.models.mad import MAD
from pyod.models.mo_gaal import MO_GAAL
from pyod.models.rgraph import RGraph #271S
from pyod.models.rod import ROD
from pyod.models.so_gaal import SO_GAAL
from pyod.models.sod import SOD
from pyod.models.vae import VAE

from pyod.utils.utility import standardizer
from pyod.utils.utility import precision_n_scores
from sklearn.metrics import roc_auc_score
Expand All @@ -61,15 +90,19 @@
'shuttle.mat',
'vertebral.mat',
'vowels.mat',
'wbc.mat']
'wbc.mat'
]

# define the number of iterations
# (n_classifiers is not set here; it is derived from df_columns below)
n_ite = 1

# Column layout of the result tables: four dataset descriptors followed by
# one column per detector. The labels become the header of the DataFrames
# built from this list, so they must not carry stray whitespace.
df_columns = ['Data', '#Samples', '# Dimensions', 'Outlier Perc',
              'ABOD', 'CBLOF', 'FB', 'HBOS', 'IForest', 'KNN', 'LOF',
              'MCD', 'OCSVM', 'PCA', 'AutoEncoder', 'CD', 'COPOD', 'DIF',
              'ECOD', 'GMM', 'KDE', 'LODA', 'QMCD', 'Sampling', 'SOS',
              'ALAD', 'AnoGAN', 'INNE', 'KPCA', 'LMDD', 'LOCI', 'LSCP',
              'LUNAR', 'MO_GAAL', 'RGraph', 'SO_GAAL', 'SOD', 'VAE']

# everything after the four descriptor columns is a detector column
n_classifiers = len(df_columns) - 4

# initialize the container for saving the results
roc_df = pd.DataFrame(columns=df_columns)
Expand Down Expand Up @@ -106,27 +139,71 @@
# standardizing data for processing
X_train_norm, X_test_norm = standardizer(X_train, X_test)

# Detectors under evaluation, keyed by display name. Every detector is
# constructed with contamination=outliers_fraction; those that take a seed
# here receive the shared random_state.
classifiers = {
    'Angle-based Outlier Detector (ABOD)': ABOD(
        contamination=outliers_fraction),
    'Cluster-based Local Outlier Factor': CBLOF(
        n_clusters=10,
        contamination=outliers_fraction,
        check_estimator=False,
        random_state=random_state),
    'Feature Bagging': FeatureBagging(
        contamination=outliers_fraction,
        random_state=random_state),
    'Histogram-base Outlier Detection (HBOS)': HBOS(
        contamination=outliers_fraction),
    'Isolation Forest': IForest(
        contamination=outliers_fraction,
        random_state=random_state),
    'K Nearest Neighbors (KNN)': KNN(
        contamination=outliers_fraction),
    'Local Outlier Factor (LOF)': LOF(
        contamination=outliers_fraction),
    'Minimum Covariance Determinant (MCD)': MCD(
        contamination=outliers_fraction,
        random_state=random_state),
    'One-class SVM (OCSVM)': OCSVM(
        contamination=outliers_fraction),
    'Principal Component Analysis (PCA)': PCA(
        contamination=outliers_fraction,
        random_state=random_state),
    'AutoEncoder': AutoEncoder(
        contamination=outliers_fraction),
    'CD': CD(
        contamination=outliers_fraction),
    'COPOD': COPOD(
        contamination=outliers_fraction),
    'DIF': DIF(
        contamination=outliers_fraction),
    'ECOD': ECOD(
        contamination=outliers_fraction),
    'GMM': GMM(
        contamination=outliers_fraction),
    'KDE': KDE(
        contamination=outliers_fraction),
    'LODA': LODA(
        contamination=outliers_fraction),
    'QMCD': QMCD(
        contamination=outliers_fraction),
    'Sampling': Sampling(
        contamination=outliers_fraction),
    'SOS': SOS(
        contamination=outliers_fraction),
    # The commented-out entries below are detectors that are currently
    # switched off; they stay here so they can be re-enabled in place.
    # NOTE(review): the import lines tag ALAD "#40s", AnoGAN "#151s",
    # LOCI "#19S" and RGraph "#271S" - presumably run times, which would
    # explain why they are disabled; confirm. SO_GAAL carries no such note.
    # 'ALAD': ALAD(
    #     contamination=outliers_fraction),
    # 'AnoGAN':AnoGAN(
    #     contamination=outliers_fraction),
    'INNE': INNE(contamination=outliers_fraction),
    'KPCA': KPCA(contamination=outliers_fraction),
    'LMDD': LMDD(contamination=outliers_fraction),
    # 'LOCI': LOCI(contamination=outliers_fraction),
    'LUNAR': LUNAR(contamination=outliers_fraction),
    'MO_GAAL': MO_GAAL(contamination=outliers_fraction),
    # 'RGraph': RGraph(contamination=outliers_fraction),
    # 'SO_GAAL': SO_GAAL(contamination=outliers_fraction),
    'SOD': SOD(contamination=outliers_fraction),
    # NOTE(review): df_columns also lists 'LSCP' and 'VAE', but neither
    # detector is constructed in this dict - confirm whether they should be.
}
classifiers_indices = {
'Angle-based Outlier Detector (ABOD)': 0,
Expand All @@ -139,12 +216,47 @@
'Minimum Covariance Determinant (MCD)': 7,
'One-class SVM (OCSVM)': 8,
'Principal Component Analysis (PCA)': 9,
'AutoEncoder': 10,
'CD': 11,
'COPOD': 12,
'DIF': 13,
'ECOD': 14,
'GMM': 15,
'KDE': 16,
'LODA': 17,
'QMCD': 18,
'Sampling': 19,
'SOS': 20,
'ALAD': 21,
'AnoGAN': 22,
'INNE': 23,
'KPCA': 24,
'LMDD': 25,
'LOCI': 26,
'LUNAR': 27,
'MO_GAAL': 28,
'RGraph': 29,
'SO_GAAL': 30,
'SOD': 31,



}


for clf_name, clf in classifiers.items():
t0 = time()
clf.fit(X_train_norm)
test_scores = clf.decision_function(X_test_norm)

# Replace non-finite values in test_scores.
# np.nanmax / np.nanmin skip NaN but not +/-inf, so using them as the
# posinf / neginf fill values would write infinity straight back whenever
# the scores contain it. Clamp to the largest / smallest *finite* score
# instead, falling back to 0.0 when no score is finite at all.
finite_scores = test_scores[np.isfinite(test_scores)]
score_hi = finite_scores.max() if finite_scores.size else 0.0
score_lo = finite_scores.min() if finite_scores.size else 0.0
test_scores = np.nan_to_num(test_scores,
                            nan=0.0,
                            posinf=score_hi,
                            neginf=score_lo)
# Replace NaN / infinite entries in y_test with 0.
# NOTE(review): presumably 0 is the inlier label, so a corrupt label is
# silently counted as an inlier - confirm this is the intended fallback.
y_test = np.nan_to_num(y_test, nan=0.0, posinf=0.0, neginf=0.0)

t1 = time()
duration = round(t1 - t0, ndigits=4)

Expand Down Expand Up @@ -177,4 +289,4 @@
# Save the results for each run
# Each table goes to its own CSV file, without the index column; prc.csv is
# written once (the second, identical write was redundant).
# NOTE(review): the committed prc.csv shows unrounded values such as
# 0.38076000000000004, so float_format does not appear to take effect -
# presumably the frames hold object-dtype columns; confirm.
time_df.to_csv('time.csv', index=False, float_format='%.3f')
roc_df.to_csv('roc.csv', index=False, float_format='%.3f')
prn_df.to_csv('prc.csv', index=False, float_format='%.3f')
18 changes: 18 additions & 0 deletions notebooks/prc.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
Data,#Samples,# Dimensions,Outlier Perc,ABOD,CBLOF,FB,HBOS,IForest,KNN,LOF,MCD,OCSVM,PCA,AutoEncoder,CD,COPOD,DIF,ECOD,GMM,KDE,LODA,QMCD,Sampling,SOS,ALAD,AnoGAN ,INNE,KPCA,LMDD,LOCI,LSCP,LUNAR,MO_GAAL,RGraph,SO_GAAL,SOD,VAE
arrhythmia,452,274,14.6018,0.38076000000000004,0.4585699999999999,0.42641,0.51108,0.49556999999999995,0.44636999999999993,0.4334300000000001,0.39952,0.4614,0.46129,0.41970999999999997,0.0,0.46495,0.47433,0.49446,0.36323000000000005,0.41471,0.43984999999999996,0.049729999999999996,0.45327,0.30998000000000003,0.0,0.0,0.36761000000000005,0.42781,0.47909000000000007,0.0,0.48439,0.31001,0.0,0.0,0.36889,0.0,0.0
cardio,1831,21,9.6122,0.23742999999999997,0.40406000000000003,0.16194,0.44761000000000006,0.49265,0.33227,0.15409,0.43043000000000003,0.50112,0.609,0.37847,0.41884000000000005,0.53687,0.6144000000000001,0.5457700000000001,0.45156999999999997,0.27362000000000003,0.43045,0.50086,0.4677999999999999,0.14051000000000002,0.0,0.0,0.38051,0.31983,0.43345,0.0,0.25598,0.33255999999999997,0.0,0.0,0.30310999999999994,0.0,0.0
glass,214,9,4.2056,0.17023,0.07261999999999999,0.18095,0.0,0.10594999999999999,0.07261999999999999,0.14762,0.025,0.17262,0.07261999999999999,0.15594999999999998,0.12261999999999999,0.07261999999999999,0.24522999999999998,0.20595,0.17262,0.17262,0.025,0.025,0.10594999999999999,0.23928,0.0,0.0,0.20595,0.17262,0.07261999999999999,0.0,0.14762,0.0,0.0,0.0,0.15594999999999998,0.0,0.0
ionosphere,351,33,35.8974,0.84415,0.8134600000000001,0.7093700000000002,0.32951,0.64698,0.86021,0.7063400000000001,0.8806499999999999,0.7000499999999998,0.5728599999999999,0.7146800000000001,0.8447799999999999,0.56287,0.7875099999999999,0.51387,0.85226,0.8648100000000001,0.71319,0.41784999999999994,0.75755,0.6650400000000001,0.0,0.0,0.76081,0.8687000000000001,0.55983,0.0,0.8628600000000001,0.5791,0.0,0.0,0.7865500000000002,0.0,0.0
letter,1600,32,6.25,0.38009,0.22858,0.38184,0.07152,0.08813,0.33117,0.36411,0.19273999999999997,0.15096,0.08746999999999999,0.28865999999999997,0.27525999999999995,0.03683,0.16262,0.08904,0.29725000000000007,0.41737,0.10225999999999999,0.11845999999999998,0.16158999999999998,0.42885,0.0,0.0,0.23424,0.3634,0.08878,0.0,0.42205000000000004,0.10539000000000001,0.0,0.0,0.41620999999999997,0.0,0.0
lympho,148,18,4.0541,0.44833999999999996,0.75167,0.75167,0.8466699999999999,0.8766700000000001,0.75167,0.75167,0.44833999999999996,0.75167,0.75167,0.75167,0.29167,0.79667,0.6883299999999999,0.79667,0.64834,0.6183299999999999,0.43165999999999993,0.17332999999999998,0.80167,0.25833,0.0,0.0,0.41834,0.6183299999999999,0.79834,0.0,0.6683299999999999,0.17332999999999998,0.0,0.0,0.49833999999999995,0.0,0.0
mnist,7603,100,9.2069,0.35550000000000004,0.3953,0.33338,0.11882000000000001,0.30436,0.42042999999999997,0.33429000000000003,0.32133,0.39619,0.38460999999999995,0.37344000000000005,0.1536,0.23819999999999997,0.45736,0.18249,0.39078,0.37636,0.33729000000000003,0.17887,0.39403,0.15399,0.0,0.0,0.39781,0.4051600000000001,0.2644000000000001,0.0,0.35859,0.16896,0.0,0.0,0.33967,0.0,0.0
musk,3062,166,3.1679,0.050749999999999997,1.0,0.20373999999999998,0.9783199999999999,0.9854900000000001,0.2733,0.16954999999999998,0.98378,1.0,0.97994,0.31725,0.42475000000000007,0.34767,0.62775,0.46912000000000004,0.76602,0.12333000000000001,0.32001999999999997,0.69581,0.74071,0.042109999999999995,0.0,0.0,0.99443,0.20093999999999998,0.83667,0.0,0.11652999999999998,0.21476,0.0,0.0,0.16380999999999996,0.0,0.0
optdigits,5216,64,2.8758,0.006019999999999999,0.0,0.023719999999999998,0.21939999999999998,0.028399999999999998,0.0,0.023350000000000003,0.0,0.0,0.0,0.0,0.00471,0.010589999999999999,0.01965,0.0014500000000000001,0.0,0.0,0.0,0.0,0.0,0.04734000000000001,0.0,0.0,0.0,0.0,0.0,0.0,0.02731,0.02054,0.0,0.0,0.007090000000000001,0.0,0.0
pendigits,6870,16,2.2707,0.08124999999999999,0.23104999999999998,0.0658,0.29793000000000003,0.35816000000000003,0.09844,0.06528999999999999,0.08928,0.32866000000000006,0.31865000000000004,0.05608,0.02181,0.27415999999999996,0.32224,0.3545,0.050019999999999995,0.12423,0.39934000000000003,0.17589,0.19923,0.03913,0.0,0.0,0.15775,0.11564,0.20180000000000003,0.0,0.06629,0.14783000000000002,0.0,0.0,0.07569999999999999,0.0,0.0
pima,768,8,34.8958,0.51929,0.48672000000000004,0.44945,0.54238,0.50144,0.54133,0.45552000000000004,0.4981999999999999,0.47035,0.49429,0.47081,0.49493,0.48265,0.38776999999999995,0.44753,0.48932000000000003,0.53364,0.41633,0.52378,0.49695,0.35656000000000004,0.0,0.0,0.5000199999999999,0.5300900000000001,0.42932,0.0,0.51976,0.15489000000000003,0.0,0.0,0.46201,0.0,0.0
satellite,6435,36,31.6395,0.39023,0.5792900000000001,0.39061999999999997,0.56903,0.5571499999999999,0.49944999999999995,0.38929,0.6850200000000001,0.5345500000000001,0.4784400000000001,0.50582,0.42051999999999995,0.4777,0.6402000000000001,0.44645,0.45641,0.5605100000000001,0.48584999999999995,0.62853,0.5153099999999999,0.27453,0.0,0.0,0.61313,0.5763100000000001,0.26108999999999993,0.0,0.44067,0.46269,0.0,0.0,0.45056,0.0,0.0
satimage-2,5803,36,1.2235,0.21304999999999996,0.9375900000000001,0.06375,0.6939,0.8764399999999999,0.38087,0.055510000000000004,0.6481300000000001,0.9355600000000001,0.8040799999999999,0.36232000000000003,0.31453,0.72342,0.75913,0.61295,0.39531,0.32111,0.78276,0.88734,0.82279,0.031380000000000005,0.0,0.0,0.8136300000000001,0.53797,0.01705,0.0,0.25627,0.0,0.0,0.0,0.28467000000000003,0.0,0.0
shuttle,49097,9,7.1511,0.19773000000000002,0.28849,0.04965,0.95511,0.94676,0.21839,0.1425,0.7509000000000002,0.95418,0.95013,0.8938,0.47101,0.9497300000000001,0.58068,0.8712899999999999,0.8626299999999999,0.8650100000000001,0.3405,0.9522099999999998,0.64059,0.07549,0.0,0.0,0.8355899999999998,0.40153999999999995,0.9501800000000001,0.0,0.18308000000000002,0.42557,0.0,0.0,0.19948000000000002,0.0,0.0
vertebral,240,6,12.5,0.060050000000000006,0.03482999999999999,0.05724999999999999,0.0071400000000000005,0.03428,0.02381,0.05058999999999999,0.02857,0.02381,0.02262,0.10905000000000001,0.07939,0.0,0.11738,0.09933,0.01381,0.01381,0.01334,0.0071400000000000005,0.025,0.13516,0.0,0.0,0.05724999999999999,0.0329,0.06504000000000001,0.0,0.0329,0.08663,0.0,0.0,0.07899999999999999,0.0,0.0
vowels,1456,12,3.4341,0.57102,0.41319999999999996,0.34133,0.12974000000000002,0.19754999999999998,0.50929,0.35506000000000004,0.17649,0.27907000000000004,0.13636,0.39759000000000005,0.34219,0.01326,0.19398999999999997,0.16819,0.4132999999999999,0.52183,0.21627000000000002,0.13916,0.24786000000000002,0.24613999999999997,0.0,0.0,0.36521000000000003,0.54957,0.1307,0.0,0.58233,0.0,0.0,0.0,0.40793,0.0,0.0
wbc,378,30,5.5556,0.30604,0.4988800000000001,0.5287900000000001,0.5816600000000001,0.5087900000000001,0.49518000000000006,0.51879,0.45569,0.51249,0.47673000000000004,0.47636,0.41537999999999997,0.65316,0.15875999999999998,0.4163200000000001,0.45569,0.46159999999999995,0.48046,0.5472,0.51037,0.11879999999999999,0.0,0.0,0.4447,0.50507,0.52207,0.0,0.45959000000000005,0.0,0.0,0.0,0.47625,0.0,0.0
Loading

0 comments on commit 64c7f05

Please sign in to comment.