\contentsline {chapter}{List of Contributions}{iii}{dummy.1}
\contentsline {chapter}{List of Figures}{ix}{dummy.2}
\contentsline {chapter}{List of Tables}{xiii}{dummy.3}
\contentsline {chapter}{\numberline {1}Summary}{1}{chapter.1}
\contentsline {section}{\numberline {1.1}Introduction}{1}{section.1.1}
\contentsline {paragraph}{Notations}{2}{section.1.1}
\contentsline {section}{\numberline {1.2}Anomaly Detection, Anomaly Ranking and Scoring Functions}{2}{section.1.2}
\contentsline {section}{\numberline {1.3}M-estimation and criteria for scoring functions}{4}{section.1.3}
\contentsline {subsection}{\numberline {1.3.1}Minimum Volume sets}{5}{subsection.1.3.1}
\contentsline {subsection}{\numberline {1.3.2}Mass-Volume curve}{5}{subsection.1.3.2}
\contentsline {subsection}{\numberline {1.3.3}The Excess-Mass criterion}{7}{subsection.1.3.3}
\contentsline {section}{\numberline {1.4}Accuracy on extreme regions}{9}{section.1.4}
\contentsline {subsection}{\numberline {1.4.1}Extreme Value Analysis through STDF estimation}{9}{subsection.1.4.1}
\contentsline {paragraph}{Preliminaries.}{9}{subsection.1.4.1}
\contentsline {paragraph}{Learning the dependence structure of rare events.}{10}{subsection.1.4.1}
\contentsline {subsection}{\numberline {1.4.2}Sparse Representation of Multivariate Extremes}{11}{subsection.1.4.2}
\contentsline {section}{\numberline {1.5}Heuristic approaches}{13}{section.1.5}
\contentsline {subsection}{\numberline {1.5.1}Evaluation of anomaly detection algorithms}{14}{subsection.1.5.1}
\contentsline {subsection}{\numberline {1.5.2}One-Class Random Forests}{15}{subsection.1.5.2}
\contentsline {section}{\numberline {1.6}Scikit-learn contributions}{18}{section.1.6}
\contentsline {section}{\numberline {1.7}Conclusion and Scientific Output}{18}{section.1.7}
\contentsline {paragraph}{Context of this work.}{19}{section.1.7}
\contentsline {paragraph}{Outline of the thesis.}{19}{section.1.7}
\contentsline {part}{I\hspace {1em}Preliminaries}{21}{part.1}
\contentsline {chapter}{\numberline {2}Concentration Inequalities from the Method of Bounded Differences}{23}{chapter.2}
\contentsline {section}{\numberline {2.1}Two fundamental results}{23}{section.2.1}
\contentsline {subsection}{\numberline {2.1.1}Preliminary definitions}{23}{subsection.2.1.1}
\contentsline {subsection}{\numberline {2.1.2}Inequality for Bounded Random Variables}{24}{subsection.2.1.2}
\contentsline {subsection}{\numberline {2.1.3}Bernstein-type Inequality (with variance term)}{26}{subsection.2.1.3}
\contentsline {section}{\numberline {2.2}Popular Inequalities}{27}{section.2.2}
\contentsline {section}{\numberline {2.3}Connections with Statistical Learning and VC theory}{29}{section.2.3}
\contentsline {section}{\numberline {2.4}Sharper VC-bounds through a Bernstein-type inequality}{31}{section.2.4}
\contentsline {chapter}{\numberline {3}Extreme Value Theory}{37}{chapter.3}
\contentsline {paragraph}{Notation reminder}{37}{chapter.3}
\contentsline {section}{\numberline {3.1}Univariate Extreme Value Theory}{37}{section.3.1}
\contentsline {section}{\numberline {3.2}Extension to the Multivariate framework}{40}{section.3.2}
\contentsline {chapter}{\numberline {4}Background on classical Anomaly Detection algorithms}{43}{chapter.4}
\contentsline {section}{\numberline {4.1}What is Anomaly Detection?}{43}{section.4.1}
\contentsline {section}{\numberline {4.2}Three efficient Anomaly Detection Algorithms}{44}{section.4.2}
\contentsline {subsection}{\numberline {4.2.1}One-class SVM}{44}{subsection.4.2.1}
\contentsline {subsection}{\numberline {4.2.2}Local Outlier Factor algorithm}{47}{subsection.4.2.2}
\contentsline {subsection}{\numberline {4.2.3}Isolation Forest}{47}{subsection.4.2.3}
\contentsline {section}{\numberline {4.3}Examples through scikit-learn}{47}{section.4.3}
\contentsline {subsection}{\numberline {4.3.1}What is scikit-learn?}{48}{subsection.4.3.1}
\contentsline {subsection}{\numberline {4.3.2}LOF examples}{49}{subsection.4.3.2}
\contentsline {subsection}{\numberline {4.3.3}Isolation Forest examples}{50}{subsection.4.3.3}
\contentsline {subsection}{\numberline {4.3.4}Comparison examples}{52}{subsection.4.3.4}
\contentsline {part}{II\hspace {1em}An Excess-Mass based Performance Criterion}{55}{part.2}
\contentsline {chapter}{\numberline {5}On Anomaly Ranking and Excess-Mass Curves}{57}{chapter.5}
\contentsline {section}{\numberline {5.1}Introduction}{57}{section.5.1}
\contentsline {section}{\numberline {5.2}Background and related work}{59}{section.5.2}
\contentsline {section}{\numberline {5.3}The Excess-Mass curve}{60}{section.5.3}
\contentsline {section}{\numberline {5.4}A general approach to learn a scoring function}{63}{section.5.4}
\contentsline {section}{\numberline {5.5}Extensions - Further results}{66}{section.5.5}
\contentsline {subsection}{\numberline {5.5.1}Distributions with non-compact support}{66}{subsection.5.5.1}
\contentsline {subsection}{\numberline {5.5.2}Bias analysis}{68}{subsection.5.5.2}
\contentsline {section}{\numberline {5.6}Simulation examples}{69}{section.5.6}
\contentsline {section}{\numberline {5.7}Detailed Proofs}{71}{section.5.7}
\contentsline {part}{III\hspace {1em}Accuracy on Extreme Regions}{77}{part.3}
\contentsline {chapter}{\numberline {6}Learning the dependence structure of rare events: a non-asymptotic study}{79}{chapter.6}
\contentsline {section}{\numberline {6.1}Introduction}{79}{section.6.1}
\contentsline {section}{\numberline {6.2}Background on the stable tail dependence function}{80}{section.6.2}
\contentsline {section}{\numberline {6.3}A VC-type inequality adapted to the study of low probability regions}{81}{section.6.3}
\contentsline {paragraph}{Classification of Extremes}{82}{theorem.6.3.4}
\contentsline {section}{\numberline {6.4}A bound on the STDF}{84}{section.6.4}
\contentsline {section}{\numberline {6.5}Discussion}{88}{section.6.5}
\contentsline {chapter}{\numberline {7}Sparse Representation of Multivariate Extremes}{91}{chapter.7}
\contentsline {section}{\numberline {7.1}Introduction}{91}{section.7.1}
\contentsline {subsection}{\numberline {7.1.1}Context: multivariate extreme values in large dimension}{91}{subsection.7.1.1}
\contentsline {subsection}{\numberline {7.1.2}Application to Anomaly Detection}{93}{subsection.7.1.2}
\contentsline {section}{\numberline {7.2}Multivariate EVT Framework and Problem Statement}{94}{section.7.2}
\contentsline {subsection}{\numberline {7.2.1}Statement of the Statistical Problem}{95}{subsection.7.2.1}
\contentsline {subsection}{\numberline {7.2.2}Regularity Assumptions}{98}{subsection.7.2.2}
\contentsline {section}{\numberline {7.3}A non-parametric estimator of the subcones' mass: definition and preliminary results}{99}{section.7.3}
\contentsline {subsection}{\numberline {7.3.1}A natural empirical version of the exponent measure $\mu $}{100}{subsection.7.3.1}
\contentsline {subsection}{\numberline {7.3.2}Accounting for the non-asymptotic nature of data: epsilon-thickening}{100}{subsection.7.3.2}
\contentsline {subsection}{\numberline {7.3.3}Preliminaries: uniform approximation over a VC-class of rectangles}{101}{subsection.7.3.3}
\contentsline {subsection}{\numberline {7.3.4}Bounding empirical deviations over thickened rectangles}{104}{subsection.7.3.4}
\contentsline {subsection}{\numberline {7.3.5}Bounding the bias induced by thickened rectangles}{105}{subsection.7.3.5}
\contentsline {subsection}{\numberline {7.3.6}Main result}{106}{subsection.7.3.6}
\contentsline {section}{\numberline {7.4}Application to Anomaly Detection}{108}{section.7.4}
\contentsline {subsection}{\numberline {7.4.1}Extremes and Anomaly Detection}{108}{subsection.7.4.1}
\contentsline {subsection}{\numberline {7.4.2}DAMEX Algorithm: Detecting Anomalies among Multivariate Extremes}{109}{subsection.7.4.2}
\contentsline {section}{\numberline {7.5}Experimental results}{112}{section.7.5}
\contentsline {subsection}{\numberline {7.5.1}Recovering the support of the dependence structure of generated data}{112}{subsection.7.5.1}
\contentsline {subsection}{\numberline {7.5.2}Sparse structure of extremes (wave data)}{113}{subsection.7.5.2}
\contentsline {subsection}{\numberline {7.5.3}Application to Anomaly Detection on real-world data sets}{113}{subsection.7.5.3}
\contentsline {section}{\numberline {7.6}Conclusion}{116}{section.7.6}
\contentsline {section}{\numberline {7.7}Technical proofs}{118}{section.7.7}
\contentsline {subsection}{\numberline {7.7.1}Proof of Lemma \ref {jmva:lem:equivalence}}{118}{subsection.7.7.1}
\contentsline {subsection}{\numberline {7.7.2}Proof of Lemma \ref {jmva:lem:g-alpha}}{119}{subsection.7.7.2}
\contentsline {subsection}{\numberline {7.7.3}Proof of Proposition \ref {jmva:prop:g}}{120}{subsection.7.7.3}
\contentsline {subsection}{\numberline {7.7.4}Proof of Lemma \ref {jmva:lemma_simplex}}{124}{subsection.7.7.4}
\contentsline {subsection}{\numberline {7.7.5}Proof of Remark \ref {jmva:rk:optim}}{126}{subsection.7.7.5}
\contentsline {part}{IV\hspace {1em}Efficient heuristic approaches}{127}{part.4}
\contentsline {chapter}{\numberline {8}How to Evaluate the Quality of Anomaly Detection Algorithms?}{129}{chapter.8}
\contentsline {section}{\numberline {8.1}Introduction}{129}{section.8.1}
\contentsline {paragraph}{What is a scoring function?}{130}{section.8.1}
\contentsline {paragraph}{How to know if a scoring function is good?}{130}{section.8.1}
\contentsline {section}{\numberline {8.2}Mass-Volume and Excess-Mass based criteria}{131}{section.8.2}
\contentsline {subsection}{\numberline {8.2.1}Preliminaries}{131}{subsection.8.2.1}
\contentsline {subsection}{\numberline {8.2.2}Numerical unsupervised criteria}{132}{subsection.8.2.2}
\contentsline {section}{\numberline {8.3}Scaling with dimension}{133}{section.8.3}
\contentsline {section}{\numberline {8.4}Benchmarks}{134}{section.8.4}
\contentsline {subsection}{\numberline {8.4.1}Datasets description}{135}{subsection.8.4.1}
\contentsline {subsection}{\numberline {8.4.2}Results}{136}{subsection.8.4.2}
\contentsline {section}{\numberline {8.5}Conclusion}{139}{section.8.5}
\contentsline {section}{\numberline {8.6}Further material on the experiments}{140}{section.8.6}
\contentsline {chapter}{\numberline {9}One Class Splitting Criteria for Random Forests}{149}{chapter.9}
\contentsline {section}{\numberline {9.1}Introduction}{149}{section.9.1}
\contentsline {section}{\numberline {9.2}Background on decision trees}{151}{section.9.2}
\contentsline {section}{\numberline {9.3}Adaptation to the one-class setting}{152}{section.9.3}
\contentsline {subsection}{\numberline {9.3.1}One-class splitting criterion}{153}{subsection.9.3.1}
\contentsline {paragraph}{Naive approach.}{153}{subsection.9.3.1}
\contentsline {paragraph}{Adaptive approach.}{153}{theorem.9.3.1}
\contentsline {subsection}{\numberline {9.3.2}Prediction: a majority vote with a single candidate?}{155}{subsection.9.3.2}
\contentsline {subsection}{\numberline {9.3.3}OneClassRF: a Generic One-Class Random Forest algorithm}{156}{subsection.9.3.3}
\contentsline {section}{\numberline {9.4}Benchmarks}{157}{section.9.4}
\contentsline {subsection}{\numberline {9.4.1}Default parameters of OneClassRF}{157}{subsection.9.4.1}
\contentsline {subsection}{\numberline {9.4.2}Hyper-parameters of tested algorithms}{158}{subsection.9.4.2}
\contentsline {subsection}{\numberline {9.4.3}Description of the datasets}{159}{subsection.9.4.3}
\contentsline {subsection}{\numberline {9.4.4}Results}{160}{subsection.9.4.4}
\contentsline {section}{\numberline {9.5}Theoretical justification for the one-class splitting criterion}{161}{section.9.5}
\contentsline {subsection}{\numberline {9.5.1}Underlying model}{161}{subsection.9.5.1}
\contentsline {subsection}{\numberline {9.5.2}Adaptive approach}{162}{subsection.9.5.2}
\contentsline {section}{\numberline {9.6}Conclusion}{163}{section.9.6}
\contentsline {section}{\numberline {9.7}Further details on benchmarks and unsupervised results}{164}{section.9.7}
\contentsline {chapter}{\numberline {10}Conclusion, limitations \& perspectives}{173}{chapter.10}
\contentsline {chapter}{\numberline {11}Résumé des contributions en Français}{175}{chapter.11}
\contentsline {section}{\numberline {11.1}Introduction}{175}{section.11.1}
\contentsline {paragraph}{Notations.}{176}{section.11.1}
\contentsline {section}{\numberline {11.2}Détection d'anomalies, ranking d'anomalies et fonctions de scores}{176}{section.11.2}
\contentsline {section}{\numberline {11.3}M-estimation et critères de performance pour les fonctions de scores}{178}{section.11.3}
\contentsline {subsection}{\numberline {11.3.1}Ensembles à volume minimal}{178}{subsection.11.3.1}
\contentsline {subsection}{\numberline {11.3.2}La courbe Masse-Volume}{179}{subsection.11.3.2}
\contentsline {subsection}{\numberline {11.3.3}Le critère d'excès de masse}{181}{subsection.11.3.3}
\contentsline {section}{\numberline {11.4}Précision sur les régions extrêmes}{183}{section.11.4}
\contentsline {subsection}{\numberline {11.4.1}Analyse du point de vue de la théorie des valeurs extrêmes par l'estimation de la STDF}{183}{subsection.11.4.1}
\contentsline {paragraph}{Préliminaires.}{183}{subsection.11.4.1}
\contentsline {paragraph}{Apprentissage de la structure de dépendance des événements rares.}{184}{subsection.11.4.1}
\contentsline {subsection}{\numberline {11.4.2}Représentation parcimonieuse des extrêmes multivariés}{185}{subsection.11.4.2}
\contentsline {section}{\numberline {11.5}Approches heuristiques}{188}{section.11.5}
\contentsline {subsection}{\numberline {11.5.1}Évaluer un algorithme de détection d'anomalies}{188}{subsection.11.5.1}
\contentsline {subsection}{\numberline {11.5.2}Forêts aléatoires à une classe}{190}{subsection.11.5.2}
\contentsline {section}{\numberline {11.6}Contributions sur scikit-learn}{193}{section.11.6}
\contentsline {section}{\numberline {11.7}Conclusion et production scientifique}{193}{section.11.7}
\contentsline {paragraph}{Contexte de ce travail.}{195}{section.11.7}
\contentsline {chapter}{Bibliography}{197}{dummy.4}