function cf = train_ensemble(param,X,clabel)
% Trains an ensemble of classifiers. Uses many classifiers (aka weak
% learners) trained on subsets of the samples and subsets of the features
% (random subspaces).
%
% Usage:
% cf = train_ensemble(param,X,clabel)
%
% Parameters:
% X - [samples x features] matrix of training samples
% clabel - [samples x 1] vector of class labels
%
% param: struct with parameters:
% .nsamples - number of randomly subselected samples for each
% learner. Can be an integer number or a
% fraction, e.g. 0.1 means that 10% of the training
% data is used for each learner. (default 0.5)
% .nfeatures - number of randomly subselected features. Can be an
% integer number or a fraction, e.g. 0.1 means that
% 10% of the features are used for each learner. (default 0.1)
% .nlearners - number of learners (default 100)
% .strategy - strategy for making decisions. If 'vote', the class
% label from each learner is obtained and the class
% associated with the majority vote is taken (a class is
% chosen at random in case of a draw). Many classifiers
% also provide decision values; in this case, the
% decision values can be averaged and the decision taken
% according to the mean decision value; use 'dval' for
% this. Note that 'dval' only works for binary
% classifiers (default 'vote')
% .stratify - if 1, takes care that the class proportions are
% preserved during the subselection of samples. If 0,
% samples are randomly chosen which can lead to some
% learners not 'seeing' a particular class (default 1)
% .bootstrap - if 1, samples are selected with replacement
% (this is also called bootstrapping), otherwise they
% are drawn without replacement (default 1)
% .learner - type of learning algorithm, e.g. 'lda','logreg' etc.
% Any classifier with train and test functions can
% serve as a learner.
% .learner_param - struct with further parameters passed on to the learning
% algorithm (e.g. cfg.hyperparameter.learner_param.lambda specifies the
% regularisation hyperparameter for LDA)
% .simplify - for linear classifiers, the operation of the ensemble
% is itself equivalent to a single linear classifier.
% Hence, a single projection w and threshold b can be
% calculated, which makes prediction more efficient.
% Currently, this works for lda and logreg
%
% Output:
% cf - struct specifying the ensemble classifier
%
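% Example (a minimal sketch; assumes that mv_get_hyperparameter provides
% defaults for 'ensemble' as it does for other models, and that the
% companion test_ensemble(cf, Xtest) function is on the path):
%   param = mv_get_hyperparameter('ensemble');
%   param.learner   = 'lda';    % use LDA as the weak learner
%   param.nlearners = 200;      % illustrative value
%   cf = train_ensemble(param, X, clabel);
%   predlabel = test_ensemble(cf, Xtest);
%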
% (c) Matthias Treder 2017-2018
[N,F] = size(X);
nclasses = max(clabel);
% dval only works for binary classification problems
if strcmp(param.strategy, 'dval') && nclasses > 2
error(['strategy=''dval'' only works for binary classification problems. ' ...
'For multi-class problems, set strategy=''vote'''])
end
% if fractions are given for nsamples and nfeatures, turn them into
% absolute numbers
if param.nsamples < 1
param.nsamples= round(param.nsamples*N);
end
if param.nfeatures < 1
param.nfeatures= round(param.nfeatures*F);
end
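% (e.g. with N = 200 training samples, an nsamples fraction of 0.25
% becomes round(0.25*200) = 50 samples per learner; numbers illustrative)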
% if we want stratification, we need to calculate how many samples of each
% class we need in the subselected data
if param.stratify > 0
% Indices of the samples for each class
cidx = arrayfun( @(c) find(clabel==c), 1:nclasses, 'Un', 0);
% total number of samples in each class
Ntotal = arrayfun( @(c) sum(clabel==c), 1:nclasses);
% number of selected samples from each class
Nsub= floor(param.nsamples * Ntotal / N);
% due to the flooring above, Nsub might not add up to nsamples. If
% samples are missing, add one sample to each class in turn until the
% discrepancy is gone
addN = param.nsamples - sum(Nsub);
cc = 1;
while addN > 0
Nsub(cc) = Nsub(cc) + 1;
cc = mod(cc, nclasses)+1;
addN = addN - 1;
end
end
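% Worked example with illustrative numbers: N = 100, nsamples = 50 and
% class sizes Ntotal = [34 33 33] give Nsub = floor(50*[34 33 33]/100)
% = [17 16 16], which sums to 49. The loop above then assigns the one
% missing sample to class 1, yielding Nsub = [18 16 16].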
%% Get learner hyperparameters
param.learner_param = mv_get_hyperparameter(param.learner, param.learner_param);
%% Select random features for the learners
random_features = sparse(false(F,param.nlearners));
for ll=1:param.nlearners
random_features(randperm(F,param.nfeatures),ll)=true;
end
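% e.g. with F = 1000 features and nfeatures = 100 (illustrative numbers),
% each column of random_features is a logical mask with exactly 100 true
% entries, marking the random subspace seen by that learner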
%% Select random samples for the learners
% The cases differ depending on stratification and bootstrapping
random_samples = zeros(param.nsamples,param.nlearners);
if param.stratify
% We need to fill up the random_samples matrix with samples belonging
% to each class. class_sample_idx(cc,:) stores the first and last row
% of random_samples reserved for class cc
class_sample_idx = zeros(nclasses,2);
for cc=1:nclasses
if cc==1, class_sample_idx(cc,1) = 1;
else
class_sample_idx(cc,1) = class_sample_idx(cc-1,2)+1;
end
class_sample_idx(cc,2) = sum(Nsub(1:cc));
end
for cc=1:nclasses
for ll=1:param.nlearners
if param.bootstrap % draw with replacement
random_samples(class_sample_idx(cc,1):class_sample_idx(cc,2),ll) = cidx{cc}(randi(Ntotal(cc),1,Nsub(cc)));
else % draw without replacement
random_samples(class_sample_idx(cc,1):class_sample_idx(cc,2),ll) = cidx{cc}(randperm(Ntotal(cc),Nsub(cc)));
end
end
end
else
% no stratification, draw samples without caring for class labels
if param.bootstrap
for ll=1:param.nlearners
random_samples(:,ll)=randi(N,1,param.nsamples);
end
else % draw without replacement
for ll=1:param.nlearners
random_samples(:,ll)=randperm(N,param.nsamples);
end
end
end
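% sorting within each column only orders the drawn indices; it does not
% change which samples each learner sees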
random_samples = sort(random_samples);
%% Train learner ensemble
cf = struct('random_features',random_features,'strategy',param.strategy,...
'nlearners',param.nlearners,'simplify',param.simplify, 'nclasses', nclasses);
% function handles to the learner's train and test functions
cf.train= str2func(['train_' param.learner]);
cf.test= str2func(['test_' param.learner]);
if param.simplify
% In linear classifiers, the operation of the ensemble is equivalent to
% the operation of a single classifier with an appropriate weight vector
% w and threshold b.
% To obtain a single w, pad each learner's w with zeros (for the
% discarded features), sum the w's, and divide by the number of
% learners; the b's are averaged likewise.
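% Formally, for a test sample x:
%   mean_ll( w_ll' * x + b_ll ) = ( mean_ll w_ll )' * x + mean_ll b_ll
% so the averaged zero-padded weights and the averaged thresholds define
% a single equivalent linear classifier.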
cf.w = zeros(F,1);
cf.b = 0;
for ll=1:param.nlearners
tmp = cf.train(param.learner_param, X(random_samples(:,ll),random_features(:,ll)), clabel(random_samples(:,ll)));
cf.w(random_features(:,ll)) = cf.w(random_features(:,ll)) + tmp.w;
cf.b = cf.b + tmp.b;
end
cf.w = cf.w / param.nlearners;
cf.b = cf.b / param.nlearners;
else
% Initialise the struct array of learners by training the last learner
% first; this preallocates the full array in a single assignment
cf.classifier(param.nlearners) = cf.train(param.learner_param, X(random_samples(:,param.nlearners),random_features(:,param.nlearners)),clabel(random_samples(:,param.nlearners)));
% Train all the other learners
for ll=1:param.nlearners-1
cf.classifier(ll) = cf.train(param.learner_param, X(random_samples(:,ll),random_features(:,ll)),clabel(random_samples(:,ll)));
end
end