-
Notifications
You must be signed in to change notification settings - Fork 0
/
infogain.m
47 lines (34 loc) · 1.05 KB
/
infogain.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
%function [max_gain_feature, gain] = infogain(x,y)
function info_gains = infogain(x,y)
%INFOGAIN Information gain of every feature column with respect to labels.
%   info_gains = INFOGAIN(x, y) returns a 1-by-size(x,2) row vector whose
%   k-th entry is H(y) - H(y | x(:,k)), with entropies measured in bits
%   (log base 2).
%
%   x : matrix with one sample per row and one discrete feature per column.
%       Values are compared with ==, so every distinct value in a column is
%       treated as its own category.
%   y : vector of class labels with one entry per row of x. Both row and
%       column vectors are accepted.
%
%   To pick the most informative feature:
%       [gain, max_gain_feature] = max(infogain(x, y));

    % Force a column vector: the label statistics below index and count
    % along the first dimension, which breaks for a row-vector y.
    y = y(:);
    n_samples = size(x, 1);
    if numel(y) ~= n_samples
        error('infogain:sizeMismatch', ...
              'y must have one label per row of x (%d labels, %d rows).', ...
              numel(y), n_samples);
    end

    n_features = size(x, 2);
    info_gains = zeros(1, n_features);

    % H(y): entropy of the labels before conditioning on any feature.
    hy = label_entropy(y);

    % Iterate over all features (columns).
    for col = 1:n_features
        column = x(:, col);
        values = unique(column);

        % H(y|x_col) = sum over feature values f of P(f) * H(y | x_col == f).
        hyx = 0;
        for k = 1:numel(values)
            mask = (column == values(k));
            pf = sum(mask) / n_samples;
            hyx = hyx + pf * label_entropy(y(mask));
        end

        info_gains(col) = hy - hyx;
    end
end

function h = label_entropy(labels)
% Shannon entropy (in bits) of the empirical distribution of LABELS.
    h = 0;
    n = numel(labels);
    if n == 0
        return;
    end
    classes = unique(labels);
    for k = 1:numel(classes)
        p = sum(labels == classes(k)) / n;
        % Skip zero-probability terms (e.g. NaN labels never match ==) so
        % that 0*log2(0) does not turn the result into NaN.
        if p > 0
            h = h - p * log2(p);
        end
    end
end