forked from ryanbressler/CloudForest
-
Notifications
You must be signed in to change notification settings - Fork 0
/
densitytarget.go
84 lines (71 loc) · 2.25 KB
/
densitytarget.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
package CloudForest
import (
"fmt"
)
// DensityTarget is the target used for density estimating trees. It holds a
// set of features and the count of cases.
type DensityTarget struct {
	// Features points to the features that Impurity and FindPredicted range
	// over; each categorical or numerical feature's Span divides the density.
	Features *[]Feature
	// N is the count of cases, used as the denominator when Impurity and
	// FindPredicted normalise the size of a region.
	N int
}
// GetName returns the fixed name "DensityTarget"; it does not read any state
// from the receiver.
func (target *DensityTarget) GetName() string {
	const name = "DensityTarget"
	return name
}
// SplitImpurity is the density estimating version of SplitImpurity.
//
// It returns the average of target.Impurity over the left (l) and right (r)
// case sets, weighted by the number of cases in each. When m is non-nil and
// non-empty, the missing cases are included in the weighted average as a third
// group. allocs is not used by this implementation.
func (target *DensityTarget) SplitImpurity(l *[]int, r *[]int, m *[]int, allocs *BestSplitAllocs) (impurityDecrease float64) {
	leftN := float64(len(*l))
	rightN := float64(len(*r))
	totalN := leftN + rightN

	weighted := leftN * target.Impurity(l, nil)
	weighted += rightN * target.Impurity(r, nil)

	// Missing cases only contribute when there actually are some.
	if m != nil && len(*m) > 0 {
		missingN := float64(len(*m))
		weighted += missingN * target.Impurity(m, nil)
		totalN += missingN
	}

	return weighted / totalN
}
// UpdateSImpFromAllocs will be called when splits are being built by moving
// cases from r to l, as in learning from numerical variables.
//
// Here it simply delegates to SplitImpurity with l, r, m and allocs; movedRtoL
// is not used. It can be reimplemented to provide further optimization.
func (target *DensityTarget) UpdateSImpFromAllocs(l *[]int, r *[]int, m *[]int, allocs *BestSplitAllocs, movedRtoL *[]int) (impurityDecrease float64) {
	impurityDecrease = target.SplitImpurity(l, r, m, allocs)
	return impurityDecrease
}
// Impurity uses the impurity measure defined in "Density Estimating Trees"
// by Parikshit Ram and Alexander G. Gray.
//
// It starts from len(*cases)² / target.N² and divides that by the Span of the
// cases in every categorical and numerical feature in target.Features.
// Features that are neither CatFeature nor NumFeature are skipped. The counter
// argument is not used; a scratch counter of length NCats() is allocated for
// each categorical feature instead.
func (target *DensityTarget) Impurity(cases *[]int, counter *[]int) (e float64) {
	// Square in float64 rather than int: an int product silently wraps once
	// the count exceeds sqrt(MaxInt) (46340 when int is 32 bits wide), while
	// the float64 product gives the same value whenever the int one fits.
	t := float64(len(*cases))
	n := float64(target.N)
	e = (t * t) / (n * n)

	for _, f := range *target.Features {
		switch f.(type) {
		case CatFeature:
			bigenoughcounter := make([]int, f.NCats())
			e /= f.Span(cases, &bigenoughcounter)
		case NumFeature:
			e /= f.Span(cases, nil)
		}
	}
	return e
}
// FindPredicted returns the string representation of the density in the
// region spanned by the specified cases.
//
// The density starts as len(cases) / target.N and is divided by the Span of
// the cases in each categorical and numerical feature in target.Features;
// features of any other kind are skipped. The result is formatted with "%v".
func (target *DensityTarget) FindPredicted(cases []int) string {
	region := &cases
	density := float64(len(cases)) / float64(target.N)

	for _, feature := range *target.Features {
		var span float64
		switch feature.(type) {
		case CatFeature:
			// Categorical spans get a scratch counter with one slot per category.
			scratch := make([]int, feature.NCats())
			span = feature.Span(region, &scratch)
		case NumFeature:
			span = feature.Span(region, nil)
		default:
			// Not a feature kind that contributes to the volume.
			continue
		}
		density /= span
	}

	return fmt.Sprintf("%v", density)
}
// NCats always returns 0 for a DensityTarget; it does not read any state from
// the receiver.
func (target *DensityTarget) NCats() int {
	const categories = 0
	return categories
}