forked from ryanbressler/CloudForest
-
Notifications
You must be signed in to change notification settings - Fork 0
/
forrest.go
69 lines (54 loc) · 1.84 KB
/
forrest.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
package CloudForest
import ()
//Forest represents a collection of decision trees grown to predict Target.
//GrowRandomForest in this file builds one by appending a Tree per iteration.
type Forest struct {
//Forest string
//Target is the name of the predicted feature; GrowRandomForest fills it
//from target.GetName().
Target string
//Trees holds the grown trees in the order they were appended.
Trees []*Tree
//Intercept is left at 0.0 by GrowRandomForest unless the target implements
//TargetWithIntercept, in which case it is set from the target's Intercept().
Intercept float64
}
/*
GrowRandomForest grows a forest using Breiman and Cutler's method. In many cases
it will yield better performance to re-implement this method to write trees directly
to disk or to grow trees in parallel. See the grow forest command line utility for an
example of this.

fm is the feature matrix to learn from. The length of its first feature
(fm.Data[0]) is used as the number of cases available for sampling, so fm must
contain at least one feature whenever nTrees is greater than zero.

target is the feature to predict. Its name is stored in the returned Forest. If it
implements TargetWithIntercept, its Intercept() is stored as the forest's Intercept.
If it implements BoostingTarget, each tree is partitioned over fm right after it is
grown and the value returned by Boost becomes that tree's Weight.

candidates is handed unchanged to Tree.Grow (presumably the indices of the features
that may be used for splitting; confirm against Tree.Grow).

nSamples is the number of cases to sample (with replacement) for each tree.

mTry is the number of candidate features to evaluate at each node.

nTrees is the number of trees to grow.

leafSize is the minimum number of cases that should end up on a leaf.

maxDepth, splitmissing, force, vet, evaloob and importance are forwarded unchanged
to Tree.Grow; see that method for their meaning.

NOTE(review): earlier documentation described an "itter" flag selecting iterative
splitting for categorical features. This function has no such parameter; verify
against Tree.Grow whether that behaviour is still configurable.

The returned Forest holds the nTrees grown trees in the order they were grown.
*/
func GrowRandomForest(fm *FeatureMatrix,
	target Target,
	candidates []int,
	nSamples int,
	mTry int,
	nTrees int,
	leafSize int,
	maxDepth int,
	splitmissing bool,
	force bool,
	vet bool,
	evaloob bool,
	importance *[]*RunningMean) (f *Forest) {

	f = &Forest{
		Target:    target.GetName(),
		Trees:     make([]*Tree, 0, nTrees),
		Intercept: 0.0,
	}

	// A single comma-ok assertion replaces the type switch plus second assertion.
	if withIntercept, ok := target.(TargetWithIntercept); ok {
		f.Intercept = withIntercept.Intercept()
	}

	// The dynamic type of target cannot change while trees are grown, so the
	// boosting capability is resolved once instead of on every iteration.
	booster, boosts := target.(BoostingTarget)

	// Slices for reuse during search for best splitter.
	allocs := NewBestSplitAllocs(nSamples, target)

	for i := 0; i < nTrees; i++ {
		// Read inside the loop so that a request for zero trees never touches fm.
		nCases := fm.Data[0].Length()
		cases := SampleWithReplacment(nSamples, nCases)

		tree := NewTree()
		f.Trees = append(f.Trees, tree)
		tree.Grow(fm, target, cases, candidates, nil, mTry, leafSize, maxDepth, splitmissing, force, vet, evaloob, false, importance, nil, allocs)

		if boosts {
			// Boosting targets re-weight themselves from this tree's partition
			// of the data; the returned value is the tree's vote weight.
			ls, ps := tree.Partition(fm)
			tree.Weight = booster.Boost(ls, ps)
		}
	}
	return f
}