Skip to content

Commit

Permalink
Update node construction to stop allocating channels for small datase…
Browse files Browse the repository at this point in the history
…ts (#12)
  • Loading branch information
minkezhang authored Aug 7, 2022
1 parent da71276 commit 403303d
Showing 1 changed file with 56 additions and 39 deletions.
95 changes: 56 additions & 39 deletions internal/node/tree/tree.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ import (
vnd "github.com/downflux/go-geometry/nd/vector"
)

const (
NLargeData = 128
)

type O[T point.P] struct {
Data []T
K vnd.D
Expand Down Expand Up @@ -77,48 +81,61 @@ func New[T point.P](o O[T]) *N[T] {
k: o.K,
}

// Node construction can be concurrent since we guarantee child nodes
// will never access data across the high / low boundary. Note that this
// does increase the number of allocs ~3x, and is less performant for
// low data sizes (i.e. for less than ~10k points). We can optionally
// branch tree construction on the length of incoming data if we find
// the performance prohibitively slow.
l := make(chan *N[T])
r := make(chan *N[T])

go func(ch chan<- *N[T]) {
ch <- New[T](O[T]{
Data: o.Data[0:pivot],
Axis: (o.Axis + 1) % o.K,
K: o.K,
N: o.N,
// There is no need to continue shuffling data in child
// nodes, as we are making the assumption the root
// shuffle was random enough.
inorder: true,
})
close(ch)
}(l)
go func(ch chan<- *N[T]) {
ch <- New[T](O[T]{
Data: o.Data[pivot+1 : len(o.Data)],
Axis: (o.Axis + 1) % o.K,
K: o.K,
N: o.N,
inorder: true,
})
close(ch)
}(r)

// Skip adding child nodes if they do not contain data -- this prevents
// extraneous leaves from being added.
if n := <-l; len(n.Data()) > 0 {
node.left = n
ol := O[T]{
Data: o.Data[0:pivot],
Axis: (o.Axis + 1) % o.K,
K: o.K,
N: o.N,
// There is no need to continue shuffling data in child
// nodes, as we are making the assumption the root
// shuffle was random enough.
inorder: true,
}
if n := <-r; len(n.Data()) > 0 {
node.right = n
or := O[T]{
Data: o.Data[pivot+1 : len(o.Data)],
Axis: (o.Axis + 1) % o.K,
K: o.K,
N: o.N,
inorder: true,
}

// Channel overhead is too high relative to the stack when the dataset
// is small. Quick experimentation has determined a value which seems
// "good enough" to branch on, though this figure can probably be dialed
// in more as architectures change.
if len(o.Data) < NLargeData {
if n := New[T](ol); len(n.Data()) > 0 {
node.left = n
}
if n := New[T](or); len(n.Data()) > 0 {
node.right = n
}
} else {
// Node construction can be concurrent since we guarantee child nodes
// will never access data across the high / low boundary. Note that this
// does increase the number of allocs ~3x, and is less performant for
// low data sizes (i.e. for less than ~10k points).
l := make(chan *N[T])
r := make(chan *N[T])

go func(ch chan<- *N[T]) {
ch <- New[T](ol)
close(ch)
}(l)
go func(ch chan<- *N[T]) {
ch <- New[T](or)
close(ch)
}(r)

// Skip adding child nodes if they do not contain data -- this prevents
// extraneous leaves from being added.
if n := <-l; len(n.Data()) > 0 {
node.left = n
}
if n := <-r; len(n.Data()) > 0 {
node.right = n
}
}
return node
}

Expand Down

0 comments on commit 403303d

Please sign in to comment.