Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

k-D tree v2 #6

Merged
merged 43 commits into from
Jul 30, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
2726995
Add initial attempt at refactoring k-D tree with random pivot points
minkezhang Jul 20, 2022
511983f
Add initial node implementation
minkezhang Jul 21, 2022
f0d80a1
Add initial node implementation
minkezhang Jul 21, 2022
8f002c2
Add simple tests for k-D nodes
minkezhang Jul 21, 2022
0dcf677
Add more tests for node constructor
minkezhang Jul 22, 2022
353a328
Add more node tests
minkezhang Jul 22, 2022
ae6b4c4
Add tests for k-D tree construction
minkezhang Jul 22, 2022
fadb114
Improve tree construction performance by making child node constructi…
minkezhang Jul 22, 2022
7e11335
Formalize node interface
minkezhang Jul 23, 2022
6cfe8bf
Add point priority queue
minkezhang Jul 23, 2022
6d7151a
Add documentation on node interface and ensure left node contains poi…
minkezhang Jul 23, 2022
53dddee
Add initial KNN implementation
minkezhang Jul 25, 2022
910755b
Implement KNN
minkezhang Jul 25, 2022
e27de82
Add KNN performance tests
minkezhang Jul 26, 2022
2363fde
Refactor performance tests
minkezhang Jul 26, 2022
289b993
Add brute force comparison
minkezhang Jul 26, 2022
6effabf
Add buffer to KNN
minkezhang Jul 26, 2022
91dad64
Add detached node struct
minkezhang Jul 27, 2022
4f913c0
Migrate to using concrete vector instead of vector interface for a 3x…
minkezhang Jul 27, 2022
37263b4
Tweak performance parameters
minkezhang Jul 27, 2022
746eff9
Move bruteforce to mock kd directory
minkezhang Jul 28, 2022
cc43391
Migrate perf test to correct file
minkezhang Jul 28, 2022
1e27f8f
Move performance tests
minkezhang Jul 28, 2022
c4429d1
Add KNN correctness test
minkezhang Jul 28, 2022
5e8dd93
Add k-D tree data API
minkezhang Jul 28, 2022
822964b
Fix cmp order
minkezhang Jul 28, 2022
e8caec3
Fix knn
minkezhang Jul 28, 2022
d8580af
Migrate to detached node for KNN tests
minkezhang Jul 28, 2022
ea19ef2
Move tests around
minkezhang Jul 28, 2022
f76ea42
Add rangesearch
minkezhang Jul 29, 2022
8da308c
Add rangesearch correctness tests
minkezhang Jul 29, 2022
df02f53
Add perf tests for range search
minkezhang Jul 29, 2022
811d2ec
Make container interface public
minkezhang Jul 29, 2022
a906b25
Rename container.I -> container.C
minkezhang Jul 29, 2022
e7a9c68
Refactor container API
minkezhang Jul 29, 2022
09e7d16
Add container rebalance API
minkezhang Jul 30, 2022
15ba433
Add docstring for container API
minkezhang Jul 30, 2022
9950a15
Add Insert and Delete for bruteforce
minkezhang Jul 30, 2022
ffa548f
Add k-D node insert and delete
minkezhang Jul 30, 2022
9a347a8
Add k-D tree node insert and remove test framework
minkezhang Jul 30, 2022
a21a73e
Add k-D tree node insert and remove test framework
minkezhang Jul 30, 2022
8aea8ed
Add node insert tests
minkezhang Jul 30, 2022
27b6915
Add remove node tests
minkezhang Jul 30, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 66 additions & 0 deletions x/container/bruteforce/bruteforce.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
package bruteforce

import (
"sort"

"github.com/downflux/go-geometry/nd/hyperrectangle"
"github.com/downflux/go-geometry/nd/vector"
"github.com/downflux/go-kd/x/filter"
"github.com/downflux/go-kd/x/internal/perf/util"
"github.com/downflux/go-kd/x/point"
)

// L is a slice-backed list of points. Its methods operate directly on the
// slice, scanning it linearly for queries.
type L[T point.P] []T

// New returns a brute force list holding a shallow copy of d, so later
// mutations of the list do not alter the caller's slice.
//
// The returned list is always backed by a non-nil slice, even when d is nil or
// empty.
func New[T point.P](d []T) *L[T] {
	// The destination is allocated with exactly len(d) elements, so copy
	// always transfers every element of d; no length check is required.
	data := make([]T, len(d))
	copy(data, d)

	m := L[T](data)
	return &m
}

// KNN returns at most k points from the list which pass the filter f. Points
// are visited in the order produced by sorting the list with util.L
// (presumably by increasing distance to p -- confirm against util.L).
//
// A non-positive k yields no results.
//
// N.B.: the list is sorted in place, so calling KNN changes the order of the
// points subsequently reported by Data.
func (m *L[T]) KNN(p vector.V, k int, f filter.F[T]) []T {
	// Without this guard, the len(data) == k stop condition below can never
	// be met once a point has been appended, and every matching point
	// would be returned instead of none.
	if k <= 0 {
		return nil
	}

	sort.Sort(util.L[T]{
		Data: *m,
		P:    p,
	})

	var data []T
	for _, q := range *m {
		if !f(q) {
			continue
		}

		data = append(data, q)
		if len(data) == k {
			return data
		}
	}
	return data
}

// RangeSearch scans every point in the list and collects those whose
// coordinates lie in q and which are accepted by the filter f. The result is
// nil when no point qualifies.
func (m *L[T]) RangeSearch(q hyperrectangle.R, f filter.F[T]) []T {
	var hits []T
	for _, candidate := range m.Data() {
		// The filter is only consulted for points inside the bounding
		// box.
		if !q.In(candidate.P()) || !f(candidate) {
			continue
		}
		hits = append(hits, candidate)
	}
	return hits
}

// Insert appends p to the end of the list.
func (m *L[T]) Insert(p T) {
	grown := append(*m, p)
	*m = grown
}
// Remove deletes the first point in the list whose coordinates match p (per
// vector.Within) and which is accepted by the filter f. It returns true and
// the removed point on success, or false and the zero value of T when no point
// matches.
//
// The removed slot is filled with the last element of the list, so the
// relative order of the remaining points is not preserved.
func (m *L[T]) Remove(p vector.V, f filter.F[T]) (bool, T) {
	var zero T

	items := *m
	last := len(items) - 1
	for i, candidate := range items {
		if !vector.Within(p, candidate.P()) || !f(candidate) {
			continue
		}

		// Move the tail element into the vacated slot and clear the
		// tail so the truncated slice does not keep a stale reference.
		items[i] = items[last]
		items[last] = zero
		*m = items[:last]
		return true, candidate
	}
	return false, zero
}

// Data returns the slice backing the list. The slice is not copied, so it
// shares storage with the list.
func (m *L[T]) Data() []T {
	return []T(*m)
}
// Balance is a no-op for the brute force list.
func (m *L[T]) Balance() {}
144 changes: 144 additions & 0 deletions x/container/bruteforce/bruteforce_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
package bruteforce

import (
"testing"

"github.com/downflux/go-geometry/nd/vector"
"github.com/downflux/go-kd/x/container"
"github.com/downflux/go-kd/x/point/mock"
"github.com/google/go-cmp/cmp"
)

// Compile-time assertion that *L[mock.P] satisfies the container.C interface.
var _ container.C[mock.P] = &L[mock.P]{}

// TestDelete builds a list from each case's data, removes the listed
// coordinates one at a time via Remove, and compares the remaining contents of
// the list against the expected points.
func TestDelete(t *testing.T) {
	for _, c := range []struct {
		name string
		data []*mock.P
		vs   []vector.V
		want []*mock.P
	}{
		{
			name: "Nil",
			data: nil,
			vs:   []vector.V{mock.U(100)},
			want: []*mock.P{},
		},
		{
			name: "Simple",
			data: []*mock.P{
				{X: mock.U(50)},
				{X: mock.U(100)},
			},
			vs: []vector.V{mock.U(100)},
			want: []*mock.P{
				{X: mock.U(50)},
			},
		},
		{
			// Two points share the same coordinates; only the
			// first match is expected to be removed.
			name: "Degenerate",
			data: []*mock.P{
				{X: mock.U(100), Data: "A"},
				{X: mock.U(100), Data: "B"},
			},
			vs: []vector.V{mock.U(100)},
			want: []*mock.P{
				{X: mock.U(100), Data: "B"},
			},
		},
	} {
		t.Run(c.name, func(t *testing.T) {
			l := New(c.data)
			for _, v := range c.vs {
				atV := func(p *mock.P) bool { return vector.Within(v, p.P()) }
				l.Remove(v, atV)
			}

			if diff := cmp.Diff(c.want, l.Data()); diff != "" {
				t.Errorf("Data() mismatch (-want +got):\n%v", diff)
			}
		})
	}
}

// TestInsert builds a list from each case's initial data, inserts the listed
// points in order, and checks that the list contains the initial data followed
// by the inserted points.
func TestInsert(t *testing.T) {
	for _, c := range []struct {
		name string
		data []*mock.P
		ps   []*mock.P
		want []*mock.P
	}{
		{
			name: "Trivial",
			data: nil,
			ps: []*mock.P{
				{X: mock.U(100)},
			},
			want: []*mock.P{
				{X: mock.U(100)},
			},
		},
		{
			name: "MultipleInsert",
			data: nil,
			ps: []*mock.P{
				{X: mock.U(101)},
				{X: mock.U(100)},
				{X: mock.U(202)},
			},
			want: []*mock.P{
				{X: mock.U(101)},
				{X: mock.U(100)},
				{X: mock.U(202)},
			},
		},
		{
			name: "MultipleInsert/NonNil",
			data: []*mock.P{
				{X: mock.U(4)},
				{X: mock.U(5)},
			},
			ps: []*mock.P{
				{X: mock.U(101)},
				{X: mock.U(100)},
				{X: mock.U(202)},
			},
			want: []*mock.P{
				{X: mock.U(4)},
				{X: mock.U(5)},
				{X: mock.U(101)},
				{X: mock.U(100)},
				{X: mock.U(202)},
			},
		},
	} {
		t.Run(c.name, func(t *testing.T) {
			l := New(c.data)
			for _, p := range c.ps {
				l.Insert(p)
			}

			if diff := cmp.Diff(c.want, l.Data()); diff != "" {
				t.Errorf("Data() mismatch (-want +got):\n%v", diff)
			}
		})
	}
}
42 changes: 42 additions & 0 deletions x/container/container.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// Package container exports the expected storage API used for querying a set of
// objects in a system. This may be used to more freely move between different
// implementations as the conditions of the system change, e.g. when the number
// or density of agents reaches some threshold.
package container

import (
"github.com/downflux/go-geometry/nd/hyperrectangle"
"github.com/downflux/go-geometry/nd/vector"
"github.com/downflux/go-kd/x/filter"
"github.com/downflux/go-kd/x/point"
)

// C is the storage API for a set of points of type T: it supports
// nearest-neighbor and bounding-box queries as well as insertion and removal.
type C[T point.P] interface {
// KNN returns the k-nearest neighbors of the given search coordinates
// which pass the given filter function.
//
// N.B.: KNN will return at most k neighbors; in the degenerate case that
// multiple data points reside at the same spatial coordinate, this
// function will arbitrarily return a subset of these to fulfill the
// k-neighbors constraint.
KNN(p vector.V, k int, f filter.F[T]) []T

// Data returns all data stored in the container.
Data() []T

// RangeSearch returns a set of data points in the given bounding box.
// Data points are added to the returned set if they fall inside the
// bounding box and pass the given filter function.
RangeSearch(q hyperrectangle.R, f filter.F[T]) []T

// Balance updates the container after a set of mutations. For a k-D
// tree, this is a rebalance operation.
Balance()

// Insert adds a new data point into the container.
Insert(p T)

// Remove deletes an existing data point from the container. This
// function will delete the first matching point with the given
// coordinates which also passes the given filter function.
//
// The returned bool presumably reports whether a point was removed,
// with the removed point as the second value -- confirm against the
// implementations.
Remove(p vector.V, f filter.F[T]) (bool, T)
}
20 changes: 20 additions & 0 deletions x/container/kd/kd.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package kd

import (
"github.com/downflux/go-geometry/nd/hyperrectangle"
"github.com/downflux/go-geometry/nd/vector"
"github.com/downflux/go-kd/x/filter"
"github.com/downflux/go-kd/x/kd"
"github.com/downflux/go-kd/x/point"
)

// KD is a defined type over kd.KD[T]. The methods declared on it forward to
// the kd package by converting the receiver back to *kd.KD[T].
type KD[T point.P] kd.KD[T]

// KNN forwards the k-nearest-neighbors query to kd.KNN on the underlying tree.
func (t *KD[T]) KNN(p vector.V, k int, f filter.F[T]) []T {
	tree := (*kd.KD[T])(t)
	return kd.KNN(tree, p, k, f)
}
// RangeSearch forwards the bounding-box query to kd.RangeSearch on the
// underlying tree.
func (t *KD[T]) RangeSearch(q hyperrectangle.R, f filter.F[T]) []T {
	tree := (*kd.KD[T])(t)
	return kd.RangeSearch(tree, q, f)
}
// Data forwards to kd.Data on the underlying tree.
func (t *KD[T]) Data() []T {
	tree := (*kd.KD[T])(t)
	return kd.Data(tree)
}
// Balance forwards to the Balance method of the underlying tree.
func (t *KD[T]) Balance() {
	tree := (*kd.KD[T])(t)
	tree.Balance()
}
// Insert forwards p to the Insert method of the underlying tree.
func (t *KD[T]) Insert(p T) {
	tree := (*kd.KD[T])(t)
	tree.Insert(p)
}
// Remove forwards to the Remove method of the underlying tree and returns its
// results unchanged.
func (t *KD[T]) Remove(v vector.V, f filter.F[T]) (bool, T) {
	tree := (*kd.KD[T])(t)
	return tree.Remove(v, f)
}
8 changes: 8 additions & 0 deletions x/container/kd/kd_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
package kd

import (
"github.com/downflux/go-kd/x/container"
"github.com/downflux/go-kd/x/point/mock"
)

// Compile-time assertion that *KD[mock.P] satisfies the container.C interface.
var _ container.C[mock.P] = &KD[mock.P]{}
7 changes: 7 additions & 0 deletions x/filter/filter.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
package filter

import (
"github.com/downflux/go-kd/x/point"
)

// F is a predicate over points of type T; it reports whether the given point
// is accepted.
type F[T point.P] func(p T) bool
8 changes: 2 additions & 6 deletions x/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,7 @@ module github.com/downflux/go-kd/x

go 1.18

require github.com/downflux/go-kd v0.4.2

require github.com/downflux/go-geometry v0.7.0

require (
github.com/kyroy/kdtree v0.0.0-20200419114247-70830f883f1d // indirect
github.com/kyroy/priority-queue v0.0.0-20180327160706-6e21825e7e0c // indirect
github.com/downflux/go-geometry v0.10.2
github.com/google/go-cmp v0.5.6
)
23 changes: 5 additions & 18 deletions x/go.sum
Original file line number Diff line number Diff line change
@@ -1,21 +1,8 @@
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/downflux/go-geometry v0.7.0 h1:2Uv6T6Q08UpYqk9sQsRr0GkVLZwn3JGRMHjzYQFKHzc=
github.com/downflux/go-geometry v0.7.0/go.mod h1:bOw8DHBLJWY8Q7erBWH0Dk4R1poNt5v3huDhUNfxqDU=
github.com/downflux/go-kd v0.4.2 h1:Po+aWBoXNN180mAB1i5jg+xApNbUn5HIkUJEl9g27Ww=
github.com/downflux/go-kd v0.4.2/go.mod h1:wP5Trk29xnmvxbTY0djz8FF+/digzEVUQfC1Y+cEVcU=
github.com/downflux/go-geometry v0.10.2 h1:Z79Khzl6AKMSMLnM5xG75fEOL1fmIWlF14+8j+r01D0=
github.com/downflux/go-geometry v0.10.2/go.mod h1:XWTzSaMiRMAxupAR+cXAsa1Q75TCSp1Shc/ydsJ0xVE=
github.com/google/go-cmp v0.5.6 h1:BKbKCqvP6I+rmFHt06ZmyQtvB8xAkWdhFyr0ZUNZcxQ=
github.com/jupp0r/go-priority-queue v0.0.0-20160601094913-ab1073853bde h1:+5PMaaQtDUwOcJIUlmX89P0J3iwTvErTmyn5WghzXAQ=
github.com/jupp0r/go-priority-queue v0.0.0-20160601094913-ab1073853bde/go.mod h1:RDgD/dfPmIwFH0qdUOjw71HjtWg56CtyLIoHL+R1wJw=
github.com/kyroy/kdtree v0.0.0-20200419114247-70830f883f1d h1:1n5M/49q9H6QtNJiiVL/W5mqgT1UdlGQ7oLP+DkJ1vs=
github.com/kyroy/kdtree v0.0.0-20200419114247-70830f883f1d/go.mod h1:6oJGQK7VSg3RxSQ7QspgqpCmKjIbAslgT2wBXbFJUZw=
github.com/kyroy/priority-queue v0.0.0-20180327160706-6e21825e7e0c h1:1c7+XOOGQ19cXjZ1Ss/irljQxgPvb+8z+jNEprCXl20=
github.com/kyroy/priority-queue v0.0.0-20180327160706-6e21825e7e0c/go.mod h1:R477L6j2/dUcE0q0aftk0kR5Xt93W7g1066AodcJhEo=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE=
63 changes: 63 additions & 0 deletions x/internal/knn/knn.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
package knn

import (
"math"

"github.com/downflux/go-kd/x/internal/node"
"github.com/downflux/go-kd/x/point"
"github.com/downflux/go-kd/x/point/pq"
"github.com/downflux/go-kd/x/vector"

vnd "github.com/downflux/go-geometry/nd/vector"
)

// path walks from n down towards a leaf, choosing the left child whenever p
// compares less than the node pivot along the node axis and the right child
// otherwise. The visited nodes are returned deepest-first, i.e. the last node
// reached comes first and n comes last. A nil n yields a nil slice.
func path[T point.P](n node.N[T], p vnd.V) []node.N[T] {
	var trail []node.N[T]

	// Note that we are bypassing the v == n.Pivot() stop condition check --
	// we are always continuing to the leaf node. This is necessary for
	// finding multiple closest neighbors, as we care about points in the
	// tree which do not have to coincide with the point coordinates.
	for cur := n; !cur.Nil(); {
		trail = append(trail, cur)
		if cur.Leaf() {
			break
		}

		if vector.Comparator(cur.Axis()).Less(p, cur.Pivot()) {
			cur = cur.L()
		} else {
			cur = cur.R()
		}
	}

	// The walk above records nodes root-first; flip the slice so that the
	// deepest node leads.
	for i, j := 0, len(trail)-1; i < j; i, j = i+1, j-1 {
		trail[i], trail[j] = trail[j], trail[i]
	}
	return trail
}

// KNN searches the tree rooted at n for points near p which are accepted by
// the filter f, collecting them in a priority queue created with size k.
//
// The queue is drained into the result slice back to front, so the first
// element popped ends up last (presumably the farthest neighbor, leaving the
// result ordered nearest-first -- confirm against pq.PQ).
func KNN[T point.P](n node.N[T], p vnd.V, k int, f func(p T) bool) []T {
	q := pq.New[T](k)

	// A single scratch vector is shared by the whole search to avoid
	// allocating one per distance calculation.
	scratch := vnd.V(make([]float64, p.Dimension()))
	knn(n, p, q, scratch, f)

	size := q.Len()
	neighbors := make([]T, size)
	for i := range neighbors {
		neighbors[size-1-i] = q.Pop()
	}
	return neighbors
}

// knn visits the nodes returned by path for p, offering the data held by each
// node to the queue q, and recurses into the opposite child of a node when the
// queue priority exceeds the squared offset between p and the node pivot along
// the node axis.
//
// buf is scratch space handed to vnd.SubBuf (presumably receiving the
// difference of its first two arguments -- confirm); it is overwritten
// repeatedly, including by recursive calls, so each value is read immediately
// after it is computed.
func knn[T point.P](n node.N[T], p vnd.V, q *pq.PQ[T], buf vnd.V, f func(p T) bool) {
	for _, cur := range path[T](n, p) {
		for _, datum := range cur.Data() {
			vnd.SubBuf(p, datum.P(), buf)
			d := vnd.SquaredMagnitude(buf)

			// The filter is only evaluated for points which the
			// queue would accept.
			if (!q.Full() || d < q.Priority()) && f(datum) {
				q.Push(datum, d)
			}
		}

		if cur.Leaf() {
			continue
		}

		vnd.SubBuf(p, cur.Pivot(), buf)
		if !(q.Priority() > math.Pow(buf.X(cur.Axis()), 2)) {
			continue
		}

		// path descended into the left child when p compares less than
		// the pivot, so the unexplored side is the right child, and
		// vice versa.
		if vector.Comparator(cur.Axis()).Less(p, cur.Pivot()) {
			knn(cur.R(), p, q, buf, f)
			continue
		}
		knn(cur.L(), p, q, buf, f)
	}
}
Loading