diff --git a/internal/core/algorithm/ngt/ngt.go b/internal/core/algorithm/ngt/ngt.go index 84270ad528..6847df909d 100644 --- a/internal/core/algorithm/ngt/ngt.go +++ b/internal/core/algorithm/ngt/ngt.go @@ -156,8 +156,8 @@ const ( // ------------------------------------------------------------- // dimension constraints // -------------------------------------------------------------. - ngtVectorDimensionSizeLimit = 1 << 16 - minimumDimensionSize = algorithm.MinimumVectorDimensionSize + VectorDimensionSizeLimit = 1<<32 - 1 + minimumDimensionSize = algorithm.MinimumVectorDimensionSize // -------------------------------------------------------------. ) @@ -685,8 +685,8 @@ func (n *ngt) GetVector(id uint) ([]float32, error) { if results == nil { return nil, n.newGoError(ebuf) } - ret = (*[ngtVectorDimensionSizeLimit]float32)(unsafe.Pointer(results))[:dimension:dimension] - // for _, elem := range (*[ngtVectorDimensionSizeLimit]C.float)(unsafe.Pointer(results))[:dimension:dimension]{ + ret = (*[VectorDimensionSizeLimit]float32)(unsafe.Pointer(results))[:dimension:dimension] + // for _, elem := range (*[VectorDimensionSizeLimit]C.float)(unsafe.Pointer(results))[:dimension:dimension]{ // ret = append(ret, float32(elem)) // } case Uint8: @@ -697,7 +697,7 @@ func (n *ngt) GetVector(id uint) ([]float32, error) { return nil, n.newGoError(ebuf) } ret = make([]float32, 0, dimension) - for _, elem := range (*[ngtVectorDimensionSizeLimit]C.uint8_t)(unsafe.Pointer(results))[:dimension:dimension] { + for _, elem := range (*[VectorDimensionSizeLimit]C.uint8_t)(unsafe.Pointer(results))[:dimension:dimension] { ret = append(ret, float32(elem)) } default: diff --git a/internal/core/algorithm/ngt/ngt_test.go b/internal/core/algorithm/ngt/ngt_test.go index 81f3e88d2c..9586637ad5 100644 --- a/internal/core/algorithm/ngt/ngt_test.go +++ b/internal/core/algorithm/ngt/ngt_test.go @@ -183,7 +183,7 @@ func TestNew(t *testing.T) { }, }, want: want{ - err: errors.NewErrCriticalOption("dimension", 1, errors.ErrInvalidDimensionSize(1, ngtVectorDimensionSizeLimit)), + err: errors.NewErrCriticalOption("dimension", 1, errors.ErrInvalidDimensionSize(1, VectorDimensionSizeLimit)), }, }, } @@ -761,7 +761,7 @@ func Test_gen(t *testing.T) { }, }, want: want{ - err: errors.NewErrCriticalOption("dimension", 1, errors.ErrInvalidDimensionSize(1, ngtVectorDimensionSizeLimit)), + err: errors.NewErrCriticalOption("dimension", 1, errors.ErrInvalidDimensionSize(1, VectorDimensionSizeLimit)), }, }, } diff --git a/internal/core/algorithm/ngt/option.go b/internal/core/algorithm/ngt/option.go index e227a7c09f..78700e0d43 100644 --- a/internal/core/algorithm/ngt/option.go +++ b/internal/core/algorithm/ngt/option.go @@ -86,8 +86,8 @@ func WithBulkInsertChunkSize(size int) Option { // WithDimension represents the option to set the dimension for NGT. func WithDimension(size int) Option { return func(n *ngt) error { - if size > ngtVectorDimensionSizeLimit || size < minimumDimensionSize { - err := errors.ErrInvalidDimensionSize(size, ngtVectorDimensionSizeLimit) + if size > VectorDimensionSizeLimit || size < minimumDimensionSize { + err := errors.ErrInvalidDimensionSize(size, VectorDimensionSizeLimit) return errors.NewErrCriticalOption("dimension", size, err) } diff --git a/internal/core/algorithm/ngt/option_test.go b/internal/core/algorithm/ngt/option_test.go index c0fe4101e8..2042effc61 100644 --- a/internal/core/algorithm/ngt/option_test.go +++ b/internal/core/algorithm/ngt/option_test.go @@ -20,6 +20,7 @@ package ngt import ( "math" "reflect" + "strconv" "testing" "github.com/vdaas/vald/internal/errors" @@ -329,13 +330,15 @@ func TestWithDimension(t *testing.T) { }, }, { - name: "set success when the size is 0", + name: "return error when the size is 0", args: args{ size: 0, }, want: want{ obj: &T{}, - err: errors.New("invalid critical option, name: dimension, val: 0: dimension size 0 is invalid, the supporting dimension size must be between 2 ~ 65536"), + err: errors.New( + "invalid critical option, name: dimension, val: 0: dimension size 0 is invalid, the supporting dimension size must be between 2 ~ " + strconv.Itoa(VectorDimensionSizeLimit), + ), }, }, { @@ -348,9 +351,9 @@ func TestWithDimension(t *testing.T) { }, }, { - name: "set success when the size is 65536", + name: "set success when the size is VectorDimensionSizeLimit", args: args{ - size: 65536, + size: VectorDimensionSizeLimit, }, want: want{ obj: &T{}, @@ -363,7 +366,9 @@ func TestWithDimension(t *testing.T) { }, want: want{ obj: &T{}, - err: errors.New("invalid critical option, name: dimension, val: 1: dimension size 1 is invalid, the supporting dimension size must be between 2 ~ 65536"), + err: errors.New( + "invalid critical option, name: dimension, val: 1: dimension size 1 is invalid, the supporting dimension size must be between 2 ~ " + strconv.Itoa(VectorDimensionSizeLimit), + ), }, }, { @@ -373,27 +378,40 @@ func TestWithDimension(t *testing.T) { }, want: want{ obj: &T{}, - err: errors.New("invalid critical option, name: dimension, val: -100: dimension size -100 is invalid, the supporting dimension size must be between 2 ~ 65536"), + err: errors.New( + "invalid critical option, name: dimension, val: -100: dimension size -100 is invalid, the supporting dimension size must be between 2 ~ " + strconv.Itoa(VectorDimensionSizeLimit), + ), }, }, { - name: "return error when the size is 65537", + name: "return error when the size is larger than VectorDimensionSizeLimit", args: args{ - size: 65537, + size: VectorDimensionSizeLimit + 1, }, want: want{ obj: &T{}, - err: errors.New("invalid critical option, name: dimension, val: 65537: dimension size 65537 is invalid, the supporting dimension size must be between 2 ~ 65536"), + err: errors.New( + "invalid critical option, name: dimension, val: 4294967296: dimension size 4294967296 is invalid, the supporting dimension size must be between 2 ~ " + strconv.Itoa( + VectorDimensionSizeLimit, + ), + ), }, }, { - name: "return error when the size is MaxInt32", + name: "set success when the size is MaxInt32", args: args{ size: math.MaxInt32, }, want: want{ - obj: &T{}, - err: errors.New("invalid critical option, name: dimension, val: 2147483647: dimension size 2147483647 is invalid, the supporting dimension size must be between 2 ~ 65536"), + obj: func() *T { + t := &T{ + dimension: math.MaxInt32, + } + if err := t.setup(); err != nil { + return nil + } + return t + }(), }, }, { @@ -403,7 +421,11 @@ func TestWithDimension(t *testing.T) { }, want: want{ obj: &T{}, - err: errors.New("invalid critical option, name: dimension, val: -2147483648: dimension size -2147483648 is invalid, the supporting dimension size must be between 2 ~ 65536"), + err: errors.New( + "invalid critical option, name: dimension, val: -2147483648: dimension size -2147483648 is invalid, the supporting dimension size must be between 2 ~ " + strconv.Itoa( + VectorDimensionSizeLimit, + ), + ), }, }, } diff --git a/internal/net/control/control_darwin.go b/internal/net/control/control_darwin.go index 67ce3fbfcf..e29f4550e7 100644 --- a/internal/net/control/control_darwin.go +++ b/internal/net/control/control_darwin.go @@ -1,4 +1,3 @@ -//go:build darwin && !linux && !windows && !wasm && !js // +build darwin,!linux,!windows,!wasm,!js // diff --git a/internal/net/control/control_other.go b/internal/net/control/control_other.go index 05306d5c19..117b492c37 100644 --- a/internal/net/control/control_other.go +++ b/internal/net/control/control_other.go @@ -1,4 +1,3 @@ -//go:build wasm && js && !windows && !linux && !darwin // +build wasm,js,!windows,!linux,!darwin // diff --git a/internal/net/control/control_unix.go b/internal/net/control/control_unix.go index 39ed43ba1a..ed22e62b2c 100644 --- a/internal/net/control/control_unix.go +++ b/internal/net/control/control_unix.go @@ -1,4 +1,3 @@ -//go:build linux && !windows && !wasm && !js && !darwin // +build linux,!windows,!wasm,!js,!darwin // diff --git a/internal/net/control/control_windows.go b/internal/net/control/control_windows.go index cd20c216f3..8d66834622 100644 --- a/internal/net/control/control_windows.go +++ b/internal/net/control/control_windows.go @@ -1,4 +1,3 @@ -//go:build windows // +build windows // diff --git a/internal/runner/runner_race_test.go b/internal/runner/runner_race_test.go index 89a0d379df..a6deaaf31a 100644 --- a/internal/runner/runner_race_test.go +++ b/internal/runner/runner_race_test.go @@ -1,4 +1,3 @@ -//go:build !race // +build !race // diff --git a/internal/singleflight/singleflight_test.go b/internal/singleflight/singleflight_test.go index b5b8c9e571..20954be6d7 100644 --- a/internal/singleflight/singleflight_test.go +++ b/internal/singleflight/singleflight_test.go @@ -1,4 +1,3 @@ -//go:build !race // +build !race // diff --git a/internal/timeutil/location/set_test.go b/internal/timeutil/location/set_test.go index 82945e1ac8..18027abb50 100644 --- a/internal/timeutil/location/set_test.go +++ b/internal/timeutil/location/set_test.go @@ -1,4 +1,3 @@ -//go:build !race // +build !race // diff --git a/tests/e2e/crud/crud_test.go b/tests/e2e/crud/crud_test.go index 3ec1deed98..3a48d04377 100644 --- a/tests/e2e/crud/crud_test.go +++ b/tests/e2e/crud/crud_test.go @@ -1,4 +1,3 @@ -//go:build e2e // +build e2e // diff --git a/tests/e2e/hdf5/hdf5.go b/tests/e2e/hdf5/hdf5.go index 5c9083cf0e..c8ebd2e88c 100644 --- a/tests/e2e/hdf5/hdf5.go +++ b/tests/e2e/hdf5/hdf5.go @@ -1,4 +1,3 @@ -//go:build e2e // +build e2e // diff --git a/tests/e2e/kubernetes/client/client.go b/tests/e2e/kubernetes/client/client.go index 5dd261b415..2f696f79c3 100644 --- a/tests/e2e/kubernetes/client/client.go +++ b/tests/e2e/kubernetes/client/client.go @@ -1,4 +1,3 @@ -//go:build e2e // +build e2e // diff --git a/tests/e2e/kubernetes/portforward/portforward.go b/tests/e2e/kubernetes/portforward/portforward.go index 622d9ff0a1..270cc8d293 100644 --- a/tests/e2e/kubernetes/portforward/portforward.go +++ b/tests/e2e/kubernetes/portforward/portforward.go @@ -1,4 +1,3 @@ -//go:build e2e // +build e2e // diff --git a/tests/e2e/multiapis/multiapis_test.go b/tests/e2e/multiapis/multiapis_test.go index 24bbbfa1f1..ec963f34f3 100644 --- a/tests/e2e/multiapis/multiapis_test.go +++ b/tests/e2e/multiapis/multiapis_test.go @@ -1,4 +1,3 @@ -//go:build e2e // +build e2e // diff --git a/tests/e2e/operation/doc.go b/tests/e2e/operation/doc.go index edc3e17c2d..c09ee5a55b 100644 --- a/tests/e2e/operation/doc.go +++ b/tests/e2e/operation/doc.go @@ -1,4 +1,3 @@ -//go:build e2e // +build e2e // diff --git a/tests/e2e/operation/multi.go b/tests/e2e/operation/multi.go index 1f263760ba..cf15592a85 100644 --- a/tests/e2e/operation/multi.go +++ b/tests/e2e/operation/multi.go @@ -1,4 +1,3 @@ -//go:build e2e // +build e2e // diff --git a/tests/e2e/operation/operation.go b/tests/e2e/operation/operation.go index c5c5227642..17c07be050 100644 --- a/tests/e2e/operation/operation.go +++ b/tests/e2e/operation/operation.go @@ -1,4 +1,3 @@ -//go:build e2e // +build e2e // diff --git a/tests/e2e/operation/stream.go b/tests/e2e/operation/stream.go index 19069c4af5..b8ad83e78a 100644 --- a/tests/e2e/operation/stream.go +++ b/tests/e2e/operation/stream.go @@ -1,4 +1,3 @@ -//go:build e2e // +build e2e // diff --git a/tests/e2e/sidecar/sidecar_test.go b/tests/e2e/sidecar/sidecar_test.go index 230ea13f83..3c57cb7ce4 100644 --- a/tests/e2e/sidecar/sidecar_test.go +++ b/tests/e2e/sidecar/sidecar_test.go @@ -1,4 +1,3 @@ -//go:build e2e // +build e2e // diff --git a/tests/performance/max_vector_dim_test.go b/tests/performance/max_vector_dim_test.go new file mode 100644 index 0000000000..65a4aa8149 --- /dev/null +++ b/tests/performance/max_vector_dim_test.go @@ -0,0 +1,300 @@ +// +// Copyright (C) 2019-2022 vdaas.org vald team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +package performance + +import ( + "bufio" + "context" + "os" + "strconv" + "strings" + "sync" + "testing" + "time" + + "github.com/vdaas/vald/apis/grpc/v1/payload" + "github.com/vdaas/vald/internal/config" + "github.com/vdaas/vald/internal/core/algorithm/ngt" + "github.com/vdaas/vald/internal/errgroup" + "github.com/vdaas/vald/internal/errors" + "github.com/vdaas/vald/internal/test/data/vector" + "github.com/vdaas/vald/pkg/agent/core/ngt/handler/grpc" + "github.com/vdaas/vald/pkg/agent/core/ngt/service" +) + +const ( + maxBit = 32 + freeMemLimit = 500 // Limit of free memory remaining(MB) +) + +func init_ngt_service(dim int) (service.NGT, error) { + cfg := &config.NGT{ + Dimension: dim, + DistanceType: ngt.L2.String(), + ObjectType: ngt.Float.String(), + EnableInMemoryMode: true, + AutoIndexDurationLimit: "96h", + AutoIndexCheckDuration: "96h", + AutoSaveIndexDuration: "96h", + AutoIndexLength: 10000000000, + KVSDB: &config.KVSDB{ + Concurrency: 1, + }, + VQueue: &config.VQueue{ + InsertBufferPoolSize: 100, + DeleteBufferPoolSize: 100, + }, + } + ngt, err := service.New(cfg.Bind()) + if err != nil { + return nil, err + } + return ngt, nil +} + +func parse(raw string) (key string, value int) { + text := strings.ReplaceAll(raw[:len(raw)-2], " ", "") + keyValue := strings.Split(text, ":") + val := 0 + if keyValue[1] != "" { + val, err := strconv.Atoi(keyValue[1]) + if err != nil { + panic(err) + } + return keyValue[0], val + } + return keyValue[0], val +} + +// Test for investigation of max dimension size for agent handler +func TestMaxDimInsert(t *testing.T) { + t.Helper() + eg, ctx := errgroup.New(context.Background()) + mu := sync.Mutex{} + // Get the above the limit of bit (2~32) + bits := make([]int, 0, maxBit-1) + ticker := time.NewTicker(5 * time.Second) + eg.Go(func() error { + for { + select { + case <-ctx.Done(): + if ok := mu.TryLock(); !ok { + mu.Unlock() + } + return nil + case <-ticker.C: + f, err := os.Open("/proc/meminfo") + if err != nil { + panic(err) + } + bufio.NewScanner(f) + scanner := bufio.NewScanner(f) + var MemFree int + for scanner.Scan() { + key, value := parse(scanner.Text()) + switch key { + case "MemFree": + MemFree = value + } + } + err = f.Close() + if err != nil { + panic(err) + } + if MemFree/1024 < freeMemLimit { + t.Logf("MemFree reaches the limit: current : %dMb, limit : %dMb", MemFree/1024, freeMemLimit) + return errors.New("Memory Limit") + } + } + } + }) + eg.Go(func() error { + for bit := 2; bit <= maxBit; bit++ { + select { + case <-ctx.Done(): + t.Log("canceld") + return nil + default: + dim := 1 << bit + if bit == maxBit { + dim-- + } + if dim > ngt.VectorDimensionSizeLimit { + t.Fatal(errors.ErrInvalidDimensionSize(dim, ngt.VectorDimensionSizeLimit)) + } + t.Logf("Start test: dimension = %d (bit = %d)", dim, bit) + ngt, err := init_ngt_service(dim) + time.Sleep(100 * time.Millisecond) + if err != nil { + t.Errorf("[Fail] Create NGT service: %#v", err) + return err + } + vec := vector.GaussianDistributedFloat32VectorGenerator(1, dim)[0] + err = ngt.Insert(strconv.Itoa(dim), vec) + if err != nil { + t.Errorf("Insert error: %#v", err) + return err + } + t.Logf("Insert is finished: dimension = %d (bit = %d)", dim, bit) + err = ngt.CreateIndex(ctx, 10) + if err != nil { + t.Errorf("CreateIndex is failed: %#v", err) + return err + } + t.Logf("CreateIndex is finished: dimension = %d (bit = %d)", dim, bit) + err = ngt.Close(ctx) + if err != nil { + t.Errorf("NGT close error: %#v", err) + return err + } + mu.Lock() + bits = append(bits, bit) + mu.Unlock() + t.Logf("All processes are finished: dimension = %d (bit = %d)", dim, bit) + } + t.Log("Wait for memory release") + time.Sleep(30 * time.Second) + } + return nil + }) + eg.Wait() + // Get the max bit, which the environment finish process, from bits + var max_bit int + for _, v := range bits { + if max_bit < v { + max_bit = v + } + } + t.Logf("Max bit is %d", max_bit) +} + +// Test for investigation of max dimension size for agent handler with gRPC +func TestMaxDimInsertGRPC(t *testing.T) { + // MaxUint64 cannot be used due to overflows + t.Helper() + eg, ctx := errgroup.New(context.Background()) + mu := sync.Mutex{} + // Get the above the limit of bit (2~32) + bits := make([]int, 0, maxBit-1) + ticker := time.NewTicker(5 * time.Second) + eg.Go(func() error { + for { + select { + case <-ctx.Done(): + if ok := mu.TryLock(); !ok { + mu.Unlock() + } + return nil + case <-ticker.C: + f, err := os.Open("/proc/meminfo") + if err != nil { + panic(err) + } + bufio.NewScanner(f) + scanner := bufio.NewScanner(f) + var MemFree int + for scanner.Scan() { + key, value := parse(scanner.Text()) + switch key { + case "MemFree": + MemFree = value + } + } + err = f.Close() + if err != nil { + panic(err) + } + if MemFree/1024 < freeMemLimit { + t.Logf("MemFree reaches the limit: current : %dMb, limit : %dMb", MemFree/1024, freeMemLimit) + return errors.New("Memory Limit") + } + } + } + }) + eg.Go(func() error { + for bit := 2; bit <= maxBit; bit++ { + select { + case <-ctx.Done(): + t.Log("canceld") + return nil + default: + dim := 1 << bit + if bit == maxBit { + dim-- + } + if dim > ngt.VectorDimensionSizeLimit { + t.Fatal(errors.ErrInvalidDimensionSize(dim, ngt.VectorDimensionSizeLimit)) + } + t.Logf("Start test: dimension = %d (bit = %d)", dim, bit) + ngt, err := init_ngt_service(dim) + time.Sleep(100 * time.Millisecond) + if err != nil { + t.Errorf("[Fail] Create NGT service: %#v", err) + return err + } + s, err := grpc.New(grpc.WithNGT(ngt)) + if err != nil { + t.Errorf("[Error] Failed to create grpc service: %#v", s) + return err + } + vec := vector.GaussianDistributedFloat32VectorGenerator(1, dim)[0] + req := &payload.Insert_Request{ + Vector: &payload.Object_Vector{ + Id: strconv.Itoa(dim), + Vector: vec, + }, + Config: &payload.Insert_Config{ + SkipStrictExistCheck: false, + }, + } + _, err = s.Insert(ctx, req) + if err != nil { + t.Errorf("[Error] Failed to Insert Vector (Dim = %d): %#v", dim, err) + return err + } + t.Logf("Insert is finished: dimension = %d (bit = %d)", dim, bit) + _, err = s.CreateIndex(ctx, &payload.Control_CreateIndexRequest{ + PoolSize: 10, + }) + if err != nil { + t.Errorf("CreateIndex is failed: %#v", err) + return err + } + t.Logf("CreateIndex is finished: dimension = %d (bit = %d)", dim, bit) + err = ngt.Close(ctx) + if err != nil { + return err + } + mu.Lock() + bits = append(bits, bit) + mu.Unlock() + t.Logf("All processes are finished: dimension = %d (bit = %d)", dim, bit) + } + t.Log("Wait for memory release") + time.Sleep(30 * time.Second) + } + return nil + }) + eg.Wait() + // Get the max bit, which the environment finish process, from bits + var max_bit int + for _, v := range bits { + if max_bit < v { + max_bit = v + } + } + t.Logf("Max bit is %d", max_bit) +}