From 2c06460dcc9611980ccf016baeae15b3985acae0 Mon Sep 17 00:00:00 2001 From: Roman Atachiants Date: Sun, 27 Oct 2024 22:53:37 +0400 Subject: [PATCH] Normalize to speed up distance calc (#1) --- bruteforce.go | 23 ++++++++- bruteforce_test.go | 2 +- internal/cosine/cosine_apple.c | 13 ++++- internal/cosine/cosine_avx.c | 17 +++++-- internal/cosine/cosine_neon.c | 13 ++++- internal/cosine/simd/cosine_apple.go | 3 ++ internal/cosine/simd/cosine_apple.s | 62 ++++++++++++++++++++++++ internal/cosine/simd/cosine_avx.go | 3 ++ internal/cosine/simd/cosine_avx.s | 71 ++++++++++++++++++++++++++++ internal/cosine/simd/cosine_neon.go | 3 ++ internal/cosine/simd/cosine_neon.s | 62 ++++++++++++++++++++++++ internal/cosine/simd/cosine_stub.go | 5 +- internal/cosine/simd/simd.go | 28 +++++++++-- internal/cosine/simd/simd_test.go | 45 ++++++++++++++---- 14 files changed, 331 insertions(+), 19 deletions(-) diff --git a/bruteforce.go b/bruteforce.go index 98e1d97..23b4d21 100644 --- a/bruteforce.go +++ b/bruteforce.go @@ -4,6 +4,7 @@ package search import ( + "math" "sort" "github.com/kelindar/search/internal/cosine/simd" @@ -36,6 +37,8 @@ func NewIndex[T any]() *Index[T] { // Add adds a new vector to the search index. func (b *Index[T]) Add(vx Vector, item T) { + normalize(vx) + b.arr = append(b.arr, entry[T]{ Vector: vx, Value: item, @@ -48,10 +51,13 @@ func (b *Index[T]) Search(query Vector, k int) []Result[T] { return nil } + // Normalize the query vector + normalize(query) + var relevance float64 dst := make(minheap[T], 0, k) for _, v := range b.arr { - simd.Cosine(&relevance, v.Vector, query) + simd.DotProduct(&relevance, query, v.Vector) result := Result[T]{ entry: v, Relevance: relevance, @@ -73,6 +79,21 @@ func (b *Index[T]) Search(query Vector, k int) []Result[T] { return dst } +// normalize normalizes the vector in place, resulting in a unit vector. This allows us +// to do a simple dot product to calculate the cosine similarity instead of +// the full cosine distance. 
+func normalize(v []float32) { + norm := float32(0) + for _, x := range v { + norm += x * x + } + + norm = float32(math.Sqrt(float64(norm))) + for i := range v { + v[i] /= norm + } +} + // --------------------------------- Heap --------------------------------- // minheap is a min-heap of top values, ordered by relevance. diff --git a/bruteforce_test.go b/bruteforce_test.go index 2cdbb50..f237289 100644 --- a/bruteforce_test.go +++ b/bruteforce_test.go @@ -13,7 +13,7 @@ import ( /* cpu: 13th Gen Intel(R) Core(TM) i7-13700K -BenchmarkIndex/search-24 4029 298055 ns/op 272 B/op 3 allocs/op +BenchmarkIndex/search-24 5366 217116 ns/op 272 B/op 3 allocs/op */ func BenchmarkIndex(b *testing.B) { data, err := loadDataset() diff --git a/internal/cosine/cosine_apple.c b/internal/cosine/cosine_apple.c index aa05d5d..8829c6e 100644 --- a/internal/cosine/cosine_apple.c +++ b/internal/cosine/cosine_apple.c @@ -26,4 +26,15 @@ void f32_cosine_distance(const float *x, const float *y, double *result, const u double cosine_similarity = (double)sum_xy / (double)denominator; *result = cosine_similarity; -} \ No newline at end of file +} + +void f32_dot_product(const float *x, const float *y, double *result, const uint64_t size) { + float sum = 0.0f; + + #pragma clang loop vectorize(enable) interleave(enable) + for (uint64_t i = 0; i < size; i++) { + sum += x[i] * y[i]; + } + + *result = (double)sum; +} diff --git a/internal/cosine/cosine_avx.c b/internal/cosine/cosine_avx.c index 5af3bae..7ddf109 100644 --- a/internal/cosine/cosine_avx.c +++ b/internal/cosine/cosine_avx.c @@ -13,8 +13,8 @@ void f32_cosine_distance(const float *x, const float *y, double *result, const u #pragma clang loop vectorize(enable) interleave_count(2) for (uint64_t i = 0; i < size; i++) { sum_xy += x[i] * y[i]; // Sum of x * y - sum_xx += x[i] * x[i]; // Sum of x * x - sum_yy += y[i] * y[i]; // Sum of y * y + sum_xx += x[i] * x[i]; // Sum of x * x + sum_yy += y[i] * y[i]; // Sum of y * y } // Calculate the final 
result @@ -26,4 +26,15 @@ void f32_cosine_distance(const float *x, const float *y, double *result, const u double cosine_similarity = (double)sum_xy / (double)denominator; *result = cosine_similarity; -} \ No newline at end of file +} + +void f32_dot_product(const float *x, const float *y, double *result, const uint64_t size) { + float sum = 0.0f; + + #pragma clang loop vectorize(enable) interleave(enable) + for (uint64_t i = 0; i < size; i++) { + sum += x[i] * y[i]; + } + + *result = (double)sum; +} diff --git a/internal/cosine/cosine_neon.c b/internal/cosine/cosine_neon.c index aa05d5d..8829c6e 100644 --- a/internal/cosine/cosine_neon.c +++ b/internal/cosine/cosine_neon.c @@ -26,4 +26,15 @@ void f32_cosine_distance(const float *x, const float *y, double *result, const u double cosine_similarity = (double)sum_xy / (double)denominator; *result = cosine_similarity; -} \ No newline at end of file +} + +void f32_dot_product(const float *x, const float *y, double *result, const uint64_t size) { + float sum = 0.0f; + + #pragma clang loop vectorize(enable) interleave(enable) + for (uint64_t i = 0; i < size; i++) { + sum += x[i] * y[i]; + } + + *result = (double)sum; +} diff --git a/internal/cosine/simd/cosine_apple.go b/internal/cosine/simd/cosine_apple.go index 9d7a7f3..4561eb7 100644 --- a/internal/cosine/simd/cosine_apple.go +++ b/internal/cosine/simd/cosine_apple.go @@ -7,3 +7,6 @@ import "unsafe" //go:noescape,nosplit func f32_cosine_distance(x unsafe.Pointer, y unsafe.Pointer, result unsafe.Pointer, size uint64) + +//go:noescape,nosplit +func f32_dot_product(x unsafe.Pointer, y unsafe.Pointer, result unsafe.Pointer, size uint64) diff --git a/internal/cosine/simd/cosine_apple.s b/internal/cosine/simd/cosine_apple.s index 133caef..ac2c8e0 100644 --- a/internal/cosine/simd/cosine_apple.s +++ b/internal/cosine/simd/cosine_apple.s @@ -114,3 +114,65 @@ BB0_11: WORD $0xfd000040 // str d0, [x2] WORD $0xa8c17bfd // ldp x29, x30, [sp], #16 ; 16-byte Folded Reload WORD 
$0xd65f03c0 // ret + +TEXT ·f32_dot_product(SB), $0-32 + MOVD x+0(FP), R0 + MOVD y+8(FP), R1 + MOVD result+16(FP), R2 + MOVD size+24(FP), R3 + WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill + WORD $0x910003fd // mov x29, sp + WORD $0xb40000c3 // cbz x3, LBB1_3 + WORD $0xf100207f // cmp x3, #8 + WORD $0x54000102 // b.hs LBB1_4 + WORD $0xd2800008 // mov x8, #0 + WORD $0x2f00e400 // movi d0, #0000000000000000 + WORD $0x14000018 // b LBB1_7 + +BB1_3: + WORD $0x2f00e400 // movi d0, #0000000000000000 + WORD $0xfd000040 // str d0, [x2] + WORD $0xa8c17bfd // ldp x29, x30, [sp], #16 ; 16-byte Folded Reload + WORD $0xd65f03c0 // ret + +BB1_4: + WORD $0x927df068 // and x8, x3, #0xfffffffffffffff8 + WORD $0x91004009 // add x9, x0, #16 + WORD $0x9100402a // add x10, x1, #16 + WORD $0x6f00e400 // movi.2d v0, #0000000000000000 + WORD $0xaa0803eb // mov x11, x8 + WORD $0x6f00e401 // movi.2d v1, #0000000000000000 + +BB1_5: + WORD $0xad7f8d22 // ldp q2, q3, [x9, #-16] + WORD $0xad7f9544 // ldp q4, q5, [x10, #-16] + WORD $0x4e22cc80 // fmla.4s v0, v4, v2 + WORD $0x4e23cca1 // fmla.4s v1, v5, v3 + WORD $0x91008129 // add x9, x9, #32 + WORD $0x9100814a // add x10, x10, #32 + WORD $0xf100216b // subs x11, x11, #8 + WORD $0x54ffff21 // b.ne LBB1_5 + WORD $0x4e20d420 // fadd.4s v0, v1, v0 + WORD $0x6e20d400 // faddp.4s v0, v0, v0 + WORD $0x7e30d800 // faddp.2s s0, v0 + WORD $0xeb03011f // cmp x8, x3 + WORD $0x54000140 // b.eq LBB1_9 + +BB1_7: + WORD $0xcb080069 // sub x9, x3, x8 + WORD $0xd37ef50a // lsl x10, x8, #2 + WORD $0x8b0a0028 // add x8, x1, x10 + WORD $0x8b0a000a // add x10, x0, x10 + +BB1_8: + WORD $0xbc404541 // ldr s1, [x10], #4 + WORD $0xbc404502 // ldr s2, [x8], #4 + WORD $0x1f010040 // fmadd s0, s2, s1, s0 + WORD $0xf1000529 // subs x9, x9, #1 + WORD $0x54ffff81 // b.ne LBB1_8 + +BB1_9: + WORD $0x1e22c000 // fcvt d0, s0 + WORD $0xfd000040 // str d0, [x2] + WORD $0xa8c17bfd // ldp x29, x30, [sp], #16 ; 16-byte Folded Reload + WORD $0xd65f03c0 // ret 
diff --git a/internal/cosine/simd/cosine_avx.go b/internal/cosine/simd/cosine_avx.go index 96ef196..5b41ea0 100644 --- a/internal/cosine/simd/cosine_avx.go +++ b/internal/cosine/simd/cosine_avx.go @@ -7,3 +7,6 @@ import "unsafe" //go:noescape,nosplit func f32_cosine_distance(x unsafe.Pointer, y unsafe.Pointer, result unsafe.Pointer, size uint64) + +//go:noescape,nosplit +func f32_dot_product(x unsafe.Pointer, y unsafe.Pointer, result unsafe.Pointer, size uint64) diff --git a/internal/cosine/simd/cosine_avx.s b/internal/cosine/simd/cosine_avx.s index d123f8c..14e197d 100644 --- a/internal/cosine/simd/cosine_avx.s +++ b/internal/cosine/simd/cosine_avx.s @@ -104,3 +104,74 @@ LBB0_9: BYTE $0x5d // pop rbp WORD $0xf8c5; BYTE $0x77 // vzeroupper BYTE $0xc3 // ret + +TEXT ·f32_dot_product(SB), $0-32 + MOVQ x+0(FP), DI + MOVQ y+8(FP), SI + MOVQ result+16(FP), DX + MOVQ size+24(FP), CX + BYTE $0x55 // push rbp + WORD $0x8948; BYTE $0xe5 // mov rbp, rsp + LONG $0xf8e48348 // and rsp, -8 + WORD $0x8548; BYTE $0xc9 // test rcx, rcx + JE LBB1_1 + LONG $0x20f98348 // cmp rcx, 32 + JAE LBB1_5 + LONG $0xc057f8c5 // vxorps xmm0, xmm0, xmm0 + WORD $0x3145; BYTE $0xc0 // xor r8d, r8d + JMP LBB1_4 + +LBB1_1: + LONG $0xc057f8c5 // vxorps xmm0, xmm0, xmm0 + LONG $0x0211fbc5 // vmovsd qword ptr [rdx], xmm0 + WORD $0x8948; BYTE $0xec // mov rsp, rbp + BYTE $0x5d // pop rbp + BYTE $0xc3 // ret + +LBB1_5: + WORD $0x8949; BYTE $0xc8 // mov r8, rcx + LONG $0xe0e08349 // and r8, -32 + LONG $0xc057f8c5 // vxorps xmm0, xmm0, xmm0 + WORD $0xc031 // xor eax, eax + LONG $0xc957f0c5 // vxorps xmm1, xmm1, xmm1 + LONG $0xd257e8c5 // vxorps xmm2, xmm2, xmm2 + LONG $0xdb57e0c5 // vxorps xmm3, xmm3, xmm3 + +LBB1_6: + LONG $0x2410fcc5; BYTE $0x86 // vmovups ymm4, ymmword ptr [rsi + 4*rax] + LONG $0x6c10fcc5; WORD $0x2086 // vmovups ymm5, ymmword ptr [rsi + 4*rax + 32] + LONG $0x7410fcc5; WORD $0x4086 // vmovups ymm6, ymmword ptr [rsi + 4*rax + 64] + LONG $0x7c10fcc5; WORD $0x6086 // vmovups ymm7, ymmword 
ptr [rsi + 4*rax + 96] + LONG $0xb85de2c4; WORD $0x8704 // vfmadd231ps ymm0, ymm4, ymmword ptr [rdi + 4*rax] + LONG $0xb855e2c4; WORD $0x874c; BYTE $0x20 // vfmadd231ps ymm1, ymm5, ymmword ptr [rdi + 4*rax + 32] + LONG $0xb84de2c4; WORD $0x8754; BYTE $0x40 // vfmadd231ps ymm2, ymm6, ymmword ptr [rdi + 4*rax + 64] + LONG $0xb845e2c4; WORD $0x875c; BYTE $0x60 // vfmadd231ps ymm3, ymm7, ymmword ptr [rdi + 4*rax + 96] + LONG $0x20c08348 // add rax, 32 + WORD $0x3949; BYTE $0xc0 // cmp r8, rax + JNE LBB1_6 + LONG $0xc058f4c5 // vaddps ymm0, ymm1, ymm0 + LONG $0xc058ecc5 // vaddps ymm0, ymm2, ymm0 + LONG $0xc058e4c5 // vaddps ymm0, ymm3, ymm0 + LONG $0x197de3c4; WORD $0x01c1 // vextractf128 xmm1, ymm0, 1 + LONG $0xc158f8c5 // vaddps xmm0, xmm0, xmm1 + LONG $0x0579e3c4; WORD $0x01c8 // vpermilpd xmm1, xmm0, 1 + LONG $0xc158f8c5 // vaddps xmm0, xmm0, xmm1 + LONG $0xc816fac5 // vmovshdup xmm1, xmm0 + LONG $0xc158fac5 // vaddss xmm0, xmm0, xmm1 + WORD $0x3949; BYTE $0xc8 // cmp r8, rcx + JE LBB1_8 + +LBB1_4: + LONG $0x107aa1c4; WORD $0x860c // vmovss xmm1, dword ptr [rsi + 4*r8] + LONG $0xb971a2c4; WORD $0x8704 // vfmadd231ss xmm0, xmm1, dword ptr [rdi + 4*r8] + WORD $0xff49; BYTE $0xc0 // inc r8 + WORD $0x394c; BYTE $0xc1 // cmp rcx, r8 + JNE LBB1_4 + +LBB1_8: + LONG $0xc05afac5 // vcvtss2sd xmm0, xmm0, xmm0 + LONG $0x0211fbc5 // vmovsd qword ptr [rdx], xmm0 + WORD $0x8948; BYTE $0xec // mov rsp, rbp + BYTE $0x5d // pop rbp + WORD $0xf8c5; BYTE $0x77 // vzeroupper + BYTE $0xc3 // ret diff --git a/internal/cosine/simd/cosine_neon.go b/internal/cosine/simd/cosine_neon.go index 53a0866..c15fa79 100644 --- a/internal/cosine/simd/cosine_neon.go +++ b/internal/cosine/simd/cosine_neon.go @@ -7,3 +7,6 @@ import "unsafe" //go:noescape,nosplit func f32_cosine_distance(x unsafe.Pointer, y unsafe.Pointer, result unsafe.Pointer, size uint64) + +//go:noescape,nosplit +func f32_dot_product(x unsafe.Pointer, y unsafe.Pointer, result unsafe.Pointer, size uint64) diff --git 
a/internal/cosine/simd/cosine_neon.s b/internal/cosine/simd/cosine_neon.s index 767c1e8..df7566f 100644 --- a/internal/cosine/simd/cosine_neon.s +++ b/internal/cosine/simd/cosine_neon.s @@ -114,3 +114,65 @@ LBB0_11: WORD $0xfd000040 // str d0, [x2] WORD $0xa8c17bfd // ldp x29, x30, [sp], #16 WORD $0xd65f03c0 // ret + +TEXT ·f32_dot_product(SB), $0-32 + MOVD x+0(FP), R0 + MOVD y+8(FP), R1 + MOVD result+16(FP), R2 + MOVD size+24(FP), R3 + WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]! + WORD $0x910003fd // mov x29, sp + WORD $0xb40000c3 // cbz x3, .LBB1_3 + WORD $0xf100207f // cmp x3, #8 + WORD $0x54000102 // b.hs .LBB1_4 + WORD $0x2f00e400 // movi d0, #0000000000000000 + WORD $0xaa1f03e8 // mov x8, xzr + WORD $0x14000018 // b .LBB1_7 + +LBB1_3: + WORD $0x2f00e400 // movi d0, #0000000000000000 + WORD $0xfd000040 // str d0, [x2] + WORD $0xa8c17bfd // ldp x29, x30, [sp], #16 + WORD $0xd65f03c0 // ret + +LBB1_4: + WORD $0x927df068 // and x8, x3, #0xfffffffffffffff8 + WORD $0x91004009 // add x9, x0, #16 + WORD $0x6f00e400 // movi v0.2d, #0000000000000000 + WORD $0x9100402a // add x10, x1, #16 + WORD $0x6f00e401 // movi v1.2d, #0000000000000000 + WORD $0xaa0803eb // mov x11, x8 + +LBB1_5: + WORD $0xad7f8d22 // ldp q2, q3, [x9, #-16] + WORD $0x91008129 // add x9, x9, #32 + WORD $0xf100216b // subs x11, x11, #8 + WORD $0xad7f9544 // ldp q4, q5, [x10, #-16] + WORD $0x9100814a // add x10, x10, #32 + WORD $0x4e22cc80 // fmla v0.4s, v4.4s, v2.4s + WORD $0x4e23cca1 // fmla v1.4s, v5.4s, v3.4s + WORD $0x54ffff21 // b.ne .LBB1_5 + WORD $0x4e20d420 // fadd v0.4s, v1.4s, v0.4s + WORD $0xeb03011f // cmp x8, x3 + WORD $0x6e20d400 // faddp v0.4s, v0.4s, v0.4s + WORD $0x7e30d800 // faddp s0, v0.2s + WORD $0x54000140 // b.eq .LBB1_9 + +LBB1_7: + WORD $0xd37ef50a // lsl x10, x8, #2 + WORD $0xcb080069 // sub x9, x3, x8 + WORD $0x8b0a0028 // add x8, x1, x10 + WORD $0x8b0a000a // add x10, x0, x10 + +LBB1_8: + WORD $0xbc404541 // ldr s1, [x10], #4 + WORD $0xbc404502 // ldr s2, [x8], #4 + WORD 
$0xf1000529 // subs x9, x9, #1 + WORD $0x1f010040 // fmadd s0, s2, s1, s0 + WORD $0x54ffff81 // b.ne .LBB1_8 + +LBB1_9: + WORD $0x1e22c000 // fcvt d0, s0 + WORD $0xfd000040 // str d0, [x2] + WORD $0xa8c17bfd // ldp x29, x30, [sp], #16 + WORD $0xd65f03c0 // ret diff --git a/internal/cosine/simd/cosine_stub.go b/internal/cosine/simd/cosine_stub.go index 2d43863..f8d481a 100644 --- a/internal/cosine/simd/cosine_stub.go +++ b/internal/cosine/simd/cosine_stub.go @@ -4,7 +4,10 @@ package simd import "unsafe" -// stub func f32_cosine_distance(x unsafe.Pointer, y unsafe.Pointer, result unsafe.Pointer, size uint64) { panic("not implemented") } + +func f32_dot_product(x unsafe.Pointer, y unsafe.Pointer, result unsafe.Pointer, size uint64) { + panic("not implemented") +} diff --git a/internal/cosine/simd/simd.go b/internal/cosine/simd/simd.go index 8d1c523..0ef4c1f 100644 --- a/internal/cosine/simd/simd.go +++ b/internal/cosine/simd/simd.go @@ -25,12 +25,26 @@ func Cosine(dst *float64, a, b []float32) { case hardware: f32_cosine_distance(unsafe.Pointer(&a[0]), unsafe.Pointer(&b[0]), unsafe.Pointer(dst), uint64(len(a))) default: - *dst = cosine(a, b) + *dst = genericCosine(a, b) } } -// cosine calculates the cosine similarity between two vectors -func cosine(x, y []float32) float64 { +// DotProduct calculates the dot product between two vectors and stores the result in the destination +func DotProduct(dst *float64, a, b []float32) { + if len(a) != len(b) { + panic("vectors must be of same length") + } + + switch { + case hardware: + f32_dot_product(unsafe.Pointer(&a[0]), unsafe.Pointer(&b[0]), unsafe.Pointer(dst), uint64(len(a))) + default: + *dst = genericDotProduct(a, b) + } +} + +// genericCosine calculates the cosine similarity between two vectors +func genericCosine(x, y []float32) float64 { var sum_xy, sum_xx, sum_yy float64 for i := range x { sum_xy += float64(x[i] * y[i]) @@ -45,3 +59,11 @@ func cosine(x, y []float32) float64 { return sum_xy / denominator } + 
+func genericDotProduct(a, b []float32) float64 { + var sum float64 + for i := range a { + sum += float64(a[i] * b[i]) + } + return sum +} diff --git a/internal/cosine/simd/simd_test.go b/internal/cosine/simd/simd_test.go index ee07d22..9cc52b0 100644 --- a/internal/cosine/simd/simd_test.go +++ b/internal/cosine/simd/simd_test.go @@ -9,27 +9,44 @@ import ( /* cpu: 13th Gen Intel(R) Core(TM) i7-13700K -BenchmarkCosine/std-24 14911036 80.46 ns/op 0 B/op 0 allocs/op -BenchmarkCosine/our-24 61780514 18.11 ns/op 0 B/op 0 allocs/op +BenchmarkSIMD/cos-std-24 14839326 81.02 ns/op 0 B/op 0 allocs/op +BenchmarkSIMD/cos-acc-24 66064378 18.21 ns/op 0 B/op 0 allocs/op +BenchmarkSIMD/dot-std-24 14868597 81.11 ns/op 0 B/op 0 allocs/op +BenchmarkSIMD/dot-acc-24 125554860 9.564 ns/op 0 B/op 0 allocs/op */ -func BenchmarkCosine(b *testing.B) { +func BenchmarkSIMD(b *testing.B) { x := randVec() y := randVec() - b.Run("std", func(b *testing.B) { + b.Run("cos-std", func(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { - cosine(x, y) + genericCosine(x, y) } }) - b.Run("our", func(b *testing.B) { + b.Run("cos-acc", func(b *testing.B) { var out float64 b.ResetTimer() for i := 0; i < b.N; i++ { Cosine(&out, x, y) } }) + + b.Run("dot-std", func(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + genericDotProduct(x, y) + } + }) + + b.Run("dot-acc", func(b *testing.B) { + var out float64 + b.ResetTimer() + for i := 0; i < b.N; i++ { + DotProduct(&out, x, y) + } + }) } func TestCosine(t *testing.T) { @@ -39,8 +56,20 @@ func TestCosine(t *testing.T) { var actual float64 Cosine(&actual, x, y) - expect := cosine(x, y) - assert.InDelta(t, expect, actual, 1e-4, "expected %v, got %v", cosine(x, y), actual) + expect := genericCosine(x, y) + assert.InDelta(t, expect, actual, 1e-4, "expected %v, got %v", genericCosine(x, y), actual) + } +} + +func TestDotProduct(t *testing.T) { + for i := 0; i < 100; i++ { + x := randVec() + y := randVec() + + var actual float64 + 
DotProduct(&actual, x, y) + expect := genericDotProduct(x, y) + assert.InDelta(t, expect, actual, 1e-4, "expected %v, got %v", expect, actual) } }