diff --git a/bits.go b/bits.go index f61013a..ba812a3 100644 --- a/bits.go +++ b/bits.go @@ -37,7 +37,7 @@ func metaMatchH2(m *metadata, h h2) bitset { } func metaMatchEmpty(m *metadata) bitset { - return hasZeroByte(castUint64(m) ^ hiBits) + return hasZeroByte(castUint64(m)) } func nextMatch(b *bitset) uint32 { diff --git a/bits_amd64.go b/bits_amd64.go index 2d95b40..7256673 100644 --- a/bits_amd64.go +++ b/bits_amd64.go @@ -31,12 +31,12 @@ const ( type bitset uint16 func metaMatchH2(m *metadata, h h2) bitset { - b := simd.MatchMetadata((*[16]int8)(m), int8(h)) + b := simd.MatchMetadata((*[groupSize]uint8)(m), uint8(h)) return bitset(b) } func metaMatchEmpty(m *metadata) bitset { - b := simd.MatchMetadata((*[16]int8)(m), empty) + b := simd.MatchEmpty((*[groupSize]uint8)(m)) return bitset(b) } diff --git a/bits_test.go b/bits_test.go index b928d55..ce75a31 100644 --- a/bits_test.go +++ b/bits_test.go @@ -26,37 +26,40 @@ import ( func TestMatchMetadata(t *testing.T) { var meta metadata for i := range meta { - meta[i] = int8(i) + meta[i] = uint8(i) + h2Offset } t.Run("metaMatchH2", func(t *testing.T) { - for _, x := range meta { - mask := metaMatchH2(&meta, h2(x)) + for i, m := range meta { + mask := metaMatchH2(&meta, h2(m)) assert.NotZero(t, mask) - assert.Equal(t, uint32(x), nextMatch(&mask)) + assert.Equal(t, uint32(i), nextMatch(&mask)) } }) t.Run("metaMatchEmpty", func(t *testing.T) { mask := metaMatchEmpty(&meta) assert.Equal(t, mask, bitset(0)) + for i := range meta { meta[i] = empty mask = metaMatchEmpty(&meta) assert.NotZero(t, mask) assert.Equal(t, uint32(i), nextMatch(&mask)) - meta[i] = int8(i) + meta[i] = uint8(i) + h2Offset } }) t.Run("nextMatch", func(t *testing.T) { + const needle = uint8(42) + h2Offset + // test iterating multiple matches - meta = newEmptyMetadata() + meta = metadata{} mask := metaMatchEmpty(&meta) for i := range meta { assert.Equal(t, uint32(i), nextMatch(&mask)) } for i := 0; i < len(meta); i += 2 { - meta[i] = int8(42) + meta[i] 
= needle } - mask = metaMatchH2(&meta, h2(42)) + mask = metaMatchH2(&meta, h2(needle)) for i := 0; i < len(meta); i += 2 { assert.Equal(t, uint32(i), nextMatch(&mask)) } @@ -66,7 +69,7 @@ func TestMatchMetadata(t *testing.T) { func BenchmarkMatchMetadata(b *testing.B) { var meta metadata for i := range meta { - meta[i] = int8(i) + meta[i] = uint8(i) } var mask bitset for i := 0; i < b.N; i++ { @@ -90,14 +93,13 @@ func nextPow2(x uint32) uint32 { } func TestConstants(t *testing.T) { - c1, c2 := empty, tombstone - assert.Equal(t, byte(0b1000_0000), byte(c1)) - assert.Equal(t, byte(0b1000_0000), reinterpretCast(c1)) - assert.Equal(t, byte(0b1111_1110), byte(c2)) - assert.Equal(t, byte(0b1111_1110), reinterpretCast(c2)) + assert.Equal(t, byte(0b0000_0000), empty) + assert.Equal(t, byte(0b0000_0000), reinterpretCast(empty)) + assert.Equal(t, byte(0b0000_0001), tombstone) + assert.Equal(t, byte(0b0000_0001), reinterpretCast(tombstone)) } -func reinterpretCast(i int8) byte { +func reinterpretCast(i uint8) byte { return *(*byte)(unsafe.Pointer(&i)) } diff --git a/map.go b/map.go index d2b188e..8945405 100644 --- a/map.go +++ b/map.go @@ -36,7 +36,7 @@ type Map[K comparable, V any] struct { // metadata is the h2 metadata array for a group. // find operations first probe the controls bytes // to filter candidates before matching keys -type metadata [groupSize]int8 +type metadata [groupSize]uint8 // group is a group of 16 key-value pairs type group[K comparable, V any] struct { @@ -47,15 +47,16 @@ type group[K comparable, V any] struct { const ( h1Mask uint64 = 0xffff_ffff_ffff_ff80 h2Mask uint64 = 0x0000_0000_0000_007f - empty int8 = -128 // 0b1000_0000 - tombstone int8 = -2 // 0b1111_1110 + h2Offset = 2 + empty uint8 = 0b0000_0000 + tombstone uint8 = 0b0000_0001 ) // h1 is a 57 bit hash prefix type h1 uint64 // h2 is a 7 bit hash suffix -type h2 int8 +type h2 uint8 // NewMap constructs a Map. 
func NewMap[K comparable, V any](sz uint32) (m *Map[K, V]) { @@ -66,9 +67,6 @@ func NewMap[K comparable, V any](sz uint32) (m *Map[K, V]) { hash: maphash.NewHasher[K](), limit: groups * maxAvgGroupLoad, } - for i := range m.ctrl { - m.ctrl[i] = newEmptyMetadata() - } return } @@ -150,7 +148,7 @@ func (m *Map[K, V]) Put(key K, value V) { s := nextMatch(&matches) m.groups[g].keys[s] = key m.groups[g].values[s] = value - m.ctrl[g][s] = int8(lo) + m.ctrl[g][s] = uint8(lo) m.resident++ return } @@ -236,10 +234,8 @@ func (m *Map[K, V]) Iter(cb func(k K, v V) (stop bool)) { // Clear removes all elements from the Map. func (m *Map[K, V]) Clear() { - for i, c := range m.ctrl { - for j := range c { - m.ctrl[i][j] = empty - } + for i := range m.ctrl { + m.ctrl[i] = metadata{} } var k K var v V @@ -302,9 +298,6 @@ func (m *Map[K, V]) rehash(n uint32) { groups, ctrl := m.groups, m.ctrl m.groups = make([]group[K, V], n) m.ctrl = make([]metadata, n) - for i := range m.ctrl { - m.ctrl[i] = newEmptyMetadata() - } m.hash = maphash.NewSeed(m.hash) m.limit = n * maxAvgGroupLoad m.resident, m.dead = 0, 0 @@ -333,15 +326,11 @@ func numGroups(n uint32) (groups uint32) { return } -func newEmptyMetadata() (meta metadata) { - for i := range meta { - meta[i] = empty - } - return -} - +// splitHash extracts the h1 and h2 components from a 64 bit hash. +// h1 is the upper 57 bits, h2 is the lower 7 bits plus two. +// By adding 2, it ensures that h2 is never uint8(0) or uint8(1). 
func splitHash(h uint64) (h1, h2) { - return h1((h & h1Mask) >> 7), h2(h & h2Mask) + return h1((h & h1Mask) >> 7), h2(h&h2Mask) + h2Offset } func probeStart(hi h1, groups int) uint32 { diff --git a/map_bench_test.go b/map_bench_test.go index ed4b096..397926d 100644 --- a/map_bench_test.go +++ b/map_bench_test.go @@ -54,6 +54,34 @@ func BenchmarkInt64Maps(b *testing.B) { } } +func BenchmarkNewMap(b *testing.B) { + sizes := []int{16, 128, 1024, 8192, 131072} + for _, n := range sizes { + b.Run("n="+strconv.Itoa(n), func(b *testing.B) { + for i := 0; i < b.N; i++ { + m := NewMap[int, int](uint32(n)) + m.Count() + } + }) + } +} + +func BenchmarkMap_Put_Growing(b *testing.B) { + sizes := []int{16, 128, 1024, 8192, 131072} + for _, n := range sizes { + b.Run("n="+strconv.Itoa(n), func(b *testing.B) { + data := generateInt64Data(n) + b.ResetTimer() + for i := 0; i < b.N; i++ { + m := NewMap[int64, int64](uint32(n)) + for _, k := range data { + m.Put(k, k) + } + } + }) + } +} + func TestMemoryFootprint(t *testing.T) { t.Skip("unskip for memory footprint stats") var samples []float64 diff --git a/simd/asm.go b/simd/asm.go deleted file mode 100644 index 6e775ee..0000000 --- a/simd/asm.go +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2023 Dolthub, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build ignore -// +build ignore - -package main - -import ( - . "github.com/mmcloughlin/avo/build" - . 
"github.com/mmcloughlin/avo/operand" -) - -func main() { - ConstraintExpr("amd64") - - TEXT("MatchMetadata", NOSPLIT, "func(metadata *[16]int8, hash int8) uint16") - Doc("MatchMetadata performs a 16-way probe of |metadata| using SSE instructions", - "nb: |metadata| must be an aligned pointer") - m := Mem{Base: Load(Param("metadata"), GP64())} - h := Load(Param("hash"), GP32()) - mask := GP32() - - x0, x1, x2 := XMM(), XMM(), XMM() - MOVD(h, x0) - PXOR(x1, x1) - PSHUFB(x1, x0) - MOVOU(m, x2) - PCMPEQB(x2, x0) - PMOVMSKB(x0, mask) - - Store(mask.As16(), ReturnIndex(0)) - RET() - Generate() -} diff --git a/simd/match.s b/simd/match.s index 4ae29e7..705c320 100644 --- a/simd/match.s +++ b/simd/match.s @@ -1,14 +1,14 @@ -// Code generated by command: go run asm.go -out match.s -stubs match_amd64.go. DO NOT EDIT. +// Code generated by command: go run asm.go -pkg simd -out ../match.s -stubs ../match_amd64.go. DO NOT EDIT. //go:build amd64 #include "textflag.h" -// func MatchMetadata(metadata *[16]int8, hash int8) uint16 +// func MatchMetadata(metadata *[16]uint8, hash uint8) uint16 // Requires: SSE2, SSSE3 TEXT ·MatchMetadata(SB), NOSPLIT, $0-18 MOVQ metadata+0(FP), AX - MOVBLSX hash+8(FP), CX + MOVBLZX hash+8(FP), CX MOVD CX, X0 PXOR X1, X1 PSHUFB X1, X0 @@ -17,3 +17,14 @@ TEXT ·MatchMetadata(SB), NOSPLIT, $0-18 PMOVMSKB X0, AX MOVW AX, ret+16(FP) RET + +// func MatchEmpty(metadata *[16]uint8) uint16 +// Requires: SSE2 +TEXT ·MatchEmpty(SB), NOSPLIT, $0-10 + MOVQ metadata+0(FP), AX + PXOR X0, X0 + MOVOU (AX), X1 + PCMPEQB X1, X0 + PMOVMSKB X0, AX + MOVW AX, ret+8(FP) + RET diff --git a/simd/match_amd64.go b/simd/match_amd64.go index 538c8e1..ab8d0b6 100644 --- a/simd/match_amd64.go +++ b/simd/match_amd64.go @@ -1,4 +1,4 @@ -// Code generated by command: go run asm.go -out match.s -stubs match_amd64.go. DO NOT EDIT. +// Code generated by command: go run asm.go -pkg simd -out ../match.s -stubs ../match_amd64.go. DO NOT EDIT. 
//go:build amd64 @@ -6,4 +6,8 @@ package simd // MatchMetadata performs a 16-way probe of |metadata| using SSE instructions // nb: |metadata| must be an aligned pointer -func MatchMetadata(metadata *[16]int8, hash int8) uint16 +func MatchMetadata(metadata *[16]uint8, hash uint8) uint16 + +// MatchEmpty performs a 16-way probe of |metadata| for zero bytes using SSE instructions +// nb: |metadata| must be an aligned pointer +func MatchEmpty(metadata *[16]uint8) uint16 diff --git a/simd/src/asm.go b/simd/src/asm.go new file mode 100644 index 0000000..58410c2 --- /dev/null +++ b/simd/src/asm.go @@ -0,0 +1,63 @@ +//go:generate go run asm.go -pkg simd -out ../match.s -stubs ../match_amd64.go +// Copyright 2023 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + . "github.com/mmcloughlin/avo/build" + . 
"github.com/mmcloughlin/avo/operand" +) + +func main() { + ConstraintExpr("amd64") + + { + TEXT("MatchMetadata", NOSPLIT, "func(metadata *[16]uint8, hash uint8) uint16") + Doc("MatchMetadata performs a 16-way probe of |metadata| using SSE instructions", + "nb: |metadata| must be an aligned pointer") + m := Mem{Base: Load(Param("metadata"), GP64())} + h := Load(Param("hash"), GP32()) + mask := GP32() + + x0, x1, x2 := XMM(), XMM(), XMM() + MOVD(h, x0) + PXOR(x1, x1) + PSHUFB(x1, x0) + MOVOU(m, x2) + PCMPEQB(x2, x0) + PMOVMSKB(x0, mask) + + Store(mask.As16(), ReturnIndex(0)) + RET() + } + + { + TEXT("MatchEmpty", NOSPLIT, "func(metadata *[16]uint8) uint16") + Doc("MatchEmpty performs a 16-way probe of |metadata| for zero bytes using SSE instructions", + "nb: |metadata| must be an aligned pointer") + m := Mem{Base: Load(Param("metadata"), GP64())} + mask := GP32() + + x0, x1 := XMM(), XMM() + PXOR(x0, x0) + MOVOU(m, x1) + PCMPEQB(x1, x0) + PMOVMSKB(x0, mask) + + Store(mask.As16(), ReturnIndex(0)) + RET() + } + Generate() +} diff --git a/simd/src/go.mod b/simd/src/go.mod new file mode 100644 index 0000000..2b58947 --- /dev/null +++ b/simd/src/go.mod @@ -0,0 +1,10 @@ +module github.com/dolthub/swiss/simd/src + +go 1.22.4 + +require github.com/mmcloughlin/avo v0.6.0 + +require ( + golang.org/x/mod v0.14.0 // indirect + golang.org/x/tools v0.16.1 // indirect +) diff --git a/simd/src/go.sum b/simd/src/go.sum new file mode 100644 index 0000000..483bba8 --- /dev/null +++ b/simd/src/go.sum @@ -0,0 +1,8 @@ +github.com/mmcloughlin/avo v0.6.0 h1:QH6FU8SKoTLaVs80GA8TJuLNkUYl4VokHKlPhVDg4YY= +github.com/mmcloughlin/avo v0.6.0/go.mod h1:8CoAGaCSYXtCPR+8y18Y9aB/kxb8JSS6FRI7mSkvD+8= +golang.org/x/mod v0.14.0 h1:dGoOF9QVLYng8IHTm7BAyWqCqSheQ5pYWGhzW00YJr0= +golang.org/x/mod v0.14.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/sync v0.5.0 h1:60k92dhOjHxJkrqnwsfl8KuaHbn/5dl0lUPUklKo3qE= +golang.org/x/sync v0.5.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= 
+golang.org/x/tools v0.16.1 h1:TLyB3WofjdOEepBHAU20JdNC1Zbg87elYofWYAY5oZA= +golang.org/x/tools v0.16.1/go.mod h1:kYVVN6I1mBNoB1OX+noeBjbRk4IUEPa7JJ+TJMEooJ0=