From 9625e3e263e4bee40df8cfe65cd45aa0155ffad8 Mon Sep 17 00:00:00 2001 From: Michal Hruby Date: Sun, 29 Oct 2023 00:49:02 +0100 Subject: [PATCH 1/2] Remove bounds checks --- bitset.go | 110 ++++++++++++++++++++++++++++----------- bitset_benchmark_test.go | 81 ++++++++++++++++++++++++++++ popcnt_19.go | 16 ++++++ 3 files changed, 176 insertions(+), 31 deletions(-) diff --git a/bitset.go b/bitset.go index 43a0dac..039045b 100644 --- a/bitset.go +++ b/bitset.go @@ -247,8 +247,13 @@ func (b *BitSet) FlipRange(start, end uint) *BitSet { var startWord uint = start >> log2WordSize var endWord uint = end >> log2WordSize b.set[startWord] ^= ^(^uint64(0) << wordsIndex(start)) - for i := startWord; i < endWord; i++ { - b.set[i] = ^b.set[i] + if endWord > 0 { + // bounds check elimination + data := b.set + _ = data[endWord-1] + for i := startWord; i < endWord; i++ { + data[i] = ^data[i] + } } if end&(wordSize-1) != 0 { b.set[endWord] ^= ^uint64(0) >> wordsIndex(-end) @@ -427,7 +432,11 @@ func (b *BitSet) NextSet(i uint) (uint, bool) { if w != 0 { return i + trailingZeroes64(w), true } - x = x + 1 + x++ + // bounds check elimination in the loop + if x < 0 { + return 0, false + } for x < len(b.set) { if b.set[x] != 0 { return uint(x)*wordSize + trailingZeroes64(b.set[x]), true @@ -516,6 +525,10 @@ func (b *BitSet) NextClear(i uint) (uint, bool) { return index, true } x++ + // bounds check elimination in the loop + if x < 0 { + return 0, false + } for x < len(b.set) { index = uint(x)*wordSize + trailingZeroes64(^b.set[x]) if b.set[x] != allBits && index < b.length { @@ -615,6 +628,12 @@ func (b *BitSet) Equal(c *BitSet) bool { return true } wn := b.wordCount() + // bounds check elimination + if wn <= 0 { + return true + } + _ = b.set[wn-1] + _ = c.set[wn-1] for p := 0; p < wn; p++ { if c.set[p] != b.set[p] { return false @@ -635,9 +654,9 @@ func (b *BitSet) Difference(compare *BitSet) (result *BitSet) { panicIfNull(b) panicIfNull(compare) result = b.Clone() // clone b (in case b is bigger than compare) - l := int(compare.wordCount()) - if l > int(b.wordCount()) { - l = int(b.wordCount()) + l := compare.wordCount() + if l > b.wordCount() { + l = b.wordCount() } for i := 0; i < l; i++ { result.set[i] = b.set[i] &^ compare.set[i] @@ -649,9 +668,9 @@ func (b *BitSet) Difference(compare *BitSet) (result *BitSet) { func (b *BitSet) DifferenceCardinality(compare *BitSet) uint { panicIfNull(b) panicIfNull(compare) - l := int(compare.wordCount()) - if l > int(b.wordCount()) { - l = int(b.wordCount()) + l := compare.wordCount() + if l > b.wordCount() { + l = b.wordCount() } cnt := uint64(0) cnt += popcntMaskSlice(b.set[:l], compare.set[:l]) @@ -664,12 +683,19 @@ func (b *BitSet) DifferenceCardinality(compare *BitSet) uint { func (b *BitSet) InPlaceDifference(compare *BitSet) { panicIfNull(b) panicIfNull(compare) - l := int(compare.wordCount()) - if l > int(b.wordCount()) { - l = int(b.wordCount()) + l := compare.wordCount() + if l > b.wordCount() { + l = b.wordCount() + } + if l <= 0 { + return } + // bounds check elimination + data, cmpData := b.set, compare.set + _ = data[l-1] + _ = cmpData[l-1] for i := 0; i < l; i++ { - b.set[i] &^= compare.set[i] + data[i] &^= cmpData[i] } } @@ -712,15 +738,24 @@ func (b *BitSet) IntersectionCardinality(compare *BitSet) uint { func (b *BitSet) InPlaceIntersection(compare *BitSet) { panicIfNull(b) panicIfNull(compare) - l := int(compare.wordCount()) - if l > int(b.wordCount()) { - l = int(b.wordCount()) - } - for i := 0; i < l; i++ { - b.set[i] &= compare.set[i] + l := compare.wordCount() + if l > b.wordCount() { + l = b.wordCount() + } + if l > 0 { + // bounds check elimination + data, cmpData := b.set, compare.set + _ = data[l-1] + _ = cmpData[l-1] + + for i := 0; i < l; i++ { + data[i] &= cmpData[i] + } } - for i := l; i < len(b.set); i++ { - b.set[i] = 0 + if l >= 0 { + for i := l; i < len(b.set); i++ { + b.set[i] = 0 + } } if compare.length > 0 { if compare.length-1 >= b.length { @@ -760,15 +795,22 @@ func (b *BitSet) UnionCardinality(compare *BitSet) uint { func (b *BitSet) InPlaceUnion(compare *BitSet) { panicIfNull(b) panicIfNull(compare) - l := int(compare.wordCount()) - if l > int(b.wordCount()) { - l = int(b.wordCount()) + l := compare.wordCount() + if l > b.wordCount() { + l = b.wordCount() } if compare.length > 0 && compare.length-1 >= b.length { b.extendSet(compare.length - 1) } - for i := 0; i < l; i++ { - b.set[i] |= compare.set[i] + if l > 0 { + // bounds check elimination + data, cmpData := b.set, compare.set + _ = data[l-1] + _ = cmpData[l-1] + + for i := 0; i < l; i++ { + data[i] |= cmpData[i] + } } if len(compare.set) > l { for i := l; i < len(compare.set); i++ { @@ -808,15 +850,21 @@ func (b *BitSet) SymmetricDifferenceCardinality(compare *BitSet) uint { func (b *BitSet) InPlaceSymmetricDifference(compare *BitSet) { panicIfNull(b) panicIfNull(compare) - l := int(compare.wordCount()) - if l > int(b.wordCount()) { - l = int(b.wordCount()) + l := compare.wordCount() + if l > b.wordCount() { + l = b.wordCount() } if compare.length > 0 && compare.length-1 >= b.length { b.extendSet(compare.length - 1) } - for i := 0; i < l; i++ { - b.set[i] ^= compare.set[i] + if l > 0 { + // bounds check elimination + data, cmpData := b.set, compare.set + _ = data[l-1] + _ = cmpData[l-1] + for i := 0; i < l; i++ { + data[i] ^= cmpData[i] + } } if len(compare.set) > l { for i := l; i < len(compare.set); i++ { diff --git a/bitset_benchmark_test.go b/bitset_benchmark_test.go index 627eecb..93a6b33 100644 --- a/bitset_benchmark_test.go +++ b/bitset_benchmark_test.go @@ -89,6 +89,87 @@ func BenchmarkSparseIterate(b *testing.B) { } } +// go test -bench=BitsetOps +func BenchmarkBitsetOps(b *testing.B) { + // let's not write into s inside the benchmarks + s := New(100000) + for i := 0; i < 100000; i += 100 { + s.Set(uint(i)) + } + cpy := s.Clone() + + b.Run("Equal", func(b *testing.B) { + for i := 0; i < b.N; i++ { + s.Equal(cpy) + } + }) + + b.Run("FlipRange", func(b *testing.B) { + s = s.Clone() + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.FlipRange(0, 100000) + } + }) + + b.Run("NextSet", func(b *testing.B) { + s = New(100000) + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.NextSet(0) + } + }) + + b.Run("NextClear", func(b *testing.B) { + s = New(100000) + s.FlipRange(0, 100000) + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.NextClear(0) + } + }) + + b.Run("DifferenceCardinality", func(b *testing.B) { + empty := New(100000) + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.DifferenceCardinality(empty) + } + }) + + b.Run("InPlaceDifference", func(b *testing.B) { + s = s.Clone() + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.InPlaceDifference(cpy) + } + }) + + b.Run("InPlaceUnion", func(b *testing.B) { + s = s.Clone() + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.InPlaceUnion(cpy) + } + }) + + b.Run("InPlaceIntersection", func(b *testing.B) { + s = s.Clone() + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.InPlaceIntersection(cpy) + } + }) + + b.Run("InPlaceSymmetricDifference", func(b *testing.B) { + s = s.Clone() + b.ResetTimer() + for i := 0; i < b.N; i++ { + s.InPlaceSymmetricDifference(cpy) + } + }) +} + // go test -bench=LemireCreate // see http://lemire.me/blog/2016/09/22/swift-versus-java-the-bitset-performance-test/ func BenchmarkLemireCreate(b *testing.B) { diff --git a/popcnt_19.go b/popcnt_19.go index 9a3766a..7855c04 100644 --- a/popcnt_19.go +++ b/popcnt_19.go @@ -15,6 +15,10 @@ func popcntSlice(s []uint64) uint64 { func popcntMaskSlice(s, m []uint64) uint64 { var cnt int + // this explicit check eliminates a bounds check in the loop + if len(m) < len(s) { + panic("mask slice is too short") + } for i := range s { cnt += bits.OnesCount64(s[i] &^ m[i]) } @@ -23,6 +27,10 @@ func popcntMaskSlice(s, m []uint64) uint64 { func popcntAndSlice(s, m []uint64) uint64 { var cnt int + // this explicit check eliminates a bounds check in the loop + if len(m) < len(s) { + panic("mask slice is too short") + } for i := range s { cnt += bits.OnesCount64(s[i] & m[i]) } @@ -31,6 +39,10 @@ func popcntAndSlice(s, m []uint64) uint64 { func popcntOrSlice(s, m []uint64) uint64 { var cnt int + // this explicit check eliminates a bounds check in the loop + if len(m) < len(s) { + panic("mask slice is too short") + } for i := range s { cnt += bits.OnesCount64(s[i] | m[i]) } @@ -39,6 +51,10 @@ func popcntOrSlice(s, m []uint64) uint64 { func popcntXorSlice(s, m []uint64) uint64 { var cnt int + // this explicit check eliminates a bounds check in the loop + if len(m) < len(s) { + panic("mask slice is too short") + } for i := range s { cnt += bits.OnesCount64(s[i] ^ m[i]) } From dbb95273c073ab548a90bacd19fd82bc44936341 Mon Sep 17 00:00:00 2001 From: Michal Hruby Date: Sun, 29 Oct 2023 01:20:19 +0100 Subject: [PATCH 2/2] speed up NextClear --- bitset.go | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/bitset.go b/bitset.go index 039045b..642cba6 100644 --- a/bitset.go +++ b/bitset.go @@ -441,7 +441,7 @@ func (b *BitSet) NextSet(i uint) (uint, bool) { if b.set[x] != 0 { return uint(x)*wordSize + trailingZeroes64(b.set[x]), true } - x = x + 1 + x++ } return 0, false @@ -530,9 +530,11 @@ func (b *BitSet) NextClear(i uint) (uint, bool) { return 0, false } for x < len(b.set) { - index = uint(x)*wordSize + trailingZeroes64(^b.set[x]) - if b.set[x] != allBits && index < b.length { - return index, true + if b.set[x] != allBits { + index = uint(x)*wordSize + trailingZeroes64(^b.set[x]) + if index < b.length { + return index, true + } } x++ }