diff --git a/go.mod b/go.mod index db886143a8eb..706fb2453272 100644 --- a/go.mod +++ b/go.mod @@ -112,6 +112,8 @@ require ( k8s.io/klog v1.0.0 ) +require github.com/willf/bloom v2.0.3+incompatible + require ( cloud.google.com/go v0.100.2 // indirect cloud.google.com/go/compute v1.3.0 // indirect @@ -235,12 +237,14 @@ require ( github.com/sercand/kuberesolver v2.4.0+incompatible // indirect github.com/shopspring/decimal v1.2.0 // indirect github.com/sirupsen/logrus v1.8.1 // indirect + github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72 // indirect github.com/spf13/cast v1.3.1 // indirect github.com/spf13/pflag v1.0.5 // indirect github.com/stretchr/objx v0.2.0 // indirect github.com/uber/jaeger-lib v2.4.1+incompatible // indirect github.com/ugorji/go/codec v1.1.7 // indirect github.com/weaveworks/promrus v1.2.0 // indirect + github.com/willf/bitset v1.1.11 // indirect github.com/xdg-go/pbkdf2 v1.0.0 // indirect github.com/xdg-go/stringprep v1.0.2 // indirect github.com/yuin/gopher-lua v0.0.0-20200816102855-ee81675732da // indirect diff --git a/go.sum b/go.sum index cf40f5921ab3..75760e1e8b6d 100644 --- a/go.sum +++ b/go.sum @@ -1902,7 +1902,10 @@ github.com/weaveworks/promrus v1.2.0/go.mod h1:SaE82+OJ91yqjrE1rsvBWVzNZKcHYFtMU github.com/willf/bitset v1.1.3/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4= github.com/willf/bitset v1.1.9/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4= github.com/willf/bitset v1.1.11-0.20200630133818-d5bec3311243/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4= +github.com/willf/bitset v1.1.11 h1:N7Z7E9UvjW+sGsEl7k/SJrvY2reP1A07MrGuCjIOjRE= github.com/willf/bitset v1.1.11/go.mod h1:83CECat5yLh5zVOf4P1ErAgKA5UDvKtgyUABdr3+MjI= +github.com/willf/bloom v2.0.3+incompatible h1:QDacWdqcAUI1MPOwIQZRy9kOR7yxfyEmxX8Wdm2/JPA= +github.com/willf/bloom v2.0.3+incompatible/go.mod h1:MmAltL9pDMNTrvUkxdg0k0q5I0suxmuwp3KbyrZLOZ8= github.com/wvanbergen/kafka v0.0.0-20171203153745-e2edea948ddf/go.mod 
h1:nxx7XRXbR9ykhnC8lXqQyJS0rfvJGxKyKw/sT1YOttg= github.com/wvanbergen/kazoo-go v0.0.0-20180202103751-f72d8611297a/go.mod h1:vQQATAGxVK20DC1rRubTJbZDDhhpA4QfU02pMdPxGO4= github.com/xanzy/go-gitlab v0.15.0/go.mod h1:8zdQa/ri1dfn8eS3Ir1SyfvOKlw7WBJ8DVThkpGiXrs= diff --git a/pkg/storage/stores/tsdb/head_manager.go b/pkg/storage/stores/tsdb/head_manager.go index 589e34a6baf5..186367190fd1 100644 --- a/pkg/storage/stores/tsdb/head_manager.go +++ b/pkg/storage/stores/tsdb/head_manager.go @@ -601,6 +601,14 @@ func (t *tenantHeads) LabelValues(ctx context.Context, userID string, from, thro } +func (t *tenantHeads) Stats(ctx context.Context, userID string, from, through model.Time, blooms *StatsBlooms, shard *index.ShardAnnotation, matchers ...*labels.Matcher) (*StatsBlooms, error) { + idx, ok := t.tenantIndex(userID, from, through) + if !ok { + return blooms, nil + } + return idx.Stats(ctx, userID, from, through, blooms, shard, matchers...) +} + // helper only used in building TSDBs func (t *tenantHeads) forAll(fn func(user string, ls labels.Labels, chks index.ChunkMetas)) error { for i, shard := range t.tenants { diff --git a/pkg/storage/stores/tsdb/index.go b/pkg/storage/stores/tsdb/index.go index 1df7184a50de..df29eca95c2f 100644 --- a/pkg/storage/stores/tsdb/index.go +++ b/pkg/storage/stores/tsdb/index.go @@ -50,6 +50,22 @@ type Index interface { Series(ctx context.Context, userID string, from, through model.Time, res []Series, shard *index.ShardAnnotation, matchers ...*labels.Matcher) ([]Series, error) LabelNames(ctx context.Context, userID string, from, through model.Time, matchers ...*labels.Matcher) ([]string, error) LabelValues(ctx context.Context, userID string, from, through model.Time, name string, matchers ...*labels.Matcher) ([]string, error) + Stats(ctx context.Context, userID string, from, through model.Time, blooms *StatsBlooms, shard *index.ShardAnnotation, matchers ...*labels.Matcher) (*StatsBlooms, error) +} + +type Stats struct { + Streams uint64 + 
Chunks uint64 + Bytes uint64 + Entries uint64 +} + +func (s Stats) Merge(x Stats) Stats { + s.Streams += x.Streams + s.Chunks += x.Chunks + s.Bytes += x.Bytes + s.Entries += x.Entries + return s } type NoopIndex struct{} @@ -71,4 +87,8 @@ func (NoopIndex) LabelValues(ctx context.Context, userID string, from, through m return nil, nil } +func (NoopIndex) Stats(ctx context.Context, userID string, from, through model.Time, blooms *StatsBlooms, shard *index.ShardAnnotation, matchers ...*labels.Matcher) (*StatsBlooms, error) { + return nil, nil +} + func (NoopIndex) SetChunkFilterer(chunkFilter chunk.RequestChunkFilterer) {} diff --git a/pkg/storage/stores/tsdb/index_shipper_querier.go b/pkg/storage/stores/tsdb/index_shipper_querier.go index d292822a62eb..45c192535d06 100644 --- a/pkg/storage/stores/tsdb/index_shipper_querier.go +++ b/pkg/storage/stores/tsdb/index_shipper_querier.go @@ -110,3 +110,12 @@ func (i *indexShipperQuerier) LabelValues(ctx context.Context, userID string, fr } return idx.LabelValues(ctx, userID, from, through, name, matchers...) } + +func (i *indexShipperQuerier) Stats(ctx context.Context, userID string, from, through model.Time, blooms *StatsBlooms, shard *index.ShardAnnotation, matchers ...*labels.Matcher) (*StatsBlooms, error) { + idx, err := i.indices(ctx, from, through, userID) + if err != nil { + return blooms, err + } + + return idx.Stats(ctx, userID, from, through, blooms, shard, matchers...) +} diff --git a/pkg/storage/stores/tsdb/lazy_index.go b/pkg/storage/stores/tsdb/lazy_index.go index 9a3da11b565c..e29dcb4d1563 100644 --- a/pkg/storage/stores/tsdb/lazy_index.go +++ b/pkg/storage/stores/tsdb/lazy_index.go @@ -64,3 +64,11 @@ func (f LazyIndex) LabelValues(ctx context.Context, userID string, from, through } return i.LabelValues(ctx, userID, from, through, name, matchers...) 
} + +func (f LazyIndex) Stats(ctx context.Context, userID string, from, through model.Time, blooms *StatsBlooms, shard *index.ShardAnnotation, matchers ...*labels.Matcher) (*StatsBlooms, error) { + i, err := f() + if err != nil { + return nil, err + } + return i.Stats(ctx, userID, from, through, blooms, shard, matchers...) +} diff --git a/pkg/storage/stores/tsdb/multi_file_index.go b/pkg/storage/stores/tsdb/multi_file_index.go index 9a74455a0fee..834063fead2c 100644 --- a/pkg/storage/stores/tsdb/multi_file_index.go +++ b/pkg/storage/stores/tsdb/multi_file_index.go @@ -235,3 +235,14 @@ func (i *MultiIndex) LabelValues(ctx context.Context, userID string, from, throu return results, nil } + +func (i *MultiIndex) Stats(ctx context.Context, userID string, from, through model.Time, blooms *StatsBlooms, shard *index.ShardAnnotation, matchers ...*labels.Matcher) (*StatsBlooms, error) { + if blooms == nil { + blooms = BloomPool.Get() + } + + _, err := i.forIndices(ctx, from, through, func(ctx context.Context, idx Index) (interface{}, error) { + return idx.Stats(ctx, userID, from, through, blooms, shard, matchers...) + }) + return blooms, err +} diff --git a/pkg/storage/stores/tsdb/multitenant.go b/pkg/storage/stores/tsdb/multitenant.go index 5f8fdcd39020..e252826ab107 100644 --- a/pkg/storage/stores/tsdb/multitenant.go +++ b/pkg/storage/stores/tsdb/multitenant.go @@ -88,3 +88,7 @@ func (m *MultiTenantIndex) LabelValues(ctx context.Context, userID string, from, } return m.idx.LabelValues(ctx, userID, from, through, name, withTenantLabelMatcher(userID, matchers)...) } + +func (m *MultiTenantIndex) Stats(ctx context.Context, userID string, from, through model.Time, blooms *StatsBlooms, shard *index.ShardAnnotation, matchers ...*labels.Matcher) (*StatsBlooms, error) { + return m.idx.Stats(ctx, userID, from, through, blooms, shard, withTenantLabelMatcher(userID, matchers)...) 
+} diff --git a/pkg/storage/stores/tsdb/pool.go b/pkg/storage/stores/tsdb/pool.go index 0b02f5c11be6..86bdb64f5d8a 100644 --- a/pkg/storage/stores/tsdb/pool.go +++ b/pkg/storage/stores/tsdb/pool.go @@ -1,8 +1,12 @@ package tsdb import ( + "encoding/binary" "sync" + "github.com/prometheus/common/model" + "github.com/willf/bloom" + "github.com/grafana/loki/pkg/storage/stores/tsdb/index" ) @@ -10,6 +14,7 @@ var ( ChunkMetasPool = &index.ChunkMetasPool // re-exporting SeriesPool PoolSeries ChunkRefsPool PoolChunkRefs + BloomPool PoolBloom ) type PoolSeries struct { @@ -45,3 +50,89 @@ func (p *PoolChunkRefs) Put(xs []ChunkRef) { //nolint:staticcheck p.pool.Put(xs) } + +type PoolBloom struct { + pool sync.Pool +} + +func (p *PoolBloom) Get() *StatsBlooms { + if x := p.pool.Get(); x != nil { + return x.(*StatsBlooms) + } + + return newStatsBlooms() + +} + +func (p *PoolBloom) Put(x *StatsBlooms) { + x.Streams.ClearAll() + x.Chunks.ClearAll() + x.stats = Stats{} + p.pool.Put(x) +} + +// These are very expensive in terms of memory usage, +// each requiring ~12.5MB. Therefore we heavily rely on pool usage. +// See https://hur.st/bloomfilter for play around with this idea. +func newStatsBlooms() *StatsBlooms { + // 1 million streams @ 1% error =~ 1.14MB + streams := bloom.NewWithEstimates(1e6, 0.01) + // 10 million chunks @ 1% error =~ 11.43MB + chunks := bloom.NewWithEstimates(10e6, 0.01) + return &StatsBlooms{ + Streams: streams, + Chunks: chunks, + } +} + +// TODO(owen-d): shard this across a slice of smaller bloom filters to reduce +// lock contention +// Bloom filters for estimating duplicate statistics across both series +// and chunks within TSDB indices. These are used to calculate data topology +// statistics prior to running queries. 
+type StatsBlooms struct { + sync.RWMutex + Streams, Chunks *bloom.BloomFilter + stats Stats +} + +func (b *StatsBlooms) Stats() Stats { return b.stats } + +func (b *StatsBlooms) AddStream(fp model.Fingerprint) { + key := make([]byte, 8) + binary.BigEndian.PutUint64(key, uint64(fp)) + b.add(b.Streams, key, func() { + b.stats.Streams++ + }) +} + +func (b *StatsBlooms) AddChunk(fp model.Fingerprint, chk index.ChunkMeta) { + // fingerprint + mintime + maxtime + checksum + ln := 8 + 8 + 8 + 4 + key := make([]byte, ln) + binary.BigEndian.PutUint64(key, uint64(fp)) + binary.BigEndian.PutUint64(key[8:], uint64(chk.MinTime)) + binary.BigEndian.PutUint64(key[16:], uint64(chk.MaxTime)) + binary.BigEndian.PutUint32(key[24:], chk.Checksum) + b.add(b.Chunks, key, func() { + b.stats.Chunks++ + b.stats.Bytes += uint64(chk.KB << 10) + b.stats.Entries += uint64(chk.Entries) + }) +} + +func (b *StatsBlooms) add(filter *bloom.BloomFilter, key []byte, update func()) { + b.RLock() + ok := filter.Test(key) + b.RUnlock() + + if ok { + return + } + + b.Lock() + defer b.Unlock() + if ok = filter.TestAndAdd(key); !ok { + update() + } +} diff --git a/pkg/storage/stores/tsdb/pool_test.go b/pkg/storage/stores/tsdb/pool_test.go new file mode 100644 index 000000000000..934faa0fa367 --- /dev/null +++ b/pkg/storage/stores/tsdb/pool_test.go @@ -0,0 +1,47 @@ +package tsdb + +import ( + "sync" + "testing" + + "github.com/prometheus/common/model" + "github.com/stretchr/testify/require" + + "github.com/grafana/loki/pkg/storage/stores/tsdb/index" +) + +func TestStatsBloom_Stream(t *testing.T) { + sb := BloomPool.Get() + var wg sync.WaitGroup + for i := 0; i < 40; i++ { + wg.Add(1) + go func(x int) { + sb.AddStream(model.Fingerprint(x % 2)) + wg.Done() + }(i) + } + wg.Wait() + + require.Equal(t, uint64(2), sb.stats.Streams) +} + +func TestStatsBloom_Chunks(t *testing.T) { + sb := BloomPool.Get() + var wg sync.WaitGroup + for i := 0; i < 10; i++ { + wg.Add(1) + go func(x int) { + 
sb.AddChunk(model.Fingerprint(x%2), index.ChunkMeta{ + Checksum: uint32(x) % 4, + KB: 1, + Entries: 1, + }) + wg.Done() + }(i) + } + wg.Wait() + + require.Equal(t, 4, int(sb.stats.Chunks)) + require.Equal(t, 4<<10, int(sb.stats.Bytes)) + require.Equal(t, 4, int(sb.stats.Entries)) +} diff --git a/pkg/storage/stores/tsdb/single_file_index.go b/pkg/storage/stores/tsdb/single_file_index.go index 9937d1fd2708..b71bd813dae3 100644 --- a/pkg/storage/stores/tsdb/single_file_index.go +++ b/pkg/storage/stores/tsdb/single_file_index.go @@ -261,3 +261,30 @@ func (i *TSDBIndex) Identifier(tenant string) SingleTenantTSDBIdentifier { Checksum: i.Checksum(), } } + +func (i *TSDBIndex) Stats(ctx context.Context, userID string, from, through model.Time, blooms *StatsBlooms, shard *index.ShardAnnotation, matchers ...*labels.Matcher) (*StatsBlooms, error) { + if blooms == nil { + blooms = BloomPool.Get() + } + queryBounds := newBounds(from, through) + + if err := i.forSeries(ctx, shard, + func(ls labels.Labels, fp model.Fingerprint, chks []index.ChunkMeta) { + // TODO(owen-d): use logarithmic approach + var addedStream bool + for _, chk := range chks { + if Overlap(queryBounds, chk) { + if !addedStream { + blooms.AddStream(fp) + addedStream = true + } + blooms.AddChunk(fp, chk) + } + } + }, + matchers...); err != nil { + return blooms, err + } + + return blooms, nil +} diff --git a/vendor/github.com/spaolacci/murmur3/.gitignore b/vendor/github.com/spaolacci/murmur3/.gitignore new file mode 100644 index 000000000000..00268614f045 --- /dev/null +++ b/vendor/github.com/spaolacci/murmur3/.gitignore @@ -0,0 +1,22 @@ +# Compiled Object files, Static and Dynamic libs (Shared Objects) +*.o +*.a +*.so + +# Folders +_obj +_test + +# Architecture specific extensions/prefixes +*.[568vq] +[568vq].out + +*.cgo1.go +*.cgo2.c +_cgo_defun.c +_cgo_gotypes.go +_cgo_export.* + +_testmain.go + +*.exe diff --git a/vendor/github.com/spaolacci/murmur3/.travis.yml 
b/vendor/github.com/spaolacci/murmur3/.travis.yml new file mode 100644 index 000000000000..9bfca9c8b22f --- /dev/null +++ b/vendor/github.com/spaolacci/murmur3/.travis.yml @@ -0,0 +1,7 @@ +language: go + +go: + - 1.x + - master + +script: go test diff --git a/vendor/github.com/spaolacci/murmur3/LICENSE b/vendor/github.com/spaolacci/murmur3/LICENSE new file mode 100644 index 000000000000..2a46fd750072 --- /dev/null +++ b/vendor/github.com/spaolacci/murmur3/LICENSE @@ -0,0 +1,24 @@ +Copyright 2013, Sébastien Paolacci. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the library nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/vendor/github.com/spaolacci/murmur3/README.md b/vendor/github.com/spaolacci/murmur3/README.md new file mode 100644 index 000000000000..e463678a05e6 --- /dev/null +++ b/vendor/github.com/spaolacci/murmur3/README.md @@ -0,0 +1,86 @@ +murmur3 +======= + +[![Build Status](https://travis-ci.org/spaolacci/murmur3.svg?branch=master)](https://travis-ci.org/spaolacci/murmur3) + +Native Go implementation of Austin Appleby's third MurmurHash revision (aka +MurmurHash3). + +Reference algorithm has been slightly hacked as to support the streaming mode +required by Go's standard [Hash interface](http://golang.org/pkg/hash/#Hash). + + +Benchmarks +---------- + +Go tip as of 2014-06-12 (i.e almost go1.3), core i7 @ 3.4 Ghz. All runs +include hasher instantiation and sequence finalization. + +
+
+Benchmark32_1        500000000     7.69 ns/op      130.00 MB/s
+Benchmark32_2        200000000     8.83 ns/op      226.42 MB/s
+Benchmark32_4        500000000     7.99 ns/op      500.39 MB/s
+Benchmark32_8        200000000     9.47 ns/op      844.69 MB/s
+Benchmark32_16       100000000     12.1 ns/op     1321.61 MB/s
+Benchmark32_32       100000000     18.3 ns/op     1743.93 MB/s
+Benchmark32_64        50000000     30.9 ns/op     2071.64 MB/s
+Benchmark32_128       50000000     57.6 ns/op     2222.96 MB/s
+Benchmark32_256       20000000      116 ns/op     2188.60 MB/s
+Benchmark32_512       10000000      226 ns/op     2260.59 MB/s
+Benchmark32_1024       5000000      452 ns/op     2263.73 MB/s
+Benchmark32_2048       2000000      891 ns/op     2296.02 MB/s
+Benchmark32_4096       1000000     1787 ns/op     2290.92 MB/s
+Benchmark32_8192        500000     3593 ns/op     2279.68 MB/s
+Benchmark128_1       100000000     26.1 ns/op       38.33 MB/s
+Benchmark128_2       100000000     29.0 ns/op       69.07 MB/s
+Benchmark128_4        50000000     29.8 ns/op      134.17 MB/s
+Benchmark128_8        50000000     31.6 ns/op      252.86 MB/s
+Benchmark128_16      100000000     26.5 ns/op      603.42 MB/s
+Benchmark128_32      100000000     28.6 ns/op     1117.15 MB/s
+Benchmark128_64       50000000     35.5 ns/op     1800.97 MB/s
+Benchmark128_128      50000000     50.9 ns/op     2515.50 MB/s
+Benchmark128_256      20000000     76.9 ns/op     3330.11 MB/s
+Benchmark128_512      20000000      135 ns/op     3769.09 MB/s
+Benchmark128_1024     10000000      250 ns/op     4094.38 MB/s
+Benchmark128_2048      5000000      477 ns/op     4290.75 MB/s
+Benchmark128_4096      2000000      940 ns/op     4353.29 MB/s
+Benchmark128_8192      1000000     1838 ns/op     4455.47 MB/s
+
+
+ + +
+
+benchmark              Go1.0 MB/s    Go1.1 MB/s  speedup    Go1.2 MB/s  speedup    Go1.3 MB/s  speedup
+Benchmark32_1               98.90        118.59    1.20x        114.79    0.97x        130.00    1.13x
+Benchmark32_2              168.04        213.31    1.27x        210.65    0.99x        226.42    1.07x
+Benchmark32_4              414.01        494.19    1.19x        490.29    0.99x        500.39    1.02x
+Benchmark32_8              662.19        836.09    1.26x        836.46    1.00x        844.69    1.01x
+Benchmark32_16             917.46       1304.62    1.42x       1297.63    0.99x       1321.61    1.02x
+Benchmark32_32            1141.93       1737.54    1.52x       1728.24    0.99x       1743.93    1.01x
+Benchmark32_64            1289.47       2039.51    1.58x       2038.20    1.00x       2071.64    1.02x
+Benchmark32_128           1299.23       2097.63    1.61x       2177.13    1.04x       2222.96    1.02x
+Benchmark32_256           1369.90       2202.34    1.61x       2213.15    1.00x       2188.60    0.99x
+Benchmark32_512           1399.56       2255.72    1.61x       2264.49    1.00x       2260.59    1.00x
+Benchmark32_1024          1410.90       2285.82    1.62x       2270.99    0.99x       2263.73    1.00x
+Benchmark32_2048          1422.14       2297.62    1.62x       2269.59    0.99x       2296.02    1.01x
+Benchmark32_4096          1420.53       2307.81    1.62x       2273.43    0.99x       2290.92    1.01x
+Benchmark32_8192          1424.79       2312.87    1.62x       2286.07    0.99x       2279.68    1.00x
+Benchmark128_1               8.32         30.15    3.62x         30.84    1.02x         38.33    1.24x
+Benchmark128_2              16.38         59.72    3.65x         59.37    0.99x         69.07    1.16x
+Benchmark128_4              32.26        112.96    3.50x        114.24    1.01x        134.17    1.17x
+Benchmark128_8              62.68        217.88    3.48x        218.18    1.00x        252.86    1.16x
+Benchmark128_16            128.47        451.57    3.51x        474.65    1.05x        603.42    1.27x
+Benchmark128_32            246.18        910.42    3.70x        871.06    0.96x       1117.15    1.28x
+Benchmark128_64            449.05       1477.64    3.29x       1449.24    0.98x       1800.97    1.24x
+Benchmark128_128           762.61       2222.42    2.91x       2217.30    1.00x       2515.50    1.13x
+Benchmark128_256          1179.92       3005.46    2.55x       2931.55    0.98x       3330.11    1.14x
+Benchmark128_512          1616.51       3590.75    2.22x       3592.08    1.00x       3769.09    1.05x
+Benchmark128_1024         1964.36       3979.67    2.03x       4034.01    1.01x       4094.38    1.01x
+Benchmark128_2048         2225.07       4156.93    1.87x       4244.17    1.02x       4290.75    1.01x
+Benchmark128_4096         2360.15       4299.09    1.82x       4392.35    1.02x       4353.29    0.99x
+Benchmark128_8192         2411.50       4356.84    1.81x       4480.68    1.03x       4455.47    0.99x
+
+
+ diff --git a/vendor/github.com/spaolacci/murmur3/murmur.go b/vendor/github.com/spaolacci/murmur3/murmur.go new file mode 100644 index 000000000000..1252cf73a79b --- /dev/null +++ b/vendor/github.com/spaolacci/murmur3/murmur.go @@ -0,0 +1,64 @@ +// Copyright 2013, Sébastien Paolacci. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* +Package murmur3 implements Austin Appleby's non-cryptographic MurmurHash3. + + Reference implementation: + http://code.google.com/p/smhasher/wiki/MurmurHash3 + + History, characteristics and (legacy) perfs: + https://sites.google.com/site/murmurhash/ + https://sites.google.com/site/murmurhash/statistics +*/ +package murmur3 + +type bmixer interface { + bmix(p []byte) (tail []byte) + Size() (n int) + reset() +} + +type digest struct { + clen int // Digested input cumulative length. + tail []byte // 0 to Size()-1 bytes view of `buf'. + buf [16]byte // Expected (but not required) to be Size() large. + seed uint32 // Seed for initializing the hash. + bmixer +} + +func (d *digest) BlockSize() int { return 1 } + +func (d *digest) Write(p []byte) (n int, err error) { + n = len(p) + d.clen += n + + if len(d.tail) > 0 { + // Stick back pending bytes. + nfree := d.Size() - len(d.tail) // nfree ∈ [1, d.Size()-1]. + if nfree < len(p) { + // One full block can be formed. + block := append(d.tail, p[:nfree]...) + p = p[nfree:] + _ = d.bmix(block) // No tail. + } else { + // Tail's buf is large enough to prevent reallocs. + p = append(d.tail, p...) + } + } + + d.tail = d.bmix(p) + + // Keep own copy of the 0 to Size()-1 pending bytes. 
+ nn := copy(d.buf[:], d.tail) + d.tail = d.buf[:nn] + + return n, nil +} + +func (d *digest) Reset() { + d.clen = 0 + d.tail = nil + d.bmixer.reset() +} diff --git a/vendor/github.com/spaolacci/murmur3/murmur128.go b/vendor/github.com/spaolacci/murmur3/murmur128.go new file mode 100644 index 000000000000..a4b618b5f3d9 --- /dev/null +++ b/vendor/github.com/spaolacci/murmur3/murmur128.go @@ -0,0 +1,203 @@ +package murmur3 + +import ( + //"encoding/binary" + "hash" + "unsafe" +) + +const ( + c1_128 = 0x87c37b91114253d5 + c2_128 = 0x4cf5ad432745937f +) + +// Make sure interfaces are correctly implemented. +var ( + _ hash.Hash = new(digest128) + _ Hash128 = new(digest128) + _ bmixer = new(digest128) +) + +// Hash128 represents a 128-bit hasher +// Hack: the standard api doesn't define any Hash128 interface. +type Hash128 interface { + hash.Hash + Sum128() (uint64, uint64) +} + +// digest128 represents a partial evaluation of a 128 bites hash. +type digest128 struct { + digest + h1 uint64 // Unfinalized running hash part 1. + h2 uint64 // Unfinalized running hash part 2. 
+} + +// New128 returns a 128-bit hasher +func New128() Hash128 { return New128WithSeed(0) } + +// New128WithSeed returns a 128-bit hasher set with explicit seed value +func New128WithSeed(seed uint32) Hash128 { + d := new(digest128) + d.seed = seed + d.bmixer = d + d.Reset() + return d +} + +func (d *digest128) Size() int { return 16 } + +func (d *digest128) reset() { d.h1, d.h2 = uint64(d.seed), uint64(d.seed) } + +func (d *digest128) Sum(b []byte) []byte { + h1, h2 := d.Sum128() + return append(b, + byte(h1>>56), byte(h1>>48), byte(h1>>40), byte(h1>>32), + byte(h1>>24), byte(h1>>16), byte(h1>>8), byte(h1), + + byte(h2>>56), byte(h2>>48), byte(h2>>40), byte(h2>>32), + byte(h2>>24), byte(h2>>16), byte(h2>>8), byte(h2), + ) +} + +func (d *digest128) bmix(p []byte) (tail []byte) { + h1, h2 := d.h1, d.h2 + + nblocks := len(p) / 16 + for i := 0; i < nblocks; i++ { + t := (*[2]uint64)(unsafe.Pointer(&p[i*16])) + k1, k2 := t[0], t[1] + + k1 *= c1_128 + k1 = (k1 << 31) | (k1 >> 33) // rotl64(k1, 31) + k1 *= c2_128 + h1 ^= k1 + + h1 = (h1 << 27) | (h1 >> 37) // rotl64(h1, 27) + h1 += h2 + h1 = h1*5 + 0x52dce729 + + k2 *= c2_128 + k2 = (k2 << 33) | (k2 >> 31) // rotl64(k2, 33) + k2 *= c1_128 + h2 ^= k2 + + h2 = (h2 << 31) | (h2 >> 33) // rotl64(h2, 31) + h2 += h1 + h2 = h2*5 + 0x38495ab5 + } + d.h1, d.h2 = h1, h2 + return p[nblocks*d.Size():] +} + +func (d *digest128) Sum128() (h1, h2 uint64) { + + h1, h2 = d.h1, d.h2 + + var k1, k2 uint64 + switch len(d.tail) & 15 { + case 15: + k2 ^= uint64(d.tail[14]) << 48 + fallthrough + case 14: + k2 ^= uint64(d.tail[13]) << 40 + fallthrough + case 13: + k2 ^= uint64(d.tail[12]) << 32 + fallthrough + case 12: + k2 ^= uint64(d.tail[11]) << 24 + fallthrough + case 11: + k2 ^= uint64(d.tail[10]) << 16 + fallthrough + case 10: + k2 ^= uint64(d.tail[9]) << 8 + fallthrough + case 9: + k2 ^= uint64(d.tail[8]) << 0 + + k2 *= c2_128 + k2 = (k2 << 33) | (k2 >> 31) // rotl64(k2, 33) + k2 *= c1_128 + h2 ^= k2 + + fallthrough + + case 8: + k1 ^= 
uint64(d.tail[7]) << 56 + fallthrough + case 7: + k1 ^= uint64(d.tail[6]) << 48 + fallthrough + case 6: + k1 ^= uint64(d.tail[5]) << 40 + fallthrough + case 5: + k1 ^= uint64(d.tail[4]) << 32 + fallthrough + case 4: + k1 ^= uint64(d.tail[3]) << 24 + fallthrough + case 3: + k1 ^= uint64(d.tail[2]) << 16 + fallthrough + case 2: + k1 ^= uint64(d.tail[1]) << 8 + fallthrough + case 1: + k1 ^= uint64(d.tail[0]) << 0 + k1 *= c1_128 + k1 = (k1 << 31) | (k1 >> 33) // rotl64(k1, 31) + k1 *= c2_128 + h1 ^= k1 + } + + h1 ^= uint64(d.clen) + h2 ^= uint64(d.clen) + + h1 += h2 + h2 += h1 + + h1 = fmix64(h1) + h2 = fmix64(h2) + + h1 += h2 + h2 += h1 + + return h1, h2 +} + +func fmix64(k uint64) uint64 { + k ^= k >> 33 + k *= 0xff51afd7ed558ccd + k ^= k >> 33 + k *= 0xc4ceb9fe1a85ec53 + k ^= k >> 33 + return k +} + +/* +func rotl64(x uint64, r byte) uint64 { + return (x << r) | (x >> (64 - r)) +} +*/ + +// Sum128 returns the MurmurHash3 sum of data. It is equivalent to the +// following sequence (without the extra burden and the extra allocation): +// hasher := New128() +// hasher.Write(data) +// return hasher.Sum128() +func Sum128(data []byte) (h1 uint64, h2 uint64) { return Sum128WithSeed(data, 0) } + +// Sum128WithSeed returns the MurmurHash3 sum of data. 
It is equivalent to the +// following sequence (without the extra burden and the extra allocation): +// hasher := New128WithSeed(seed) +// hasher.Write(data) +// return hasher.Sum128() +func Sum128WithSeed(data []byte, seed uint32) (h1 uint64, h2 uint64) { + d := &digest128{h1: uint64(seed), h2: uint64(seed)} + d.seed = seed + d.tail = d.bmix(data) + d.clen = len(data) + return d.Sum128() +} diff --git a/vendor/github.com/spaolacci/murmur3/murmur32.go b/vendor/github.com/spaolacci/murmur3/murmur32.go new file mode 100644 index 000000000000..e32c99511ff6 --- /dev/null +++ b/vendor/github.com/spaolacci/murmur3/murmur32.go @@ -0,0 +1,167 @@ +package murmur3 + +// http://code.google.com/p/guava-libraries/source/browse/guava/src/com/google/common/hash/Murmur3_32HashFunction.java + +import ( + "hash" + "unsafe" +) + +// Make sure interfaces are correctly implemented. +var ( + _ hash.Hash = new(digest32) + _ hash.Hash32 = new(digest32) + _ bmixer = new(digest32) +) + +const ( + c1_32 uint32 = 0xcc9e2d51 + c2_32 uint32 = 0x1b873593 +) + +// digest32 represents a partial evaluation of a 32 bites hash. +type digest32 struct { + digest + h1 uint32 // Unfinalized running hash. +} + +// New32 returns new 32-bit hasher +func New32() hash.Hash32 { return New32WithSeed(0) } + +// New32WithSeed returns new 32-bit hasher set with explicit seed value +func New32WithSeed(seed uint32) hash.Hash32 { + d := new(digest32) + d.seed = seed + d.bmixer = d + d.Reset() + return d +} + +func (d *digest32) Size() int { return 4 } + +func (d *digest32) reset() { d.h1 = d.seed } + +func (d *digest32) Sum(b []byte) []byte { + h := d.Sum32() + return append(b, byte(h>>24), byte(h>>16), byte(h>>8), byte(h)) +} + +// Digest as many blocks as possible. 
+func (d *digest32) bmix(p []byte) (tail []byte) { + h1 := d.h1 + + nblocks := len(p) / 4 + for i := 0; i < nblocks; i++ { + k1 := *(*uint32)(unsafe.Pointer(&p[i*4])) + + k1 *= c1_32 + k1 = (k1 << 15) | (k1 >> 17) // rotl32(k1, 15) + k1 *= c2_32 + + h1 ^= k1 + h1 = (h1 << 13) | (h1 >> 19) // rotl32(h1, 13) + h1 = h1*4 + h1 + 0xe6546b64 + } + d.h1 = h1 + return p[nblocks*d.Size():] +} + +func (d *digest32) Sum32() (h1 uint32) { + + h1 = d.h1 + + var k1 uint32 + switch len(d.tail) & 3 { + case 3: + k1 ^= uint32(d.tail[2]) << 16 + fallthrough + case 2: + k1 ^= uint32(d.tail[1]) << 8 + fallthrough + case 1: + k1 ^= uint32(d.tail[0]) + k1 *= c1_32 + k1 = (k1 << 15) | (k1 >> 17) // rotl32(k1, 15) + k1 *= c2_32 + h1 ^= k1 + } + + h1 ^= uint32(d.clen) + + h1 ^= h1 >> 16 + h1 *= 0x85ebca6b + h1 ^= h1 >> 13 + h1 *= 0xc2b2ae35 + h1 ^= h1 >> 16 + + return h1 +} + +/* +func rotl32(x uint32, r byte) uint32 { + return (x << r) | (x >> (32 - r)) +} +*/ + +// Sum32 returns the MurmurHash3 sum of data. It is equivalent to the +// following sequence (without the extra burden and the extra allocation): +// hasher := New32() +// hasher.Write(data) +// return hasher.Sum32() +func Sum32(data []byte) uint32 { return Sum32WithSeed(data, 0) } + +// Sum32WithSeed returns the MurmurHash3 sum of data. 
It is equivalent to the +// following sequence (without the extra burden and the extra allocation): +// hasher := New32WithSeed(seed) +// hasher.Write(data) +// return hasher.Sum32() +func Sum32WithSeed(data []byte, seed uint32) uint32 { + + h1 := seed + + nblocks := len(data) / 4 + var p uintptr + if len(data) > 0 { + p = uintptr(unsafe.Pointer(&data[0])) + } + p1 := p + uintptr(4*nblocks) + for ; p < p1; p += 4 { + k1 := *(*uint32)(unsafe.Pointer(p)) + + k1 *= c1_32 + k1 = (k1 << 15) | (k1 >> 17) // rotl32(k1, 15) + k1 *= c2_32 + + h1 ^= k1 + h1 = (h1 << 13) | (h1 >> 19) // rotl32(h1, 13) + h1 = h1*4 + h1 + 0xe6546b64 + } + + tail := data[nblocks*4:] + + var k1 uint32 + switch len(tail) & 3 { + case 3: + k1 ^= uint32(tail[2]) << 16 + fallthrough + case 2: + k1 ^= uint32(tail[1]) << 8 + fallthrough + case 1: + k1 ^= uint32(tail[0]) + k1 *= c1_32 + k1 = (k1 << 15) | (k1 >> 17) // rotl32(k1, 15) + k1 *= c2_32 + h1 ^= k1 + } + + h1 ^= uint32(len(data)) + + h1 ^= h1 >> 16 + h1 *= 0x85ebca6b + h1 ^= h1 >> 13 + h1 *= 0xc2b2ae35 + h1 ^= h1 >> 16 + + return h1 +} diff --git a/vendor/github.com/spaolacci/murmur3/murmur64.go b/vendor/github.com/spaolacci/murmur3/murmur64.go new file mode 100644 index 000000000000..65a410ae0b9c --- /dev/null +++ b/vendor/github.com/spaolacci/murmur3/murmur64.go @@ -0,0 +1,57 @@ +package murmur3 + +import ( + "hash" +) + +// Make sure interfaces are correctly implemented. +var ( + _ hash.Hash = new(digest64) + _ hash.Hash64 = new(digest64) + _ bmixer = new(digest64) +) + +// digest64 is half a digest128. 
+type digest64 digest128 + +// New64 returns a 64-bit hasher +func New64() hash.Hash64 { return New64WithSeed(0) } + +// New64WithSeed returns a 64-bit hasher set with explicit seed value +func New64WithSeed(seed uint32) hash.Hash64 { + d := (*digest64)(New128WithSeed(seed).(*digest128)) + return d +} + +func (d *digest64) Sum(b []byte) []byte { + h1 := d.Sum64() + return append(b, + byte(h1>>56), byte(h1>>48), byte(h1>>40), byte(h1>>32), + byte(h1>>24), byte(h1>>16), byte(h1>>8), byte(h1)) +} + +func (d *digest64) Sum64() uint64 { + h1, _ := (*digest128)(d).Sum128() + return h1 +} + +// Sum64 returns the MurmurHash3 sum of data. It is equivalent to the +// following sequence (without the extra burden and the extra allocation): +// hasher := New64() +// hasher.Write(data) +// return hasher.Sum64() +func Sum64(data []byte) uint64 { return Sum64WithSeed(data, 0) } + +// Sum64WithSeed returns the MurmurHash3 sum of data. It is equivalent to the +// following sequence (without the extra burden and the extra allocation): +// hasher := New64WithSeed(seed) +// hasher.Write(data) +// return hasher.Sum64() +func Sum64WithSeed(data []byte, seed uint32) uint64 { + d := &digest128{h1: uint64(seed), h2: uint64(seed)} + d.seed = seed + d.tail = d.bmix(data) + d.clen = len(data) + h1, _ := d.Sum128() + return h1 +} diff --git a/vendor/github.com/willf/bitset/.gitignore b/vendor/github.com/willf/bitset/.gitignore new file mode 100644 index 000000000000..5c204d28b0e3 --- /dev/null +++ b/vendor/github.com/willf/bitset/.gitignore @@ -0,0 +1,26 @@ +# Compiled Object files, Static and Dynamic libs (Shared Objects) +*.o +*.a +*.so + +# Folders +_obj +_test + +# Architecture specific extensions/prefixes +*.[568vq] +[568vq].out + +*.cgo1.go +*.cgo2.c +_cgo_defun.c +_cgo_gotypes.go +_cgo_export.* + +_testmain.go + +*.exe +*.test +*.prof + +target diff --git a/vendor/github.com/willf/bitset/.travis.yml b/vendor/github.com/willf/bitset/.travis.yml new file mode 100644 index 
000000000000..094aa5ce070c --- /dev/null +++ b/vendor/github.com/willf/bitset/.travis.yml @@ -0,0 +1,37 @@ +language: go + +sudo: false + +branches: + except: + - release + +branches: + only: + - master + - travis + +go: + - "1.11.x" + - tip + +matrix: + allow_failures: + - go: tip + +before_install: + - if [ -n "$GH_USER" ]; then git config --global github.user ${GH_USER}; fi; + - if [ -n "$GH_TOKEN" ]; then git config --global github.token ${GH_TOKEN}; fi; + - go get github.com/mattn/goveralls + +before_script: + - make deps + +script: + - make qa + +after_failure: + - cat ./target/test/report.xml + +after_success: + - if [ "$TRAVIS_GO_VERSION" = "1.11.1" ]; then $HOME/gopath/bin/goveralls -covermode=count -coverprofile=target/report/coverage.out -service=travis-ci; fi; diff --git a/vendor/github.com/willf/bitset/LICENSE b/vendor/github.com/willf/bitset/LICENSE new file mode 100644 index 000000000000..59cab8a939be --- /dev/null +++ b/vendor/github.com/willf/bitset/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2014 Will Fitzgerald. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/github.com/willf/bitset/README.md b/vendor/github.com/willf/bitset/README.md new file mode 100644 index 000000000000..50338e71dfdb --- /dev/null +++ b/vendor/github.com/willf/bitset/README.md @@ -0,0 +1,94 @@ +# bitset + +*Go language library to map between non-negative integers and boolean values* + +[![Test](https://github.com/willf/bitset/workflows/Test/badge.svg)](https://github.com/willf/bitset/actions?query=workflow%3ATest) +[![Master Coverage Status](https://coveralls.io/repos/willf/bitset/badge.svg?branch=master&service=github)](https://coveralls.io/github/willf/bitset?branch=master) +[![Go Report Card](https://goreportcard.com/badge/github.com/willf/bitset)](https://goreportcard.com/report/github.com/willf/bitset) +[![PkgGoDev](https://pkg.go.dev/badge/github.com/willf/bitset?tab=doc)](https://pkg.go.dev/github.com/willf/bitset?tab=doc) + + +## Description + +Package bitset implements bitsets, a mapping between non-negative integers and boolean values. +It should be more efficient than map[uint] bool. + +It provides methods for setting, clearing, flipping, and testing individual integers. + +But it also provides set intersection, union, difference, complement, and symmetric operations, as well as tests to check whether any, all, or no bits are set, and querying a bitset's current length and number of positive bits. 
+ +BitSets are expanded to the size of the largest set bit; the memory allocation is approximately Max bits, where Max is the largest set bit. BitSets are never shrunk. On creation, a hint can be given for the number of bits that will be used. + +Many of the methods, including Set, Clear, and Flip, return a BitSet pointer, which allows for chaining. + +### Example use: + +```go +package main + +import ( + "fmt" + "math/rand" + + "github.com/willf/bitset" +) + +func main() { + fmt.Printf("Hello from BitSet!\n") + var b bitset.BitSet + // play some Go Fish + for i := 0; i < 100; i++ { + card1 := uint(rand.Intn(52)) + card2 := uint(rand.Intn(52)) + b.Set(card1) + if b.Test(card2) { + fmt.Println("Go Fish!") + } + b.Clear(card1) + } + + // Chaining + b.Set(10).Set(11) + + for i, e := b.NextSet(0); e; i, e = b.NextSet(i + 1) { + fmt.Println("The following bit is set:", i) + } + if b.Intersection(bitset.New(100).Set(10)).Count() == 1 { + fmt.Println("Intersection works.") + } else { + fmt.Println("Intersection doesn't work???") + } +} +``` + +As an alternative to BitSets, one should check out the 'big' package, which provides a (less set-theoretical) view of bitsets. + +Package documentation is at: https://pkg.go.dev/github.com/willf/bitset?tab=doc + +## Memory Usage + +The memory usage of a bitset using N bits is at least N/8 bytes. The number of bits in a bitset is at least as large as one plus the greatest bit index you have accessed. Thus it is possible to run out of memory while using a bitset. If you have lots of bits, you might prefer compressed bitsets, like the [Roaring bitmaps](http://roaringbitmap.org) and its [Go implementation](https://github.com/RoaringBitmap/roaring). + +## Implementation Note + +Go 1.9 introduced a native `math/bits` library. We provide backward compatibility to Go 1.7, which might be removed. + +It is possible that a later version will match the `math/bits` return signature for counts (which is `int`, rather than our library's `unit64`). 
If so, the version will be bumped. + +## Installation + +```bash +go get github.com/willf/bitset +``` + +## Contributing + +If you wish to contribute to this project, please branch and issue a pull request against master ("[GitHub Flow](https://guides.github.com/introduction/flow/)") + +## Running all tests + +Before committing the code, please check if it passes tests, has adequate coverage, etc. +```bash +go test +go test -cover +``` diff --git a/vendor/github.com/willf/bitset/azure-pipelines.yml b/vendor/github.com/willf/bitset/azure-pipelines.yml new file mode 100644 index 000000000000..f9b295918404 --- /dev/null +++ b/vendor/github.com/willf/bitset/azure-pipelines.yml @@ -0,0 +1,39 @@ +# Go +# Build your Go project. +# Add steps that test, save build artifacts, deploy, and more: +# https://docs.microsoft.com/azure/devops/pipelines/languages/go + +trigger: +- master + +pool: + vmImage: 'Ubuntu-16.04' + +variables: + GOBIN: '$(GOPATH)/bin' # Go binaries path + GOROOT: '/usr/local/go1.11' # Go installation path + GOPATH: '$(system.defaultWorkingDirectory)/gopath' # Go workspace path + modulePath: '$(GOPATH)/src/github.com/$(build.repository.name)' # Path to the module's code + +steps: +- script: | + mkdir -p '$(GOBIN)' + mkdir -p '$(GOPATH)/pkg' + mkdir -p '$(modulePath)' + shopt -s extglob + shopt -s dotglob + mv !(gopath) '$(modulePath)' + echo '##vso[task.prependpath]$(GOBIN)' + echo '##vso[task.prependpath]$(GOROOT)/bin' + displayName: 'Set up the Go workspace' + +- script: | + go version + go get -v -t -d ./... + if [ -f Gopkg.toml ]; then + curl https://raw.githubusercontent.com/golang/dep/master/install.sh | sh + dep ensure + fi + go build -v . 
+ workingDirectory: '$(modulePath)' + displayName: 'Get dependencies, then build' diff --git a/vendor/github.com/willf/bitset/bitset.go b/vendor/github.com/willf/bitset/bitset.go new file mode 100644 index 000000000000..21e889da2e06 --- /dev/null +++ b/vendor/github.com/willf/bitset/bitset.go @@ -0,0 +1,931 @@ +/* +Package bitset implements bitsets, a mapping +between non-negative integers and boolean values. It should be more +efficient than map[uint] bool. + +It provides methods for setting, clearing, flipping, and testing +individual integers. + +But it also provides set intersection, union, difference, +complement, and symmetric operations, as well as tests to +check whether any, all, or no bits are set, and querying a +bitset's current length and number of positive bits. + +BitSets are expanded to the size of the largest set bit; the +memory allocation is approximately Max bits, where Max is +the largest set bit. BitSets are never shrunk. On creation, +a hint can be given for the number of bits that will be used. + +Many of the methods, including Set,Clear, and Flip, return +a BitSet pointer, which allows for chaining. + +Example use: + + import "bitset" + var b BitSet + b.Set(10).Set(11) + if b.Test(1000) { + b.Clear(1000) + } + if B.Intersection(bitset.New(100).Set(10)).Count() > 1 { + fmt.Println("Intersection works.") + } + +As an alternative to BitSets, one should check out the 'big' package, +which provides a (less set-theoretical) view of bitsets. 
+ +*/ +package bitset + +import ( + "bufio" + "bytes" + "encoding/base64" + "encoding/binary" + "encoding/json" + "errors" + "fmt" + "io" + "strconv" +) + +// the wordSize of a bit set +const wordSize = uint(64) + +// log2WordSize is lg(wordSize) +const log2WordSize = uint(6) + +// allBits has every bit set +const allBits uint64 = 0xffffffffffffffff + +// default binary BigEndian +var binaryOrder binary.ByteOrder = binary.BigEndian + +// default json encoding base64.URLEncoding +var base64Encoding = base64.URLEncoding + +// Base64StdEncoding Marshal/Unmarshal BitSet with base64.StdEncoding(Default: base64.URLEncoding) +func Base64StdEncoding() { base64Encoding = base64.StdEncoding } + +// LittleEndian Marshal/Unmarshal Binary as Little Endian(Default: binary.BigEndian) +func LittleEndian() { binaryOrder = binary.LittleEndian } + +// A BitSet is a set of bits. The zero value of a BitSet is an empty set of length 0. +type BitSet struct { + length uint + set []uint64 +} + +// Error is used to distinguish errors (panics) generated in this package. 
+type Error string + +// safeSet will fixup b.set to be non-nil and return the field value +func (b *BitSet) safeSet() []uint64 { + if b.set == nil { + b.set = make([]uint64, wordsNeeded(0)) + } + return b.set +} + +// From is a constructor used to create a BitSet from an array of integers +func From(buf []uint64) *BitSet { + return &BitSet{uint(len(buf)) * 64, buf} +} + +// Bytes returns the bitset as array of integers +func (b *BitSet) Bytes() []uint64 { + return b.set +} + +// wordsNeeded calculates the number of words needed for i bits +func wordsNeeded(i uint) int { + if i > (Cap() - wordSize + 1) { + return int(Cap() >> log2WordSize) + } + return int((i + (wordSize - 1)) >> log2WordSize) +} + +// New creates a new BitSet with a hint that length bits will be required +func New(length uint) (bset *BitSet) { + defer func() { + if r := recover(); r != nil { + bset = &BitSet{ + 0, + make([]uint64, 0), + } + } + }() + + bset = &BitSet{ + length, + make([]uint64, wordsNeeded(length)), + } + + return bset +} + +// Cap returns the total possible capacity, or number of bits +func Cap() uint { + return ^uint(0) +} + +// Len returns the number of bits in the BitSet. +// Note the difference to method Count, see example. +func (b *BitSet) Len() uint { + return b.length +} + +// extendSetMaybe adds additional words to incorporate new bits if needed +func (b *BitSet) extendSetMaybe(i uint) { + if i >= b.length { // if we need more bits, make 'em + if i >= Cap() { + panic("You are exceeding the capacity") + } + nsize := wordsNeeded(i + 1) + if b.set == nil { + b.set = make([]uint64, nsize) + } else if cap(b.set) >= nsize { + b.set = b.set[:nsize] // fast resize + } else if len(b.set) < nsize { + newset := make([]uint64, nsize, 2*nsize) // increase capacity 2x + copy(newset, b.set) + b.set = newset + } + b.length = i + 1 + } +} + +// Test whether bit i is set. 
+func (b *BitSet) Test(i uint) bool { + if i >= b.length { + return false + } + return b.set[i>>log2WordSize]&(1<<(i&(wordSize-1))) != 0 +} + +// Set bit i to 1, the capacity of the bitset is automatically +// increased accordingly. +// If i>= Cap(), this function will panic. +// Warning: using a very large value for 'i' +// may lead to a memory shortage and a panic: the caller is responsible +// for providing sensible parameters in line with their memory capacity. +func (b *BitSet) Set(i uint) *BitSet { + b.extendSetMaybe(i) + b.set[i>>log2WordSize] |= 1 << (i & (wordSize - 1)) + return b +} + +// Clear bit i to 0 +func (b *BitSet) Clear(i uint) *BitSet { + if i >= b.length { + return b + } + b.set[i>>log2WordSize] &^= 1 << (i & (wordSize - 1)) + return b +} + +// SetTo sets bit i to value. +// If i>= Cap(), this function will panic. +// Warning: using a very large value for 'i' +// may lead to a memory shortage and a panic: the caller is responsible +// for providing sensible parameters in line with their memory capacity. +func (b *BitSet) SetTo(i uint, value bool) *BitSet { + if value { + return b.Set(i) + } + return b.Clear(i) +} + +// Flip bit at i. +// If i>= Cap(), this function will panic. +// Warning: using a very large value for 'i' +// may lead to a memory shortage and a panic: the caller is responsible +// for providing sensible parameters in line with their memory capacity. +func (b *BitSet) Flip(i uint) *BitSet { + if i >= b.length { + return b.Set(i) + } + b.set[i>>log2WordSize] ^= 1 << (i & (wordSize - 1)) + return b +} + +// Shrink shrinks BitSet so that the provided value is the last possible +// set value. It clears all bits > the provided index and reduces the size +// and length of the set. +// +// Note that the parameter value is not the new length in bits: it is the +// maximal value that can be stored in the bitset after the function call. +// The new length in bits is the parameter value + 1. 
Thus it is not possible +// to use this function to set the length to 0, the minimal value of the length +// after this function call is 1. +// +// A new slice is allocated to store the new bits, so you may see an increase in +// memory usage until the GC runs. Normally this should not be a problem, but if you +// have an extremely large BitSet its important to understand that the old BitSet will +// remain in memory until the GC frees it. +func (b *BitSet) Shrink(lastbitindex uint) *BitSet { + length := lastbitindex + 1 + idx := wordsNeeded(length) + if idx > len(b.set) { + return b + } + shrunk := make([]uint64, idx) + copy(shrunk, b.set[:idx]) + b.set = shrunk + b.length = length + b.set[idx-1] &= (allBits >> (uint64(64) - uint64(length&(wordSize-1)))) + return b +} + +// Compact shrinks BitSet to so that we preserve all set bits, while minimizing +// memory usage. Compact calls Shrink. +func (b *BitSet) Compact() *BitSet { + idx := len(b.set) - 1 + for ; idx >= 0 && b.set[idx] == 0; idx-- { + } + newlength := uint((idx + 1) << log2WordSize) + if newlength >= b.length { + return b // nothing to do + } + if newlength > 0 { + return b.Shrink(newlength - 1) + } + // We preserve one word + return b.Shrink(63) +} + +// InsertAt takes an index which indicates where a bit should be +// inserted. Then it shifts all the bits in the set to the left by 1, starting +// from the given index position, and sets the index position to 0. +// +// Depending on the size of your BitSet, and where you are inserting the new entry, +// this method could be extremely slow and in some cases might cause the entire BitSet +// to be recopied. 
+func (b *BitSet) InsertAt(idx uint) *BitSet { + insertAtElement := (idx >> log2WordSize) + + // if length of set is a multiple of wordSize we need to allocate more space first + if b.isLenExactMultiple() { + b.set = append(b.set, uint64(0)) + } + + var i uint + for i = uint(len(b.set) - 1); i > insertAtElement; i-- { + // all elements above the position where we want to insert can simply by shifted + b.set[i] <<= 1 + + // we take the most significant bit of the previous element and set it as + // the least significant bit of the current element + b.set[i] |= (b.set[i-1] & 0x8000000000000000) >> 63 + } + + // generate a mask to extract the data that we need to shift left + // within the element where we insert a bit + dataMask := ^(uint64(1)< 0x40000 { + buffer.WriteString("...") + break + } + buffer.WriteString(strconv.FormatInt(int64(i), 10)) + i, e = b.NextSet(i + 1) + if e { + buffer.WriteString(",") + } + } + buffer.WriteString("}") + return buffer.String() +} + +// DeleteAt deletes the bit at the given index position from +// within the bitset +// All the bits residing on the left of the deleted bit get +// shifted right by 1 +// The running time of this operation may potentially be +// relatively slow, O(length) +func (b *BitSet) DeleteAt(i uint) *BitSet { + // the index of the slice element where we'll delete a bit + deleteAtElement := i >> log2WordSize + + // generate a mask for the data that needs to be shifted right + // within that slice element that gets modified + dataMask := ^((uint64(1) << (i & (wordSize - 1))) - 1) + + // extract the data that we'll shift right from the slice element + data := b.set[deleteAtElement] & dataMask + + // set the masked area to 0 while leaving the rest as it is + b.set[deleteAtElement] &= ^dataMask + + // shift the previously extracted data to the right and then + // set it in the previously masked area + b.set[deleteAtElement] |= (data >> 1) & dataMask + + // loop over all the consecutive slice elements to copy each + 
// lowest bit into the highest position of the previous element, + // then shift the entire content to the right by 1 + for i := int(deleteAtElement) + 1; i < len(b.set); i++ { + b.set[i-1] |= (b.set[i] & 1) << 63 + b.set[i] >>= 1 + } + + b.length = b.length - 1 + + return b +} + +// NextSet returns the next bit set from the specified index, +// including possibly the current index +// along with an error code (true = valid, false = no set bit found) +// for i,e := v.NextSet(0); e; i,e = v.NextSet(i + 1) {...} +// +// Users concerned with performance may want to use NextSetMany to +// retrieve several values at once. +func (b *BitSet) NextSet(i uint) (uint, bool) { + x := int(i >> log2WordSize) + if x >= len(b.set) { + return 0, false + } + w := b.set[x] + w = w >> (i & (wordSize - 1)) + if w != 0 { + return i + trailingZeroes64(w), true + } + x = x + 1 + for x < len(b.set) { + if b.set[x] != 0 { + return uint(x)*wordSize + trailingZeroes64(b.set[x]), true + } + x = x + 1 + + } + return 0, false +} + +// NextSetMany returns many next bit sets from the specified index, +// including possibly the current index and up to cap(buffer). +// If the returned slice has len zero, then no more set bits were found +// +// buffer := make([]uint, 256) // this should be reused +// j := uint(0) +// j, buffer = bitmap.NextSetMany(j, buffer) +// for ; len(buffer) > 0; j, buffer = bitmap.NextSetMany(j,buffer) { +// for k := range buffer { +// do something with buffer[k] +// } +// j += 1 +// } +// +// +// It is possible to retrieve all set bits as follow: +// +// indices := make([]uint, bitmap.Count()) +// bitmap.NextSetMany(0, indices) +// +// However if bitmap.Count() is large, it might be preferable to +// use several calls to NextSetMany, for performance reasons. 
+func (b *BitSet) NextSetMany(i uint, buffer []uint) (uint, []uint) { + myanswer := buffer + capacity := cap(buffer) + x := int(i >> log2WordSize) + if x >= len(b.set) || capacity == 0 { + return 0, myanswer[:0] + } + skip := i & (wordSize - 1) + word := b.set[x] >> skip + myanswer = myanswer[:capacity] + size := int(0) + for word != 0 { + r := trailingZeroes64(word) + t := word & ((^word) + 1) + myanswer[size] = r + i + size++ + if size == capacity { + goto End + } + word = word ^ t + } + x++ + for idx, word := range b.set[x:] { + for word != 0 { + r := trailingZeroes64(word) + t := word & ((^word) + 1) + myanswer[size] = r + (uint(x+idx) << 6) + size++ + if size == capacity { + goto End + } + word = word ^ t + } + } +End: + if size > 0 { + return myanswer[size-1], myanswer[:size] + } + return 0, myanswer[:0] +} + +// NextClear returns the next clear bit from the specified index, +// including possibly the current index +// along with an error code (true = valid, false = no bit found i.e. 
all bits are set) +func (b *BitSet) NextClear(i uint) (uint, bool) { + x := int(i >> log2WordSize) + if x >= len(b.set) { + return 0, false + } + w := b.set[x] + w = w >> (i & (wordSize - 1)) + wA := allBits >> (i & (wordSize - 1)) + index := i + trailingZeroes64(^w) + if w != wA && index < b.length { + return index, true + } + x++ + for x < len(b.set) { + index = uint(x)*wordSize + trailingZeroes64(^b.set[x]) + if b.set[x] != allBits && index < b.length { + return index, true + } + x++ + } + return 0, false +} + +// ClearAll clears the entire BitSet +func (b *BitSet) ClearAll() *BitSet { + if b != nil && b.set != nil { + for i := range b.set { + b.set[i] = 0 + } + } + return b +} + +// wordCount returns the number of words used in a bit set +func (b *BitSet) wordCount() int { + return len(b.set) +} + +// Clone this BitSet +func (b *BitSet) Clone() *BitSet { + c := New(b.length) + if b.set != nil { // Clone should not modify current object + copy(c.set, b.set) + } + return c +} + +// Copy into a destination BitSet +// Returning the size of the destination BitSet +// like array copy +func (b *BitSet) Copy(c *BitSet) (count uint) { + if c == nil { + return + } + if b.set != nil { // Copy should not modify current object + copy(c.set, b.set) + } + count = c.length + if b.length < c.length { + count = b.length + } + return +} + +// Count (number of set bits). +// Also known as "popcount" or "popularity count". +func (b *BitSet) Count() uint { + if b != nil && b.set != nil { + return uint(popcntSlice(b.set)) + } + return 0 +} + +// Equal tests the equivalence of two BitSets. 
+// False if they are of different sizes, otherwise true +// only if all the same bits are set +func (b *BitSet) Equal(c *BitSet) bool { + if c == nil || b == nil { + return c == b + } + if b.length != c.length { + return false + } + if b.length == 0 { // if they have both length == 0, then could have nil set + return true + } + // testing for equality shoud not transform the bitset (no call to safeSet) + + for p, v := range b.set { + if c.set[p] != v { + return false + } + } + return true +} + +func panicIfNull(b *BitSet) { + if b == nil { + panic(Error("BitSet must not be null")) + } +} + +// Difference of base set and other set +// This is the BitSet equivalent of &^ (and not) +func (b *BitSet) Difference(compare *BitSet) (result *BitSet) { + panicIfNull(b) + panicIfNull(compare) + result = b.Clone() // clone b (in case b is bigger than compare) + l := int(compare.wordCount()) + if l > int(b.wordCount()) { + l = int(b.wordCount()) + } + for i := 0; i < l; i++ { + result.set[i] = b.set[i] &^ compare.set[i] + } + return +} + +// DifferenceCardinality computes the cardinality of the differnce +func (b *BitSet) DifferenceCardinality(compare *BitSet) uint { + panicIfNull(b) + panicIfNull(compare) + l := int(compare.wordCount()) + if l > int(b.wordCount()) { + l = int(b.wordCount()) + } + cnt := uint64(0) + cnt += popcntMaskSlice(b.set[:l], compare.set[:l]) + cnt += popcntSlice(b.set[l:]) + return uint(cnt) +} + +// InPlaceDifference computes the difference of base set and other set +// This is the BitSet equivalent of &^ (and not) +func (b *BitSet) InPlaceDifference(compare *BitSet) { + panicIfNull(b) + panicIfNull(compare) + l := int(compare.wordCount()) + if l > int(b.wordCount()) { + l = int(b.wordCount()) + } + for i := 0; i < l; i++ { + b.set[i] &^= compare.set[i] + } +} + +// Convenience function: return two bitsets ordered by +// increasing length. 
Note: neither can be nil +func sortByLength(a *BitSet, b *BitSet) (ap *BitSet, bp *BitSet) { + if a.length <= b.length { + ap, bp = a, b + } else { + ap, bp = b, a + } + return +} + +// Intersection of base set and other set +// This is the BitSet equivalent of & (and) +func (b *BitSet) Intersection(compare *BitSet) (result *BitSet) { + panicIfNull(b) + panicIfNull(compare) + b, compare = sortByLength(b, compare) + result = New(b.length) + for i, word := range b.set { + result.set[i] = word & compare.set[i] + } + return +} + +// IntersectionCardinality computes the cardinality of the union +func (b *BitSet) IntersectionCardinality(compare *BitSet) uint { + panicIfNull(b) + panicIfNull(compare) + b, compare = sortByLength(b, compare) + cnt := popcntAndSlice(b.set, compare.set) + return uint(cnt) +} + +// InPlaceIntersection destructively computes the intersection of +// base set and the compare set. +// This is the BitSet equivalent of & (and) +func (b *BitSet) InPlaceIntersection(compare *BitSet) { + panicIfNull(b) + panicIfNull(compare) + l := int(compare.wordCount()) + if l > int(b.wordCount()) { + l = int(b.wordCount()) + } + for i := 0; i < l; i++ { + b.set[i] &= compare.set[i] + } + for i := l; i < len(b.set); i++ { + b.set[i] = 0 + } + if compare.length > 0 { + b.extendSetMaybe(compare.length - 1) + } +} + +// Union of base set and other set +// This is the BitSet equivalent of | (or) +func (b *BitSet) Union(compare *BitSet) (result *BitSet) { + panicIfNull(b) + panicIfNull(compare) + b, compare = sortByLength(b, compare) + result = compare.Clone() + for i, word := range b.set { + result.set[i] = word | compare.set[i] + } + return +} + +// UnionCardinality computes the cardinality of the uniton of the base set +// and the compare set. 
+func (b *BitSet) UnionCardinality(compare *BitSet) uint { + panicIfNull(b) + panicIfNull(compare) + b, compare = sortByLength(b, compare) + cnt := popcntOrSlice(b.set, compare.set) + if len(compare.set) > len(b.set) { + cnt += popcntSlice(compare.set[len(b.set):]) + } + return uint(cnt) +} + +// InPlaceUnion creates the destructive union of base set and compare set. +// This is the BitSet equivalent of | (or). +func (b *BitSet) InPlaceUnion(compare *BitSet) { + panicIfNull(b) + panicIfNull(compare) + l := int(compare.wordCount()) + if l > int(b.wordCount()) { + l = int(b.wordCount()) + } + if compare.length > 0 { + b.extendSetMaybe(compare.length - 1) + } + for i := 0; i < l; i++ { + b.set[i] |= compare.set[i] + } + if len(compare.set) > l { + for i := l; i < len(compare.set); i++ { + b.set[i] = compare.set[i] + } + } +} + +// SymmetricDifference of base set and other set +// This is the BitSet equivalent of ^ (xor) +func (b *BitSet) SymmetricDifference(compare *BitSet) (result *BitSet) { + panicIfNull(b) + panicIfNull(compare) + b, compare = sortByLength(b, compare) + // compare is bigger, so clone it + result = compare.Clone() + for i, word := range b.set { + result.set[i] = word ^ compare.set[i] + } + return +} + +// SymmetricDifferenceCardinality computes the cardinality of the symmetric difference +func (b *BitSet) SymmetricDifferenceCardinality(compare *BitSet) uint { + panicIfNull(b) + panicIfNull(compare) + b, compare = sortByLength(b, compare) + cnt := popcntXorSlice(b.set, compare.set) + if len(compare.set) > len(b.set) { + cnt += popcntSlice(compare.set[len(b.set):]) + } + return uint(cnt) +} + +// InPlaceSymmetricDifference creates the destructive SymmetricDifference of base set and other set +// This is the BitSet equivalent of ^ (xor) +func (b *BitSet) InPlaceSymmetricDifference(compare *BitSet) { + panicIfNull(b) + panicIfNull(compare) + l := int(compare.wordCount()) + if l > int(b.wordCount()) { + l = int(b.wordCount()) + } + if compare.length > 0 
{ + b.extendSetMaybe(compare.length - 1) + } + for i := 0; i < l; i++ { + b.set[i] ^= compare.set[i] + } + if len(compare.set) > l { + for i := l; i < len(compare.set); i++ { + b.set[i] = compare.set[i] + } + } +} + +// Is the length an exact multiple of word sizes? +func (b *BitSet) isLenExactMultiple() bool { + return b.length%wordSize == 0 +} + +// Clean last word by setting unused bits to 0 +func (b *BitSet) cleanLastWord() { + if !b.isLenExactMultiple() { + b.set[len(b.set)-1] &= allBits >> (wordSize - b.length%wordSize) + } +} + +// Complement computes the (local) complement of a biset (up to length bits) +func (b *BitSet) Complement() (result *BitSet) { + panicIfNull(b) + result = New(b.length) + for i, word := range b.set { + result.set[i] = ^word + } + result.cleanLastWord() + return +} + +// All returns true if all bits are set, false otherwise. Returns true for +// empty sets. +func (b *BitSet) All() bool { + panicIfNull(b) + return b.Count() == b.length +} + +// None returns true if no bit is set, false otherwise. Returns true for +// empty sets. +func (b *BitSet) None() bool { + panicIfNull(b) + if b != nil && b.set != nil { + for _, word := range b.set { + if word > 0 { + return false + } + } + return true + } + return true +} + +// Any returns true if any bit is set, false otherwise +func (b *BitSet) Any() bool { + panicIfNull(b) + return !b.None() +} + +// IsSuperSet returns true if this is a superset of the other set +func (b *BitSet) IsSuperSet(other *BitSet) bool { + for i, e := other.NextSet(0); e; i, e = other.NextSet(i + 1) { + if !b.Test(i) { + return false + } + } + return true +} + +// IsStrictSuperSet returns true if this is a strict superset of the other set +func (b *BitSet) IsStrictSuperSet(other *BitSet) bool { + return b.Count() > other.Count() && b.IsSuperSet(other) +} + +// DumpAsBits dumps a bit set as a string of bits +func (b *BitSet) DumpAsBits() string { + if b.set == nil { + return "." 
+ } + buffer := bytes.NewBufferString("") + i := len(b.set) - 1 + for ; i >= 0; i-- { + fmt.Fprintf(buffer, "%064b.", b.set[i]) + } + return buffer.String() +} + +// BinaryStorageSize returns the binary storage requirements +func (b *BitSet) BinaryStorageSize() int { + return binary.Size(uint64(0)) + binary.Size(b.set) +} + +// WriteTo writes a BitSet to a stream +func (b *BitSet) WriteTo(stream io.Writer) (int64, error) { + length := uint64(b.length) + + // Write length + err := binary.Write(stream, binaryOrder, length) + if err != nil { + return 0, err + } + + // Write set + err = binary.Write(stream, binaryOrder, b.set) + return int64(b.BinaryStorageSize()), err +} + +// ReadFrom reads a BitSet from a stream written using WriteTo +func (b *BitSet) ReadFrom(stream io.Reader) (int64, error) { + var length uint64 + + // Read length first + err := binary.Read(stream, binaryOrder, &length) + if err != nil { + return 0, err + } + newset := New(uint(length)) + + if uint64(newset.length) != length { + return 0, errors.New("unmarshalling error: type mismatch") + } + + // Read remaining bytes as set + err = binary.Read(stream, binaryOrder, newset.set) + if err != nil { + return 0, err + } + + *b = *newset + return int64(b.BinaryStorageSize()), nil +} + +// MarshalBinary encodes a BitSet into a binary form and returns the result. +func (b *BitSet) MarshalBinary() ([]byte, error) { + var buf bytes.Buffer + writer := bufio.NewWriter(&buf) + + _, err := b.WriteTo(writer) + if err != nil { + return []byte{}, err + } + + err = writer.Flush() + + return buf.Bytes(), err +} + +// UnmarshalBinary decodes the binary form generated by MarshalBinary. 
+func (b *BitSet) UnmarshalBinary(data []byte) error { + buf := bytes.NewReader(data) + reader := bufio.NewReader(buf) + + _, err := b.ReadFrom(reader) + + return err +} + +// MarshalJSON marshals a BitSet as a JSON structure +func (b *BitSet) MarshalJSON() ([]byte, error) { + buffer := bytes.NewBuffer(make([]byte, 0, b.BinaryStorageSize())) + _, err := b.WriteTo(buffer) + if err != nil { + return nil, err + } + + // URLEncode all bytes + return json.Marshal(base64Encoding.EncodeToString(buffer.Bytes())) +} + +// UnmarshalJSON unmarshals a BitSet from JSON created using MarshalJSON +func (b *BitSet) UnmarshalJSON(data []byte) error { + // Unmarshal as string + var s string + err := json.Unmarshal(data, &s) + if err != nil { + return err + } + + // URLDecode string + buf, err := base64Encoding.DecodeString(s) + if err != nil { + return err + } + + _, err = b.ReadFrom(bytes.NewReader(buf)) + return err +} diff --git a/vendor/github.com/willf/bitset/popcnt.go b/vendor/github.com/willf/bitset/popcnt.go new file mode 100644 index 000000000000..76577a838284 --- /dev/null +++ b/vendor/github.com/willf/bitset/popcnt.go @@ -0,0 +1,53 @@ +package bitset + +// bit population count, take from +// https://code.google.com/p/go/issues/detail?id=4988#c11 +// credit: https://code.google.com/u/arnehormann/ +func popcount(x uint64) (n uint64) { + x -= (x >> 1) & 0x5555555555555555 + x = (x>>2)&0x3333333333333333 + x&0x3333333333333333 + x += x >> 4 + x &= 0x0f0f0f0f0f0f0f0f + x *= 0x0101010101010101 + return x >> 56 +} + +func popcntSliceGo(s []uint64) uint64 { + cnt := uint64(0) + for _, x := range s { + cnt += popcount(x) + } + return cnt +} + +func popcntMaskSliceGo(s, m []uint64) uint64 { + cnt := uint64(0) + for i := range s { + cnt += popcount(s[i] &^ m[i]) + } + return cnt +} + +func popcntAndSliceGo(s, m []uint64) uint64 { + cnt := uint64(0) + for i := range s { + cnt += popcount(s[i] & m[i]) + } + return cnt +} + +func popcntOrSliceGo(s, m []uint64) uint64 { + cnt := 
uint64(0) + for i := range s { + cnt += popcount(s[i] | m[i]) + } + return cnt +} + +func popcntXorSliceGo(s, m []uint64) uint64 { + cnt := uint64(0) + for i := range s { + cnt += popcount(s[i] ^ m[i]) + } + return cnt +} diff --git a/vendor/github.com/willf/bitset/popcnt_19.go b/vendor/github.com/willf/bitset/popcnt_19.go new file mode 100644 index 000000000000..fc8ff4f367c2 --- /dev/null +++ b/vendor/github.com/willf/bitset/popcnt_19.go @@ -0,0 +1,45 @@ +// +build go1.9 + +package bitset + +import "math/bits" + +func popcntSlice(s []uint64) uint64 { + var cnt int + for _, x := range s { + cnt += bits.OnesCount64(x) + } + return uint64(cnt) +} + +func popcntMaskSlice(s, m []uint64) uint64 { + var cnt int + for i := range s { + cnt += bits.OnesCount64(s[i] &^ m[i]) + } + return uint64(cnt) +} + +func popcntAndSlice(s, m []uint64) uint64 { + var cnt int + for i := range s { + cnt += bits.OnesCount64(s[i] & m[i]) + } + return uint64(cnt) +} + +func popcntOrSlice(s, m []uint64) uint64 { + var cnt int + for i := range s { + cnt += bits.OnesCount64(s[i] | m[i]) + } + return uint64(cnt) +} + +func popcntXorSlice(s, m []uint64) uint64 { + var cnt int + for i := range s { + cnt += bits.OnesCount64(s[i] ^ m[i]) + } + return uint64(cnt) +} diff --git a/vendor/github.com/willf/bitset/popcnt_amd64.go b/vendor/github.com/willf/bitset/popcnt_amd64.go new file mode 100644 index 000000000000..4cf64f24ad03 --- /dev/null +++ b/vendor/github.com/willf/bitset/popcnt_amd64.go @@ -0,0 +1,68 @@ +// +build !go1.9 +// +build amd64,!appengine + +package bitset + +// *** the following functions are defined in popcnt_amd64.s + +//go:noescape + +func hasAsm() bool + +// useAsm is a flag used to select the GO or ASM implementation of the popcnt function +var useAsm = hasAsm() + +//go:noescape + +func popcntSliceAsm(s []uint64) uint64 + +//go:noescape + +func popcntMaskSliceAsm(s, m []uint64) uint64 + +//go:noescape + +func popcntAndSliceAsm(s, m []uint64) uint64 + +//go:noescape + +func 
popcntOrSliceAsm(s, m []uint64) uint64 + +//go:noescape + +func popcntXorSliceAsm(s, m []uint64) uint64 + +func popcntSlice(s []uint64) uint64 { + if useAsm { + return popcntSliceAsm(s) + } + return popcntSliceGo(s) +} + +func popcntMaskSlice(s, m []uint64) uint64 { + if useAsm { + return popcntMaskSliceAsm(s, m) + } + return popcntMaskSliceGo(s, m) +} + +func popcntAndSlice(s, m []uint64) uint64 { + if useAsm { + return popcntAndSliceAsm(s, m) + } + return popcntAndSliceGo(s, m) +} + +func popcntOrSlice(s, m []uint64) uint64 { + if useAsm { + return popcntOrSliceAsm(s, m) + } + return popcntOrSliceGo(s, m) +} + +func popcntXorSlice(s, m []uint64) uint64 { + if useAsm { + return popcntXorSliceAsm(s, m) + } + return popcntXorSliceGo(s, m) +} diff --git a/vendor/github.com/willf/bitset/popcnt_amd64.s b/vendor/github.com/willf/bitset/popcnt_amd64.s new file mode 100644 index 000000000000..666c0dcc17f5 --- /dev/null +++ b/vendor/github.com/willf/bitset/popcnt_amd64.s @@ -0,0 +1,104 @@ +// +build !go1.9 +// +build amd64,!appengine + +TEXT ·hasAsm(SB),4,$0-1 +MOVQ $1, AX +CPUID +SHRQ $23, CX +ANDQ $1, CX +MOVB CX, ret+0(FP) +RET + +#define POPCNTQ_DX_DX BYTE $0xf3; BYTE $0x48; BYTE $0x0f; BYTE $0xb8; BYTE $0xd2 + +TEXT ·popcntSliceAsm(SB),4,$0-32 +XORQ AX, AX +MOVQ s+0(FP), SI +MOVQ s_len+8(FP), CX +TESTQ CX, CX +JZ popcntSliceEnd +popcntSliceLoop: +BYTE $0xf3; BYTE $0x48; BYTE $0x0f; BYTE $0xb8; BYTE $0x16 // POPCNTQ (SI), DX +ADDQ DX, AX +ADDQ $8, SI +LOOP popcntSliceLoop +popcntSliceEnd: +MOVQ AX, ret+24(FP) +RET + +TEXT ·popcntMaskSliceAsm(SB),4,$0-56 +XORQ AX, AX +MOVQ s+0(FP), SI +MOVQ s_len+8(FP), CX +TESTQ CX, CX +JZ popcntMaskSliceEnd +MOVQ m+24(FP), DI +popcntMaskSliceLoop: +MOVQ (DI), DX +NOTQ DX +ANDQ (SI), DX +POPCNTQ_DX_DX +ADDQ DX, AX +ADDQ $8, SI +ADDQ $8, DI +LOOP popcntMaskSliceLoop +popcntMaskSliceEnd: +MOVQ AX, ret+48(FP) +RET + +TEXT ·popcntAndSliceAsm(SB),4,$0-56 +XORQ AX, AX +MOVQ s+0(FP), SI +MOVQ s_len+8(FP), CX +TESTQ CX, CX +JZ 
popcntAndSliceEnd +MOVQ m+24(FP), DI +popcntAndSliceLoop: +MOVQ (DI), DX +ANDQ (SI), DX +POPCNTQ_DX_DX +ADDQ DX, AX +ADDQ $8, SI +ADDQ $8, DI +LOOP popcntAndSliceLoop +popcntAndSliceEnd: +MOVQ AX, ret+48(FP) +RET + +TEXT ·popcntOrSliceAsm(SB),4,$0-56 +XORQ AX, AX +MOVQ s+0(FP), SI +MOVQ s_len+8(FP), CX +TESTQ CX, CX +JZ popcntOrSliceEnd +MOVQ m+24(FP), DI +popcntOrSliceLoop: +MOVQ (DI), DX +ORQ (SI), DX +POPCNTQ_DX_DX +ADDQ DX, AX +ADDQ $8, SI +ADDQ $8, DI +LOOP popcntOrSliceLoop +popcntOrSliceEnd: +MOVQ AX, ret+48(FP) +RET + +TEXT ·popcntXorSliceAsm(SB),4,$0-56 +XORQ AX, AX +MOVQ s+0(FP), SI +MOVQ s_len+8(FP), CX +TESTQ CX, CX +JZ popcntXorSliceEnd +MOVQ m+24(FP), DI +popcntXorSliceLoop: +MOVQ (DI), DX +XORQ (SI), DX +POPCNTQ_DX_DX +ADDQ DX, AX +ADDQ $8, SI +ADDQ $8, DI +LOOP popcntXorSliceLoop +popcntXorSliceEnd: +MOVQ AX, ret+48(FP) +RET diff --git a/vendor/github.com/willf/bitset/popcnt_generic.go b/vendor/github.com/willf/bitset/popcnt_generic.go new file mode 100644 index 000000000000..21e0ff7b4fc5 --- /dev/null +++ b/vendor/github.com/willf/bitset/popcnt_generic.go @@ -0,0 +1,24 @@ +// +build !go1.9 +// +build !amd64 appengine + +package bitset + +func popcntSlice(s []uint64) uint64 { + return popcntSliceGo(s) +} + +func popcntMaskSlice(s, m []uint64) uint64 { + return popcntMaskSliceGo(s, m) +} + +func popcntAndSlice(s, m []uint64) uint64 { + return popcntAndSliceGo(s, m) +} + +func popcntOrSlice(s, m []uint64) uint64 { + return popcntOrSliceGo(s, m) +} + +func popcntXorSlice(s, m []uint64) uint64 { + return popcntXorSliceGo(s, m) +} diff --git a/vendor/github.com/willf/bitset/trailing_zeros_18.go b/vendor/github.com/willf/bitset/trailing_zeros_18.go new file mode 100644 index 000000000000..c52b61be9fc2 --- /dev/null +++ b/vendor/github.com/willf/bitset/trailing_zeros_18.go @@ -0,0 +1,14 @@ +// +build !go1.9 + +package bitset + +var deBruijn = [...]byte{ + 0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4, + 62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 
33, 30, 24, 18, 12, 5, + 63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11, + 54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6, +} + +func trailingZeroes64(v uint64) uint { + return uint(deBruijn[((v&-v)*0x03f79d71b4ca8b09)>>58]) +} diff --git a/vendor/github.com/willf/bitset/trailing_zeros_19.go b/vendor/github.com/willf/bitset/trailing_zeros_19.go new file mode 100644 index 000000000000..36a988e714d1 --- /dev/null +++ b/vendor/github.com/willf/bitset/trailing_zeros_19.go @@ -0,0 +1,9 @@ +// +build go1.9 + +package bitset + +import "math/bits" + +func trailingZeroes64(v uint64) uint { + return uint(bits.TrailingZeros64(v)) +} diff --git a/vendor/github.com/willf/bloom/.gitignore b/vendor/github.com/willf/bloom/.gitignore new file mode 100644 index 000000000000..5c204d28b0e3 --- /dev/null +++ b/vendor/github.com/willf/bloom/.gitignore @@ -0,0 +1,26 @@ +# Compiled Object files, Static and Dynamic libs (Shared Objects) +*.o +*.a +*.so + +# Folders +_obj +_test + +# Architecture specific extensions/prefixes +*.[568vq] +[568vq].out + +*.cgo1.go +*.cgo2.c +_cgo_defun.c +_cgo_gotypes.go +_cgo_export.* + +_testmain.go + +*.exe +*.test +*.prof + +target diff --git a/vendor/github.com/willf/bloom/.travis.yml b/vendor/github.com/willf/bloom/.travis.yml new file mode 100644 index 000000000000..7b8fd3006492 --- /dev/null +++ b/vendor/github.com/willf/bloom/.travis.yml @@ -0,0 +1,38 @@ +language: go + +sudo: false + +branches: + except: + - release + +branches: + only: + - master + - develop + - travis + +go: + - 1.8 + - tip + +matrix: + allow_failures: + - go: tip + +before_install: + - if [ -n "$GH_USER" ]; then git config --global github.user ${GH_USER}; fi; + - if [ -n "$GH_TOKEN" ]; then git config --global github.token ${GH_TOKEN}; fi; + - go get github.com/mattn/goveralls + +before_script: + - make deps + +script: + - make qa + +after_failure: + - cat ./target/test/report.xml + +after_success: + - if [ "$TRAVIS_GO_VERSION" = "1.8" ]; then 
$HOME/gopath/bin/goveralls -covermode=count -coverprofile=target/report/coverage.out -service=travis-ci; fi; diff --git a/vendor/github.com/willf/bloom/LICENSE b/vendor/github.com/willf/bloom/LICENSE new file mode 100644 index 000000000000..3b9d36aa6e26 --- /dev/null +++ b/vendor/github.com/willf/bloom/LICENSE @@ -0,0 +1,24 @@ +Copyright (c) 2014 Will Fitzgerald. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/vendor/github.com/willf/bloom/Makefile b/vendor/github.com/willf/bloom/Makefile new file mode 100644 index 000000000000..88f2e343790c --- /dev/null +++ b/vendor/github.com/willf/bloom/Makefile @@ -0,0 +1,197 @@ +# MAKEFILE +# +# @author Nicola Asuni +# @link https://github.com/willf/bloom +# ------------------------------------------------------------------------------ + +# List special make targets that are not associated with files +.PHONY: help all test format fmtcheck vet lint coverage cyclo ineffassign misspell structcheck varcheck errcheck gosimple astscan qa deps clean nuke + +# Use bash as shell (Note: Ubuntu now uses dash which doesn't support PIPESTATUS). +SHELL=/bin/bash + +# CVS path (path to the parent dir containing the project) +CVSPATH=github.com/willf + +# Project owner +OWNER=willf + +# Project vendor +VENDOR=willf + +# Project name +PROJECT=bloom + +# Project version +VERSION=$(shell cat VERSION) + +# Name of RPM or DEB package +PKGNAME=${VENDOR}-${PROJECT} + +# Current directory +CURRENTDIR=$(shell pwd) + +# GO lang path +ifneq ($(GOPATH),) + ifeq ($(findstring $(GOPATH),$(CURRENTDIR)),) + # the defined GOPATH is not valid + GOPATH= + endif +endif +ifeq ($(GOPATH),) + # extract the GOPATH + GOPATH=$(firstword $(subst /src/, ,$(CURRENTDIR))) +endif + +# --- MAKE TARGETS --- + +# Display general help about this command +help: + @echo "" + @echo "$(PROJECT) Makefile." 
+ @echo "GOPATH=$(GOPATH)" + @echo "The following commands are available:" + @echo "" + @echo " make qa : Run all the tests" + @echo " make test : Run the unit tests" + @echo "" + @echo " make format : Format the source code" + @echo " make fmtcheck : Check if the source code has been formatted" + @echo " make vet : Check for suspicious constructs" + @echo " make lint : Check for style errors" + @echo " make coverage : Generate the coverage report" + @echo " make cyclo : Generate the cyclomatic complexity report" + @echo " make ineffassign : Detect ineffectual assignments" + @echo " make misspell : Detect commonly misspelled words in source files" + @echo " make structcheck : Find unused struct fields" + @echo " make varcheck : Find unused global variables and constants" + @echo " make errcheck : Check that error return values are used" + @echo " make gosimple : Suggest code simplifications" + @echo " make astscan : GO AST scanner" + @echo "" + @echo " make docs : Generate source code documentation" + @echo "" + @echo " make deps : Get the dependencies" + @echo " make clean : Remove any build artifact" + @echo " make nuke : Deletes any intermediate file" + @echo "" + +# Alias for help target +all: help + +# Run the unit tests +test: + @mkdir -p target/test + @mkdir -p target/report + GOPATH=$(GOPATH) \ + go test \ + -covermode=atomic \ + -bench=. \ + -race \ + -cpuprofile=target/report/cpu.out \ + -memprofile=target/report/mem.out \ + -mutexprofile=target/report/mutex.out \ + -coverprofile=target/report/coverage.out \ + -v ./... | \ + tee >(PATH=$(GOPATH)/bin:$(PATH) go-junit-report > target/test/report.xml); \ + test $${PIPESTATUS[0]} -eq 0 + +# Format the source code +format: + @find . -type f -name "*.go" -exec gofmt -s -w {} \; + +# Check if the source code has been formatted +fmtcheck: + @mkdir -p target + @find . -type f -name "*.go" -exec gofmt -s -d {} \; | tee target/format.diff + @test ! 
-s target/format.diff || { echo "ERROR: the source code has not been formatted - please use 'make format' or 'gofmt'"; exit 1; } + +# Check for syntax errors +vet: + GOPATH=$(GOPATH) go vet . + +# Check for style errors +lint: + GOPATH=$(GOPATH) PATH=$(GOPATH)/bin:$(PATH) golint . + +# Generate the coverage report +coverage: + @mkdir -p target/report + GOPATH=$(GOPATH) \ + go tool cover -html=target/report/coverage.out -o target/report/coverage.html + +# Report cyclomatic complexity +cyclo: + @mkdir -p target/report + GOPATH=$(GOPATH) gocyclo -avg ./ | tee target/report/cyclo.txt ; test $${PIPESTATUS[0]} -eq 0 + +# Detect ineffectual assignments +ineffassign: + @mkdir -p target/report + GOPATH=$(GOPATH) ineffassign ./ | tee target/report/ineffassign.txt ; test $${PIPESTATUS[0]} -eq 0 + +# Detect commonly misspelled words in source files +misspell: + @mkdir -p target/report + GOPATH=$(GOPATH) misspell -error ./ | tee target/report/misspell.txt ; test $${PIPESTATUS[0]} -eq 0 + +# Find unused struct fields +structcheck: + @mkdir -p target/report + GOPATH=$(GOPATH) structcheck -a ./ | tee target/report/structcheck.txt + +# Find unused global variables and constants +varcheck: + @mkdir -p target/report + GOPATH=$(GOPATH) varcheck -e ./ | tee target/report/varcheck.txt + +# Check that error return values are used +errcheck: + @mkdir -p target/report + GOPATH=$(GOPATH) errcheck ./ | tee target/report/errcheck.txt + +# Suggest code simplifications +gosimple: + @mkdir -p target/report + GOPATH=$(GOPATH) gosimple ./ | tee target/report/gosimple.txt + +# AST scanner +astscan: + @mkdir -p target/report + GOPATH=$(GOPATH) gas .//*.go | tee target/report/astscan.txt ; test $${PIPESTATUS[0]} -eq 0 + +# Generate source docs +docs: + @mkdir -p target/docs + nohup sh -c 'GOPATH=$(GOPATH) godoc -http=127.0.0.1:6060' > target/godoc_server.log 2>&1 & + wget --directory-prefix=target/docs/ --execute robots=off --retry-connrefused --recursive --no-parent --adjust-extension 
--page-requisites --convert-links http://127.0.0.1:6060/pkg/github.com/${VENDOR}/${PROJECT}/ ; kill -9 `lsof -ti :6060` + @echo ''${PKGNAME}' Documentation ...' > target/docs/index.html + +# Alias to run all quality-assurance checks +qa: fmtcheck test vet lint coverage cyclo ineffassign misspell structcheck varcheck errcheck gosimple astscan + +# --- INSTALL --- + +# Get the dependencies +deps: + GOPATH=$(GOPATH) go get ./... + GOPATH=$(GOPATH) go get github.com/golang/lint/golint + GOPATH=$(GOPATH) go get github.com/jstemmer/go-junit-report + GOPATH=$(GOPATH) go get github.com/axw/gocov/gocov + GOPATH=$(GOPATH) go get github.com/fzipp/gocyclo + GOPATH=$(GOPATH) go get github.com/gordonklaus/ineffassign + GOPATH=$(GOPATH) go get github.com/client9/misspell/cmd/misspell + GOPATH=$(GOPATH) go get github.com/opennota/check/cmd/structcheck + GOPATH=$(GOPATH) go get github.com/opennota/check/cmd/varcheck + GOPATH=$(GOPATH) go get github.com/kisielk/errcheck + GOPATH=$(GOPATH) go get honnef.co/go/tools/cmd/gosimple + GOPATH=$(GOPATH) go get github.com/GoASTScanner/gas + +# Remove any build artifact +clean: + GOPATH=$(GOPATH) go clean ./... + +# Deletes any intermediate file +nuke: + rm -rf ./target + GOPATH=$(GOPATH) go clean -i ./... 
diff --git a/vendor/github.com/willf/bloom/README.md b/vendor/github.com/willf/bloom/README.md new file mode 100644 index 000000000000..c554a35bf342 --- /dev/null +++ b/vendor/github.com/willf/bloom/README.md @@ -0,0 +1,69 @@ +Bloom filters +------------- + +[![Master Build Status](https://secure.travis-ci.org/willf/bloom.png?branch=master)](https://travis-ci.org/willf/bloom?branch=master) +[![Coverage Status](https://coveralls.io/repos/github/willf/bloom/badge.svg?branch=master)](https://coveralls.io/github/willf/bloom?branch=master) +[![Go Report Card](https://goreportcard.com/badge/github.com/willf/bloom)](https://goreportcard.com/report/github.com/willf/bloom) +[![GoDoc](https://godoc.org/github.com/willf/bloom?status.svg)](http://godoc.org/github.com/willf/bloom) + +A Bloom filter is a representation of a set of _n_ items, where the main +requirement is to make membership queries; _i.e._, whether an item is a +member of a set. + +A Bloom filter has two parameters: _m_, a maximum size (typically a reasonably large multiple of the cardinality of the set to represent) and _k_, the number of hashing functions on elements of the set. (The actual hashing functions are important, too, but this is not a parameter for this implementation). A Bloom filter is backed by a [BitSet](https://github.com/willf/bitset); a key is represented in the filter by setting the bits at each value of the hashing functions (modulo _m_). Set membership is done by _testing_ whether the bits at each value of the hashing functions (again, modulo _m_) are set. If so, the item is in the set. If the item is actually in the set, a Bloom filter will never fail (the true positive rate is 1.0); but it is susceptible to false positives. The art is to choose _k_ and _m_ correctly. + +In this implementation, the hashing function used is [murmurhash](https://github.com/spaolacci/murmur3), a non-cryptographic hashing function. + +This implementation accepts keys for setting and testing as `[]byte`.
Thus, to +add a string item, `"Love"`: + + n := uint(1000) + filter := bloom.New(20*n, 5) // load of 20, 5 keys + filter.Add([]byte("Love")) + +Similarly, to test if `"Love"` is in bloom: + + if filter.Test([]byte("Love")) + +For numeric data, I recommend that you look into the encoding/binary library. But, for example, to add a `uint32` to the filter: + + i := uint32(100) + n1 := make([]byte, 4) + binary.BigEndian.PutUint32(n1, i) + filter.Add(n1) + +Finally, there is a method to estimate the false positive rate of a particular +bloom filter for a set of size _n_: + + if filter.EstimateFalsePositiveRate(1000) > 0.001 + +Given the particular hashing scheme, it's best to be empirical about this. Note +that estimating the FP rate will clear the Bloom filter. + +Discussion here: [Bloom filter](https://groups.google.com/d/topic/golang-nuts/6MktecKi1bE/discussion) + +Godoc documentation: https://godoc.org/github.com/willf/bloom + +## Installation + +```bash +go get -u github.com/willf/bloom +``` + +## Contributing + +If you wish to contribute to this project, please branch and issue a pull request against master ("[GitHub Flow](https://guides.github.com/introduction/flow/)") + +This project includes a Makefile that allows you to test and build the project with simple commands.
+To see all available options: +```bash +make help +``` + +## Running all tests + +Before committing the code, please check if it passes all tests using (note: this will install some dependencies): +```bash +make deps +make qa +``` diff --git a/vendor/github.com/willf/bloom/VERSION b/vendor/github.com/willf/bloom/VERSION new file mode 100644 index 000000000000..50ffc5aa7f69 --- /dev/null +++ b/vendor/github.com/willf/bloom/VERSION @@ -0,0 +1 @@ +2.0.3 diff --git a/vendor/github.com/willf/bloom/bloom.go b/vendor/github.com/willf/bloom/bloom.go new file mode 100644 index 000000000000..743514f7ea5e --- /dev/null +++ b/vendor/github.com/willf/bloom/bloom.go @@ -0,0 +1,362 @@ +/* +Package bloom provides data structures and methods for creating Bloom filters. + +A Bloom filter is a representation of a set of _n_ items, where the main +requirement is to make membership queries; _i.e._, whether an item is a +member of a set. + +A Bloom filter has two parameters: _m_, a maximum size (typically a reasonably large +multiple of the cardinality of the set to represent) and _k_, the number of hashing +functions on elements of the set. (The actual hashing functions are important, too, +but this is not a parameter for this implementation). A Bloom filter is backed by +a BitSet; a key is represented in the filter by setting the bits at each value of the +hashing functions (modulo _m_). Set membership is done by _testing_ whether the +bits at each value of the hashing functions (again, modulo _m_) are set. If so, +the item is in the set. If the item is actually in the set, a Bloom filter will +never fail (the true positive rate is 1.0); but it is susceptible to false +positives. The art is to choose _k_ and _m_ correctly. + +In this implementation, the hashing function used is murmurhash, +a non-cryptographic hashing function. + +This implementation accepts keys for setting and testing as []byte.
Thus, to
+add a string item, "Love":
+
+    n := uint(1000)
+    filter := bloom.New(20*n, 5) // load of 20, 5 keys
+    filter.Add([]byte("Love"))
+
+Similarly, to test if "Love" is in bloom:
+
+    if filter.Test([]byte("Love"))
+
+For numeric data, I recommend that you look into the encoding/binary package. But,
+for example, to add a uint32 to the filter:
+
+    i := uint32(100)
+    n1 := make([]byte, 4)
+    binary.BigEndian.PutUint32(n1, i)
+    filter.Add(n1)
+
+Finally, there is a method to estimate the false positive rate of a particular
+Bloom filter for a set of size _n_:
+
+    if filter.EstimateFalsePositiveRate(1000) > 0.001
+
+Given the particular hashing scheme, it's best to be empirical about this. Note
+that estimating the FP rate will clear the Bloom filter.
+*/
+package bloom
+
+import (
+	"bytes"
+	"encoding/binary"
+	"encoding/json"
+	"fmt"
+	"io"
+	"math"
+
+	"github.com/spaolacci/murmur3"
+	"github.com/willf/bitset"
+)
+
+// A BloomFilter is a representation of a set of _n_ items, where the main
+// requirement is to make membership queries; _i.e._, whether an item is a
+// member of a set.
+type BloomFilter struct {
+	m uint           // number of bits; hashed locations are reduced modulo this value
+	k uint           // number of hash locations set or tested per key
+	b *bitset.BitSet // bit set holding the filter state
+}
+
+func max(x, y uint) uint { // larger of x and y
+	if x > y {
+		return x
+	}
+	return y
+}
+
+// New creates a new Bloom filter with _m_ bits and _k_ hashing functions.
+// The stored _m_ and _k_ are forced to be at least one to avoid panics; the bit set is still created from the caller's m.
+func New(m uint, k uint) *BloomFilter {
+	return &BloomFilter{max(1, m), max(1, k), bitset.New(m)}
+}
+
+// From creates a new Bloom filter with len(_data_) * 64 bits and _k_ hashing
+// functions. The data slice is passed to bitset.From unchanged; it is not reset.
+func From(data []uint64, k uint) *BloomFilter {
+	m := uint(len(data) * 64) // 64 bits per uint64 word
+	return &BloomFilter{m, k, bitset.From(data)} // unlike New, m and k are stored as given (no clamp to 1)
+}
+
+// baseHashes returns the four hash values of data that are used to create k
+// hashes: the murmur3 Sum128 pair of data, then the pair after one more byte is written
+func baseHashes(data []byte) [4]uint64 {
+	a1 := []byte{1} // extra byte written before the second Sum128 call
+	hasher := murmur3.New128()
+	hasher.Write(data) // #nosec
+	v1, v2 := hasher.Sum128()
+	hasher.Write(a1) // #nosec
+	v3, v4 := hasher.Sum128()
+	return [4]uint64{
+		v1, v2, v3, v4,
+	}
+}
+
+// location returns the ith hashed location: h[i%2] plus i times h[2] or h[3]; the value is not yet reduced modulo m
+func location(h [4]uint64, i uint) uint64 {
+	ii := uint64(i)
+	return h[ii%2] + ii*h[2+(((ii+(ii%2))%4)/2)]
+}
+
+// location returns the ith hashed location reduced modulo the filter size f.m
+func (f *BloomFilter) location(h [4]uint64, i uint) uint {
+	return uint(location(h, i) % uint64(f.m))
+}
+
+// EstimateParameters estimates the m and k to use for n items at false positive rate p.
+// Based on https://bitbucket.org/ww/bloom/src/829aa19d01d9/bloom.go
+// used with permission.
+func EstimateParameters(n uint, p float64) (m uint, k uint) {
+	m = uint(math.Ceil(-1 * float64(n) * math.Log(p) / math.Pow(math.Log(2), 2))) // m = ceil(-n*ln(p) / ln(2)^2)
+	k = uint(math.Ceil(math.Log(2) * float64(m) / float64(n))) // k = ceil(ln(2) * m/n)
+	return
+}
+
+// NewWithEstimates creates a new Bloom filter for about n items with fp
+// false positive rate, using the m and k computed by EstimateParameters
+func NewWithEstimates(n uint, fp float64) *BloomFilter {
+	m, k := EstimateParameters(n, fp)
+	return New(m, k)
+}
+
+// Cap returns the capacity, _m_, of a Bloom filter
+func (f *BloomFilter) Cap() uint {
+	return f.m
+}
+
+// K returns the number of hash functions used in the BloomFilter
+func (f *BloomFilter) K() uint {
+	return f.k
+}
+
+// Add sets the k bit locations derived from data. Returns the filter (allows chaining)
+func (f *BloomFilter) Add(data []byte) *BloomFilter {
+	h := baseHashes(data)
+	for i := uint(0); i < f.k; i++ {
+		f.b.Set(f.location(h, i))
+	}
+	return f
+}
+
+// Merge unions g's bit set into f; it returns an error when the two filters differ in m or k.
+func (f *BloomFilter) Merge(g *BloomFilter) error {
+	// Make sure the m's and k's are the same, otherwise merging has no real use.
+	if f.m != g.m {
+		return fmt.Errorf("m's don't match: %d != %d", f.m, g.m)
+	}
+
+	if f.k != g.k {
+		return fmt.Errorf("k's don't match: %d != %d", f.k, g.k)
+	}
+
+	f.b.InPlaceUnion(g.b)
+	return nil
+}
+
+// Copy creates a copy of a Bloom filter by merging f into a new filter built with f's m and k.
+func (f *BloomFilter) Copy() *BloomFilter {
+	fc := New(f.m, f.k)
+	fc.Merge(f) // #nosec
+	return fc
+}
+
+// AddString adds data to the Bloom Filter via Add. Returns the filter (allows chaining)
+func (f *BloomFilter) AddString(data string) *BloomFilter {
+	return f.Add([]byte(data))
+}
+
+// Test returns true if the data is in the BloomFilter, false otherwise.
+// If true, the result might be a false positive. If false, the data
+// is definitely not in the set. It stops at the first of the k locations that is unset.
+func (f *BloomFilter) Test(data []byte) bool {
+	h := baseHashes(data)
+	for i := uint(0); i < f.k; i++ {
+		if !f.b.Test(f.location(h, i)) {
+			return false
+		}
+	}
+	return true
+}
+
+// TestString returns true if the string is in the BloomFilter, false otherwise.
+// If true, the result might be a false positive. If false, the data
+// is definitely not in the set.
+func (f *BloomFilter) TestString(data string) bool {
+	return f.Test([]byte(data))
+}
+
+// TestLocations returns true if all locations are set in the BloomFilter, false
+// otherwise. Each location is reduced modulo f.m before it is tested.
+func (f *BloomFilter) TestLocations(locs []uint64) bool {
+	for i := 0; i < len(locs); i++ {
+		if !f.b.Test(uint(locs[i] % uint64(f.m))) {
+			return false
+		}
+	}
+	return true
+}
+
+// TestAndAdd is the equivalent to calling Test(data) then Add(data).
+// Returns the result of Test. All k locations are set even after an unset one was seen.
+func (f *BloomFilter) TestAndAdd(data []byte) bool {
+	present := true
+	h := baseHashes(data)
+	for i := uint(0); i < f.k; i++ {
+		l := f.location(h, i)
+		if !f.b.Test(l) {
+			present = false
+		}
+		f.b.Set(l)
+	}
+	return present
+}
+
+// TestAndAddString is the equivalent to calling Test(string) then Add(string).
+// Returns the result of Test.
+func (f *BloomFilter) TestAndAddString(data string) bool {
+	return f.TestAndAdd([]byte(data))
+}
+
+// ClearAll clears all the data in a Bloom filter, removing all keys
+func (f *BloomFilter) ClearAll() *BloomFilter {
+	f.b.ClearAll()
+	return f
+}
+
+// EstimateFalsePositiveRate returns, for a BloomFilter with an estimate of m bits
+// and k hash functions, what the false positive rate will be
+// while storing n entries; runs 100,000 tests. This is an empirical
+// test using integers as keys. As a side-effect, it clears the BloomFilter (before and after the run).
+func (f *BloomFilter) EstimateFalsePositiveRate(n uint) (fpRate float64) {
+	rounds := uint32(100000)
+	f.ClearAll()
+	n1 := make([]byte, 4)
+	for i := uint32(0); i < uint32(n); i++ {
+		binary.BigEndian.PutUint32(n1, i)
+		f.Add(n1)
+	}
+	fp := 0
+	// probe the keys n+1 .. n+rounds; the loop above added 0 .. n-1, so every hit counts as a false positive
+	for i := uint32(0); i < rounds; i++ {
+		binary.BigEndian.PutUint32(n1, i+uint32(n)+1)
+		if f.Test(n1) {
+			//fmt.Printf("%v failed.\n", i+uint32(n)+1)
+			fp++
+		}
+	}
+	fpRate = float64(fp) / (float64(rounds))
+	f.ClearAll()
+	return
+}
+
+// bloomFilterJSON is an unexported type for marshaling/unmarshaling BloomFilter struct.
+type bloomFilterJSON struct {
+	M uint           `json:"m"`
+	K uint           `json:"k"`
+	B *bitset.BitSet `json:"b"`
+}
+
+// MarshalJSON implements json.Marshaler interface.
+func (f *BloomFilter) MarshalJSON() ([]byte, error) {
+	return json.Marshal(bloomFilterJSON{f.m, f.k, f.b})
+}
+
+// UnmarshalJSON implements json.Unmarshaler interface; f is left untouched when decoding fails.
+func (f *BloomFilter) UnmarshalJSON(data []byte) error {
+	var j bloomFilterJSON
+	err := json.Unmarshal(data, &j)
+	if err != nil {
+		return err
+	}
+	f.m = j.M
+	f.k = j.K
+	f.b = j.B
+	return nil
+}
+
+// WriteTo writes a binary representation of the BloomFilter to an i/o stream:
+// m and k as big-endian uint64 values, then the bit set's own WriteTo output. It returns the number of bytes written.
+func (f *BloomFilter) WriteTo(stream io.Writer) (int64, error) {
+	err := binary.Write(stream, binary.BigEndian, uint64(f.m))
+	if err != nil {
+		return 0, err
+	}
+	err = binary.Write(stream, binary.BigEndian, uint64(f.k))
+	if err != nil {
+		return 0, err
+	}
+	numBytes, err := f.b.WriteTo(stream)
+	return numBytes + int64(2*binary.Size(uint64(0))), err // bit set bytes plus the two uint64 header fields
+}
+
+// ReadFrom reads a binary representation of the BloomFilter (such as might
+// have been written by WriteTo()) from an i/o stream. It returns the number
+// of bytes read, or 0 and the error when any of the three reads fails.
+func (f *BloomFilter) ReadFrom(stream io.Reader) (int64, error) {
+	var m, k uint64
+	err := binary.Read(stream, binary.BigEndian, &m)
+	if err != nil {
+		return 0, err
+	}
+	err = binary.Read(stream, binary.BigEndian, &k)
+	if err != nil {
+		return 0, err
+	}
+	b := &bitset.BitSet{}
+	numBytes, err := b.ReadFrom(stream)
+	if err != nil {
+		return 0, err
+	}
+	f.m = uint(m) // f is only modified once every read has succeeded
+	f.k = uint(k)
+	f.b = b
+	return numBytes + int64(2*binary.Size(uint64(0))), nil
+}
+
+// GobEncode implements gob.GobEncoder interface by returning the bytes produced by WriteTo.
+func (f *BloomFilter) GobEncode() ([]byte, error) {
+	var buf bytes.Buffer
+	_, err := f.WriteTo(&buf)
+	if err != nil {
+		return nil, err
+	}
+
+	return buf.Bytes(), nil
+}
+
+// GobDecode implements gob.GobDecoder interface by passing data to ReadFrom.
+func (f *BloomFilter) GobDecode(data []byte) error {
+	buf := bytes.NewBuffer(data)
+	_, err := f.ReadFrom(buf)
+
+	return err
+}
+
+// Equal tests for the equality of two Bloom filters: same m, same k and equal bit sets
+func (f *BloomFilter) Equal(g *BloomFilter) bool {
+	return f.m == g.m && f.k == g.k && f.b.Equal(g.b)
+}
+
+// Locations returns the k raw hash locations representing a data item; they are not reduced modulo any filter size.
+func Locations(data []byte, k uint) []uint64 {
+	locs := make([]uint64, k)
+
+	// calculate the k locations from the four base hashes of data
+	h := baseHashes(data)
+	for i := uint(0); i < k; i++ {
+		locs[i] = location(h, i)
+	}
+
+	return locs
+}
diff --git a/vendor/modules.txt b/vendor/modules.txt
index b016aa5c0e94..bc3c094b9c53 100644
--- a/vendor/modules.txt
+++ b/vendor/modules.txt
@@ -968,6 +968,9 @@ github.com/sirupsen/logrus
 # github.com/sony/gobreaker v0.4.1
 ## explicit; go 1.12
 github.com/sony/gobreaker
+# github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72
+## explicit
+github.com/spaolacci/murmur3
 # github.com/spf13/afero v1.6.0
 ## explicit; go 1.13
 github.com/spf13/afero
@@ -1052,6 +1055,12 @@ github.com/weaveworks/common/user
 # github.com/weaveworks/promrus v1.2.0
 ## explicit
 github.com/weaveworks/promrus
+# github.com/willf/bitset v1.1.11
+## explicit; go 1.14
+github.com/willf/bitset
+# github.com/willf/bloom v2.0.3+incompatible
+## explicit
+github.com/willf/bloom
 # github.com/xdg-go/pbkdf2 v1.0.0
 ## explicit; go 1.9
 github.com/xdg-go/pbkdf2