From e4bd3f014dd4b9c2423758b36ba7dcfb74759243 Mon Sep 17 00:00:00 2001 From: Denys Zhdanov Date: Sun, 23 Jun 2024 12:56:46 +0200 Subject: [PATCH 1/5] Adding Cuckoo filter to cache for new metrics --- cache/cache.go | 54 ++++++++++++++++++++++++++++++++++++++++----- cache/cache_test.go | 16 ++++++++++++++ 2 files changed, 64 insertions(+), 6 deletions(-) diff --git a/cache/cache.go b/cache/cache.go index 755f46514..95cae397a 100644 --- a/cache/cache.go +++ b/cache/cache.go @@ -6,6 +6,7 @@ Based on https://github.com/orcaman/concurrent-map import ( "fmt" + cuckoo "github.com/seiflotfy/cuckoofilter" "io" "sync" "sync/atomic" @@ -59,6 +60,7 @@ type Cache struct { } newMetricsChan chan string + newMetricCf *cuckoo.Filter throttle func(ps *points.Points, inCache bool) bool } @@ -95,6 +97,7 @@ func New() *Cache { c.settings.Store(&settings) c.writeoutQueue = NewWriteoutQueue(c) + c.newMetricCf = nil return c } @@ -130,6 +133,13 @@ func (c *Cache) SetMaxSize(maxSize uint32) { c.settings.Store(&newSettings) } +// SetBloomSize of bloom filter +func (c *Cache) SetBloomSize(bloomSize uint) { + if bloomSize > 0 { + c.newMetricCf = cuckoo.NewFilter(bloomSize) + } +} + func (c *Cache) SetTagsEnabled(value bool) { s := c.settings.Load().(*cacheSettings) newSettings := *s @@ -149,6 +159,10 @@ func (c *Cache) Stat(send helper.StatCallback) { send("metrics", float64(c.Len())) send("maxSize", float64(s.maxSize)) send("notConfirmed", float64(c.NotConfirmedLength())) + // report elements in bloom filter + if c.newMetricCf != nil { + send("cfCount", float64(c.newMetricCf.Count())) + } helper.SendAndSubstractUint32("queries", &c.stat.queryCnt, send) helper.SendAndSubstractUint32("tagsNormalizeErrors", &c.stat.tagsNormalizeErrors, send) @@ -259,6 +273,15 @@ func (c *Cache) DivertToXlog(w io.Writer) { c.settings.Store(&newSettings) } +// send metric to the new metrics channel +func sendMetricToNewMetricChan(c *Cache, metric string) { + select { + case c.newMetricsChan <- metric: + default: + atomic.AddUint32(&c.stat.droppedRealtimeIndex, 1) + } +} + // Sets the given value under the specified key. func (c *Cache) Add(p *points.Points) { s := c.settings.Load().(*cacheSettings) @@ -301,15 +324,22 @@ func (c *Cache) Add(p *points.Points) { if shard.adds != nil { shard.adds[p.Metric] = struct{}{} } - if c.newMetricsChan != nil { - select { - case c.newMetricsChan <- p.Metric: - default: - atomic.AddUint32(&c.stat.droppedRealtimeIndex, 1) - } + // if no bloom filter - just add metric to new channel + // if missed in cache, as it was before + if c.newMetricsChan != nil && c.newMetricCf == nil { + sendMetricToNewMetricChan(c, p.Metric) } } + // if we have both new metric channel and bloom filter + if c.newMetricsChan != nil && c.newMetricCf != nil { + // add metric to new metric channel if missed in bloom + // despite what we have it in cache (new behaviour) + if !c.newMetricCf.Lookup([]byte(p.Metric)) { + sendMetricToNewMetricChan(c, p.Metric) + } + c.newMetricCf.Insert([]byte(p.Metric)) + } atomic.AddInt32(&c.stat.size, int32(count)) } @@ -322,6 +352,12 @@ func (c *Cache) Pop(key string) (p *points.Points, exists bool) { delete(shard.items, key) shard.Unlock() + // we probably can skip that, but I'm a bit worry + // of effectiveness of bloom filter over time + if c.newMetricsChan != nil && c.newMetricCf != nil { + c.newMetricCf.Delete([]byte(p.Metric)) + } + if exists { atomic.AddInt32(&c.stat.size, -int32(len(p.Data))) } @@ -346,6 +382,12 @@ func (c *Cache) PopNotConfirmed(key string) (p *points.Points, exists bool) { } shard.Unlock() + // we probably can skip that, but I'm a bit worry + // of effectiveness of bloom filter over time + if c.newMetricsChan != nil && c.newMetricCf != nil { + c.newMetricCf.Delete([]byte(p.Metric)) + } + if exists { atomic.AddInt32(&c.stat.size, -int32(len(p.Data))) } diff --git a/cache/cache_test.go b/cache/cache_test.go index c64e2fc59..a7b413c81 100644 --- a/cache/cache_test.go +++ b/cache/cache_test.go @@ -12,12 +12,28 @@ func TestCache(t *testing.T) { c := New() + // init new metric channel + ch := make(chan string, 3) + c.SetNewMetricsChan(ch) + // set bloom size + c.SetBloomSize(3) + c.Add(points.OnePoint("hello.world", 42, 10)) if c.Size() != 1 { t.FailNow() } + // check if new metric added to bloom filter + if c.newMetricCf.Count() != 1 { + t.FailNow() + } + + // check if new metric added to new metric channel + if len(c.newMetricsChan) != 1 { + t.FailNow() + } + c.Add(points.OnePoint("hello.world", 15, 12)) if c.Size() != 2 { From 8927c07db9bcf7b54aac64c14ca20233dbe69c57 Mon Sep 17 00:00:00 2001 From: Denys Zhdanov Date: Sun, 23 Jun 2024 12:58:05 +0200 Subject: [PATCH 2/5] Sync mods and vendor --- go.mod | 2 + go.sum | 6 + vendor/github.com/dgryski/go-metro/LICENSE | 24 ++ vendor/github.com/dgryski/go-metro/README | 6 + vendor/github.com/dgryski/go-metro/metro.py | 199 ++++++++++ .../github.com/dgryski/go-metro/metro128.go | 94 +++++ vendor/github.com/dgryski/go-metro/metro64.go | 89 +++++ .../github.com/dgryski/go-metro/metro_amd64.s | 373 ++++++++++++++++++ .../github.com/dgryski/go-metro/metro_stub.go | 10 + .../seiflotfy/cuckoofilter/.gitignore | 26 ++ .../github.com/seiflotfy/cuckoofilter/LICENSE | 22 ++ .../seiflotfy/cuckoofilter/README.md | 62 +++ .../seiflotfy/cuckoofilter/bucket.go | 45 +++ .../seiflotfy/cuckoofilter/cuckoofilter.go | 165 ++++++++ .../github.com/seiflotfy/cuckoofilter/doc.go | 35 ++ .../cuckoofilter/scalable_cuckoofilter.go | 170 ++++++++ .../github.com/seiflotfy/cuckoofilter/util.go | 52 +++ vendor/modules.txt | 6 + 18 files changed, 1386 insertions(+) create mode 100644 vendor/github.com/dgryski/go-metro/LICENSE create mode 100644 vendor/github.com/dgryski/go-metro/README create mode 100644 vendor/github.com/dgryski/go-metro/metro.py create mode 100644 vendor/github.com/dgryski/go-metro/metro128.go create mode 100644 vendor/github.com/dgryski/go-metro/metro64.go create mode 100644 vendor/github.com/dgryski/go-metro/metro_amd64.s create mode 100644 vendor/github.com/dgryski/go-metro/metro_stub.go create mode 100644 vendor/github.com/seiflotfy/cuckoofilter/.gitignore create mode 100644 vendor/github.com/seiflotfy/cuckoofilter/LICENSE create mode 100644 vendor/github.com/seiflotfy/cuckoofilter/README.md create mode 100644 vendor/github.com/seiflotfy/cuckoofilter/bucket.go create mode 100644 vendor/github.com/seiflotfy/cuckoofilter/cuckoofilter.go create mode 100644 vendor/github.com/seiflotfy/cuckoofilter/doc.go create mode 100644 vendor/github.com/seiflotfy/cuckoofilter/scalable_cuckoofilter.go create mode 100644 vendor/github.com/seiflotfy/cuckoofilter/util.go diff --git a/go.mod b/go.mod index a237dd20d..c49580a34 100644 --- a/go.mod +++ b/go.mod @@ -34,6 +34,7 @@ require ( ) require ( + github.com/seiflotfy/cuckoofilter v0.0.0-20220411075957-e3b120b3f5fb golang.org/x/net v0.25.0 google.golang.org/protobuf v1.34.1 ) @@ -47,6 +48,7 @@ require ( github.com/beorn7/perks v1.0.1 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/davecgh/go-spew v1.1.1 // indirect + github.com/dgryski/go-metro v0.0.0-20211217172704-adc40b04c140 // indirect github.com/eapache/go-resiliency v1.6.0 // indirect github.com/eapache/go-xerial-snappy v0.0.0-20230731223053-c322873962e3 // indirect github.com/eapache/queue v1.1.0 // indirect diff --git a/go.sum b/go.sum index 664f41e60..432205b13 100644 --- a/go.sum +++ b/go.sum @@ -45,6 +45,9 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dgryski/go-expirecache v0.0.0-20170314133854-743ef98b2adb h1:X9MwMz6mVZEWcbhsri5TwaCm/Q4USFdAAmy1T7RCGjw= github.com/dgryski/go-expirecache v0.0.0-20170314133854-743ef98b2adb/go.mod h1:pD/+9DfmmQ+xvOI1fxUltHV69BxC1aeTILPQg9Kw1hE= +github.com/dgryski/go-metro v0.0.0-20200812162917-85c65e2d0165/go.mod h1:c9O8+fpSOX1DM8cPNSkX/qsBWdkD4yd2dpciOWQjpBw= +github.com/dgryski/go-metro v0.0.0-20211217172704-adc40b04c140 h1:y7y0Oa6UawqTFPCDw9JG6pdKt4F9pAhHv0B7FMGaGD0= +github.com/dgryski/go-metro v0.0.0-20211217172704-adc40b04c140/go.mod h1:c9O8+fpSOX1DM8cPNSkX/qsBWdkD4yd2dpciOWQjpBw= github.com/dgryski/go-trigram v0.0.0-20160407183937-79ec494e1ad0 h1:b+7JSiBM+hnLQjP/lXztks5hnLt1PS46hktG9VOJgzo= github.com/dgryski/go-trigram v0.0.0-20160407183937-79ec494e1ad0/go.mod h1:qzKC/DpcxK67zaSHdCmIv3L9WJViHVinYXN2S7l3RM8= github.com/dgryski/httputil v0.0.0-20160116060654-189c2918cd08 h1:BGzXzhmOgLHlylvQ27Tcgz235JvonPEgdMtpaZaeZt0= @@ -208,6 +211,8 @@ github.com/rcrowley/go-metrics v0.0.0-20201227073835-cf1acfcdf475 h1:N/ElC8H3+5X github.com/rcrowley/go-metrics v0.0.0-20201227073835-cf1acfcdf475/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4= github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= +github.com/seiflotfy/cuckoofilter v0.0.0-20220411075957-e3b120b3f5fb h1:XfLJSPIOUX+osiMraVgIrMR27uMXnRJWGm1+GL8/63U= +github.com/seiflotfy/cuckoofilter v0.0.0-20220411075957-e3b120b3f5fb/go.mod h1:bR6DqgcAl1zTcOX8/pE2Qkj9XO00eCNqmKb7lXP8EAg= github.com/sevlyar/go-daemon v0.1.6 h1:EUh1MDjEM4BI109Jign0EaknA2izkOyi0LV3ro3QQGs= github.com/sevlyar/go-daemon v0.1.6/go.mod h1:6dJpPatBT9eUwM5VCw9Bt6CdX9Tk6UWvhW3MebLDRKE= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= @@ -410,6 +415,7 @@ gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.0-20200605160147-a5ece683394c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/vendor/github.com/dgryski/go-metro/LICENSE b/vendor/github.com/dgryski/go-metro/LICENSE new file mode 100644 index 000000000..6243b617c --- /dev/null +++ b/vendor/github.com/dgryski/go-metro/LICENSE @@ -0,0 +1,24 @@ +This package is a mechanical translation of the reference C++ code for +MetroHash, available at https://github.com/jandrewrogers/MetroHash + +The MIT License (MIT) + +Copyright (c) 2016 Damian Gryski + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/vendor/github.com/dgryski/go-metro/README b/vendor/github.com/dgryski/go-metro/README new file mode 100644 index 000000000..5ecebb385 --- /dev/null +++ b/vendor/github.com/dgryski/go-metro/README @@ -0,0 +1,6 @@ +MetroHash + +This package is a mechanical translation of the reference C++ code for +MetroHash, available at https://github.com/jandrewrogers/MetroHash + +I claim no additional copyright over the original implementation. diff --git a/vendor/github.com/dgryski/go-metro/metro.py b/vendor/github.com/dgryski/go-metro/metro.py new file mode 100644 index 000000000..8dd4d26e6 --- /dev/null +++ b/vendor/github.com/dgryski/go-metro/metro.py @@ -0,0 +1,199 @@ +import peachpy.x86_64 + +k0 = 0xD6D018F5 +k1 = 0xA2AA033B +k2 = 0x62992FC1 +k3 = 0x30BC5B29 + +def advance(p,l,c): + ADD(p,c) + SUB(l,c) + +def imul(r,k): + t = GeneralPurposeRegister64() + MOV(t, k) + IMUL(r, t) + +def update32(v, p,idx, k, vadd): + r = GeneralPurposeRegister64() + MOV(r, [p + idx]) + imul(r, k) + ADD(v, r) + ROR(v, 29) + ADD(v, vadd) + +def final32(v, regs, keys): + r = GeneralPurposeRegister64() + MOV(r, v[regs[1]]) + ADD(r, v[regs[2]]) + imul(r, keys[0]) + ADD(r, v[regs[3]]) + ROR(r, 37) + imul(r, keys[1]) + XOR(v[regs[0]], r) + +seed = Argument(uint64_t) +buffer_base = Argument(ptr()) +buffer_len = Argument(int64_t) +buffer_cap = Argument(int64_t) + +def makeHash(name, args): + with Function(name, args, uint64_t) as function: + + reg_ptr = GeneralPurposeRegister64() + reg_ptr_len = GeneralPurposeRegister64() + reg_hash = GeneralPurposeRegister64() + + LOAD.ARGUMENT(reg_hash, seed) + LOAD.ARGUMENT(reg_ptr, buffer_base) + LOAD.ARGUMENT(reg_ptr_len, buffer_len) + + imul(reg_hash, k0) + r = GeneralPurposeRegister64() + MOV(r, k2*k0) + ADD(reg_hash, r) + + after32 = Label("after32") + + CMP(reg_ptr_len, 32) + JL(after32) + v = [GeneralPurposeRegister64() for _ in range(4)] + for i in range(4): + MOV(v[i], reg_hash) + + with Loop() as loop: + update32(v[0], reg_ptr, 0, k0, v[2]) + update32(v[1], reg_ptr, 8, k1, v[3]) + update32(v[2], reg_ptr, 16, k2, v[0]) + update32(v[3], reg_ptr, 24, k3, v[1]) + + ADD(reg_ptr, 32) + SUB(reg_ptr_len, 32) + CMP(reg_ptr_len, 32) + JGE(loop.begin) + + final32(v, [2,0,3,1], [k0, k1]) + final32(v, [3,1,2,0], [k1, k0]) + final32(v, [0,0,2,3], [k0, k1]) + final32(v, [1,1,3,2], [k1, k0]) + + XOR(v[0], v[1]) + ADD(reg_hash, v[0]) + + LABEL(after32) + + after16 = Label("after16") + CMP(reg_ptr_len, 16) + JL(after16) + + for i in range(2): + MOV(v[i], [reg_ptr]) + imul(v[i], k2) + ADD(v[i], reg_hash) + + advance(reg_ptr, reg_ptr_len, 8) + + ROR(v[i], 29) + imul(v[i], k3) + + r = GeneralPurposeRegister64() + MOV(r, v[0]) + imul(r, k0) + ROR(r, 21) + ADD(r, v[1]) + XOR(v[0], r) + + MOV(r, v[1]) + imul(r, k3) + ROR(r, 21) + ADD(r, v[0]) + XOR(v[1], r) + + ADD(reg_hash, v[1]) + + LABEL(after16) + + after8 = Label("after8") + CMP(reg_ptr_len, 8) + JL(after8) + + r = GeneralPurposeRegister64() + MOV(r, [reg_ptr]) + imul(r, k3) + ADD(reg_hash, r) + advance(reg_ptr, reg_ptr_len, 8) + + MOV(r, reg_hash) + ROR(r, 55) + imul(r, k1) + XOR(reg_hash, r) + + LABEL(after8) + + after4 = Label("after4") + CMP(reg_ptr_len, 4) + JL(after4) + + r = GeneralPurposeRegister64() + XOR(r, r) + MOV(r.as_dword, dword[reg_ptr]) + imul(r, k3) + ADD(reg_hash, r) + advance(reg_ptr, reg_ptr_len, 4) + + MOV(r, reg_hash) + ROR(r, 26) + imul(r, k1) + XOR(reg_hash, r) + + LABEL(after4) + + after2 = Label("after2") + CMP(reg_ptr_len, 2) + JL(after2) + + r = GeneralPurposeRegister64() + XOR(r,r) + MOV(r.as_word, word[reg_ptr]) + imul(r, k3) + ADD(reg_hash, r) + advance(reg_ptr, reg_ptr_len, 2) + + MOV(r, reg_hash) + ROR(r, 48) + imul(r, k1) + XOR(reg_hash, r) + + LABEL(after2) + + after1 = Label("after1") + CMP(reg_ptr_len, 1) + JL(after1) + + r = GeneralPurposeRegister64() + MOVZX(r, byte[reg_ptr]) + imul(r, k3) + ADD(reg_hash, r) + + MOV(r, reg_hash) + ROR(r, 37) + imul(r, k1) + XOR(reg_hash, r) + + LABEL(after1) + + r = GeneralPurposeRegister64() + MOV(r, reg_hash) + ROR(r, 28) + XOR(reg_hash, r) + + imul(reg_hash, k0) + + MOV(r, reg_hash) + ROR(r, 29) + XOR(reg_hash, r) + + RETURN(reg_hash) + +makeHash("Hash64", (buffer_base, buffer_len, buffer_cap, seed)) +makeHash("Hash64Str", (buffer_base, buffer_len, seed)) \ No newline at end of file diff --git a/vendor/github.com/dgryski/go-metro/metro128.go b/vendor/github.com/dgryski/go-metro/metro128.go new file mode 100644 index 000000000..e8dd8ddbf --- /dev/null +++ b/vendor/github.com/dgryski/go-metro/metro128.go @@ -0,0 +1,94 @@ +package metro + +import "encoding/binary" + +func rotate_right(v uint64, k uint) uint64 { + return (v >> k) | (v << (64 - k)) +} + +func Hash128(buffer []byte, seed uint64) (uint64, uint64) { + + const ( + k0 = 0xC83A91E1 + k1 = 0x8648DBDB + k2 = 0x7BDEC03B + k3 = 0x2F5870A5 + ) + + ptr := buffer + + var v [4]uint64 + + v[0] = (seed - k0) * k3 + v[1] = (seed + k1) * k2 + + if len(ptr) >= 32 { + v[2] = (seed + k0) * k2 + v[3] = (seed - k1) * k3 + + for len(ptr) >= 32 { + v[0] += binary.LittleEndian.Uint64(ptr) * k0 + ptr = ptr[8:] + v[0] = rotate_right(v[0], 29) + v[2] + v[1] += binary.LittleEndian.Uint64(ptr) * k1 + ptr = ptr[8:] + v[1] = rotate_right(v[1], 29) + v[3] + v[2] += binary.LittleEndian.Uint64(ptr) * k2 + ptr = ptr[8:] + v[2] = rotate_right(v[2], 29) + v[0] + v[3] += binary.LittleEndian.Uint64(ptr) * k3 + ptr = ptr[8:] + v[3] = rotate_right(v[3], 29) + v[1] + } + + v[2] ^= rotate_right(((v[0]+v[3])*k0)+v[1], 21) * k1 + v[3] ^= rotate_right(((v[1]+v[2])*k1)+v[0], 21) * k0 + v[0] ^= rotate_right(((v[0]+v[2])*k0)+v[3], 21) * k1 + v[1] ^= rotate_right(((v[1]+v[3])*k1)+v[2], 21) * k0 + } + + if len(ptr) >= 16 { + v[0] += binary.LittleEndian.Uint64(ptr) * k2 + ptr = ptr[8:] + v[0] = rotate_right(v[0], 33) * k3 + v[1] += binary.LittleEndian.Uint64(ptr) * k2 + ptr = ptr[8:] + v[1] = rotate_right(v[1], 33) * k3 + v[0] ^= rotate_right((v[0]*k2)+v[1], 45) * k1 + v[1] ^= rotate_right((v[1]*k3)+v[0], 45) * k0 + } + + if len(ptr) >= 8 { + v[0] += binary.LittleEndian.Uint64(ptr) * k2 + ptr = ptr[8:] + v[0] = rotate_right(v[0], 33) * k3 + v[0] ^= rotate_right((v[0]*k2)+v[1], 27) * k1 + } + + if len(ptr) >= 4 { + v[1] += uint64(binary.LittleEndian.Uint32(ptr)) * k2 + ptr = ptr[4:] + v[1] = rotate_right(v[1], 33) * k3 + v[1] ^= rotate_right((v[1]*k3)+v[0], 46) * k0 + } + + if len(ptr) >= 2 { + v[0] += uint64(binary.LittleEndian.Uint16(ptr)) * k2 + ptr = ptr[2:] + v[0] = rotate_right(v[0], 33) * k3 + v[0] ^= rotate_right((v[0]*k2)+v[1], 22) * k1 + } + + if len(ptr) >= 1 { + v[1] += uint64(ptr[0]) * k2 + v[1] = rotate_right(v[1], 33) * k3 + v[1] ^= rotate_right((v[1]*k3)+v[0], 58) * k0 + } + + v[0] += rotate_right((v[0]*k0)+v[1], 13) + v[1] += rotate_right((v[1]*k1)+v[0], 37) + v[0] += rotate_right((v[0]*k2)+v[1], 13) + v[1] += rotate_right((v[1]*k3)+v[0], 37) + + return v[0], v[1] +} diff --git a/vendor/github.com/dgryski/go-metro/metro64.go b/vendor/github.com/dgryski/go-metro/metro64.go new file mode 100644 index 000000000..458a91219 --- /dev/null +++ b/vendor/github.com/dgryski/go-metro/metro64.go @@ -0,0 +1,89 @@ +//go:build noasm || !amd64 || !gc || purego +// +build noasm !amd64 !gc purego + +package metro + +import ( + "encoding/binary" + "math/bits" +) + +func Hash64(buffer []byte, seed uint64) uint64 { + + const ( + k0 = 0xD6D018F5 + k1 = 0xA2AA033B + k2 = 0x62992FC1 + k3 = 0x30BC5B29 + ) + + ptr := buffer + + hash := (seed + k2) * k0 + + if len(ptr) >= 32 { + v0, v1, v2, v3 := hash, hash, hash, hash + + for len(ptr) >= 32 { + v0 += binary.LittleEndian.Uint64(ptr[:8]) * k0 + v0 = bits.RotateLeft64(v0, -29) + v2 + v1 += binary.LittleEndian.Uint64(ptr[8:16]) * k1 + v1 = bits.RotateLeft64(v1, -29) + v3 + v2 += binary.LittleEndian.Uint64(ptr[16:24]) * k2 + v2 = bits.RotateLeft64(v2, -29) + v0 + v3 += binary.LittleEndian.Uint64(ptr[24:32]) * k3 + v3 = bits.RotateLeft64(v3, -29) + v1 + ptr = ptr[32:] + } + + v2 ^= bits.RotateLeft64(((v0+v3)*k0)+v1, -37) * k1 + v3 ^= bits.RotateLeft64(((v1+v2)*k1)+v0, -37) * k0 + v0 ^= bits.RotateLeft64(((v0+v2)*k0)+v3, -37) * k1 + v1 ^= bits.RotateLeft64(((v1+v3)*k1)+v2, -37) * k0 + hash += v0 ^ v1 + } + + if len(ptr) >= 16 { + v0 := hash + (binary.LittleEndian.Uint64(ptr[:8]) * k2) + v0 = bits.RotateLeft64(v0, -29) * k3 + v1 := hash + (binary.LittleEndian.Uint64(ptr[8:16]) * k2) + v1 = bits.RotateLeft64(v1, -29) * k3 + v0 ^= bits.RotateLeft64(v0*k0, -21) + v1 + v1 ^= bits.RotateLeft64(v1*k3, -21) + v0 + hash += v1 + ptr = ptr[16:] + } + + if len(ptr) >= 8 { + hash += binary.LittleEndian.Uint64(ptr[:8]) * k3 + ptr = ptr[8:] + hash ^= bits.RotateLeft64(hash, -55) * k1 + } + + if len(ptr) >= 4 { + hash += uint64(binary.LittleEndian.Uint32(ptr[:4])) * k3 + hash ^= bits.RotateLeft64(hash, -26) * k1 + ptr = ptr[4:] + } + + if len(ptr) >= 2 { + hash += uint64(binary.LittleEndian.Uint16(ptr[:2])) * k3 + ptr = ptr[2:] + hash ^= bits.RotateLeft64(hash, -48) * k1 + } + + if len(ptr) >= 1 { + hash += uint64(ptr[0]) * k3 + hash ^= bits.RotateLeft64(hash, -37) * k1 + } + + hash ^= bits.RotateLeft64(hash, -28) + hash *= k0 + hash ^= bits.RotateLeft64(hash, -29) + + return hash +} + +func Hash64Str(buffer string, seed uint64) uint64 { + return Hash64([]byte(buffer), seed) +} diff --git a/vendor/github.com/dgryski/go-metro/metro_amd64.s b/vendor/github.com/dgryski/go-metro/metro_amd64.s new file mode 100644 index 000000000..f580ab830 --- /dev/null +++ b/vendor/github.com/dgryski/go-metro/metro_amd64.s @@ -0,0 +1,373 @@ +// +build !noasm +// +build gc +// +build !purego + +// Generated by PeachPy 0.2.0 from metro.py + +// func Hash64(buffer_base uintptr, buffer_len int64, buffer_cap int64, seed uint64) uint64 +TEXT ·Hash64(SB),4,$0-40 + MOVQ seed+24(FP), AX + MOVQ buffer_base+0(FP), BX + MOVQ buffer_len+8(FP), CX + MOVQ $3603962101, DX + IMULQ DX, AX + MOVQ $5961697176435608501, DX + ADDQ DX, AX + CMPQ CX, $32 + JLT after32 + MOVQ AX, DX + MOVQ AX, DI + MOVQ AX, SI + MOVQ AX, BP +loop_begin: + MOVQ 0(BX), R8 + MOVQ $3603962101, R9 + IMULQ R9, R8 + ADDQ R8, DX + RORQ $29, DX + ADDQ SI, DX + MOVQ 8(BX), R8 + MOVQ $2729050939, R9 + IMULQ R9, R8 + ADDQ R8, DI + RORQ $29, DI + ADDQ BP, DI + MOVQ 16(BX), R8 + MOVQ $1654206401, R9 + IMULQ R9, R8 + ADDQ R8, SI + RORQ $29, SI + ADDQ DX, SI + MOVQ 24(BX), R8 + MOVQ $817650473, R9 + IMULQ R9, R8 + ADDQ R8, BP + RORQ $29, BP + ADDQ DI, BP + ADDQ $32, BX + SUBQ $32, CX + CMPQ CX, $32 + JGE loop_begin + MOVQ DX, R8 + ADDQ BP, R8 + MOVQ $3603962101, R9 + IMULQ R9, R8 + ADDQ DI, R8 + RORQ $37, R8 + MOVQ $2729050939, R9 + IMULQ R9, R8 + XORQ R8, SI + MOVQ DI, R8 + ADDQ SI, R8 + MOVQ $2729050939, R9 + IMULQ R9, R8 + ADDQ DX, R8 + RORQ $37, R8 + MOVQ $3603962101, R9 + IMULQ R9, R8 + XORQ R8, BP + MOVQ DX, R8 + ADDQ SI, R8 + MOVQ $3603962101, R9 + IMULQ R9, R8 + ADDQ BP, R8 + RORQ $37, R8 + MOVQ $2729050939, R9 + IMULQ R9, R8 + XORQ R8, DX + MOVQ DI, R8 + ADDQ BP, R8 + MOVQ $2729050939, BP + IMULQ BP, R8 + ADDQ SI, R8 + RORQ $37, R8 + MOVQ $3603962101, SI + IMULQ SI, R8 + XORQ R8, DI + XORQ DI, DX + ADDQ DX, AX +after32: + CMPQ CX, $16 + JLT after16 + MOVQ 0(BX), DX + MOVQ $1654206401, DI + IMULQ DI, DX + ADDQ AX, DX + ADDQ $8, BX + SUBQ $8, CX + RORQ $29, DX + MOVQ $817650473, DI + IMULQ DI, DX + MOVQ 0(BX), DI + MOVQ $1654206401, SI + IMULQ SI, DI + ADDQ AX, DI + ADDQ $8, BX + SUBQ $8, CX + RORQ $29, DI + MOVQ $817650473, SI + IMULQ SI, DI + MOVQ DX, SI + MOVQ $3603962101, BP + IMULQ BP, SI + RORQ $21, SI + ADDQ DI, SI + XORQ SI, DX + MOVQ DI, SI + MOVQ $817650473, BP + IMULQ BP, SI + RORQ $21, SI + ADDQ DX, SI + XORQ SI, DI + ADDQ DI, AX +after16: + CMPQ CX, $8 + JLT after8 + MOVQ 0(BX), DX + MOVQ $817650473, DI + IMULQ DI, DX + ADDQ DX, AX + ADDQ $8, BX + SUBQ $8, CX + MOVQ AX, DX + RORQ $55, DX + MOVQ $2729050939, DI + IMULQ DI, DX + XORQ DX, AX +after8: + CMPQ CX, $4 + JLT after4 + XORQ DX, DX + MOVL 0(BX), DX + MOVQ $817650473, DI + IMULQ DI, DX + ADDQ DX, AX + ADDQ $4, BX + SUBQ $4, CX + MOVQ AX, DX + RORQ $26, DX + MOVQ $2729050939, DI + IMULQ DI, DX + XORQ DX, AX +after4: + CMPQ CX, $2 + JLT after2 + XORQ DX, DX + MOVW 0(BX), DX + MOVQ $817650473, DI + IMULQ DI, DX + ADDQ DX, AX + ADDQ $2, BX + SUBQ $2, CX + MOVQ AX, DX + RORQ $48, DX + MOVQ $2729050939, DI + IMULQ DI, DX + XORQ DX, AX +after2: + CMPQ CX, $1 + JLT after1 + MOVBQZX 0(BX), BX + MOVQ $817650473, CX + IMULQ CX, BX + ADDQ BX, AX + MOVQ AX, BX + RORQ $37, BX + MOVQ $2729050939, CX + IMULQ CX, BX + XORQ BX, AX +after1: + MOVQ AX, BX + RORQ $28, BX + XORQ BX, AX + MOVQ $3603962101, BX + IMULQ BX, AX + MOVQ AX, BX + RORQ $29, BX + XORQ BX, AX + MOVQ AX, ret+32(FP) + RET + +// func Hash64Str(buffer_base uintptr, buffer_len int64, seed uint64) uint64 +TEXT ·Hash64Str(SB),4,$0-32 + MOVQ seed+16(FP), AX + MOVQ buffer_base+0(FP), BX + MOVQ buffer_len+8(FP), CX + MOVQ $3603962101, DX + IMULQ DX, AX + MOVQ $5961697176435608501, DX + ADDQ DX, AX + CMPQ CX, $32 + JLT after32 + MOVQ AX, DX + MOVQ AX, DI + MOVQ AX, SI + MOVQ AX, BP +loop_begin: + MOVQ 0(BX), R8 + MOVQ $3603962101, R9 + IMULQ R9, R8 + ADDQ R8, DX + RORQ $29, DX + ADDQ SI, DX + MOVQ 8(BX), R8 + MOVQ $2729050939, R9 + IMULQ R9, R8 + ADDQ R8, DI + RORQ $29, DI + ADDQ BP, DI + MOVQ 16(BX), R8 + MOVQ $1654206401, R9 + IMULQ R9, R8 + ADDQ R8, SI + RORQ $29, SI + ADDQ DX, SI + MOVQ 24(BX), R8 + MOVQ $817650473, R9 + IMULQ R9, R8 + ADDQ R8, BP + RORQ $29, BP + ADDQ DI, BP + ADDQ $32, BX + SUBQ $32, CX + CMPQ CX, $32 + JGE loop_begin + MOVQ DX, R8 + ADDQ BP, R8 + MOVQ $3603962101, R9 + IMULQ R9, R8 + ADDQ DI, R8 + RORQ $37, R8 + MOVQ $2729050939, R9 + IMULQ R9, R8 + XORQ R8, SI + MOVQ DI, R8 + ADDQ SI, R8 + MOVQ $2729050939, R9 + IMULQ R9, R8 + ADDQ DX, R8 + RORQ $37, R8 + MOVQ $3603962101, R9 + IMULQ R9, R8 + XORQ R8, BP + MOVQ DX, R8 + ADDQ SI, R8 + MOVQ $3603962101, R9 + IMULQ R9, R8 + ADDQ BP, R8 + RORQ $37, R8 + MOVQ $2729050939, R9 + IMULQ R9, R8 + XORQ R8, DX + MOVQ DI, R8 + ADDQ BP, R8 + MOVQ $2729050939, BP + IMULQ BP, R8 + ADDQ SI, R8 + RORQ $37, R8 + MOVQ $3603962101, SI + IMULQ SI, R8 + XORQ R8, DI + XORQ DI, DX + ADDQ DX, AX +after32: + CMPQ CX, $16 + JLT after16 + MOVQ 0(BX), DX + MOVQ $1654206401, DI + IMULQ DI, DX + ADDQ AX, DX + ADDQ $8, BX + SUBQ $8, CX + RORQ $29, DX + MOVQ $817650473, DI + IMULQ DI, DX + MOVQ 0(BX), DI + MOVQ $1654206401, SI + IMULQ SI, DI + ADDQ AX, DI + ADDQ $8, BX + SUBQ $8, CX + RORQ $29, DI + MOVQ $817650473, SI + IMULQ SI, DI + MOVQ DX, SI + MOVQ $3603962101, BP + IMULQ BP, SI + RORQ $21, SI + ADDQ DI, SI + XORQ SI, DX + MOVQ DI, SI + MOVQ $817650473, BP + IMULQ BP, SI + RORQ $21, SI + ADDQ DX, SI + XORQ SI, DI + ADDQ DI, AX +after16: + CMPQ CX, $8 + JLT after8 + MOVQ 0(BX), DX + MOVQ $817650473, DI + IMULQ DI, DX + ADDQ DX, AX + ADDQ $8, BX + SUBQ $8, CX + MOVQ AX, DX + RORQ $55, DX + MOVQ $2729050939, DI + IMULQ DI, DX + XORQ DX, AX +after8: + CMPQ CX, $4 + JLT after4 + XORQ DX, DX + MOVL 0(BX), DX + MOVQ $817650473, DI + IMULQ DI, DX + ADDQ DX, AX + ADDQ $4, BX + SUBQ $4, CX + MOVQ AX, DX + RORQ $26, DX + MOVQ $2729050939, DI + IMULQ DI, DX + XORQ DX, AX +after4: + CMPQ CX, $2 + JLT after2 + XORQ DX, DX + MOVW 0(BX), DX + MOVQ $817650473, DI + IMULQ DI, DX + ADDQ DX, AX + ADDQ $2, BX + SUBQ $2, CX + MOVQ AX, DX + RORQ $48, DX + MOVQ $2729050939, DI + IMULQ DI, DX + XORQ DX, AX +after2: + CMPQ CX, $1 + JLT after1 + MOVBQZX 0(BX), BX + MOVQ $817650473, CX + IMULQ CX, BX + ADDQ BX, AX + MOVQ AX, BX + RORQ $37, BX + MOVQ $2729050939, CX + IMULQ CX, BX + XORQ BX, AX +after1: + MOVQ AX, BX + RORQ $28, BX + XORQ BX, AX + MOVQ $3603962101, BX + IMULQ BX, AX + MOVQ AX, BX + RORQ $29, BX + XORQ BX, AX + MOVQ AX, ret+24(FP) + RET diff --git a/vendor/github.com/dgryski/go-metro/metro_stub.go b/vendor/github.com/dgryski/go-metro/metro_stub.go new file mode 100644 index 000000000..ccb97f11f --- /dev/null +++ b/vendor/github.com/dgryski/go-metro/metro_stub.go @@ -0,0 +1,10 @@ +//go:build !noasm && amd64 && gc && !purego +// +build !noasm,amd64,gc,!purego + +package metro + +//go:generate python -m peachpy.x86_64 metro.py -S -o metro_amd64.s -mabi=goasm +//go:noescape + +func Hash64(buffer []byte, seed uint64) uint64 +func Hash64Str(buffer string, seed uint64) uint64 diff --git a/vendor/github.com/seiflotfy/cuckoofilter/.gitignore b/vendor/github.com/seiflotfy/cuckoofilter/.gitignore new file mode 100644 index 000000000..11b90db8d --- /dev/null +++ b/vendor/github.com/seiflotfy/cuckoofilter/.gitignore @@ -0,0 +1,26 @@ +# Compiled Object files, Static and Dynamic libs (Shared Objects) +*.o +*.a +*.so + +# Folders +_obj +_test + +# Architecture specific extensions/prefixes +*.[568vq] +[568vq].out + +*.cgo1.go +*.cgo2.c +_cgo_defun.c +_cgo_gotypes.go +_cgo_export.* + +_testmain.go + +*.exe +*.test +*.prof + +.idea diff --git a/vendor/github.com/seiflotfy/cuckoofilter/LICENSE b/vendor/github.com/seiflotfy/cuckoofilter/LICENSE new file mode 100644 index 000000000..58393c98c --- /dev/null +++ b/vendor/github.com/seiflotfy/cuckoofilter/LICENSE @@ -0,0 +1,22 @@ +The MIT License (MIT) + +Copyright (c) 2015 Seif Lotfy + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + diff --git a/vendor/github.com/seiflotfy/cuckoofilter/README.md b/vendor/github.com/seiflotfy/cuckoofilter/README.md new file mode 100644 index 000000000..2a77fb393 --- /dev/null +++ b/vendor/github.com/seiflotfy/cuckoofilter/README.md @@ -0,0 +1,62 @@ +# Cuckoo Filter + +[![GoDoc](https://godoc.org/github.com/seiflotfy/cuckoofilter?status.svg)](https://godoc.org/github.com/seiflotfy/cuckoofilter) [![CodeHunt.io](https://img.shields.io/badge/vote-codehunt.io-02AFD1.svg)](http://codehunt.io/sub/cuckoo-filter/?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge) + +Cuckoo filter is a Bloom filter replacement for approximated set-membership queries. While Bloom filters are well-known space-efficient data structures to serve queries like "if item x is in a set?", they do not support deletion. Their variances to enable deletion (like counting Bloom filters) usually require much more space. + +Cuckoo filters provide the flexibility to add and remove items dynamically. A cuckoo filter is based on cuckoo hashing (and therefore named as cuckoo filter). It is essentially a cuckoo hash table storing each key's fingerprint. Cuckoo hash tables can be highly compact, thus a cuckoo filter could use less space than conventional Bloom filters, for applications that require low false positive rates (< 3%). + +For details about the algorithm and citations please use this article for now + +["Cuckoo Filter: Better Than Bloom" by Bin Fan, Dave Andersen and Michael Kaminsky](https://www.cs.cmu.edu/~dga/papers/cuckoo-conext2014.pdf) + +## Implementation details + +The paper cited above leaves several parameters to choose. In this implementation + +1. Every element has 2 possible bucket indices +2. Buckets have a static size of 4 fingerprints +3. Fingerprints have a static size of 8 bits + +1 and 2 are suggested to be the optimum by the authors. The choice of 3 comes down to the desired false positive rate. Given a target false positive rate of `r` and a bucket size `b`, they suggest choosing the fingerprint size `f` using + + f >= log2(2b/r) bits + +With the 8 bit fingerprint size in this repository, you can expect `r ~= 0.03`. +[Other implementations](https://github.com/panmari/cuckoofilter) use 16 bit, which correspond to a false positive rate of `r ~= 0.0001`. + +## Example usage: +```go +package main + +import "fmt" +import cuckoo "github.com/seiflotfy/cuckoofilter" + +func main() { + cf := cuckoo.NewFilter(1000) + cf.InsertUnique([]byte("geeky ogre")) + + // Lookup a string (and it a miss) if it exists in the cuckoofilter + cf.Lookup([]byte("hello")) + + count := cf.Count() + fmt.Println(count) // count == 1 + + // Delete a string (and it a miss) + cf.Delete([]byte("hello")) + + count = cf.Count() + fmt.Println(count) // count == 1 + + // Delete a string (a hit) + cf.Delete([]byte("geeky ogre")) + + count = cf.Count() + fmt.Println(count) // count == 0 + + cf.Reset() // reset +} +``` + +## Documentation: +["Cuckoo Filter on GoDoc"](http://godoc.org/github.com/seiflotfy/cuckoofilter) diff --git a/vendor/github.com/seiflotfy/cuckoofilter/bucket.go b/vendor/github.com/seiflotfy/cuckoofilter/bucket.go new file mode 100644 index 000000000..4a83fc503 --- /dev/null +++ b/vendor/github.com/seiflotfy/cuckoofilter/bucket.go @@ -0,0 +1,45 @@ +package cuckoo + +type fingerprint byte + +type bucket [bucketSize]fingerprint + +const ( + nullFp = 0 + bucketSize = 4 +) + +func (b *bucket) insert(fp fingerprint) bool { + for i, tfp := range b { + if tfp == nullFp { + b[i] = fp + return true + } + } + return false +} + +func (b *bucket) delete(fp fingerprint) bool { + for i, tfp := range b { + if tfp == fp { + b[i] = nullFp + return true + } + } + return false +} + +func (b *bucket) getFingerprintIndex(fp fingerprint) int { + for i, tfp := range b { + if tfp == fp { + return i + } + } + return -1 +} + +func (b *bucket) reset() { + for i := range b { + b[i] = nullFp + } +} diff --git a/vendor/github.com/seiflotfy/cuckoofilter/cuckoofilter.go b/vendor/github.com/seiflotfy/cuckoofilter/cuckoofilter.go new file mode 100644 index 000000000..ec0d246de --- /dev/null +++ b/vendor/github.com/seiflotfy/cuckoofilter/cuckoofilter.go @@ -0,0 +1,165 @@ +package cuckoo + +import ( + "fmt" + "math/bits" + "math/rand" +) + +const maxCuckooCount = 500 + +// Filter is a probabilistic counter +type Filter struct { + buckets []bucket + count uint + bucketPow uint +} + +// NewFilter returns a new cuckoofilter with a given capacity. +// A capacity of 1000000 is a normal default, which allocates +// about ~1MB on 64-bit machines. +func NewFilter(capacity uint) *Filter { + capacity = getNextPow2(uint64(capacity)) / bucketSize + if capacity == 0 { + capacity = 1 + } + buckets := make([]bucket, capacity) + return &Filter{ + buckets: buckets, + count: 0, + bucketPow: uint(bits.TrailingZeros(capacity)), + } +} + +// Lookup returns true if data is in the counter +func (cf *Filter) Lookup(data []byte) bool { + i1, fp := getIndexAndFingerprint(data, cf.bucketPow) + if cf.buckets[i1].getFingerprintIndex(fp) > -1 { + return true + } + i2 := getAltIndex(fp, i1, cf.bucketPow) + return cf.buckets[i2].getFingerprintIndex(fp) > -1 +} + +// Reset ... +func (cf *Filter) Reset() { + for i := range cf.buckets { + cf.buckets[i].reset() + } + cf.count = 0 +} + +func randi(i1, i2 uint) uint { + if rand.Intn(2) == 0 { + return i1 + } + return i2 +} + +// Insert inserts data into the counter and returns true upon success +func (cf *Filter) Insert(data []byte) bool { + i1, fp := getIndexAndFingerprint(data, cf.bucketPow) + if cf.insert(fp, i1) { + return true + } + i2 := getAltIndex(fp, i1, cf.bucketPow) + if cf.insert(fp, i2) { + return true + } + return cf.reinsert(fp, randi(i1, i2)) +} + +// InsertUnique inserts data into the counter if not exists and returns true upon success +func (cf *Filter) InsertUnique(data []byte) bool { + if cf.Lookup(data) { + return false + } + return cf.Insert(data) +} + +func (cf *Filter) insert(fp fingerprint, i uint) bool { + if cf.buckets[i].insert(fp) { + cf.count++ + return true + } + return false +} + +func (cf *Filter) reinsert(fp fingerprint, i uint) bool { + for k := 0; k < maxCuckooCount; k++ { + j := rand.Intn(bucketSize) + oldfp := fp + fp = cf.buckets[i][j] + cf.buckets[i][j] = oldfp + + // look in the alternate location for that random element + i = getAltIndex(fp, i, cf.bucketPow) + if cf.insert(fp, i) { + return true + } + } + return false +} + +// Delete data from counter if exists and return if deleted or not +func (cf *Filter) Delete(data []byte) bool { + i1, fp := getIndexAndFingerprint(data, cf.bucketPow) + if cf.delete(fp, i1) { + return true + } + i2 := getAltIndex(fp, i1, cf.bucketPow) + return cf.delete(fp, i2) +} + +func (cf *Filter) delete(fp fingerprint, i uint) bool { + if cf.buckets[i].delete(fp) { + if cf.count > 0 { + cf.count-- + } + return true + } + return false +} + +// Count returns the number of items in the counter +func (cf *Filter) Count() uint { + return cf.count +} + +// Encode returns a byte slice representing a Cuckoofilter +func (cf *Filter) Encode() []byte { + bytes := make([]byte, len(cf.buckets)*bucketSize) + for i, b := range cf.buckets { + for j, f := range b { + index := (i * len(b)) + j + bytes[index] = byte(f) + } + } + return bytes +} + +// Decode returns a Cuckoofilter from a byte slice +func Decode(bytes []byte) (*Filter, error) { + var count uint + if len(bytes)%bucketSize != 0 { + return nil, fmt.Errorf("expected bytes to be multiple of %d, got %d", bucketSize, len(bytes)) + } + if len(bytes) == 0 { + return nil, fmt.Errorf("bytes can not be empty") + } + buckets := make([]bucket, len(bytes)/4) + for i, b := range buckets { + for j := range b { + index := (i * len(b)) + j + if bytes[index] != 0 { + buckets[i][j] = fingerprint(bytes[index]) + count++ + } + } + } + return &Filter{ + buckets: buckets, + count: count, + bucketPow: uint(bits.TrailingZeros(uint(len(buckets)))), + }, nil +} diff --git a/vendor/github.com/seiflotfy/cuckoofilter/doc.go b/vendor/github.com/seiflotfy/cuckoofilter/doc.go new file mode 100644 index 000000000..6f6cbf828 --- /dev/null +++ b/vendor/github.com/seiflotfy/cuckoofilter/doc.go @@ -0,0 +1,35 @@ +/* +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + +/* +Package cuckoo provides a Cuckoo Filter, a Bloom filter replacement for approximated set-membership queries. + +While Bloom filters are well-known space-efficient data structures to serve queries like "if item x is in a set?", they do not support deletion. Their variances to enable deletion (like counting Bloom filters) usually require much more space. + +Cuckoo filters provide the flexibility to add and remove items dynamically. A cuckoo filter is based on cuckoo hashing (and therefore named as cuckoo filter). It is essentially a cuckoo hash table storing each key's fingerprint. Cuckoo hash tables can be highly compact, thus a cuckoo filter could use less space than conventional Bloom filters, for applications that require low false positive rates (< 3%). + +For details about the algorithm and citations please use this article: + +"Cuckoo Filter: Better Than Bloom" by Bin Fan, Dave Andersen and Michael Kaminsky +(https://www.cs.cmu.edu/~dga/papers/cuckoo-conext2014.pdf) + +Note: +This implementation uses a a static bucket size of 4 fingerprints and a fingerprint size of 1 byte based on my understanding of an optimal bucket/fingerprint/size ratio from the aforementioned paper.*/ +package cuckoo diff --git a/vendor/github.com/seiflotfy/cuckoofilter/scalable_cuckoofilter.go b/vendor/github.com/seiflotfy/cuckoofilter/scalable_cuckoofilter.go new file mode 100644 index 000000000..693184c9d --- /dev/null +++ b/vendor/github.com/seiflotfy/cuckoofilter/scalable_cuckoofilter.go @@ -0,0 +1,170 @@ +package cuckoo + +import ( + "bytes" + "encoding/gob" +) + +const ( + DefaultLoadFactor = 0.9 + DefaultCapacity = 10000 +) + +type ScalableCuckooFilter struct { + filters []*Filter + loadFactor float32 + //when scale(last filter size * loadFactor >= capacity) get new filter capacity + scaleFactor func(capacity uint) uint +} + +type option func(*ScalableCuckooFilter) + +type Store struct { + Bytes [][]byte + LoadFactor float32 +} + +/* + by default option the grow capacity is: + capacity , total + 4096 4096 + 8192 12288 +16384 28672 +32768 61440 +65536 126,976 +*/ +func NewScalableCuckooFilter(opts ...option) *ScalableCuckooFilter { + sfilter := new(ScalableCuckooFilter) + for _, opt := range opts { + opt(sfilter) + } + configure(sfilter) + return sfilter +} + +func (sf *ScalableCuckooFilter) Lookup(data []byte) bool { + for _, filter := range sf.filters { + if filter.Lookup(data) { + return true + } + } + return false +} + +func (sf *ScalableCuckooFilter) Reset() { + for _, filter := range sf.filters { + filter.Reset() + } +} + +func (sf *ScalableCuckooFilter) Insert(data []byte) bool { + needScale := false + lastFilter := sf.filters[len(sf.filters)-1] + if (float32(lastFilter.count) / float32(len(lastFilter.buckets))) > sf.loadFactor { + needScale = true + } else { + b := lastFilter.Insert(data) + needScale = !b + } + if !needScale { + return true + } + newFilter := NewFilter(sf.scaleFactor(uint(len(lastFilter.buckets)))) + sf.filters = append(sf.filters, newFilter) + return newFilter.Insert(data) +} + +func (sf *ScalableCuckooFilter) InsertUnique(data []byte) bool { + if sf.Lookup(data) { + return false + } + return sf.Insert(data) +} + +func (sf *ScalableCuckooFilter) Delete(data []byte) bool { + for _, filter := range sf.filters { + if filter.Delete(data) { + return true + } + } + return false +} + +func (sf *ScalableCuckooFilter) Count() uint { + var sum uint + for _, filter := range sf.filters { + sum += filter.count + } + return sum + +} + +func (sf *ScalableCuckooFilter) Encode() []byte { + slice := make([][]byte, len(sf.filters)) + for i, filter := range sf.filters { + encode := filter.Encode() + slice[i] = encode + } + store := &Store{ + Bytes: slice, + LoadFactor: sf.loadFactor, + } + buf := bytes.NewBuffer(nil) + enc := gob.NewEncoder(buf) + err := enc.Encode(store) + if err != nil { + return nil + } + return buf.Bytes() +} + +func (sf *ScalableCuckooFilter) DecodeWithParam(fBytes []byte, opts ...option) (*ScalableCuckooFilter, error) { + instance, err := DecodeScalableFilter(fBytes) + if err != nil { + return nil, err + } + for _, opt := range opts { + opt(instance) + } + return instance, nil +} + +func DecodeScalableFilter(fBytes []byte) (*ScalableCuckooFilter, error) { + buf := bytes.NewBuffer(fBytes) + dec := gob.NewDecoder(buf) + store := &Store{} + err := dec.Decode(store) + if err != nil { + return nil, err + } + filterSize := len(store.Bytes) + instance := NewScalableCuckooFilter(func(filter *ScalableCuckooFilter) { + filter.filters = make([]*Filter, filterSize) + }, func(filter *ScalableCuckooFilter) { + filter.loadFactor = store.LoadFactor + }) + for i, oneBytes := range store.Bytes { + filter, err := Decode(oneBytes) + if err != nil { + return nil, err + } + instance.filters[i] = filter + } + return instance, nil + +} + +func configure(sfilter *ScalableCuckooFilter) { + if sfilter.loadFactor == 0 { + sfilter.loadFactor = DefaultLoadFactor + } + if sfilter.scaleFactor == nil { + sfilter.scaleFactor = func(currentSize uint) uint { + return currentSize * bucketSize * 2 + } + } + if sfilter.filters == nil { + initFilter := NewFilter(DefaultCapacity) + sfilter.filters = []*Filter{initFilter} + } +} diff --git a/vendor/github.com/seiflotfy/cuckoofilter/util.go b/vendor/github.com/seiflotfy/cuckoofilter/util.go new file mode 100644 index 000000000..2a0f65b13 --- /dev/null +++ b/vendor/github.com/seiflotfy/cuckoofilter/util.go @@ -0,0 +1,52 @@ +package cuckoo + +import ( + metro "github.com/dgryski/go-metro" +) + +var ( + altHash = [256]uint{} + masks = [65]uint{} +) + +func init() { + for i := 0; i < 256; i++ { + altHash[i] = (uint(metro.Hash64([]byte{byte(i)}, 1337))) + } + for i := uint(0); i <= 64; i++ { + masks[i] = (1 << i) - 1 + } +} + +func getAltIndex(fp fingerprint, i uint, bucketPow uint) uint { + mask := masks[bucketPow] + hash := altHash[fp] & mask + return (i & mask) ^ hash +} + +func getFingerprint(hash uint64) byte { + // Use least significant bits for fingerprint. + fp := byte(hash%255 + 1) + return fp +} + +// getIndicesAndFingerprint returns the 2 bucket indices and fingerprint to be used +func getIndexAndFingerprint(data []byte, bucketPow uint) (uint, fingerprint) { + hash := metro.Hash64(data, 1337) + fp := getFingerprint(hash) + // Use most significant bits for deriving index. + i1 := uint(hash>>32) & masks[bucketPow] + return i1, fingerprint(fp) +} + +func getNextPow2(n uint64) uint { + n-- + n |= n >> 1 + n |= n >> 2 + n |= n >> 4 + n |= n >> 8 + n |= n >> 16 + n |= n >> 32 + n++ + return uint(n) +} diff --git a/vendor/modules.txt b/vendor/modules.txt index 5a9152ff4..cecc5ce8d 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -64,6 +64,9 @@ github.com/davecgh/go-spew/spew # github.com/dgryski/go-expirecache v0.0.0-20170314133854-743ef98b2adb ## explicit github.com/dgryski/go-expirecache +# github.com/dgryski/go-metro v0.0.0-20211217172704-adc40b04c140 +## explicit +github.com/dgryski/go-metro # github.com/dgryski/go-trigram v0.0.0-20160407183937-79ec494e1ad0 ## explicit github.com/dgryski/go-trigram @@ -276,6 +279,9 @@ github.com/prometheus/procfs/internal/util # github.com/rcrowley/go-metrics v0.0.0-20201227073835-cf1acfcdf475 ## explicit github.com/rcrowley/go-metrics +# github.com/seiflotfy/cuckoofilter v0.0.0-20220411075957-e3b120b3f5fb +## explicit; go 1.15 +github.com/seiflotfy/cuckoofilter # github.com/sevlyar/go-daemon v0.1.6 ## explicit github.com/sevlyar/go-daemon From 6cbcc82dfd1997027a23dea586f969981c085dd8 Mon Sep 17 00:00:00 2001 From: Denys Zhdanov Date: Sun, 23 Jun 2024 13:03:44 +0200 Subject: [PATCH 3/5] Adding bloom size as cache parameter --- carbon/app.go | 2 ++ carbon/config.go | 1 + 2 files changed, 3 insertions(+) diff --git a/carbon/app.go b/carbon/app.go index 1855b7c58..e75da2543 100644 --- a/carbon/app.go +++ b/carbon/app.go @@ -178,6 +178,7 @@ func (app *App) ReloadConfig() error { app.Cache.SetMaxSize(app.Config.Cache.MaxSize) app.Cache.SetWriteStrategy(app.Config.Cache.WriteStrategy) app.Cache.SetTagsEnabled(app.Config.Tags.Enabled) + app.Cache.SetBloomSize(app.Config.Cache.BloomSize) if app.Persister != nil { app.Persister.Stop() @@ -349,6 +350,7 @@ func (app *App) Start() (err error) { core.SetMaxSize(conf.Cache.MaxSize) core.SetWriteStrategy(conf.Cache.WriteStrategy) core.SetTagsEnabled(conf.Tags.Enabled) + core.SetBloomSize(conf.Cache.BloomSize) app.Cache = core diff --git a/carbon/config.go b/carbon/config.go index 1b3d3572c..af40b8d27 100644 --- a/carbon/config.go +++ b/carbon/config.go @@ -80,6 +80,7 @@ type whisperConfig struct { type cacheConfig struct { MaxSize uint32 `toml:"max-size"` WriteStrategy string `toml:"write-strategy"` + BloomSize uint `toml:"bloom-size"` } type carbonlinkConfig struct { From a9d3ed6dedb4daf4b3488af7254f134c7c0824ce Mon Sep 17 00:00:00 2001 From: Denys Zhdanov Date: Mon, 24 Jun 2024 09:55:21 +0200 Subject: [PATCH 4/5] Added bloom-size to README --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index add11d242..df622f885 100644 --- a/README.md +++ b/README.md @@ -169,6 +169,8 @@ max-size = 1000000 # "noop" - pick metrics to write in unspecified order, # requires least CPU and improves cache responsiveness write-strategy = "max" +# If > 0 use bloom filter to detect new metrics instead of cache +bloom-size = 0 [udp] listen = ":2003" From 0718510395029e08da473f6dea5942bffdbcd86c Mon Sep 17 00:00:00 2001 From: Denys Zhdanov Date: Mon, 24 Jun 2024 10:43:31 +0200 Subject: [PATCH 5/5] Fixing linter --- cache/cache.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cache/cache.go b/cache/cache.go index 95cae397a..2b3aa8fb0 100644 --- a/cache/cache.go +++ b/cache/cache.go @@ -6,12 +6,13 @@ Based on https://github.com/orcaman/concurrent-map import ( "fmt" - cuckoo "github.com/seiflotfy/cuckoofilter" "io" "sync" "sync/atomic" "time" + cuckoo "github.com/seiflotfy/cuckoofilter" + "github.com/go-graphite/go-carbon/helper" "github.com/go-graphite/go-carbon/points" "github.com/go-graphite/go-carbon/tags"