From f48bfee577f1028a2d720d022791631fd064c1cb Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Mon, 9 May 2022 16:15:50 -0400 Subject: [PATCH] Fixing duplicate issue. --- binaryfusefilter.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/binaryfusefilter.go b/binaryfusefilter.go index 5c0d074..22a6bc1 100644 --- a/binaryfusefilter.go +++ b/binaryfusefilter.go @@ -4,6 +4,7 @@ import ( "errors" "math" "math/bits" + "sort" ) type BinaryFuse8 struct { @@ -86,6 +87,7 @@ func mod3(x uint8) uint8 { // PopulateBinaryFuse8 fills a BinaryFuse8 filter with provided keys. // The function may return an error after too many iterations: it is unlikely. +// If the input contains duplicates, the keys slice may be sorted in place. func PopulateBinaryFuse8(keys []uint64) (*BinaryFuse8, error) { size := uint32(len(keys)) filter := &BinaryFuse8{} @@ -240,6 +242,12 @@ func PopulateBinaryFuse8(keys []uint64) (*BinaryFuse8, error) { // Success size = stacksize break + } else if duplicates > 0 { + // Duplicates were found, but we did not + // manage to remove them all. We may simply sort the keys to + // solve the issue. This will run in time O(n log n) and it + // mutates the input. + sort.Slice(keys, func(i, j int) bool { return keys[i] < keys[j] }) } for i := uint32(0); i < size; i++ { reverseOrder[i] = 0