From d91f2b03e9acd1c69bec415f2840619890a53a3e Mon Sep 17 00:00:00 2001 From: Akshay Vadher Date: Fri, 19 Jul 2024 19:44:52 +0530 Subject: [PATCH 1/2] Added histogram test --- cuid2_collision_test.go | 14 +++++-- cuid2_histogram_test.go | 83 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+), 3 deletions(-) create mode 100644 cuid2_histogram_test.go diff --git a/cuid2_collision_test.go b/cuid2_collision_test.go index 3d3ffb9..f90ab29 100644 --- a/cuid2_collision_test.go +++ b/cuid2_collision_test.go @@ -32,6 +32,11 @@ func TestCollision(t *testing.T) { checkHistogram(t, n/numPools, v.Histogram) m.Unlock() } + CheckCollision(t, ids) + fmt.Printf("Sample ids %v\n", ids[:10]) +} + +func CheckCollision(t *testing.T, ids []string) { set := make(map[string]struct{}, len(ids)) for _, id := range ids { set[id] = struct{}{} @@ -39,7 +44,6 @@ func TestCollision(t *testing.T) { if len(set) < len(ids) { t.Errorf("Collision detected. len(set) %d, len(ids) %d", len(set), len(ids)) } - fmt.Printf("Sample ids %v\n", ids[:10]) } func checkHistogram(t *testing.T, numberOfIds int, histogram []int64) { @@ -63,7 +67,11 @@ type IdPoolResponse struct { } func createIdPool(t *testing.T, max int, poolId int, idPoolResponseChan chan *IdPoolResponse, wg *sync.WaitGroup) { - defer wg.Done() + idPoolResponseChan <- CreateIdPool(t, max, poolId) + wg.Done() +} + +func CreateIdPool(t *testing.T, max int, poolId int) *IdPoolResponse { set := make(map[string]struct{}, max) for i := 0; i < max; i++ { id := CreateId() @@ -94,7 +102,7 @@ func createIdPool(t *testing.T, max int, poolId int, idPoolResponseChan chan *Id bucketCount := 20 histogram := buildHistogram(numbers, bucketCount) fmt.Printf("Histogram created for pool %d\n", poolId) - idPoolResponseChan <- &IdPoolResponse{ + return &IdPoolResponse{ Ids: ids, Numbers: numbers, Histogram: histogram, diff --git a/cuid2_histogram_test.go b/cuid2_histogram_test.go new file mode 100644 index 0000000..9936304 --- /dev/null +++ b/cuid2_histogram_test.go @@ -0,0 +1,83 @@ +package cuid2 + +import ( + "fmt" + "math" + "math/rand/v2" + "strings" + "testing" +) + +func TestHistogram(t *testing.T) { + n := 100000 + fmt.Printf("Testing %d unique ids\n", n) + poolId := rand.IntN(100) + poolResponse := CreateIdPool(t, n, poolId) + ids := poolResponse.Ids + sampleIds := ids[:10] + fmt.Printf("Sample ids %v\n", sampleIds) + t.Run("Test collision", func(t *testing.T) { + CheckCollision(t, ids) + }) + t.Run("Test char frequency", func(t *testing.T) { + testCharFrequency(t, n, ids) + }) + t.Run("Test histogram", func(t *testing.T) { + testHistogram(t, poolResponse, n) + }) + +} + +func testCharFrequency(t *testing.T, n int, ids []string) { + tolerance := 0.1 + idLength := 23 + totalLetters := idLength * n + base := 36 + expectedBinSize := math.Ceil(float64(totalLetters) / float64(base)) + minBinSize := math.Round(expectedBinSize * (1 - tolerance)) + maxBinSize := math.Round(expectedBinSize * (1 + tolerance)) + + // Drop the first character because it will always be a letter, making + // the letter frequency skewed. + testIds := make([]string, len(ids)) + for i, id := range ids { + testIds[i] = id[2:] + } + charFrequencies := make(map[string]int) + for _, id := range testIds { + chars := strings.Split(id, "") + for _, char := range chars { + charFrequencies[char] += 1 + } + } + fmt.Println("Testing character frequency...") + fmt.Printf("expectedBinSize %v\n", expectedBinSize) + fmt.Printf("minBinSize %v\n", minBinSize) + fmt.Printf("maxBinSize %v\n", maxBinSize) + fmt.Printf("charFrequencies %v\n", charFrequencies) + for k, v := range charFrequencies { + if float64(v) < minBinSize || float64(v) > maxBinSize { + t.Errorf("The char %v is out of the expected bin size with value %v\n", k, v) + } + } + if len(charFrequencies) != base { + t.Errorf("Not all of the chars are presention in ids. Got only %v\n", len(charFrequencies)) + } +} + +func testHistogram(t *testing.T, poolResponse *IdPoolResponse, n int) { + histogram := poolResponse.Histogram + expectedBinSize := math.Ceil(float64(n) / float64(len(histogram))) + tolerance := 0.1 + minBinSize := math.Round(expectedBinSize * (1 - tolerance)) + maxBinSize := math.Round(expectedBinSize * (1 + tolerance)) + fmt.Printf("Histogram %v\n", histogram) + fmt.Printf("expectedBinSize %v\n", expectedBinSize) + fmt.Printf("minBinSize %v\n", minBinSize) + fmt.Printf("maxBinSize %v\n", maxBinSize) + for _, i := range histogram { + if float64(i) < minBinSize || float64(i) > maxBinSize { + t.Errorf("Histogram is out of distribution tolerance") + } + } +} From 9b8fd246b00a092a4c89e40938ef029324d795f2 Mon Sep 17 00:00:00 2001 From: Akshay Vadher Date: Fri, 19 Jul 2024 20:13:27 +0530 Subject: [PATCH 2/2] Review comment --- cuid2_histogram_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuid2_histogram_test.go b/cuid2_histogram_test.go index 9936304..688559f 100644 --- a/cuid2_histogram_test.go +++ b/cuid2_histogram_test.go @@ -41,7 +41,7 @@ func testCharFrequency(t *testing.T, n int, ids []string) { // the letter frequency skewed. testIds := make([]string, len(ids)) for i, id := range ids { - testIds[i] = id[2:] + testIds[i] = id[1:] } charFrequencies := make(map[string]int) for _, id := range testIds {