cosmos · mergify · Oct 14, 2021 · Aug 29, 2021
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -48,6 +48,7 @@ Ref: https://keepachangelog.com/en/1.0.0/
 * [\#9837](https://github.com/cosmos/cosmos-sdk/issues/9837) `--generate-only` flag will accept the keyname now.
 * [\#10045](https://github.com/cosmos/cosmos-sdk/pull/10045) Revert [#8549](https://github.com/cosmos/cosmos-sdk/pull/8549). Do not route grpc queries through Tendermint.
 * [\#10326](https://github.com/cosmos/cosmos-sdk/pull/10326) `x/authz` add query all grants by granter query.
+* [\#10024](https://github.com/cosmos/cosmos-sdk/pull/10024) `store/cachekv` performance improvement: reduce the growth factor of iterator ranging by using binary searches to find dirty items when the unsorted key count is >= 1024.
 
 ### API Breaking Changes
 

diff --git a/store/cachekv/search_test.go b/store/cachekv/search_test.go
@@ -0,0 +1,141 @@
+package cachekv
+
+import "testing"
+
+func TestFindStartIndex(t *testing.T) {
+	tests := []struct {
+		name    string
+		sortedL []string
+		query   string
+		want    int
+	}{
+		{
+			name:    "non-existent value",
+			sortedL: []string{"a", "b", "c", "d", "e", "l", "m", "n", "u", "v", "w", "x", "y", "z"},
+			query:   "o",
+			want:    8,
+		},
+		{
+			name:    "dupes start at index 0",
+			sortedL: []string{"a", "a", "a", "b", "c", "d", "e", "l", "m", "n", "u", "v", "w", "x", "y", "z"},
+			query:   "a",
+			want:    0,
+		},
+		{
+			name:    "dupes start at non-index 0",
+			sortedL: []string{"a", "c", "c", "c", "c", "d", "e", "l", "m", "n", "u", "v", "w", "x", "y", "z"},
+			query:   "c",
+			want:    1,
+		},
+		{
+			name:    "at end",
+			sortedL: []string{"a", "e", "u", "v", "w", "x", "y", "z"},
+			query:   "z",
+			want:    7,
+		},
+		{
+			name:    "dupes at end",
+			sortedL: []string{"a", "e", "u", "v", "w", "x", "y", "z", "z", "z", "z"},
+			query:   "z",
+			want:    7,
+		},
+		{
+			name:    "entirely dupes",
+			sortedL: []string{"z", "z", "z", "z", "z"},
+			query:   "z",
+			want:    0,
+		},
+		{
+			name:    "non-existent but within >=start",
+			sortedL: []string{"z", "z", "z", "z", "z"},
+			query:   "p",
+			want:    0,
+		},
+		{
+			name:    "non-existent and out of range",
+			sortedL: []string{"d", "e", "f", "g", "h"},
+			query:   "z",
+			want:    -1,
+		},
+	}
+
+	for _, tt := range tests {
+		tt := tt
+		t.Run(tt.name, func(t *testing.T) {
+			body := tt.sortedL
+			got := findStartIndex(body, tt.query)
+			if got != tt.want {
+				t.Fatalf("Got: %d, want: %d", got, tt.want)
+			}
+		})
+	}
+}
+
+func TestFindEndIndex(t *testing.T) {
+	tests := []struct {
+		name    string
+		sortedL []string
+		query   string
+		want    int
+	}{
+		{
+			name:    "non-existent value",
+			sortedL: []string{"a", "b", "c", "d", "e", "l", "m", "n", "u", "v", "w", "x", "y", "z"},
+			query:   "o",
+			want:    7,
+		},
+		{
+			name:    "dupes start at index 0",
+			sortedL: []string{"a", "a", "a", "b", "c", "d", "e", "l", "m", "n", "u", "v", "w", "x", "y", "z"},
+			query:   "a",
+			want:    0,
+		},
+		{
+			name:    "dupes start at non-index 0",
+			sortedL: []string{"a", "c", "c", "c", "c", "d", "e", "l", "m", "n", "u", "v", "w", "x", "y", "z"},
+			query:   "c",
+			want:    1,
+		},
+		{
+			name:    "at end",
+			sortedL: []string{"a", "e", "u", "v", "w", "x", "y", "z"},
+			query:   "z",
+			want:    7,
+		},
+		{
+			name:    "dupes at end",
+			sortedL: []string{"a", "e", "u", "v", "w", "x", "y", "z", "z", "z", "z"},
+			query:   "z",
+			want:    7,
+		},
+		{
+			name:    "entirely dupes",
+			sortedL: []string{"z", "z", "z", "z", "z"},
+			query:   "z",
+			want:    0,
+		},
+		{
+			name:    "non-existent and out of range",
+			sortedL: []string{"z", "z", "z", "z", "z"},
+			query:   "p",
+			want:    -1,
+		},
+		{
+			name:    "non-existent and out of range",
+			sortedL: []string{"d", "e", "f", "g", "h"},
+			query:   "z",
+			want:    4,
+		},
+	}
+
+	for _, tt := range tests {
+		tt := tt
+		t.Run(tt.name, func(t *testing.T) {
+			body := tt.sortedL
+			got := findEndIndex(body, tt.query)
+			if got != tt.want {
+				t.Fatalf("Got: %d, want: %d", got, tt.want)
+			}
+		})
+	}
+}
diff --git a/store/cachekv/store.go b/store/cachekv/store.go
@@ -179,8 +179,94 @@ func (store *Store) iterator(start, end []byte, ascending bool) types.Iterator {
 	return newCacheMergeIterator(parent, cache, ascending)
 }
 
+func findStartIndex(strL []string, startQ string) int {
+	// Modified binary search to find the very first element >= startQ.
+	if len(strL) == 0 {
+		return -1
+	}
+
+	var left, right, mid int
+	right = len(strL) - 1
+	for left <= right {
+		mid = (left + right) >> 1
+		midStr := strL[mid]
+		if midStr == startQ {
+			// Handle the case where multiple values may be equal to startQ.
+			// Walk left to the nearest value that differs from midStr; i+1 is then
+			// the first element equal to (and hence >=) startQ.
+			for i := mid - 1; i >= 0; i-- {
+				if strL[i] != midStr {
+					return i + 1
+				}
+			}
+			return 0
+		}
+		if midStr < startQ {
+			left = mid + 1
+		} else { // midStrL > startQ
+			right = mid - 1
+		}
+	}
+	if left >= 0 && left < len(strL) && strL[left] >= startQ {
+		return left
+	}
+	return -1
+}
+
+func findEndIndex(strL []string, endQ string) int {
+	if len(strL) == 0 {
+		return -1
+	}
+
+	// Modified binary search: returns the index of the first element equal to endQ, or else of the last element < endQ (-1 if none).
+	var left, right, mid int
+	right = len(strL) - 1
+	for left <= right {
+		mid = (left + right) >> 1
+		midStr := strL[mid]
+		if midStr == endQ {
+			// Handle the case where multiple values may be equal to endQ.
+			// Walk left to the nearest value < midStr; i+1 is then the first
+			// element equal to endQ.
+			for i := mid - 1; i >= 0; i-- {
+				if strL[i] < midStr {
+					return i + 1
+				}
+			}
+			return 0
+		}
+		if midStr < endQ {
+			left = mid + 1
+		} else { // midStrL > startQ
+			right = mid - 1
+		}
+	}
+
+	// Binary search failed, now let's find a value less than endQ.
+	for i := right; i >= 0; i-- {
+		if strL[i] < endQ {
+			return i
+		}
+	}
+
+	return -1
+}
+
+type sortState int
+
+const (
+	stateUnsorted sortState = iota
+	stateAlreadySorted
+)
+
 // Constructs a slice of dirty items, to use w/ memIterator.
 func (store *Store) dirtyItems(start, end []byte) {
+	startStr, endStr := conv.UnsafeBytesToStr(start), conv.UnsafeBytesToStr(end)
+	if startStr > endStr {
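+		// Empty range, nothing to do. NOTE(review): a nil end presumably converts to "", so an open-ended range with a non-empty start also returns here — confirm intended.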
+		return
+	}
+
 	n := len(store.unsortedCache)
 	unsorted := make([]*kv.Pair, 0)
 	// If the unsortedCache is too big, its costs too much to determine
@@ -189,24 +275,49 @@ func (store *Store) dirtyItems(start, end []byte) {
 	// O(N^2) overhead.
 	// Even without that, too many range checks eventually becomes more expensive
 	// than just not having the cache.
-	if n >= 1024 {
-		for key := range store.unsortedCache {
-			cacheValue := store.cache[key]
-			unsorted = append(unsorted, &kv.Pair{Key: []byte(key), Value: cacheValue.value})
-		}
-	} else {
-		// else do a linear scan to determine if the unsorted pairs are in the pool.
+	if n < 1024 {
 		for key := range store.unsortedCache {
 			if dbm.IsKeyInDomain(conv.UnsafeStrToBytes(key), start, end) {
 				cacheValue := store.cache[key]
 				unsorted = append(unsorted, &kv.Pair{Key: []byte(key), Value: cacheValue.value})
 			}
 		}
+		store.clearUnsortedCacheSubset(unsorted, stateUnsorted)
+		return
+	}
+
+	// Otherwise it is large so perform a modified binary search to find
+	// the target ranges for the keys that we should be looking for.
+	strL := make([]string, 0, n)
+	for key := range store.unsortedCache {
+		strL = append(strL, key)
 	}
-	store.clearUnsortedCacheSubset(unsorted)
+	sort.Strings(strL)
+
+	// Now find the values within the domain
+	//  [start, end)
+	startIndex := findStartIndex(strL, startStr)
+	endIndex := findEndIndex(strL, endStr)
+
+	if endIndex < 0 {
+		endIndex = len(strL) - 1
+	}
+	if startIndex < 0 {
+		startIndex = 0
+	}
+
+	kvL := make([]*kv.Pair, 0)
+	for i := startIndex; i <= endIndex; i++ {
+		key := strL[i]
+		cacheValue := store.cache[key]
+		kvL = append(kvL, &kv.Pair{Key: []byte(key), Value: cacheValue.value})
+	}
+
+	// kvL was already sorted so pass it in as is.
+	store.clearUnsortedCacheSubset(kvL, stateAlreadySorted)
 }
 
-func (store *Store) clearUnsortedCacheSubset(unsorted []*kv.Pair) {
+func (store *Store) clearUnsortedCacheSubset(unsorted []*kv.Pair, sortState sortState) {
 	n := len(store.unsortedCache)
 	if len(unsorted) == n { // This pattern allows the Go compiler to emit the map clearing idiom for the entire map.
 		for key := range store.unsortedCache {
@@ -217,9 +328,12 @@ func (store *Store) clearUnsortedCacheSubset(unsorted []*kv.Pair) {
 			delete(store.unsortedCache, conv.UnsafeBytesToStr(kv.Key))
 		}
 	}
-	sort.Slice(unsorted, func(i, j int) bool {
-		return bytes.Compare(unsorted[i].Key, unsorted[j].Key) < 0
-	})
+
+	if sortState == stateUnsorted {
+		sort.Slice(unsorted, func(i, j int) bool {
+			return bytes.Compare(unsorted[i].Key, unsorted[j].Key) < 0
+		})
+	}
 
 	for _, item := range unsorted {
 		if item.Value == nil {