perf(txindex): Lower allocation overhead of txIndex matchRange (backp… (#27) (#31)

* perf(txindex): Lower allocation overhead of txIndex matchRange (backport cometbft#2839) (cometbft#2884)

In Osmosis we see massive amounts of heap pressure/allocations coming from txIndex matchRange. (Screenshot below from ~1 hour of heap profiling.)

![image](https://github.com/cometbft/cometbft/assets/6440154/bf2dfe89-56f0-4824-815b-c5822d20568b)

This PR is expected to reduce these allocations by a factor of 3 while remaining fully compatible. It:

- Does not get Key() twice (160GB allocation saved)
- Uses no heap allocations for isTagKey (120GB saved)
- Does not string cast or do strings.Split in parsing the value (~400GB expected saved)
- Reuses the big.Int (24GB saved)

The remaining RAM overhead from .Key() needs a cometbft-db API change. The remaining RAM overhead from extracting the value can be saved with an unsafe call for casting the output to string with no heap allocation, but we can do that in a separate PR.

---

- [x] Tests written/updated - All existing tests still apply
- [x] Changelog entry added in `.changelog` (we use [unclog](https://github.com/informalsystems/unclog) to manage our changelog)
- [x] Updated relevant documentation (`docs/` or `spec/`) and code comments
- [x] Title follows the [Conventional Commits](https://www.conventionalcommits.org/en/v1.0.0/) spec

<hr>This is an automatic backport of pull request cometbft#2839 done by [Mergify](https://mergify.com).

---------

Co-authored-by: Dev Ojha <ValarDragon@users.noreply.github.com>
Co-authored-by: Anton Kaliaev <anton.kalyaev@gmail.com>

* add changelog

---------

Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
Co-authored-by: Dev Ojha <ValarDragon@users.noreply.github.com>
Co-authored-by: Anton Kaliaev <anton.kalyaev@gmail.com>

(cherry picked from commit efd1ea2)

Co-authored-by: Adam Tucker <adam@osmosis.team>
osmosis-labs · Apr 30, 2024 · f9bf103 · f9bf103
1 parent 8f84614
commit f9bf103
Show file tree

Hide file tree

Showing 2 changed files with 99 additions and 41 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,6 +2,8 @@
 
 ## Unreleased
 
+* [#27](https://github.com/osmosis-labs/cometbft/pull/27) Lower allocation overhead of txIndex matchRange
+
 ## v0.37.4-v24-osmo-3
 
 * [#21](https://github.com/osmosis-labs/cometbft/pull/21) Move websocket logs to Debug

diff --git a/state/txindex/kv/kv.go b/state/txindex/kv/kv.go
@@ -4,6 +4,7 @@ import (
 	"bytes"
 	"context"
 	"encoding/hex"
+	"errors"
 	"fmt"
 	"math/big"
 	"sort"
@@ -23,8 +24,9 @@ import (
 )
 
 const (
-	tagKeySeparator   = "/"
-	eventSeqSeparator = "$es$"
+	tagKeySeparator     = "/"
 	// tagKeySeparatorRune is the separator as a character constant, so key
 	// parsing can compare it against individual key bytes (and pass it to
 	// bytes.LastIndexByte) without converting the key to a string first.
+	tagKeySeparatorRune = '/'
+	eventSeqSeparator   = "$es$"
 )
 
 var _ txindex.TxIndexer = (*TxIndex)(nil)
@@ -385,20 +387,20 @@ func lookForHash(conditions []query.Condition) (hash []byte, ok bool, err error)
 	return
 }
 
-func (txi *TxIndex) setTmpHashes(tmpHeights map[string]TxInfo, it dbm.Iterator, height int64) {
-	eventSeq := extractEventSeqFromKey(it.Key())
-	txInfo := TxInfo{
-		TxBytes: it.Value(),
-		Height:  height,
-	}
-	tmpHeights[string(it.Value())+eventSeq] = txInfo
-}
-
 // TxInfo is the value stored in the temporary result maps built while
 // matching query conditions against the index.
 type TxInfo struct {
 	// TxBytes holds the bytes read from the index iterator's value for the hit.
 	TxBytes []byte
 	// Height is the height parsed from the index key of the hit.
 	Height  int64
 }
 
// setTmpHashes records one index hit in tmpHeights.
//
// The map key is the iterator value (converted to a string) followed by the
// event sequence extracted from the index key, so hits that share a value but
// carry different event sequences occupy separate entries. The stored TxInfo
// carries the value bytes and the supplied height. The key and value are
// passed in by the caller instead of being read from the iterator here.
+func (*TxIndex) setTmpHashes(tmpHeights map[string]TxInfo, key, value []byte, height int64) {
+	eventSeq := extractEventSeqFromKey(key)
+	txInfo := TxInfo{
+		TxBytes: value,
+		Height:  height,
+	}
+	tmpHeights[string(value)+eventSeq] = txInfo
+}
+
 // match returns all matching txs by hash that meet a given condition and start
 // key. An already filtered result (filteredHashes) is provided such that any
 // non-intersecting matches are removed.
@@ -438,12 +440,17 @@ func (txi *TxIndex) match(
 
 			// If we have a height range in a query, we need only transactions
 			// for this height
-			keyHeight, err := extractHeightFromKey(it.Key())
-			if err != nil || !checkHeightConditions(heightInfo, keyHeight) {
+			key := it.Key()
+			keyHeight, err := extractHeightFromKey(key)
+			if err != nil {
 				continue
 			}
 
-			txi.setTmpHashes(tmpHashes, it, keyHeight)
+			withinBounds := checkHeightConditions(heightInfo, keyHeight)
+			if !withinBounds {
+				continue
+			}
+			txi.setTmpHashes(tmpHashes, key, it.Value(), keyHeight)
 			// Potentially exit early.
 			select {
 			case <-ctx.Done():
@@ -466,11 +473,16 @@ func (txi *TxIndex) match(
 
 	EXISTS_LOOP:
 		for ; it.Valid(); it.Next() {
-			keyHeight, err := extractHeightFromKey(it.Key())
-			if err != nil || !checkHeightConditions(heightInfo, keyHeight) {
+			key := it.Key()
+			keyHeight, err := extractHeightFromKey(key)
+			if err != nil {
+				continue
+			}
+			withinBounds := checkHeightConditions(heightInfo, keyHeight)
+			if !withinBounds {
 				continue
 			}
-			txi.setTmpHashes(tmpHashes, it, keyHeight)
+			txi.setTmpHashes(tmpHashes, key, it.Value(), keyHeight)
 
 			// Potentially exit early.
 			select {
@@ -500,11 +512,16 @@ func (txi *TxIndex) match(
 			}
 
 			if strings.Contains(extractValueFromKey(it.Key()), c.Operand.(string)) {
-				keyHeight, err := extractHeightFromKey(it.Key())
-				if err != nil || !checkHeightConditions(heightInfo, keyHeight) {
+				key := it.Key()
+				keyHeight, err := extractHeightFromKey(key)
+				if err != nil {
 					continue
 				}
-				txi.setTmpHashes(tmpHashes, it, keyHeight)
+				withinBounds := checkHeightConditions(heightInfo, keyHeight)
+				if !withinBounds {
+					continue
+				}
+				txi.setTmpHashes(tmpHashes, key, it.Value(), keyHeight)
 			}
 
 			// Potentially exit early.
@@ -584,17 +601,21 @@ func (txi *TxIndex) matchRange(
 		panic(err)
 	}
 	defer it.Close()
+	bigIntValue := new(big.Int)
 
 LOOP:
 	for ; it.Valid(); it.Next() {
-		if !isTagKey(it.Key()) {
+		// TODO: We need to make a function for getting it.Key() as a byte slice with no copies.
+		// It currently copies the source data (which can change on a subsequent .Next() call) but that
+		// is not an issue for us.
+		key := it.Key()
+		if !isTagKey(key) {
 			continue
 		}
 
 		if _, ok := qr.AnyBound().(*big.Int); ok {
-			v := new(big.Int)
-			eventValue := extractValueFromKey(it.Key())
-			v, ok := v.SetString(eventValue, 10)
+			value := extractValueFromKey(key)
+			v, ok := bigIntValue.SetString(value, 10)
 			if !ok {
 				continue LOOP
 			}
@@ -606,14 +627,15 @@ LOOP:
 			if qr.Key != types.TxHeightKey {
 				// If the query condition specifies a height range, we need to check if the height
 				// of the transaction is within the range
-				if !checkHeightConditions(heightInfo, keyHeight) {
-					continue LOOP
+				withinBounds := checkHeightConditions(heightInfo, keyHeight)
+				if !withinBounds {
+					continue
 				}
 
 			}
 
 			if checkBounds(qr, v) {
-				txi.setTmpHashes(tmpHashes, it, keyHeight)
+				txi.setTmpHashes(tmpHashes, key, it.Value(), keyHeight)
 			}
 
 			// XXX: passing time in a ABCI Events is not yet implemented
@@ -677,29 +699,62 @@ func isTagKey(key []byte) bool {
 	// tags should 4. Alternatively it should be 3 if the event was not indexed
 	// with the corresponding event sequence. However, some attribute values in
 	// production can contain the tag separator. Therefore, the condition is >= 3.
-	numTags := strings.Count(string(key), tagKeySeparator)
-	return numTags >= 3
+	numTags := 0
+	for i := 0; i < len(key); i++ {
+		if key[i] == tagKeySeparatorRune {
+			numTags++
+			if numTags >= 3 {
+				return true
+			}
+		}
+	}
+	return false
 }
 
 func extractHeightFromKey(key []byte) (int64, error) {
-	parts := strings.SplitN(string(key), tagKeySeparator, -1)
+	// the height is the second last element in the key.
+	// Find the position of the last occurrence of tagKeySeparator
+	endPos := bytes.LastIndexByte(key, tagKeySeparatorRune)
+	if endPos == -1 {
+		return 0, errors.New("separator not found")
+	}
+
+	// Find the position of the second last occurrence of tagKeySeparator
+	startPos := bytes.LastIndexByte(key[:endPos-1], tagKeySeparatorRune)
+	if startPos == -1 {
+		return 0, errors.New("second last separator not found")
+	}
 
-	return strconv.ParseInt(parts[len(parts)-2], 10, 64)
+	// Extract the height part of the key
+	height, err := strconv.ParseInt(string(key[startPos+1:endPos]), 10, 64)
+	if err != nil {
+		return 0, err
+	}
+	return height, nil
 }
-func extractValueFromKey(key []byte) string {
-	keyString := string(key)
-	parts := strings.SplitN(keyString, tagKeySeparator, -1)
-	partsLen := len(parts)
-	value := strings.TrimPrefix(keyString, parts[0]+tagKeySeparator)
 
-	suffix := ""
-	suffixLen := 2
+func extractValueFromKey(key []byte) string {
+	// Find the positions of tagKeySeparator in the byte slice
+	var indices []int
+	for i, b := range key {
+		if b == tagKeySeparatorRune {
+			indices = append(indices, i)
+		}
+	}
 
-	for i := 1; i <= suffixLen; i++ {
-		suffix = tagKeySeparator + parts[partsLen-i] + suffix
+	// If there are less than 2 occurrences of tagKeySeparator, return an empty string
+	if len(indices) < 2 {
+		return ""
 	}
-	return strings.TrimSuffix(value, suffix)
 
+	// Extract the value between the first and second last occurrence of tagKeySeparator
+	value := key[indices[0]+1 : indices[len(indices)-2]]
+
+	// Trim any leading or trailing whitespace
+	value = bytes.TrimSpace(value)
+
+	// TODO: Do an unsafe cast to avoid an extra allocation here
+	return string(value)
 }
 
 func extractEventSeqFromKey(key []byte) string {
@@ -712,6 +767,7 @@ func extractEventSeqFromKey(key []byte) string {
 	}
 	return "0"
 }
+
 func keyForEvent(key string, value string, result *abci.TxResult, eventSeq int64) []byte {
 	return []byte(fmt.Sprintf("%s/%s/%d/%d%s",
 		key,