Skip to content

Commit

Permalink
store/cachekv, x/bank/types: algorithmically fix pathologically slow …
Browse files Browse the repository at this point in the history
…code (#8719)

After continuously profiling InitGensis with 100K accounts, it showed
pathologically slow code, that was the result of a couple of patterns:
* Unconditional and not always necessary map lookups
* O(n^2) sdk.AccAddressFromBech32 retrievals when the code is expensive,
during a quicksort

The remedy involved 4 parts:
* O(n) sdk.AccAddressFromBech32 invocations, down from O(n^2) in the quicksort
* Only doing map lookups when the domain key check has passed
* Using a black magic compiler technique of the map clearing idiom
* Zero allocation []byte<->string conversion

With 100K accounts, this brings InitGenesis down to ~6min, instead of
20+min, it reduces the sort code from ~7sec down to 50ms.

Also some simple benchmark reflect the change:
```shell
name                    old time/op    new time/op    delta
SanitizeBalances500-8     19.3ms ±10%     1.5ms ± 5%  -92.46%  (p=0.000 n=20+20)
SanitizeBalances1000-8    41.9ms ± 8%     3.0ms ±12%  -92.92%  (p=0.000 n=20+20)

name                    old alloc/op   new alloc/op   delta
SanitizeBalances500-8     9.05MB ± 6%    0.56MB ± 0%  -93.76%  (p=0.000 n=20+18)
SanitizeBalances1000-8    20.2MB ± 3%     1.1MB ± 0%  -94.37%  (p=0.000 n=20+19)

name                    old allocs/op  new allocs/op  delta
SanitizeBalances500-8      72.4k ± 6%      4.5k ± 0%  -93.76%  (p=0.000 n=20+20)
SanitizeBalances1000-8      162k ± 3%        9k ± 0%  -94.40%  (p=0.000 n=20+20)
```

The CPU profiles show the radical change as per
#7766 (comment)

Later on, we shall do more profiling and fixes but for now this brings
down the run-time for InitGenesis.

Fixes #7766

Co-authored-by: Alessio Treglia <alessio@tendermint.com>
  • Loading branch information
odeke-em and Alessio Treglia authored Feb 27, 2021
1 parent 68e7a3a commit dbb9923
Show file tree
Hide file tree
Showing 4 changed files with 152 additions and 10 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ Ref: https://keepachangelog.com/en/1.0.0/
* (x/bank) [\#8614](https://github.com/cosmos/cosmos-sdk/issues/8614) Add `Name` and `Symbol` fields to denom metadata
* (x/auth) [\#8522](https://github.com/cosmos/cosmos-sdk/pull/8522) Allow to query all stored accounts
* (crypto/types) [\#8600](https://github.com/cosmos/cosmos-sdk/pull/8600) `CompactBitArray`: optimize the `NumTrueBitsBefore` method and add an `Equal` method.
* (store/cachekv), (x/bank/types) [\#8719](https://github.com/cosmos/cosmos-sdk/pull/8719) algorithmically fix pathologically slow code



Expand Down
38 changes: 35 additions & 3 deletions store/cachekv/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@ import (
"bytes"
"container/list"
"io"
"reflect"
"sort"
"sync"
"time"
"unsafe"

dbm "github.com/tendermint/tm-db"

Expand Down Expand Up @@ -177,18 +179,48 @@ func (store *Store) iterator(start, end []byte, ascending bool) types.Iterator {
return newCacheMergeIterator(parent, cache, ascending)
}

// strToByte is meant to make a zero allocation conversion
// from string -> []byte to speed up operations, it is not meant
// to be used generally, but for a specific pattern to check for available
// keys within a domain.
func strToByte(s string) []byte {
var b []byte
hdr := (*reflect.SliceHeader)(unsafe.Pointer(&b))
hdr.Cap = len(s)
hdr.Len = len(s)
hdr.Data = (*reflect.StringHeader)(unsafe.Pointer(&s)).Data
return b
}

// byteSliceToStr is meant to make a zero allocation conversion
// from []byte -> string to speed up operations, it is not meant
// to be used generally, but for a specific pattern to delete keys
// from a map.
func byteSliceToStr(b []byte) string {
hdr := (*reflect.StringHeader)(unsafe.Pointer(&b))
return *(*string)(unsafe.Pointer(hdr))
}

// Constructs a slice of dirty items, to use w/ memIterator.
func (store *Store) dirtyItems(start, end []byte) {
unsorted := make([]*kv.Pair, 0)

n := len(store.unsortedCache)
for key := range store.unsortedCache {
cacheValue := store.cache[key]

if dbm.IsKeyInDomain([]byte(key), start, end) {
if dbm.IsKeyInDomain(strToByte(key), start, end) {
cacheValue := store.cache[key]
unsorted = append(unsorted, &kv.Pair{Key: []byte(key), Value: cacheValue.value})
}
}

if len(unsorted) == n { // This pattern allows the Go compiler to emit the map clearing idiom for the entire map.
for key := range store.unsortedCache {
delete(store.unsortedCache, key)
}
} else { // Otherwise, normally delete the unsorted keys from the map.
for _, kv := range unsorted {
delete(store.unsortedCache, byteSliceToStr(kv.Key))
}
}

sort.Slice(unsorted, func(i, j int) bool {
Expand Down
45 changes: 38 additions & 7 deletions x/bank/types/balance.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,16 +62,47 @@ func (b Balance) Validate() error {

// SanitizeGenesisBalances sorts addresses and coin sets.
func SanitizeGenesisBalances(balances []Balance) []Balance {
sort.Slice(balances, func(i, j int) bool {
addr1, _ := sdk.AccAddressFromBech32(balances[i].Address)
addr2, _ := sdk.AccAddressFromBech32(balances[j].Address)
return bytes.Compare(addr1.Bytes(), addr2.Bytes()) < 0
})

// Given that this function sorts balances, using the standard library's
// Quicksort based algorithms, we have algorithmic complexities of:
// * Best case: O(nlogn)
// * Worst case: O(n^2)
// The comparator used MUST be cheap to use lest we incur expenses like we had
// before whereby sdk.AccAddressFromBech32, which is a very expensive operation
// compared n * n elements yet discarded computations each time, as per:
// https://github.com/cosmos/cosmos-sdk/issues/7766#issuecomment-786671734
// with this change the first step is to extract out and singly produce the values
// that'll be used for comparisons and keep them cheap and fast.

// 1. Retrieve the byte equivalents for each Balance's address and maintain a mapping of
// its Balance, as the mapper will be used in sorting.
type addrToBalance struct {
// We use a pointer here to avoid averse effects of value copying
// wasting RAM all around.
balance *Balance
accAddr []byte
}
adL := make([]*addrToBalance, 0, len(balances))
for _, balance := range balances {
balance.Coins = balance.Coins.Sort()
balance := balance
addr, _ := sdk.AccAddressFromBech32(balance.Address)
adL = append(adL, &addrToBalance{
balance: &balance,
accAddr: []byte(addr),
})
}

// 2. Sort with the cheap mapping, using the mapper's
// already accAddr bytes values which is a cheaper operation.
sort.Slice(adL, func(i, j int) bool {
ai, aj := adL[i], adL[j]
return bytes.Compare(ai.accAddr, aj.accAddr) < 0
})

// 3. To complete the sorting, we have to now just insert
// back the balances in the order returned by the sort.
for i, ad := range adL {
balances[i] = *ad.balance
}
return balances
}

Expand Down
78 changes: 78 additions & 0 deletions x/bank/types/balance_test.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
package types_test

import (
"bytes"
"testing"

"github.com/stretchr/testify/require"

"github.com/cosmos/cosmos-sdk/crypto/keys/ed25519"
sdk "github.com/cosmos/cosmos-sdk/types"
bank "github.com/cosmos/cosmos-sdk/x/bank/types"
)
Expand Down Expand Up @@ -101,3 +103,79 @@ func TestBalance_GetAddress(t *testing.T) {
})
}
}

func TestSanitizeBalances(t *testing.T) {
// 1. Generate balances
tokens := sdk.TokensFromConsensusPower(81)
coin := sdk.NewCoin("benchcoin", tokens)
coins := sdk.Coins{coin}
addrs, _ := makeRandomAddressesAndPublicKeys(20)

var balances []bank.Balance
for _, addr := range addrs {
balances = append(balances, bank.Balance{
Address: addr.String(),
Coins: coins,
})
}
// 2. Sort the values.
sorted := bank.SanitizeGenesisBalances(balances)

// 3. Compare and ensure that all the values are sorted in ascending order.
// Invariant after sorting:
// a[i] <= a[i+1...n]
for i := 0; i < len(sorted); i++ {
ai := sorted[i]
// Ensure that every single value that comes after i is less than it.
for j := i + 1; j < len(sorted); j++ {
aj := sorted[j]

if got := bytes.Compare(ai.GetAddress(), aj.GetAddress()); got > 0 {
t.Errorf("Balance(%d) > Balance(%d)", i, j)
}
}
}
}

func makeRandomAddressesAndPublicKeys(n int) (accL []sdk.AccAddress, pkL []*ed25519.PubKey) {
for i := 0; i < n; i++ {
pk := ed25519.GenPrivKey().PubKey().(*ed25519.PubKey)
pkL = append(pkL, pk)
accL = append(accL, sdk.AccAddress(pk.Address()))
}
return accL, pkL
}

var sink, revert interface{}

func BenchmarkSanitizeBalances500(b *testing.B) {
benchmarkSanitizeBalances(b, 500)
}

func BenchmarkSanitizeBalances1000(b *testing.B) {
benchmarkSanitizeBalances(b, 1000)
}

func benchmarkSanitizeBalances(b *testing.B, nAddresses int) {
b.ReportAllocs()
tokens := sdk.TokensFromConsensusPower(81)
coin := sdk.NewCoin("benchcoin", tokens)
coins := sdk.Coins{coin}
addrs, _ := makeRandomAddressesAndPublicKeys(nAddresses)

b.ResetTimer()
for i := 0; i < b.N; i++ {
var balances []bank.Balance
for _, addr := range addrs {
balances = append(balances, bank.Balance{
Address: addr.String(),
Coins: coins,
})
}
sink = bank.SanitizeGenesisBalances(balances)
}
if sink == nil {
b.Fatal("Benchmark did not run")
}
sink = revert
}

0 comments on commit dbb9923

Please sign in to comment.