Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

trie, tests/fuzzers: implement a stacktrie fuzzer + stacktrie fixes #21799

Merged
merged 9 commits into from
Nov 9, 2020
23 changes: 23 additions & 0 deletions tests/fuzzers/stacktrie/debug/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
package main

import (
"fmt"
"io/ioutil"
"os"

"github.com/ethereum/go-ethereum/tests/fuzzers/stacktrie"
)

func main() {
if len(os.Args) != 2 {
fmt.Fprintf(os.Stderr, "Usage: debug <file>")
os.Exit(1)
}
crasher := os.Args[1]
data, err := ioutil.ReadFile(crasher)
if err != nil {
fmt.Fprintf(os.Stderr, "error loading crasher %v: %v", crasher, err)
os.Exit(1)
}
stacktrie.Debug(data)
}
197 changes: 197 additions & 0 deletions tests/fuzzers/stacktrie/trie_fuzzer.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
// Copyright 2020 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package stacktrie

import (
"bytes"
"encoding/binary"
"errors"
"fmt"
"hash"
"io"
"sort"

"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/trie"
"golang.org/x/crypto/sha3"
)

type fuzzer struct {
input io.Reader
exhausted bool
debugging bool
}

func (f *fuzzer) read(size int) []byte {
out := make([]byte, size)
if _, err := f.input.Read(out); err != nil {
f.exhausted = true
}
return out
}

func (f *fuzzer) readSlice(min, max int) []byte {
var a uint16
binary.Read(f.input, binary.LittleEndian, &a)
size := min + int(a)%(max-min)
out := make([]byte, size)
if _, err := f.input.Read(out); err != nil {
f.exhausted = true
}
return out
}

// spongeDb is a dummy db backend which accumulates writes in a sponge
type spongeDb struct {
sponge hash.Hash
debug bool
}

func (s *spongeDb) Has(key []byte) (bool, error) { panic("implement me") }
func (s *spongeDb) Get(key []byte) ([]byte, error) { return nil, errors.New("no such elem") }
func (s *spongeDb) Delete(key []byte) error { panic("implement me") }
func (s *spongeDb) NewBatch() ethdb.Batch { return &spongeBatch{s} }
func (s *spongeDb) Stat(property string) (string, error) { panic("implement me") }
func (s *spongeDb) Compact(start []byte, limit []byte) error { panic("implement me") }
func (s *spongeDb) Close() error { return nil }

func (s *spongeDb) Put(key []byte, value []byte) error {
if s.debug {
fmt.Printf("db.Put %x : %x\n", key, value)
}
s.sponge.Write(key)
s.sponge.Write(value)
return nil
}
func (s *spongeDb) NewIterator(prefix []byte, start []byte) ethdb.Iterator { panic("implement me") }

// spongeBatch is a dummy batch which immediately writes to the underlying spongedb
type spongeBatch struct {
db *spongeDb
}

func (b *spongeBatch) Put(key, value []byte) error {
b.db.Put(key, value)
return nil
}
func (b *spongeBatch) Delete(key []byte) error { panic("implement me") }
func (b *spongeBatch) ValueSize() int { return 100 }
func (b *spongeBatch) Write() error { return nil }
func (b *spongeBatch) Reset() {}
func (b *spongeBatch) Replay(w ethdb.KeyValueWriter) error { return nil }

type kv struct {
k, v []byte
}
type kvs []kv

func (k kvs) Len() int {
return len(k)
}

func (k kvs) Less(i, j int) bool {
return bytes.Compare(k[i].k, k[j].k) < 0
}

func (k kvs) Swap(i, j int) {
k[j], k[i] = k[i], k[j]
}

// The function must return
// 1 if the fuzzer should increase priority of the
// given input during subsequent fuzzing (for example, the input is lexically
// correct and was parsed successfully);
// -1 if the input must not be added to corpus even if gives new coverage; and
// 0 otherwise
// other values are reserved for future use.
func Fuzz(data []byte) int {
f := fuzzer{
input: bytes.NewReader(data),
exhausted: false,
}
return f.fuzz()
}

func Debug(data []byte) int {
f := fuzzer{
input: bytes.NewReader(data),
exhausted: false,
debugging: true,
}
return f.fuzz()
}

func (f *fuzzer) fuzz() int {

// This spongeDb is used to check the sequence of disk-db-writes
var (
spongeA = &spongeDb{sponge: sha3.NewLegacyKeccak256()}
dbA = trie.NewDatabase(spongeA)
trieA, _ = trie.New(common.Hash{}, dbA)
spongeB = &spongeDb{sponge: sha3.NewLegacyKeccak256()}
trieB = trie.NewStackTrie(spongeB)
vals kvs
useful bool
maxElements = 10000
)
// Fill the trie with elements
for i := 0; !f.exhausted && i < maxElements; i++ {
k := f.read(32)
v := f.readSlice(1, 500)
if f.exhausted {
// If it was exhausted while reading, the value may be all zeroes,
// thus 'deletion' which is not supported on stacktrie
break
}
vals = append(vals, kv{k: k, v: v})
trieA.Update(k, v)
useful = true
}
if !useful {
return 0
}
// Flush trie -> database
rootA, err := trieA.Commit(nil)
if err != nil {
panic(err)
}
// Flush memdb -> disk (sponge)
dbA.Commit(rootA, false, nil)

// Stacktrie requires sorted insertion
sort.Sort(vals)
for _, kv := range vals {
if f.debugging {
fmt.Printf("{\"0x%x\" , \"0x%x\"} // stacktrie.Update\n", kv.k, kv.v)
}
trieB.Update(kv.k, kv.v)
}
rootB := trieB.Hash()
if _, err := trieB.Commit(); err != nil {
panic(err)
}
if rootA != rootB {
panic(fmt.Sprintf("roots differ: (trie) %x != %x (stacktrie)", rootA, rootB))
}
sumA := spongeA.sponge.Sum(nil)
sumB := spongeB.sponge.Sum(nil)
if !bytes.Equal(sumA, sumB) {
panic(fmt.Sprintf("sequence differ: (trie) %x != %x (stacktrie)", sumA, sumB))
}
return 1
}
11 changes: 9 additions & 2 deletions tests/fuzzers/trie/trie-fuzzer.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,15 +122,22 @@ func Generate(input []byte) randTest {
return steps
}

// The function must return
// 1 if the fuzzer should increase priority of the
// given input during subsequent fuzzing (for example, the input is lexically
// correct and was parsed successfully);
// -1 if the input must not be added to corpus even if gives new coverage; and
// 0 otherwise
// other values are reserved for future use.
func Fuzz(input []byte) int {
program := Generate(input)
if len(program) == 0 {
return -1
return 0
}
if err := runRandTest(program); err != nil {
panic(err)
}
return 0
return 1
}

func runRandTest(rt randTest) error {
Expand Down
39 changes: 27 additions & 12 deletions trie/stacktrie.go
Original file line number Diff line number Diff line change
Expand Up @@ -314,19 +314,22 @@ func (st *StackTrie) hash() {
panic(err)
}
case extNode:
st.children[0].hash()
h = newHasher(false)
defer returnHasherToPool(h)
h.tmp.Reset()
st.children[0].hash()
// This is also possible:
//sz := hexToCompactInPlace(st.key)
//n := [][]byte{
// st.key[:sz],
// st.children[0].val,
//}
n := [][]byte{
hexToCompact(st.key),
st.children[0].val,
var valuenode node
if len(st.children[0].val) < 32 {
valuenode = rawNode(st.children[0].val)
} else {
valuenode = hashNode(st.children[0].val)
}
n := struct {
Key []byte
Val node
}{
Key: hexToCompact(st.key),
Val: valuenode,
}
if err := rlp.Encode(&h.tmp, n); err != nil {
panic(err)
Expand Down Expand Up @@ -406,6 +409,18 @@ func (st *StackTrie) Commit() (common.Hash, error) {
return common.Hash{}, ErrCommitDisabled
}
st.hash()
h := common.BytesToHash(st.val)
return h, nil
if len(st.val) != 32 {
// If the node's RLP isn't 32 bytes long, the node will not
// be hashed (and committed), and instead contain the rlp-encoding of the
// node. For the top level node, we need to force the hashing+commit.
ret := make([]byte, 32)
h := newHasher(false)
defer returnHasherToPool(h)
h.sha.Reset()
h.sha.Write(st.val)
h.sha.Read(ret)
st.db.Put(ret, st.val)
return common.BytesToHash(ret), nil
}
return common.BytesToHash(st.val), nil
}
49 changes: 49 additions & 0 deletions trie/stacktrie_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -240,3 +240,52 @@ func TestDerivableList(t *testing.T) {
}
}
}

// TestUpdateSmallNodes tests a case where the leaves are small (both key and value),
// which causes a lot of node-within-node. This case was found via fuzzing.
func TestUpdateSmallNodes(t *testing.T) {
st := NewStackTrie(nil)
nt, _ := New(common.Hash{}, NewDatabase(memorydb.New()))
kvs := []struct {
K string
V string
}{
{"63303030", "3041"}, // stacktrie.Update
{"65", "3000"}, // stacktrie.Update
}
for _, kv := range kvs {
nt.TryUpdate(common.FromHex(kv.K), common.FromHex(kv.V))
st.TryUpdate(common.FromHex(kv.K), common.FromHex(kv.V))
}
if nt.Hash() != st.Hash() {
t.Fatalf("error %x != %x", st.Hash(), nt.Hash())
}
}
holiman marked this conversation as resolved.
Show resolved Hide resolved

// TestUpdateVariableKeys contains a case which stacktrie fails: when keys of different
// sizes are used, and the second one has the same prefix as the first, then the
// stacktrie fails, since it's unable to 'expand' on an already added leaf.
// For all practical purposes, this is fine, since keys are fixed-size length
// in account and storage tries.
//
// The test is marked as 'skipped', and exists just to have the behaviour documented.
// This case was found via fuzzing.
func TestUpdateVariableKeys(t *testing.T) {
t.SkipNow()
st := NewStackTrie(nil)
nt, _ := New(common.Hash{}, NewDatabase(memorydb.New()))
kvs := []struct {
K string
V string
}{
{"0x33303534636532393561313031676174", "303030"},
{"0x3330353463653239356131303167617430", "313131"},
}
for _, kv := range kvs {
nt.TryUpdate(common.FromHex(kv.K), common.FromHex(kv.V))
st.TryUpdate(common.FromHex(kv.K), common.FromHex(kv.V))
}
if nt.Hash() != st.Hash() {
t.Fatalf("error %x != %x", st.Hash(), nt.Hash())
}
}
36 changes: 36 additions & 0 deletions trie/trie_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -853,6 +853,42 @@ func TestCommitSequenceStackTrie(t *testing.T) {
}
}

// TestCommitSequenceSmallRoot tests that a trie which is essentially only a
// small (<32 byte) shortnode with an included value is properly committed to a
// database.
// This case might not matter, since in practice, all keys are 32 bytes, which means
// that even a small trie which contains a leaf will have an extension making it
// not fit into 32 bytes, rlp-encoded. However, it's still the correct thing to do.
func TestCommitSequenceSmallRoot(t *testing.T) {
s := &spongeDb{sponge: sha3.NewLegacyKeccak256(), id: "a"}
db := NewDatabase(s)
trie, _ := New(common.Hash{}, db)
// Another sponge is used for the stacktrie commits
stackTrieSponge := &spongeDb{sponge: sha3.NewLegacyKeccak256(), id: "b"}
stTrie := NewStackTrie(stackTrieSponge)
// Add a single small-element to the trie(s)
key := make([]byte, 5)
key[0] = 1
trie.TryUpdate(key, []byte{0x1})
stTrie.TryUpdate(key, []byte{0x1})
// Flush trie -> database
root, _ := trie.Commit(nil)
// Flush memdb -> disk (sponge)
db.Commit(root, false, nil)
// And flush stacktrie -> disk
stRoot, err := stTrie.Commit()
if err != nil {
t.Fatalf("Failed to commit stack trie %v", err)
}
if stRoot != root {
t.Fatalf("root wrong, got %x exp %x", stRoot, root)
}
fmt.Printf("root: %x\n", stRoot)
if got, exp := stackTrieSponge.sponge.Sum(nil), s.sponge.Sum(nil); !bytes.Equal(got, exp) {
t.Fatalf("test, disk write sequence wrong:\ngot %x exp %x\n", got, exp)
}
}

// BenchmarkCommitAfterHashFixedSize benchmarks the Commit (after Hash) of a fixed number of updates to a trie.
// This benchmark is meant to capture the difference on efficiency of small versus large changes. Typically,
// storage tries are small (a couple of entries), whereas the full post-block account trie update is large (a couple
Expand Down