Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cmd/evm: benchmarking via statetest command + filter by name, index and fork #30442

Merged
merged 13 commits into from
Nov 8, 2024
135 changes: 109 additions & 26 deletions cmd/evm/staterunner.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,21 +21,47 @@ import (
"encoding/json"
"fmt"
"os"
"testing"
"time"

"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/rawdb"
"github.com/ethereum/go-ethereum/core/state"
"github.com/ethereum/go-ethereum/core/vm"
"github.com/ethereum/go-ethereum/eth/tracers/logger"
"github.com/ethereum/go-ethereum/internal/flags"
"github.com/ethereum/go-ethereum/tests"
"github.com/urfave/cli/v2"
)

var (
// ForkFlag is the "statetest.fork" option: when set to a non-empty value,
// only subtests whose fork equals that value are selected. It declares no
// default, so an unset flag applies no fork filtering.
ForkFlag = &cli.StringFlag{
jwasinger marked this conversation as resolved.
Show resolved Hide resolved
Name: "statetest.fork",
Usage: "The hard-fork to run the test against",
Category: flags.VMCategory,
}
// IdxFlag is the "statetest.index" option: when set to anything other than
// -1, only subtests with that index are selected.
IdxFlag = &cli.IntFlag{
Name: "statetest.index",
Usage: "The index of the subtest to run",
Category: flags.VMCategory,
Value: -1, // default to select all subtest indices
}
// TestNameFlag is the "statetest.name" option: it names a single state test,
// which is looked up by that name among the tests loaded from the input file.
TestNameFlag = &cli.StringFlag{
Name: "statetest.name",
Usage: "The name of the state test to run",
Category: flags.VMCategory,
}
)
// stateTestCommand describes the "statetest" subcommand. It dispatches to
// stateTestCmd and exposes the fork, index and test-name selection flags.
var stateTestCommand = &cli.Command{
	Name:      "statetest",
	Usage:     "Executes the given state tests. Filenames can be fed via standard input (batch mode) or as an argument (one-off execution).",
	ArgsUsage: "<file>",
	Action:    stateTestCmd,
	Flags:     []cli.Flag{ForkFlag, IdxFlag, TestNameFlag},
}

// StatetestResult contains the execution status after running a state test, any
Expand Down Expand Up @@ -67,7 +93,7 @@ func stateTestCmd(ctx *cli.Context) error {
}
// Load the test content from the input file
if len(ctx.Args().First()) != 0 {
return runStateTest(ctx.Args().First(), cfg, ctx.Bool(DumpFlag.Name))
return runStateTest(ctx, ctx.Args().First(), cfg, ctx.Bool(DumpFlag.Name), ctx.Bool(BenchFlag.Name))
}
// Read filenames from stdin and execute back-to-back
scanner := bufio.NewScanner(os.Stdin)
Expand All @@ -76,15 +102,48 @@ func stateTestCmd(ctx *cli.Context) error {
if len(fname) == 0 {
return nil
}
if err := runStateTest(fname, cfg, ctx.Bool(DumpFlag.Name)); err != nil {
if err := runStateTest(ctx, fname, cfg, ctx.Bool(DumpFlag.Name), ctx.Bool(BenchFlag.Name)); err != nil {
return err
}
}
return nil
}

// stateTestCase pairs one subtest with the state test it belongs to, so that a
// single (test, subtest) combination can be executed on its own.
type stateTestCase struct {
// name is the key under which the state test was stored in the loaded test map.
name string
// test is the state test that owns the subtest.
test tests.StateTest
// st is the subtest to run; its Fork and Index fields are what the CLI filters match on.
st tests.StateSubtest
}

// collectMatchedSubtests returns the test cases from testsByName that match
// the filtering CLI parameters carried by ctx:
//
//   - TestNameFlag: when non-empty, only the test stored under exactly that
//     name is considered. If no such test exists, nothing matches and nil is
//     returned.
//   - IdxFlag: when not -1, only subtests with that index are kept.
//   - ForkFlag: when non-empty, only subtests for that fork are kept.
//
// The caller's map is never modified. When more than one test is considered,
// the order of the returned cases follows map iteration order and is therefore
// unspecified.
func collectMatchedSubtests(ctx *cli.Context, testsByName map[string]tests.StateTest) []stateTestCase {
	// Narrow the candidate set to the named test. The narrowed map has to be
	// assigned to the parameter with "=": declaring it with ":=" inside this
	// block would only create a shadowing variable and leave the filter
	// without any effect on the loop below.
	if name := ctx.String(TestNameFlag.Name); name != "" {
		test, ok := testsByName[name]
		if !ok {
			// A name filter that matches nothing selects nothing, rather than
			// silently falling back to running every test in the file.
			return nil
		}
		testsByName = map[string]tests.StateTest{name: test}
	}
	idx := ctx.Int(IdxFlag.Name)
	fork := ctx.String(ForkFlag.Name)

	var res []stateTestCase
	for key, test := range testsByName {
		for _, st := range test.Subtests() {
			if idx != -1 && st.Index != idx {
				continue
			}
			if fork != "" && st.Fork != fork {
				continue
			}
			res = append(res, stateTestCase{name: key, st: st, test: test})
		}
	}
	return res
}

// runStateTest loads the state-test given by fname, and executes the test.
func runStateTest(fname string, cfg vm.Config, dump bool) error {
func runStateTest(ctx *cli.Context, fname string, cfg vm.Config, dump bool, bench bool) error {
src, err := os.ReadFile(fname)
if err != nil {
return err
Expand All @@ -94,33 +153,57 @@ func runStateTest(fname string, cfg vm.Config, dump bool) error {
return err
}

matchingTests := collectMatchedSubtests(ctx, testsByName)

// Iterate over all the tests, run them and aggregate the results
results := make([]StatetestResult, 0, len(testsByName))
for key, test := range testsByName {
for _, st := range test.Subtests() {
// Run the test and aggregate the result
result := &StatetestResult{Name: key, Fork: st.Fork, Pass: true}
test.Run(st, cfg, false, rawdb.HashScheme, func(err error, tstate *tests.StateTestState) {
var root common.Hash
if tstate.StateDB != nil {
root = tstate.StateDB.IntermediateRoot(false)
result.Root = &root
fmt.Fprintf(os.Stderr, "{\"stateRoot\": \"%#x\"}\n", root)
if dump { // Dump any state to aid debugging
cpy, _ := state.New(root, tstate.StateDB.Database())
dump := cpy.RawDump(nil)
result.State = &dump
}
var results []StatetestResult
for _, test := range matchingTests {
jwasinger marked this conversation as resolved.
Show resolved Hide resolved
// Run the test and aggregate the result
result := &StatetestResult{Name: test.name, Fork: test.st.Fork, Pass: true}
test.test.Run(test.st, cfg, false, rawdb.HashScheme, func(err error, tstate *tests.StateTestState) {
var root common.Hash
if tstate.StateDB != nil {
root = tstate.StateDB.IntermediateRoot(false)
result.Root = &root
fmt.Fprintf(os.Stderr, "{\"stateRoot\": \"%#x\"}\n", root)
if dump { // Dump any state to aid debugging
cpy, _ := state.New(root, tstate.StateDB.Database())
dump := cpy.RawDump(nil)
result.State = &dump
}
if err != nil {
// Test failed, mark as so
result.Pass, result.Error = false, err.Error()
}
})
results = append(results, *result)
}
}
if err != nil {
// Test failed, mark as so
result.Pass, result.Error = false, err.Error()
}
})
results = append(results, *result)
}
out, _ := json.MarshalIndent(results, "", " ")
fmt.Println(string(out))

if !bench {
return nil
} else if len(matchingTests) != 1 {
return fmt.Errorf("can only benchmark single state test case (more than one matching params)")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why though?
Wouldn't it make more sense to put the benchmarking into the loop above, and after each test.test.Run, you do a bench run, if so desired.
You might even consider integrating the bench stats into the results, but now I'm just thinking aloud... ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. Benchmarking every test unless otherwise specified sounds like a fine default behavior to me.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So drop this whole if-clause?

}
var gasUsed uint64
result := testing.Benchmark(func(b *testing.B) {
for i := 0; i < b.N; i++ {
test := matchingTests[0]
_, _, gasUsed, _ = test.test.RunNoVerify(test.st, cfg, false, rawdb.HashScheme)
}
})
var stats execStats
// Get the average execution time from the benchmarking result.
// There are other useful stats here that could be reported.
stats.time = time.Duration(result.NsPerOp())
stats.allocs = result.AllocsPerOp()
stats.bytesAllocated = result.AllocedBytesPerOp()
fmt.Fprintf(os.Stderr, `EVM gas used: %d
execution time: %v
allocations: %d
allocated bytes: %d
`, gasUsed, stats.time, stats.allocs, stats.bytesAllocated)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would move this into its own function and do something like this

if bench {
    runBenchmark()
}
return nil

which makes it a bit cleaner imo

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's ok as is

return nil
}
16 changes: 8 additions & 8 deletions tests/state_test_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ func (t *StateTest) checkError(subtest StateSubtest, err error) error {

// Run executes a specific subtest and verifies the post-state and logs
func (t *StateTest) Run(subtest StateSubtest, vmconfig vm.Config, snapshotter bool, scheme string, postCheck func(err error, st *StateTestState)) (result error) {
st, root, err := t.RunNoVerify(subtest, vmconfig, snapshotter, scheme)
st, root, _, err := t.RunNoVerify(subtest, vmconfig, snapshotter, scheme)
// Invoke the callback at the end of function for further analysis.
defer func() {
postCheck(result, &st)
Expand Down Expand Up @@ -228,10 +228,10 @@ func (t *StateTest) Run(subtest StateSubtest, vmconfig vm.Config, snapshotter bo

// RunNoVerify runs a specific subtest and returns the statedb and post-state root.
// Remember to call state.Close after verifying the test result!
func (t *StateTest) RunNoVerify(subtest StateSubtest, vmconfig vm.Config, snapshotter bool, scheme string) (st StateTestState, root common.Hash, err error) {
func (t *StateTest) RunNoVerify(subtest StateSubtest, vmconfig vm.Config, snapshotter bool, scheme string) (st StateTestState, root common.Hash, gasUsed uint64, err error) {
config, eips, err := GetChainConfig(subtest.Fork)
if err != nil {
return st, common.Hash{}, UnsupportedForkError{subtest.Fork}
return st, common.Hash{}, 0, UnsupportedForkError{subtest.Fork}
}
vmconfig.ExtraEips = eips

Expand All @@ -250,7 +250,7 @@ func (t *StateTest) RunNoVerify(subtest StateSubtest, vmconfig vm.Config, snapsh
post := t.json.Post[subtest.Fork][subtest.Index]
msg, err := t.json.Tx.toMessage(post, baseFee)
if err != nil {
return st, common.Hash{}, err
return st, common.Hash{}, 0, err
}

{ // Blob transactions may be present after the Cancun fork.
Expand All @@ -260,7 +260,7 @@ func (t *StateTest) RunNoVerify(subtest StateSubtest, vmconfig vm.Config, snapsh
// Here, we just do this shortcut smaller fix, since state tests do not
// utilize those codepaths
if len(msg.BlobHashes)*params.BlobTxBlobGasPerBlob > params.MaxBlobGasPerBlock {
return st, common.Hash{}, errors.New("blob gas exceeds maximum")
return st, common.Hash{}, 0, errors.New("blob gas exceeds maximum")
}
}

Expand All @@ -269,10 +269,10 @@ func (t *StateTest) RunNoVerify(subtest StateSubtest, vmconfig vm.Config, snapsh
var ttx types.Transaction
err := ttx.UnmarshalBinary(post.TxBytes)
if err != nil {
return st, common.Hash{}, err
return st, common.Hash{}, 0, err
}
if _, err := types.Sender(types.LatestSigner(config), &ttx); err != nil {
return st, common.Hash{}, err
return st, common.Hash{}, 0, err
}
}

Expand Down Expand Up @@ -322,7 +322,7 @@ func (t *StateTest) RunNoVerify(subtest StateSubtest, vmconfig vm.Config, snapsh
receipt := &types.Receipt{GasUsed: vmRet.UsedGas}
tracer.OnTxEnd(receipt, nil)
}
return st, root, err
return st, root, vmRet.UsedGas, err
}

func (t *StateTest) gasLimit(subtest StateSubtest) uint64 {
Expand Down