From 7d204db99f298d3306169fb5772ab1f26cf99681 Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Tue, 17 May 2022 13:01:46 +0200 Subject: [PATCH] cmd/geth: add `db check-state-content` to verify integrity of trie nodes (#24840) This PR adds db tooling (geth db check-state-content) to verify the integrity of trie nodes. It iterates through the 32-byte key space in the database, which is expected to contain RLP-encoded trie nodes, addressed by hash. --- cmd/geth/dbcmd.go | 66 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/cmd/geth/dbcmd.go b/cmd/geth/dbcmd.go index c7c73a23ebd7..750be8572162 100644 --- a/cmd/geth/dbcmd.go +++ b/cmd/geth/dbcmd.go @@ -35,6 +35,7 @@ import ( "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/state/snapshot" "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/trie" @@ -71,6 +72,7 @@ Remove blockchain and state databases`, dbExportCmd, dbMetadataCmd, dbMigrateFreezerCmd, + dbCheckStateContentCmd, }, } dbInspectCmd = cli.Command{ @@ -83,6 +85,16 @@ Remove blockchain and state databases`, Usage: "Inspect the storage size for each type of data in the database", Description: `This commands iterates the entire database. If the optional 'prefix' and 'start' arguments are provided, then the iteration is limited to the given subset of data.`, } + dbCheckStateContentCmd = cli.Command{ + Action: utils.MigrateFlags(checkStateContent), + Name: "check-state-content", + ArgsUsage: "", + Flags: utils.GroupFlags(utils.NetworkFlags, utils.DatabasePathFlags), + Usage: "Verify that state data is cryptographically correct", + Description: `This command iterates the entire database for 32-byte keys, looking for rlp-encoded trie nodes. +For each trie node encountered, it checks that the key corresponds to the keccak256(value). If this is not true, this indicates +a data corruption.`, + } dbStatCmd = cli.Command{ Action: utils.MigrateFlags(dbStats), Name: "stats", @@ -289,6 +301,60 @@ func inspect(ctx *cli.Context) error { return rawdb.InspectDatabase(db, prefix, start) } +func checkStateContent(ctx *cli.Context) error { + var ( + prefix []byte + start []byte + ) + if ctx.NArg() > 1 { + return fmt.Errorf("Max 1 argument: %v", ctx.Command.ArgsUsage) + } + if ctx.NArg() > 0 { + if d, err := hexutil.Decode(ctx.Args().First()); err != nil { + return fmt.Errorf("failed to hex-decode 'start': %v", err) + } else { + start = d + } + } + stack, _ := makeConfigNode(ctx) + defer stack.Close() + + db := utils.MakeChainDatabase(ctx, stack, true) + defer db.Close() + var ( + it = rawdb.NewKeyLengthIterator(db.NewIterator(prefix, start), 32) + hasher = crypto.NewKeccakState() + got = make([]byte, 32) + errs int + count int + startTime = time.Now() + lastLog = time.Now() + ) + for it.Next() { + count++ + v := it.Value() + k := it.Key() + hasher.Reset() + hasher.Write(v) + hasher.Read(got) + if !bytes.Equal(k, got) { + errs++ + fmt.Printf("Error at 0x%x\n", k) + fmt.Printf(" Hash: 0x%x\n", got) + fmt.Printf(" Data: 0x%x\n", v) + } + if time.Since(lastLog) > 8*time.Second { + log.Info("Iterating the database", "at", fmt.Sprintf("%#x", k), "elapsed", common.PrettyDuration(time.Since(startTime))) + lastLog = time.Now() + } + } + if err := it.Error(); err != nil { + return err + } + log.Info("Iterated the state content", "errors", errs, "items", count) + return nil +} + func showLeveldbStats(db ethdb.KeyValueStater) { if stats, err := db.Stat("leveldb.stats"); err != nil { log.Warn("Failed to read database stats", "error", err)