From 3730027a2d5d0f5e86fbaa4e0f665f276139c74e Mon Sep 17 00:00:00 2001 From: Mathieu Hofman <86499+mhofman@users.noreply.github.com> Date: Tue, 19 Sep 2023 16:58:53 -0700 Subject: [PATCH 1/4] fix: halt-height behavior is not deterministic (#305) Port of cosmos/cosmos-sdk#16639 Co-authored-by: yihuang --- baseapp/abci.go | 61 ++++++++++++++------------------------------ baseapp/abci_test.go | 58 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 42 deletions(-) diff --git a/baseapp/abci.go b/baseapp/abci.go index 03cbc8afa65e..bdfa92203563 100644 --- a/baseapp/abci.go +++ b/baseapp/abci.go @@ -5,10 +5,8 @@ import ( "encoding/json" "errors" "fmt" - "os" "sort" "strings" - "syscall" "time" "github.com/gogo/protobuf/proto" @@ -139,6 +137,8 @@ func (app *BaseApp) BeginBlock(req abci.RequestBeginBlock) (res abci.ResponseBeg )) } + app.checkHalt(req.Header.Height, req.Header.Time) + if err := app.validateHeight(req); err != nil { panic(err) } @@ -312,6 +312,23 @@ func (app *BaseApp) deliverTxWithoutEventHistory(req abci.RequestDeliverTx) (res } } +// checkHalt checkes if height or time exceeds halt-height or halt-time respectively. +func (app *BaseApp) checkHalt(height int64, time time.Time) { + var halt bool + switch { + case app.haltHeight > 0 && uint64(height) > app.haltHeight: + halt = true + + case app.haltTime > 0 && time.Unix() > int64(app.haltTime): + halt = true + } + + if halt { + app.logger.Info("halt per configuration", "height", app.haltHeight, "time", app.haltTime) + panic(errors.New("halt application")) + } +} + // Commit implements the ABCI interface. It will commit all state that exists in // the deliver state's multi-store and includes the resulting commit ID in the // returned abci.ResponseCommit. Commit will set the check state based on the @@ -368,24 +385,6 @@ func (app *BaseApp) CommitWithoutSnapshot() (_res abci.ResponseCommit, snapshotH // empty/reset the deliver state app.deliverState = nil - var halt bool - - switch { - case app.haltHeight > 0 && uint64(header.Height) >= app.haltHeight: - halt = true - - case app.haltTime > 0 && header.Time.Unix() >= int64(app.haltTime): - halt = true - } - - if halt { - // Halt the binary and allow Tendermint to receive the ResponseCommit - // response with the commit ID hash. This will allow the node to successfully - // restart and process blocks assuming the halt configuration has been - // reset or moved to a more distant value. - app.halt() - } - if app.snapshotManager.ShouldTakeSnapshot(header.Height) { snapshotHeight = header.Height } @@ -393,28 +392,6 @@ func (app *BaseApp) CommitWithoutSnapshot() (_res abci.ResponseCommit, snapshotH return res, snapshotHeight } -// halt attempts to gracefully shutdown the node via SIGINT and SIGTERM falling -// back on os.Exit if both fail. -func (app *BaseApp) halt() { - app.logger.Info("halting node per configuration", "height", app.haltHeight, "time", app.haltTime) - - p, err := os.FindProcess(os.Getpid()) - if err == nil { - // attempt cascading signals in case SIGINT fails (os dependent) - sigIntErr := p.Signal(syscall.SIGINT) - sigTermErr := p.Signal(syscall.SIGTERM) - - if sigIntErr == nil || sigTermErr == nil { - return - } - } - - // Resort to exiting immediately if the process could not be found or killed - // via SIGINT/SIGTERM signals. - app.logger.Info("failed to send SIGINT/SIGTERM; exiting...") - os.Exit(0) -} - // Snapshot takes a snapshot of the current state and prunes any old snapshottypes. // It should be started as a goroutine func (app *BaseApp) Snapshot(height int64) { diff --git a/baseapp/abci_test.go b/baseapp/abci_test.go index 8ec3303de582..afec5be81f56 100644 --- a/baseapp/abci_test.go +++ b/baseapp/abci_test.go @@ -3,7 +3,9 @@ package baseapp import ( "encoding/json" "fmt" + "strings" "testing" + "time" "github.com/stretchr/testify/require" abci "github.com/tendermint/tendermint/abci/types" @@ -224,3 +226,59 @@ func (ps *paramStore) Get(_ sdk.Context, key []byte, ptr interface{}) { panic(err) } } + +func TestABCI_HaltChain(t *testing.T) { + logger := defaultLogger() + db := dbm.NewMemDB() + name := t.Name() + + testCases := []struct { + name string + haltHeight uint64 + haltTime uint64 + blockHeight int64 + blockTime int64 + expHalt bool + }{ + {"default", 0, 0, 10, 0, false}, + {"halt-height-edge", 10, 0, 10, 0, false}, + {"halt-height", 10, 0, 11, 0, true}, + {"halt-time-edge", 0, 10, 1, 10, false}, + {"halt-time", 0, 10, 1, 11, true}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + defer func() { + rec := recover() + var err error + if rec != nil { + err = rec.(error) + } + if !tc.expHalt { + require.NoError(t, err) + } else { + require.Error(t, err) + require.True(t, strings.HasPrefix(err.Error(), "halt application")) + } + }() + + app := NewBaseApp( + name, logger, db, nil, + SetHaltHeight(tc.haltHeight), + SetHaltTime(tc.haltTime), + ) + + app.InitChain(abci.RequestInitChain{ + InitialHeight: tc.blockHeight, + }) + + app.BeginBlock(abci.RequestBeginBlock{ + Header: tmproto.Header{ + Height: tc.blockHeight, + Time: time.Unix(tc.blockTime, 0), + }, + }) + }) + } +} From 6fa3bd3bad09589ff30bfc0c84ab46bd488f4140 Mon Sep 17 00:00:00 2001 From: Michael FIG Date: Wed, 8 Nov 2023 23:13:30 -0600 Subject: [PATCH 2/4] fix(baseapp): reenable sending signals at halt-height --- CHANGELOG-Agoric.md | 5 +++-- baseapp/abci.go | 44 +++++++++++++++++++++++++++++++++++--------- baseapp/abci_test.go | 15 +++++++++++++++ 3 files changed, 53 insertions(+), 11 deletions(-) diff --git a/CHANGELOG-Agoric.md b/CHANGELOG-Agoric.md index 6006e30f7b01..789854678e6f 100644 --- a/CHANGELOG-Agoric.md +++ b/CHANGELOG-Agoric.md @@ -67,6 +67,7 @@ Ref: https://keepachangelog.com/en/1.0.0/ ### Bug Fixes +* (baseapp) [#338](https://github.com/agoric-labs/cosmos-sdk/pull/338) Make sure we don't execute blocks beyond the halt height. Restored from [#305](https://github.com/agoric-labs/cosmos-sdk/pull/305) but compatible with older `SIGINT`, `SIGTERM` logic * (baseapp) [#413](https://github.com/agoric-labs/cosmos-sdk/pull/413) Ignore events from simulated transactions ## `v0.46.16-alpha.agoric.2.1` - 2024-03-08 @@ -85,7 +86,7 @@ Ref: https://keepachangelog.com/en/1.0.0/ ## `v0.46.16-alpha.agoric.1` - 2024-02-05 -* Agoric/agoric-sdk#8224 Merge [cosmos/cosmos-sdk v0.46.16](https://github.com/cosmos/cosmos-sdk/releases/tag/v0.46.16) +* Agoric/agoric-sdk#8224 Merge [cosmos/cosmos-sdk v0.46.16](https://github.com/osmos/cosmos-sdk/releases/tag/v0.46.16) ### Bug Fixes @@ -99,7 +100,7 @@ Ref: https://keepachangelog.com/en/1.0.0/ ### Bug Fixes -* (baseapp) [#337](https://github.com/agoric-labs/cosmos-sdk/pull/337) revert #305 which causes test failures in agoric-sdk +* (baseapp) [#337](https://github.com/agoric-labs/cosmos-sdk/pull/337) revert [#305](https://github.com/agoric-labs/cosmos-sdk/pull/305) which causes test failures in agoric-sdk ## `v0.45.16-alpha.agoric.1` - 2023-09-22 diff --git a/baseapp/abci.go b/baseapp/abci.go index bdfa92203563..6dd57bb3dbed 100644 --- a/baseapp/abci.go +++ b/baseapp/abci.go @@ -5,8 +5,10 @@ import ( "encoding/json" "errors" "fmt" + "os" "sort" "strings" + "syscall" "time" "github.com/gogo/protobuf/proto" @@ -312,21 +314,45 @@ func (app *BaseApp) deliverTxWithoutEventHistory(req abci.RequestDeliverTx) (res } } -// checkHalt checkes if height or time exceeds halt-height or halt-time respectively. -func (app *BaseApp) checkHalt(height int64, time time.Time) { +// checkHalt forces a state machine halt and attempts to kill the current +// process if block height or timestamp exceeds halt-height or halt-time +// respectively. +func (app *BaseApp) checkHalt(blockHeight int64, blockTime time.Time) { var halt bool - switch { - case app.haltHeight > 0 && uint64(height) > app.haltHeight: + if app.haltHeight > 0 && uint64(blockHeight) > app.haltHeight { + // height to halt has passed halt = true - - case app.haltTime > 0 && time.Unix() > int64(app.haltTime): + } else if app.haltTime > 0 && blockTime.Unix() > int64(app.haltTime) { + // time to halt has passed halt = true } - if halt { - app.logger.Info("halt per configuration", "height", app.haltHeight, "time", app.haltTime) - panic(errors.New("halt application")) + if !halt { + return } + + app.logger.Info( + "halt per configuration", + "haltHeight", app.haltHeight, + "haltTime", app.haltTime, + "blockHeight", blockHeight, + "blockTime", blockTime, + ) + + // [AGORIC] Make a best-effort attempt to kill our process. + p, err := os.FindProcess(os.Getpid()) + if err == nil { + // attempt cascading signals in case SIGINT fails (os dependent) + _ = p.Signal(syscall.SIGINT) + _ = p.Signal(syscall.SIGTERM) + // Errors in these signal calls are not meaningful to us. We tried our + // best, but we don't care (and can't tell) if or how the signal handler + // responds. + } + + // Prevent the state machine from advancing to the next block, no matter how + // the signals were handled. + panic(errors.New("halt application")) } // Commit implements the ABCI interface. It will commit all state that exists in diff --git a/baseapp/abci_test.go b/baseapp/abci_test.go index afec5be81f56..c2c39db04674 100644 --- a/baseapp/abci_test.go +++ b/baseapp/abci_test.go @@ -3,7 +3,10 @@ package baseapp import ( "encoding/json" "fmt" + "os" + "os/signal" "strings" + "syscall" "testing" "time" @@ -247,10 +250,16 @@ func TestABCI_HaltChain(t *testing.T) { {"halt-time", 0, 10, 1, 11, true}, } + sigs := make(chan os.Signal, 5) for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { + if tc.expHalt { + signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM) + } + defer func() { rec := recover() + signal.Stop(sigs) var err error if rec != nil { err = rec.(error) @@ -258,6 +267,12 @@ func TestABCI_HaltChain(t *testing.T) { if !tc.expHalt { require.NoError(t, err) } else { + // ensure that we received the correct signals + require.Equal(t, syscall.SIGINT, <-sigs) + require.Equal(t, syscall.SIGTERM, <-sigs) + require.Equal(t, len(sigs), 0) + + // Check our error message. require.Error(t, err) require.True(t, strings.HasPrefix(err.Error(), "halt application")) } From 80f51b22592f818407ac810b41a11eb57cb5c61f Mon Sep 17 00:00:00 2001 From: Michael FIG Date: Thu, 4 Jul 2024 18:22:16 -0600 Subject: [PATCH 3/4] chore: update CHANGELOG-Agoric.md --- CHANGELOG-Agoric.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/CHANGELOG-Agoric.md b/CHANGELOG-Agoric.md index 789854678e6f..6a927c70f348 100644 --- a/CHANGELOG-Agoric.md +++ b/CHANGELOG-Agoric.md @@ -41,6 +41,10 @@ Ref: https://keepachangelog.com/en/1.0.0/ * (auth, bank) Agoric/agoric-sdk#8989 Remove deprecated lien support +### Bug Fixes + +* (baseapp) [#338](https://github.com/agoric-labs/cosmos-sdk/pull/338) Make sure we don't execute blocks beyond the halt height. Restored from [#305](https://github.com/agoric-labs/cosmos-sdk/pull/305) but compatible with older `SIGINT`, `SIGTERM` logic + ## `v0.46.16-alpha.agoric.2.4` - 2024-04-19 ### Improvements @@ -67,7 +71,6 @@ Ref: https://keepachangelog.com/en/1.0.0/ ### Bug Fixes -* (baseapp) [#338](https://github.com/agoric-labs/cosmos-sdk/pull/338) Make sure we don't execute blocks beyond the halt height. Restored from [#305](https://github.com/agoric-labs/cosmos-sdk/pull/305) but compatible with older `SIGINT`, `SIGTERM` logic * (baseapp) [#413](https://github.com/agoric-labs/cosmos-sdk/pull/413) Ignore events from simulated transactions ## `v0.46.16-alpha.agoric.2.1` - 2024-03-08 @@ -86,7 +89,7 @@ Ref: https://keepachangelog.com/en/1.0.0/ ## `v0.46.16-alpha.agoric.1` - 2024-02-05 -* Agoric/agoric-sdk#8224 Merge [cosmos/cosmos-sdk v0.46.16](https://github.com/osmos/cosmos-sdk/releases/tag/v0.46.16) +* Agoric/agoric-sdk#8224 Merge [cosmos/cosmos-sdk v0.46.16](https://github.com/cosmos/cosmos-sdk/releases/tag/v0.46.16) ### Bug Fixes From 2a09d9fac2757daea39009eab29aa45270703752 Mon Sep 17 00:00:00 2001 From: Michael FIG Date: Fri, 2 Aug 2024 14:01:24 -0600 Subject: [PATCH 4/4] test: `docker-compose` is now `docker compose` --- Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 040ec44aeb9e..038c407c44ed 100644 --- a/Makefile +++ b/Makefile @@ -318,7 +318,7 @@ test-cover: test-rosetta: docker build -t rosetta-ci:latest -f contrib/rosetta/rosetta-ci/Dockerfile . - docker-compose -f contrib/rosetta/docker-compose.yaml up --abort-on-container-exit --exit-code-from test_rosetta --build + docker compose -f contrib/rosetta/docker-compose.yaml up --abort-on-container-exit --exit-code-from test_rosetta --build .PHONY: test-rosetta benchmark: @@ -467,10 +467,10 @@ localnet-build-dlv: localnet-build-nodes: $(DOCKER) run --rm -v $(CURDIR)/.testnets:/data cosmossdk/simd \ testnet init-files --v 4 -o /data --starting-ip-address 192.168.10.2 --keyring-backend=test - docker-compose up -d + docker compose up -d localnet-stop: - docker-compose down + docker compose down # localnet-start will run a 4-node testnet locally. The nodes are # based off the docker images in: ./contrib/images/simd-env