From 5c55d9940e4098a3829b243016a4f53437c78ea0 Mon Sep 17 00:00:00 2001 From: Haifeng Xi Date: Fri, 17 May 2019 17:58:57 +0800 Subject: [PATCH 01/26] update version to v0.30.0-dev for the develop branch --- version/version.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version/version.go b/version/version.go index deee9fc0686..2636e4c3eb9 100644 --- a/version/version.go +++ b/version/version.go @@ -18,7 +18,7 @@ const ( // TMCoreSemVer is the current version of Tendermint Core. // It's the Semantic Version of the software. // Must be a string because scripts like dist.sh read this file. - TMCoreSemVer = "0.27.4" + TMCoreSemVer = "0.30.0-dev" // ABCISemVer is the semantic version of the ABCI library ABCISemVer = "0.15.0" From 1769f5a077d530804d04a14de8538142bccb3797 Mon Sep 17 00:00:00 2001 From: chengwenxi Date: Mon, 27 May 2019 14:45:36 +0800 Subject: [PATCH 02/26] Upgrade multisig --- crypto/multisig/threshold_pubkey.go | 19 +++++++++--------- crypto/multisig/threshold_pubkey_test.go | 25 +++++++++++++++++++++++- 2 files changed, 33 insertions(+), 11 deletions(-) diff --git a/crypto/multisig/threshold_pubkey.go b/crypto/multisig/threshold_pubkey.go index ca8d4230304..234d420f1d2 100644 --- a/crypto/multisig/threshold_pubkey.go +++ b/crypto/multisig/threshold_pubkey.go @@ -2,7 +2,6 @@ package multisig import ( "github.com/tendermint/tendermint/crypto" - "github.com/tendermint/tendermint/crypto/tmhash" ) // PubKeyMultisigThreshold implements a K of N threshold multisig. @@ -11,7 +10,7 @@ type PubKeyMultisigThreshold struct { PubKeys []crypto.PubKey `json:"pubkeys"` } -var _ crypto.PubKey = &PubKeyMultisigThreshold{} +var _ crypto.PubKey = PubKeyMultisigThreshold{} // NewPubKeyMultisigThreshold returns a new PubKeyMultisigThreshold. // Panics if len(pubkeys) < k or 0 >= k. @@ -22,7 +21,7 @@ func NewPubKeyMultisigThreshold(k int, pubkeys []crypto.PubKey) crypto.PubKey { if len(pubkeys) < k { panic("threshold k of n multisignature: len(pubkeys) < k") } - return &PubKeyMultisigThreshold{uint(k), pubkeys} + return PubKeyMultisigThreshold{uint(k), pubkeys} } // VerifyBytes expects sig to be an amino encoded version of a MultiSignature. @@ -31,8 +30,8 @@ func NewPubKeyMultisigThreshold(k int, pubkeys []crypto.PubKey) crypto.PubKey { // and all signatures are valid. (Not just k of the signatures) // The multisig uses a bitarray, so multiple signatures for the same key is not // a concern. -func (pk *PubKeyMultisigThreshold) VerifyBytes(msg []byte, marshalledSig []byte) bool { - var sig *Multisignature +func (pk PubKeyMultisigThreshold) VerifyBytes(msg []byte, marshalledSig []byte) bool { + var sig Multisignature err := cdc.UnmarshalBinaryBare(marshalledSig, &sig) if err != nil { return false @@ -64,19 +63,19 @@ func (pk *PubKeyMultisigThreshold) VerifyBytes(msg []byte, marshalledSig []byte) } // Bytes returns the amino encoded version of the PubKeyMultisigThreshold -func (pk *PubKeyMultisigThreshold) Bytes() []byte { +func (pk PubKeyMultisigThreshold) Bytes() []byte { return cdc.MustMarshalBinaryBare(pk) } // Address returns tmhash(PubKeyMultisigThreshold.Bytes()) -func (pk *PubKeyMultisigThreshold) Address() crypto.Address { - return crypto.Address(tmhash.Sum(pk.Bytes())) +func (pk PubKeyMultisigThreshold) Address() crypto.Address { + return crypto.AddressHash(pk.Bytes()) } // Equals returns true iff pk and other both have the same number of keys, and // all constituent keys are the same, and in the same order. 
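
Patch 02 switches PubKeyMultisigThreshold from pointer to value receivers, so the concrete value itself satisfies crypto.PubKey. As orientation for the diff above and below, here is a minimal sketch of exercising the K-of-N API end to end; it assumes the Multisignature helpers (NewMultisig, AddSignatureFromPubKey, Marshal) that live alongside this type in crypto/multisig.

```go
package main

import (
	"fmt"

	"github.com/tendermint/tendermint/crypto"
	"github.com/tendermint/tendermint/crypto/ed25519"
	"github.com/tendermint/tendermint/crypto/multisig"
)

func main() {
	msg := []byte{1, 2, 3, 4}

	// Three possible signers; any two valid signatures must verify.
	privs := []crypto.PrivKey{ed25519.GenPrivKey(), ed25519.GenPrivKey(), ed25519.GenPrivKey()}
	pubs := make([]crypto.PubKey, len(privs))
	for i, p := range privs {
		pubs[i] = p.PubKey()
	}
	multisigKey := multisig.NewPubKeyMultisigThreshold(2, pubs)

	// Collect signatures from signers 0 and 2; the bitarray inside
	// Multisignature records which keys signed.
	sig := multisig.NewMultisig(len(pubs))
	for _, i := range []int{0, 2} {
		s, err := privs[i].Sign(msg)
		if err != nil {
			panic(err)
		}
		if err := sig.AddSignatureFromPubKey(s, pubs[i], pubs); err != nil {
			panic(err)
		}
	}

	// VerifyBytes expects the amino-encoded Multisignature.
	fmt.Println(multisigKey.VerifyBytes(msg, sig.Marshal())) // true
}
```
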
-func (pk *PubKeyMultisigThreshold) Equals(other crypto.PubKey) bool { - otherKey, sameType := other.(*PubKeyMultisigThreshold) +func (pk PubKeyMultisigThreshold) Equals(other crypto.PubKey) bool { + otherKey, sameType := other.(PubKeyMultisigThreshold) if !sameType { return false } diff --git a/crypto/multisig/threshold_pubkey_test.go b/crypto/multisig/threshold_pubkey_test.go index bfc874ebedf..2d2632abd53 100644 --- a/crypto/multisig/threshold_pubkey_test.go +++ b/crypto/multisig/threshold_pubkey_test.go @@ -82,7 +82,7 @@ func TestMultiSigPubKeyEquality(t *testing.T) { msg := []byte{1, 2, 3, 4} pubkeys, _ := generatePubKeysAndSignatures(5, msg) multisigKey := NewPubKeyMultisigThreshold(2, pubkeys) - var unmarshalledMultisig *PubKeyMultisigThreshold + var unmarshalledMultisig PubKeyMultisigThreshold cdc.MustUnmarshalBinaryBare(multisigKey.Bytes(), &unmarshalledMultisig) require.True(t, multisigKey.Equals(unmarshalledMultisig)) @@ -95,6 +95,29 @@ func TestMultiSigPubKeyEquality(t *testing.T) { require.False(t, multisigKey.Equals(multisigKey2)) } +func TestAddress(t *testing.T) { + msg := []byte{1, 2, 3, 4} + pubkeys, _ := generatePubKeysAndSignatures(5, msg) + multisigKey := NewPubKeyMultisigThreshold(2, pubkeys) + require.Len(t, multisigKey.Address().Bytes(), 20) +} + +func TestPubKeyMultisigThresholdAminoToIface(t *testing.T) { + msg := []byte{1, 2, 3, 4} + pubkeys, _ := generatePubKeysAndSignatures(5, msg) + multisigKey := NewPubKeyMultisigThreshold(2, pubkeys) + + ab, err := cdc.MarshalBinaryLengthPrefixed(multisigKey) + require.NoError(t, err) + // like other crypto.Pubkey implementations (e.g. ed25519.PubKeyEd25519), + // PubKeyMultisigThreshold should be deserializable into a crypto.PubKey: + var pubKey crypto.PubKey + err = cdc.UnmarshalBinaryLengthPrefixed(ab, &pubKey) + require.NoError(t, err) + + require.Equal(t, multisigKey, pubKey) +} + func generatePubKeysAndSignatures(n int, msg []byte) (pubkeys []crypto.PubKey, signatures [][]byte) { pubkeys = make([]crypto.PubKey, n) signatures = make([][]byte, n) From 6bd2006cdc2b255cffb4e21faf3d6a18f725b7f8 Mon Sep 17 00:00:00 2001 From: chengwenxi Date: Thu, 20 Jun 2019 11:17:14 +0800 Subject: [PATCH 03/26] Close WriteBatch to prevent memory leak --- libs/db/c_level_db.go | 5 +++++ libs/db/debug_db.go | 5 +++++ libs/db/go_level_db.go | 4 ++++ libs/db/mem_batch.go | 4 ++++ libs/db/prefix_db.go | 4 ++++ libs/db/remotedb/grpcdb/server.go | 1 + libs/db/remotedb/remotedb.go | 4 ++++ libs/db/types.go | 2 ++ lite/dbprovider.go | 1 + state/txindex/kv/kv.go | 2 ++ 10 files changed, 32 insertions(+) diff --git a/libs/db/c_level_db.go b/libs/db/c_level_db.go index 7f74b2a717c..decb1af51dc 100644 --- a/libs/db/c_level_db.go +++ b/libs/db/c_level_db.go @@ -179,6 +179,11 @@ func (mBatch *cLevelDBBatch) WriteSync() { } } +// Implements Batch. +func (mBatch *cLevelDBBatch) Close() { + mBatch.batch.Close() +} + //---------------------------------------- // Iterator // NOTE This is almost identical to db/go_level_db.Iterator diff --git a/libs/db/debug_db.go b/libs/db/debug_db.go index bb361a266f2..658cd055588 100644 --- a/libs/db/debug_db.go +++ b/libs/db/debug_db.go @@ -250,3 +250,8 @@ func (dbch debugBatch) WriteSync() { fmt.Printf("%v.batch.WriteSync()\n", dbch.label) dbch.bch.WriteSync() } + +// Implements Batch. 
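
The receiver change in patch 02 is what its new TestPubKeyMultisigThresholdAminoToIface test (above) exercises: with value receivers, an amino payload decodes straight into a crypto.PubKey variable. A standalone sketch of that round trip, assuming a codec registered like the package's own cdc:

```go
package main

import (
	"fmt"

	amino "github.com/tendermint/go-amino"
	"github.com/tendermint/tendermint/crypto"
	"github.com/tendermint/tendermint/crypto/ed25519"
	"github.com/tendermint/tendermint/crypto/multisig"
)

func main() {
	cdc := amino.NewCodec()
	cdc.RegisterInterface((*crypto.PubKey)(nil), nil)
	cdc.RegisterConcrete(ed25519.PubKeyEd25519{}, "tendermint/PubKeyEd25519", nil)
	cdc.RegisterConcrete(multisig.PubKeyMultisigThreshold{}, "tendermint/PubKeyMultisigThreshold", nil)

	pubs := []crypto.PubKey{ed25519.GenPrivKey().PubKey(), ed25519.GenPrivKey().PubKey()}
	key := multisig.NewPubKeyMultisigThreshold(1, pubs)

	bz, err := cdc.MarshalBinaryLengthPrefixed(key)
	if err != nil {
		panic(err)
	}

	// Amino decodes the concrete value into the interface variable; with the
	// old pointer receivers only *PubKeyMultisigThreshold implemented
	// crypto.PubKey, so this step failed.
	var decoded crypto.PubKey
	if err := cdc.UnmarshalBinaryLengthPrefixed(bz, &decoded); err != nil {
		panic(err)
	}
	fmt.Println(decoded.Equals(key)) // true
}
```
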
+func (dbch debugBatch) Close() { + dbch.bch.Close() +} diff --git a/libs/db/go_level_db.go b/libs/db/go_level_db.go index fd487a4dd5b..9a4358f600e 100644 --- a/libs/db/go_level_db.go +++ b/libs/db/go_level_db.go @@ -184,6 +184,10 @@ func (mBatch *goLevelDBBatch) WriteSync() { } } +// Implements Batch. +// Close is no-op for goLevelDBBatch. +func (mBatch *goLevelDBBatch) Close() {} + //---------------------------------------- // Iterator // NOTE This is almost identical to db/c_level_db.Iterator diff --git a/libs/db/mem_batch.go b/libs/db/mem_batch.go index 5c5d0c13a86..ebba43f5458 100644 --- a/libs/db/mem_batch.go +++ b/libs/db/mem_batch.go @@ -46,6 +46,10 @@ func (mBatch *memBatch) WriteSync() { mBatch.write(true) } +func (mBatch *memBatch) Close() { + mBatch.ops = nil +} + func (mBatch *memBatch) write(doSync bool) { if mtx := mBatch.db.Mutex(); mtx != nil { mtx.Lock() diff --git a/libs/db/prefix_db.go b/libs/db/prefix_db.go index 40d72560c4f..0dd06ef9d99 100644 --- a/libs/db/prefix_db.go +++ b/libs/db/prefix_db.go @@ -248,6 +248,10 @@ func (pb prefixBatch) WriteSync() { pb.source.WriteSync() } +func (pb prefixBatch) Close() { + pb.source.Close() +} + //---------------------------------------- // prefixIterator diff --git a/libs/db/remotedb/grpcdb/server.go b/libs/db/remotedb/grpcdb/server.go index 3a9955ddf55..bfe65e6109a 100644 --- a/libs/db/remotedb/grpcdb/server.go +++ b/libs/db/remotedb/grpcdb/server.go @@ -180,6 +180,7 @@ func (s *server) BatchWriteSync(c context.Context, b *protodb.Batch) (*protodb.N func (s *server) batchWrite(c context.Context, b *protodb.Batch, sync bool) (*protodb.Nothing, error) { bat := s.db.NewBatch() + defer bat.Close() for _, op := range b.Ops { switch op.Type { case protodb.Operation_SET: diff --git a/libs/db/remotedb/remotedb.go b/libs/db/remotedb/remotedb.go index 2b60d815995..c70d54b9ecb 100644 --- a/libs/db/remotedb/remotedb.go +++ b/libs/db/remotedb/remotedb.go @@ -260,3 +260,7 @@ func (bat *batch) WriteSync() { panic(fmt.Sprintf("RemoteDB.BatchWriteSync: %v", err)) } } + +func (bat *batch) Close() { + bat.ops = nil +} diff --git a/libs/db/types.go b/libs/db/types.go index 9b9c6d0b9dc..30f8afd189c 100644 --- a/libs/db/types.go +++ b/libs/db/types.go @@ -57,10 +57,12 @@ type DB interface { //---------------------------------------- // Batch +// Batch Close must be called when the program no longer needs the object. type Batch interface { SetDeleter Write() WriteSync() + Close() } type SetDeleter interface { diff --git a/lite/dbprovider.go b/lite/dbprovider.go index ef1b2a5985b..9a3636d57d6 100644 --- a/lite/dbprovider.go +++ b/lite/dbprovider.go @@ -54,6 +54,7 @@ func (dbp *DBProvider) SaveFullCommit(fc FullCommit) error { dbp.logger.Info("DBProvider.SaveFullCommit()...", "fc", fc) batch := dbp.db.NewBatch() + defer batch.Close() // Save the fc.validators. // We might be overwriting what we already have, but diff --git a/state/txindex/kv/kv.go b/state/txindex/kv/kv.go index 7b9d0007303..5df9c985f01 100644 --- a/state/txindex/kv/kv.go +++ b/state/txindex/kv/kv.go @@ -77,6 +77,7 @@ func (txi *TxIndex) Get(hash []byte) (*types.TxResult, error) { // AddBatch indexes a batch of transactions using the given list of tags. func (txi *TxIndex) AddBatch(b *txindex.Batch) error { storeBatch := txi.store.NewBatch() + defer storeBatch.Close() for _, result := range b.Ops { hash := result.Tx.Hash() @@ -108,6 +109,7 @@ func (txi *TxIndex) AddBatch(b *txindex.Batch) error { // Index indexes a single transaction using the given list of tags. 
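
The new contract in types.go above is that a Batch must be Closed once it is no longer needed, hence the defer right after NewBatch at each call site in this patch. A minimal usage sketch against the in-memory backend:

```go
package main

import (
	dbm "github.com/tendermint/tendermint/libs/db"
)

func main() {
	db := dbm.NewMemDB()
	defer db.Close()

	// Close releases the batch's resources (e.g. the cgo-backed WriteBatch
	// in c_level_db), so defer it immediately, even on early-return paths.
	batch := db.NewBatch()
	defer batch.Close()

	batch.Set([]byte("height"), []byte("42"))
	batch.Delete([]byte("stale"))
	batch.WriteSync()
}
```
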
func (txi *TxIndex) Index(result *types.TxResult) error { b := txi.store.NewBatch() + defer b.Close() hash := result.Tx.Hash() From c720f005eec19a734cee32e1eff44f5ed8811a3b Mon Sep 17 00:00:00 2001 From: chengwenxi Date: Mon, 24 Jun 2019 17:23:52 +0800 Subject: [PATCH 04/26] Fix year format --- libs/log/tmfmt_logger.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/log/tmfmt_logger.go b/libs/log/tmfmt_logger.go index 247ce8fc0c8..d841263ea7c 100644 --- a/libs/log/tmfmt_logger.go +++ b/libs/log/tmfmt_logger.go @@ -90,7 +90,7 @@ func (l tmfmtLogger) Log(keyvals ...interface{}) error { // D - first character of the level, uppercase (ASCII only) // [2016-05-02|11:06:44.322] - our time format (see https://golang.org/src/time/format.go) // Stopping ... - message - enc.buf.WriteString(fmt.Sprintf("%c[%s] %-44s ", lvl[0]-32, time.Now().Format("2016-01-02|15:04:05.000"), msg)) + enc.buf.WriteString(fmt.Sprintf("%c[%s] %-44s ", lvl[0]-32, time.Now().Format("2006-01-02|15:04:05.000"), msg)) if module != unknown { enc.buf.WriteString("module=" + module + " ") From 6f16ea0967e2d365137fa4cfc9ef30fee5472c51 Mon Sep 17 00:00:00 2001 From: chengwenxi Date: Mon, 24 Jun 2019 18:20:55 +0800 Subject: [PATCH 05/26] Add "chain_id" label for all metrics --- consensus/metrics.go | 38 +++++++++++++++++++++----------------- mempool/metrics.go | 14 +++++++++----- node/node.go | 13 +++++++------ p2p/metrics.go | 18 +++++++++++------- state/metrics.go | 16 ++++++++++------ 5 files changed, 58 insertions(+), 41 deletions(-) diff --git a/consensus/metrics.go b/consensus/metrics.go index 5c1442f8602..dfab5f8ec91 100644 --- a/consensus/metrics.go +++ b/consensus/metrics.go @@ -52,107 +52,111 @@ type Metrics struct { } // PrometheusMetrics returns Metrics build using Prometheus client library. 
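
Backing up to the timestamp fix for a moment: Go time layouts are patterns written against the reference time Mon Jan 2 15:04:05 MST 2006, so "2016" was never a year token. A quick demonstration:

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	ts := time.Date(2019, time.June, 24, 11, 6, 44, 322000000, time.UTC)

	// Correct: "2006" is the year token of the reference time.
	fmt.Println(ts.Format("2006-01-02|15:04:05.000")) // 2019-06-24|11:06:44.322

	// Buggy: "2016" tokenizes as day ("2"), zero-padded month ("01") and a
	// literal "6", so log lines carried a mangled date with no year in it.
	fmt.Println(ts.Format("2016-01-02|15:04:05.000"))
}
```
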
-func PrometheusMetrics(namespace string) *Metrics { +func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics { + var labels []string + for i := 0; i < len(labelsAndValues); i += 2 { + labels = append(labels, labelsAndValues[i]) + } return &Metrics{ Height: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ Namespace: namespace, Subsystem: MetricsSubsystem, Name: "height", Help: "Height of the chain.", - }, []string{}), + }, labels).With(labelsAndValues...), ConsensusFailure: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ Namespace: namespace, Subsystem: MetricsSubsystem, Name: "consensus_failure", Help: "Consensus failure", - }, []string{"height"}), + }, labels).With(labelsAndValues...), Rounds: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ Namespace: namespace, Subsystem: MetricsSubsystem, Name: "rounds", Help: "Number of rounds.", - }, []string{}), + }, labels).With(labelsAndValues...), Validators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ Namespace: namespace, Subsystem: MetricsSubsystem, Name: "validators", Help: "Number of validators.", - }, []string{}), + }, labels).With(labelsAndValues...), ValidatorsPower: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ Namespace: namespace, Subsystem: MetricsSubsystem, Name: "validators_power", Help: "Total power of all validators.", - }, []string{}), + }, labels).With(labelsAndValues...), MissingValidators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ Namespace: namespace, Subsystem: MetricsSubsystem, Name: "missing_validators", Help: "Number of validators who did not sign.", - }, []string{}), + }, labels).With(labelsAndValues...), MissingValidatorsPower: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ Namespace: namespace, Subsystem: MetricsSubsystem, Name: "missing_validators_power", Help: "Total power of the missing validators.", - }, []string{}), + }, labels).With(labelsAndValues...), ByzantineValidators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ Namespace: namespace, Subsystem: MetricsSubsystem, Name: "byzantine_validators", Help: "Number of validators who tried to double sign.", - }, []string{}), + }, labels).With(labelsAndValues...), ByzantineValidatorsPower: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ Namespace: namespace, Subsystem: MetricsSubsystem, Name: "byzantine_validators_power", Help: "Total power of the byzantine validators.", - }, []string{}), + }, labels).With(labelsAndValues...), BlockIntervalSeconds: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ Namespace: namespace, Subsystem: MetricsSubsystem, Name: "block_interval_seconds", Help: "Time between this and the last block.", - }, []string{}), + }, labels).With(labelsAndValues...), NumTxs: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ Namespace: namespace, Subsystem: MetricsSubsystem, Name: "num_txs", Help: "Number of transactions.", - }, []string{}), + }, labels).With(labelsAndValues...), BlockSizeBytes: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ Namespace: namespace, Subsystem: MetricsSubsystem, Name: "block_size_bytes", Help: "Size of the block.", - }, []string{}), + }, labels).With(labelsAndValues...), TotalTxs: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ Namespace: namespace, Subsystem: MetricsSubsystem, Name: "total_txs", Help: "Total number of transactions.", - }, []string{}), + }, labels).With(labelsAndValues...), CommittedHeight: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ Namespace: namespace, Subsystem: MetricsSubsystem, Name: "latest_block_height", Help: "The latest block height.", - }, 
[]string{}), + }, labels).With(labelsAndValues...), FastSyncing: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ Namespace: namespace, Subsystem: MetricsSubsystem, Name: "fast_syncing", Help: "Whether or not a node is fast syncing. 1 if yes, 0 if no.", - }, []string{}), + }, labels).With(labelsAndValues...), BlockParts: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ Namespace: namespace, Subsystem: MetricsSubsystem, Name: "block_parts", Help: "Number of blockparts transmitted by peer.", - }, []string{"peer_id"}), + }, append(labels, "peer_id")).With(labelsAndValues...), } } diff --git a/mempool/metrics.go b/mempool/metrics.go index 3418f1efecd..d26534ff93b 100644 --- a/mempool/metrics.go +++ b/mempool/metrics.go @@ -23,33 +23,37 @@ type Metrics struct { } // PrometheusMetrics returns Metrics build using Prometheus client library. -func PrometheusMetrics(namespace string) *Metrics { +func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics { + var labels []string + for i := 0; i < len(labelsAndValues); i += 2 { + labels = append(labels, labelsAndValues[i]) + } return &Metrics{ Size: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ Namespace: namespace, Subsystem: MetricsSubsytem, Name: "size", Help: "Size of the mempool (number of uncommitted transactions).", - }, []string{}), + }, labels).With(labelsAndValues...), TxSizeBytes: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{ Namespace: namespace, Subsystem: MetricsSubsytem, Name: "tx_size_bytes", Help: "Transaction sizes in bytes.", Buckets: stdprometheus.ExponentialBuckets(1, 3, 17), - }, []string{}), + }, labels).With(labelsAndValues...), FailedTxs: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ Namespace: namespace, Subsystem: MetricsSubsytem, Name: "failed_txs", Help: "Number of failed transactions.", - }, []string{}), + }, labels).With(labelsAndValues...), RecheckTimes: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ Namespace: namespace, Subsystem: MetricsSubsytem, Name: "recheck_times", Help: "Number of times transactions are rechecked in the mempool.", - }, []string{}), + }, labels).With(labelsAndValues...), } } diff --git a/node/node.go b/node/node.go index b9605f55a26..8233dc9527e 100644 --- a/node/node.go +++ b/node/node.go @@ -98,15 +98,17 @@ func DefaultNewNode(config *cfg.Config, logger log.Logger) (*Node, error) { } // MetricsProvider returns a consensus, p2p and mempool Metrics. -type MetricsProvider func() (*cs.Metrics, *p2p.Metrics, *mempl.Metrics, *sm.Metrics) +type MetricsProvider func(chainID string) (*cs.Metrics, *p2p.Metrics, *mempl.Metrics, *sm.Metrics) // DefaultMetricsProvider returns Metrics build using Prometheus client library // if Prometheus is enabled. Otherwise, it returns no-op Metrics. 
func DefaultMetricsProvider(config *cfg.InstrumentationConfig) MetricsProvider { - return func() (*cs.Metrics, *p2p.Metrics, *mempl.Metrics, *sm.Metrics) { + return func(chainID string) (*cs.Metrics, *p2p.Metrics, *mempl.Metrics, *sm.Metrics) { if config.Prometheus { - return cs.PrometheusMetrics(config.Namespace), p2p.PrometheusMetrics(config.Namespace), - mempl.PrometheusMetrics(config.Namespace), sm.PrometheusMetrics(config.Namespace) + return cs.PrometheusMetrics(config.Namespace, "chain_id", chainID), + p2p.PrometheusMetrics(config.Namespace, "chain_id", chainID), + mempl.PrometheusMetrics(config.Namespace, "chain_id", chainID), + sm.PrometheusMetrics(config.Namespace, "chain_id", chainID) } return cs.NopMetrics(), p2p.NopMetrics(), mempl.NopMetrics(), sm.NopMetrics() } @@ -235,7 +237,6 @@ func NewNode(config *cfg.Config, return nil, err } - // Create the handshaker, which calls RequestInfo, sets the AppVersion on the state, // and replays any blocks as necessary to sync tendermint with the app. consensusLogger := logger.With("module", "consensus") @@ -296,7 +297,7 @@ func NewNode(config *cfg.Config, consensusLogger.Info("This node is not a validator", "addr", addr, "pubKey", pubKey) } - csMetrics, p2pMetrics, memplMetrics, smMetrics := metricsProvider() + csMetrics, p2pMetrics, memplMetrics, smMetrics := metricsProvider(genDoc.ChainID) // Make MempoolReactor mempool := mempl.NewMempool( diff --git a/p2p/metrics.go b/p2p/metrics.go index b066fb3179a..9649ec99542 100644 --- a/p2p/metrics.go +++ b/p2p/metrics.go @@ -24,45 +24,49 @@ type Metrics struct { } // PrometheusMetrics returns Metrics build using Prometheus client library. -func PrometheusMetrics(namespace string) *Metrics { +func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics { + var labels []string + for i := 0; i < len(labelsAndValues); i += 2 { + labels = append(labels, labelsAndValues[i]) + } return &Metrics{ Peers: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ Namespace: namespace, Subsystem: MetricsSubsystem, Name: "peers", Help: "Number of peers.", - }, []string{}), + }, labels).With(labelsAndValues...), PeerReceiveBytesTotal: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ Namespace: namespace, Subsystem: MetricsSubsystem, Name: "peer_receive_bytes_total", Help: "Number of bytes received from a given peer.", - }, []string{"peer_id"}), + }, append(labels, "peer_id")).With(labelsAndValues...), PeerSendBytesTotal: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ Namespace: namespace, Subsystem: MetricsSubsystem, Name: "peer_send_bytes_total", Help: "Number of bytes sent to a given peer.", - }, []string{"peer_id"}), + }, append(labels, "peer_id")).With(labelsAndValues...), PeerPendingSendBytes: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ Namespace: namespace, Subsystem: MetricsSubsystem, Name: "peer_pending_send_bytes", Help: "Number of pending bytes to be sent to a given peer.", - }, []string{"peer_id"}), + }, append(labels, "peer_id")).With(labelsAndValues...), NumTxs: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ Namespace: namespace, Subsystem: MetricsSubsystem, Name: "num_txs", Help: "Number of transactions submitted by each peer.", - }, []string{"peer_id"}), + }, append(labels, "peer_id")).With(labelsAndValues...), } } // NopMetrics returns no-op Metrics. 
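
Every PrometheusMetrics constructor in this patch follows the same shape: the even-indexed entries of labelsAndValues are the label names, declared on the metric, and go-kit's With then pre-binds the label values. That way node.go passes ("chain_id", chainID) once and every metric carries it. A self-contained sketch of a single gauge:

```go
package main

import (
	"github.com/go-kit/kit/metrics"
	"github.com/go-kit/kit/metrics/prometheus"
	stdprometheus "github.com/prometheus/client_golang/prometheus"
)

// newHeightGauge condenses the pattern used above: declare the metric with
// the label *names*, then curry the label *values* with With so callers
// never have to thread chain_id through again.
func newHeightGauge(namespace string, labelsAndValues ...string) metrics.Gauge {
	var labels []string
	for i := 0; i < len(labelsAndValues); i += 2 {
		labels = append(labels, labelsAndValues[i])
	}
	return prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
		Namespace: namespace,
		Subsystem: "consensus",
		Name:      "height",
		Help:      "Height of the chain.",
	}, labels).With(labelsAndValues...)
}

func main() {
	g := newHeightGauge("tendermint", "chain_id", "test-chain-1")
	g.Set(42) // exposed as tendermint_consensus_height{chain_id="test-chain-1"} 42
}
```
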
func NopMetrics() *Metrics { return &Metrics{ - Peers: discard.NewGauge(), + Peers: discard.NewGauge(), PeerReceiveBytesTotal: discard.NewCounter(), PeerSendBytesTotal: discard.NewCounter(), PeerPendingSendBytes: discard.NewGauge(), diff --git a/state/metrics.go b/state/metrics.go index 2250667dbac..bf70d4fd747 100644 --- a/state/metrics.go +++ b/state/metrics.go @@ -20,7 +20,11 @@ type Metrics struct { AppHashConflict metrics.Counter } -func PrometheusMetrics(namespace string) *Metrics { +func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics { + var labels []string + for i := 0; i < len(labelsAndValues); i += 2 { + labels = append(labels, labelsAndValues[i]) + } return &Metrics{ BlockProcessingTime: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{ Namespace: namespace, @@ -28,27 +32,27 @@ func PrometheusMetrics(namespace string) *Metrics { Name: "block_processing_time", Help: "Time between BeginBlock and EndBlock in ms.", Buckets: stdprometheus.LinearBuckets(1, 10, 10), - }, []string{}), + }, labels).With(labelsAndValues...), RecheckTime: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{ Namespace: namespace, Subsystem: MetricsSubsystem, Name: "recheck_time", Help: "Time cost on recheck in ms.", Buckets: stdprometheus.LinearBuckets(1, 10, 10), - }, []string{}), + }, labels).With(labelsAndValues...), AppHashConflict: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ Namespace: namespace, Subsystem: MetricsSubsystem, Name: "app_hash_conflict", Help: "App hash conflict error", - }, []string{"proposer", "height"}), + }, append(labels, "proposer", "height")).With(labelsAndValues...), } } func NopMetrics() *Metrics { return &Metrics{ BlockProcessingTime: discard.NewHistogram(), - RecheckTime: discard.NewHistogram(), - AppHashConflict: discard.NewCounter(), + RecheckTime: discard.NewHistogram(), + AppHashConflict: discard.NewCounter(), } } From 4d05bc45e024a17e4d1e2d2e7df21d1fc7166d14 Mon Sep 17 00:00:00 2001 From: chengwenxi Date: Mon, 24 Jun 2019 18:51:43 +0800 Subject: [PATCH 06/26] Return maxPerPage if per_page is greater than max --- rpc/core/pipe.go | 4 +++- rpc/core/pipe_test.go | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/rpc/core/pipe.go b/rpc/core/pipe.go index 3d745e6ad26..23649544372 100644 --- a/rpc/core/pipe.go +++ b/rpc/core/pipe.go @@ -149,8 +149,10 @@ func validatePage(page, perPage, totalCount int) int { } func validatePerPage(perPage int) int { - if perPage < 1 || perPage > maxPerPage { + if perPage < 1 { return defaultPerPage + } else if perPage > maxPerPage { + return maxPerPage } return perPage } diff --git a/rpc/core/pipe_test.go b/rpc/core/pipe_test.go index 225e3649221..ff5c114f8c2 100644 --- a/rpc/core/pipe_test.go +++ b/rpc/core/pipe_test.go @@ -59,7 +59,7 @@ func TestPaginationPerPage(t *testing.T) { {5, defaultPerPage, defaultPerPage}, {5, maxPerPage - 1, maxPerPage - 1}, {5, maxPerPage, maxPerPage}, - {5, maxPerPage + 1, defaultPerPage}, + {5, maxPerPage + 1, maxPerPage}, } for _, c := range cases { From d04f4ee8804418428e323ba48f5bf6c05dc0f015 Mon Sep 17 00:00:00 2001 From: chengwenxi Date: Tue, 25 Jun 2019 13:49:29 +0800 Subject: [PATCH 07/26] Only log "Reached max attempts to dial" once --- p2p/pex/pex_reactor.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/p2p/pex/pex_reactor.go b/p2p/pex/pex_reactor.go index 057aadaa264..0b043ca831a 100644 --- a/p2p/pex/pex_reactor.go +++ b/p2p/pex/pex_reactor.go @@ -471,7 +471,11 @@ func (r *PEXReactor) dialPeer(addr *p2p.NetAddress) { 
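
A quick note on patch 06 above before the PEX change continues: an oversized per_page is now clamped to maxPerPage instead of being silently reset to the default. A self-contained sketch (the constants mirror rpc/core/pipe.go, where defaultPerPage is 30 and maxPerPage is 100 at the time of this patch):

```go
package main

import "fmt"

const (
	defaultPerPage = 30  // as in rpc/core/pipe.go
	maxPerPage     = 100 // as in rpc/core/pipe.go
)

func validatePerPage(perPage int) int {
	if perPage < 1 {
		return defaultPerPage
	} else if perPage > maxPerPage {
		return maxPerPage
	}
	return perPage
}

func main() {
	for _, in := range []int{0, 1, 56, 100, 101} {
		fmt.Printf("per_page=%d -> %d\n", in, validatePerPage(in))
	}
	// per_page=101 now yields 100; before this patch it fell back to 30.
}
```
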
attempts, lastDialed := r.dialAttemptsInfo(addr) if attempts > maxAttemptsToDial { - r.Logger.Error("Reached max attempts to dial", "addr", addr, "attempts", attempts) + // Do not log the message if the addr gets readded. + if attempts+1 == maxAttemptsToDial { + r.Logger.Info("Reached max attempts to dial", "addr", addr, "attempts", attempts) + r.attemptsToDial.Store(addr.DialString(), _attemptsToDial{attempts + 1, time.Now()}) + } r.book.MarkBad(addr) return } From cd55f29aeac7adf65b5d960610455c450df3e875 Mon Sep 17 00:00:00 2001 From: chengwenxi Date: Tue, 25 Jun 2019 15:07:12 +0800 Subject: [PATCH 08/26] Better errors and new fail point --- consensus/replay.go | 4 ++-- consensus/state.go | 29 +++++++++++++++++++++++++++-- consensus/wal.go | 14 +++++++------- libs/fail/fail.go | 5 +++-- privval/file.go | 6 +++--- 5 files changed, 42 insertions(+), 16 deletions(-) diff --git a/consensus/replay.go b/consensus/replay.go index 1fb71ee7473..782924ede45 100644 --- a/consensus/replay.go +++ b/consensus/replay.go @@ -145,8 +145,8 @@ func (cs *ConsensusState) catchupReplay(csHeight int64) error { if err == io.EOF { break } else if IsDataCorruptionError(err) { - cs.Logger.Debug("data has been corrupted in last height of consensus WAL", "err", err, "height", csHeight) - panic(fmt.Sprintf("data has been corrupted (%v) in last height %d of consensus WAL", err, csHeight)) + cs.Logger.Error("data has been corrupted in last height of consensus WAL", "err", err, "height", csHeight) + return err } else if err != nil { return err } diff --git a/consensus/state.go b/consensus/state.go index 55aa15b32ff..ccd75425385 100644 --- a/consensus/state.go +++ b/consensus/state.go @@ -78,9 +78,9 @@ type ConsensusState struct { evpool sm.EvidencePool // internal state - mtx sync.RWMutex + mtx sync.RWMutex cstypes.RoundState - state sm.State // State until height-1. + state sm.State // State until height-1. // state changes may be triggered by: msgs from peers, // msgs from ourself, or by timeouts @@ -296,6 +296,22 @@ func (cs *ConsensusState) OnStart() error { // reload from consensus log to catchup if cs.doWALCatchup { if err := cs.catchupReplay(cs.Height); err != nil { + // don't try to recover from data corruption error + if IsDataCorruptionError(err) { + cs.Logger.Error("Encountered corrupt WAL file", "err", err.Error()) + cs.Logger.Error("Please repair the WAL file before restarting") + fmt.Println(`You can attempt to repair the WAL as follows: +---- +WALFILE=~/.tendermint/data/cs.wal/wal +cp $WALFILE ${WALFILE}.bak # backup the file +go run scripts/wal2json/main.go $WALFILE > wal.json # this will panic, but can be ignored +rm $WALFILE # remove the corrupt file +go run scripts/json2wal/main.go wal.json $WALFILE # rebuild the file without corruption +----`) + + return err + } + cs.Logger.Error("Error on catchup replay. Proceeding to start ConsensusState anyway", "err", err.Error()) // NOTE: if we ever do return an error here, // make sure to stop the timeoutTicker @@ -620,6 +636,15 @@ func (cs *ConsensusState) receiveRoutine(maxSteps int) { cs.handleMsg(mi) case mi = <-cs.internalMsgQueue: cs.wal.WriteSync(mi) // NOTE: fsync + + if _, ok := mi.Msg.(*VoteMessage); ok { + // we actually want to simulate failing during + // the previous WriteSync, but this isn't easy to do. + // Equivalent would be to fail here and manually remove + // some bytes from the end of the wal. 
+ fail.Fail() // XXX + } + // handles proposals, block parts, votes cs.handleMsg(mi) case ti := <-cs.timeoutTicker.Chan(): // tockChan: diff --git a/consensus/wal.go b/consensus/wal.go index b2529f19676..d56ede26d23 100644 --- a/consensus/wal.go +++ b/consensus/wal.go @@ -172,7 +172,7 @@ func (wal *baseWAL) SearchForEndHeight(height int64, options *WALSearchOptions) // NOTE: starting from the last file in the group because we're usually // searching for the last height. See replay.go min, max := wal.group.MinIndex(), wal.group.MaxIndex() - wal.Logger.Debug("Searching for height", "height", height, "min", min, "max", max) + wal.Logger.Info("Searching for height", "height", height, "min", min, "max", max) for index := max; index >= min; index-- { gr, err = wal.group.NewReader(index) if err != nil { @@ -192,7 +192,7 @@ func (wal *baseWAL) SearchForEndHeight(height int64, options *WALSearchOptions) break } if options.IgnoreDataCorruptionErrors && IsDataCorruptionError(err) { - wal.Logger.Debug("Corrupted entry. Skipping...", "err", err) + wal.Logger.Error("Corrupted entry. Skipping...", "err", err) // do nothing continue } else if err != nil { @@ -203,7 +203,7 @@ func (wal *baseWAL) SearchForEndHeight(height int64, options *WALSearchOptions) if m, ok := msg.Msg.(EndHeightMessage); ok { lastHeightFound = m.Height if m.Height == height { // found - wal.Logger.Debug("Found", "height", height, "index", index) + wal.Logger.Info("Found", "height", height, "index", index) return gr, true, nil } } @@ -290,25 +290,25 @@ func (dec *WALDecoder) Decode() (*TimedWALMessage, error) { return nil, err } if err != nil { - return nil, fmt.Errorf("failed to read checksum: %v", err) + return nil, DataCorruptionError{fmt.Errorf("failed to read checksum: %v", err)} } crc := binary.BigEndian.Uint32(b) b = make([]byte, 4) _, err = dec.rd.Read(b) if err != nil { - return nil, fmt.Errorf("failed to read length: %v", err) + return nil, DataCorruptionError{fmt.Errorf("failed to read length: %v", err)} } length := binary.BigEndian.Uint32(b) if length > maxMsgSizeBytes { - return nil, fmt.Errorf("length %d exceeded maximum possible value of %d bytes", length, maxMsgSizeBytes) + return nil, DataCorruptionError{fmt.Errorf("length %d exceeded maximum possible value of %d bytes", length, maxMsgSizeBytes)} } data := make([]byte, length) _, err = dec.rd.Read(data) if err != nil { - return nil, fmt.Errorf("failed to read data: %v", err) + return nil, DataCorruptionError{fmt.Errorf("failed to read data: %v", err)} } // check checksum before decoding data diff --git a/libs/fail/fail.go b/libs/fail/fail.go index edfca13e310..d7912af5c26 100644 --- a/libs/fail/fail.go +++ b/libs/fail/fail.go @@ -72,7 +72,8 @@ func FailRand(n int) { func Exit() { fmt.Printf("*** fail-test %d ***\n", callIndex) - proc, _ := os.FindProcess(os.Getpid()) - proc.Signal(os.Interrupt) + os.Exit(1) + // proc, _ := os.FindProcess(os.Getpid()) + // proc.Signal(os.Interrupt) // panic(fmt.Sprintf("*** fail-test %d ***", callIndex)) } diff --git a/privval/file.go b/privval/file.go index ba777e1fdb9..a827a6b6c21 100644 --- a/privval/file.go +++ b/privval/file.go @@ -175,17 +175,17 @@ func (pv *FilePV) SignProposal(chainID string, proposal *types.Proposal) error { // returns error if HRS regression or no LastSignBytes. returns true if HRS is unchanged func (pv *FilePV) checkHRS(height int64, round int, step int8) (bool, error) { if pv.LastHeight > height { - return false, errors.New("Height regression") + return false, fmt.Errorf("Height regression. 
Got %v, last height %v", height, pv.LastHeight) } if pv.LastHeight == height { if pv.LastRound > round { - return false, errors.New("Round regression") + return false, fmt.Errorf("Round regression at height %v. Got %v, last round %v", height, round, pv.LastRound) } if pv.LastRound == round { if pv.LastStep > step { - return false, errors.New("Step regression") + return false, fmt.Errorf("Step regression at height %v round %v. Got %v, last step %v", height, round, step, pv.LastStep) } else if pv.LastStep == step { if pv.LastSignBytes != nil { if pv.LastSignature == nil { From 9fc408852aabe0461a84632b6758178632a38215 Mon Sep 17 00:00:00 2001 From: chengwenxi Date: Tue, 25 Jun 2019 16:49:38 +0800 Subject: [PATCH 09/26] Refactor GetSelectionWithBias for addressbook --- p2p/pex/addrbook.go | 125 ++++++++++++++------------------------- p2p/pex/addrbook_test.go | 37 +++++------- 2 files changed, 59 insertions(+), 103 deletions(-) diff --git a/p2p/pex/addrbook.go b/p2p/pex/addrbook.go index 3cda9ac7473..3cb91c38021 100644 --- a/p2p/pex/addrbook.go +++ b/p2p/pex/addrbook.go @@ -9,6 +9,7 @@ import ( "encoding/binary" "fmt" "math" + "math/rand" "net" "sync" "time" @@ -405,89 +406,11 @@ func (a *addrBook) GetSelectionWithBias(biasTowardsNewAddrs int) []*p2p.NetAddre bookSize*getSelectionPercent/100) numAddresses = cmn.MinInt(maxGetSelection, numAddresses) - selection := make([]*p2p.NetAddress, numAddresses) - - oldBucketToAddrsMap := make(map[int]map[string]struct{}) - var oldIndex int - newBucketToAddrsMap := make(map[int]map[string]struct{}) - var newIndex int - - // initialize counters used to count old and new added addresses. - // len(oldBucketToAddrsMap) cannot be used as multiple addresses can endup in the same bucket. - var oldAddressesAdded int - var newAddressesAdded int - // number of new addresses that, if possible, should be in the beginning of the selection - numRequiredNewAdd := percentageOfNum(biasTowardsNewAddrs, numAddresses) - - selectionIndex := 0 -ADDRS_LOOP: - for selectionIndex < numAddresses { - // biasedTowardsOldAddrs indicates if the selection can switch to old addresses - biasedTowardsOldAddrs := selectionIndex >= numRequiredNewAdd - // An old addresses is selected if: - // - the bias is for old and old addressees are still available or, - // - there are no new addresses or all new addresses have been selected. 
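
Stepping back to patch 08: wrapping WAL decode failures in DataCorruptionError lets replay code distinguish unrecoverable corruption from ordinary I/O errors and surface the repair instructions instead of panicking. A sketch of a consumer, using the exported WALDecoder and IsDataCorruptionError from the consensus package:

```go
package main

import (
	"fmt"
	"io"
	"os"

	cs "github.com/tendermint/tendermint/consensus"
)

// replayWAL mirrors the error handling in catchupReplay above: EOF ends the
// replay, corruption is reported to the operator, anything else bubbles up.
func replayWAL(path string) error {
	f, err := os.Open(path)
	if err != nil {
		return err
	}
	defer f.Close()

	dec := cs.NewWALDecoder(f)
	for {
		msg, err := dec.Decode()
		if err == io.EOF {
			return nil
		} else if cs.IsDataCorruptionError(err) {
			return fmt.Errorf("corrupt WAL entry, repair the file before restarting: %v", err)
		} else if err != nil {
			return err
		}
		_ = msg // hand the TimedWALMessage to the state machine here
	}
}

func main() {
	if err := replayWAL("data/cs.wal/wal"); err != nil {
		fmt.Println(err)
	}
}
```
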
- // numAddresses <= a.nOld + a.nNew therefore it is guaranteed that there are enough - // addresses to fill the selection - pickFromOldBucket := - (biasedTowardsOldAddrs && oldAddressesAdded < a.nOld) || - a.nNew == 0 || newAddressesAdded >= a.nNew - - bucket := make(map[string]*knownAddress) - - // loop until we pick a random non-empty bucket - for len(bucket) == 0 { - if pickFromOldBucket { - oldIndex = a.rand.Intn(len(a.bucketsOld)) - bucket = a.bucketsOld[oldIndex] - } else { - newIndex = a.rand.Intn(len(a.bucketsNew)) - bucket = a.bucketsNew[newIndex] - } - } - - // pick a random index - randIndex := a.rand.Intn(len(bucket)) - - // loop over the map to return that index - var selectedAddr *p2p.NetAddress - for _, ka := range bucket { - if randIndex == 0 { - selectedAddr = ka.Addr - break - } - randIndex-- - } - - // if we have selected the address before, restart the loop - // otherwise, record it and continue - if pickFromOldBucket { - if addrsMap, ok := oldBucketToAddrsMap[oldIndex]; ok { - if _, ok = addrsMap[selectedAddr.String()]; ok { - continue ADDRS_LOOP - } - } else { - oldBucketToAddrsMap[oldIndex] = make(map[string]struct{}) - } - oldBucketToAddrsMap[oldIndex][selectedAddr.String()] = struct{}{} - oldAddressesAdded++ - } else { - if addrsMap, ok := newBucketToAddrsMap[newIndex]; ok { - if _, ok = addrsMap[selectedAddr.String()]; ok { - continue ADDRS_LOOP - } - } else { - newBucketToAddrsMap[newIndex] = make(map[string]struct{}) - } - newBucketToAddrsMap[newIndex][selectedAddr.String()] = struct{}{} - newAddressesAdded++ - } - - selection[selectionIndex] = selectedAddr - selectionIndex++ - } - + // if there are no enough old addrs, will choose new addr instead. + numRequiredNewAdd := cmn.MaxInt(percentageOfNum(biasTowardsNewAddrs, numAddresses), numAddresses-a.nOld) + selection := a.randomPickAddresses(bucketTypeNew, numRequiredNewAdd) + selection = append(selection, a.randomPickAddresses(bucketTypeOld, numAddresses-len(selection))...) return selection } @@ -726,6 +649,44 @@ func (a *addrBook) addAddress(addr, src *p2p.NetAddress) error { return nil } +func (a *addrBook) randomPickAddresses(bucketType byte, num int) []*p2p.NetAddress { + var buckets []map[string]*knownAddress + switch bucketType { + case bucketTypeNew: + buckets = a.bucketsNew + case bucketTypeOld: + buckets = a.bucketsOld + default: + panic("unexpected bucketType") + } + total := 0 + for _, bucket := range buckets { + total = total + len(bucket) + } + addresses := make([]*knownAddress, 0, total) + for _, bucket := range buckets { + for _, ka := range bucket { + addresses = append(addresses, ka) + } + } + selection := make([]*p2p.NetAddress, 0, num) + chosenSet := make(map[string]bool, num) + rand.Shuffle(total, func(i, j int) { + addresses[i], addresses[j] = addresses[j], addresses[i] + }) + for _, addr := range addresses { + if chosenSet[addr.Addr.String()] { + continue + } + chosenSet[addr.Addr.String()] = true + selection = append(selection, addr.Addr) + if len(selection) >= num { + return selection + } + } + return selection +} + // Make space in the new buckets by expiring the really bad entries. // If no bad entries are available we remove the oldest. 
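
randomPickAddresses above is the heart of the refactor: flatten the chosen buckets, shuffle, and keep the first n distinct addresses (a new address can live in several buckets, hence the dedup set). Combined with numRequiredNewAdd = max(bias% of the selection, selection size minus available old addresses), it replaces the retry-heavy bucket-picking loop. A standalone sketch of the sampling step:

```go
package main

import (
	"fmt"
	"math/rand"
)

// pick mirrors randomPickAddresses: Fisher-Yates shuffle the flattened
// bucket contents, then take the first n unique entries.
func pick(addrs []string, n int) []string {
	rand.Shuffle(len(addrs), func(i, j int) {
		addrs[i], addrs[j] = addrs[j], addrs[i]
	})
	chosen := make(map[string]bool, n)
	selection := make([]string, 0, n)
	for _, a := range addrs {
		if chosen[a] {
			continue // the same address can appear in more than one bucket
		}
		chosen[a] = true
		selection = append(selection, a)
		if len(selection) == n {
			break
		}
	}
	return selection
}

func main() {
	// "a:26656" appears twice, as it could when stored in two buckets.
	fmt.Println(pick([]string{"a:26656", "b:26656", "a:26656", "c:26656"}, 2))
}
```
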
func (a *addrBook) expireNew(bucketIdx int) { diff --git a/p2p/pex/addrbook_test.go b/p2p/pex/addrbook_test.go index 78dd7bce338..45a742e96b0 100644 --- a/p2p/pex/addrbook_test.go +++ b/p2p/pex/addrbook_test.go @@ -8,8 +8,8 @@ import ( "os" "testing" - "github.com/stretchr/testify/require" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" cmn "github.com/tendermint/tendermint/libs/common" "github.com/tendermint/tendermint/libs/log" "github.com/tendermint/tendermint/p2p" @@ -432,12 +432,12 @@ func TestPrivatePeers(t *testing.T) { func testAddrBookAddressSelection(t *testing.T, bookSize int) { // generate all combinations of old (m) and new addresses - for nOld := 0; nOld <= bookSize; nOld++ { - nNew := bookSize - nOld - dbgStr := fmt.Sprintf("book of size %d (new %d, old %d)", bookSize, nNew, nOld) + for nBookOld := 0; nBookOld <= bookSize; nBookOld++ { + nBookNew := bookSize - nBookOld + dbgStr := fmt.Sprintf("book of size %d (new %d, old %d)", bookSize, nBookNew, nBookOld) // create book and get selection - book, fname := createAddrBookWithMOldAndNNewAddrs(t, nOld, nNew) + book, fname := createAddrBookWithMOldAndNNewAddrs(t, nBookOld, nBookNew) defer deleteTempFile(fname) addrs := book.GetSelectionWithBias(biasToSelectNewPeers) assert.NotNil(t, addrs, "%s - expected a non-nil selection", dbgStr) @@ -457,27 +457,25 @@ func testAddrBookAddressSelection(t *testing.T, bookSize int) { // Given: // n - num new addrs, m - num old addrs // k - num new addrs expected in the beginning (based on bias %) - // i=min(n, k), aka expFirstNew + // i=min(n, max(k,r-m)), aka expNew // j=min(m, r-i), aka expOld // // We expect this layout: - // indices: 0...i-1 i...i+j-1 i+j...r - // addresses: N0..Ni-1 O0..Oj-1 Ni... + // indices: 0...i-1 i...i+j-1 + // addresses: N0..Ni-1 O0..Oj-1 // // There is at least one partition and at most three. 
var ( - k = percentageOfNum(biasToSelectNewPeers, nAddrs) - expFirstNew = cmn.MinInt(nNew, k) - expOld = cmn.MinInt(nOld, nAddrs-expFirstNew) - expNew = nAddrs - expOld - expLastNew = expNew - expFirstNew + k = percentageOfNum(biasToSelectNewPeers, nAddrs) + expNew = cmn.MinInt(nNew, cmn.MaxInt(k, nAddrs-nBookOld)) + expOld = cmn.MinInt(nOld, nAddrs-expNew) ) // Verify that the number of old and new addresses are as expected - if nNew < expNew || nNew > expNew { + if nNew != expNew { t.Fatalf("%s - expected new addrs %d, got %d", dbgStr, expNew, nNew) } - if nOld < expOld || nOld > expOld { + if nOld != expOld { t.Fatalf("%s - expected old addrs %d, got %d", dbgStr, expOld, nOld) } @@ -496,15 +494,12 @@ func testAddrBookAddressSelection(t *testing.T, bookSize int) { case expOld == 0: // all new addresses expSeqLens = []int{nAddrs} expSeqTypes = []int{1} - case expFirstNew == 0: // all old addresses + case expNew == 0: // all old addresses expSeqLens = []int{nAddrs} expSeqTypes = []int{2} - case nAddrs-expFirstNew-expOld == 0: // new addresses, old addresses - expSeqLens = []int{expFirstNew, expOld} + case nAddrs-expNew-expOld == 0: // new addresses, old addresses + expSeqLens = []int{expNew, expOld} expSeqTypes = []int{1, 2} - default: // new addresses, old addresses, new addresses - expSeqLens = []int{expFirstNew, expOld, expLastNew} - expSeqTypes = []int{1, 2, 1} } assert.Equal(t, expSeqLens, seqLens, From 4a5b8d37cee09f0bd76654416b529a6e81922753 Mon Sep 17 00:00:00 2001 From: chengwenxi Date: Tue, 25 Jun 2019 18:59:15 +0800 Subject: [PATCH 10/26] Replace OriginalAddr by SocketAddr --- node/node.go | 3 +-- p2p/dummy/peer.go | 2 +- p2p/node_info.go | 10 ++------ p2p/peer.go | 27 ++++++++++----------- p2p/peer_set_test.go | 2 +- p2p/peer_test.go | 14 ++++++----- p2p/pex/pex_reactor.go | 4 ++-- p2p/pex/pex_reactor_test.go | 47 ++++++++++++++++++++----------------- p2p/switch.go | 22 ++++++++--------- p2p/switch_test.go | 8 ++----- p2p/test_util.go | 19 ++++++++------- p2p/transport.go | 25 ++++++++++++++++---- rpc/core/consensus.go | 2 +- 13 files changed, 98 insertions(+), 87 deletions(-) diff --git a/node/node.go b/node/node.go index b9605f55a26..65b5c65e83c 100644 --- a/node/node.go +++ b/node/node.go @@ -235,7 +235,6 @@ func NewNode(config *cfg.Config, return nil, err } - // Create the handshaker, which calls RequestInfo, sets the AppVersion on the state, // and replays any blocks as necessary to sync tendermint with the app. consensusLogger := logger.With("module", "consensus") @@ -469,7 +468,7 @@ func NewNode(config *cfg.Config, addrBook := pex.NewAddrBook(config.P2P.AddrBookFile(), config.P2P.AddrBookStrict) // Add ourselves to addrbook to prevent dialing ourselves - addrBook.AddOurAddress(nodeInfo.NetAddress()) + addrBook.AddOurAddress(sw.NetAddress()) addrBook.SetLogger(p2pLogger.With("book", config.P2P.AddrBookFile())) if config.P2P.PexReactor { diff --git a/p2p/dummy/peer.go b/p2p/dummy/peer.go index 57edafc6769..d002a9d3c18 100644 --- a/p2p/dummy/peer.go +++ b/p2p/dummy/peer.go @@ -95,6 +95,6 @@ func (p *peer) Get(key string) interface{} { } // OriginalAddr always returns nil. -func (p *peer) OriginalAddr() *p2p.NetAddress { +func (p *peer) SocketAddr() *p2p.NetAddress { return nil } diff --git a/p2p/node_info.go b/p2p/node_info.go index 3e02e9c18e6..ebbd8cd6f19 100644 --- a/p2p/node_info.go +++ b/p2p/node_info.go @@ -23,14 +23,8 @@ func MaxNodeInfoSize() int { // NodeInfo exposes basic info of a node // and determines if we're compatible. 
type NodeInfo interface { - nodeInfoAddress - nodeInfoTransport -} - -// nodeInfoAddress exposes just the core info of a node. -type nodeInfoAddress interface { ID() ID - NetAddress() *NetAddress + nodeInfoTransport } // nodeInfoTransport validates a nodeInfo and checks @@ -221,7 +215,7 @@ func (info DefaultNodeInfo) NetAddress() *NetAddress { if err != nil { switch err.(type) { case ErrNetAddressLookup: - // XXX If the peer provided a host name and the lookup fails here + // XXX If the peer provided a host name and the lookup fails here // we're out of luck. // TODO: use a NetAddress in DefaultNodeInfo default: diff --git a/p2p/peer.go b/p2p/peer.go index 73332a2aa8f..fab3b42d46c 100644 --- a/p2p/peer.go +++ b/p2p/peer.go @@ -29,7 +29,7 @@ type Peer interface { NodeInfo() NodeInfo // peer's info Status() tmconn.ConnectionStatus - OriginalAddr() *NetAddress // original address for outbound peers + SocketAddr() *NetAddress // actual address of the socket Send(byte, []byte) bool TrySend(byte, []byte) bool @@ -46,7 +46,7 @@ type peerConn struct { persistent bool conn net.Conn // source connection - originalAddr *NetAddress // nil for inbound connections + socketAddr *NetAddress // cached RemoteIP() ip net.IP @@ -55,14 +55,14 @@ type peerConn struct { func newPeerConn( outbound, persistent bool, conn net.Conn, - originalAddr *NetAddress, + socketAddr *NetAddress, ) peerConn { return peerConn{ - outbound: outbound, - persistent: persistent, - conn: conn, - originalAddr: originalAddr, + outbound: outbound, + persistent: persistent, + conn: conn, + socketAddr: socketAddr, } } @@ -223,13 +223,12 @@ func (p *peer) NodeInfo() NodeInfo { return p.nodeInfo } -// OriginalAddr returns the original address, which was used to connect with -// the peer. Returns nil for inbound peers. -func (p *peer) OriginalAddr() *NetAddress { - if p.peerConn.outbound { - return p.peerConn.originalAddr - } - return nil +// SocketAddr returns the address of the socket. +// For outbound peers, it's the address dialed (after DNS resolution). +// For inbound peers, it's the address returned by the underlying connection +// (not what's reported in the peer's NodeInfo). +func (p *peer) SocketAddr() *NetAddress { + return p.peerConn.socketAddr } // Status returns the peer's ConnectionStatus. 
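
Because SocketAddr is total, returning a real address for inbound peers too, callers can drop the fallback to the peer's self-reported (and unauthenticated) ListenAddr. A sketch of the simplification this enables, mirroring the StopPeerForError change in p2p/switch.go further below:

```go
package p2putil

import "github.com/tendermint/tendermint/p2p"

// reconnectAddr picks the address for redialing a persistent peer. Previously:
//
//	addr := peer.OriginalAddr()
//	if addr == nil {
//		// inbound peer: only its self-reported address was available
//		addr = peer.NodeInfo().NetAddress()
//	}
//
// Now the actual socket address is always available, for both directions:
func reconnectAddr(peer p2p.Peer) *p2p.NetAddress {
	return peer.SocketAddr()
}
```
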
diff --git a/p2p/peer_set_test.go b/p2p/peer_set_test.go index 1d2372fb087..4bacb07d0e1 100644 --- a/p2p/peer_set_test.go +++ b/p2p/peer_set_test.go @@ -29,7 +29,7 @@ func (mp *mockPeer) IsPersistent() bool { return true } func (mp *mockPeer) Get(s string) interface{} { return s } func (mp *mockPeer) Set(string, interface{}) {} func (mp *mockPeer) RemoteIP() net.IP { return mp.ip } -func (mp *mockPeer) OriginalAddr() *NetAddress { return nil } +func (mp *mockPeer) SocketAddr() *NetAddress { return nil } func (mp *mockPeer) RemoteAddr() net.Addr { return &net.TCPAddr{IP: mp.ip, Port: 8800} } func (mp *mockPeer) CloseConn() error { return nil } diff --git a/p2p/peer_test.go b/p2p/peer_test.go index 90be311317c..bf61beb4fa6 100644 --- a/p2p/peer_test.go +++ b/p2p/peer_test.go @@ -109,25 +109,27 @@ func testOutboundPeerConn( persistent bool, ourNodePrivKey crypto.PrivKey, ) (peerConn, error) { + + var pc peerConn conn, err := testDial(addr, config) if err != nil { - return peerConn{}, cmn.ErrorWrap(err, "Error creating peer") + return pc, cmn.ErrorWrap(err, "Error creating peer") } - pc, err := testPeerConn(conn, config, true, persistent, ourNodePrivKey, addr) + pc, err = testPeerConn(conn, config, true, persistent, ourNodePrivKey, addr) if err != nil { if cerr := conn.Close(); cerr != nil { - return peerConn{}, cmn.ErrorWrap(err, cerr.Error()) + return pc, cmn.ErrorWrap(err, cerr.Error()) } - return peerConn{}, err + return pc, err } // ensure dialed ID matches connection ID if addr.ID != pc.ID() { if cerr := conn.Close(); cerr != nil { - return peerConn{}, cmn.ErrorWrap(err, cerr.Error()) + return pc, cmn.ErrorWrap(err, cerr.Error()) } - return peerConn{}, ErrSwitchAuthenticationFailure{addr, pc.ID()} + return pc, ErrSwitchAuthenticationFailure{addr, pc.ID()} } return pc, nil diff --git a/p2p/pex/pex_reactor.go b/p2p/pex/pex_reactor.go index 057aadaa264..94978f2f335 100644 --- a/p2p/pex/pex_reactor.go +++ b/p2p/pex/pex_reactor.go @@ -167,7 +167,7 @@ func (r *PEXReactor) AddPeer(p Peer) { } } else { // inbound peer is its own source - addr := p.NodeInfo().NetAddress() + addr := p.SocketAddr() src := addr // add to book. dont RequestAddrs right away because @@ -309,7 +309,7 @@ func (r *PEXReactor) ReceiveAddrs(addrs []*p2p.NetAddress, src Peer) error { } r.requestsSent.Delete(id) - srcAddr := src.NodeInfo().NetAddress() + srcAddr := src.SocketAddr() for _, netAddr := range addrs { // Validate netAddr. Disconnect from a peer if it sends us invalid data. 
if netAddr == nil { diff --git a/p2p/pex/pex_reactor_test.go b/p2p/pex/pex_reactor_test.go index 4f4ccb03948..e232797521f 100644 --- a/p2p/pex/pex_reactor_test.go +++ b/p2p/pex/pex_reactor_test.go @@ -101,7 +101,7 @@ func TestPEXReactorRunning(t *testing.T) { } addOtherNodeAddrToAddrBook := func(switchIndex, otherSwitchIndex int) { - addr := switches[otherSwitchIndex].NodeInfo().NetAddress() + addr := switches[otherSwitchIndex].NetAddress() books[switchIndex].AddAddress(addr, addr) } @@ -132,7 +132,7 @@ func TestPEXReactorReceive(t *testing.T) { r.RequestAddrs(peer) size := book.Size() - addrs := []*p2p.NetAddress{peer.NodeInfo().NetAddress()} + addrs := []*p2p.NetAddress{peer.SocketAddr()} msg := cdc.MustMarshalBinaryBare(&pexAddrsMessage{Addrs: addrs}) r.Receive(PexChannel, peer, msg) assert.Equal(t, size+1, book.Size()) @@ -189,7 +189,7 @@ func TestPEXReactorAddrsMessageAbuse(t *testing.T) { assert.True(t, r.requestsSent.Has(id)) assert.True(t, sw.Peers().Has(peer.ID())) - addrs := []*p2p.NetAddress{peer.NodeInfo().NetAddress()} + addrs := []*p2p.NetAddress{peer.SocketAddr()} msg := cdc.MustMarshalBinaryBare(&pexAddrsMessage{Addrs: addrs}) // receive some addrs. should clear the request @@ -234,7 +234,7 @@ func TestCheckSeeds(t *testing.T) { badPeerConfig = &PEXReactorConfig{ Seeds: []string{"ed3dfd27bfc4af18f67a49862f04cc100696e84d@bad.network.addr:26657", "d824b13cb5d40fa1d8a614e089357c7eff31b670@anotherbad.network.addr:26657", - seed.NodeInfo().NetAddress().String()}, + seed.NetAddress().String()}, } peer = testCreatePeerWithConfig(dir, 2, badPeerConfig) require.Nil(t, peer.Start()) @@ -268,12 +268,13 @@ func TestConnectionSpeedForPeerReceivedFromSeed(t *testing.T) { defer os.RemoveAll(dir) // nolint: errcheck // 1. create peer - peer := testCreateDefaultPeer(dir, 1) - require.Nil(t, peer.Start()) - defer peer.Stop() + peerSwitch := testCreateDefaultPeer(dir, 1) + require.Nil(t, peerSwitch.Start()) + defer peerSwitch.Stop() // 2. Create seed which knows about the peer - seed := testCreateSeed(dir, 2, []*p2p.NetAddress{peer.NodeInfo().NetAddress()}, []*p2p.NetAddress{peer.NodeInfo().NetAddress()}) + peerAddr := peerSwitch.NetAddress() + seed := testCreateSeed(dir, 2, []*p2p.NetAddress{peerAddr}, []*p2p.NetAddress{peerAddr}) require.Nil(t, seed.Start()) defer seed.Stop() @@ -300,7 +301,7 @@ func TestPEXReactorCrawlStatus(t *testing.T) { // Create a peer, add it to the peer set and the addrbook. peer := p2p.CreateRandomPeer(false) p2p.AddPeerToSwitch(pexR.Switch, peer) - addr1 := peer.NodeInfo().NetAddress() + addr1 := peer.SocketAddr() pexR.book.AddAddress(addr1, addr1) // Add a non-connected address to the book. @@ -364,7 +365,7 @@ func TestPEXReactorSeedModeFlushStop(t *testing.T) { reactor := switches[0].Reactors()["pex"].(*PEXReactor) peerID := switches[1].NodeInfo().ID() - err = switches[1].DialPeerWithAddress(switches[0].NodeInfo().NetAddress(), false) + err = switches[1].DialPeerWithAddress(switches[0].NetAddress(), false) assert.NoError(t, err) // sleep up to a second while waiting for the peer to send us a message. 
@@ -402,7 +403,7 @@ func TestPEXReactorDoesNotAddPrivatePeersToAddrBook(t *testing.T) { pexR.RequestAddrs(peer) size := book.Size() - addrs := []*p2p.NetAddress{peer.NodeInfo().NetAddress()} + addrs := []*p2p.NetAddress{peer.SocketAddr()} msg := cdc.MustMarshalBinaryBare(&pexAddrsMessage{Addrs: addrs}) pexR.Receive(PexChannel, peer, msg) assert.Equal(t, size, book.Size()) @@ -419,7 +420,7 @@ func TestPEXReactorDialPeer(t *testing.T) { sw.SetAddrBook(book) peer := newMockPeer() - addr := peer.NodeInfo().NetAddress() + addr := peer.SocketAddr() assert.Equal(t, 0, pexR.AttemptsToDial(addr)) @@ -472,15 +473,17 @@ func (mp mockPeer) NodeInfo() p2p.NodeInfo { ListenAddr: mp.addr.DialString(), } } -func (mockPeer) RemoteIP() net.IP { return net.ParseIP("127.0.0.1") } -func (mockPeer) Status() conn.ConnectionStatus { return conn.ConnectionStatus{} } -func (mockPeer) Send(byte, []byte) bool { return false } -func (mockPeer) TrySend(byte, []byte) bool { return false } -func (mockPeer) Set(string, interface{}) {} -func (mockPeer) Get(string) interface{} { return nil } -func (mockPeer) OriginalAddr() *p2p.NetAddress { return nil } -func (mockPeer) RemoteAddr() net.Addr { return &net.TCPAddr{IP: net.ParseIP("127.0.0.1"), Port: 8800} } -func (mockPeer) CloseConn() error { return nil } +func (mp mockPeer) RemoteIP() net.IP { return net.ParseIP("127.0.0.1") } +func (mp mockPeer) Status() conn.ConnectionStatus { return conn.ConnectionStatus{} } +func (mp mockPeer) Send(byte, []byte) bool { return false } +func (mp mockPeer) TrySend(byte, []byte) bool { return false } +func (mp mockPeer) Set(string, interface{}) {} +func (mp mockPeer) Get(string) interface{} { return nil } +func (mp mockPeer) SocketAddr() *p2p.NetAddress { return mp.addr } +func (mp mockPeer) RemoteAddr() net.Addr { + return &net.TCPAddr{IP: net.ParseIP("127.0.0.1"), Port: 8800} +} +func (mp mockPeer) CloseConn() error { return nil } func assertPeersWithTimeout( t *testing.T, @@ -590,7 +593,7 @@ func testCreateSeed(dir string, id int, knownAddrs, srcAddrs []*p2p.NetAddress) // Starting and stopping the peer is left to the caller func testCreatePeerWithSeed(dir string, id int, seed *p2p.Switch) *p2p.Switch { conf := &PEXReactorConfig{ - Seeds: []string{seed.NodeInfo().NetAddress().String()}, + Seeds: []string{seed.NetAddress().String()}, } return testCreatePeerWithConfig(dir, id, conf) } diff --git a/p2p/switch.go b/p2p/switch.go index 383543b8844..7be8a87b6c0 100644 --- a/p2p/switch.go +++ b/p2p/switch.go @@ -86,6 +86,12 @@ type Switch struct { metrics *Metrics } +// NetAddress returns the address the switch is listening on. +func (sw *Switch) NetAddress() *NetAddress { + addr := sw.transport.NetAddress() + return &addr +} + // SwitchOption sets an optional parameter on the Switch. type SwitchOption func(*Switch) @@ -284,13 +290,7 @@ func (sw *Switch) StopPeerForError(peer Peer, reason interface{}) { sw.stopAndRemovePeer(peer, reason) if peer.IsPersistent() { - addr := peer.OriginalAddr() - if addr == nil { - // FIXME: persistent peers can't be inbound right now. - // self-reported address for inbound persistent peers - addr = peer.NodeInfo().NetAddress() - } - go sw.reconnectToPeer(addr) + go sw.reconnectToPeer(peer.SocketAddr()) } } @@ -378,7 +378,7 @@ func (sw *Switch) SetAddrBook(addrBook AddrBook) { // like contributed to consensus. 
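
With the listen address exposed on the Switch itself, callers ask the switch rather than NodeInfo. Condensed from the call sites this patch updates (surrounding variable names hypothetical):

```go
// node.go: add ourselves to the book using the switch's own address
addrBook.AddOurAddress(sw.NetAddress())

// pex tests: advertise and dial a switch via the same accessor
seeds := []string{seed.NetAddress().String()}
err := dialer.DialPeerWithAddress(sw.NetAddress(), false)
```
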
func (sw *Switch) MarkPeerAsGood(peer Peer) { if sw.addrBook != nil { - sw.addrBook.MarkGood(peer.NodeInfo().NetAddress()) + sw.addrBook.MarkGood(peer.SocketAddr()) } } @@ -395,7 +395,7 @@ func (sw *Switch) DialPeersAsync(addrBook AddrBook, peers []string, persistent b sw.Logger.Error("Error in peer's address", "err", err) } - ourAddr := sw.nodeInfo.NetAddress() + ourAddr := sw.NetAddress() // TODO: this code feels like it's in the wrong place. // The integration tests depend on the addrBook being saved @@ -526,7 +526,7 @@ func (sw *Switch) acceptRoutine() { if in >= sw.config.MaxNumInboundPeers { sw.Logger.Info( "Ignoring inbound connection: already have enough inbound peers", - "address", p.NodeInfo().NetAddress().String(), + "address", p.SocketAddr(), "have", in, "max", sw.config.MaxNumInboundPeers, ) @@ -643,7 +643,7 @@ func (sw *Switch) addPeer(p Peer) error { return err } - p.SetLogger(sw.Logger.With("peer", p.NodeInfo().NetAddress())) + p.SetLogger(sw.Logger.With("peer", p.SocketAddr())) // All good. Start peer if sw.IsRunning() { diff --git a/p2p/switch_test.go b/p2p/switch_test.go index 4f8d26b6a5d..2986851d758 100644 --- a/p2p/switch_test.go +++ b/p2p/switch_test.go @@ -161,10 +161,6 @@ func assertMsgReceivedWithTimeout(t *testing.T, msgBytes []byte, channel byte, r func TestSwitchFiltersOutItself(t *testing.T) { s1 := MakeSwitch(cfg, 1, "127.0.0.1", "123.123.123", initSwitchFunc) - // addr := s1.NodeInfo().NetAddress() - - // // add ourselves like we do in node.go#427 - // s1.addrBook.AddOurAddress(addr) // simulate s1 having a public IP by creating a remote peer with the same ID rp := &remotePeer{PrivKey: s1.nodeKey.PrivKey, Config: cfg} @@ -496,7 +492,7 @@ func TestSwitchAcceptRoutine(t *testing.T) { rp := &remotePeer{PrivKey: ed25519.GenPrivKey(), Config: cfg} remotePeers = append(remotePeers, rp) rp.Start() - c, err := rp.Dial(sw.NodeInfo().NetAddress()) + c, err := rp.Dial(sw.NetAddress()) require.NoError(t, err) // spawn a reading routine to prevent connection from closing go func(c net.Conn) { @@ -515,7 +511,7 @@ func TestSwitchAcceptRoutine(t *testing.T) { // 2. 
check we close new connections if we already have MaxNumInboundPeers peers rp := &remotePeer{PrivKey: ed25519.GenPrivKey(), Config: cfg} rp.Start() - conn, err := rp.Dial(sw.NodeInfo().NetAddress()) + conn, err := rp.Dial(sw.NetAddress()) require.NoError(t, err) // check conn is closed one := make([]byte, 1) diff --git a/p2p/test_util.go b/p2p/test_util.go index 31814465e63..eb9ea8a2b53 100644 --- a/p2p/test_util.go +++ b/p2p/test_util.go @@ -35,7 +35,8 @@ func CreateRandomPeer(outbound bool) *peer { addr, netAddr := CreateRoutableAddr() p := &peer{ peerConn: peerConn{ - outbound: outbound, + outbound: outbound, + socketAddr: netAddr, }, nodeInfo: mockNodeInfo{netAddr}, mconn: &conn.MConnection{}, @@ -174,10 +175,15 @@ func MakeSwitch( PrivKey: ed25519.GenPrivKey(), } nodeInfo := testNodeInfo(nodeKey.ID(), fmt.Sprintf("node%d", i)) + addr, err := NewNetAddressString( + IDAddressString(nodeKey.ID(), nodeInfo.(DefaultNodeInfo).ListenAddr), + ) + if err != nil { + panic(err) + } t := NewMultiplexTransport(nodeInfo, nodeKey, MConnConfig(cfg)) - addr := nodeInfo.NetAddress() if err := t.Listen(*addr); err != nil { panic(err) } @@ -214,7 +220,7 @@ func testPeerConn( cfg *config.P2PConfig, outbound, persistent bool, ourNodePrivKey crypto.PrivKey, - originalAddr *NetAddress, + socketAddr *NetAddress, ) (pc peerConn, err error) { conn := rawConn @@ -231,12 +237,7 @@ func testPeerConn( } // Only the information we already have - return peerConn{ - outbound: outbound, - persistent: persistent, - conn: conn, - originalAddr: originalAddr, - }, nil + return newPeerConn(outbound, persistent, conn, socketAddr), nil } //---------------------------------------------------------------- diff --git a/p2p/transport.go b/p2p/transport.go index 2d4420a11ad..a9716b6d79d 100644 --- a/p2p/transport.go +++ b/p2p/transport.go @@ -24,6 +24,7 @@ type IPResolver interface { // accept is the container to carry the upgraded connection and NodeInfo from an // asynchronously running routine to the Accept method. type accept struct { + netAddr *NetAddress conn net.Conn nodeInfo NodeInfo err error @@ -47,6 +48,9 @@ type peerConfig struct { // the transport. Each transport is also responsible to filter establishing // peers specific to its domain. type Transport interface { + // Listening address. + NetAddress() NetAddress + // Accept returns a newly connected Peer. Accept(peerConfig) (Peer, error) @@ -115,6 +119,7 @@ func MultiplexTransportResolver(resolver IPResolver) MultiplexTransportOption { // MultiplexTransport accepts and dials tcp connections and upgrades them to // multiplexed peers. type MultiplexTransport struct { + netAddr NetAddress listener net.Listener acceptc chan accept @@ -161,6 +166,11 @@ func NewMultiplexTransport( } } +// NetAddress implements Transport. +func (mt *MultiplexTransport) NetAddress() NetAddress { + return mt.netAddr +} + // Accept implements Transport. 
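
The transport records its listen address in Listen and surfaces it through the new NetAddress method; Switch.NetAddress simply forwards it. A sketch of the flow, reusing the construction pattern from MakeSwitch above, with the nodeInfo details elided:

```go
package main

import (
	"fmt"

	"github.com/tendermint/tendermint/config"
	"github.com/tendermint/tendermint/crypto/ed25519"
	"github.com/tendermint/tendermint/p2p"
)

func main() {
	cfg := config.DefaultP2PConfig()
	nodeKey := p2p.NodeKey{PrivKey: ed25519.GenPrivKey()}

	var nodeInfo p2p.NodeInfo // built as in MakeSwitch above; elided here

	addr, err := p2p.NewNetAddressString(
		p2p.IDAddressString(nodeKey.ID(), "127.0.0.1:26656"),
	)
	if err != nil {
		panic(err)
	}

	t := p2p.NewMultiplexTransport(nodeInfo, nodeKey, p2p.MConnConfig(cfg))
	if err := t.Listen(*addr); err != nil {
		panic(err)
	}

	na := t.NetAddress() // the single source of truth for our dialable address
	fmt.Println(na.String())
}
```
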
func (mt *MultiplexTransport) Accept(cfg peerConfig) (Peer, error) {
	select {
@@ -173,7 +183,7 @@ func (mt *MultiplexTransport) Accept(cfg peerConfig) (Peer, error) {
 
 		cfg.outbound = false
 
-		return mt.wrapPeer(a.conn, a.nodeInfo, cfg, nil), nil
+		return mt.wrapPeer(a.conn, a.nodeInfo, cfg, a.netAddr), nil
 	case <-mt.closec:
 		return nil, &ErrTransportClosed{}
 	}
@@ -224,6 +234,7 @@ func (mt *MultiplexTransport) Listen(addr NetAddress) error {
 		return err
 	}
 
+	mt.netAddr = addr
 	mt.listener = ln
 
 	go mt.acceptPeers()
@@ -258,15 +269,21 @@ func (mt *MultiplexTransport) acceptPeers() {
 			var (
 				nodeInfo   NodeInfo
 				secretConn *conn.SecretConnection
+				netAddr    *NetAddress
 			)
 
 			err := mt.filterConn(c)
 			if err == nil {
 				secretConn, nodeInfo, err = mt.upgrade(c)
+				if err == nil {
+					addr := c.RemoteAddr()
+					id := PubKeyToID(secretConn.RemotePubKey())
+					netAddr = NewNetAddress(id, addr)
+				}
 			}
 
 			select {
-			case mt.acceptc <- accept{secretConn, nodeInfo, err}:
+			case mt.acceptc <- accept{netAddr, secretConn, nodeInfo, err}:
 				// Make the upgraded peer available.
 			case <-mt.closec:
 				// Give up if the transport was closed.
@@ -408,14 +425,14 @@ func (mt *MultiplexTransport) wrapPeer(
 	c net.Conn,
 	ni NodeInfo,
 	cfg peerConfig,
-	dialedAddr *NetAddress,
+	socketAddr *NetAddress,
 ) Peer {
 
 	peerConn := newPeerConn(
 		cfg.outbound,
 		cfg.persistent,
 		c,
-		dialedAddr,
+		socketAddr,
 	)
 
 	p := newPeer(
diff --git a/rpc/core/consensus.go b/rpc/core/consensus.go
index 9968a1b2aa4..891c13b1b9f 100644
--- a/rpc/core/consensus.go
+++ b/rpc/core/consensus.go
@@ -213,7 +213,7 @@ func DumpConsensusState() (*ctypes.ResultDumpConsensusState, error) {
 		}
 		peerStates[i] = ctypes.PeerStateInfo{
 			// Peer basic info.
-			NodeAddress: peer.NodeInfo().NetAddress().String(),
+			NodeAddress: peer.SocketAddr().String(),
 			// Peer consensus state.
 			PeerState: peerStateJSON,
 		}

From 03ebfe5ce478486022a68a84e31f1e99d6010e9e Mon Sep 17 00:00:00 2001
From: chengwenxi
Date: Wed, 26 Jun 2019 11:01:16 +0800
Subject: [PATCH 11/26] Bring back NodeInfo NetAddress from the dead

---
 p2p/node_info.go            | 20 +++++++-------------
 p2p/pex/addrbook.go         |  6 +++---
 p2p/pex/addrbook_test.go    | 10 +++++-----
 p2p/pex/pex_reactor.go      | 15 ++++++++++++---
 p2p/pex/pex_reactor_test.go |  2 +-
 p2p/switch.go               |  4 ++--
 p2p/switch_test.go          |  2 +-
 p2p/test_util.go            |  2 +-
 8 files changed, 32 insertions(+), 29 deletions(-)

diff --git a/p2p/node_info.go b/p2p/node_info.go
index ebbd8cd6f19..566f8a821b0 100644
--- a/p2p/node_info.go
+++ b/p2p/node_info.go
@@ -24,9 +24,14 @@ func MaxNodeInfoSize() int {
 // and determines if we're compatible.
 type NodeInfo interface {
 	ID() ID
+	nodeInfoAddress
 	nodeInfoTransport
 }
 
+type nodeInfoAddress interface {
+	NetAddress() (*NetAddress, error)
+}
+
 // nodeInfoTransport validates a nodeInfo and checks
 // our compatibility with it. It's for use in the handshake.
 type nodeInfoTransport interface {
@@ -209,20 +214,9 @@ OUTER_LOOP:
 // it includes the authenticated peer ID and the self-reported
 // ListenAddr. Note that the ListenAddr is not authenticated and
 // may not match that address actually dialed if its an outbound peer.
-func (info DefaultNodeInfo) NetAddress() *NetAddress {
+func (info DefaultNodeInfo) NetAddress() (*NetAddress, error) {
 	idAddr := IDAddressString(info.ID(), info.ListenAddr)
-	netAddr, err := NewNetAddressString(idAddr)
-	if err != nil {
-		switch err.(type) {
-		case ErrNetAddressLookup:
-			// XXX If the peer provided a host name and the lookup fails here
-			// we're out of luck.
- // TODO: use a NetAddress in DefaultNodeInfo - default: - panic(err) // everything should be well formed by now - } - } - return netAddr + return NewNetAddressString(idAddr) } //----------------------------------------------------------- diff --git a/p2p/pex/addrbook.go b/p2p/pex/addrbook.go index 3cda9ac7473..717c9dff145 100644 --- a/p2p/pex/addrbook.go +++ b/p2p/pex/addrbook.go @@ -54,7 +54,7 @@ type AddrBook interface { PickAddress(biasTowardsNewAddrs int) *p2p.NetAddress // Mark address - MarkGood(*p2p.NetAddress) + MarkGood(p2p.ID) MarkAttempt(*p2p.NetAddress) MarkBad(*p2p.NetAddress) @@ -296,11 +296,11 @@ func (a *addrBook) PickAddress(biasTowardsNewAddrs int) *p2p.NetAddress { // MarkGood implements AddrBook - it marks the peer as good and // moves it into an "old" bucket. -func (a *addrBook) MarkGood(addr *p2p.NetAddress) { +func (a *addrBook) MarkGood(id p2p.ID) { a.mtx.Lock() defer a.mtx.Unlock() - ka := a.addrLookup[addr.ID] + ka := a.addrLookup[id] if ka == nil { return } diff --git a/p2p/pex/addrbook_test.go b/p2p/pex/addrbook_test.go index 78dd7bce338..c1df4f929dd 100644 --- a/p2p/pex/addrbook_test.go +++ b/p2p/pex/addrbook_test.go @@ -8,8 +8,8 @@ import ( "os" "testing" - "github.com/stretchr/testify/require" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" cmn "github.com/tendermint/tendermint/libs/common" "github.com/tendermint/tendermint/libs/log" "github.com/tendermint/tendermint/p2p" @@ -40,7 +40,7 @@ func TestAddrBookPickAddress(t *testing.T) { assert.NotNil(t, addr, "expected an address") // pick an address when we only have old address - book.MarkGood(addrSrc.addr) + book.MarkGood(addrSrc.addr.ID) addr = book.PickAddress(0) assert.NotNil(t, addr, "expected an address") addr = book.PickAddress(50) @@ -125,7 +125,7 @@ func TestAddrBookPromoteToOld(t *testing.T) { // Promote half of them for i, addrSrc := range randAddrs { if i%2 == 0 { - book.MarkGood(addrSrc.addr) + book.MarkGood(addrSrc.addr.ID) } } @@ -329,7 +329,7 @@ func TestAddrBookGetSelectionWithBias(t *testing.T) { randAddrsLen := len(randAddrs) for i, addrSrc := range randAddrs { if int((float64(i)/float64(randAddrsLen))*100) >= 20 { - book.MarkGood(addrSrc.addr) + book.MarkGood(addrSrc.addr.ID) } } @@ -571,7 +571,7 @@ func createAddrBookWithMOldAndNNewAddrs(t *testing.T, nOld, nNew int) (book *add randAddrs := randNetAddressPairs(t, nOld) for _, addr := range randAddrs { book.AddAddress(addr.addr, addr.src) - book.MarkGood(addr.addr) + book.MarkGood(addr.addr.ID) } randAddrs = randNetAddressPairs(t, nNew) diff --git a/p2p/pex/pex_reactor.go b/p2p/pex/pex_reactor.go index 94978f2f335..27d5078124c 100644 --- a/p2p/pex/pex_reactor.go +++ b/p2p/pex/pex_reactor.go @@ -167,12 +167,18 @@ func (r *PEXReactor) AddPeer(p Peer) { } } else { // inbound peer is its own source - addr := p.SocketAddr() + addr, err := p.NodeInfo().NetAddress() + if err != nil { + r.Logger.Error("Failed to get peer NetAddress", "err", err, "peer", p) + return + } + + // Make it explicit that addr and src are the same for an inbound peer. src := addr // add to book. dont RequestAddrs right away because // we don't trust inbound as much - let ensurePeersRoutine handle it. 
- err := r.book.AddAddress(addr, src) + err = r.book.AddAddress(addr, src) r.logErrAddrBook(err) } } @@ -309,7 +315,10 @@ func (r *PEXReactor) ReceiveAddrs(addrs []*p2p.NetAddress, src Peer) error { } r.requestsSent.Delete(id) - srcAddr := src.SocketAddr() + srcAddr, err := src.NodeInfo().NetAddress() + if err != nil { + return err + } for _, netAddr := range addrs { // Validate netAddr. Disconnect from a peer if it sends us invalid data. if netAddr == nil { diff --git a/p2p/pex/pex_reactor_test.go b/p2p/pex/pex_reactor_test.go index e232797521f..1b648954b1f 100644 --- a/p2p/pex/pex_reactor_test.go +++ b/p2p/pex/pex_reactor_test.go @@ -574,7 +574,7 @@ func testCreateSeed(dir string, id int, knownAddrs, srcAddrs []*p2p.NetAddress) book.SetLogger(log.TestingLogger()) for j := 0; j < len(knownAddrs); j++ { book.AddAddress(knownAddrs[j], srcAddrs[j]) - book.MarkGood(knownAddrs[j]) + book.MarkGood(knownAddrs[j].ID) } sw.SetAddrBook(book) diff --git a/p2p/switch.go b/p2p/switch.go index 7be8a87b6c0..216c4931756 100644 --- a/p2p/switch.go +++ b/p2p/switch.go @@ -46,7 +46,7 @@ type AddrBook interface { AddAddress(addr *NetAddress, src *NetAddress) error AddOurAddress(*NetAddress) OurAddress(*NetAddress) bool - MarkGood(*NetAddress) + MarkGood(ID) RemoveAddress(*NetAddress) HasAddress(*NetAddress) bool Save() @@ -378,7 +378,7 @@ func (sw *Switch) SetAddrBook(addrBook AddrBook) { // like contributed to consensus. func (sw *Switch) MarkPeerAsGood(peer Peer) { if sw.addrBook != nil { - sw.addrBook.MarkGood(peer.SocketAddr()) + sw.addrBook.MarkGood(peer.ID()) } } diff --git a/p2p/switch_test.go b/p2p/switch_test.go index 2986851d758..c6a8cd8aa71 100644 --- a/p2p/switch_test.go +++ b/p2p/switch_test.go @@ -582,7 +582,7 @@ func (book *addrBookMock) OurAddress(addr *NetAddress) bool { _, ok := book.ourAddrs[addr.String()] return ok } -func (book *addrBookMock) MarkGood(*NetAddress) {} +func (book *addrBookMock) MarkGood(ID) {} func (book *addrBookMock) HasAddress(addr *NetAddress) bool { _, ok := book.addrs[addr.String()] return ok diff --git a/p2p/test_util.go b/p2p/test_util.go index eb9ea8a2b53..bf485cd6b80 100644 --- a/p2p/test_util.go +++ b/p2p/test_util.go @@ -23,7 +23,7 @@ type mockNodeInfo struct { } func (ni mockNodeInfo) ID() ID { return ni.addr.ID } -func (ni mockNodeInfo) NetAddress() *NetAddress { return ni.addr } +func (ni mockNodeInfo) NetAddress() (*NetAddress, error) { return ni.addr, nil } func (ni mockNodeInfo) Validate() error { return nil } func (ni mockNodeInfo) CompatibleWith(other NodeInfo) error { return nil } From 9a5e236d30c6d53f04f72d9ff442e6523d7b285c Mon Sep 17 00:00:00 2001 From: chengwenxi Date: Wed, 26 Jun 2019 16:29:22 +0800 Subject: [PATCH 12/26] Reset triggered timeout precommit --- consensus/common_test.go | 10 ++++++++-- consensus/state.go | 5 +++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/consensus/common_test.go b/consensus/common_test.go index d24290c2a7b..e8bb0dbe9a7 100644 --- a/consensus/common_test.go +++ b/consensus/common_test.go @@ -125,15 +125,21 @@ func startTestRound(cs *ConsensusState, height int64, round int) { // Create proposal block from cs1 but sign it with vs func decideProposal(cs1 *ConsensusState, vs *validatorStub, height int64, round int) (proposal *types.Proposal, block *types.Block) { + cs1.mtx.Lock() block, blockParts := cs1.createProposalBlock() + cs1.mtx.Unlock() if block == nil { // on error panic("error creating proposal block") } // Make proposal - polRound, propBlockID := cs1.ValidRound, 
types.BlockID{block.Hash(), blockParts.Header()} + cs1.mtx.RLock() + validRound := cs1.ValidRound + chainID := cs1.state.ChainID + cs1.mtx.RUnlock() + polRound, propBlockID := validRound, types.BlockID{block.Hash(), blockParts.Header()} proposal = types.NewProposal(height, round, polRound, propBlockID) - if err := vs.SignProposal(cs1.state.ChainID, proposal); err != nil { + if err := vs.SignProposal(chainID, proposal); err != nil { panic(err) } return diff --git a/consensus/state.go b/consensus/state.go index 55aa15b32ff..04b95ed7008 100644 --- a/consensus/state.go +++ b/consensus/state.go @@ -78,9 +78,9 @@ type ConsensusState struct { evpool sm.EvidencePool // internal state - mtx sync.RWMutex + mtx sync.RWMutex cstypes.RoundState - state sm.State // State until height-1. + state sm.State // State until height-1. // state changes may be triggered by: msgs from peers, // msgs from ourself, or by timeouts @@ -538,6 +538,7 @@ func (cs *ConsensusState) updateToState(state sm.State) { cs.CommitRound = -1 cs.LastCommit = lastPrecommits cs.LastValidators = state.LastValidators + cs.TriggeredTimeoutPrecommit = false cs.state = state cs.Deprecated = state.Deprecated From e66c115997bfafec4d50d02ce0a772e7703fe324 Mon Sep 17 00:00:00 2001 From: chengwenxi Date: Wed, 26 Jun 2019 16:52:20 +0800 Subject: [PATCH 13/26] Flush wal on stop --- consensus/wal.go | 1 + 1 file changed, 1 insertion(+) diff --git a/consensus/wal.go b/consensus/wal.go index d56ede26d23..26428a4c626 100644 --- a/consensus/wal.go +++ b/consensus/wal.go @@ -116,6 +116,7 @@ func (wal *baseWAL) OnStart() error { // Use Wait() to ensure it's finished shutting down // before cleaning up files. func (wal *baseWAL) OnStop() { + wal.group.Flush() wal.group.Stop() wal.group.Close() } From 9c63a3b5bb35f3f3601b00f6bedd51a4e557f223 Mon Sep 17 00:00:00 2001 From: chengwenxi Date: Wed, 26 Jun 2019 17:56:32 +0800 Subject: [PATCH 14/26] Fix dirty data in peerset --- libs/pubsub/pubsub.go | 2 ++ p2p/switch.go | 45 +++++++++++++++++++------------------------ p2p/switch_test.go | 11 +++++++---- 3 files changed, 29 insertions(+), 29 deletions(-) diff --git a/libs/pubsub/pubsub.go b/libs/pubsub/pubsub.go index b4e392bbdee..c81c5ddde61 100644 --- a/libs/pubsub/pubsub.go +++ b/libs/pubsub/pubsub.go @@ -100,6 +100,8 @@ type Server struct { cmds chan cmd cmdsCap int + // check if we have subscription before + // subscribing or unsubscribing mtx sync.RWMutex subscriptions map[string]map[string]Query // subscriber -> query (string) -> Query } diff --git a/p2p/switch.go b/p2p/switch.go index 383543b8844..cb1811a6929 100644 --- a/p2p/switch.go +++ b/p2p/switch.go @@ -645,43 +645,38 @@ func (sw *Switch) addPeer(p Peer) error { p.SetLogger(sw.Logger.With("peer", p.NodeInfo().NetAddress())) - // All good. Start peer - if sw.IsRunning() { - if err := sw.startInitPeer(p); err != nil { - return err - } - } else { + // Handle the shut down case where the switch has stopped but we're + // concurrently trying to add a peer. + if !sw.IsRunning() { + // XXX should this return an error or just log and terminate? sw.Logger.Error("Won't start a peer - switch is not running", "peer", p) + return nil } - // Add the peer to .peers. - // We start it first so that a peer in the list is safe to Stop. - // It should not err since we already checked peers.Has(). - if err := sw.peers.Add(p); err != nil { + // Start the peer's send/recv routines. + // Must start it before adding it to the peer set + // to prevent Start and Stop from being called concurrently. 
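+	// A minimal sketch of the interleaving this ordering rules out
+	// (illustrative only):
+	//
+	//	sw.peers.Add(p)                  // p visible to other goroutines
+	//	go sw.StopPeerForError(p, err)   // may look p up and call p.Stop()
+	//	p.Start()                        // ...racing with the Stop above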
+ err := p.Start() + if err != nil { + // Should never happen + sw.Logger.Error("Error starting peer", "err", err, "peer", p) return err } - sw.Logger.Info("Added peer", "peer", p) - sw.metrics.Peers.Add(float64(1)) - - return nil -} - -func (sw *Switch) startInitPeer(p Peer) error { - err := p.Start() // spawn send/recv routines - if err != nil { - // Should never happen - sw.Logger.Error( - "Error starting peer", - "err", err, - "peer", p, - ) + // Add the peer to PeerSet. Do this before starting the reactors + // so that if Receive errors, we will find the peer and remove it. + // Add should not err since we already checked peers.Has(). + if err := sw.peers.Add(p); err != nil { return err } + sw.metrics.Peers.Add(float64(1)) + // Start all the reactor protocols on the peer. for _, reactor := range sw.reactors { reactor.AddPeer(p) } + sw.Logger.Info("Added peer", "peer", p) + return nil } diff --git a/p2p/switch_test.go b/p2p/switch_test.go index 4f8d26b6a5d..70bfba96c14 100644 --- a/p2p/switch_test.go +++ b/p2p/switch_test.go @@ -274,6 +274,9 @@ func TestSwitchPeerFilterTimeout(t *testing.T) { func TestSwitchPeerFilterDuplicate(t *testing.T) { sw := MakeSwitch(cfg, 1, "testing", "123.123.123", initSwitchFunc) + err := sw.Start() + defer sw.Stop() + require.NoError(t, err) // simulate remote peer rp := &remotePeer{PrivKey: ed25519.GenPrivKey(), Config: cfg} @@ -294,12 +297,12 @@ func TestSwitchPeerFilterDuplicate(t *testing.T) { } err = sw.addPeer(p) - if err, ok := err.(ErrRejected); ok { - if !err.IsDuplicate() { - t.Errorf("expected peer to be duplicate") + if errRej, ok := err.(ErrRejected); ok { + if !errRej.IsDuplicate() { + t.Errorf("expected peer to be duplicate. got %v", errRej) } } else { - t.Errorf("expected ErrRejected") + t.Errorf("expected ErrRejected, got %v", err) } } From 70dfa7e610057e2ebe59ab03f8d3b9ee03f22f9b Mon Sep 17 00:00:00 2001 From: chengwenxi Date: Wed, 26 Jun 2019 18:23:58 +0800 Subject: [PATCH 15/26] Check all zeroes for secret connection --- p2p/conn/secret_connection.go | 30 +++++++++++++++++++++++++----- p2p/conn/secret_connection_test.go | 17 +++++++++++++++++ 2 files changed, 42 insertions(+), 5 deletions(-) diff --git a/p2p/conn/secret_connection.go b/p2p/conn/secret_connection.go index 802a24067ae..7cc4aa7d940 100644 --- a/p2p/conn/secret_connection.go +++ b/p2p/conn/secret_connection.go @@ -29,7 +29,10 @@ const aeadSizeOverhead = 16 // overhead of poly 1305 authentication tag const aeadKeySize = chacha20poly1305.KeySize const aeadNonceSize = chacha20poly1305.NonceSize -var ErrSmallOrderRemotePubKey = errors.New("detected low order point from remote peer") +var ( + ErrSmallOrderRemotePubKey = errors.New("detected low order point from remote peer") + ErrSharedSecretIsZero = errors.New("shared secret is all zeroes") +) // SecretConnection implements net.Conn. // It is an implementation of the STS protocol. @@ -41,7 +44,6 @@ var ErrSmallOrderRemotePubKey = errors.New("detected low order point from remote // Otherwise they are vulnerable to MITM. // (TODO(ismail): see also https://github.com/tendermint/tendermint/issues/3010) type SecretConnection struct { - // immutable recvSecret *[aeadKeySize]byte sendSecret *[aeadKeySize]byte @@ -90,7 +92,10 @@ func MakeSecretConnection(conn io.ReadWriteCloser, locPrivKey crypto.PrivKey) (* locIsLeast := bytes.Equal(locEphPub[:], loEphPub[:]) // Compute common diffie hellman secret using X25519. 
-	dhSecret := computeDHSecret(remEphPub, locEphPriv)
+	dhSecret, err := computeDHSecret(remEphPub, locEphPriv)
+	if err != nil {
+		return nil, err
+	}
 
 	// generate the secret used for receiving, sending, challenge via hkdf-sha2 on dhSecret
 	recvSecret, sendSecret, challenge := deriveSecretAndChallenge(dhSecret, locIsLeast)
@@ -230,9 +235,12 @@ func (sc *SecretConnection) SetWriteDeadline(t time.Time) error {
 
 func genEphKeys() (ephPub, ephPriv *[32]byte) {
 	var err error
+	// TODO: Probably not a problem but ask Tony: different from the rust implementation (uses x25519-dalek),
+	// we do not "clamp" the private key scalar:
+	// see: https://github.com/dalek-cryptography/x25519-dalek/blob/34676d336049df2bba763cc076a75e47ae1f170f/src/x25519.rs#L56-L74
 	ephPub, ephPriv, err = box.GenerateKey(crand.Reader)
 	if err != nil {
-		panic("Could not generate ephemeral keypairs")
+		panic("Could not generate ephemeral key-pair")
 	}
 	return
 }
@@ -349,9 +357,21 @@ func deriveSecretAndChallenge(dhSecret *[32]byte, locIsLeast bool) (recvSecret,
 	return
 }
 
-func computeDHSecret(remPubKey, locPrivKey *[32]byte) (shrKey *[32]byte) {
+// computeDHSecret computes a Diffie-Hellman shared secret
+// from our own local private key and the other's public key.
+//
+// It returns an error if the computed shared secret is all zeroes.
+func computeDHSecret(remPubKey, locPrivKey *[32]byte) (shrKey *[32]byte, err error) {
 	shrKey = new([32]byte)
 	curve25519.ScalarMult(shrKey, locPrivKey, remPubKey)
+
+	// reject if the returned shared secret is all zeroes
+	// related to: https://github.com/tendermint/tendermint/issues/3010
+	zero := new([32]byte)
+	if subtle.ConstantTimeCompare(shrKey[:], zero[:]) == 1 {
+		return nil, ErrSharedSecretIsZero
+	}
+
 	return
}
diff --git a/p2p/conn/secret_connection_test.go b/p2p/conn/secret_connection_test.go
index 69d9c09fef5..12098c47e60 100644
--- a/p2p/conn/secret_connection_test.go
+++ b/p2p/conn/secret_connection_test.go
@@ -100,8 +100,12 @@ func TestSecretConnectionHandshake(t *testing.T) {
 	}
 }
 
+// Test that shareEphPubKey rejects lower order public keys based on an
+// (incomplete) blacklist.
 func TestShareLowOrderPubkey(t *testing.T) {
 	var fooConn, barConn = makeKVStoreConnPair()
+	defer fooConn.Close()
+	defer barConn.Close()
 	locEphPub, _ := genEphKeys()
 
 	// all blacklisted low order points:
@@ -126,6 +130,19 @@ func TestShareLowOrderPubkey(t *testing.T) {
 	}
 }
 
+// Test additionally that the Diffie-Hellman shared secret is non-zero.
+// The shared secret would be zero for lower order pub-keys (but tested against the blacklist only). 
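+// A short illustration of why the extra check matters (assumes the standard
+// X25519 behavior; zeroPub here stands for any blacklisted low order point):
+//
+//	out, zeroPub := new([32]byte), new([32]byte)
+//	curve25519.ScalarMult(out, locPrivKey, zeroPub) // out remains all zeroes
+//
+// computeDHSecret now rejects exactly this all-zero output.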
+func TestComputeDHFailsOnLowOrder(t *testing.T) { + _, locPrivKey := genEphKeys() + for _, remLowOrderPubKey := range blacklist { + shared, err := computeDHSecret(&remLowOrderPubKey, locPrivKey) + assert.Error(t, err) + + assert.Equal(t, err, ErrSharedSecretIsZero) + assert.Empty(t, shared) + } +} + func TestConcurrentWrite(t *testing.T) { fooSecConn, barSecConn := makeSecretConnPair(t) fooWriteText := cmn.RandStr(dataMaxSize) From 91382735b751c78818cd88ba827aba5b76135cdd Mon Sep 17 00:00:00 2001 From: chengwenxi Date: Wed, 26 Jun 2019 19:56:27 +0800 Subject: [PATCH 16/26] Do not panic when filter times out --- p2p/switch.go | 9 ++++++++- p2p/switch_test.go | 42 ++++++++++++++++++++++++++++++++++++++++++ p2p/transport.go | 2 +- 3 files changed, 51 insertions(+), 2 deletions(-) diff --git a/p2p/switch.go b/p2p/switch.go index 0aba96b5501..841254dd0db 100644 --- a/p2p/switch.go +++ b/p2p/switch.go @@ -499,7 +499,14 @@ func (sw *Switch) acceptRoutine() { ) continue - case *ErrTransportClosed: + case ErrFilterTimeout: + sw.Logger.Error( + "Peer filter timed out", + "err", err, + ) + + continue + case ErrTransportClosed: sw.Logger.Error( "Stopped accept routine, as transport is closed", "numPeers", sw.peers.Size(), diff --git a/p2p/switch_test.go b/p2p/switch_test.go index 5cfbc6ae6a1..b0be305991f 100644 --- a/p2p/switch_test.go +++ b/p2p/switch_test.go @@ -2,6 +2,7 @@ package p2p import ( "bytes" + "errors" "fmt" "io" "io/ioutil" @@ -530,6 +531,47 @@ func TestSwitchAcceptRoutine(t *testing.T) { } } +type errorTransport struct { + acceptErr error +} + +func (et errorTransport) NetAddress() NetAddress { + panic("not implemented") +} + +func (et errorTransport) Accept(c peerConfig) (Peer, error) { + return nil, et.acceptErr +} +func (errorTransport) Dial(NetAddress, peerConfig) (Peer, error) { + panic("not implemented") +} +func (errorTransport) Cleanup(Peer) { + panic("not implemented") +} + +func TestSwitchAcceptRoutineErrorCases(t *testing.T) { + sw := NewSwitch(cfg, errorTransport{ErrFilterTimeout{}}) + assert.NotPanics(t, func() { + err := sw.Start() + assert.NoError(t, err) + sw.Stop() + }) + + sw = NewSwitch(cfg, errorTransport{ErrRejected{conn: nil, err: errors.New("filtered"), isFiltered: true}}) + assert.NotPanics(t, func() { + err := sw.Start() + assert.NoError(t, err) + sw.Stop() + }) + + sw = NewSwitch(cfg, errorTransport{ErrTransportClosed{}}) + assert.NotPanics(t, func() { + err := sw.Start() + assert.NoError(t, err) + sw.Stop() + }) +} + func BenchmarkSwitchBroadcast(b *testing.B) { s1, s2 := MakeSwitchPair(b, func(i int, sw *Switch) *Switch { // Make bar reactors of bar channels each diff --git a/p2p/transport.go b/p2p/transport.go index a9716b6d79d..aa89d1a92be 100644 --- a/p2p/transport.go +++ b/p2p/transport.go @@ -185,7 +185,7 @@ func (mt *MultiplexTransport) Accept(cfg peerConfig) (Peer, error) { return mt.wrapPeer(a.conn, a.nodeInfo, cfg, a.netAddr), nil case <-mt.closec: - return nil, &ErrTransportClosed{} + return nil, ErrTransportClosed{} } } From 4f5acc744de7b4672cebf760dc13564e6888c4f5 Mon Sep 17 00:00:00 2001 From: chengwenxi Date: Thu, 27 Jun 2019 10:24:11 +0800 Subject: [PATCH 17/26] Only log on maxAttemptsToDial+1 --- p2p/pex/pex_reactor.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/p2p/pex/pex_reactor.go b/p2p/pex/pex_reactor.go index 0b043ca831a..c7c1c7b3e1f 100644 --- a/p2p/pex/pex_reactor.go +++ b/p2p/pex/pex_reactor.go @@ -472,7 +472,7 @@ func (r *PEXReactor) dialPeer(addr *p2p.NetAddress) { if attempts > maxAttemptsToDial { // Do not 
log the message if the addr gets readded. - if attempts+1 == maxAttemptsToDial { + if attempts == maxAttemptsToDial+1 { r.Logger.Info("Reached max attempts to dial", "addr", addr, "attempts", attempts) r.attemptsToDial.Store(addr.DialString(), _attemptsToDial{attempts + 1, time.Now()}) } From a8b8b0899caef410c54fee8f0c96b94a15865739 Mon Sep 17 00:00:00 2001 From: chengwenxi Date: Thu, 27 Jun 2019 15:20:55 +0800 Subject: [PATCH 18/26] Log replay with state module --- consensus/replay.go | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/consensus/replay.go b/consensus/replay.go index 782924ede45..f1e5458c31d 100644 --- a/consensus/replay.go +++ b/consensus/replay.go @@ -290,7 +290,8 @@ func (h *Handshaker) ReplayBlocks( ) ([]byte, error) { storeBlockHeight := h.store.Height() stateBlockHeight := state.LastBlockHeight - h.logger.Info("ABCI Replay Blocks", "appHeight", appBlockHeight, "storeHeight", storeBlockHeight, "stateHeight", stateBlockHeight) + logger := h.logger.With("module", "state") + logger.Info("ABCI Replay Blocks", "appHeight", appBlockHeight, "storeHeight", storeBlockHeight, "stateHeight", stateBlockHeight) // If appBlockHeight == 0 it means that we are at genesis and hence should send InitChain. if appBlockHeight == 0 { @@ -381,7 +382,7 @@ func (h *Handshaker) ReplayBlocks( // so replayBlock with the real app. // NOTE: We could instead use the cs.WAL on cs.Start, // but we'd have to allow the WAL to replay a block that wrote it's #ENDHEIGHT - h.logger.Info("Replay last block using real app") + logger.Info("Replay last block using real app") state, err = h.replayBlock(state, storeBlockHeight, proxyApp.Consensus()) return state.AppHash, err @@ -392,7 +393,7 @@ func (h *Handshaker) ReplayBlocks( return nil, err } mockApp := newMockProxyApp(appHash, abciResponses) - h.logger.Info("Replay last block using mock app") + logger.Info("Replay last block using mock app") state, err = h.replayBlock(state, storeBlockHeight, mockApp) return state.AppHash, err } @@ -414,6 +415,8 @@ func (h *Handshaker) replayBlocks(state sm.State, proxyApp proxy.AppConns, appBl // // If mutateState == true, the final block is replayed with h.replayBlock() + logger := h.logger.With("module", "state") + var appHash []byte var err error finalBlock := storeBlockHeight @@ -421,7 +424,7 @@ func (h *Handshaker) replayBlocks(state sm.State, proxyApp proxy.AppConns, appBl finalBlock-- } for i := appBlockHeight + 1; i <= finalBlock; i++ { - h.logger.Info("Applying block", "height", i) + logger.Info("Applying block", "height", i) block := h.store.LoadBlock(i) if len(appHash) != 0 { @@ -430,7 +433,7 @@ func (h *Handshaker) replayBlocks(state sm.State, proxyApp proxy.AppConns, appBl } } - appHash, err = sm.ExecCommitBlock(proxyApp.Consensus(), block, h.logger, state.LastValidators, h.stateDB) + appHash, err = sm.ExecCommitBlock(proxyApp.Consensus(), block, logger, state.LastValidators, h.stateDB) if err != nil { return nil, err } From f419f34c5f58be7fda964dbc4ca8552fd0e2b004 Mon Sep 17 00:00:00 2001 From: chengwenxi Date: Fri, 28 Jun 2019 16:31:09 +0800 Subject: [PATCH 19/26] Fix pool timer leak bug --- blockchain/pool.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/blockchain/pool.go b/blockchain/pool.go index e6be36012c0..eda7d2a5924 100644 --- a/blockchain/pool.go +++ b/blockchain/pool.go @@ -299,6 +299,9 @@ func (pool *BlockPool) removePeer(peerID p2p.ID) { requester.redo(peerID) } } + if p, exist := pool.peers[peerID]; exist && p.timeout != nil { + p.timeout.Stop() + } 
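+		// Stopping the timer here matters: each peer's timeout is a pending
+		// *time.Timer, and without an explicit Stop it would stay live (and
+		// keep its callback reachable) until it fired - the leak this patch
+		// fixes.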
delete(pool.peers, peerID) } From f6ec9ca8dce5d736bcb8e69fb6ba66d9040a0910 Mon Sep 17 00:00:00 2001 From: chengwenxi Date: Fri, 28 Jun 2019 16:54:56 +0800 Subject: [PATCH 20/26] Close Iterator in RemoteDB --- libs/db/remotedb/grpcdb/server.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libs/db/remotedb/grpcdb/server.go b/libs/db/remotedb/grpcdb/server.go index bfe65e6109a..a032292b3d4 100644 --- a/libs/db/remotedb/grpcdb/server.go +++ b/libs/db/remotedb/grpcdb/server.go @@ -138,6 +138,7 @@ func (s *server) SetSync(ctx context.Context, in *protodb.Entity) (*protodb.Noth func (s *server) Iterator(query *protodb.Entity, dis protodb.DB_IteratorServer) error { it := s.db.Iterator(query.Start, query.End) + defer it.Close() return s.handleIterator(it, dis.Send) } @@ -162,6 +163,7 @@ func (s *server) handleIterator(it db.Iterator, sendFunc func(*protodb.Iterator) func (s *server) ReverseIterator(query *protodb.Entity, dis protodb.DB_ReverseIteratorServer) error { it := s.db.ReverseIterator(query.Start, query.End) + defer it.Close() return s.handleIterator(it, dis.Send) } From 10f38b1f068a96108812268bcb5c05946bd6aad5 Mon Sep 17 00:00:00 2001 From: chengwenxi Date: Mon, 1 Jul 2019 14:52:53 +0800 Subject: [PATCH 21/26] Fix lite client proxy endpoints --- lite/proxy/proxy.go | 4 ++-- lite/proxy/query.go | 32 ++++++++++++++++++++++++++++---- lite/proxy/wrapper.go | 4 ++++ 3 files changed, 34 insertions(+), 6 deletions(-) diff --git a/lite/proxy/proxy.go b/lite/proxy/proxy.go index d7ffb27d09d..39baf5a489e 100644 --- a/lite/proxy/proxy.go +++ b/lite/proxy/proxy.go @@ -66,7 +66,7 @@ func RPCRoutes(c rpcclient.Client) map[string]*rpcserver.RPCFunc { "block": rpcserver.NewRPCFunc(c.Block, "height"), "commit": rpcserver.NewRPCFunc(c.Commit, "height"), "tx": rpcserver.NewRPCFunc(c.Tx, "hash,prove"), - "validators": rpcserver.NewRPCFunc(c.Validators, ""), + "validators": rpcserver.NewRPCFunc(c.Validators, "height"), // broadcast API "broadcast_tx_commit": rpcserver.NewRPCFunc(c.BroadcastTxCommit, "tx"), @@ -74,7 +74,7 @@ func RPCRoutes(c rpcclient.Client) map[string]*rpcserver.RPCFunc { "broadcast_tx_async": rpcserver.NewRPCFunc(c.BroadcastTxAsync, "tx"), // abci API - "abci_query": rpcserver.NewRPCFunc(c.ABCIQuery, "path,data,prove"), + "abci_query": rpcserver.NewRPCFunc(c.ABCIQuery, "path,data"), "abci_info": rpcserver.NewRPCFunc(c.ABCIInfo, ""), } } diff --git a/lite/proxy/query.go b/lite/proxy/query.go index 3acf826b87e..5a5f04873b0 100644 --- a/lite/proxy/query.go +++ b/lite/proxy/query.go @@ -2,6 +2,7 @@ package proxy import ( "fmt" + "strings" cmn "github.com/tendermint/tendermint/libs/common" @@ -44,9 +45,7 @@ func GetWithProofOptions(prt *merkle.ProofRuntime, path string, key []byte, opts node rpcclient.Client, cert lite.Verifier) ( *ctypes.ResultABCIQuery, error) { - if !opts.Prove { - return nil, cmn.NewError("require ABCIQueryOptions.Prove to be true") - } + opts.Prove = true res, err := node.ABCIQueryWithOptions(path, key, opts) if err != nil { @@ -77,7 +76,14 @@ func GetWithProofOptions(prt *merkle.ProofRuntime, path string, key []byte, opts if resp.Value != nil { // Value exists // XXX How do we encode the key into a string... 
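			// A sketch of the key path constructed below (assuming the URL
			// key encoding behaves like url.PathEscape): for storeName "acc"
			// and raw key bytes 0x01 0x02, kp.String() renders roughly as
			//
			//	/acc/%01%02
			//
			// which is the string form handed to VerifyValue.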
-			err = prt.VerifyValue(resp.Proof, signedHeader.AppHash, string(resp.Key), resp.Value)
+			storeName, err := parseQueryStorePath(path)
+			if err != nil {
+				return nil, err
+			}
+			kp := merkle.KeyPath{}
+			kp = kp.AppendKey([]byte(storeName), merkle.KeyEncodingURL)
+			kp = kp.AppendKey(resp.Key, merkle.KeyEncodingURL)
+			err = prt.VerifyValue(resp.Proof, signedHeader.AppHash, kp.String(), resp.Value)
 			if err != nil {
 				return nil, cmn.ErrorWrap(err, "Couldn't verify value proof")
 			}
@@ -94,6 +100,24 @@ func GetWithProofOptions(prt *merkle.ProofRuntime, path string, key []byte, opts
 	}
 }
 
+func parseQueryStorePath(path string) (storeName string, err error) {
+	if !strings.HasPrefix(path, "/") {
+		return "", fmt.Errorf("expected path to start with /")
+	}
+
+	paths := strings.SplitN(path[1:], "/", 3)
+	switch {
+	case len(paths) != 3:
+		return "", fmt.Errorf("expected format like /store/<storeName>/key")
+	case paths[0] != "store":
+		return "", fmt.Errorf("expected format like /store/<storeName>/key")
+	case paths[2] != "key":
+		return "", fmt.Errorf("expected format like /store/<storeName>/key")
+	}
+
+	return paths[1], nil
+}
+
 // GetCertifiedCommit gets the signed header for a given height and certifies
 // it. Returns error if unable to get a proven header.
 func GetCertifiedCommit(h int64, client rpcclient.Client, cert lite.Verifier) (types.SignedHeader, error) {
diff --git a/lite/proxy/wrapper.go b/lite/proxy/wrapper.go
index 7ddb3b8addb..c90cdb2755e 100644
--- a/lite/proxy/wrapper.go
+++ b/lite/proxy/wrapper.go
@@ -145,6 +145,10 @@ func (w Wrapper) Commit(height *int64) (*ctypes.ResultCommit, error) {
 	return res, err
 }
 
+func (w Wrapper) RegisterOpDecoder(typ string, dec merkle.OpDecoder) {
+	w.prt.RegisterOpDecoder(typ, dec)
+}
+
 // // WrappedSwitch creates a websocket connection that auto-verifies any info
 // // coming through before passing it along.
 // //

From 2a846bbec5d87c44196541e6bc8354abdcf1ad95 Mon Sep 17 00:00:00 2001
From: chengwenxi
Date: Tue, 2 Jul 2019 14:30:04 +0800
Subject: [PATCH 22/26] Update the maxHeight when a peer is removed

---
 blockchain/pool.go      | 49 ++++++++++++++++++++++++++++++++---------
 blockchain/pool_test.go | 45 ++++++++++++++++++++++++++++++++++++++---
 2 files changed, 81 insertions(+), 13 deletions(-)

diff --git a/blockchain/pool.go b/blockchain/pool.go
index eda7d2a5924..0d792b7af49 100644
--- a/blockchain/pool.go
+++ b/blockchain/pool.go
@@ -69,7 +69,7 @@ type BlockPool struct {
 	height int64 // the lowest key in requesters.
 	// peers
 	peers map[p2p.ID]*bpPeer
-	maxPeerHeight int64
+	maxPeerHeight int64 // the biggest reported height
 
 	// atomic
 	numPending int32 // number of requests pending assignment or block response
@@ -78,6 +78,8 @@ type BlockPool struct {
 	errorsCh chan<- peerError
 }
 
+// NewBlockPool returns a new BlockPool with the height equal to start. Block
+// requests and errors will be sent to requestsCh and errorsCh accordingly.
 func NewBlockPool(start int64, requestsCh chan<- BlockRequest, errorsCh chan<- peerError) *BlockPool {
 	bp := &BlockPool{
 		peers: make(map[p2p.ID]*bpPeer),
@@ -93,15 +95,15 @@ func NewBlockPool(start int64, requestsCh chan<- BlockRequest, errorsCh chan<- p
 	return bp
 }
 
+// OnStart implements cmn.Service by spawning requesters routine and recording
+// pool's start time.
 func (pool *BlockPool) OnStart() error {
 	go pool.makeRequestersRoutine()
 	pool.startTime = time.Now()
 	return nil
 }
 
-func (pool *BlockPool) OnStop() {}
-
-// Run spawns requesters as needed. 
+// spawns requesters as needed func (pool *BlockPool) makeRequestersRoutine() { for { if !pool.IsRunning() { @@ -150,6 +152,8 @@ func (pool *BlockPool) removeTimedoutPeers() { } } +// GetStatus returns pool's height, numPending requests and the number of +// requesters. func (pool *BlockPool) GetStatus() (height int64, numPending int32, lenRequesters int) { pool.mtx.Lock() defer pool.mtx.Unlock() @@ -157,6 +161,7 @@ func (pool *BlockPool) GetStatus() (height int64, numPending int32, lenRequester return pool.height, atomic.LoadInt32(&pool.numPending), len(pool.requesters) } +// IsCaughtUp returns true if this node is caught up, false - otherwise. // TODO: relax conditions, prevent abuse. func (pool *BlockPool) IsCaughtUp() bool { pool.mtx.Lock() @@ -170,8 +175,9 @@ func (pool *BlockPool) IsCaughtUp() bool { // Some conditions to determine if we're caught up. // Ensures we've either received a block or waited some amount of time, - // and that we're synced to the highest known height. Note we use maxPeerHeight - 1 - // because to sync block H requires block H+1 to verify the LastCommit. + // and that we're synced to the highest known height. + // Note we use maxPeerHeight - 1 because to sync block H requires block H+1 + // to verify the LastCommit. receivedBlockOrTimedOut := pool.height > 0 || time.Since(pool.startTime) > 5*time.Second ourChainIsLongestAmongPeers := pool.maxPeerHeight == 0 || pool.height >= (pool.maxPeerHeight-1) isCaughtUp := receivedBlockOrTimedOut && ourChainIsLongestAmongPeers @@ -260,7 +266,7 @@ func (pool *BlockPool) AddBlock(peerID p2p.ID, block *types.Block, blockSize int } } -// MaxPeerHeight returns the highest height reported by a peer. +// MaxPeerHeight returns the highest reported height. func (pool *BlockPool) MaxPeerHeight() int64 { pool.mtx.Lock() defer pool.mtx.Unlock() @@ -286,6 +292,8 @@ func (pool *BlockPool) SetPeerHeight(peerID p2p.ID, height int64) { } } +// RemovePeer removes the peer with peerID from the pool. If there's no peer +// with peerID, function is a no-op. func (pool *BlockPool) RemovePeer(peerID p2p.ID) { pool.mtx.Lock() defer pool.mtx.Unlock() @@ -299,10 +307,31 @@ func (pool *BlockPool) removePeer(peerID p2p.ID) { requester.redo(peerID) } } - if p, exist := pool.peers[peerID]; exist && p.timeout != nil { - p.timeout.Stop() + peer, ok := pool.peers[peerID] + if ok { + if peer.timeout != nil { + peer.timeout.Stop() + } + + delete(pool.peers, peerID) + + // Find a new peer with the biggest height and update maxPeerHeight if the + // peer's height was the biggest. + if peer.height == pool.maxPeerHeight { + pool.updateMaxPeerHeight() + } + } +} + +// If no peers are left, maxPeerHeight is set to 0. +func (pool *BlockPool) updateMaxPeerHeight() { + var max int64 + for _, peer := range pool.peers { + if peer.height > max { + max = peer.height + } } - delete(pool.peers, peerID) + pool.maxPeerHeight = max } // Pick an available peer with at least the given minHeight. 
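// A quick worked example of the new bookkeeping (illustrative only; the
// test added below exercises the same path):
//
//	pool.SetPeerHeight("a", 5)
//	pool.SetPeerHeight("b", 10) // maxPeerHeight == 10
//	pool.RemovePeer("b")        // "b" held the max, so the pool rescans: 5
//	pool.RemovePeer("a")        // no peers left: maxPeerHeight == 0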
diff --git a/blockchain/pool_test.go b/blockchain/pool_test.go index 75a03f631c1..6fb261c58eb 100644 --- a/blockchain/pool_test.go +++ b/blockchain/pool_test.go @@ -1,12 +1,14 @@ package blockchain import ( + "fmt" "testing" "time" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" cmn "github.com/tendermint/tendermint/libs/common" "github.com/tendermint/tendermint/libs/log" - "github.com/tendermint/tendermint/p2p" "github.com/tendermint/tendermint/types" ) @@ -66,7 +68,7 @@ func makePeers(numPeers int, minHeight, maxHeight int64) testPeers { return peers } -func TestBasic(t *testing.T) { +func TestBlockPoolBasic(t *testing.T) { start := int64(42) peers := makePeers(10, start+1, 1000) errorsCh := make(chan peerError, 1000) @@ -122,7 +124,7 @@ func TestBasic(t *testing.T) { } } -func TestTimeout(t *testing.T) { +func TestBlockPoolTimeout(t *testing.T) { start := int64(42) peers := makePeers(10, start+1, 1000) errorsCh := make(chan peerError, 1000) @@ -180,3 +182,40 @@ func TestTimeout(t *testing.T) { } } } + +func TestBlockPoolRemovePeer(t *testing.T) { + peers := make(testPeers, 10) + for i := 0; i < 10; i++ { + peerID := p2p.ID(fmt.Sprintf("%d", i+1)) + height := int64(i + 1) + peers[peerID] = testPeer{peerID, height, make(chan inputData)} + } + requestsCh := make(chan BlockRequest) + errorsCh := make(chan peerError) + + pool := NewBlockPool(1, requestsCh, errorsCh) + pool.SetLogger(log.TestingLogger()) + err := pool.Start() + require.NoError(t, err) + defer pool.Stop() + + // add peers + for peerID, peer := range peers { + pool.SetPeerHeight(peerID, peer.height) + } + assert.EqualValues(t, 10, pool.MaxPeerHeight()) + + // remove not-existing peer + assert.NotPanics(t, func() { pool.RemovePeer(p2p.ID("Superman")) }) + + // remove peer with biggest height + pool.RemovePeer(p2p.ID("10")) + assert.EqualValues(t, 9, pool.MaxPeerHeight()) + + // remove all peers + for peerID := range peers { + pool.RemovePeer(peerID) + } + + assert.EqualValues(t, 0, pool.MaxPeerHeight()) +} From e907c56b4da10a138e9f76a5594f6d24bb83d7c3 Mon Sep 17 00:00:00 2001 From: chengwenxi Date: Wed, 3 Jul 2019 10:50:54 +0800 Subject: [PATCH 23/26] Update mempool to v0.31.2 --- mempool/bench_test.go | 16 ++- mempool/cache_test.go | 101 ++++++++++++++++ mempool/mempool.go | 256 ++++++++++++++++++++++++++++++---------- mempool/mempool_test.go | 248 +++++++++++++++++++++++++++++++------- mempool/metrics.go | 18 ++- mempool/reactor.go | 109 +++++++++++++++-- mempool/reactor_test.go | 68 ++++++++++- 7 files changed, 685 insertions(+), 131 deletions(-) create mode 100644 mempool/cache_test.go diff --git a/mempool/bench_test.go b/mempool/bench_test.go index 68b033caae7..0cd394cd60f 100644 --- a/mempool/bench_test.go +++ b/mempool/bench_test.go @@ -11,7 +11,8 @@ import ( func BenchmarkReap(b *testing.B) { app := kvstore.NewKVStoreApplication() cc := proxy.NewLocalClientCreator(app) - mempool := newMempoolWithApp(cc) + mempool, cleanup := newMempoolWithApp(cc) + defer cleanup() size := 10000 for i := 0; i < size; i++ { @@ -25,6 +26,19 @@ func BenchmarkReap(b *testing.B) { } } +func BenchmarkCheckTx(b *testing.B) { + app := kvstore.NewKVStoreApplication() + cc := proxy.NewLocalClientCreator(app) + mempool, cleanup := newMempoolWithApp(cc) + defer cleanup() + + for i := 0; i < b.N; i++ { + tx := make([]byte, 8) + binary.BigEndian.PutUint64(tx, uint64(i)) + mempool.CheckTx(tx, nil) + } +} + func BenchmarkCacheInsertTime(b *testing.B) { cache := newMapTxCache(b.N) txs := make([][]byte, b.N) diff --git 
a/mempool/cache_test.go b/mempool/cache_test.go new file mode 100644 index 00000000000..ea9f63fd67b --- /dev/null +++ b/mempool/cache_test.go @@ -0,0 +1,101 @@ +package mempool + +import ( + "crypto/rand" + "crypto/sha256" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/tendermint/tendermint/abci/example/kvstore" + "github.com/tendermint/tendermint/proxy" + "github.com/tendermint/tendermint/types" +) + +func TestCacheRemove(t *testing.T) { + cache := newMapTxCache(100) + numTxs := 10 + txs := make([][]byte, numTxs) + for i := 0; i < numTxs; i++ { + // probability of collision is 2**-256 + txBytes := make([]byte, 32) + rand.Read(txBytes) // nolint: gosec + txs[i] = txBytes + cache.Push(txBytes) + // make sure its added to both the linked list and the map + require.Equal(t, i+1, len(cache.map_)) + require.Equal(t, i+1, cache.list.Len()) + } + for i := 0; i < numTxs; i++ { + cache.Remove(txs[i]) + // make sure its removed from both the map and the linked list + require.Equal(t, numTxs-(i+1), len(cache.map_)) + require.Equal(t, numTxs-(i+1), cache.list.Len()) + } +} + +func TestCacheAfterUpdate(t *testing.T) { + app := kvstore.NewKVStoreApplication() + cc := proxy.NewLocalClientCreator(app) + mempool, cleanup := newMempoolWithApp(cc) + defer cleanup() + + // reAddIndices & txsInCache can have elements > numTxsToCreate + // also assumes max index is 255 for convenience + // txs in cache also checks order of elements + tests := []struct { + numTxsToCreate int + updateIndices []int + reAddIndices []int + txsInCache []int + }{ + {1, []int{}, []int{1}, []int{1, 0}}, // adding new txs works + {2, []int{1}, []int{}, []int{1, 0}}, // update doesn't remove tx from cache + {2, []int{2}, []int{}, []int{2, 1, 0}}, // update adds new tx to cache + {2, []int{1}, []int{1}, []int{1, 0}}, // re-adding after update doesn't make dupe + } + for tcIndex, tc := range tests { + for i := 0; i < tc.numTxsToCreate; i++ { + tx := types.Tx{byte(i)} + err := mempool.CheckTx(tx, nil) + require.NoError(t, err) + } + + updateTxs := []types.Tx{} + for _, v := range tc.updateIndices { + tx := types.Tx{byte(v)} + updateTxs = append(updateTxs, tx) + } + mempool.Update(int64(tcIndex), updateTxs, nil, nil) + + for _, v := range tc.reAddIndices { + tx := types.Tx{byte(v)} + _ = mempool.CheckTx(tx, nil) + } + + cache := mempool.cache.(*mapTxCache) + node := cache.list.Front() + counter := 0 + for node != nil { + require.NotEqual(t, len(tc.txsInCache), counter, + "cache larger than expected on testcase %d", tcIndex) + + nodeVal := node.Value.([sha256.Size]byte) + expectedBz := sha256.Sum256([]byte{byte(tc.txsInCache[len(tc.txsInCache)-counter-1])}) + // Reference for reading the errors: + // >>> sha256('\x00').hexdigest() + // '6e340b9cffb37a989ca544e6bb780a2c78901d3fb33738768511a30617afa01d' + // >>> sha256('\x01').hexdigest() + // '4bf5122f344554c53bde2ebb8cd2b7e3d1600ad631c385a5d7cce23c7785459a' + // >>> sha256('\x02').hexdigest() + // 'dbc1b4c900ffe48d575b5da5c638040125f65db0fe3e24494b76ea986457d986' + + require.Equal(t, expectedBz, nodeVal, "Equality failed on index %d, tc %d", counter, tcIndex) + counter++ + node = node.Next() + } + require.Equal(t, len(tc.txsInCache), counter, + "cache smaller than expected on testcase %d", tcIndex) + mempool.Flush() + } +} diff --git a/mempool/mempool.go b/mempool/mempool.go index 9069dab62a9..a6231bac5d0 100644 --- a/mempool/mempool.go +++ b/mempool/mempool.go @@ -31,45 +31,59 @@ type PreCheckFunc func(types.Tx) error // transaction doesn't require more gas than 
available for the block. type PostCheckFunc func(types.Tx, *abci.ResponseCheckTx) error -/* +// TxInfo are parameters that get passed when attempting to add a tx to the +// mempool. +type TxInfo struct { + // We don't use p2p.ID here because it's too big. The gain is to store max 2 + // bytes with each tx to identify the sender rather than 20 bytes. + PeerID uint16 +} +/* The mempool pushes new txs onto the proxyAppConn. It gets a stream of (req, res) tuples from the proxy. The mempool stores good txs in a concurrent linked-list. - Multiple concurrent go-routines can traverse this linked-list safely by calling .NextWait() on each element. - So we have several go-routines: 1. Consensus calling Update() and Reap() synchronously 2. Many mempool reactor's peer routines calling CheckTx() 3. Many mempool reactor's peer routines traversing the txs linked list 4. Another goroutine calling GarbageCollectTxs() periodically - To manage these goroutines, there are three methods of locking. 1. Mutations to the linked-list is protected by an internal mtx (CList is goroutine-safe) 2. Mutations to the linked-list elements are atomic 3. CheckTx() calls can be paused upon Update() and Reap(), protected by .proxyMtx - Garbage collection of old elements from mempool.txs is handlde via the DetachPrev() call, which makes old elements not reachable by peer broadcastTxRoutine() automatically garbage collected. - TODO: Better handle abci client errors. (make it automatically handle connection errors) - */ var ( // ErrTxInCache is returned to the client if we saw tx earlier ErrTxInCache = errors.New("Tx already exists in cache") - // ErrMempoolIsFull means Tendermint & an application can't handle that much load - ErrMempoolIsFull = errors.New("Mempool is full") - // ErrTxTooLarge means the tx is too big to be sent in a message to other peers ErrTxTooLarge = fmt.Errorf("Tx too large. Max size is %d", maxTxSize) ) +// ErrMempoolIsFull means Tendermint & an application can't handle that much load +type ErrMempoolIsFull struct { + numTxs int + maxTxs int + + txsBytes int64 + maxTxsBytes int64 +} + +func (e ErrMempoolIsFull) Error() string { + return fmt.Sprintf( + "Mempool is full: number of txs %d (max: %d), total txs bytes %d (max: %d)", + e.numTxs, e.maxTxs, + e.txsBytes, e.maxTxsBytes) +} + // ErrPreCheck is returned when tx is too big type ErrPreCheck struct { Reason error @@ -128,6 +142,11 @@ func TxID(tx []byte) string { return fmt.Sprintf("%X", types.Tx(tx).Hash()) } +// txKey is the fixed length array sha256 hash used as the key in maps. +func txKey(tx types.Tx) [sha256.Size]byte { + return sha256.Sum256(tx) +} + // Mempool is an ordered in-memory pool for transactions before they are proposed in a consensus // round. Transaction validity is checked using the CheckTx abci message before the transaction is // added to the pool. 
The Mempool uses a concurrent list structure for storing transactions that @@ -135,17 +154,30 @@ func TxID(tx []byte) string { type Mempool struct { config *cfg.MempoolConfig - proxyMtx sync.Mutex - proxyAppConn proxy.AppConnMempool - txs *clist.CList // concurrent linked-list of good txs - height int64 // the last block Update()'d to - rechecking int32 // for re-checking filtered txs on Update() - recheckCursor *clist.CElement // next expected response - recheckEnd *clist.CElement // re-checking stops here + proxyMtx sync.Mutex + proxyAppConn proxy.AppConnMempool + txs *clist.CList // concurrent linked-list of good txs + preCheck PreCheckFunc + postCheck PostCheckFunc + + // Track whether we're rechecking txs. + // These are not protected by a mutex and are expected to be mutated + // in serial (ie. by abci responses which are called in serial). + recheckCursor *clist.CElement // next expected response + recheckEnd *clist.CElement // re-checking stops here + + // notify listeners (ie. consensus) when txs are available notifiedTxsAvailable bool txsAvailable chan struct{} // fires once for each height, when the mempool is not empty - preCheck PreCheckFunc - postCheck PostCheckFunc + + // Map for quick access to txs to record sender in CheckTx. + // txsMap: txKey -> CElement + txsMap sync.Map + + // Atomic integers + height int64 // the last block Update()'d to + rechecking int32 // for re-checking filtered txs on Update() + txsBytes int64 // total size of mempool, in bytes // Keep a cache of already-seen txs. // This reduces the pressure on the proxyApp. @@ -185,7 +217,7 @@ func NewMempool( } else { mempool.cache = nopTxCache{} } - proxyAppConn.SetResponseCallback(mempool.resCb) + proxyAppConn.SetResponseCallback(mempool.globalCb) for _, option := range options { option(mempool) } @@ -265,8 +297,13 @@ func (mem *Mempool) Size() int { return mem.txs.Len() } -// Flushes the mempool connection to ensure async resCb calls are done e.g. -// from CheckTx. +// TxsBytes returns the total size of all txs in the mempool. +func (mem *Mempool) TxsBytes() int64 { + return atomic.LoadInt64(&mem.txsBytes) +} + +// FlushAppConn flushes the mempool connection to ensure async reqResCb calls are +// done. E.g. from CheckTx. func (mem *Mempool) FlushAppConn() error { return mem.proxyAppConn.FlushSync() } @@ -282,6 +319,9 @@ func (mem *Mempool) Flush() { mem.txs.Remove(e) e.DetachPrev() } + + mem.txsMap = sync.Map{} + _ = atomic.SwapInt64(&mem.txsBytes, 0) } // TxsFront returns the first transaction in the ordered list for peer @@ -304,12 +344,26 @@ func (mem *Mempool) TxsWaitChan() <-chan struct{} { // It gets called from another goroutine. // CONTRACT: Either cb will get called, or err returned. func (mem *Mempool) CheckTx(tx types.Tx, cb func(*abci.Response)) (err error) { + return mem.CheckTxWithInfo(tx, cb, TxInfo{PeerID: UnknownPeerID}) +} + +// CheckTxWithInfo performs the same operation as CheckTx, but with extra meta data about the tx. +// Currently this metadata is the peer who sent it, +// used to prevent the tx from being gossiped back to them. 
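+// A minimal call-site sketch (assumes the caller has already mapped the
+// sending peer to its compact uint16 ID, as the mempool reactor does):
+//
+//	mem.CheckTxWithInfo(tx, nil, TxInfo{PeerID: peerID})
+//
+// Plain CheckTx is equivalent to passing UnknownPeerID here.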
+func (mem *Mempool) CheckTxWithInfo(tx types.Tx, cb func(*abci.Response), txInfo TxInfo) (err error) { mem.proxyMtx.Lock() // use defer to unlock mutex because application (*local client*) might panic defer mem.proxyMtx.Unlock() - if mem.Size() >= mem.config.Size { - return ErrMempoolIsFull + var ( + memSize = mem.Size() + txsBytes = mem.TxsBytes() + ) + if memSize >= mem.config.Size || + int64(len(tx))+txsBytes > mem.config.MaxTxsBytes { + return ErrMempoolIsFull{ + memSize, mem.config.Size, + txsBytes, mem.config.MaxTxsBytes} } // The size of the corresponding amino-encoded TxMessage @@ -327,6 +381,19 @@ func (mem *Mempool) CheckTx(tx types.Tx, cb func(*abci.Response)) (err error) { // CACHE if !mem.cache.Push(tx) { + // Record a new sender for a tx we've already seen. + // Note it's possible a tx is still in the cache but no longer in the mempool + // (eg. after committing a block, txs are removed from mempool but not cache), + // so we only record the sender for txs still in the mempool. + if e, ok := mem.txsMap.Load(txKey(tx)); ok { + memTx := e.(*clist.CElement).Value.(*mempoolTx) + if _, loaded := memTx.senders.LoadOrStore(txInfo.PeerID, true); loaded { + // TODO: consider punishing peer for dups, + // its non-trivial since invalid txs can become valid, + // but they can spam the same tx with little cost to them atm. + } + } + return ErrTxInCache } // END CACHE @@ -349,29 +416,90 @@ func (mem *Mempool) CheckTx(tx types.Tx, cb func(*abci.Response)) (err error) { if err = mem.proxyAppConn.Error(); err != nil { return err } + reqRes := mem.proxyAppConn.CheckTxAsync(tx) - if cb != nil { - reqRes.SetCallback(cb) - } + reqRes.SetCallback(mem.reqResCb(tx, txInfo.PeerID, cb)) return nil } -// ABCI callback function -func (mem *Mempool) resCb(req *abci.Request, res *abci.Response) { +// Global callback that will be called after every ABCI response. +// Having a single global callback avoids needing to set a callback for each request. +// However, processing the checkTx response requires the peerID (so we can track which txs we heard from who), +// and peerID is not included in the ABCI request, so we have to set request-specific callbacks that +// include this information. If we're not in the midst of a recheck, this function will just return, +// so the request specific callback can do the work. +// When rechecking, we don't need the peerID, so the recheck callback happens here. +func (mem *Mempool) globalCb(req *abci.Request, res *abci.Response) { if mem.recheckCursor == nil { - mem.resCbNormal(req, res) - } else { - mem.metrics.RecheckTimes.Add(1) - mem.resCbRecheck(req, res) + return } + + mem.metrics.RecheckTimes.Add(1) + mem.resCbRecheck(req, res) + + // update metrics mem.metrics.Size.Set(float64(mem.Size())) } -func (mem *Mempool) resCbNormal(req *abci.Request, res *abci.Response) { +// Request specific callback that should be set on individual reqRes objects +// to incorporate local information when processing the response. +// This allows us to track the peer that sent us this tx, so we can avoid sending it back to them. +// NOTE: alternatively, we could include this information in the ABCI request itself. +// +// External callers of CheckTx, like the RPC, can also pass an externalCb through here that is called +// when all other response processing is complete. +// +// Used in CheckTxWithInfo to record PeerID who sent us the tx. 
+func (mem *Mempool) reqResCb(tx []byte, peerID uint16, externalCb func(*abci.Response)) func(res *abci.Response) { + return func(res *abci.Response) { + if mem.recheckCursor != nil { + // this should never happen + panic("recheck cursor is not nil in reqResCb") + } + + mem.resCbFirstTime(tx, peerID, res) + + // update metrics + mem.metrics.Size.Set(float64(mem.Size())) + + // passed in by the caller of CheckTx, eg. the RPC + if externalCb != nil { + externalCb(res) + } + } +} + +// Called from: +// - resCbFirstTime (lock not held) if tx is valid +func (mem *Mempool) addTx(memTx *mempoolTx) { + e := mem.txs.PushBack(memTx) + mem.txsMap.Store(txKey(memTx.tx), e) + atomic.AddInt64(&mem.txsBytes, int64(len(memTx.tx))) + mem.metrics.TxSizeBytes.Observe(float64(len(memTx.tx))) +} + +// Called from: +// - Update (lock held) if tx was committed +// - resCbRecheck (lock not held) if tx was invalidated +func (mem *Mempool) removeTx(tx types.Tx, elem *clist.CElement, removeFromCache bool) { + mem.txs.Remove(elem) + elem.DetachPrev() + mem.txsMap.Delete(txKey(tx)) + atomic.AddInt64(&mem.txsBytes, int64(-len(tx))) + + if removeFromCache { + mem.cache.Remove(tx) + } +} + +// callback, which is called after the app checked the tx for the first time. +// +// The case where the app checks the tx for the second and subsequent times is +// handled by the resCbRecheck callback. +func (mem *Mempool) resCbFirstTime(tx []byte, peerID uint16, res *abci.Response) { switch r := res.Value.(type) { case *abci.Response_CheckTx: - tx := req.GetCheckTx().Tx var postCheckErr error if mem.postCheck != nil { postCheckErr = mem.postCheck(tx, r.CheckTx) @@ -382,14 +510,14 @@ func (mem *Mempool) resCbNormal(req *abci.Request, res *abci.Response) { gasWanted: r.CheckTx.GasWanted, tx: tx, } - mem.txs.PushBack(memTx) + memTx.senders.Store(peerID, true) + mem.addTx(memTx) mem.logger.Info("Added good transaction", "tx", TxID(tx), "res", r, "height", memTx.height, "total", mem.Size(), ) - mem.metrics.TxSizeBytes.Observe(float64(len(tx))) mem.notifyTxsAvailable() } else { // ignore bad transaction @@ -403,19 +531,20 @@ func (mem *Mempool) resCbNormal(req *abci.Request, res *abci.Response) { } } +// callback, which is called after the app rechecked the tx. +// +// The case where the app checks the tx for the first time is handled by the +// resCbFirstTime callback. func (mem *Mempool) resCbRecheck(req *abci.Request, res *abci.Response) { switch r := res.Value.(type) { case *abci.Response_CheckTx: tx := req.GetCheckTx().Tx memTx := mem.recheckCursor.Value.(*mempoolTx) - if !bytes.Equal(req.GetCheckTx().Tx, memTx.tx) { - cmn.PanicSanity( - fmt.Sprintf( - "Unexpected tx response from proxy during recheck\nExpected %X, got %X", - r.CheckTx.Data, - memTx.tx, - ), - ) + if !bytes.Equal(tx, memTx.tx) { + panic(fmt.Sprintf( + "Unexpected tx response from proxy during recheck\nExpected %X, got %X", + memTx.tx, + tx)) } var postCheckErr error if mem.postCheck != nil { @@ -426,11 +555,8 @@ func (mem *Mempool) resCbRecheck(req *abci.Request, res *abci.Response) { } else { // Tx became invalidated due to newly committed block. 
mem.logger.Info("Tx is no longer valid", "tx", TxID(tx), "res", r, "err", postCheckErr) - mem.txs.Remove(mem.recheckCursor) - mem.recheckCursor.DetachPrev() - - // remove from cache (it might be good later) - mem.cache.Remove(tx) + // NOTE: we remove tx from the cache because it might be good later + mem.removeTx(tx, mem.recheckCursor, true) } if mem.recheckCursor == mem.recheckEnd { mem.recheckCursor = nil @@ -598,11 +724,9 @@ func (mem *Mempool) removeTxs(txs types.Txs) []types.Tx { memTx := e.Value.(*mempoolTx) // Remove the tx if it's already in a block. if _, ok := txsMap[string(memTx.tx)]; ok { - // remove from clist - mem.txs.Remove(e) - e.DetachPrev() - // NOTE: we don't remove committed txs from the cache. + mem.removeTx(memTx.tx, e, false) + continue } txsLeft = append(txsLeft, memTx.tx) @@ -620,7 +744,7 @@ func (mem *Mempool) recheckTxs(txs []types.Tx) { mem.recheckEnd = mem.txs.Back() // Push txs to proxyAppConn - // NOTE: resCb() may be called concurrently. + // NOTE: globalCb may be called concurrently. for _, tx := range txs { mem.proxyAppConn.CheckTxAsync(tx) } @@ -634,6 +758,10 @@ type mempoolTx struct { height int64 // height that this tx had been validated in gasWanted int64 // amount of gas this tx states it will require tx types.Tx // + + // ids of peers who've sent us this tx (as a map for quick lookups). + // senders: PeerID -> bool + senders sync.Map } // Height returns the height for this transaction @@ -649,13 +777,13 @@ type txCache interface { Remove(tx types.Tx) } -// mapTxCache maintains a cache of transactions. This only stores -// the hash of the tx, due to memory concerns. +// mapTxCache maintains a LRU cache of transactions. This only stores the hash +// of the tx, due to memory concerns. type mapTxCache struct { mtx sync.Mutex size int map_ map[[sha256.Size]byte]*list.Element - list *list.List // to remove oldest tx when cache gets too big + list *list.List } var _ txCache = (*mapTxCache)(nil) @@ -677,14 +805,14 @@ func (cache *mapTxCache) Reset() { cache.mtx.Unlock() } -// Push adds the given tx to the cache and returns true. It returns false if tx -// is already in the cache. +// Push adds the given tx to the cache and returns true. It returns +// false if tx is already in the cache. func (cache *mapTxCache) Push(tx types.Tx) bool { cache.mtx.Lock() defer cache.mtx.Unlock() // Use the tx hash in the cache - txHash := sha256.Sum256(tx) + txHash := txKey(tx) if moved, exists := cache.map_[txHash]; exists { cache.list.MoveToBack(moved) return false @@ -698,15 +826,15 @@ func (cache *mapTxCache) Push(tx types.Tx) bool { cache.list.Remove(popped) } } - cache.list.PushBack(txHash) - cache.map_[txHash] = cache.list.Back() + e := cache.list.PushBack(txHash) + cache.map_[txHash] = e return true } // Remove removes the given tx from the cache. 
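// A small usage sketch of the LRU behavior documented above (illustrative
// only):
//
//	c := newMapTxCache(2)
//	c.Push(types.Tx("a")) // true: newly added
//	c.Push(types.Tx("a")) // false: duplicate, entry moved to the list's back
//	c.Push(types.Tx("b")) // true
//	c.Push(types.Tx("c")) // true: evicts "a", the front (least recent) entry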
func (cache *mapTxCache) Remove(tx types.Tx) { cache.mtx.Lock() - txHash := sha256.Sum256(tx) + txHash := txKey(tx) popped := cache.map_[txHash] delete(cache.map_, txHash) if popped != nil { diff --git a/mempool/mempool_test.go b/mempool/mempool_test.go index 15bfaa25bf1..d5f25396d33 100644 --- a/mempool/mempool_test.go +++ b/mempool/mempool_test.go @@ -1,31 +1,42 @@ package mempool import ( - "crypto/md5" "crypto/rand" + "crypto/sha256" "encoding/binary" "fmt" "io/ioutil" + mrand "math/rand" "os" "path/filepath" "testing" "time" "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" + + amino "github.com/tendermint/go-amino" + "github.com/tendermint/tendermint/abci/example/counter" "github.com/tendermint/tendermint/abci/example/kvstore" + abciserver "github.com/tendermint/tendermint/abci/server" abci "github.com/tendermint/tendermint/abci/types" cfg "github.com/tendermint/tendermint/config" + cmn "github.com/tendermint/tendermint/libs/common" "github.com/tendermint/tendermint/libs/log" "github.com/tendermint/tendermint/proxy" "github.com/tendermint/tendermint/types" ) -func newMempoolWithApp(cc proxy.ClientCreator) *Mempool { - config := cfg.ResetTestRoot("mempool_test") +// A cleanupFunc cleans up any config / test files created for a particular +// test. +type cleanupFunc func() +func newMempoolWithApp(cc proxy.ClientCreator) (*Mempool, cleanupFunc) { + return newMempoolWithAppAndConfig(cc, cfg.ResetTestRoot("mempool_test")) +} + +func newMempoolWithAppAndConfig(cc proxy.ClientCreator, config *cfg.Config) (*Mempool, cleanupFunc) { appConnMem, _ := cc.NewABCIClient() appConnMem.SetLogger(log.TestingLogger().With("module", "abci-client", "connection", "mempool")) err := appConnMem.Start() @@ -34,7 +45,7 @@ func newMempoolWithApp(cc proxy.ClientCreator) *Mempool { } mempool := NewMempool(config.Mempool, appConnMem, 0) mempool.SetLogger(log.TestingLogger()) - return mempool + return mempool, func() { os.RemoveAll(config.RootDir) } } func ensureNoFire(t *testing.T, ch <-chan struct{}, timeoutMS int) { @@ -55,8 +66,9 @@ func ensureFire(t *testing.T, ch <-chan struct{}, timeoutMS int) { } } -func checkTxs(t *testing.T, mempool *Mempool, count int) types.Txs { +func checkTxs(t *testing.T, mempool *Mempool, count int, peerID uint16) types.Txs { txs := make(types.Txs, count) + txInfo := TxInfo{PeerID: peerID} for i := 0; i < count; i++ { txBytes := make([]byte, 20) txs[i] = txBytes @@ -64,7 +76,7 @@ func checkTxs(t *testing.T, mempool *Mempool, count int) types.Txs { if err != nil { t.Error(err) } - if err := mempool.CheckTx(txBytes, nil); err != nil { + if err := mempool.CheckTxWithInfo(txBytes, nil, txInfo); err != nil { // Skip invalid txs. // TestMempoolFilters will fail otherwise. It asserts a number of txs // returned. @@ -80,10 +92,11 @@ func checkTxs(t *testing.T, mempool *Mempool, count int) types.Txs { func TestReapMaxBytesMaxGas(t *testing.T) { app := kvstore.NewKVStoreApplication() cc := proxy.NewLocalClientCreator(app) - mempool := newMempoolWithApp(cc) + mempool, cleanup := newMempoolWithApp(cc) + defer cleanup() // Ensure gas calculation behaves as expected - checkTxs(t, mempool, 1) + checkTxs(t, mempool, 1, UnknownPeerID) tx0 := mempool.TxsFront().Value.(*mempoolTx) // assert that kv store has gas wanted = 1. 
require.Equal(t, app.CheckTx(tx0.tx).GasWanted, int64(1), "KVStore had a gas value neq to 1") @@ -117,7 +130,7 @@ func TestReapMaxBytesMaxGas(t *testing.T) { {20, 20000, 30, 20}, } for tcIndex, tt := range tests { - checkTxs(t, mempool, tt.numTxsToCreate) + checkTxs(t, mempool, tt.numTxsToCreate, UnknownPeerID) got := mempool.ReapMaxBytesMaxGas(tt.maxBytes, tt.maxGas) assert.Equal(t, tt.expectedNumTxs, len(got), "Got %d txs, expected %d, tc #%d", len(got), tt.expectedNumTxs, tcIndex) @@ -128,7 +141,8 @@ func TestReapMaxBytesMaxGas(t *testing.T) { func TestMempoolFilters(t *testing.T) { app := kvstore.NewKVStoreApplication() cc := proxy.NewLocalClientCreator(app) - mempool := newMempoolWithApp(cc) + mempool, cleanup := newMempoolWithApp(cc) + defer cleanup() emptyTxArr := []types.Tx{[]byte{}} nopPreFilter := func(tx types.Tx) error { return nil } @@ -157,7 +171,7 @@ func TestMempoolFilters(t *testing.T) { } for tcIndex, tt := range tests { mempool.Update(1, emptyTxArr, tt.preFilter, tt.postFilter) - checkTxs(t, mempool, tt.numTxsToCreate) + checkTxs(t, mempool, tt.numTxsToCreate, UnknownPeerID) require.Equal(t, tt.expectedNumTxs, mempool.Size(), "mempool had the incorrect size, on test case %d", tcIndex) mempool.Flush() } @@ -166,7 +180,8 @@ func TestMempoolFilters(t *testing.T) { func TestMempoolUpdateAddsTxsToCache(t *testing.T) { app := kvstore.NewKVStoreApplication() cc := proxy.NewLocalClientCreator(app) - mempool := newMempoolWithApp(cc) + mempool, cleanup := newMempoolWithApp(cc) + defer cleanup() mempool.Update(1, []types.Tx{[]byte{0x01}}, nil, nil) err := mempool.CheckTx([]byte{0x01}, nil) if assert.Error(t, err) { @@ -177,7 +192,8 @@ func TestMempoolUpdateAddsTxsToCache(t *testing.T) { func TestTxsAvailable(t *testing.T) { app := kvstore.NewKVStoreApplication() cc := proxy.NewLocalClientCreator(app) - mempool := newMempoolWithApp(cc) + mempool, cleanup := newMempoolWithApp(cc) + defer cleanup() mempool.EnableTxsAvailable() timeoutMS := 500 @@ -186,7 +202,7 @@ func TestTxsAvailable(t *testing.T) { ensureNoFire(t, mempool.TxsAvailable(), timeoutMS) // send a bunch of txs, it should only fire once - txs := checkTxs(t, mempool, 100) + txs := checkTxs(t, mempool, 100, UnknownPeerID) ensureFire(t, mempool.TxsAvailable(), timeoutMS) ensureNoFire(t, mempool.TxsAvailable(), timeoutMS) @@ -201,7 +217,7 @@ func TestTxsAvailable(t *testing.T) { ensureNoFire(t, mempool.TxsAvailable(), timeoutMS) // send a bunch more txs. we already fired for this height so it shouldnt fire again - moreTxs := checkTxs(t, mempool, 50) + moreTxs := checkTxs(t, mempool, 50, UnknownPeerID) ensureNoFire(t, mempool.TxsAvailable(), timeoutMS) // now call update with all the txs. 
it should not fire as there are no txs left @@ -212,7 +228,7 @@ func TestTxsAvailable(t *testing.T) { ensureNoFire(t, mempool.TxsAvailable(), timeoutMS) // send a bunch more txs, it should only fire once - checkTxs(t, mempool, 100) + checkTxs(t, mempool, 100, UnknownPeerID) ensureFire(t, mempool.TxsAvailable(), timeoutMS) ensureNoFire(t, mempool.TxsAvailable(), timeoutMS) } @@ -222,7 +238,9 @@ func TestSerialReap(t *testing.T) { app.SetOption(abci.RequestSetOption{Key: "serial", Value: "on"}) cc := proxy.NewLocalClientCreator(app) - mempool := newMempoolWithApp(cc) + mempool, cleanup := newMempoolWithApp(cc) + defer cleanup() + appConnCon, _ := cc.NewABCIClient() appConnCon.SetLogger(log.TestingLogger().With("module", "abci-client", "connection", "consensus")) err := appConnCon.Start() @@ -326,28 +344,6 @@ func TestSerialReap(t *testing.T) { reapCheck(600) } -func TestCacheRemove(t *testing.T) { - cache := newMapTxCache(100) - numTxs := 10 - txs := make([][]byte, numTxs) - for i := 0; i < numTxs; i++ { - // probability of collision is 2**-256 - txBytes := make([]byte, 32) - rand.Read(txBytes) - txs[i] = txBytes - cache.Push(txBytes) - // make sure its added to both the linked list and the map - require.Equal(t, i+1, len(cache.map_)) - require.Equal(t, i+1, cache.list.Len()) - } - for i := 0; i < numTxs; i++ { - cache.Remove(txs[i]) - // make sure its removed from both the map and the linked list - require.Equal(t, numTxs-(i+1), len(cache.map_)) - require.Equal(t, numTxs-(i+1), cache.list.Len()) - } -} - func TestMempoolCloseWAL(t *testing.T) { // 1. Create the temporary directory for mempool and WAL testing. rootDir, err := ioutil.TempDir("", "mempool-test") @@ -362,6 +358,7 @@ func TestMempoolCloseWAL(t *testing.T) { // 3. Create the mempool wcfg := cfg.DefaultMempoolConfig() wcfg.RootDir = rootDir + defer os.RemoveAll(wcfg.RootDir) app := kvstore.NewKVStoreApplication() cc := proxy.NewLocalClientCreator(app) appConnMem, _ := cc.NewABCIClient() @@ -394,8 +391,177 @@ func TestMempoolCloseWAL(t *testing.T) { require.Equal(t, 1, len(m3), "expecting the wal match in") } +// Size of the amino encoded TxMessage is the length of the +// encoded byte array, plus 1 for the struct field, plus 4 +// for the amino prefix. +func txMessageSize(tx types.Tx) int { + return amino.ByteSliceSize(tx) + 1 + 4 +} + +func TestMempoolMaxMsgSize(t *testing.T) { + app := kvstore.NewKVStoreApplication() + cc := proxy.NewLocalClientCreator(app) + mempl, cleanup := newMempoolWithApp(cc) + defer cleanup() + + testCases := []struct { + len int + err bool + }{ + // check small txs. no error + {10, false}, + {1000, false}, + {1000000, false}, + + // check around maxTxSize + // changes from no error to error + {maxTxSize - 2, false}, + {maxTxSize - 1, false}, + {maxTxSize, false}, + {maxTxSize + 1, true}, + {maxTxSize + 2, true}, + + // check around maxMsgSize. 
all error + {maxMsgSize - 1, true}, + {maxMsgSize, true}, + {maxMsgSize + 1, true}, + } + + for i, testCase := range testCases { + caseString := fmt.Sprintf("case %d, len %d", i, testCase.len) + + tx := cmn.RandBytes(testCase.len) + err := mempl.CheckTx(tx, nil) + msg := &TxMessage{tx} + encoded := cdc.MustMarshalBinaryBare(msg) + require.Equal(t, len(encoded), txMessageSize(tx), caseString) + if !testCase.err { + require.True(t, len(encoded) <= maxMsgSize, caseString) + require.NoError(t, err, caseString) + } else { + require.True(t, len(encoded) > maxMsgSize, caseString) + require.Equal(t, err, ErrTxTooLarge, caseString) + } + } + +} + +func TestMempoolTxsBytes(t *testing.T) { + app := kvstore.NewKVStoreApplication() + cc := proxy.NewLocalClientCreator(app) + config := cfg.ResetTestRoot("mempool_test") + config.Mempool.MaxTxsBytes = 10 + mempool, cleanup := newMempoolWithAppAndConfig(cc, config) + defer cleanup() + + // 1. zero by default + assert.EqualValues(t, 0, mempool.TxsBytes()) + + // 2. len(tx) after CheckTx + err := mempool.CheckTx([]byte{0x01}, nil) + require.NoError(t, err) + assert.EqualValues(t, 1, mempool.TxsBytes()) + + // 3. zero again after tx is removed by Update + mempool.Update(1, []types.Tx{[]byte{0x01}}, nil, nil) + assert.EqualValues(t, 0, mempool.TxsBytes()) + + // 4. zero after Flush + err = mempool.CheckTx([]byte{0x02, 0x03}, nil) + require.NoError(t, err) + assert.EqualValues(t, 2, mempool.TxsBytes()) + + mempool.Flush() + assert.EqualValues(t, 0, mempool.TxsBytes()) + + // 5. ErrMempoolIsFull is returned when/if MaxTxsBytes limit is reached. + err = mempool.CheckTx([]byte{0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04}, nil) + require.NoError(t, err) + err = mempool.CheckTx([]byte{0x05}, nil) + if assert.Error(t, err) { + assert.IsType(t, ErrMempoolIsFull{}, err) + } + + // 6. zero after tx is rechecked and removed due to not being valid anymore + app2 := counter.NewCounterApplication(true) + cc = proxy.NewLocalClientCreator(app2) + mempool, cleanup = newMempoolWithApp(cc) + defer cleanup() + + txBytes := make([]byte, 8) + binary.BigEndian.PutUint64(txBytes, uint64(0)) + + err = mempool.CheckTx(txBytes, nil) + require.NoError(t, err) + assert.EqualValues(t, 8, mempool.TxsBytes()) + + appConnCon, _ := cc.NewABCIClient() + appConnCon.SetLogger(log.TestingLogger().With("module", "abci-client", "connection", "consensus")) + err = appConnCon.Start() + require.Nil(t, err) + defer appConnCon.Stop() + res, err := appConnCon.DeliverTxSync(txBytes) + require.NoError(t, err) + require.EqualValues(t, 0, res.Code) + res2, err := appConnCon.CommitSync() + require.NoError(t, err) + require.NotEmpty(t, res2.Data) + + // Pretend like we committed nothing so txBytes gets rechecked and removed. + mempool.Update(1, []types.Tx{}, nil, nil) + assert.EqualValues(t, 0, mempool.TxsBytes()) +} + +// This will non-deterministically catch some concurrency failures like +// https://github.com/tendermint/tendermint/issues/3509 +// TODO: all of the tests should probably also run using the remote proxy app +// since otherwise we're not actually testing the concurrency of the mempool here! 
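The hammer pattern the test below uses is worth seeing in isolation. A reduced sketch under stated assumptions (memCheck stands in for the mempool's CheckTx entry point, and the peer and iteration counts are arbitrary); run it under the race detector, e.g. go run -race, to get the same class of signal:

package main

import (
	"fmt"
	mrand "math/rand"
	"sync"
)

// memCheck is a stand-in for the mempool: a map guarded by a mutex, so the
// race detector has a real shared structure to watch.
type memCheck struct {
	mtx  sync.Mutex
	seen map[string]bool
}

func (m *memCheck) CheckTx(tx []byte, peerID uint16) error {
	m.mtx.Lock()
	defer m.mtx.Unlock()
	m.seen[string(tx)] = true
	return nil
}

func main() {
	m := &memCheck{seen: make(map[string]bool)}
	txs := [][]byte{[]byte("a"), []byte("b"), []byte("c")}

	var wg sync.WaitGroup
	for p := 0; p < 5; p++ { // five simulated peers
		wg.Add(1)
		go func(peerID uint16) {
			defer wg.Done()
			for i := 0; i < 1000; i++ {
				// duplicate submissions are the point: the contended
				// duplicate path is where concurrency bugs tend to hide
				_ = m.CheckTx(txs[mrand.Intn(len(txs))], peerID)
			}
		}(uint16(p))
	}
	wg.Wait()
	fmt.Println("distinct txs seen:", len(m.seen))
}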
+func TestMempoolRemoteAppConcurrency(t *testing.T) { + sockPath := fmt.Sprintf("unix:///tmp/echo_%v.sock", cmn.RandStr(6)) + app := kvstore.NewKVStoreApplication() + cc, server := newRemoteApp(t, sockPath, app) + defer server.Stop() + config := cfg.ResetTestRoot("mempool_test") + mempool, cleanup := newMempoolWithAppAndConfig(cc, config) + defer cleanup() + + // generate small number of txs + nTxs := 10 + txLen := 200 + txs := make([]types.Tx, nTxs) + for i := 0; i < nTxs; i++ { + txs[i] = cmn.RandBytes(txLen) + } + + // simulate a group of peers sending them over and over + N := config.Mempool.Size + maxPeers := 5 + for i := 0; i < N; i++ { + peerID := mrand.Intn(maxPeers) + txNum := mrand.Intn(nTxs) + tx := txs[int(txNum)] + + // this will err with ErrTxInCache many times ... + mempool.CheckTxWithInfo(tx, nil, TxInfo{PeerID: uint16(peerID)}) + } + err := mempool.FlushAppConn() + require.NoError(t, err) +} + +// caller must close server +func newRemoteApp(t *testing.T, addr string, app abci.Application) (clientCreator proxy.ClientCreator, server cmn.Service) { + clientCreator = proxy.NewRemoteClientCreator(addr, "socket", true) + + // Start server + server = abciserver.NewSocketServer(addr, app) + server.SetLogger(log.TestingLogger().With("module", "abci-server")) + if err := server.Start(); err != nil { + t.Fatalf("Error starting socket server: %v", err.Error()) + } + return clientCreator, server +} func checksumIt(data []byte) string { - h := md5.New() + h := sha256.New() h.Write(data) return fmt.Sprintf("%x", h.Sum(nil)) } diff --git a/mempool/metrics.go b/mempool/metrics.go index d26534ff93b..5e4eaf5edac 100644 --- a/mempool/metrics.go +++ b/mempool/metrics.go @@ -7,7 +7,11 @@ import ( stdprometheus "github.com/prometheus/client_golang/prometheus" ) -const MetricsSubsytem = "mempool" +const ( + // MetricsSubsystem is a subsystem shared by all metrics exposed by this + // package. + MetricsSubsystem = "mempool" +) // Metrics contains metrics exposed by this package. // see MetricsProvider for descriptions. @@ -23,34 +27,36 @@ type Metrics struct { } // PrometheusMetrics returns Metrics build using Prometheus client library. +// Optionally, labels can be provided along with their values ("foo", +// "fooValue"). 
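That variadic convention, alternating label names and values, is easy to misread, so here is a toy model of it (splitPairs is a hypothetical helper written for this illustration; the real constructor below keeps only the names for registration and hands the full pair list to With):

package main

import "fmt"

// splitPairs separates a ("name", "value", ...) list into parallel slices:
// even indices are label names, odd indices are their values.
func splitPairs(labelsAndValues ...string) (names, values []string) {
	for i := 0; i+1 < len(labelsAndValues); i += 2 {
		names = append(names, labelsAndValues[i])
		values = append(values, labelsAndValues[i+1])
	}
	return names, values
}

func main() {
	names, values := splitPairs("chain_id", "test-chain", "moniker", "node0")
	fmt.Println(names)  // [chain_id moniker]
	fmt.Println(values) // [test-chain node0]
}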
 func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics {
-	var labels []string
+	labels := []string{}
 	for i := 0; i < len(labelsAndValues); i += 2 {
 		labels = append(labels, labelsAndValues[i])
 	}
 	return &Metrics{
 		Size: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
 			Namespace: namespace,
-			Subsystem: MetricsSubsytem,
+			Subsystem: MetricsSubsystem,
 			Name:      "size",
 			Help:      "Size of the mempool (number of uncommitted transactions).",
 		}, labels).With(labelsAndValues...),
 		TxSizeBytes: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{
 			Namespace: namespace,
-			Subsystem: MetricsSubsytem,
+			Subsystem: MetricsSubsystem,
 			Name:      "tx_size_bytes",
 			Help:      "Transaction sizes in bytes.",
 			Buckets:   stdprometheus.ExponentialBuckets(1, 3, 17),
 		}, labels).With(labelsAndValues...),
 		FailedTxs: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
 			Namespace: namespace,
-			Subsystem: MetricsSubsytem,
+			Subsystem: MetricsSubsystem,
 			Name:      "failed_txs",
 			Help:      "Number of failed transactions.",
 		}, labels).With(labelsAndValues...),
 		RecheckTimes: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
 			Namespace: namespace,
-			Subsystem: MetricsSubsytem,
+			Subsystem: MetricsSubsystem,
 			Name:      "recheck_times",
 			Help:      "Number of times transactions are rechecked in the mempool.",
 		}, labels).With(labelsAndValues...),
diff --git a/mempool/reactor.go b/mempool/reactor.go
index 19a411c1683..e1376b2879e 100644
--- a/mempool/reactor.go
+++ b/mempool/reactor.go
@@ -2,14 +2,16 @@ package mempool
 
 import (
 	"fmt"
+	"math"
 	"reflect"
+	"sync"
 	"time"
 
-	"github.com/tendermint/go-amino"
-	"github.com/tendermint/tendermint/libs/clist"
-	"github.com/tendermint/tendermint/libs/log"
+	amino "github.com/tendermint/go-amino"
 
 	cfg "github.com/tendermint/tendermint/config"
+	"github.com/tendermint/tendermint/libs/clist"
+	"github.com/tendermint/tendermint/libs/log"
 	"github.com/tendermint/tendermint/p2p"
 	"github.com/tendermint/tendermint/types"
 )
@@ -17,16 +19,89 @@ import (
 const (
 	MempoolChannel = byte(0x30)
 
-	maxMsgSize = 1048576 // 1MB TODO make it configurable
-	maxTxSize = maxMsgSize - 8 // account for amino overhead of TxMessage
-	peerCatchupSleepIntervalMS = 100 // If peer is behind, sleep this amount
+	maxMsgSize = 1048576 // 1MB TODO make it configurable
+	maxTxSize = maxMsgSize - 8 // account for amino overhead of TxMessage
+
+	peerCatchupSleepIntervalMS = 100 // If peer is behind, sleep this amount
+
+	// UnknownPeerID is the peer ID to use when running CheckTx when there is
+	// no peer (e.g. RPC)
+	UnknownPeerID uint16 = 0
+
+	maxActiveIDs = math.MaxUint16
 )
 
 // MempoolReactor handles mempool tx broadcasting amongst peers.
+// It maintains a map from peer ID to counter, to prevent gossiping txs to
+// the peers it received them from.
 type MempoolReactor struct {
 	p2p.BaseReactor
 	config  *cfg.MempoolConfig
 	Mempool *Mempool
+	ids     *mempoolIDs
+}
+
+type mempoolIDs struct {
+	mtx       sync.RWMutex
+	peerMap   map[p2p.ID]uint16
+	nextID    uint16              // assumes that a node will never have over 65536 active peers
+	activeIDs map[uint16]struct{} // used to check if a given peerID key is used, the value doesn't matter
+}
+
+// ReserveForPeer searches for the next unused ID and assigns it to the
+// peer.
+func (ids *mempoolIDs) ReserveForPeer(peer p2p.Peer) {
+	ids.mtx.Lock()
+	defer ids.mtx.Unlock()
+
+	curID := ids.nextPeerID()
+	ids.peerMap[peer.ID()] = curID
+	ids.activeIDs[curID] = struct{}{}
+}
+
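These reserved IDs pay off in the broadcast path: every mempoolTx records, in its senders map, which peers are already known to have the tx, and broadcastTxRoutine further down consults that record before gossiping. A stand-alone sketch of the check (memTx and shouldSendTo are illustrative names for this example):

package main

import (
	"fmt"
	"sync"
)

// memTx mirrors the per-tx bookkeeping the patch introduces: a sync.Map
// keyed by the compact uint16 peer ID.
type memTx struct {
	tx      []byte
	senders sync.Map // peer ID (uint16) -> true
}

// shouldSendTo reports whether the tx still needs to be gossiped to peerID,
// i.e. that peer is not already recorded as a sender.
func shouldSendTo(tx *memTx, peerID uint16) bool {
	_, ok := tx.senders.Load(peerID)
	return !ok
}

func main() {
	tx := &memTx{tx: []byte("payload")}
	tx.senders.Store(uint16(7), true) // peer 7 sent us this tx

	fmt.Println(shouldSendTo(tx, 7)) // false: don't echo it back
	fmt.Println(shouldSendTo(tx, 8)) // true: peer 8 hasn't seen it yet
}

Using a uint16 ID rather than the full p2p.ID string keeps that per-tx bookkeeping small even when a tx arrives from many peers.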
+// nextPeerID returns the next unused peer ID to use.
+// This assumes that ids's mutex is already locked.
+func (ids *mempoolIDs) nextPeerID() uint16 {
+	if len(ids.activeIDs) == maxActiveIDs {
+		panic(fmt.Sprintf("node has maximum %d active IDs and wanted to get one more", maxActiveIDs))
+	}
+
+	_, idExists := ids.activeIDs[ids.nextID]
+	for idExists {
+		ids.nextID++
+		_, idExists = ids.activeIDs[ids.nextID]
+	}
+	curID := ids.nextID
+	ids.nextID++
+	return curID
+}
+
+// Reclaim returns the ID reserved for the peer back to the unused pool.
+func (ids *mempoolIDs) Reclaim(peer p2p.Peer) {
+	ids.mtx.Lock()
+	defer ids.mtx.Unlock()
+
+	removedID, ok := ids.peerMap[peer.ID()]
+	if ok {
+		delete(ids.activeIDs, removedID)
+		delete(ids.peerMap, peer.ID())
+	}
+}
+
+// GetForPeer returns an ID reserved for the peer.
+func (ids *mempoolIDs) GetForPeer(peer p2p.Peer) uint16 {
+	ids.mtx.RLock()
+	defer ids.mtx.RUnlock()
+
+	return ids.peerMap[peer.ID()]
+}
+
+func newMempoolIDs() *mempoolIDs {
+	return &mempoolIDs{
+		peerMap:   make(map[p2p.ID]uint16),
+		activeIDs: map[uint16]struct{}{0: {}},
+		nextID:    1, // reserve UnknownPeerID (0) for mempoolReactor.BroadcastTx
+	}
+}
 
 // NewMempoolReactor returns a new MempoolReactor with the given config and mempool.
@@ -34,6 +109,7 @@ func NewMempoolReactor(config *cfg.MempoolConfig, mempool *Mempool) *MempoolReac
 	memR := &MempoolReactor{
 		config:  config,
 		Mempool: mempool,
+		ids:     newMempoolIDs(),
 	}
 	memR.BaseReactor = *p2p.NewBaseReactor("MempoolReactor", memR)
 	return memR
@@ -67,11 +143,13 @@ func (memR *MempoolReactor) GetChannels() []*p2p.ChannelDescriptor {
 // AddPeer implements Reactor.
 // It starts a broadcast routine ensuring all txs are forwarded to the given peer.
 func (memR *MempoolReactor) AddPeer(peer p2p.Peer) {
+	memR.ids.ReserveForPeer(peer)
 	go memR.broadcastTxRoutine(peer)
 }
 
 // RemovePeer implements Reactor.
 func (memR *MempoolReactor) RemovePeer(peer p2p.Peer, reason interface{}) {
+	memR.ids.Reclaim(peer)
 	// broadcast routine checks if peer is gone and returns
 }
 
@@ -88,7 +166,8 @@ func (memR *MempoolReactor) Receive(chID byte, src p2p.Peer, msgBytes []byte) {
 
 	switch msg := msg.(type) {
 	case *TxMessage:
-		err := memR.Mempool.CheckTx(msg.Tx, nil)
+		peerID := memR.ids.GetForPeer(src)
+		err := memR.Mempool.CheckTxWithInfo(msg.Tx, nil, TxInfo{PeerID: peerID})
 		if err != nil {
 			memR.Logger.Info("Could not check tx", "tx", TxID(msg.Tx), "err", err)
 		}
@@ -109,6 +188,7 @@ func (memR *MempoolReactor) broadcastTxRoutine(peer p2p.Peer) {
 		return
 	}
 
+	peerID := memR.ids.GetForPeer(peer)
 	var next *clist.CElement
 	for {
 		// In case of both next.NextWaitChan() and peer.Quit() are variable at the same time
@@ -149,12 +229,15 @@ func (memR *MempoolReactor) broadcastTxRoutine(peer p2p.Peer) {
 			continue
 		}
 
-		// send memTx
-		msg := &TxMessage{Tx: memTx.tx}
-		success := peer.Send(MempoolChannel, cdc.MustMarshalBinaryBare(msg))
-		if !success {
-			time.Sleep(peerCatchupSleepIntervalMS * time.Millisecond)
-			continue
+		// ensure peer hasn't already sent us this tx
+		if _, ok := memTx.senders.Load(peerID); !ok {
+			// send memTx
+			msg := &TxMessage{Tx: memTx.tx}
+			success := peer.Send(MempoolChannel, cdc.MustMarshalBinaryBare(msg))
+			if !success {
+				time.Sleep(peerCatchupSleepIntervalMS * time.Millisecond)
+				continue
+			}
 		}
 
 		select {
diff --git a/mempool/reactor_test.go b/mempool/reactor_test.go
index ad9ad8b4054..c9cf498098d 100644
--- a/mempool/reactor_test.go
+++ b/mempool/reactor_test.go
@@ -2,21 +2,21 @@ package mempool
 
 import (
 	"fmt"
+	"net"
 	"sync"
 	"testing"
 	"time"
 
 	"github.com/fortytw2/leaktest"
+	"github.com/go-kit/kit/log/term"
 	"github.com/pkg/errors"
"github.com/stretchr/testify/assert" - "github.com/go-kit/kit/log/term" - "github.com/tendermint/tendermint/abci/example/kvstore" - "github.com/tendermint/tendermint/libs/log" - cfg "github.com/tendermint/tendermint/config" + "github.com/tendermint/tendermint/libs/log" "github.com/tendermint/tendermint/p2p" + "github.com/tendermint/tendermint/p2p/mock" "github.com/tendermint/tendermint/proxy" "github.com/tendermint/tendermint/types" ) @@ -49,7 +49,8 @@ func makeAndConnectMempoolReactors(config *cfg.Config, N int) []*MempoolReactor for i := 0; i < N; i++ { app := kvstore.NewKVStoreApplication() cc := proxy.NewLocalClientCreator(app) - mempool := newMempoolWithApp(cc) + mempool, cleanup := newMempoolWithApp(cc) + defer cleanup() reactors[i] = NewMempoolReactor(config.Mempool, mempool) // so we dont start the consensus states reactors[i].SetLogger(logger.With("validator", i)) @@ -101,6 +102,12 @@ func _waitForTxs(t *testing.T, wg *sync.WaitGroup, txs types.Txs, reactorIdx int wg.Done() } +// ensure no txs on reactor after some timeout +func ensureNoTxs(t *testing.T, reactor *MempoolReactor, timeout time.Duration) { + time.Sleep(timeout) // wait for the txs in all mempools + assert.Zero(t, reactor.Mempool.Size()) +} + const ( NUM_TXS = 1000 TIMEOUT = 120 * time.Second // ridiculously high because CircleCI is slow @@ -123,10 +130,26 @@ func TestReactorBroadcastTxMessage(t *testing.T) { // send a bunch of txs to the first reactor's mempool // and wait for them all to be received in the others - txs := checkTxs(t, reactors[0].Mempool, NUM_TXS) + txs := checkTxs(t, reactors[0].Mempool, NUM_TXS, UnknownPeerID) waitForTxs(t, txs, reactors) } +func TestReactorNoBroadcastToSender(t *testing.T) { + config := cfg.TestConfig() + const N = 2 + reactors := makeAndConnectMempoolReactors(config, N) + defer func() { + for _, r := range reactors { + r.Stop() + } + }() + + // send a bunch of txs to the first reactor's mempool, claiming it came from peer + // ensure peer gets no txs + checkTxs(t, reactors[0].Mempool, NUM_TXS, 1) + ensureNoTxs(t, reactors[1], 100*time.Millisecond) +} + func TestBroadcastTxForPeerStopsWhenPeerStops(t *testing.T) { if testing.Short() { t.Skip("skipping test in short mode.") @@ -168,3 +191,36 @@ func TestBroadcastTxForPeerStopsWhenReactorStops(t *testing.T) { // i.e. 
broadcastTxRoutine finishes when reactor is stopped leaktest.CheckTimeout(t, 10*time.Second)() } + +func TestMempoolIDsBasic(t *testing.T) { + ids := newMempoolIDs() + + peer := mock.NewPeer(net.IP{127, 0, 0, 1}) + + ids.ReserveForPeer(peer) + assert.EqualValues(t, 1, ids.GetForPeer(peer)) + ids.Reclaim(peer) + + ids.ReserveForPeer(peer) + assert.EqualValues(t, 2, ids.GetForPeer(peer)) + ids.Reclaim(peer) +} + +func TestMempoolIDsPanicsIfNodeRequestsOvermaxActiveIDs(t *testing.T) { + if testing.Short() { + return + } + + // 0 is already reserved for UnknownPeerID + ids := newMempoolIDs() + + for i := 0; i < maxActiveIDs-1; i++ { + peer := mock.NewPeer(net.IP{127, 0, 0, 1}) + ids.ReserveForPeer(peer) + } + + assert.Panics(t, func() { + peer := mock.NewPeer(net.IP{127, 0, 0, 1}) + ids.ReserveForPeer(peer) + }) +} From 9fe87cc1e3a5387b750728b062fa8f1ae3e07c77 Mon Sep 17 00:00:00 2001 From: chengwenxi Date: Wed, 3 Jul 2019 10:52:18 +0800 Subject: [PATCH 24/26] Limit mempool memory usage --- config/config.go | 23 +++++---- config/toml.go | 9 +++- docs/tendermint-core/configuration.md | 9 +++- p2p/mock/peer.go | 68 +++++++++++++++++++++++++++ rpc/client/rpc_test.go | 12 +++-- rpc/core/mempool.go | 46 ++++++++++-------- rpc/core/types/responses.go | 6 ++- state/services.go | 19 +++++--- 8 files changed, 149 insertions(+), 43 deletions(-) create mode 100644 p2p/mock/peer.go diff --git a/config/config.go b/config/config.go index 1cbb4f5fc5c..02188e1f03d 100644 --- a/config/config.go +++ b/config/config.go @@ -515,12 +515,13 @@ func DefaultFuzzConnConfig() *FuzzConnConfig { // MempoolConfig defines the configuration options for the Tendermint mempool type MempoolConfig struct { - RootDir string `mapstructure:"home"` - Recheck bool `mapstructure:"recheck"` - Broadcast bool `mapstructure:"broadcast"` - WalPath string `mapstructure:"wal_dir"` - Size int `mapstructure:"size"` - CacheSize int `mapstructure:"cache_size"` + RootDir string `mapstructure:"home"` + Recheck bool `mapstructure:"recheck"` + Broadcast bool `mapstructure:"broadcast"` + WalPath string `mapstructure:"wal_dir"` + Size int `mapstructure:"size"` + MaxTxsBytes int64 `mapstructure:"max_txs_bytes"` + CacheSize int `mapstructure:"cache_size"` } // DefaultMempoolConfig returns a default configuration for the Tendermint mempool @@ -529,10 +530,11 @@ func DefaultMempoolConfig() *MempoolConfig { Recheck: true, Broadcast: true, WalPath: "", - // Each signature verification takes .5ms, size reduced until we implement + // Each signature verification takes .5ms, Size reduced until we implement // ABCI Recheck - Size: 5000, - CacheSize: 10000, + Size: 5000, + MaxTxsBytes: 1024 * 1024 * 1024, // 1GB + CacheSize: 10000, } } @@ -559,6 +561,9 @@ func (cfg *MempoolConfig) ValidateBasic() error { if cfg.Size < 0 { return errors.New("size can't be negative") } + if cfg.MaxTxsBytes < 0 { + return errors.New("max_txs_bytes can't be negative") + } if cfg.CacheSize < 0 { return errors.New("cache_size can't be negative") } diff --git a/config/toml.go b/config/toml.go index 5ae90483444..7c39322e4a3 100644 --- a/config/toml.go +++ b/config/toml.go @@ -235,10 +235,15 @@ recheck = {{ .Mempool.Recheck }} broadcast = {{ .Mempool.Broadcast }} wal_dir = "{{ js .Mempool.WalPath }}" -# size of the mempool +# Maximum number of transactions in the mempool size = {{ .Mempool.Size }} -# size of the cache (used to filter transactions we saw earlier) +# Limit the total size of all txs in the mempool. +# This only accounts for raw transactions (e.g. 
given 1MB transactions and +# max_txs_bytes=5MB, mempool will only accept 5 transactions). +max_txs_bytes = {{ .Mempool.MaxTxsBytes }} + +# Size of the cache (used to filter transactions we saw earlier) in transactions cache_size = {{ .Mempool.CacheSize }} ##### consensus configuration options ##### diff --git a/docs/tendermint-core/configuration.md b/docs/tendermint-core/configuration.md index e66dcf51136..413df648130 100644 --- a/docs/tendermint-core/configuration.md +++ b/docs/tendermint-core/configuration.md @@ -183,10 +183,15 @@ recheck = true broadcast = true wal_dir = "" -# size of the mempool +# Maximum number of transactions in the mempool size = 5000 -# size of the cache (used to filter transactions we saw earlier) +# Limit the total size of all txs in the mempool. +# This only accounts for raw transactions (e.g. given 1MB transactions and +# max_txs_bytes=5MB, mempool will only accept 5 transactions). +max_txs_bytes = 1073741824 + +# Size of the cache (used to filter transactions we saw earlier) in transactions cache_size = 10000 ##### consensus configuration options ##### diff --git a/p2p/mock/peer.go b/p2p/mock/peer.go new file mode 100644 index 00000000000..bdcf012de28 --- /dev/null +++ b/p2p/mock/peer.go @@ -0,0 +1,68 @@ +package mock + +import ( + "net" + + "github.com/tendermint/tendermint/crypto/ed25519" + cmn "github.com/tendermint/tendermint/libs/common" + "github.com/tendermint/tendermint/p2p" + "github.com/tendermint/tendermint/p2p/conn" +) + +type Peer struct { + *cmn.BaseService + ip net.IP + id p2p.ID + addr *p2p.NetAddress + kv map[string]interface{} + Outbound, Persistent bool +} + +// NewPeer creates and starts a new mock peer. If the ip +// is nil, random routable address is used. +func NewPeer(ip net.IP) *Peer { + var netAddr *p2p.NetAddress + if ip == nil { + _, netAddr = p2p.CreateRoutableAddr() + } else { + netAddr = p2p.NewNetAddressIPPort(ip, 26656) + } + nodeKey := p2p.NodeKey{PrivKey: ed25519.GenPrivKey()} + netAddr.ID = nodeKey.ID() + mp := &Peer{ + ip: ip, + id: nodeKey.ID(), + addr: netAddr, + kv: make(map[string]interface{}), + } + mp.BaseService = cmn.NewBaseService(nil, "MockPeer", mp) + mp.Start() + return mp +} + +func (mp *Peer) FlushStop() { mp.Stop() } +func (mp *Peer) TrySend(chID byte, msgBytes []byte) bool { return true } +func (mp *Peer) Send(chID byte, msgBytes []byte) bool { return true } +func (mp *Peer) NodeInfo() p2p.NodeInfo { + return p2p.DefaultNodeInfo{ + ID_: mp.addr.ID, + ListenAddr: mp.addr.DialString(), + } +} +func (mp *Peer) Status() conn.ConnectionStatus { return conn.ConnectionStatus{} } +func (mp *Peer) ID() p2p.ID { return mp.id } +func (mp *Peer) IsOutbound() bool { return mp.Outbound } +func (mp *Peer) IsPersistent() bool { return mp.Persistent } +func (mp *Peer) Get(key string) interface{} { + if value, ok := mp.kv[key]; ok { + return value + } + return nil +} +func (mp *Peer) Set(key string, value interface{}) { + mp.kv[key] = value +} +func (mp *Peer) RemoteIP() net.IP { return mp.ip } +func (mp *Peer) SocketAddr() *p2p.NetAddress { return mp.addr } +func (mp *Peer) RemoteAddr() net.Addr { return &net.TCPAddr{IP: mp.ip, Port: 8800} } +func (mp *Peer) CloseConn() error { return nil } diff --git a/rpc/client/rpc_test.go b/rpc/client/rpc_test.go index dac7ec12dde..ee7c9d5d4fd 100644 --- a/rpc/client/rpc_test.go +++ b/rpc/client/rpc_test.go @@ -290,9 +290,13 @@ func TestUnconfirmedTxs(t *testing.T) { for i, c := range GetClients() { mc, ok := c.(client.MempoolClient) require.True(t, ok, "%d", i) - txs, err := 
mc.UnconfirmedTxs(1)
+		res, err := mc.UnconfirmedTxs(1)
 		require.Nil(t, err, "%d: %+v", i, err)
-		assert.Exactly(t, types.Txs{tx}, types.Txs(txs.Txs))
+
+		assert.Equal(t, 1, res.Count)
+		assert.Equal(t, 1, res.Total)
+		assert.Equal(t, mempool.TxsBytes(), res.TotalBytes)
+		assert.Exactly(t, types.Txs{tx}, types.Txs(res.Txs))
 	}
 
 	mempool.Flush()
@@ -311,7 +315,9 @@ func TestNumUnconfirmedTxs(t *testing.T) {
 
 		res, err := mc.NumUnconfirmedTxs()
 		require.Nil(t, err, "%d: %+v", i, err)
-		assert.Equal(t, mempoolSize, res.N)
+		assert.Equal(t, mempoolSize, res.Count)
+		assert.Equal(t, mempoolSize, res.Total)
+		assert.Equal(t, mempool.TxsBytes(), res.TotalBytes)
 	}
 
 	mempool.Flush()
diff --git a/rpc/core/mempool.go b/rpc/core/mempool.go
index ff6b029cf04..4099880338a 100644
--- a/rpc/core/mempool.go
+++ b/rpc/core/mempool.go
@@ -254,28 +254,33 @@ func BroadcastTxCommit(tx types.Tx) (*ctypes.ResultBroadcastTxCommit, error) {
 // > The above command returns JSON structured like this:
 //
 // ```json
-// {
-// "error": "",
-// "result": {
-// "txs": [],
-// "n_txs": "0"
-// },
-// "id": "",
-// "jsonrpc": "2.0"
-// }
+// {
+// "result" : {
+// "txs" : [],
+// "total_bytes" : "0",
+// "n_txs" : "0",
+// "total" : "0"
+// },
+// "jsonrpc" : "2.0",
+// "id" : ""
+// }
+// ```
 //
 // ### Query Parameters
 //
 // | Parameter | Type | Default | Required | Description |
 // |-----------+------+---------+----------+--------------------------------------|
 // | limit | int | 30 | false | Maximum number of entries (max: 100) |
-// ```
 func UnconfirmedTxs(limit int) (*ctypes.ResultUnconfirmedTxs, error) {
 	// reuse per_page validator
 	limit = validatePerPage(limit)
 
 	txs := mempool.ReapMaxTxs(limit)
-	return &ctypes.ResultUnconfirmedTxs{len(txs), txs}, nil
+	return &ctypes.ResultUnconfirmedTxs{
+		Count:      len(txs),
+		Total:      mempool.Size(),
+		TotalBytes: mempool.TxsBytes(),
+		Txs:        txs}, nil
 }
 
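The new response shape is easiest to read detached from the RPC plumbing. A sketch using plain encoding/json (only the struct layout and JSON field names come from the diff; the server actually encodes via amino, which is why the integers render as strings in the JSON examples above, and byte slices print base64-encoded here):

package main

import (
	"encoding/json"
	"fmt"
)

type resultUnconfirmedTxs struct {
	Count      int      `json:"n_txs"`       // number of txs returned, bounded by ?limit
	Total      int      `json:"total"`       // txs currently in the whole mempool
	TotalBytes int64    `json:"total_bytes"` // sum of their sizes in bytes
	Txs        [][]byte `json:"txs"`
}

func main() {
	res := resultUnconfirmedTxs{Count: 1, Total: 3, TotalBytes: 42,
		Txs: [][]byte{[]byte("abc")}}
	out, _ := json.MarshalIndent(res, "", "  ")
	fmt.Println(string(out)) // note that Count <= Total always holds
}

 // Get number of unconfirmed transactions.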
@@ -298,15 +302,19 @@ func UnconfirmedTxs(limit int) (*ctypes.ResultUnconfirmedTxs, error) { // // ```json // { -// "error": "", -// "result": { -// "txs": null, -// "n_txs": "0" -// }, -// "id": "", -// "jsonrpc": "2.0" +// "jsonrpc" : "2.0", +// "id" : "", +// "result" : { +// "n_txs" : "0", +// "total_bytes" : "0", +// "txs" : null, +// "total" : "0" +// } // } // ``` func NumUnconfirmedTxs() (*ctypes.ResultUnconfirmedTxs, error) { - return &ctypes.ResultUnconfirmedTxs{N: mempool.Size()}, nil + return &ctypes.ResultUnconfirmedTxs{ + Count: mempool.Size(), + Total: mempool.Size(), + TotalBytes: mempool.TxsBytes()}, nil } diff --git a/rpc/core/types/responses.go b/rpc/core/types/responses.go index 62be1cafd87..df048af7ed9 100644 --- a/rpc/core/types/responses.go +++ b/rpc/core/types/responses.go @@ -179,8 +179,10 @@ type ResultTxSearch struct { // List of mempool txs type ResultUnconfirmedTxs struct { - N int `json:"n_txs"` - Txs []types.Tx `json:"txs"` + Count int `json:"n_txs"` + Total int `json:"total"` + TotalBytes int64 `json:"total_bytes"` + Txs []types.Tx `json:"txs"` } // Info abci msg diff --git a/state/services.go b/state/services.go index f6fa3a1dbb5..83dcc7883b2 100644 --- a/state/services.go +++ b/state/services.go @@ -23,6 +23,7 @@ type Mempool interface { Size() int CheckTx(types.Tx, func(*abci.Response)) error + CheckTxWithInfo(types.Tx, func(*abci.Response), mempool.TxInfo) error ReapMaxBytesMaxGas(maxBytes, maxGas int64) types.Txs Update(int64, types.Txs, mempool.PreCheckFunc, mempool.PostCheckFunc) error Flush() @@ -37,11 +38,17 @@ type MockMempool struct{} var _ Mempool = MockMempool{} -func (MockMempool) Lock() {} -func (MockMempool) Unlock() {} -func (MockMempool) Size() int { return 0 } -func (MockMempool) CheckTx(_ types.Tx, _ func(*abci.Response)) error { return nil } -func (MockMempool) ReapMaxBytesMaxGas(_, _ int64) types.Txs { return types.Txs{} } +func (MockMempool) Lock() {} +func (MockMempool) Unlock() {} +func (MockMempool) Size() int { return 0 } +func (MockMempool) CheckTx(_ types.Tx, _ func(*abci.Response)) error { + return nil +} +func (MockMempool) CheckTxWithInfo(_ types.Tx, _ func(*abci.Response), + _ mempool.TxInfo) error { + return nil +} +func (MockMempool) ReapMaxBytesMaxGas(_, _ int64) types.Txs { return types.Txs{} } func (MockMempool) Update( _ int64, _ types.Txs, @@ -94,4 +101,4 @@ type MockEvidencePool struct{} func (m MockEvidencePool) PendingEvidence(int64) []types.Evidence { return nil } func (m MockEvidencePool) AddEvidence(types.Evidence) error { return nil } func (m MockEvidencePool) Update(*types.Block, State) {} -func (m MockEvidencePool) IsCommitted(types.Evidence) bool {return false} +func (m MockEvidencePool) IsCommitted(types.Evidence) bool { return false } From 7e669d403ed7af4ba024642f78c1e5faf44a8bfd Mon Sep 17 00:00:00 2001 From: chengwenxi Date: Wed, 3 Jul 2019 11:23:41 +0800 Subject: [PATCH 25/26] Client disable compression --- rpc/lib/client/http_client.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/rpc/lib/client/http_client.go b/rpc/lib/client/http_client.go index 21be5fe0c05..11ffd156c1a 100644 --- a/rpc/lib/client/http_client.go +++ b/rpc/lib/client/http_client.go @@ -74,7 +74,9 @@ func makeHTTPClient(remoteAddr string) (string, *http.Client) { protocol, address, dialer := makeHTTPDialer(remoteAddr) return protocol + "://" + address, &http.Client{ Transport: &http.Transport{ - Dial: dialer, + // Set to true to prevent GZIP-bomb DoS attacks + DisableCompression: true, + Dial: dialer, }, } } From 
e7811f1c2ce96944ce739b6bfc0fc9d457c689e0 Mon Sep 17 00:00:00 2001 From: chengwenxi Date: Fri, 5 Jul 2019 10:38:44 +0800 Subject: [PATCH 26/26] Update version --- version/version.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version/version.go b/version/version.go index 2636e4c3eb9..b3853175d3e 100644 --- a/version/version.go +++ b/version/version.go @@ -18,7 +18,7 @@ const ( // TMCoreSemVer is the current version of Tendermint Core. // It's the Semantic Version of the software. // Must be a string because scripts like dist.sh read this file. - TMCoreSemVer = "0.30.0-dev" + TMCoreSemVer = "0.31.0" // ABCISemVer is the semantic version of the ABCI library ABCISemVer = "0.15.0"
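A closing note on the compression patch above: with transparent response decompression disabled, a hostile server can no longer amplify a tiny gzip body into an enormous decoded response inside the client. A sketch of a client assembled the same way (newUncompressedClient and the fixed three-second dial timeout are inventions of this example; the real makeHTTPClient derives its dialer, and the network protocol, from the remote address):

package main

import (
	"net"
	"net/http"
	"time"
)

func newUncompressedClient(addr string) *http.Client {
	return &http.Client{
		Transport: &http.Transport{
			// Mirrors the patch: refuse transparent gzip decoding so a
			// malicious server cannot feed the client a gzip bomb.
			DisableCompression: true,
			Dial: func(network, _ string) (net.Conn, error) {
				return net.DialTimeout("tcp", addr, 3*time.Second)
			},
		},
	}
}

func main() {
	c := newUncompressedClient("127.0.0.1:26657")
	_ = c // construct only; issuing a request needs a live node
}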