Skip to content

Commit

Permalink
fix(privval): retry accepting a connection on errors (cometbft#2047)
Browse files Browse the repository at this point in the history
Before the change, we'd error after just 1 attempt of accepting a
connection.

Co-authored by: @corverroos

Closes cometbft#452

Port of corverroos/tendermint#1

---

#### PR checklist

- [x] Tests written/updated
- [x] Changelog entry added in `.changelog` (we use
[unclog](https://github.com/informalsystems/unclog) to manage our
changelog)
- [x] Updated relevant documentation (`docs/` or `spec/`) and code
comments
  • Loading branch information
melekes authored Jan 30, 2024
1 parent 0ccf9b6 commit a8991d6
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 18 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
- `[privval]` Retry accepting a connection ([\#2047](https://github.com/cometbft/cometbft/pull/2047))
32 changes: 14 additions & 18 deletions privval/signer_listener_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,10 @@ func NewSignerListenerEndpoint(

// OnStart implements service.Service.
func (sl *SignerListenerEndpoint) OnStart() error {
sl.connectRequestCh = make(chan struct{})
sl.connectRequestCh = make(chan struct{}, 1) // Buffer of 1 to allow `serviceLoop` to re-trigger itself.
sl.connectionAvailableCh = make(chan net.Conn)

// NOTE: ping timeout must be less than read/write timeout
// NOTE: ping timeout must be less than read/write timeout.
sl.pingInterval = time.Duration(sl.signerEndpoint.timeoutReadWrite.Milliseconds()*2/3) * time.Millisecond
sl.pingTimer = time.NewTicker(sl.pingInterval)

Expand Down Expand Up @@ -181,23 +181,19 @@ func (sl *SignerListenerEndpoint) serviceLoop() {
for {
select {
case <-sl.connectRequestCh:
{
conn, err := sl.acceptNewConnection()
if err == nil {
sl.Logger.Info("SignerListener: Connected")

// We have a good connection, wait for someone that needs one otherwise cancellation
select {
case sl.connectionAvailableCh <- conn:
case <-sl.Quit():
return
}
}
conn, err := sl.acceptNewConnection()
if err != nil {
sl.Logger.Error("SignerListener: Error accepting connection", "err", err)
sl.triggerConnect()
continue
}

select {
case sl.connectRequestCh <- struct{}{}:
default:
}
// We have a good connection, wait for someone that needs one otherwise cancellation
sl.Logger.Info("SignerListener: Connected")
select {
case sl.connectionAvailableCh <- conn:
case <-sl.Quit():
return
}
case <-sl.Quit():
return
Expand Down
26 changes: 26 additions & 0 deletions privval/signer_listener_endpoint_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package privval

import (
"errors"
"net"
"testing"
"time"
Expand Down Expand Up @@ -214,3 +215,28 @@ func getMockEndpoints(

return listenerEndpoint, dialerEndpoint
}

func TestSignerListenerEndpointServiceLoop(t *testing.T) {
listenerEndpoint := NewSignerListenerEndpoint(
log.TestingLogger(),
&testListener{initialErrs: 5},
)

require.NoError(t, listenerEndpoint.Start())
require.NoError(t, listenerEndpoint.WaitForConnection(time.Second))
}

type testListener struct {
net.Listener
initialErrs int
}

func (l *testListener) Accept() (net.Conn, error) {
if l.initialErrs > 0 {
l.initialErrs--

return nil, errors.New("accept error")
}

return nil, nil // Note this doesn't actually return a valid connection, it just doesn't error.
}

0 comments on commit a8991d6

Please sign in to comment.