Skip to content

Commit

Permalink
Merge "Panic when the network can't get stable checkpoint cert"
Browse files Browse the repository at this point in the history
  • Loading branch information
binhn authored and Gerrit Code Review committed Sep 11, 2016
2 parents 165c0ed + 30f832f commit c891561
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 4 deletions.
20 changes: 18 additions & 2 deletions consensus/pbft/pbft-core.go
Original file line number Diff line number Diff line change
Expand Up @@ -1160,15 +1160,31 @@ func (instance *pbftCore) recvCheckpoint(chkpt *Checkpoint) events.Event {

instance.checkpointStore[*chkpt] = true

// Track how many different checkpoint values we have for the seqNo in question
diffValues := make(map[string]struct{})
diffValues[chkpt.Id] = struct{}{}

matching := 0
for testChkpt := range instance.checkpointStore {
if testChkpt.SequenceNumber == chkpt.SequenceNumber && testChkpt.Id == chkpt.Id {
matching++
if testChkpt.SequenceNumber == chkpt.SequenceNumber {
if testChkpt.Id == chkpt.Id {
matching++
} else {
if _, ok := diffValues[testChkpt.Id]; !ok {
diffValues[testChkpt.Id] = struct{}{}
}
}
}
}
logger.Debugf("Replica %d found %d matching checkpoints for seqNo %d, digest %s",
instance.id, matching, chkpt.SequenceNumber, chkpt.Id)

// If f+2 different values have been observed, we'll never be able to get a stable cert for this seqNo
if count := len(diffValues); count > instance.f+1 {
logger.Panicf("Network unable to find stable certificate for seqNo %d (%d different values observed already)",
chkpt.SequenceNumber, count)
}

if matching == instance.f+1 {
// We have a weak cert
// If we have generated a checkpoint for this seqNo, make sure we have a match
Expand Down
25 changes: 23 additions & 2 deletions consensus/pbft/pbft-core_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"fmt"
"os"
"reflect"
"strconv"
"strings"
"sync"
"testing"
Expand Down Expand Up @@ -1763,7 +1764,7 @@ func TestCheckpointDiffersFromWeakCert(t *testing.T) {

badChkpt := &Checkpoint{
SequenceNumber: 10,
Id: base64.StdEncoding.EncodeToString([]byte("WRONG")),
Id: "WRONG",
ReplicaId: 3,
}
instance.chkpts[10] = badChkpt.Id // This is done via the exec path, shortcut it here
Expand All @@ -1772,7 +1773,7 @@ func TestCheckpointDiffersFromWeakCert(t *testing.T) {
for i := uint64(0); i < 2; i++ {
events.SendEvent(instance, &Checkpoint{
SequenceNumber: 10,
Id: base64.StdEncoding.EncodeToString([]byte("CORRECT")),
Id: "CORRECT",
ReplicaId: i,
})
}
Expand All @@ -1781,3 +1782,23 @@ func TestCheckpointDiffersFromWeakCert(t *testing.T) {
t.Fatalf("State target should not have been updated")
}
}

// This test is designed to ensure the peer panics if it observes > f+1 different checkpoint values for the same seqNo
// This indicates a network that will be unable to move its watermarks and thus progress
func TestNoCheckpointQuorum(t *testing.T) {
defer func() {
if r := recover(); r == nil {
t.Errorf("More than f+1 different checkpoint values found, should have panicked.")
}
}()

instance := newPbftCore(3, loadConfig(), &omniProto{}, &inertTimerFactory{})

for i := uint64(0); i < 3; i++ {
events.SendEvent(instance, &Checkpoint{
SequenceNumber: 10,
Id: strconv.FormatUint(i, 10),
ReplicaId: i,
})
}
}

0 comments on commit c891561

Please sign in to comment.