Skip to content

Commit

Permalink
asim: add zone config satisfiability check
Browse files Browse the repository at this point in the history
Now that we have added the option to generate random span configurations in
cockroachdb#110967, we want to have a way to check whether these configurations are
satisfiable with the cluster setting.

This patch adds the validation check. Please note that the validation process can
be expensive, with a time complexity of O(max(node count in the cluster, number of
replica constraints, number of voter constraints)). To perform this validation
and see which span configs could lead to failure, please use the following command:

```
"eval" [verbose=validate]
```

See also: cockroachdb#110967
Part of: cockroachdb#106192
Release Note: none
Epic: none
  • Loading branch information
wenyihu6 committed Aug 26, 2024
1 parent b1cffb0 commit a4abd68
Show file tree
Hide file tree
Showing 35 changed files with 2,175 additions and 88 deletions.
6 changes: 6 additions & 0 deletions pkg/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -223,13 +223,15 @@ ALL_TESTS = [
"//pkg/kv/kvserver/allocator/plan:plan_test",
"//pkg/kv/kvserver/allocator/storepool:storepool_test",
"//pkg/kv/kvserver/apply:apply_test",
"//pkg/kv/kvserver/asim/event:validator_test",
"//pkg/kv/kvserver/asim/gossip:gossip_test",
"//pkg/kv/kvserver/asim/metrics:metrics_test",
"//pkg/kv/kvserver/asim/op:op_test",
"//pkg/kv/kvserver/asim/queue:queue_test",
"//pkg/kv/kvserver/asim/state:state_test",
"//pkg/kv/kvserver/asim/storerebalancer:storerebalancer_test",
"//pkg/kv/kvserver/asim/tests:tests_test",
"//pkg/kv/kvserver/asim/validator:validator_test",
"//pkg/kv/kvserver/asim/workload:workload_test",
"//pkg/kv/kvserver/asim:asim_test",
"//pkg/kv/kvserver/batcheval/result:result_test",
Expand Down Expand Up @@ -1395,6 +1397,8 @@ GO_TARGETS = [
"//pkg/kv/kvserver/asim/assertion:assertion",
"//pkg/kv/kvserver/asim/config:config",
"//pkg/kv/kvserver/asim/event:event",
"//pkg/kv/kvserver/asim/event:validator",
"//pkg/kv/kvserver/asim/event:validator_test",
"//pkg/kv/kvserver/asim/gen:gen",
"//pkg/kv/kvserver/asim/gossip:gossip",
"//pkg/kv/kvserver/asim/gossip:gossip_test",
Expand All @@ -1412,6 +1416,8 @@ GO_TARGETS = [
"//pkg/kv/kvserver/asim/storerebalancer:storerebalancer_test",
"//pkg/kv/kvserver/asim/tests:tests",
"//pkg/kv/kvserver/asim/tests:tests_test",
"//pkg/kv/kvserver/asim/validator:validator",
"//pkg/kv/kvserver/asim/validator:validator_test",
"//pkg/kv/kvserver/asim/workload:workload",
"//pkg/kv/kvserver/asim/workload:workload_test",
"//pkg/kv/kvserver/asim:asim",
Expand Down
1 change: 1 addition & 0 deletions pkg/kv/kvserver/asim/assertion/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ go_library(
"//pkg/kv/kvserver",
"//pkg/kv/kvserver/asim/history",
"//pkg/kv/kvserver/asim/metrics",
"//pkg/kv/kvserver/asim/state",
"//pkg/roachpb",
"//pkg/spanconfig/spanconfigtestutils",
"//pkg/util/log",
Expand Down
185 changes: 161 additions & 24 deletions pkg/kv/kvserver/asim/assertion/assert.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,13 @@ import (
"context"
"fmt"
"math"
"sort"
"strings"

"github.com/cockroachdb/cockroach/pkg/kv/kvserver"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/asim/history"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/asim/metrics"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/asim/state"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/spanconfig/spanconfigtestutils"
"github.com/cockroachdb/cockroach/pkg/util/log"
Expand Down Expand Up @@ -301,6 +303,7 @@ func (sa StoreStatAssertion) String() string {
}

type ConformanceAssertion struct {
BetterFormat bool
Underreplicated int
Overreplicated int
ViolatingConstraints int
Expand Down Expand Up @@ -377,38 +380,68 @@ func (ca ConformanceAssertion) Assert(
if ca.Unavailable != ConformanceAssertionSentinel &&
ca.Unavailable != unavailable {
maybeInitHolds()
buf.WriteString(PrintSpanConfigConformanceList(
"unavailable", replicaReport.Unavailable))
if ca.BetterFormat {
buf.WriteString(PrettyPrintSpanConfigConformanceList(h.S.NodeLocalityMap(),
"unavailable", replicaReport.Unavailable))
} else {
buf.WriteString(PrintSpanConfigConformanceList(h.S.NodeLocalityMap(),
"unavailable", replicaReport.Unavailable))
}
}
if ca.Underreplicated != ConformanceAssertionSentinel &&
ca.Underreplicated != under {
maybeInitHolds()
buf.WriteString(PrintSpanConfigConformanceList(
"under replicated", replicaReport.UnderReplicated))
if ca.BetterFormat {
buf.WriteString(PrettyPrintSpanConfigConformanceList(h.S.NodeLocalityMap(),
"under replicated", replicaReport.UnderReplicated))
} else {
buf.WriteString(PrintSpanConfigConformanceList(h.S.NodeLocalityMap(),
"under replicated", replicaReport.UnderReplicated))
}
}
if ca.Overreplicated != ConformanceAssertionSentinel &&
ca.Overreplicated != over {
maybeInitHolds()
buf.WriteString(PrintSpanConfigConformanceList(
"over replicated", replicaReport.OverReplicated))
if ca.BetterFormat {
buf.WriteString(PrettyPrintSpanConfigConformanceList(h.S.NodeLocalityMap(),
"over replicated", replicaReport.OverReplicated))
} else {
buf.WriteString(PrintSpanConfigConformanceList(h.S.NodeLocalityMap(),
"over replicated", replicaReport.OverReplicated))
}
}
if ca.ViolatingConstraints != ConformanceAssertionSentinel &&
ca.ViolatingConstraints != violatingConstraints {
maybeInitHolds()
buf.WriteString(PrintSpanConfigConformanceList(
"violating constraints", replicaReport.ViolatingConstraints))
if ca.BetterFormat {
buf.WriteString(PrettyPrintSpanConfigConformanceList(h.S.NodeLocalityMap(),
"violating constraints", replicaReport.ViolatingConstraints))
} else {
buf.WriteString(PrintSpanConfigConformanceList(h.S.NodeLocalityMap(),
"violating constraints", replicaReport.ViolatingConstraints))
}
}
if ca.ViolatingLeasePreferences != ConformanceAssertionSentinel &&
ca.ViolatingLeasePreferences != violatingLeases {
maybeInitHolds()
buf.WriteString(PrintSpanConfigConformanceList(
"violating lease preferences", leaseViolatingPrefs))
if ca.BetterFormat {
buf.WriteString(PrettyPrintSpanConfigConformanceList(h.S.NodeLocalityMap(),
"violating lease preferences", leaseViolatingPrefs))
} else {
buf.WriteString(PrintSpanConfigConformanceList(h.S.NodeLocalityMap(),
"violating lease preferences", leaseViolatingPrefs))
}
}
if ca.LessPreferredLeases != ConformanceAssertionSentinel &&
ca.LessPreferredLeases != lessPrefLeases {
maybeInitHolds()
buf.WriteString(PrintSpanConfigConformanceList(
"less preferred preferences", leaseLessPrefs))
if ca.BetterFormat {
buf.WriteString(PrettyPrintSpanConfigConformanceList(h.S.NodeLocalityMap(),
"less preferred preferences", leaseLessPrefs))
} else {
buf.WriteString(PrintSpanConfigConformanceList(h.S.NodeLocalityMap(),
"less preferred preferences", leaseLessPrefs))
}
}

return holds, buf.String()
Expand Down Expand Up @@ -439,32 +472,136 @@ func (ca ConformanceAssertion) String() string {
return buf.String()
}

func printRangeDesc(r roachpb.RangeDescriptor) string {
// formatType maps a replica type to the short lower-case label used when
// summarizing replicas (for example "voter" or "non-voter"). It panics with
// "unknown replica type" when given a type that has no label here.
func formatType(t roachpb.ReplicaType) string {
	var label string
	switch t {
	case roachpb.VOTER_FULL:
		label = "voter"
	case roachpb.NON_VOTER:
		label = "non-voter"
	case roachpb.LEARNER:
		label = "learner"
	case roachpb.VOTER_INCOMING:
		label = "voter-incoming"
	case roachpb.VOTER_OUTGOING:
		label = "voter-outgoing"
	case roachpb.VOTER_DEMOTING_LEARNER:
		label = "voter-demoting-learner"
	case roachpb.VOTER_DEMOTING_NON_VOTER:
		label = "voter-demoting-non-voter"
	default:
		// Every type handled above has a label; anything else is unexpected.
		panic("unknown replica type")
	}
	return label
}

func sumRangeInfo(
nodes map[state.NodeID]roachpb.Locality, replicas []roachpb.ReplicaDescriptor,
) string {
if len(replicas) <= 0 {
return "<no replicas>"
}

var buf strings.Builder
buf.WriteString(fmt.Sprintf("r%d:", r.RangeID))
buf.WriteString(r.RSpan().String())
buf.WriteString(" [")
if allReplicas := r.Replicas().Descriptors(); len(allReplicas) > 0 {
for i, rep := range allReplicas {
tiers := make(map[string]map[roachpb.ReplicaType]int)
for _, rep := range replicas {
val, ok := nodes[state.NodeID(rep.NodeID)]
if !ok {
panic(fmt.Sprintf("node %d not found", rep.NodeID))
}
for _, tier := range val.Tiers {
if _, ok := tiers[tier.Value]; !ok {
tiers[tier.Value] = make(map[roachpb.ReplicaType]int)
}
tiers[tier.Value][rep.Type]++
}
}

keys := make([]string, 0, len(tiers))
for k := range tiers {
keys = append(keys, k)
}
sort.Strings(keys)

for _, k := range keys {
v := tiers[k]
if buf.Len() > 0 {
buf.WriteString(", ")
}
buf.WriteString(fmt.Sprintf("%s: ", k))
keys := make([]int, 0, len(v))
for k := range v {
keys = append(keys, int(k))
}
sort.Ints(keys)
for i, t := range keys {
if i > 0 {
buf.WriteString(", ")
buf.WriteString(" ")
}
count := v[roachpb.ReplicaType(t)]
if count != 0 {
if count > 1 {
buf.WriteString(fmt.Sprintf("%d %ss", count, formatType(roachpb.ReplicaType(t))))
} else {
buf.WriteString(fmt.Sprintf("%d %s", count, formatType(roachpb.ReplicaType(t))))
}
}
buf.WriteString(rep.String())
}
} else {
buf.WriteString("<no replicas>")
}
return buf.String()
}

// prettyPrintRangeDesc renders a range descriptor as "r<id>: [<summary>]".
// The summary comes from sumRangeInfo and is computed over at most the first
// three replica descriptors of the range.
func prettyPrintRangeDesc(
	nodes map[state.NodeID]roachpb.Locality, r roachpb.RangeDescriptor,
) string {
	// TODO(wenyi): revisit replicaset helper functions.
	descs := r.Replicas().Descriptors()
	if len(descs) > 3 {
		// Only the first three replicas are summarized.
		descs = descs[:3]
	}
	return fmt.Sprintf("r%d: [%s]", r.RangeID, sumRangeInfo(nodes, descs))
}

// printRangeDesc renders a range descriptor as "r<id>:<span> [<summary>]",
// where <span> is the string form of the range's RSpan and <summary> is the
// sumRangeInfo output for at most the first three replica descriptors.
func printRangeDesc(nodes map[state.NodeID]roachpb.Locality, r roachpb.RangeDescriptor) string {
	span := r.RSpan().String()
	// TODO(wenyi): revisit replicaset helper functions.
	descs := r.Replicas().Descriptors()
	if len(descs) > 3 {
		// Only the first three replicas are summarized.
		descs = descs[:3]
	}
	return fmt.Sprintf("r%d:%s [%s]", r.RangeID, span, sumRangeInfo(nodes, descs))
}

func PrintSpanConfigConformanceList(tag string, ranges []roachpb.ConformanceReportedRange) string {
// PrettyPrintSpanConfigConformanceList formats a list of conformance-reported
// ranges under the heading "<tag>:". Each range is written on its own line
// using prettyPrintRangeDesc. At most three ranges are listed; when more are
// present the output ends with "... and N more", where N is the number of
// omitted ranges. An empty list yields the empty string.
func PrettyPrintSpanConfigConformanceList(
	nodes map[state.NodeID]roachpb.Locality, tag string, ranges []roachpb.ConformanceReportedRange,
) string {
	// maxShown is the number of ranges printed before the list is elided.
	const maxShown = 3

	total := len(ranges)
	if total == 0 {
		return ""
	}

	var out strings.Builder
	fmt.Fprintf(&out, "%s:\n", tag)
	for idx := 0; idx < total && idx < maxShown; idx++ {
		fmt.Fprintf(&out, " %s", prettyPrintRangeDesc(nodes, ranges[idx].RangeDescriptor))
		// Separate entries with a newline, but do not terminate the final
		// entry of the full list.
		if idx != total-1 {
			out.WriteString("\n")
		}
	}
	if total > maxShown {
		fmt.Fprintf(&out, "... and %d more", total-maxShown)
	}
	return out.String()
}

func PrintSpanConfigConformanceList(
nodes map[state.NodeID]roachpb.Locality, tag string, ranges []roachpb.ConformanceReportedRange,
) string {
var buf strings.Builder
for i, r := range ranges {
if i == 0 {
buf.WriteString(fmt.Sprintf("%s:\n", tag))
}
buf.WriteString(fmt.Sprintf(" %s applying %s", printRangeDesc(r.RangeDescriptor),
buf.WriteString(fmt.Sprintf(" %s applying %s", printRangeDesc(nodes, r.RangeDescriptor),
spanconfigtestutils.PrintSpanConfigDiffedAgainstDefaults(r.Config)))
if i != len(ranges)-1 {
buf.WriteString("\n")
Expand Down
30 changes: 29 additions & 1 deletion pkg/kv/kvserver/asim/event/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -1,4 +1,31 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library")
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")

go_library(
name = "validator",
srcs = [
"config_validator.go",
"validator.go",
],
visibility = ["//visibility:public"],
deps = [
"//pkg/kv/kvserver/asim/event",
"//pkg/kv/kvserver/asim/state",
"//pkg/roachpb",
"@com_github_cockroachdb_errors//:errors",
],
)

go_test(
name = "validator_test",
srcs = ["validator_test.go"],
args = ["-test.timeout=295s"],
embed = [":validator"],
deps = [
"//pkg/kv/kvserver/asim/state",
"//pkg/spanconfig/spanconfigtestutils",
"@com_github_stretchr_testify//require",
],
)

go_library(
name = "event",
Expand All @@ -13,6 +40,7 @@ go_library(
"//pkg/kv/kvserver/asim/assertion",
"//pkg/kv/kvserver/asim/history",
"//pkg/kv/kvserver/asim/state",
"//pkg/kv/kvserver/asim/validator",
"//pkg/kv/kvserver/liveness/livenesspb",
"//pkg/roachpb",
"//pkg/util/log",
Expand Down
Loading

0 comments on commit a4abd68

Please sign in to comment.