From 3eb6597e29dd6f6e6b038f993a14912951566f83 Mon Sep 17 00:00:00 2001 From: wenyihu6 Date: Thu, 31 Aug 2023 10:24:06 -0400 Subject: [PATCH] asim: add zone config satisfiability check Now that we have added the option to generate random span configurations in #110967, we want to have a way to check whether these configurations are satisfiable under the cluster setup. This patch adds the validation check. Please note that the validation process can be expensive, with a time complexity of O(max(node count in the cluster, number of replica constraints, number of voter constraints)). To perform this validation and see which span config could lead to failure, please use the following command: ``` "eval" [verbose=validate] ``` See also: #110967 Part of: #106192 Release Note: none Epic: none --- pkg/BUILD.bazel | 3 + .../scheduled/scheduled_event_executor.go | 7 + pkg/kv/kvserver/asim/tests/BUILD.bazel | 1 + pkg/kv/kvserver/asim/tests/output.go | 12 +- pkg/kv/kvserver/asim/tests/rand_gen.go | 3 +- pkg/kv/kvserver/asim/tests/rand_test.go | 3 +- .../asim/tests/testdata/rand/default_settings | 6 + .../asim/tests/testdata/rand/rand_cluster | 6 + .../asim/tests/testdata/rand/rand_event | 381 ++++++++++++++-- .../asim/tests/testdata/rand/rand_ranges | 2 + pkg/kv/kvserver/asim/validator/BUILD.bazel | 30 ++ .../asim/validator/config_validator.go | 414 ++++++++++++++++++ pkg/kv/kvserver/asim/validator/validator.go | 49 +++ .../kvserver/asim/validator/validator_test.go | 246 +++++++++++ 14 files changed, 1116 insertions(+), 47 deletions(-) create mode 100644 pkg/kv/kvserver/asim/validator/BUILD.bazel create mode 100644 pkg/kv/kvserver/asim/validator/config_validator.go create mode 100644 pkg/kv/kvserver/asim/validator/validator.go create mode 100644 pkg/kv/kvserver/asim/validator/validator_test.go diff --git a/pkg/BUILD.bazel b/pkg/BUILD.bazel index df026e79c24b..244cef1c5c31 100644 --- a/pkg/BUILD.bazel +++ b/pkg/BUILD.bazel @@ -222,6 +222,7 @@ ALL_TESTS = [ 
"//pkg/kv/kvserver/asim/state:state_test", "//pkg/kv/kvserver/asim/storerebalancer:storerebalancer_test", "//pkg/kv/kvserver/asim/tests:tests_test", + "//pkg/kv/kvserver/asim/validator:validator_test", "//pkg/kv/kvserver/asim/workload:workload_test", "//pkg/kv/kvserver/asim:asim_test", "//pkg/kv/kvserver/batcheval/result:result_test", @@ -1376,6 +1377,8 @@ GO_TARGETS = [ "//pkg/kv/kvserver/asim/storerebalancer:storerebalancer_test", "//pkg/kv/kvserver/asim/tests:tests", "//pkg/kv/kvserver/asim/tests:tests_test", + "//pkg/kv/kvserver/asim/validator:validator", + "//pkg/kv/kvserver/asim/validator:validator_test", "//pkg/kv/kvserver/asim/workload:workload", "//pkg/kv/kvserver/asim/workload:workload_test", "//pkg/kv/kvserver/asim:asim", diff --git a/pkg/kv/kvserver/asim/scheduled/scheduled_event_executor.go b/pkg/kv/kvserver/asim/scheduled/scheduled_event_executor.go index 943e3b82c1ab..f42f678300bb 100644 --- a/pkg/kv/kvserver/asim/scheduled/scheduled_event_executor.go +++ b/pkg/kv/kvserver/asim/scheduled/scheduled_event_executor.go @@ -41,6 +41,8 @@ type EventExecutor interface { // events including details of mutation events, assertion checks, and assertion // results. PrintEventsExecuted() string + // ScheduledEvents returns the list of scheduled events. + ScheduledEvents() ScheduledEventList } // eventExecutor is the private implementation of the EventExecutor interface, @@ -71,6 +73,11 @@ func newExecutorWithNoEvents() *eventExecutor { } } +// ScheduledEvents returns the list of scheduled events. +func (e *eventExecutor) ScheduledEvents() ScheduledEventList { + return e.scheduledEvents +} + // PrintEventSummary returns a string summarizing the executed mutation and // assertion events. 
func (e *eventExecutor) PrintEventSummary() string { diff --git a/pkg/kv/kvserver/asim/tests/BUILD.bazel b/pkg/kv/kvserver/asim/tests/BUILD.bazel index bbeb48984ecc..fe36b7a1218e 100644 --- a/pkg/kv/kvserver/asim/tests/BUILD.bazel +++ b/pkg/kv/kvserver/asim/tests/BUILD.bazel @@ -20,6 +20,7 @@ go_library( "//pkg/kv/kvserver/asim/history", "//pkg/kv/kvserver/asim/scheduled", "//pkg/kv/kvserver/asim/state", + "//pkg/kv/kvserver/asim/validator", "//pkg/roachpb", "//pkg/sql", "//pkg/sql/catalog/catpb", diff --git a/pkg/kv/kvserver/asim/tests/output.go b/pkg/kv/kvserver/asim/tests/output.go index cf1a01a29a58..87dbabaf7f00 100644 --- a/pkg/kv/kvserver/asim/tests/output.go +++ b/pkg/kv/kvserver/asim/tests/output.go @@ -18,6 +18,8 @@ import ( "github.com/cockroachdb/cockroach/pkg/kv/kvserver/asim/gen" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/asim/history" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/asim/scheduled" + "github.com/cockroachdb/cockroach/pkg/kv/kvserver/asim/state" + "github.com/cockroachdb/cockroach/pkg/kv/kvserver/asim/validator" ) // OutputFlags sets flags for what to output in tests. If you want to add a flag @@ -39,15 +41,18 @@ const ( OutputTopology // 1 << 3: 0000 1000 // OutputEvents displays delayed events executed. OutputEvents // 1 << 4: 0001 0000 + // OutputValidationResult displays validation result of whether any events are + // expected to lead to assertion failure. + OutputValidationResult // 1 << 5: 0010 0000 // OutputAll shows everything above. - OutputAll = (1 << (iota - 1)) - 1 // (1 << 5) - 1: 0001 1111 + OutputAll = (1 << (iota - 1)) - 1 // (1 << 6) - 1: 0011 1111 ) // ScanFlags converts an array of input strings into a single flag. 
func (o OutputFlags) ScanFlags(inputs []string) OutputFlags { dict := map[string]OutputFlags{"result_only": OutputResultOnly, "test_settings": OutputTestSettings, "initial_state": OutputInitialState, "config_gen": OutputConfigGen, "topology": OutputTopology, - "events": OutputEvents, "all": OutputAll} + "events": OutputEvents, "validate": OutputValidationResult, "all": OutputAll} flag := OutputResultOnly for _, input := range inputs { flag = flag.set(dict[input]) @@ -169,6 +174,9 @@ func (tr testResultsReport) String() string { if failed || tr.flags.Has(OutputEvents) { buf.WriteString(output.eventExecutor.PrintEventsExecuted()) } + if failed || tr.flags.Has(OutputValidationResult) { + buf.WriteString(validator.Validate(output.initialState, output.eventExecutor)) + } if failed { buf.WriteString(fmt.Sprintf("sample%d: failed assertion\n%s\n", nthSample, output.reason)) } else { diff --git a/pkg/kv/kvserver/asim/tests/rand_gen.go b/pkg/kv/kvserver/asim/tests/rand_gen.go index a742f7a4318b..3a3f627f2460 100644 --- a/pkg/kv/kvserver/asim/tests/rand_gen.go +++ b/pkg/kv/kvserver/asim/tests/rand_gen.go @@ -404,7 +404,8 @@ func randomlySelectDataPlacement(randSource *rand.Rand) descpb.DataPlacement { // intervals defined by durationToAssert from the start time. These events apply // a randomly generated zone configuration followed by an assertion event. Note // that these random configurations might be unsatisfiable under the cluster -// setup. +// setup. To validate whether the configurations generated are satisfiable, +// please use "eval" [verbose=validate]. func generateRandomSurvivalGoalsEvents( regions []state.Region, startTime time.Time, diff --git a/pkg/kv/kvserver/asim/tests/rand_test.go b/pkg/kv/kvserver/asim/tests/rand_test.go index d21b00e3002c..fb6cfe173b1c 100644 --- a/pkg/kv/kvserver/asim/tests/rand_test.go +++ b/pkg/kv/kvserver/asim/tests/rand_test.go @@ -83,7 +83,8 @@ const ( // 4. sum of weights in the array should be equal to 1 // 3. 
"eval" [seed=] [num_iterations=] [duration=] -// [verbose=(<[]("result_only","test_settings","initial_state","config_gen","event","topology","all")>)] +// [verbose=(<[]("result_only","test_settings","initial_state","config_gen", +// "events","topology","validate","all")>)] // e.g. eval seed=20 duration=30m2s verbose=(test_settings,initial_state) // - eval: generates a simulation based on the configuration set with the given // commands. diff --git a/pkg/kv/kvserver/asim/tests/testdata/rand/default_settings b/pkg/kv/kvserver/asim/tests/testdata/rand/default_settings index 94d089ed8c15..f002c4098922 100644 --- a/pkg/kv/kvserver/asim/tests/testdata/rand/default_settings +++ b/pkg/kv/kvserver/asim/tests/testdata/rand/default_settings @@ -232,6 +232,8 @@ AU_EAST AU_EAST_1 └── [1 2 3] no events were scheduled +validation result: + valid sample1: pass ---------------------------------- sample2: start running @@ -247,6 +249,8 @@ AU_EAST AU_EAST_1 └── [1 2 3] no events were scheduled +validation result: + valid sample2: pass ---------------------------------- sample3: start running @@ -262,6 +266,8 @@ AU_EAST AU_EAST_1 └── [1 2 3] no events were scheduled +validation result: + valid sample3: pass ---------------------------------- diff --git a/pkg/kv/kvserver/asim/tests/testdata/rand/rand_cluster b/pkg/kv/kvserver/asim/tests/testdata/rand/rand_cluster index 9128acba4c58..22bc21cbca94 100644 --- a/pkg/kv/kvserver/asim/tests/testdata/rand/rand_cluster +++ b/pkg/kv/kvserver/asim/tests/testdata/rand/rand_cluster @@ -89,6 +89,8 @@ US_West US_West_1 └── [17 18] no events were scheduled +validation result: + valid sample1: pass ---------------------------------- sample2: start running @@ -125,6 +127,8 @@ US_West US_West_3 └── [21 22 23 24] no events were scheduled +validation result: + valid sample2: pass ---------------------------------- sample3: start running @@ -159,6 +163,8 @@ US_West US_West_1 └── [17 18] no events were scheduled +validation result: + valid sample3: pass 
---------------------------------- diff --git a/pkg/kv/kvserver/asim/tests/testdata/rand/rand_event b/pkg/kv/kvserver/asim/tests/testdata/rand/rand_event index 0ced167c709a..45a31abfd921 100644 --- a/pkg/kv/kvserver/asim/tests/testdata/rand/rand_event +++ b/pkg/kv/kvserver/asim/tests/testdata/rand/rand_event @@ -7,52 +7,10 @@ change_static_option ranges=1 rand_events type=cycle_via_random_survival_goals duration_to_assert_on_event=5m ---- -eval duration=60m num_iterations=1 verbose=(all) +eval duration=60m num_iterations=5 verbose=(events,validate) ---- -test settings - num_iterations=1 duration=1h0m0s ----------------------------------- -generating cluster configurations using randomized option - cluster_gen_type=multi_region -generating ranges configurations using static option - placement_type=even, ranges=1, key_space=200000, replication_factor=3, bytes=0 -generating load configurations using static option - rw_ratio=0.00, rate=0.00, min_block=1, max_block=1, min_key=1, max_key=200000, skewed_access=false -generating events configurations using randomized option - duration_to_assert_on_event=5m0s, type=cycle_via_random_survival_goals -generating settings configurations using static option ---------------------------------- sample1: start running -configurations generated using seed 7894140303635748408 - loaded cluster with - region:US_East [zone=US_East_1(nodes=1,stores=1), zone=US_East_2(nodes=2,stores=1), zone=US_East_3(nodes=3,stores=1), zone=US_East_3(nodes=10,stores=1)] - region:US_West [zone=US_West_1(nodes=2,stores=1)] - region:EU [zone=EU_1(nodes=3,stores=1), zone=EU_2(nodes=3,stores=1), zone=EU_3(nodes=4,stores=1)] - basic ranges with placement_type=even, ranges=1, key_space=200000, replication_factor=3, bytes=0 - basic load with rw_ratio=0.00, rate=0.00, skewed_access=false, min_block_size=1, max_block_size=1, min_key=1, max_key=200000 - number of mutation events=12, number of assertion events=12 -initial state at 2022-03-21 11:00:00: - 
stores(28)=[s1n1=(replicas(1)),s2n2=(replicas(1)),s3n3=(replicas(1)),s4n4=(replicas(0)),s5n5=(replicas(0)),s6n6=(replicas(0)),s7n7=(replicas(0)),s8n8=(replicas(0)),s9n9=(replicas(0)),s10n10=(replicas(0)),s11n11=(replicas(0)),s12n12=(replicas(0)),s13n13=(replicas(0)),s14n14=(replicas(0)),s15n15=(replicas(0)),s16n16=(replicas(0)),s17n17=(replicas(0)),s18n18=(replicas(0)),s19n19=(replicas(0)),s20n20=(replicas(0)),s21n21=(replicas(0)),s22n22=(replicas(0)),s23n23=(replicas(0)),s24n24=(replicas(0)),s25n25=(replicas(0)),s26n26=(replicas(0)),s27n27=(replicas(0)),s28n28=(replicas(0))] -topology: -EU - EU_1 - │ └── [19 20 21] - EU_2 - │ └── [22 23 24] - EU_3 - │ └── [25 26 27 28] -US_East - US_East_1 - │ └── [1] - US_East_2 - │ └── [2 3] - US_East_3 - │ └── [4 5 6] - US_East_4 - │ └── [7 8 9 10 11 12 13 14 15 16] -US_West - US_West_1 - └── [17 18] 24 events executed: executed at: 2022-03-21 11:00:00 event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:3 constraints: > constraints: > constraints: > voter_constraints: > lease_preferences: > @@ -129,5 +87,342 @@ violating constraints: event: assertion checking event 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 lease-violating=0 lease-less-preferred=0 passed +validation result: + event scheduled at 2022-03-21 11:20:00 is expected to lead to failure + unsatisfiable: failed to satisfy constraints for region US_West sample1: pass ---------------------------------- +sample2: start running +24 events executed: + executed at: 2022-03-21 11:00:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:5 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:05:00 + event: assertion checking event + 1. 
assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:05:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:3 constraints: > constraints: > constraints: > voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:10:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:10:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:3 constraints: > constraints: > constraints: > voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:15:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:15:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:5 constraints: > constraints: > constraints: > voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:20:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:20:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:5 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:25:00 + event: assertion checking event + 1. 
assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:25:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:3 constraints: > constraints: > constraints: > voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:30:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:30:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:3 num_voters:3 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:35:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:35:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:3 num_voters:3 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:40:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:40:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:5 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:45:00 + event: assertion checking event + 1. 
assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:45:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:3 constraints: > constraints: > constraints: > voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:50:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:50:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:3 num_voters:3 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:55:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:55:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:5 constraints: > constraints: > constraints: > voter_constraints: > lease_preferences: > + executed at: 2022-03-21 12:00:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed +validation result: + valid +sample2: pass +---------------------------------- +sample3: start running +24 events executed: + executed at: 2022-03-21 11:00:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:5 constraints: > constraints: > constraints: > voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:05:00 + event: assertion checking event + 1. 
assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:05:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:5 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:10:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:10:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:3 constraints: > constraints: > constraints: > voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:15:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + failed: conformance unavailable=0 under=0 over=0 violating=0 + actual unavailable=0 under=0, over=0 violating=1 +violating constraints: + r1:{0000000000-9999999999} [(n1,s1):1, (n17,s17):4, (n18,s18):9, (n13,s13):7NON_VOTER, (n28,s28):10NON_VOTER] applying num_replicas=5 num_voters=3 constraints=[+region=US_East:1 +region=US_West:1 +region=EU:1] voter_constraints=[+region=US_West] lease_preferences=[+region=US_West] + executed at: 2022-03-21 11:15:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:3 num_voters:3 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:20:00 + event: assertion checking event + 1. 
assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:20:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:3 constraints: > constraints: > constraints: > voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:25:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + failed: conformance unavailable=0 under=0 over=0 violating=0 + actual unavailable=0 under=0, over=0 violating=1 +violating constraints: + r1:{0000000000-9999999999} [(n17,s17):18, (n23,s23):13, (n18,s18):16, (n7,s7):14NON_VOTER, (n19,s19):17NON_VOTER] applying num_replicas=5 num_voters=3 constraints=[+region=US_East:1 +region=US_West:1 +region=EU:1] voter_constraints=[+region=US_West] lease_preferences=[+region=US_West] + executed at: 2022-03-21 11:25:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:5 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:30:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:30:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:3 num_voters:3 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:35:00 + event: assertion checking event + 1. 
assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:35:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:3 constraints: > constraints: > constraints: > voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:40:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:40:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:3 num_voters:3 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:45:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:45:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:3 constraints: > constraints: > constraints: > voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:50:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:50:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:3 num_voters:3 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:55:00 + event: assertion checking event + 1. 
assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:55:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:5 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 12:00:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed +validation result: + event scheduled at 2022-03-21 11:10:00 is expected to lead to failure + unsatisfiable: failed to satisfy constraints for region US_West + event scheduled at 2022-03-21 11:20:00 is expected to lead to failure + unsatisfiable: failed to satisfy constraints for region US_West +sample3: pass +---------------------------------- +sample4: start running +24 events executed: + executed at: 2022-03-21 11:00:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:5 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:05:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:05:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:3 num_voters:3 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:10:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:10:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:3 constraints: > constraints: > constraints: > voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:15:00 + event: assertion checking event + 1. 
assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:15:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:5 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:20:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:20:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:5 constraints: > constraints: > constraints: > voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:25:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:25:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:5 constraints: > constraints: > constraints: > voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:30:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:30:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:3 num_voters:3 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:35:00 + event: assertion checking event + 1. 
assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:35:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:3 num_voters:3 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:40:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + failed: conformance unavailable=0 under=0 over=0 violating=0 + actual unavailable=0 under=0, over=0 violating=1 +violating constraints: + r1:{0000000000-9999999999} [(n26,s26):12, (n17,s17):17, (n18,s18):18] applying num_voters=3 voter_constraints=[+region=US_West] lease_preferences=[+region=US_West] + executed at: 2022-03-21 11:40:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:5 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:45:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:45:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:5 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:50:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:50:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:3 num_voters:3 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:55:00 + event: assertion checking event + 1. 
assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:55:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:5 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 12:00:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed +validation result: + event scheduled at 2022-03-21 11:35:00 is expected to lead to failure + unsatisfiable: failed to satisfy constraints for region US_West +sample4: pass +---------------------------------- +sample5: start running +24 events executed: + executed at: 2022-03-21 11:00:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:5 constraints: > constraints: > constraints: > voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:05:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:05:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:5 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:10:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:10:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:3 num_voters:3 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:15:00 + event: assertion checking event + 1. 
assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:15:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:3 num_voters:3 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:20:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:20:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:5 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:25:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:25:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:5 constraints: > constraints: > constraints: > voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:30:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:30:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:5 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:35:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:35:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:3 num_voters:3 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:40:00 + event: assertion checking event + 1. 
assertion=conformance unavailable=0 under=0 over=0 violating=0 + failed: conformance unavailable=0 under=0 over=0 violating=0 + actual unavailable=0 under=0, over=0 violating=1 +violating constraints: + r1:{0000000000-9999999999} [(n17,s17):15, (n8,s8):12, (n18,s18):16] applying num_voters=3 voter_constraints=[+region=US_West] lease_preferences=[+region=US_West] + executed at: 2022-03-21 11:40:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:3 num_voters:3 voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:45:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + failed: conformance unavailable=0 under=0 over=0 violating=0 + actual unavailable=0 under=0, over=0 violating=1 +violating constraints: + r1:{0000000000-9999999999} [(n17,s17):15, (n8,s8):12, (n18,s18):16] applying num_voters=3 voter_constraints=[+region=US_West] lease_preferences=[+region=US_West] + executed at: 2022-03-21 11:45:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:5 constraints: > constraints: > constraints: > voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:50:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:50:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:3 constraints: > constraints: > constraints: > voter_constraints: > lease_preferences: > + executed at: 2022-03-21 11:55:00 + event: assertion checking event + 1. 
assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed + executed at: 2022-03-21 11:55:00 + event: set span config event with span={0000000000-9999999999}, config=range_min_bytes:134217728 range_max_bytes:536870912 gc_policy: num_replicas:5 num_voters:5 constraints: > constraints: > constraints: > voter_constraints: > lease_preferences: > + executed at: 2022-03-21 12:00:00 + event: assertion checking event + 1. assertion=conformance unavailable=0 under=0 over=0 violating=0 + passed +validation result: + event scheduled at 2022-03-21 11:35:00 is expected to lead to failure + unsatisfiable: failed to satisfy constraints for region US_West + event scheduled at 2022-03-21 11:40:00 is expected to lead to failure + unsatisfiable: failed to satisfy constraints for region US_West +sample5: pass +---------------------------------- diff --git a/pkg/kv/kvserver/asim/tests/testdata/rand/rand_ranges b/pkg/kv/kvserver/asim/tests/testdata/rand/rand_ranges index a728c61fce1b..aa32b8e8eff2 100644 --- a/pkg/kv/kvserver/asim/tests/testdata/rand/rand_ranges +++ b/pkg/kv/kvserver/asim/tests/testdata/rand/rand_ranges @@ -188,6 +188,8 @@ US US_3 └── [11 12 13 14 15] no events were scheduled +validation result: + valid sample2: failed assertion conformance unavailable=0 under=0 over=0 violating=0 lease-violating=0 lease-less-preferred=0 actual unavailable=0 under=0, over=27 violating=0 lease-violating=0 lease-less-preferred=0 diff --git a/pkg/kv/kvserver/asim/validator/BUILD.bazel b/pkg/kv/kvserver/asim/validator/BUILD.bazel new file mode 100644 index 000000000000..76d333642dad --- /dev/null +++ b/pkg/kv/kvserver/asim/validator/BUILD.bazel @@ -0,0 +1,30 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") + +go_library( + name = "validator", + srcs = [ + "config_validator.go", + "validator.go", + ], + importpath = "github.com/cockroachdb/cockroach/pkg/kv/kvserver/asim/validator", + visibility = ["//visibility:public"], + deps = [ + 
"//pkg/kv/kvserver/asim/event", + "//pkg/kv/kvserver/asim/scheduled", + "//pkg/kv/kvserver/asim/state", + "//pkg/roachpb", + "@com_github_cockroachdb_errors//:errors", + ], +) + +go_test( + name = "validator_test", + srcs = ["validator_test.go"], + args = ["-test.timeout=295s"], + embed = [":validator"], + deps = [ + "//pkg/kv/kvserver/asim/state", + "//pkg/spanconfig/spanconfigtestutils", + "@com_github_stretchr_testify//require", + ], +) diff --git a/pkg/kv/kvserver/asim/validator/config_validator.go b/pkg/kv/kvserver/asim/validator/config_validator.go new file mode 100644 index 000000000000..e7187a06e1e2 --- /dev/null +++ b/pkg/kv/kvserver/asim/validator/config_validator.go @@ -0,0 +1,414 @@ +// Copyright 2023 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package validator + +import ( + "math" + + "github.com/cockroachdb/cockroach/pkg/kv/kvserver/asim/state" + "github.com/cockroachdb/cockroach/pkg/roachpb" + "github.com/cockroachdb/errors" +) + +// processClusterInfo handles region data and returns: 1. A map of zone names to +// their respective region names 2. A map of zone names to the number of +// available nodes in the zones 3. 
A map of region names to the number of +// available nodes in the regions +func processClusterInfo( + regions []state.Region, +) (map[string]string, map[string]int, map[string]int, int) { + zone := map[string]int{} + region := map[string]int{} + total := 0 + zoneToRegion := map[string]string{} + + for _, r := range regions { + for _, z := range r.Zones { + zoneToRegion[z.Name] = r.Name + zone[z.Name] += z.NodeCount + region[r.Name] += z.NodeCount + total += z.NodeCount + } + } + return zoneToRegion, zone, region, total +} + +type allocationDetailsAtEachLevel struct { + unassigned int + assignedVoters int + assignedNonVoters int +} + +// tryToAddVoters attempts to assign numOfVoters from the available nodes as +// voters. It returns true if there are sufficient available nodes to be +// assigned as voters, and false otherwise. +func (a *allocationDetailsAtEachLevel) tryToAddVoters(numOfVoters int) (success bool) { + if a.unassigned < numOfVoters { + return false + } + a.unassigned -= numOfVoters + a.assignedVoters += numOfVoters + return true +} + +// tryToAddNonVoters attempts to assign numOfNonVoters from the available nodes +// as nonvoters. It returns true if there are sufficient available nodes to be +// assigned as voters, and false otherwise. +func (a *allocationDetailsAtEachLevel) tryToAddNonVoters(numOfNonVoters int) (success bool) { + if a.unassigned < numOfNonVoters { + return false + } + a.unassigned -= numOfNonVoters + a.assignedNonVoters += numOfNonVoters + return true +} + +// promoteNonVoters promotes numOfNonVotersToPromote of nonvoters to voters. +func (a *allocationDetailsAtEachLevel) promoteNonVoters(numOfNonVotersToPromote int) { + if a.assignedNonVoters < numOfNonVotersToPromote { + panic("insufficient non-voters for promotion. 
This is unexpected as computeNecessaryChanges " + + "should calculate number of non-voters for promotion correctly.") + } + a.assignedNonVoters -= numOfNonVotersToPromote + a.assignedVoters += numOfNonVotersToPromote +} + +type mockAllocator struct { + zoneToRegion map[string]string + zone map[string]allocationDetailsAtEachLevel + region map[string]allocationDetailsAtEachLevel + cluster allocationDetailsAtEachLevel +} + +// newMockAllocator creates a mock allocator based on the provided cluster +// setup. mockAllocator is designed to determine if a config can be satisfied by +// trying to assign replicas in a way that meet the constraints. Note that since +// isSatisfiable directly alters mockAlloactor fields, a new mock allocator +// should be initialized for each isSatisfiable call. +func newMockAllocator( + zoneToRegion map[string]string, zone map[string]int, region map[string]int, total int, +) mockAllocator { + m := mockAllocator{ + zoneToRegion: zoneToRegion, + zone: map[string]allocationDetailsAtEachLevel{}, + region: map[string]allocationDetailsAtEachLevel{}, + cluster: allocationDetailsAtEachLevel{ + unassigned: total, + }, + } + + for k, v := range zone { + m.zone[k] = allocationDetailsAtEachLevel{ + unassigned: v, + } + } + + for k, v := range region { + m.region[k] = allocationDetailsAtEachLevel{ + unassigned: v, + } + } + return m +} + +type constraint struct { + requiredReplicas int + requiredVoters int +} + +// validateConstraint returns nil if the constraint is feasible and error +// (not `nil`) otherwise. 
+func (m *mockAllocator) validateConstraint(c roachpb.Constraint) error { + if c.Type == roachpb.Constraint_PROHIBITED { + return errors.New("constraints marked as Constraint_PROHIBITED are unsupported") + } + switch c.Key { + case "zone": + _, ok := m.zone[c.Value] + if !ok { + return errors.Newf("zone constraint value %s is not found in the cluster set up", c.Value) + } + case "region": + _, ok := m.region[c.Value] + if !ok { + return errors.Newf("region constraint value %s is not found in the cluster set up", c.Value) + } + default: + return errors.New("only zone and region constraint keys are supported") + } + return nil +} + +// processConstraintsHelper is a helper function for processConstraint to handle +// the processing logic for both replica and voter constraints. It centralizes +// the validation and updating of the given zoneConstraints and +// regionConstraints. If all constraints are feasible, it returns nil. +// Otherwise, it returns error (not `nil`). +func (m *mockAllocator) processConstraintsHelper( + constraintsConjunction []roachpb.ConstraintsConjunction, + isVoterConstraint bool, + totalNumOfVotersOrReplicas int, + zoneConstraints map[string]constraint, + regionConstraints map[string]constraint, +) error { + for _, cc := range constraintsConjunction { + required := int(cc.NumReplicas) + if cc.NumReplicas == 0 { + // If NumReplicas is zero, the constraints will be applied to all voters / + // replicas. 
+ required = totalNumOfVotersOrReplicas + } + for _, c := range cc.Constraints { + if err := m.validateConstraint(c); err != nil { + return err + } + if c.Key == "zone" { + zc := zoneConstraints[c.Value] + if isVoterConstraint { + zc.requiredVoters = required + } else { + zc.requiredReplicas = required + } + zoneConstraints[c.Value] = zc + } else if c.Key == "region" { + rc := regionConstraints[c.Value] + if isVoterConstraint { + rc.requiredVoters = required + } else { + rc.requiredReplicas = required + } + regionConstraints[c.Value] = rc + } + } + } + return nil +} + +// processConstraints validates and extracts region and zone-specific replica +// and voter constraints, storing them in two separate maps. If certain +// constraints fail the validation, they are considered as infeasible. In such +// cases, error(not `nil`) will be returned. +func (m *mockAllocator) processConstraints( + config roachpb.SpanConfig, +) (map[string]constraint, map[string]constraint, error) { + zoneConstraints := map[string]constraint{} + regionConstraints := map[string]constraint{} + totalVoters := int(config.GetNumVoters()) + totalReplicas := int(config.NumReplicas) + if err := m.processConstraintsHelper( + config.VoterConstraints, true /*isVoterConstraint*/, totalVoters, /*totalNumOfVotersOrReplicas*/ + zoneConstraints, regionConstraints); err != nil { + return map[string]constraint{}, map[string]constraint{}, err + } + if err := m.processConstraintsHelper( + config.Constraints, false /*isVoterConstraint*/, totalReplicas, /*totalNumOfVotersOrReplicas*/ + zoneConstraints, regionConstraints); err != nil { + return map[string]constraint{}, map[string]constraint{}, err + } + return zoneConstraints, regionConstraints, nil +} + +// computeNecessaryChanges computes the necessary minimal changes needed for a +// level to satisfy the constraints, considering the existing number of voters +// and non-voters, as well as the required number of voters and replicas. 
func computeNecessaryChanges(
	existingVoters int, existingNonVoters int, requiredVoters int, requiredReplicas int,
) (int, int, int) {
	// atLeastZero turns a negative shortfall into zero: already holding more
	// voters or replicas than required (unconstrained ones) is acceptable and
	// only means that nothing further has to be added.
	atLeastZero := func(shortfall int) int {
		return int(math.Max(0, float64(shortfall)))
	}

	// Missing voters are covered by promotion before anything else, because a
	// promoted non-voter costs no additional node. The promotion count is
	// limited by the non-voters that exist.
	missingVoters := atLeastZero(requiredVoters - existingVoters)
	nonVotersToPromote := int(math.Min(float64(existingNonVoters), float64(missingVoters)))
	voters := existingVoters + nonVotersToPromote
	nonVoters := existingNonVoters - nonVotersToPromote

	// Voters still missing after promotion have to be newly added.
	votersToAdd := atLeastZero(requiredVoters - voters)
	voters += votersToAdd

	// Voters count as replicas too, so only the remaining replica shortfall is
	// filled with new non-voters.
	nonVotersToAdd := atLeastZero(requiredReplicas - voters - nonVoters)
	return nonVotersToPromote, votersToAdd, nonVotersToAdd
}

// applyAtRegionLevel attempts to apply the desired changes (nonVotersToPromote,
// votersToAdd, nonVotersToAdd) at the provided region (specified by
// regionName). If enough nodes are available, it makes the changes and returns
// true. Otherwise, it returns false.
+func (m *mockAllocator) applyAtRegionLevel( + regionName string, nonVotersToPromote int, votersToAdd int, nonVotersToAdd int, +) bool { + existing, ok := m.region[regionName] + if !ok { + panic("unknown region name in the region constraint. " + + "This is unexpected as validateConstraint should have validated it beforehand.") + } + + existing.promoteNonVoters(nonVotersToPromote) + success := existing.tryToAddVoters(votersToAdd) && existing.tryToAddNonVoters(nonVotersToAdd) + m.region[regionName] = existing + return success +} + +// applyAtClusterLevel attempts to apply the desired changes +// (nonVotersToPromote, votersToAdd, nonVotersToAdd) at the cluster level. If +// enough nodes are available, it makes the changes and returns true. Otherwise, +// it returns false. +func (m *mockAllocator) applyAtClusterLevel( + nonVotersToPromote int, votersToAdd int, nonVotersToAdd int, +) bool { + m.cluster.promoteNonVoters(nonVotersToPromote) + return m.cluster.tryToAddVoters(votersToAdd) && m.cluster.tryToAddNonVoters(nonVotersToAdd) +} + +// applyAtZoneLevel attempts to apply the desired changes (nonVotersToPromote, +// votersToAdd, nonVotersToAdd) at the provided zone (specified by zoneName). If +// enough nodes are available, it makes the changes and returns true. Otherwise, +// it returns false. +func (m *mockAllocator) applyAtZoneLevel( + zoneName string, nonVotersToPromote int, votersToAdd int, nonVotersToAdd int, +) bool { + existing, ok := m.zone[zoneName] + if !ok { + panic("unknown zone name in the zone constraint. 
" + + "This is unexpected as validateConstraint should have validated it beforehand.") + } + existing.promoteNonVoters(nonVotersToPromote) + success := existing.tryToAddVoters(votersToAdd) && existing.tryToAddNonVoters(nonVotersToAdd) + m.zone[zoneName] = existing + return success +} + +// tryToSatisfyRegionConstraint checks whether the allocator can assign voters +// and replicas in a manner that meets the specified required voters and +// replicas for the region. If possible, it makes the necessary assignment, +// updates the allocator, and returns true. Otherwise, it returns false. +func (m *mockAllocator) tryToSatisfyRegionConstraint( + regionName string, requiredVoters int, requiredReplicas int, +) bool { + existing, ok := m.region[regionName] + if !ok { + panic("unknown region name in the region constraint. " + + "This is unexpected as validateConstraint should have validated it beforehand.") + } + nonVotersToPromote, votersToAdd, nonVotersToAdd := computeNecessaryChanges( + existing.assignedVoters, existing.assignedNonVoters, requiredVoters, requiredReplicas) + if nonVotersToPromote == 0 && votersToAdd == 0 && nonVotersToAdd == 0 { + return true + } + // Propagate the changes to region and cluster. + return m.applyAtRegionLevel(regionName, nonVotersToPromote, votersToAdd, nonVotersToAdd) && + m.applyAtClusterLevel(nonVotersToPromote, votersToAdd, nonVotersToAdd) +} + +// tryToSatisfyZoneConstraint checks whether the allocator can assign voters and +// replicas in a manner that meets the specified required voters and replicas +// for the zone. If possible, it makes the necessary assignment, updates the +// allocator, and returns true. Otherwise, it returns false. +func (m *mockAllocator) tryToSatisfyZoneConstraint( + zoneName string, requiredVoters int, requiredReplicas int, +) bool { + existing, ok := m.zone[zoneName] + if !ok { + panic("unknown zone name in the zone constraint. 
" + + "This is unexpected as validateConstraint should have validated it beforehand.") + } + nonVotersToPromote, votersToAdd, nonVotersToAdd := computeNecessaryChanges( + existing.assignedVoters, existing.assignedNonVoters, requiredVoters, requiredReplicas) + if nonVotersToPromote == 0 && votersToAdd == 0 && nonVotersToAdd == 0 { + return true + } + // Propagate the changes to zone, region and cluster. + return m.applyAtZoneLevel(zoneName, nonVotersToPromote, votersToAdd, nonVotersToAdd) && + m.applyAtRegionLevel(m.zoneToRegion[zoneName], nonVotersToPromote, votersToAdd, nonVotersToAdd) && + m.applyAtClusterLevel(nonVotersToPromote, votersToAdd, nonVotersToAdd) +} + +// tryToSatisfyClusterConstraint checks whether the allocator can assign voters +// and replicas in a manner that meets the specified required voters and +// replicas for the cluster. If possible, it makes the necessary assignment, +// updates the allocator, and returns true. Otherwise, it returns false. +func (m *mockAllocator) tryToSatisfyClusterConstraint( + requiredVoters int, requiredReplicas int, +) bool { + existing := m.cluster + nonVotersToPromote, votersToAdd, nonVotersToAdd := computeNecessaryChanges( + existing.assignedVoters, existing.assignedNonVoters, requiredVoters, requiredReplicas) + if nonVotersToPromote == 0 && votersToAdd == 0 && nonVotersToAdd == 0 { + return true + } + // Propagate the changes to cluster. + success := m.applyAtClusterLevel(nonVotersToPromote, votersToAdd, nonVotersToAdd) + if m.cluster.assignedVoters != requiredVoters || m.cluster.assignedNonVoters+m.cluster.assignedVoters != requiredReplicas { + // Since having unconstrained replicas or voters do not lead to error in + // earlier process, we check for exact bound cluster constraint here. + return false + } + return success +} + +// isSatisfiable is a method that assesses whether a given configuration is +// satisfiable within the cluster used to initialize the mockAllocator. 
It +// returns (true, nil) for satisfiable configurations and (false, reason) for +// unsatisfiable configurations. mockAllocator tries to allocate voters and +// nonvoters across nodes in a manner that satisfies the constraints. If no such +// allocation can be found, the constraint is considered unsatisfiable. The +// allocation is found through the following process: +// 1. Preprocess the config constraints to store replica and voter constraints +// specific to the zone and region in two maps. +// 2. Try to satisfy zone constraints first, region constraints next, and +// cluster constraints in the end. As we allocate replicas for zone constraints, +// some region constraints are also satisfied. +// 3. While trying to satisfy constraints at each hierarchical level, we +// allocate voters or replicas specific to the zone or region only when +// necessary. It first promotes non-voters to voters when possible as voters are +// also replicas and can satisfy both constraints. Additional voters and +// non-voters are then assigned as needed. If any zones or regions lack +// available nodes for assignment, the constraint is considered as +// unsatisfiable. +// +// Limitation: +// - leaseholder preference are not checked and treated as satisfiable. - +// constraints with a key other than zone and region are unsatisfiable. - +// constraints with a value that does not correspond to a known zone or region +// in the cluster setup are unsatisfiable. +// - constraints labeled as Constraint_PROHIBITED are considered unsatisfiable. 
+func (m *mockAllocator) isSatisfiable(config roachpb.SpanConfig) (success bool, err error) { + zoneConstraints, regionConstraints, err := m.processConstraints(config) + if err != nil { + return false, err + } + + for zoneName, zc := range zoneConstraints { + if !m.tryToSatisfyZoneConstraint(zoneName, zc.requiredVoters, zc.requiredReplicas) { + return false, errors.Newf("failed to satisfy constraints for zone %s", zoneName) + } + } + + for regionName, rc := range regionConstraints { + if !m.tryToSatisfyRegionConstraint(regionName, rc.requiredVoters, rc.requiredReplicas) { + return false, errors.Newf("failed to satisfy constraints for region %s", regionName) + } + } + + if !m.tryToSatisfyClusterConstraint(int(config.GetNumVoters()), int(config.NumReplicas)) { + return false, errors.Newf("failed to satisfy constraints for cluster") + } + return true, nil +} diff --git a/pkg/kv/kvserver/asim/validator/validator.go b/pkg/kv/kvserver/asim/validator/validator.go new file mode 100644 index 000000000000..ca8ef94ea0d1 --- /dev/null +++ b/pkg/kv/kvserver/asim/validator/validator.go @@ -0,0 +1,49 @@ +// Copyright 2023 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package validator + +import ( + "fmt" + "strings" + + "github.com/cockroachdb/cockroach/pkg/kv/kvserver/asim/event" + "github.com/cockroachdb/cockroach/pkg/kv/kvserver/asim/scheduled" + "github.com/cockroachdb/cockroach/pkg/kv/kvserver/asim/state" +) + +// Validate checks for any invalid events. Currently, it only checks +// SetSpanConfigEvent for the presence of unsatisfiable configurations. But it +// can be extended to validate the initial state and other events as well. 
+func Validate(initialState state.State, events scheduled.EventExecutor) string { + buf := strings.Builder{} + buf.WriteString("validation result:\n") + failed := false + + // Since all constraint checks utilize the same cluster info, we process the + // cluster info once and reuse it. + zoneToRegion, zone, region, total := processClusterInfo(initialState.ClusterInfo().Regions) + for _, se := range events.ScheduledEvents() { + if e, ok := se.TargetEvent.(event.SetSpanConfigEvent); ok { + // Create a new mockAllocator for every constraint satisfiability check as + // isSatisfiable directly modifies mockAllocator fields. + ma := newMockAllocator(zoneToRegion, zone, region, total) + if success, reason := ma.isSatisfiable(e.Config); !success { + failed = true + buf.WriteString(fmt.Sprintf("\tevent scheduled at %s is expected to lead to failure\n", se.At.Format("2006-01-02 15:04:05"))) + buf.WriteString(fmt.Sprintf("\t\tunsatisfiable: %s\n", reason)) + } + } + } + if !failed { + buf.WriteString("\tvalid\n") + } + return buf.String() +} diff --git a/pkg/kv/kvserver/asim/validator/validator_test.go b/pkg/kv/kvserver/asim/validator/validator_test.go new file mode 100644 index 000000000000..f6778b86f7ec --- /dev/null +++ b/pkg/kv/kvserver/asim/validator/validator_test.go @@ -0,0 +1,246 @@ +// Copyright 2023 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. 
+ +package validator + +import ( + "testing" + + "github.com/cockroachdb/cockroach/pkg/kv/kvserver/asim/state" + "github.com/cockroachdb/cockroach/pkg/spanconfig/spanconfigtestutils" + "github.com/stretchr/testify/require" +) + +// TestValidator validates the correctness of span configuration satisfiability +// check in Validator. +func TestValidator(t *testing.T) { + zoneToRegion, zone, region, total := processClusterInfo(state.ComplexConfig.Regions) + // ComplexConfig Topology: + // EU + // EU_1 + // │ └── [19 20 21] + // EU_2 + // │ └── [22 23 24] + // EU_3 + // │ └── [25 26 27 28] + // US_East + // US_East_1 + // │ └── [1] + // US_East_2 + // │ └── [2 3] + // US_East_3 + // │ └── [4 5 6] + // US_East_4 + // │ └── [7 8 9 10 11 12 13 14 15 16] + // US_West + // US_West_1 + // └── [17 18] + testCases := []struct { + description string + constraint string + expectedSuccess bool + expectedErrorMsgStr string + }{ + { + description: "straightforward valid configuration", + constraint: "num_replicas=2 num_voters=1", + expectedSuccess: true, + expectedErrorMsgStr: "", + }, + { + description: "straightforward valid configuration", + constraint: "num_replicas=5 num_voters=5 " + + "constraints={'+region=US_East':3,'+region=US_West':1,'+region=EU':1} " + + "voter_constraints={'+region=US_East':3,'+region=US_West':1,'+region=EU':1}", + expectedSuccess: true, + expectedErrorMsgStr: "", + }, + { + description: "promotion to satisfy region voter constraint", + constraint: "num_replicas=2 num_voters=2 " + + "constraints={'+zone=US_West_1':2} voter_constraints={'+region=US_West':2}", + expectedSuccess: true, + expectedErrorMsgStr: "", + }, + { + description: "promotion to satisfy cluster constraint", + constraint: "num_replicas=2 num_voters=2 constraints={'+zone=US_West_1':2}", + expectedSuccess: true, + expectedErrorMsgStr: "", + }, + { + description: "promoting some nonvoters to voters", + constraint: "num_replicas=6 num_voters=3 constraints={'+zone=US_East_3':3} " + + 
"voter_constraints={'+region=US_East':3,'+zone=US_East_2':2}", + expectedSuccess: true, + expectedErrorMsgStr: "", + }, + { + description: "promoting some nonvoters + add voters + add nonvoters", + constraint: "num_replicas=15 num_voters=6 " + + "constraints={'+zone=US_East_4':10,'+region=EU':3,'+region=US_East':11} " + + "voter_constraints={'+region=US_East':3,'+zone=US_East_3':1,'+zone=US_West_1':1}", + expectedSuccess: true, + expectedErrorMsgStr: "", + }, + { + description: "satisfying zone constraint can help satisfy region constraint", + constraint: "num_replicas=2 constraints={'+zone=US_West_1':2,'+region=US_West':2}", + expectedSuccess: true, + expectedErrorMsgStr: "", + }, + { + description: "cluster is fully assigned by region constraints", + constraint: "num_replicas=28 num_voters=28 " + + "constraints={'+region=US_East':16,'+region=US_West':2,'+region=EU':10}", + expectedSuccess: true, + expectedErrorMsgStr: "", + }, + { + description: "cluster is fully assigned by region and zone constraints", + constraint: "num_replicas=28 num_voters=28 " + + "constraints={'+region=US_East':16,'+region=US_West':2,'+region=EU':10," + + "'+zone=US_East_1':1,'+zone=US_East_2':2,'+zone=US_East_3':3,'+zone=US_East_4':10,'+zone=US_West_1':2," + + "'+zone=EU_1':3,'+zone=EU_2':3,'+zone=EU_3':4} " + + "voter_constraints={'+region=US_East':16,'+region=US_West':2,'+region=EU':10," + + "'+zone=US_East_1':1,'+zone=US_East_2':2,'+zone=US_East_3':3,'+zone=US_East_4':10,'+zone=US_West_1':2," + + "'+zone=EU_1':3,'+zone=EU_2':3,'+zone=EU_3':4}", + expectedSuccess: true, + expectedErrorMsgStr: "", + }, + { + description: "having unconstrained replicas + unconstrained voters", + constraint: "num_replicas=28 num_voters=25 " + + "constraints={'+region=US_East':2} voter_constraints={'+region=US_East':2}", + expectedSuccess: true, + expectedErrorMsgStr: "", + }, + { + description: "having unconstrained replicas + fully constrained voters", + constraint: "num_replicas=27 num_voters=16 
voter_constraints={'+region=US_East':16}", + expectedSuccess: true, + expectedErrorMsgStr: "", + }, + { + description: "having fully constrained replicas + unconstrained voters", + constraint: "num_replicas=16 num_voters=3 " + + "constraints={'+region=US_East':16,'+zone=US_East_1':1,'+zone=US_East_2':2} " + + "voter_constraints={'+zone=US_East_4':3}", + expectedSuccess: true, + expectedErrorMsgStr: "", + }, + { + description: "can promote any replicas to voters at cluster level", + constraint: "num_replicas=28 num_voters=3 " + + "constraints={'+region=US_East':16,'+region=US_West':2,'+region=EU':10} " + + "voter_constraints={'+region=EU':2}", + expectedSuccess: true, + expectedErrorMsgStr: "", + }, + { + description: "configuration for issue #106559", + constraint: "num_replicas=6 num_voters=5 " + + "constraints={'+zone=US_West_1':1,'+zone=EU_1':1,'+zone=US_East_2':2,'+zone=US_East_3':2} " + + "voter_constraints={'+zone=US_West_1':1,'+zone=EU_1':1,'+zone=US_East_2':2,'+zone=US_East_3':1}", + expectedSuccess: true, + expectedErrorMsgStr: "", + }, + { + description: "configuration for issue #106559", + constraint: "num_replicas=6 num_voters=5 " + + "constraints={'+zone=US_West_1':1,'+zone=EU_1':1,'+zone=US_East_2':1,'+zone=US_East_3':1} " + + "voter_constraints={'+zone=US_West_1':2,'+zone=US_East_2':2}", + expectedSuccess: true, + expectedErrorMsgStr: "", + }, + { + description: "no voters or replicas needed to add for constraints", + constraint: "num_replicas=0 constraints={'+zone=US_East_1':0}", + expectedSuccess: true, + expectedErrorMsgStr: "", + }, + { + description: "insufficient replicas for region constraint", + constraint: "num_replicas=28 num_voters=28 " + + "constraints={'+region=US_East':17,'+region=US_West':2,'+region=EU':10}", + expectedSuccess: false, + expectedErrorMsgStr: "failed to satisfy constraints for region US_East", + }, + { + description: "insufficient replicas for cluster constraints", + constraint: "num_replicas=16 num_voters=3 " + + 
"constraints={'+region=US_East':16} voter_constraints={'+region=EU':2}", + expectedSuccess: false, + expectedErrorMsgStr: "failed to satisfy constraints for cluster", + }, + { + description: "more voters than replicas", + constraint: "num_replicas=1 num_voters=2", + expectedSuccess: false, + expectedErrorMsgStr: "failed to satisfy constraints for cluster", + }, + { + description: "too many replicas for cluster constraint", + constraint: "num_replicas=6 num_voters=2 constraints={'+region=US_East':16}", + expectedSuccess: false, + expectedErrorMsgStr: "failed to satisfy constraints for cluster", + }, + { + description: "too many voters for cluster constraint", + constraint: "num_replicas=20 num_voters=2 voter_constraints={'+region=US_East':16}", + expectedSuccess: false, + expectedErrorMsgStr: "failed to satisfy constraints for cluster", + }, + { + description: "zero NumReplicas should use total num_replicas, num_voters for constraints", + constraint: "num_replicas=5 num_voters=3 " + + "constraints={'+region=US_East'} voter_constraints={'+region=US_West'}", + expectedSuccess: false, + expectedErrorMsgStr: "failed to satisfy constraints for region US_West", + }, + { + description: "unsupported constraint key", + constraint: "num_replicas=5 constraints={'+az=US_East'}", + expectedSuccess: false, + expectedErrorMsgStr: "only zone and region constraint keys are supported", + }, + { + description: "unsupported constraint value", + constraint: "num_replicas=5 num_voters=1 voter_constraints={'+region=CA':1}", + expectedSuccess: false, + expectedErrorMsgStr: "region constraint value CA is not found in the cluster set up", + }, + { + description: "unsupported constraint value", + constraint: "num_replicas=5 constraints={'+zone=CA':1}", + expectedSuccess: false, + expectedErrorMsgStr: "zone constraint value CA is not found in the cluster set up", + }, + { + description: "unsupported constraint type", + constraint: "num_replicas=5 constraints={'-region=US_West':1}", + 
expectedSuccess: false, + expectedErrorMsgStr: "constraints marked as Constraint_PROHIBITED are unsupported", + }, + } + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + ma := newMockAllocator(zoneToRegion, zone, region, total) + config := spanconfigtestutils.ParseZoneConfig(t, tc.constraint).AsSpanConfig() + success, actualError := ma.isSatisfiable(config) + require.Equal(t, tc.expectedSuccess, success) + if tc.expectedErrorMsgStr == "" { + require.Nil(t, actualError) + } else { + require.EqualError(t, actualError, tc.expectedErrorMsgStr) + } + }) + } +}