From 3c1de07e122ac8284febed7d183494a56219cdd4 Mon Sep 17 00:00:00 2001 From: irfan sharif Date: Fri, 14 Oct 2022 07:40:07 -0400 Subject: [PATCH] spanconfig: introduce spanconfig.Reporter This is KV-side API for multi-tenant replication reports (#89987) Release note: None --- pkg/BUILD.bazel | 4 + pkg/ccl/kvccl/kvtenantccl/connector.go | 25 ++ pkg/ccl/kvccl/kvtenantccl/connector_test.go | 6 + .../spanconfigcomparedccl/BUILD.bazel | 0 pkg/kv/kvclient/kvcoord/send_test.go | 6 + pkg/kv/kvclient/kvcoord/transport_test.go | 6 + pkg/kv/kvclient/kvtenant/connector.go | 4 + .../allocator/allocatorimpl/allocator.go | 44 ++- .../allocatorimpl/allocator_scorer_test.go | 15 +- .../allocator/allocatorimpl/allocator_test.go | 15 +- pkg/kv/kvserver/constraint/analyzer.go | 28 +- pkg/kv/kvserver/replica_metrics.go | 8 +- .../reports/replication_stats_report.go | 5 +- pkg/kv/kvserver/store.go | 3 +- pkg/roachpb/api.proto | 4 + pkg/roachpb/metadata.go | 11 + pkg/roachpb/metadata_replicas.go | 33 +- pkg/roachpb/roachpbmock/mocks_generated.go | 20 ++ pkg/roachpb/span_config.go | 3 - pkg/roachpb/span_config.proto | 23 ++ pkg/rpc/context_test.go | 6 + pkg/rpc/nodedialer/nodedialer_test.go | 6 + pkg/server/BUILD.bazel | 1 + pkg/server/node.go | 19 ++ pkg/server/server.go | 17 + pkg/server/serverpb/migration.proto | 3 + pkg/server/serverpb/status.proto | 1 + pkg/server/status.go | 22 +- pkg/server/testserver.go | 10 + pkg/spanconfig/spanconfig.go | 8 + pkg/spanconfig/spanconfigreporter/BUILD.bazel | 55 +++ .../spanconfigreporter/datadriven_test.go | 318 ++++++++++++++++++ pkg/spanconfig/spanconfigreporter/disabled.go | 34 ++ .../spanconfigreporter/main_test.go | 33 ++ pkg/spanconfig/spanconfigreporter/reporter.go | 181 ++++++++++ .../spanconfigreporter/testdata/basic | 189 +++++++++++ .../testdata/constraint_conformance | 87 +++++ .../testdata/joint_consensus | 104 ++++++ .../testdata/over_under_replicated | 120 +++++++ .../spanconfigtestutils/BUILD.bazel | 2 + pkg/spanconfig/spanconfigtestutils/utils.go | 94 ++++++ pkg/testutils/serverutils/test_tenant_shim.go | 4 + 42 files changed, 1509 insertions(+), 68 deletions(-) delete mode 100644 pkg/ccl/spanconfigccl/spanconfigcomparedccl/BUILD.bazel create mode 100644 pkg/spanconfig/spanconfigreporter/BUILD.bazel create mode 100644 pkg/spanconfig/spanconfigreporter/datadriven_test.go create mode 100644 pkg/spanconfig/spanconfigreporter/disabled.go create mode 100644 pkg/spanconfig/spanconfigreporter/main_test.go create mode 100644 pkg/spanconfig/spanconfigreporter/reporter.go create mode 100644 pkg/spanconfig/spanconfigreporter/testdata/basic create mode 100644 pkg/spanconfig/spanconfigreporter/testdata/constraint_conformance create mode 100644 pkg/spanconfig/spanconfigreporter/testdata/joint_consensus create mode 100644 pkg/spanconfig/spanconfigreporter/testdata/over_under_replicated diff --git a/pkg/BUILD.bazel b/pkg/BUILD.bazel index 531556511cb9..41b8a42a4871 100644 --- a/pkg/BUILD.bazel +++ b/pkg/BUILD.bazel @@ -267,6 +267,7 @@ ALL_TESTS = [ "//pkg/spanconfig/spanconfigmanager:spanconfigmanager_test", "//pkg/spanconfig/spanconfigptsreader:spanconfigptsreader_test", "//pkg/spanconfig/spanconfigreconciler:spanconfigreconciler_test", + "//pkg/spanconfig/spanconfigreporter:spanconfigreporter_test", "//pkg/spanconfig/spanconfigsqltranslator:spanconfigsqltranslator_test", "//pkg/spanconfig/spanconfigsqlwatcher:spanconfigsqlwatcher_test", "//pkg/spanconfig/spanconfigstore:spanconfigstore_test", @@ -1335,6 +1336,8 @@ GO_TARGETS = [ "//pkg/spanconfig/spanconfigptsreader:spanconfigptsreader_test", "//pkg/spanconfig/spanconfigreconciler:spanconfigreconciler", "//pkg/spanconfig/spanconfigreconciler:spanconfigreconciler_test", + "//pkg/spanconfig/spanconfigreporter:spanconfigreporter", + "//pkg/spanconfig/spanconfigreporter:spanconfigreporter_test", "//pkg/spanconfig/spanconfigsplitter:spanconfigsplitter", "//pkg/spanconfig/spanconfigsqltranslator:spanconfigsqltranslator", "//pkg/spanconfig/spanconfigsqltranslator:spanconfigsqltranslator_test", @@ -2573,6 +2576,7 @@ GET_X_DATA_TARGETS = [ "//pkg/spanconfig/spanconfigmanager:get_x_data", "//pkg/spanconfig/spanconfigptsreader:get_x_data", "//pkg/spanconfig/spanconfigreconciler:get_x_data", + "//pkg/spanconfig/spanconfigreporter:get_x_data", "//pkg/spanconfig/spanconfigsplitter:get_x_data", "//pkg/spanconfig/spanconfigsqltranslator:get_x_data", "//pkg/spanconfig/spanconfigsqlwatcher:get_x_data", diff --git a/pkg/ccl/kvccl/kvtenantccl/connector.go b/pkg/ccl/kvccl/kvtenantccl/connector.go index b1509ad739fc..d47c7e8e7dd4 100644 --- a/pkg/ccl/kvccl/kvtenantccl/connector.go +++ b/pkg/ccl/kvccl/kvtenantccl/connector.go @@ -129,6 +129,10 @@ var _ serverpb.TenantStatusServer = (*Connector)(nil) // Connector is capable of accessing span configurations for secondary tenants. var _ spanconfig.KVAccessor = (*Connector)(nil) +// Reporter is capable of generating span configuration conformance reports for +// secondary tenants. +var _ spanconfig.Reporter = (*Connector)(nil) + // NewConnector creates a new Connector. // NOTE: Calling Start will set cfg.RPCContext.ClusterID. func NewConnector(cfg kvtenant.ConnectorConfig, addrs []string) *Connector { @@ -534,6 +538,27 @@ func (c *Connector) UpdateSpanConfigRecords( }) } +// SpanConfigConformance implements the spanconfig.Reporter interface. +func (c *Connector) SpanConfigConformance( + ctx context.Context, spans []roachpb.Span, +) (roachpb.SpanConfigConformanceReport, error) { + var report roachpb.SpanConfigConformanceReport + if err := c.withClient(ctx, func(ctx context.Context, c *client) error { + resp, err := c.SpanConfigConformance(ctx, &roachpb.SpanConfigConformanceRequest{ + Spans: spans, + }) + if err != nil { + return err + } + + report = resp.Report + return nil + }); err != nil { + return roachpb.SpanConfigConformanceReport{}, err + } + return report, nil +} + // GetAllSystemSpanConfigsThatApply implements the spanconfig.KVAccessor // interface. func (c *Connector) GetAllSystemSpanConfigsThatApply( diff --git a/pkg/ccl/kvccl/kvtenantccl/connector_test.go b/pkg/ccl/kvccl/kvtenantccl/connector_test.go index 4212f1d44a19..c9dd250b1968 100644 --- a/pkg/ccl/kvccl/kvtenantccl/connector_test.go +++ b/pkg/ccl/kvccl/kvtenantccl/connector_test.go @@ -124,6 +124,12 @@ func (m *mockServer) UpdateSpanConfigs( panic("unimplemented") } +func (m *mockServer) SpanConfigConformance( + context.Context, *roachpb.SpanConfigConformanceRequest, +) (*roachpb.SpanConfigConformanceResponse, error) { + panic("unimplemented") +} + func gossipEventForClusterID(clusterID uuid.UUID) *roachpb.GossipSubscriptionEvent { return &roachpb.GossipSubscriptionEvent{ Key: gossip.KeyClusterID, diff --git a/pkg/ccl/spanconfigccl/spanconfigcomparedccl/BUILD.bazel b/pkg/ccl/spanconfigccl/spanconfigcomparedccl/BUILD.bazel deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/pkg/kv/kvclient/kvcoord/send_test.go b/pkg/kv/kvclient/kvcoord/send_test.go index 61dcf55d812b..c47064d9613e 100644 --- a/pkg/kv/kvclient/kvcoord/send_test.go +++ b/pkg/kv/kvclient/kvcoord/send_test.go @@ -101,6 +101,12 @@ func (n Node) UpdateSpanConfigs( panic("unimplemented") } +func (n Node) SpanConfigConformance( + context.Context, *roachpb.SpanConfigConformanceRequest, +) (*roachpb.SpanConfigConformanceResponse, error) { + panic("implement me") +} + func (n Node) TenantSettings( *roachpb.TenantSettingsRequest, roachpb.Internal_TenantSettingsServer, ) error { diff --git a/pkg/kv/kvclient/kvcoord/transport_test.go b/pkg/kv/kvclient/kvcoord/transport_test.go index ec3610569db2..afc31c9a9631 100644 --- a/pkg/kv/kvclient/kvcoord/transport_test.go +++ b/pkg/kv/kvclient/kvcoord/transport_test.go @@ -218,6 +218,12 @@ func (m *mockInternalClient) GetSpanConfigs( return nil, fmt.Errorf("unsupported GetSpanConfigs call") } +func (m *mockInternalClient) SpanConfigConformance( + _ context.Context, _ *roachpb.SpanConfigConformanceRequest, _ ...grpc.CallOption, +) (*roachpb.SpanConfigConformanceResponse, error) { + return nil, fmt.Errorf("unsupported SpanConfigConformance call") +} + func (m *mockInternalClient) GetAllSystemSpanConfigsThatApply( context.Context, *roachpb.GetAllSystemSpanConfigsThatApplyRequest, ...grpc.CallOption, ) (*roachpb.GetAllSystemSpanConfigsThatApplyResponse, error) { diff --git a/pkg/kv/kvclient/kvtenant/connector.go b/pkg/kv/kvclient/kvtenant/connector.go index 5e2dfb29287e..891bff372e1b 100644 --- a/pkg/kv/kvclient/kvtenant/connector.go +++ b/pkg/kv/kvclient/kvtenant/connector.go @@ -76,6 +76,10 @@ type Connector interface { // applicable to secondary tenants. spanconfig.KVAccessor + // Reporter provides access to conformance reports, i.e. whether ranges + // backing queried keyspans conform the span configs that apply to them. + spanconfig.Reporter + // OverridesMonitor provides access to tenant cluster setting overrides. settingswatcher.OverridesMonitor diff --git a/pkg/kv/kvserver/allocator/allocatorimpl/allocator.go b/pkg/kv/kvserver/allocator/allocatorimpl/allocator.go index 43dc9ef072b4..672cb07c3435 100644 --- a/pkg/kv/kvserver/allocator/allocatorimpl/allocator.go +++ b/pkg/kv/kvserver/allocator/allocatorimpl/allocator.go @@ -998,10 +998,18 @@ func (a *Allocator) AllocateTargetFromList( targetType TargetReplicaType, ) (roachpb.ReplicationTarget, string) { existingReplicas := append(existingVoters, existingNonVoters...) - analyzedOverallConstraints := constraint.AnalyzeConstraints(ctx, a.StorePool.GetStoreDescriptor, - existingReplicas, conf.NumReplicas, conf.Constraints) - analyzedVoterConstraints := constraint.AnalyzeConstraints(ctx, a.StorePool.GetStoreDescriptor, - existingVoters, conf.GetNumVoters(), conf.VoterConstraints) + analyzedOverallConstraints := constraint.AnalyzeConstraints( + a.StorePool, + existingReplicas, + conf.NumReplicas, + conf.Constraints, + ) + analyzedVoterConstraints := constraint.AnalyzeConstraints( + a.StorePool, + existingVoters, + conf.GetNumVoters(), + conf.VoterConstraints, + ) var constraintsChecker constraintsCheckFn switch t := targetType; t { @@ -1138,10 +1146,18 @@ func (a Allocator) RemoveTarget( } existingReplicas := append(existingVoters, existingNonVoters...) - analyzedOverallConstraints := constraint.AnalyzeConstraints(ctx, a.StorePool.GetStoreDescriptor, - existingReplicas, conf.NumReplicas, conf.Constraints) - analyzedVoterConstraints := constraint.AnalyzeConstraints(ctx, a.StorePool.GetStoreDescriptor, - existingVoters, conf.GetNumVoters(), conf.VoterConstraints) + analyzedOverallConstraints := constraint.AnalyzeConstraints( + a.StorePool, + existingReplicas, + conf.NumReplicas, + conf.Constraints, + ) + analyzedVoterConstraints := constraint.AnalyzeConstraints( + a.StorePool, + existingVoters, + conf.GetNumVoters(), + conf.VoterConstraints, + ) var constraintsChecker constraintsCheckFn switch t := targetType; t { @@ -1273,9 +1289,17 @@ func (a Allocator) RebalanceTarget( zero := roachpb.ReplicationTarget{} analyzedOverallConstraints := constraint.AnalyzeConstraints( - ctx, a.StorePool.GetStoreDescriptor, existingReplicas, conf.NumReplicas, conf.Constraints) + a.StorePool, + existingReplicas, + conf.NumReplicas, + conf.Constraints, + ) analyzedVoterConstraints := constraint.AnalyzeConstraints( - ctx, a.StorePool.GetStoreDescriptor, existingVoters, conf.GetNumVoters(), conf.VoterConstraints) + a.StorePool, + existingVoters, + conf.GetNumVoters(), + conf.VoterConstraints, + ) var removalConstraintsChecker constraintsCheckFn var rebalanceConstraintsChecker rebalanceConstraintsCheckFn var replicaSetToRebalance, replicasWithExcludedStores []roachpb.ReplicaDescriptor diff --git a/pkg/kv/kvserver/allocator/allocatorimpl/allocator_scorer_test.go b/pkg/kv/kvserver/allocator/allocatorimpl/allocator_scorer_test.go index f0d49c02eb34..cf480fe5c394 100644 --- a/pkg/kv/kvserver/allocator/allocatorimpl/allocator_scorer_test.go +++ b/pkg/kv/kvserver/allocator/allocatorimpl/allocator_scorer_test.go @@ -583,7 +583,13 @@ var ( } ) -func getTestStoreDesc(storeID roachpb.StoreID) (roachpb.StoreDescriptor, bool) { +type mockStoreResolver struct{} + +var _ constraint.StoreResolver = mockStoreResolver{} + +func (m mockStoreResolver) GetStoreDescriptor( + storeID roachpb.StoreID, +) (roachpb.StoreDescriptor, bool) { desc, ok := testStores[storeID] return desc, ok } @@ -936,9 +942,7 @@ func TestAllocateConstraintsCheck(t *testing.T) { Constraints: tc.constraints, NumReplicas: tc.numReplicas, } - analyzed := constraint.AnalyzeConstraints( - context.Background(), getTestStoreDesc, testStoreReplicas(tc.existing), - conf.NumReplicas, conf.Constraints) + analyzed := constraint.AnalyzeConstraints(mockStoreResolver{}, testStoreReplicas(tc.existing), conf.NumReplicas, conf.Constraints) for _, s := range testStores { valid, necessary := allocateConstraintsCheck(s, analyzed) if e, a := tc.expectedValid[s.StoreID], valid; e != a { @@ -1071,8 +1075,7 @@ func TestRemoveConstraintsCheck(t *testing.T) { Constraints: tc.constraints, NumReplicas: tc.numReplicas, } - analyzed := constraint.AnalyzeConstraints( - context.Background(), getTestStoreDesc, existing, conf.NumReplicas, conf.Constraints) + analyzed := constraint.AnalyzeConstraints(mockStoreResolver{}, existing, conf.NumReplicas, conf.Constraints) for storeID, expected := range tc.expected { valid, necessary := removeConstraintsCheck(testStores[storeID], analyzed) if e, a := expected.valid, valid; e != a { diff --git a/pkg/kv/kvserver/allocator/allocatorimpl/allocator_test.go b/pkg/kv/kvserver/allocator/allocatorimpl/allocator_test.go index 5531ef96c355..b0fb9e89f75e 100644 --- a/pkg/kv/kvserver/allocator/allocatorimpl/allocator_test.go +++ b/pkg/kv/kvserver/allocator/allocatorimpl/allocator_test.go @@ -3387,9 +3387,7 @@ func TestAllocateCandidatesExcludeNonReadyNodes(t *testing.T) { } // No constraints. conf := roachpb.SpanConfig{} - analyzed := constraint.AnalyzeConstraints( - ctx, a.StorePool.GetStoreDescriptor, existingRepls, conf.NumReplicas, - conf.Constraints) + analyzed := constraint.AnalyzeConstraints(a.StorePool, existingRepls, conf.NumReplicas, conf.Constraints) allocationConstraintsChecker := voterConstraintsCheckerForAllocation(analyzed, constraint.EmptyAnalyzedConstraints) removalConstraintsChecker := voterConstraintsCheckerForRemoval(analyzed, constraint.EmptyAnalyzedConstraints) rebalanceConstraintsChecker := voterConstraintsCheckerForRebalance(analyzed, constraint.EmptyAnalyzedConstraints) @@ -3741,9 +3739,7 @@ func TestAllocateCandidatesNumReplicasConstraints(t *testing.T) { } } conf := roachpb.SpanConfig{Constraints: tc.constraints} - analyzed := constraint.AnalyzeConstraints( - ctx, a.StorePool.GetStoreDescriptor, existingRepls, conf.NumReplicas, - conf.Constraints) + analyzed := constraint.AnalyzeConstraints(a.StorePool, existingRepls, conf.NumReplicas, conf.Constraints) checkFn := voterConstraintsCheckerForAllocation(analyzed, constraint.EmptyAnalyzedConstraints) candidates := rankedCandidateListForAllocation( @@ -3971,8 +3967,7 @@ func TestRemoveCandidatesNumReplicasConstraints(t *testing.T) { StoreID: storeID, } } - analyzed := constraint.AnalyzeConstraints(ctx, a.StorePool.GetStoreDescriptor, existingRepls, - 0 /* numReplicas */, tc.constraints) + analyzed := constraint.AnalyzeConstraints(a.StorePool, existingRepls, 0, tc.constraints) // Check behavior in a span config where `voter_constraints` are empty. checkFn := voterConstraintsCheckerForRemoval(analyzed, constraint.EmptyAnalyzedConstraints) @@ -5178,9 +5173,7 @@ func TestRebalanceCandidatesNumReplicasConstraints(t *testing.T) { Constraints: tc.constraints, NumReplicas: tc.numReplicas, } - analyzed := constraint.AnalyzeConstraints( - ctx, a.StorePool.GetStoreDescriptor, existingRepls, - conf.NumReplicas, conf.Constraints) + analyzed := constraint.AnalyzeConstraints(a.StorePool, existingRepls, conf.NumReplicas, conf.Constraints) removalConstraintsChecker := voterConstraintsCheckerForRemoval( analyzed, constraint.EmptyAnalyzedConstraints, diff --git a/pkg/kv/kvserver/constraint/analyzer.go b/pkg/kv/kvserver/constraint/analyzer.go index 90f0b5970fc6..5b3ead151f41 100644 --- a/pkg/kv/kvserver/constraint/analyzer.go +++ b/pkg/kv/kvserver/constraint/analyzer.go @@ -10,19 +10,15 @@ package constraint -import ( - "context" +import "github.com/cockroachdb/cockroach/pkg/roachpb" - "github.com/cockroachdb/cockroach/pkg/roachpb" -) - -// AnalyzedConstraints represents the result or AnalyzeConstraints(). It -// combines a zone's constraints with information about which stores satisfy -// what term of the constraints disjunction. +// AnalyzedConstraints represents the result of AnalyzeConstraints(). It +// combines a span config's constraints with information about which stores +// satisfy what term of the constraints disjunction. type AnalyzedConstraints struct { Constraints []roachpb.ConstraintsConjunction // True if the per-replica constraints don't fully cover all the desired - // replicas in the range (sum(constraints.NumReplicas) < zone.NumReplicas). + // replicas in the range (sum(constraints.NumReplicas) < config.NumReplicas). // In such cases, we allow replicas that don't match any of the per-replica // constraints, but never mark them as necessary. UnconstrainedReplicas bool @@ -39,13 +35,17 @@ type AnalyzedConstraints struct { // satisfied by any given configuration of replicas. var EmptyAnalyzedConstraints = AnalyzedConstraints{} -// AnalyzeConstraints processes the zone config constraints that apply to a +// StoreResolver resolves a store descriptor by a given ID. +type StoreResolver interface { + GetStoreDescriptor(storeID roachpb.StoreID) (roachpb.StoreDescriptor, bool) +} + +// AnalyzeConstraints processes the span config constraints that apply to a // range along with the current replicas for a range, spitting back out // information about which constraints are satisfied by which replicas and // which replicas satisfy which constraints, aiding in allocation decisions. func AnalyzeConstraints( - ctx context.Context, - getStoreDescFn func(roachpb.StoreID) (roachpb.StoreDescriptor, bool), + storeResolver StoreResolver, existing []roachpb.ReplicaDescriptor, numReplicas int32, constraints []roachpb.ConstraintsConjunction, @@ -67,7 +67,7 @@ func AnalyzeConstraints( // happen once a node is hooked into gossip), trust that it's valid. This // is a much more stable failure state than frantically moving everything // off such a node. - store, ok := getStoreDescFn(repl.StoreID) + store, ok := storeResolver.GetStoreDescriptor(repl.StoreID) if !ok || ConjunctionsCheck(store, subConstraints.Constraints) { result.SatisfiedBy[i] = append(result.SatisfiedBy[i], store.StoreID) result.Satisfies[store.StoreID] = append(result.Satisfies[store.StoreID], i) @@ -82,7 +82,7 @@ func AnalyzeConstraints( // ConjunctionsCheck checks a store against a single set of constraints (out of // the possibly numerous sets that apply to a range), returning true iff the -// store matches the constraints. The contraints are AND'ed together; a store +// store matches the constraints. The constraints are AND'ed together; a store // matches the conjunction if it matches all of them. func ConjunctionsCheck(store roachpb.StoreDescriptor, constraints []roachpb.Constraint) bool { for _, constraint := range constraints { diff --git a/pkg/kv/kvserver/replica_metrics.go b/pkg/kv/kvserver/replica_metrics.go index 563620e1f173..efbe8d941993 100644 --- a/pkg/kv/kvserver/replica_metrics.go +++ b/pkg/kv/kvserver/replica_metrics.go @@ -220,10 +220,10 @@ func calcRangeCounter( status := desc.Replicas().ReplicationStatus(func(rDesc roachpb.ReplicaDescriptor) bool { return livenessMap[rDesc.NodeID].IsLive }, - // neededVoters - we don't care about the under/over-replication - // determinations from the report because it's too magic. We'll do our own - // determination below. - 0) + // needed{Voters,NonVoters} - we don't care about the + // under/over-replication determinations from the report because + // it's too magic. We'll do our own determination below. + 0, 0) unavailable = !status.Available liveVoters := calcLiveVoterReplicas(desc, livenessMap) liveNonVoters := calcLiveNonVoterReplicas(desc, livenessMap) diff --git a/pkg/kv/kvserver/reports/replication_stats_report.go b/pkg/kv/kvserver/reports/replication_stats_report.go index 6644448f1065..863f91d097eb 100644 --- a/pkg/kv/kvserver/reports/replication_stats_report.go +++ b/pkg/kv/kvserver/reports/replication_stats_report.go @@ -401,7 +401,10 @@ func (v *replicationStatsVisitor) countRange( ) { status := r.Replicas().ReplicationStatus(func(rDesc roachpb.ReplicaDescriptor) bool { return v.nodeChecker(rDesc.NodeID) - }, replicationFactor) + // NB: this reporting code was written before ReplicationStatus reported + // on non-voting replicas. This code will also soon be removed in favor + // of something that works with multi-tenancy (#89987). + }, replicationFactor, 0) // Note that a range can be under-replicated and over-replicated at the same // time if it has many replicas, but sufficiently many of them are on dead // nodes. diff --git a/pkg/kv/kvserver/store.go b/pkg/kv/kvserver/store.go index 2bbdc0b27177..e4931615b87e 100644 --- a/pkg/kv/kvserver/store.go +++ b/pkg/kv/kvserver/store.go @@ -1072,7 +1072,8 @@ type StoreConfig struct { // SpanConfigsDisabled determines whether we're able to use the span configs // infrastructure or not. - // TODO(richardjcai): We can likely remove this. + // + // TODO(irfansharif): We can remove this. SpanConfigsDisabled bool // Used to subscribe to span configuration changes, keeping up-to-date a // data structure useful for retrieving span configs. Only available if diff --git a/pkg/roachpb/api.proto b/pkg/roachpb/api.proto index c3b09cbe881d..9a4f156185dc 100644 --- a/pkg/roachpb/api.proto +++ b/pkg/roachpb/api.proto @@ -3022,6 +3022,10 @@ service Internal { // keyspans. rpc UpdateSpanConfigs (UpdateSpanConfigsRequest) returns (UpdateSpanConfigsResponse) { } + // SpanConfigConformance is used to determine whether ranges backing the given + // keyspans conform to span configs that apply over them. + rpc SpanConfigConformance (SpanConfigConformanceRequest) returns (SpanConfigConformanceResponse) { } + // TenantSettings is used by tenants to obtain and stay up to date with tenant // setting overrides. rpc TenantSettings (TenantSettingsRequest) returns (stream TenantSettingsEvent) { } diff --git a/pkg/roachpb/metadata.go b/pkg/roachpb/metadata.go index 882a7331d1d9..ff09e7730fc9 100644 --- a/pkg/roachpb/metadata.go +++ b/pkg/roachpb/metadata.go @@ -483,6 +483,17 @@ func (r ReplicaDescriptor) IsAnyVoter() bool { } } +// IsNonVoter returns true if the replica is a non-voter. Can be used as a +// filter for ReplicaDescriptors.Filter. +func (r ReplicaDescriptor) IsNonVoter() bool { + switch r.Type { + case NON_VOTER: + return true + default: + return false + } +} + // PercentilesFromData derives percentiles from a slice of data points. // Sorts the input data if it isn't already sorted. func PercentilesFromData(data []float64) Percentiles { diff --git a/pkg/roachpb/metadata_replicas.go b/pkg/roachpb/metadata_replicas.go index 280dcff656c8..4401237f15d4 100644 --- a/pkg/roachpb/metadata_replicas.go +++ b/pkg/roachpb/metadata_replicas.go @@ -378,7 +378,7 @@ func (d ReplicaSet) HasReplicaOnNode(nodeID NodeID) bool { // the replication layer. This is more complicated than just counting the number // of replicas due to the existence of joint quorums. func (d ReplicaSet) CanMakeProgress(liveFunc func(descriptor ReplicaDescriptor) bool) bool { - return d.ReplicationStatus(liveFunc, 0 /* neededVoters */).Available + return d.ReplicationStatus(liveFunc, 0 /* neededVoters */, 0 /* neededNonVoters*/).Available } // RangeStatusReport contains info about a range's replication status. Returned @@ -389,26 +389,32 @@ type RangeStatusReport struct { Available bool // UnderReplicated is set if the range is considered under-replicated // according to the desired replication factor and the replica liveness info - // passed to ReplicationStatus. Dead replicas are considered to be missing. + // passed to ReplicationStatus. Only voting replicas are counted here. Dead + // replicas are considered to be missing. UnderReplicated bool - // UnderReplicated is set if the range is considered under-replicated + // OverReplicated is set if the range is considered over-replicated // according to the desired replication factor passed to ReplicationStatus. - // Replica liveness is not considered. + // Only voting replicas are counted here. Replica liveness is not + // considered. // // Note that a range can be under-replicated and over-replicated at the same // time if it has many replicas, but sufficiently many of them are on dead // nodes. OverReplicated bool + // {Under,Over}ReplicatedNonVoters are like their {Under,Over}Replicated + // counterparts but applying only to non-voters. + UnderReplicatedNonVoters, OverReplicatedNonVoters bool } // ReplicationStatus returns availability and over/under-replication // determinations for the range. // -// replicationFactor is the replica's desired replication for purposes of -// determining over/under-replication. 0 can be passed if the caller is only -// interested in availability and not interested in the other report fields. +// neededVoters is the replica's desired replication for purposes of determining +// over/under-replication of voters. If the caller is only interested in +// availability of voting replicas, 0 can be passed in. neededNonVoters is the +// counterpart for non-voting replicas. func (d ReplicaSet) ReplicationStatus( - liveFunc func(descriptor ReplicaDescriptor) bool, neededVoters int, + liveFunc func(descriptor ReplicaDescriptor) bool, neededVoters int, neededNonVoters int, ) RangeStatusReport { var res RangeStatusReport // isBoth takes two replica predicates and returns their conjunction. @@ -440,13 +446,22 @@ func (d ReplicaSet) ReplicationStatus( res.Available = availableIncomingGroup && availableOutgoingGroup - // Determine over/under-replication. Note that learners don't matter. + // Determine over/under-replication of voting replicas. Note that learners + // don't matter. underReplicatedOldGroup := len(liveVotersOldGroup) < neededVoters underReplicatedNewGroup := len(liveVotersNewGroup) < neededVoters overReplicatedOldGroup := len(votersOldGroup) > neededVoters overReplicatedNewGroup := len(votersNewGroup) > neededVoters res.UnderReplicated = underReplicatedOldGroup || underReplicatedNewGroup res.OverReplicated = overReplicatedOldGroup || overReplicatedNewGroup + if neededNonVoters == 0 { + return res + } + + nonVoters := d.FilterToDescriptors(ReplicaDescriptor.IsNonVoter) + liveNonVoters := d.FilterToDescriptors(isBoth(ReplicaDescriptor.IsNonVoter, liveFunc)) + res.UnderReplicatedNonVoters = len(liveNonVoters) < neededNonVoters + res.OverReplicatedNonVoters = len(nonVoters) > neededNonVoters return res } diff --git a/pkg/roachpb/roachpbmock/mocks_generated.go b/pkg/roachpb/roachpbmock/mocks_generated.go index 377c7b7b0fcb..68143d9dbe83 100644 --- a/pkg/roachpb/roachpbmock/mocks_generated.go +++ b/pkg/roachpb/roachpbmock/mocks_generated.go @@ -217,6 +217,26 @@ func (mr *MockInternalClientMockRecorder) ResetQuorum(arg0, arg1 interface{}, ar return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ResetQuorum", reflect.TypeOf((*MockInternalClient)(nil).ResetQuorum), varargs...) } +// SpanConfigConformance mocks base method. +func (m *MockInternalClient) SpanConfigConformance(arg0 context.Context, arg1 *roachpb.SpanConfigConformanceRequest, arg2 ...grpc.CallOption) (*roachpb.SpanConfigConformanceResponse, error) { + m.ctrl.T.Helper() + varargs := []interface{}{arg0, arg1} + for _, a := range arg2 { + varargs = append(varargs, a) + } + ret := m.ctrl.Call(m, "SpanConfigConformance", varargs...) + ret0, _ := ret[0].(*roachpb.SpanConfigConformanceResponse) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// SpanConfigConformance indicates an expected call of SpanConfigConformance. +func (mr *MockInternalClientMockRecorder) SpanConfigConformance(arg0, arg1 interface{}, arg2 ...interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + varargs := append([]interface{}{arg0, arg1}, arg2...) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SpanConfigConformance", reflect.TypeOf((*MockInternalClient)(nil).SpanConfigConformance), varargs...) +} + // TenantSettings mocks base method. func (m *MockInternalClient) TenantSettings(arg0 context.Context, arg1 *roachpb.TenantSettingsRequest, arg2 ...grpc.CallOption) (roachpb.Internal_TenantSettingsClient, error) { m.ctrl.T.Helper() diff --git a/pkg/roachpb/span_config.go b/pkg/roachpb/span_config.go index 02c621ea8ba1..ee7a933b06fd 100644 --- a/pkg/roachpb/span_config.go +++ b/pkg/roachpb/span_config.go @@ -97,9 +97,6 @@ func (s *SpanConfig) ValidateSystemTargetSpanConfig() error { // GetNumVoters returns the number of voting replicas as defined in the // span config. -// TODO(arul): We can get rid of this now that we're correctly populating -// -// numVoters when going from ZoneConfigs -> SpanConfigs. func (s *SpanConfig) GetNumVoters() int32 { if s.NumVoters != 0 { return s.NumVoters diff --git a/pkg/roachpb/span_config.proto b/pkg/roachpb/span_config.proto index 3a75e58efca3..dc298ac6bae2 100644 --- a/pkg/roachpb/span_config.proto +++ b/pkg/roachpb/span_config.proto @@ -14,6 +14,7 @@ option go_package = "roachpb"; import "errorspb/errors.proto"; import "roachpb/data.proto"; +import "roachpb/metadata.proto"; import "gogoproto/gogo.proto"; import "util/hlc/timestamp.proto"; @@ -277,6 +278,15 @@ message SpanConfigEntry { SpanConfig config = 2 [(gogoproto.nullable) = false]; }; +// SpanConfigConformanceReport lists out ranges that (i) don't conform to span +// configs that apply over them, and (ii) are unavailable. +message SpanConfigConformanceReport { + repeated RangeDescriptor under_replicated = 1 [(gogoproto.nullable) = false]; + repeated RangeDescriptor over_replicated = 2 [(gogoproto.nullable) = false]; + repeated RangeDescriptor violating_constraints = 3 [(gogoproto.nullable) = false]; + repeated RangeDescriptor unavailable = 4 [(gogoproto.nullable) = false]; +}; + // GetSpanConfigsRequest is used to fetch the span configurations and system // span configurations. message GetSpanConfigsRequest { @@ -344,6 +354,19 @@ message UpdateSpanConfigsResponse { errorspb.EncodedError error = 1 [(gogoproto.nullable) = false]; }; +// SpanConfigConformanceRequest is used to determine whether ranges backing the +// given keyspans conform to the span configs that apply over them. +message SpanConfigConformanceRequest { + // Spans to request the conformance data for. The spans listed here are not + // allowed to overlap with one another. + repeated Span spans = 1 [(gogoproto.nullable) = false]; +}; + +// SpanConfigConformanceResponse lists out ranges that (i) don't conform to span +// configs that apply over them, and (ii) are unavailable. +message SpanConfigConformanceResponse { + SpanConfigConformanceReport report = 1 [(gogoproto.nullable) = false]; +}; // GetAllSystemSpanConfigsThatApplyRequest is used to fetch all system span // configs that apply to a given tenant. For a specific tenant range, this diff --git a/pkg/rpc/context_test.go b/pkg/rpc/context_test.go index ab7fcfc41700..eed1d567e47d 100644 --- a/pkg/rpc/context_test.go +++ b/pkg/rpc/context_test.go @@ -425,6 +425,12 @@ func (*internalServer) UpdateSpanConfigs( panic("unimplemented") } +func (s *internalServer) SpanConfigConformance( + context.Context, *roachpb.SpanConfigConformanceRequest, +) (*roachpb.SpanConfigConformanceResponse, error) { + panic("unimplemented") +} + func (*internalServer) TenantSettings( *roachpb.TenantSettingsRequest, roachpb.Internal_TenantSettingsServer, ) error { diff --git a/pkg/rpc/nodedialer/nodedialer_test.go b/pkg/rpc/nodedialer/nodedialer_test.go index 0d0baf7b1569..f27907f2fced 100644 --- a/pkg/rpc/nodedialer/nodedialer_test.go +++ b/pkg/rpc/nodedialer/nodedialer_test.go @@ -621,6 +621,12 @@ func (*internalServer) UpdateSpanConfigs( panic("unimplemented") } +func (s *internalServer) SpanConfigConformance( + context.Context, *roachpb.SpanConfigConformanceRequest, +) (*roachpb.SpanConfigConformanceResponse, error) { + panic("unimplemented") +} + func (*internalServer) TenantSettings( *roachpb.TenantSettingsRequest, roachpb.Internal_TenantSettingsServer, ) error { diff --git a/pkg/server/BUILD.bazel b/pkg/server/BUILD.bazel index 9bbfbdb2a58f..6e283a3133e6 100644 --- a/pkg/server/BUILD.bazel +++ b/pkg/server/BUILD.bazel @@ -155,6 +155,7 @@ go_library( "//pkg/spanconfig/spanconfigmanager", "//pkg/spanconfig/spanconfigptsreader", "//pkg/spanconfig/spanconfigreconciler", + "//pkg/spanconfig/spanconfigreporter", "//pkg/spanconfig/spanconfigsplitter", "//pkg/spanconfig/spanconfigsqltranslator", "//pkg/spanconfig/spanconfigsqlwatcher", diff --git a/pkg/server/node.go b/pkg/server/node.go index c4ce69ae08d2..86721d766dbb 100644 --- a/pkg/server/node.go +++ b/pkg/server/node.go @@ -241,6 +241,8 @@ type Node struct { spanConfigAccessor spanconfig.KVAccessor // powers the span configuration RPCs + spanConfigReporter spanconfig.Reporter // powers the span configuration RPCs + // Turns `Node.writeNodeStatus` into a no-op. This is a hack to enable the // COCKROACH_DEBUG_TS_IMPORT_FILE env var. suppressNodeStatus syncutil.AtomicBool @@ -370,6 +372,7 @@ func NewNode( tenantUsage multitenant.TenantUsageServer, tenantSettingsWatcher *tenantsettingswatcher.Watcher, spanConfigAccessor spanconfig.KVAccessor, + spanConfigReporter spanconfig.Reporter, ) *Node { n := &Node{ storeCfg: cfg, @@ -383,6 +386,7 @@ func NewNode( tenantUsage: tenantUsage, tenantSettingsWatcher: tenantSettingsWatcher, spanConfigAccessor: spanConfigAccessor, + spanConfigReporter: spanConfigReporter, testingErrorEvent: cfg.TestingKnobs.TestingResponseErrorEvent, } n.storeCfg.KVAdmissionController = kvserver.MakeKVAdmissionController( @@ -1848,3 +1852,18 @@ func (n *Node) UpdateSpanConfigs( } return &roachpb.UpdateSpanConfigsResponse{}, nil } + +// SpanConfigConformance implements the roachpb.InternalServer interface. +func (n *Node) SpanConfigConformance( + ctx context.Context, req *roachpb.SpanConfigConformanceRequest, +) (*roachpb.SpanConfigConformanceResponse, error) { + if n.storeCfg.SpanConfigSubscriber.LastUpdated().IsEmpty() { + return nil, errors.Newf("haven't (yet) subscribed to span configs") + } + + report, err := n.spanConfigReporter.SpanConfigConformance(ctx, req.Spans) + if err != nil { + return nil, err + } + return &roachpb.SpanConfigConformanceResponse{Report: report}, nil +} diff --git a/pkg/server/server.go b/pkg/server/server.go index ca61c8056077..05648dad0897 100644 --- a/pkg/server/server.go +++ b/pkg/server/server.go @@ -64,6 +64,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/spanconfig/spanconfigkvaccessor" "github.com/cockroachdb/cockroach/pkg/spanconfig/spanconfigkvsubscriber" "github.com/cockroachdb/cockroach/pkg/spanconfig/spanconfigptsreader" + "github.com/cockroachdb/cockroach/pkg/spanconfig/spanconfigreporter" "github.com/cockroachdb/cockroach/pkg/sql" _ "github.com/cockroachdb/cockroach/pkg/sql/catalog/schematelemetry" // register schedules declared outside of pkg/sql "github.com/cockroachdb/cockroach/pkg/sql/catalog/systemschema" @@ -87,6 +88,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/metric" "github.com/cockroachdb/cockroach/pkg/util/mon" "github.com/cockroachdb/cockroach/pkg/util/netutil" + "github.com/cockroachdb/cockroach/pkg/util/rangedesciter" "github.com/cockroachdb/cockroach/pkg/util/retry" "github.com/cockroachdb/cockroach/pkg/util/schedulerlatency" "github.com/cockroachdb/cockroach/pkg/util/stop" @@ -566,6 +568,8 @@ func NewServer(cfg Config, stopper *stop.Stopper) (*Server, error) { // kvAccessor powers the span configuration RPCs and the host tenant's // reconciliation job. kvAccessor spanconfig.KVAccessor + // reporter is used to report over span config conformance. + reporter spanconfig.Reporter // subscriber is used by stores to subscribe to span configuration updates. subscriber spanconfig.KVSubscriber // kvAccessorForTenantRecords is when creating/destroying secondary @@ -616,6 +620,14 @@ func NewServer(cfg Config, stopper *stop.Stopper) (*Server, error) { spanConfigKnobs, ) spanConfig.kvAccessor, spanConfig.kvAccessorForTenantRecords = scKVAccessor, scKVAccessor + spanConfig.reporter = spanconfigreporter.New( + nodeLiveness, + storePool, + spanConfig.subscriber, + rangedesciter.New(db), + cfg.Settings, + spanConfigKnobs, + ) } else { // If the spanconfigs infrastructure is disabled, there should be no // reconciliation jobs or RPCs issued against the infrastructure. Plug @@ -623,6 +635,9 @@ func NewServer(cfg Config, stopper *stop.Stopper) (*Server, error) { // unexpected use. spanConfig.kvAccessor = spanconfigkvaccessor.DisabledKVAccessor + // Ditto for the spanconfig.Reporter. + spanConfig.reporter = spanconfigreporter.DisabledReporter + // Use a no-op accessor where tenant records are created/destroyed. spanConfig.kvAccessorForTenantRecords = spanconfigkvaccessor.NoopKVAccessor @@ -722,6 +737,7 @@ func NewServer(cfg Config, stopper *stop.Stopper) (*Server, error) { tenantUsage, tenantSettingsWatcher, spanConfig.kvAccessor, + spanConfig.reporter, ) roachpb.RegisterInternalServer(grpcServer.Server, node) kvserver.RegisterPerReplicaServer(grpcServer.Server, node.perReplicaServer) @@ -777,6 +793,7 @@ func NewServer(cfg Config, stopper *stop.Stopper) (*Server, error) { closedSessionCache, flowScheduler, internalExecutor, + spanConfig.reporter, ) var jobAdoptionStopFile string diff --git a/pkg/server/serverpb/migration.proto b/pkg/server/serverpb/migration.proto index 38ee1302fc17..3fd0af2a3407 100644 --- a/pkg/server/serverpb/migration.proto +++ b/pkg/server/serverpb/migration.proto @@ -87,5 +87,8 @@ service Migration { // WaitForSpanConfigSubscription waits until the target node is wholly // subscribed to the global span configurations state. + // + // TODO(irfansharif): This can be removed -- 22.2 nodes will never issue this + // RPC. rpc WaitForSpanConfigSubscription (WaitForSpanConfigSubscriptionRequest) returns (WaitForSpanConfigSubscriptionResponse) { } } diff --git a/pkg/server/serverpb/status.proto b/pkg/server/serverpb/status.proto index 453f0cb201a5..de812eb9181f 100644 --- a/pkg/server/serverpb/status.proto +++ b/pkg/server/serverpb/status.proto @@ -19,6 +19,7 @@ import "jobs/jobspb/jobs.proto"; import "roachpb/app_stats.proto"; import "roachpb/data.proto"; import "roachpb/index_usage_stats.proto"; +import "roachpb/span_config.proto"; import "roachpb/metadata.proto"; import "server/diagnostics/diagnosticspb/diagnostics.proto"; import "server/serverpb/index_recommendations.proto"; diff --git a/pkg/server/status.go b/pkg/server/status.go index 0ef48c47af2f..2d0baa319bc7 100644 --- a/pkg/server/status.go +++ b/pkg/server/status.go @@ -52,6 +52,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/server/status/statuspb" "github.com/cockroachdb/cockroach/pkg/server/telemetry" "github.com/cockroachdb/cockroach/pkg/settings/cluster" + "github.com/cockroachdb/cockroach/pkg/spanconfig" "github.com/cockroachdb/cockroach/pkg/sql" "github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb" "github.com/cockroachdb/cockroach/pkg/sql/catalog/descs" @@ -511,6 +512,7 @@ type statusServer struct { si systemInfoOnce stmtDiagnosticsRequester StmtDiagnosticsRequester internalExecutor *sql.InternalExecutor + spanConfigReporter spanconfig.Reporter } // StmtDiagnosticsRequester is the interface into *stmtdiagnostics.Registry @@ -569,6 +571,7 @@ func newStatusServer( closedSessionCache *sql.ClosedSessionCache, flowScheduler *flowinfra.FlowScheduler, internalExecutor *sql.InternalExecutor, + spanConfigReporter spanconfig.Reporter, ) *statusServer { ambient.AddLogTag("status", nil) server := &statusServer{ @@ -582,15 +585,16 @@ func newStatusServer( rpcCtx: rpcCtx, stopper: stopper, }, - cfg: cfg, - admin: adminServer, - db: db, - gossip: gossip, - metricSource: metricSource, - nodeLiveness: nodeLiveness, - storePool: storePool, - stores: stores, - internalExecutor: internalExecutor, + cfg: cfg, + admin: adminServer, + db: db, + gossip: gossip, + metricSource: metricSource, + nodeLiveness: nodeLiveness, + storePool: storePool, + stores: stores, + internalExecutor: internalExecutor, + spanConfigReporter: spanConfigReporter, } return server diff --git a/pkg/server/testserver.go b/pkg/server/testserver.go index f650877eb1cb..b5429c602bb0 100644 --- a/pkg/server/testserver.go +++ b/pkg/server/testserver.go @@ -732,6 +732,11 @@ func (t *TestTenant) SpanConfigKVAccessor() interface{} { return t.SQLServer.tenantConnect } +// SpanConfigReporter is part TestTenantInterface. +func (t *TestTenant) SpanConfigReporter() interface{} { + return t.SQLServer.tenantConnect +} + // SpanConfigReconciler is part TestTenantInterface. func (t *TestTenant) SpanConfigReconciler() interface{} { return t.SQLServer.spanconfigMgr.Reconciler @@ -1135,6 +1140,11 @@ func (ts *TestServer) SpanConfigKVAccessor() interface{} { return ts.Server.node.spanConfigAccessor } +// SpanConfigReporter is part of TestServerInterface. +func (ts *TestServer) SpanConfigReporter() interface{} { + return ts.Server.node.spanConfigReporter +} + // SpanConfigReconciler is part of TestServerInterface. func (ts *TestServer) SpanConfigReconciler() interface{} { if ts.sqlServer.spanconfigMgr == nil { diff --git a/pkg/spanconfig/spanconfig.go b/pkg/spanconfig/spanconfig.go index 955154c59552..5bd6ebf4eb3a 100644 --- a/pkg/spanconfig/spanconfig.go +++ b/pkg/spanconfig/spanconfig.go @@ -350,6 +350,14 @@ func Delta( return delta, nil } +// Reporter generates a conformance report over the given spans, i.e. whether +// the backing ranges conform to the span configs that apply to them. +type Reporter interface { + SpanConfigConformance( + ctx context.Context, spans []roachpb.Span, + ) (roachpb.SpanConfigConformanceReport, error) +} + // SQLUpdate captures either a descriptor or a protected timestamp update. // It is the unit emitted by the SQLWatcher. type SQLUpdate struct { diff --git a/pkg/spanconfig/spanconfigreporter/BUILD.bazel b/pkg/spanconfig/spanconfigreporter/BUILD.bazel new file mode 100644 index 000000000000..220924de2eb2 --- /dev/null +++ b/pkg/spanconfig/spanconfigreporter/BUILD.bazel @@ -0,0 +1,55 @@ +load("//build/bazelutil/unused_checker:unused.bzl", "get_x_data") +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") + +go_library( + name = "spanconfigreporter", + srcs = [ + "disabled.go", + "reporter.go", + ], + importpath = "github.com/cockroachdb/cockroach/pkg/spanconfig/spanconfigreporter", + visibility = ["//visibility:public"], + deps = [ + "//pkg/kv/kvserver/constraint", + "//pkg/roachpb", + "//pkg/settings", + "//pkg/settings/cluster", + "//pkg/spanconfig", + "//pkg/util/log", + "//pkg/util/rangedesciter", + "@com_github_cockroachdb_errors//:errors", + ], +) + +go_test( + name = "spanconfigreporter_test", + srcs = [ + "datadriven_test.go", + "main_test.go", + ], + args = ["-test.timeout=295s"], + data = glob(["testdata/**"]), + deps = [ + ":spanconfigreporter", + "//pkg/kv/kvserver/constraint", + "//pkg/roachpb", + "//pkg/security/securityassets", + "//pkg/security/securitytest", + "//pkg/server", + "//pkg/settings/cluster", + "//pkg/spanconfig", + "//pkg/spanconfig/spanconfigstore", + "//pkg/spanconfig/spanconfigtestutils", + "//pkg/testutils", + "//pkg/testutils/serverutils", + "//pkg/testutils/testcluster", + "//pkg/util/leaktest", + "//pkg/util/log", + "//pkg/util/randutil", + "//pkg/util/rangedesciter", + "@com_github_cockroachdb_datadriven//:datadriven", + "@com_github_stretchr_testify//require", + ], +) + +get_x_data(name = "get_x_data") diff --git a/pkg/spanconfig/spanconfigreporter/datadriven_test.go b/pkg/spanconfig/spanconfigreporter/datadriven_test.go new file mode 100644 index 000000000000..15fe3f4ac288 --- /dev/null +++ b/pkg/spanconfig/spanconfigreporter/datadriven_test.go @@ -0,0 +1,318 @@ +// Copyright 2022 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package spanconfigreporter_test + +import ( + "context" + "fmt" + "sort" + "strings" + "testing" + + "github.com/cockroachdb/cockroach/pkg/kv/kvserver/constraint" + "github.com/cockroachdb/cockroach/pkg/roachpb" + clustersettings "github.com/cockroachdb/cockroach/pkg/settings/cluster" + "github.com/cockroachdb/cockroach/pkg/spanconfig" + "github.com/cockroachdb/cockroach/pkg/spanconfig/spanconfigreporter" + "github.com/cockroachdb/cockroach/pkg/spanconfig/spanconfigstore" + "github.com/cockroachdb/cockroach/pkg/spanconfig/spanconfigtestutils" + "github.com/cockroachdb/cockroach/pkg/testutils" + "github.com/cockroachdb/cockroach/pkg/util/leaktest" + "github.com/cockroachdb/cockroach/pkg/util/log" + "github.com/cockroachdb/cockroach/pkg/util/rangedesciter" + "github.com/cockroachdb/datadriven" + "github.com/stretchr/testify/require" +) + +// TestDataDriven is a data-driven test for spanconfig.Reporter. It offers +// the following commands: +// +// init +// n1: attr-key=attr-value +// r1: [a,b) +// ---- +// +// liveness +// n1: live|dead +// ---- +// +// allocate +// r1: voters=[n1,n2] nonvoters=[n3] +// ---- +// +// configure +// [a,b): num_replicas=3 constraints='...' voter_constraints='...' +// ---- +// +// report +// [a,b) +// [c,d) +// ---- +func TestDataDriven(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + + ctx := context.Background() + st := clustersettings.MakeTestingClusterSettings() + scKnobs := &spanconfig.TestingKnobs{} + + datadriven.Walk(t, testutils.TestDataPath(t), func(t *testing.T, path string) { + cluster := newMockCluster(t, st, scKnobs) + reporter := spanconfigreporter.New(cluster, cluster, cluster, cluster, st, scKnobs) + datadriven.RunTest(t, path, func(t *testing.T, d *datadriven.TestData) string { + switch d.Cmd { + case "init": + for _, line := range strings.Split(d.Input, "\n") { + line = strings.TrimSpace(line) + if line == "" { + continue + } + parts := strings.Split(line, ":") + require.Len(t, parts, 2) + id, data := strings.TrimSpace(parts[0]), strings.TrimSpace(parts[1]) + switch { + case strings.HasPrefix(id, "n"): // node + nodeID := spanconfigtestutils.ParseNodeID(t, id) + locality := &roachpb.Locality{} + if data != "" { + require.NoError(t, locality.Set(data)) + } + cluster.addNode(roachpb.NodeDescriptor{ + NodeID: nodeID, + Locality: *locality, + }) + case strings.HasPrefix(id, "r"): // range + rangeID := spanconfigtestutils.ParseRangeID(t, id) + span := spanconfigtestutils.ParseSpan(t, data) + cluster.addRange(roachpb.RangeDescriptor{ + RangeID: rangeID, + StartKey: roachpb.RKey(span.Key), + EndKey: roachpb.RKey(span.EndKey), + }) + default: + t.Fatalf("malformed line %q, expected to find 'n' or 'r' prefix", line) + } + } + + case "liveness": + for _, line := range strings.Split(d.Input, "\n") { + line = strings.TrimSpace(line) + if line == "" { + continue + } + parts := strings.Split(line, ":") + require.Len(t, parts, 2) + id, data := strings.TrimSpace(parts[0]), strings.TrimSpace(parts[1]) + if data != "live" && data != "dead" { + t.Fatalf("malformed line %q, expected to find 'live' or 'dead' annotation", line) + } + nodeID := spanconfigtestutils.ParseNodeID(t, id) + cluster.markLive(nodeID, data == "live") + } + + case "allocate": + for _, line := range strings.Split(d.Input, "\n") { + line = strings.TrimSpace(line) + if line == "" { + continue + } + parts := strings.Split(line, ":") + require.Len(t, parts, 2) + id, data := strings.TrimSpace(parts[0]), strings.TrimSpace(parts[1]) + rangeID := spanconfigtestutils.ParseRangeID(t, id) + desc := cluster.getRangeDescriptor(rangeID) + desc.SetReplicas(spanconfigtestutils.ParseReplicaSet(t, data)) + cluster.setRangeDescriptor(desc) + } + + case "configure": + for _, line := range strings.Split(d.Input, "\n") { + line = strings.TrimSpace(line) + if line == "" { + continue + } + tag, data, found := strings.Cut(line, ":") + require.True(t, found) + tag, data = strings.TrimSpace(tag), strings.TrimSpace(data) + span := spanconfigtestutils.ParseSpan(t, tag) + conf := spanconfigtestutils.ParseZoneConfig(t, data).AsSpanConfig() + cluster.applyConfig(ctx, span, conf) + } + + case "report": + var spans []roachpb.Span + for _, line := range strings.Split(d.Input, "\n") { + line = strings.TrimSpace(line) + if line == "" { + continue + } + spans = append(spans, spanconfigtestutils.ParseSpan(t, line)) + } + report, err := reporter.SpanConfigConformance(ctx, spans) + require.NoError(t, err) + printRangeDesc := func(r roachpb.RangeDescriptor) string { + var buf strings.Builder + buf.WriteString(fmt.Sprintf("r%d:", r.RangeID)) + buf.WriteString(r.RSpan().String()) + buf.WriteString(" [") + if allReplicas := r.Replicas().Descriptors(); len(allReplicas) > 0 { + for i, rep := range allReplicas { + if i > 0 { + buf.WriteString(", ") + } + buf.WriteString(rep.String()) + } + } else { + buf.WriteString("") + } + buf.WriteString("]") + return buf.String() + } + printList := func(tag string, descs []roachpb.RangeDescriptor) string { + var buf strings.Builder + for i, desc := range descs { + if i == 0 { + buf.WriteString(fmt.Sprintf("%s:\n", tag)) + } + buf.WriteString(fmt.Sprintf(" %s\n", printRangeDesc(desc))) + } + return buf.String() + } + var buf strings.Builder + buf.WriteString(printList("unavailable", report.Unavailable)) + buf.WriteString(printList("under replicated", report.UnderReplicated)) + buf.WriteString(printList("over replicated", report.OverReplicated)) + buf.WriteString(printList("violating constraints", report.ViolatingConstraints)) + if buf.Len() == 0 { + return "ok" + } + return buf.String() + + default: + t.Fatalf("unknown command: %s", d.Cmd) + } + + return "" + }) + }) +} + +type mockCluster struct { + t *testing.T + + nodes map[roachpb.NodeID]roachpb.NodeDescriptor + ranges map[roachpb.RangeID]roachpb.RangeDescriptor + liveness map[roachpb.NodeID]bool + store *spanconfigstore.Store +} + +var _ spanconfigreporter.Liveness = &mockCluster{} +var _ constraint.StoreResolver = &mockCluster{} +var _ rangedesciter.Iterator = &mockCluster{} +var _ spanconfig.StoreReader = &mockCluster{} + +func newMockCluster( + t *testing.T, st *clustersettings.Settings, scKnobs *spanconfig.TestingKnobs, +) *mockCluster { + return &mockCluster{ + t: t, + nodes: make(map[roachpb.NodeID]roachpb.NodeDescriptor), + ranges: make(map[roachpb.RangeID]roachpb.RangeDescriptor), + liveness: make(map[roachpb.NodeID]bool), + store: spanconfigstore.New(roachpb.TestingDefaultSpanConfig(), st, scKnobs), + } +} + +// IsLive implements spanconfigreporter.Liveness. +func (s *mockCluster) IsLive(id roachpb.NodeID) (bool, error) { + live, found := s.liveness[id] + require.True(s.t, found, "undeclared node n%d", id) + return live, nil +} + +// GetStoreDescriptor implements constraint.StoreResolver. +func (s *mockCluster) GetStoreDescriptor(storeID roachpb.StoreID) (roachpb.StoreDescriptor, bool) { + desc, found := s.nodes[roachpb.NodeID(storeID)] + require.True(s.t, found, "undeclared node n%d", storeID) + + return roachpb.StoreDescriptor{ + StoreID: storeID, // simulate storeIDs == nodeIDs + Node: desc, + }, true +} + +// Iterate implements rangedesciter.Iterator. +func (s *mockCluster) Iterate( + _ context.Context, _ int, _ func(), fn func(...roachpb.RangeDescriptor) error, +) error { + var descs []roachpb.RangeDescriptor + for _, d := range s.ranges { + descs = append(descs, d) + } + sort.Slice(descs, func(i, j int) bool { + return descs[i].StartKey.Less(descs[j].StartKey) + }) + return fn(descs...) +} + +// NeedsSplit implements spanconfig.StoreReader. +func (s *mockCluster) NeedsSplit(ctx context.Context, start, end roachpb.RKey) bool { + return s.store.NeedsSplit(ctx, start, end) +} + +// ComputeSplitKey implements spanconfig.StoreReader. +func (s *mockCluster) ComputeSplitKey(ctx context.Context, start, end roachpb.RKey) roachpb.RKey { + return s.store.ComputeSplitKey(ctx, start, end) +} + +// GetSpanConfigForKey implements spanconfig.StoreReader. +func (s *mockCluster) GetSpanConfigForKey( + ctx context.Context, key roachpb.RKey, +) (roachpb.SpanConfig, error) { + return s.store.GetSpanConfigForKey(ctx, key) +} + +func (s *mockCluster) addNode(desc roachpb.NodeDescriptor) { + _, found := s.nodes[desc.NodeID] + require.Falsef(s.t, found, "attempting to re-add n%d", desc.NodeID) + s.nodes[desc.NodeID] = desc + s.markLive(desc.NodeID, true /* live */) +} + +func (s *mockCluster) markLive(id roachpb.NodeID, live bool) { + _, found := s.nodes[id] + require.Truef(s.t, found, "n%d not found", id) + s.liveness[id] = live +} + +func (s *mockCluster) addRange(desc roachpb.RangeDescriptor) { + _, found := s.ranges[desc.RangeID] + require.Falsef(s.t, found, "attempting to re-add r%d", desc.RangeID) + s.ranges[desc.RangeID] = desc +} + +func (s *mockCluster) setRangeDescriptor(desc roachpb.RangeDescriptor) { + _, found := s.ranges[desc.RangeID] + require.Truef(s.t, found, "r%d not found", desc.RangeID) + s.ranges[desc.RangeID] = desc +} + +func (s *mockCluster) getRangeDescriptor(id roachpb.RangeID) roachpb.RangeDescriptor { + desc, found := s.ranges[id] + require.Truef(s.t, found, "r%d not found", id) + return desc +} + +func (s *mockCluster) applyConfig(ctx context.Context, span roachpb.Span, conf roachpb.SpanConfig) { + update, err := spanconfig.Addition(spanconfig.MakeTargetFromSpan(span), conf) + require.NoError(s.t, err) + s.store.Apply(ctx, false, update) +} diff --git a/pkg/spanconfig/spanconfigreporter/disabled.go b/pkg/spanconfig/spanconfigreporter/disabled.go new file mode 100644 index 000000000000..545079c36de7 --- /dev/null +++ b/pkg/spanconfig/spanconfigreporter/disabled.go @@ -0,0 +1,34 @@ +// Copyright 2022 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package spanconfigreporter + +import ( + "context" + + "github.com/cockroachdb/cockroach/pkg/roachpb" + "github.com/cockroachdb/cockroach/pkg/spanconfig" + "github.com/cockroachdb/errors" +) + +// DisabledReporter is a spanconfig.Reporter that only returns "disabled" +// errors. +var DisabledReporter = disabled{} + +type disabled struct{} + +var _ spanconfig.Reporter = disabled{} + +// SpanConfigConformance implements the spanconfig.Reporter interface. +func (d disabled) SpanConfigConformance( + ctx context.Context, spans []roachpb.Span, +) (roachpb.SpanConfigConformanceReport, error) { + return roachpb.SpanConfigConformanceReport{}, errors.New("span configs disabled") +} diff --git a/pkg/spanconfig/spanconfigreporter/main_test.go b/pkg/spanconfig/spanconfigreporter/main_test.go new file mode 100644 index 000000000000..f6c86b6937ab --- /dev/null +++ b/pkg/spanconfig/spanconfigreporter/main_test.go @@ -0,0 +1,33 @@ +// Copyright 2022 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package spanconfigreporter_test + +import ( + "os" + "testing" + + "github.com/cockroachdb/cockroach/pkg/security/securityassets" + "github.com/cockroachdb/cockroach/pkg/security/securitytest" + "github.com/cockroachdb/cockroach/pkg/server" + "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" + "github.com/cockroachdb/cockroach/pkg/testutils/testcluster" + "github.com/cockroachdb/cockroach/pkg/util/randutil" +) + +//go:generate ../../../util/leaktest/add-leaktest.sh *_test.go + +func TestMain(m *testing.M) { + securityassets.SetLoader(securitytest.EmbeddedAssets) + randutil.SeedForTests() + serverutils.InitTestServerFactory(server.TestServerFactory) + serverutils.InitTestClusterFactory(testcluster.TestClusterFactory) + os.Exit(m.Run()) +} diff --git a/pkg/spanconfig/spanconfigreporter/reporter.go b/pkg/spanconfig/spanconfigreporter/reporter.go new file mode 100644 index 000000000000..ad1d7f707372 --- /dev/null +++ b/pkg/spanconfig/spanconfigreporter/reporter.go @@ -0,0 +1,181 @@ +// Copyright 2022 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +// Package spanconfigreporter reports on whether ranges over the queried spans +// conform to the span configs that apply to them. +package spanconfigreporter + +import ( + "context" + "fmt" + + "github.com/cockroachdb/cockroach/pkg/kv/kvserver/constraint" + "github.com/cockroachdb/cockroach/pkg/roachpb" + "github.com/cockroachdb/cockroach/pkg/settings" + "github.com/cockroachdb/cockroach/pkg/settings/cluster" + "github.com/cockroachdb/cockroach/pkg/spanconfig" + "github.com/cockroachdb/cockroach/pkg/util/log" + "github.com/cockroachdb/cockroach/pkg/util/rangedesciter" +) + +// rangeDescPageSize controls the page size when iterating through range +// descriptors. It's settable only by the system tenant. +var rangeDescPageSize = settings.RegisterIntSetting( + settings.SystemOnly, + "spanconfig.reporter.range_desc_page_size", + "pa", + 100, + func(i int64) error { + if i < 5 || i > 25000 { + return fmt.Errorf("expected range_desc_page_size to be in range [5, 25000], got %d", i) + } + return nil + }, +) + +// Liveness is the subset of the interface satisfied by CRDB's node liveness +// component that the reporter relies on. +type Liveness interface { + IsLive(roachpb.NodeID) (bool, error) +} + +// Reporter is used to figure out whether ranges backing specific spans conform +// to the span configs that apply over them. It's a concrete implementation of +// the spanconfig.Reporter interface. +type Reporter struct { + dep struct { + Liveness + constraint.StoreResolver + rangedesciter.Iterator + spanconfig.StoreReader + } + + settings *cluster.Settings + knobs *spanconfig.TestingKnobs +} + +var _ spanconfig.Reporter = &Reporter{} + +// New constructs and returns a Reporter. +func New( + liveness Liveness, + resolver constraint.StoreResolver, + reader spanconfig.StoreReader, + iterator rangedesciter.Iterator, + settings *cluster.Settings, + knobs *spanconfig.TestingKnobs, +) *Reporter { + r := &Reporter{ + settings: settings, + knobs: knobs, + } + r.dep.Liveness = liveness + r.dep.StoreResolver = resolver + r.dep.Iterator = iterator + r.dep.StoreReader = reader + return r +} + +// TODO(irfansharif): Support the equivalent of "critical localities", perhaps +// through a different API than the one below since it's not quite +// span-oriented. + +// SpanConfigConformance implements the spanconfig.Reporter interface. +func (r *Reporter) SpanConfigConformance( + ctx context.Context, spans []roachpb.Span, +) (roachpb.SpanConfigConformanceReport, error) { + // XXX: Actually use the spans parameter. Update the rangedesc.Iterator + // interfaces to take in a keyspan and bound meta{1,2} search just to + // segments that would possibly overlap with that keyspan. Until this + // keyspan scoping is done, we can't let this be used in tenants. + _ = spans + + // XXX: Write an end-to-end test using actual SQL and zone configs. Set configs + // on a table, disable replication, see conformance. Enable repl, change + // configs, etc. Use tenants as well for this mode. Do this for tenants as well. + // Do this after some form of this API is exposed through SQL/an endpoint. + + // XXX: Can we improve the SpanConfigConformanceReport proto type? Perhaps + // include some {meta,}data about the span config being violated as well? Or + // include the span config directly and provide helper libraries to compute + // human-readable "why is this in violation" text. + // - Only include range ID + replica descriptors + keys? + // - Type to represent exactly which constraint exactly is being violated? + // - Segment over/under replicated by what replica type (voter/non-voter) + // exactly is over/under replicated? + + report := roachpb.SpanConfigConformanceReport{} + if err := r.dep.Iterate(ctx, int(rangeDescPageSize.Get(&r.settings.SV)), func() { + report = roachpb.SpanConfigConformanceReport{} // init + }, func(descriptors ...roachpb.RangeDescriptor) error { + for _, desc := range descriptors { + conf, err := r.dep.StoreReader.GetSpanConfigForKey(ctx, desc.StartKey) + if err != nil { + return err + } + + status := desc.Replicas().ReplicationStatus( + func(rDesc roachpb.ReplicaDescriptor) bool { + isLive, err := r.dep.Liveness.IsLive(rDesc.NodeID) + if err != nil { + // As of 2022-10, this error only appears if we're + // asking for the liveness of a node ID that doesn't + // exist, which should never happen. Shout loudly + // and declare things as non-live. + log.Errorf(ctx, "programming error: unexpected err: %v", err) + return false + } + return isLive + }, int(conf.GetNumVoters()), int(conf.GetNumNonVoters())) + if !status.Available { + report.Unavailable = append(report.Unavailable, desc) + } + if status.UnderReplicated || status.UnderReplicatedNonVoters { + report.UnderReplicated = append(report.UnderReplicated, desc) + } + if status.OverReplicated || status.OverReplicatedNonVoters { + report.OverReplicated = append(report.OverReplicated, desc) + } + + // Compute constraint violations for the overall (affecting voters + // and non-voters alike) and voter constraints. + overall := constraint.AnalyzeConstraints( + r.dep.StoreResolver, + desc.Replicas().Descriptors(), + conf.NumReplicas, conf.Constraints) + for i, c := range overall.Constraints { + if c.NumReplicas == 0 { + c.NumReplicas = conf.NumReplicas + } + if len(overall.SatisfiedBy[i]) < int(c.NumReplicas) { + report.ViolatingConstraints = append(report.ViolatingConstraints, desc) + break + } + } + voters := constraint.AnalyzeConstraints( + r.dep.StoreResolver, + desc.Replicas().Voters().Descriptors(), + conf.GetNumVoters(), conf.VoterConstraints) + for i, c := range voters.Constraints { + if c.NumReplicas == 0 { + c.NumReplicas = conf.GetNumVoters() + } + if len(voters.SatisfiedBy[i]) < int(c.NumReplicas) { + report.ViolatingConstraints = append(report.ViolatingConstraints, desc) + break + } + } + } + return nil + }); err != nil { + return roachpb.SpanConfigConformanceReport{}, err + } + return report, nil +} diff --git a/pkg/spanconfig/spanconfigreporter/testdata/basic b/pkg/spanconfig/spanconfigreporter/testdata/basic new file mode 100644 index 000000000000..144b03dbe909 --- /dev/null +++ b/pkg/spanconfig/spanconfigreporter/testdata/basic @@ -0,0 +1,189 @@ +# Walk through the basics of the datadriven syntax. We initialize a six node +# cluster with two nodes in each region (us-{west,central,east}) and three +# ranges. + +init +n1: region=us-west +n2: region=us-west +n3: region=us-central +n4: region=us-central +n5: region=us-east +n6: region=us-east +r1: [a,b) +r2: [b,c) +r3: [c,d) +---- + +# Set-up a replication factor of 3 across the entire keyspan, and allocate +# replicas accordingly. Our conformance report should indicate no problems. +configure +[a,d): num_replicas=3 +---- + +allocate +r1: voters=[n1,n3,n5] +r2: voters=[n1,n3,n5] +r3: voters=[n1,n3,n5] +---- + +report +---- +ok + + +# Shift around the replicas for r1 to the second node in each region. We'll +# kill these nodes one by one and verify that we report the range to first be +# under-replicated, and then both under-replicated and unavailable. +allocate +r1: voters=[n2,n4,n6] +---- + +liveness +n6: dead +---- + +report +---- +under replicated: + r1:{a-b} [(n2,s2):2, (n4,s4):4, (n6,s6):6] + +liveness +n4: dead +---- + +report +---- +unavailable: + r1:{a-b} [(n2,s2):2, (n4,s4):4, (n6,s6):6] +under replicated: + r1:{a-b} [(n2,s2):2, (n4,s4):4, (n6,s6):6] + +liveness +n4: live +n6: live +---- + +report +---- +ok + + +# Add extra replicas for r2, and verify that it shows up as over-replicated. +allocate +r2: voters=[n1,n3,n5,n6] +---- + +report +---- +over replicated: + r2:{b-c} [(n1,s1):1, (n3,s3):3, (n5,s5):5, (n6,s6):6] + +# It should also work when we don't have enough replicas. +allocate +r2: voters=[n1] +---- + +report +---- +under replicated: + r2:{b-c} [(n1,s1):1] + +allocate +r2: voters=[n1,n3,n5] +---- + +report +---- +ok + + +# Configuring different parts of the keyspan with different replication factors +# will work as expected. All ranges currently have 3 replicas each, so if the +# span configs indicate that we want a different number of replicas, the +# reports should indicate as much. +configure +[c,d): num_replicas=5 +[a,b): num_replicas=1 +---- + +report +---- +under replicated: + r3:{c-d} [(n1,s1):1, (n3,s3):3, (n5,s5):5] +over replicated: + r1:{a-b} [(n2,s2):2, (n4,s4):4, (n6,s6):6] + +configure +[a,d): num_replicas=3 +---- + +report +---- +ok + + +# Verify that conformance reports also work for voter/non-voter +# constraints/replica counts. +configure +[b,c): num_replicas=6 num_voters=3 +---- + +allocate +r2: voters=[n1,n3,n5] +---- + +report +---- +under replicated: + r2:{b-c} [(n1,s1):1, (n3,s3):3, (n5,s5):5] + +allocate +r2: voters=[n1,n2,n3,n4,n5,n6] +---- + +# We're under replicated due to non-voters, over replicated due to voters. +report +---- +under replicated: + r2:{b-c} [(n1,s1):1, (n2,s2):2, (n3,s3):3, (n4,s4):4, (n5,s5):5, (n6,s6):6] +over replicated: + r2:{b-c} [(n1,s1):1, (n2,s2):2, (n3,s3):3, (n4,s4):4, (n5,s5):5, (n6,s6):6] + +allocate +r2: voters=[n1,n3,n5] non-voters=[n2,n4,n6] +---- + +report +---- +ok + +configure +[a,d): num_replicas=3 +---- + +allocate +r2: voters=[n1,n3,n5] +---- + +report +---- +ok + + +# Verify that constraints are also reported on. +configure +[b,c): num_replicas=3 constraints={'+region=us-central':2} +---- + +report +---- +violating constraints: + r2:{b-c} [(n1,s1):1, (n3,s3):3, (n5,s5):5] + +allocate +r2: voters=[n1,n3,n4] +---- + +report +---- +ok diff --git a/pkg/spanconfig/spanconfigreporter/testdata/constraint_conformance b/pkg/spanconfig/spanconfigreporter/testdata/constraint_conformance new file mode 100644 index 000000000000..942091d86e23 --- /dev/null +++ b/pkg/spanconfig/spanconfigreporter/testdata/constraint_conformance @@ -0,0 +1,87 @@ +# Walk through basics of constraint conformance reporting. We'll use a six-node +# cluster with various attributes and see how constraints/allocation interact. + +init +n1: region=us-west,dc=dc-a +n2: region=us-west,dc=dc-b +n3: region=us-west,dc=dc-c +n4: region=us-east,dc=dc-d +n5: region=us-east,dc=dc-e +n6: region=us-east,dc=dc-f +r1: [a,b) +---- + +# Pin all three replicas to us-west. If any replica is found outside of it, it +# should be in violation. +configure +[a,b): num_replicas=3 constraints={'+region=us-west':3} +---- + +allocate +r1: voters=[n1,n2,n4] +---- + +report +---- +violating constraints: + r1:{a-b} [(n1,s1):1, (n2,s2):2, (n4,s4):4] + +# Pin replicas to two specific DCs. A conforming replica placement should show +# up as such. +configure +[a,b): num_replicas=2 constraints={'+region=us-west,+dc=dc-a':1,'+region=us-east,+dc=dc-d':1} +---- + +allocate +r1: voters=[n1,n4] +---- + +report +---- +ok + +# Pin a voter and a non-voter to two specific DCs (n1 and n4 respectively). +# It's in violation until we get exactly what we're looking for. +configure +[a,b): num_replicas=2 num_voters=1 constraints={'+dc=dc-a':1,'+dc=dc-d':1} voter_constraints={'+dc=dc-a':1} +---- + +allocate +r1: voters=[n1] non-voters=[n3] +---- + +report +---- +violating constraints: + r1:{a-b} [(n1,s1):1, (n3,s3):3NON_VOTER] + +allocate +r1: voters=[n1] non-voters=[n4] +---- + +report +---- +ok + +# Try negative constraints over all replicas. If any are found in n1, n2 or n3, +# we're in violation. +configure +[a,b): num_replicas=3 constraints=[-region=us-west] +---- + +allocate +r1: voters=[n1,n3,n5] +---- + +report +---- +violating constraints: + r1:{a-b} [(n1,s1):1, (n3,s3):3, (n5,s5):5] + +allocate +r1: voters=[n4,n5,n6] +---- + +report +---- +ok diff --git a/pkg/spanconfig/spanconfigreporter/testdata/joint_consensus b/pkg/spanconfig/spanconfigreporter/testdata/joint_consensus new file mode 100644 index 000000000000..bc0b4e09704c --- /dev/null +++ b/pkg/spanconfig/spanconfigreporter/testdata/joint_consensus @@ -0,0 +1,104 @@ +# Walk through a few scenarios where we generate reports in the presence of +# voter-{incoming,outgoing,demoting-learners,demoting-non-voters} and learners. + +init +n1: +n2: +n3: +n4: +n5: +n6: +r1: [a,b) +---- + +liveness +n4: dead +n5: dead +---- + +configure +[a,b): num_replicas=3 +---- + +allocate +r1: voters=[n1,n2,n3] +---- + +report +---- +ok + +# Under-replication in the "old group". +allocate +r1: voters=[n1,n2] voters-incoming=[n3] +---- + +report +---- +under replicated: + r1:{a-b} [(n1,s1):1, (n2,s2):2, (n3,s3):3VOTER_INCOMING] + +# Under-replication in the "new group". +allocate +r1: voters=[n1,n2] voters-outgoing=[n3] +---- + +report +---- +under replicated: + r1:{a-b} [(n1,s1):1, (n2,s2):2, (n3,s3):3VOTER_OUTGOING] + +# Under-replication in the old group because 4 is dead. +allocate +r1: voters=[n1,n2] voters-outgoing=[n4] voters-incoming=[n3] +---- + +report +---- +under replicated: + r1:{a-b} [(n1,s1):1, (n2,s2):2, (n4,s4):4VOTER_OUTGOING, (n3,s3):3VOTER_INCOMING] + +# Unavailable in the new group (and also under-replicated), and also +# over-replicated in the new group. +allocate +r1: voters=[n1,n2] voters-outgoing=[n3] voters-incoming=[n4,n5] +---- + +report +---- +unavailable: + r1:{a-b} [(n1,s1):1, (n2,s2):2, (n3,s3):3VOTER_OUTGOING, (n4,s4):4VOTER_INCOMING, (n5,s5):5VOTER_INCOMING] +under replicated: + r1:{a-b} [(n1,s1):1, (n2,s2):2, (n3,s3):3VOTER_OUTGOING, (n4,s4):4VOTER_INCOMING, (n5,s5):5VOTER_INCOMING] +over replicated: + r1:{a-b} [(n1,s1):1, (n2,s2):2, (n3,s3):3VOTER_OUTGOING, (n4,s4):4VOTER_INCOMING, (n5,s5):5VOTER_INCOMING] + +# Over-replicated in the new group. +allocate +r1: voters=[n1,n2] voters-outgoing=[n3] voters-incoming=[n5,n6] +---- + +report +---- +over replicated: + r1:{a-b} [(n1,s1):1, (n2,s2):2, (n3,s3):3VOTER_OUTGOING, (n5,s5):5VOTER_INCOMING, (n6,s6):6VOTER_INCOMING] + + +# Many learners. No problems, since learners don't count. +allocate +r1: voters=[n1,n2,n3] learners=[n4,n5,n6] +---- + +report +---- +ok + +# Under replicated. Learners don't count. +allocate +r1: voters=[n1,n2] learners=[n3] +---- + +report +---- +under replicated: + r1:{a-b} [(n1,s1):1, (n2,s2):2, (n3,s3):3LEARNER] diff --git a/pkg/spanconfig/spanconfigreporter/testdata/over_under_replicated b/pkg/spanconfig/spanconfigreporter/testdata/over_under_replicated new file mode 100644 index 000000000000..5d73280c2b2d --- /dev/null +++ b/pkg/spanconfig/spanconfigreporter/testdata/over_under_replicated @@ -0,0 +1,120 @@ +# Walk through a few scenarios where a replica is {over,under}-replicated, +# and/or unavailable. + +init +n1: +n2: +n3: +n4: +r1: [a,b) +---- + +configure +[a,b): num_replicas=3 +---- + +allocate +r1: voters=[n1,n2,n3] +---- + +# We want 3 replicas and we have them, report should be ok. +report +---- +ok + +# ----------------------------------------------------------------------------- +# We have 4 replica when we want 3, we're over replicated. +allocate +r1: voters=[n1,n2,n3,n4] +---- + +report +---- +over replicated: + r1:{a-b} [(n1,s1):1, (n2,s2):2, (n3,s3):3, (n4,s4):4] + +# ----------------------------------------------------------------------------- +# We have 1 or 2 replicas when we want 3, we're under replicated. +allocate +r1: voters=[n1] +---- + +report +---- +under replicated: + r1:{a-b} [(n1,s1):1] + +allocate +r1: voters=[n1,n2] +---- + +report +---- +under replicated: + r1:{a-b} [(n1,s1):1, (n2,s2):2] + +# ----------------------------------------------------------------------------- +# We have the desired number of replicas, but one of them is on a dead node so +# we're under-replicated. +liveness +n3: dead +---- + +allocate +r1: voters=[n1,n2,n3] +---- + +report +---- +under replicated: + r1:{a-b} [(n1,s1):1, (n2,s2):2, (n3,s3):3] + +# If we've lost quorum we're also unavailable. +liveness +n2: dead +n3: dead +---- + +report +---- +unavailable: + r1:{a-b} [(n1,s1):1, (n2,s2):2, (n3,s3):3] +under replicated: + r1:{a-b} [(n1,s1):1, (n2,s2):2, (n3,s3):3] + +liveness +n2: live +n3: live +---- + +report +---- +ok + +# ----------------------------------------------------------------------------- +# We can be under-replicated and over-replicated at the same time if it +# has many replicas but sufficiently many of them are on dead nodes. It can +# also be unavailable. Set up a triply replicated range where we want two +# replicas (so over-replicated), except two of the range's replicas are on dead +# nodes (under-replicated + unavailable). +allocate +r1: voters=[n1,n2,n3] +---- + +configure +[a,b): num_replicas=2 +---- + +liveness +n1: dead +n2: dead +---- + +report +---- +unavailable: + r1:{a-b} [(n1,s1):1, (n2,s2):2, (n3,s3):3] +under replicated: + r1:{a-b} [(n1,s1):1, (n2,s2):2, (n3,s3):3] +over replicated: + r1:{a-b} [(n1,s1):1, (n2,s2):2, (n3,s3):3] diff --git a/pkg/spanconfig/spanconfigtestutils/BUILD.bazel b/pkg/spanconfig/spanconfigtestutils/BUILD.bazel index 3a76a3bc1e5f..ccd602dbb69f 100644 --- a/pkg/spanconfig/spanconfigtestutils/BUILD.bazel +++ b/pkg/spanconfig/spanconfigtestutils/BUILD.bazel @@ -10,6 +10,7 @@ go_library( importpath = "github.com/cockroachdb/cockroach/pkg/spanconfig/spanconfigtestutils", visibility = ["//visibility:public"], deps = [ + "//pkg/config/zonepb", "//pkg/kv", "//pkg/kv/kvserver/protectedts/ptpb", "//pkg/roachpb", @@ -19,6 +20,7 @@ go_library( "//pkg/util/syncutil", "@com_github_cockroachdb_datadriven//:datadriven", "@com_github_stretchr_testify//require", + "@in_gopkg_yaml_v2//:yaml_v2", ], ) diff --git a/pkg/spanconfig/spanconfigtestutils/utils.go b/pkg/spanconfig/spanconfigtestutils/utils.go index 3d4fc0d8114c..57adf26584a2 100644 --- a/pkg/spanconfig/spanconfigtestutils/utils.go +++ b/pkg/spanconfig/spanconfigtestutils/utils.go @@ -20,6 +20,7 @@ import ( "strings" "testing" + "github.com/cockroachdb/cockroach/pkg/config/zonepb" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/protectedts/ptpb" "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/spanconfig" @@ -27,6 +28,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/hlc" "github.com/cockroachdb/datadriven" "github.com/stretchr/testify/require" + "gopkg.in/yaml.v2" ) // spanRe matches strings of the form "[start, end)", capturing both the "start" @@ -43,6 +45,98 @@ var systemTargetRe = regexp.MustCompile( // shorthand for declaring a unique tagged config. var configRe = regexp.MustCompile(`^(FALLBACK)|(^\w)$`) +// ParseRangeID is helper function that constructs a roachpb.RangeID from a +// string of the form "r". +func ParseRangeID(t testing.TB, s string) roachpb.RangeID { + rangeID, err := strconv.Atoi(strings.TrimPrefix(s, "r")) + require.NoError(t, err) + return roachpb.RangeID(rangeID) +} + +// ParseNodeID is helper function that constructs a roachpb.NodeID from a string +// of the form "n". +func ParseNodeID(t testing.TB, s string) roachpb.NodeID { + nodeID, err := strconv.Atoi(strings.TrimPrefix(s, "n")) + require.NoError(t, err) + return roachpb.NodeID(nodeID) +} + +// ParseReplicaSet is helper function that constructs a roachpb.ReplicaSet from +// a string of the form "voters=[n1,n2,...] non-voters=[n3,...]". The +// {store,replica} IDs for each replica is set to be equal to the corresponding +// node ID. +func ParseReplicaSet(t testing.TB, s string) roachpb.ReplicaSet { + replSet := roachpb.ReplicaSet{} + rtypes := map[string]roachpb.ReplicaType{ + "voters": roachpb.VOTER_FULL, + "voters-incoming": roachpb.VOTER_INCOMING, + "voters-outgoing": roachpb.VOTER_OUTGOING, + "voters-demoting-learners": roachpb.VOTER_DEMOTING_LEARNER, + "voters-demoting-non-voters": roachpb.VOTER_DEMOTING_NON_VOTER, + "learners": roachpb.LEARNER, + "non-voters": roachpb.NON_VOTER, + } + for _, part := range strings.Split(s, " ") { + inner := strings.Split(part, "=") + require.Len(t, inner, 2) + rtype, found := rtypes[inner[0]] + require.Truef(t, found, "unexpected replica type: %s", inner[0]) + nodes := strings.TrimSuffix(strings.TrimPrefix(inner[1], "["), "]") + + for _, n := range strings.Split(nodes, ",") { + n = strings.TrimSpace(n) + if n == "" { + continue + } + nodeID := ParseNodeID(t, n) + replSet.AddReplica(roachpb.ReplicaDescriptor{ + NodeID: nodeID, + StoreID: roachpb.StoreID(nodeID), + ReplicaID: roachpb.ReplicaID(nodeID), + Type: rtype, + }) + } + } + return replSet +} + +// ParseZoneConfig is helper function that constructs a zonepb.ZoneConfig from a +// string of the form "num_replicas= num_voters= constraints='..' +// voter_constraints='..'". +func ParseZoneConfig(t testing.TB, s string) zonepb.ZoneConfig { + config := zonepb.DefaultZoneConfig() + parts := strings.Split(s, " ") + for _, part := range parts { + switch { + case strings.HasPrefix(part, "num_replicas="): + part = strings.TrimPrefix(part, "num_replicas=") + n, err := strconv.Atoi(part) + require.NoError(t, err) + n32 := int32(n) + config.NumReplicas = &n32 + case strings.HasPrefix(part, "num_voters="): + part = strings.TrimPrefix(part, "num_voters=") + n, err := strconv.Atoi(part) + require.NoError(t, err) + n32 := int32(n) + config.NumVoters = &n32 + case strings.HasPrefix(part, "constraints="): + cl := zonepb.ConstraintsList{} + part = strings.TrimPrefix(part, "constraints=") + require.NoError(t, yaml.UnmarshalStrict([]byte(part), &cl)) + config.Constraints = cl.Constraints + case strings.HasPrefix(part, "voter_constraints="): + cl := zonepb.ConstraintsList{} + part = strings.TrimPrefix(part, "voter_constraints=") + require.NoError(t, yaml.UnmarshalStrict([]byte(part), &cl)) + config.VoterConstraints = cl.Constraints + default: + t.Fatalf("unrecognized suffix for %s, expected 'num_replicas=', 'num_voters=', 'constraints=', or 'voter_constraints='", part) + } + } + return config +} + // ParseSpan is helper function that constructs a roachpb.Span from a string of // the form "[start, end)". func ParseSpan(t testing.TB, sp string) roachpb.Span { diff --git a/pkg/testutils/serverutils/test_tenant_shim.go b/pkg/testutils/serverutils/test_tenant_shim.go index dfb88bfb9473..94d7839fb30f 100644 --- a/pkg/testutils/serverutils/test_tenant_shim.go +++ b/pkg/testutils/serverutils/test_tenant_shim.go @@ -98,6 +98,10 @@ type TestTenantInterface interface { // interface{}. SpanConfigKVAccessor() interface{} + // SpanConfigReporter returns the underlying spanconfig.Reporter as an + // interface{}. + SpanConfigReporter() interface{} + // SpanConfigReconciler returns the underlying spanconfig.Reconciler as an // interface{}. SpanConfigReconciler() interface{}