Skip to content

Commit

Permalink
rpc: Check for decommissioning before dialback
Browse files Browse the repository at this point in the history
Fixes: cockroachdb#101634
Previously the dialback check was done before the decommissioning check.
Both checks fail with error when the node is decommissioned, however we
want the decommissioned check to be done first since it returns an
appropriate permission error rather than an obscure connection refused
error. This change reverses the order the checks are done in.

Epic: none

Release note: None
  • Loading branch information
andrewbaptist committed Apr 17, 2023
1 parent 536c7f5 commit 7687257
Showing 1 changed file with 13 additions and 12 deletions.
25 changes: 13 additions & 12 deletions pkg/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ func NewServer(cfg Config, stopper *stop.Stopper) (*Server, error) {
return nil, errors.Wrap(err, "failed to apply loss of quorum recovery plan")
}

nodeTombStorage, checkPingFor := getPingCheckDecommissionFn(engines)
nodeTombStorage, decommissionCheck := getPingCheckDecommissionFn(engines)

g := gossip.New(
cfg.AmbientCtx,
Expand Down Expand Up @@ -316,7 +316,7 @@ func NewServer(cfg Config, stopper *stop.Stopper) (*Server, error) {
// Outgoing ping will block requests with codes.FailedPrecondition to
// notify caller that this replica is decommissioned but others could
// still be tried as caller node is valid, but not the destination.
return checkPingFor(ctx, req.TargetNodeID, codes.FailedPrecondition)
return decommissionCheck(ctx, req.TargetNodeID, codes.FailedPrecondition)
},
TenantRPCAuthorizer: authorizer,
NeedsDialback: true,
Expand All @@ -329,17 +329,18 @@ func NewServer(cfg Config, stopper *stop.Stopper) (*Server, error) {

rpcContext.OnIncomingPing = func(ctx context.Context, req *rpc.PingRequest, resp *rpc.PingResponse) error {
// Decommission state is only tracked for the system tenant.
if tenantID, isTenant := roachpb.ClientTenantFromContext(ctx); isTenant &&
!roachpb.IsSystemTenantID(tenantID.ToUint64()) {
return nil
}
if err := rpcContext.VerifyDialback(ctx, req, resp, cfg.Locality); err != nil {
return err
if tenantID, isTenant := roachpb.ClientTenantFromContext(ctx); !isTenant ||
roachpb.IsSystemTenantID(tenantID.ToUint64()) {
// Incoming ping will reject requests with codes.PermissionDenied to
// signal remote node that it is not considered valid anymore and
// operations should fail immediately.
if err := decommissionCheck(ctx, req.OriginNodeID, codes.PermissionDenied); err != nil {
return err
}
}
// Incoming ping will reject requests with codes.PermissionDenied to
// signal remote node that it is not considered valid anymore and
// operations should fail immediately.
return checkPingFor(ctx, req.OriginNodeID, codes.PermissionDenied)
// VerifyDialback verifies if a reverse connection to the sending node can
// be established.
return rpcContext.VerifyDialback(ctx, req, resp, cfg.Locality)
}

rpcContext.HeartbeatCB = func() {
Expand Down

0 comments on commit 7687257

Please sign in to comment.