Skip to content

Commit

Permalink
sqlproxyccl: Add codeUnavailable to the list of error codes
Browse files Browse the repository at this point in the history
Release justification: fixes for high-priority bug in existing functionality

Previously, if a tenant cluster had maxPods set to 0 the error returned by
directory.EnsureTenantAddr was not treated as a non-retryable error.

The tenant directory implementation used in CockroachCloud now identifies
this situation and returns a status error with codes.FailedPrecondition and
a descriptive message.

This patch adds a check for the FailedPrecondition error in
connector.lookupAddr.

Release note (bug fix): The sqlproxy now checks for FailedPrecondition errors
returned by the tenant directory.
  • Loading branch information
alyshanjahani-crl committed Mar 7, 2022
1 parent 3b7f761 commit 14b8cf8
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 3 deletions.
6 changes: 5 additions & 1 deletion pkg/ccl/sqlproxyccl/connector.go
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,11 @@ func (c *connector) lookupAddr(ctx context.Context) (string, error) {
if c.Directory != nil {
addr, err := c.Directory.EnsureTenantAddr(ctx, c.TenantID, c.ClusterName)
if err != nil {
if status.Code(err) != codes.NotFound {
if status.Code(err) == codes.FailedPrecondition {
if st, ok := status.FromError(err); ok {
return "", newErrorf(codeUnavailable, st.Message())
}
} else if status.Code(err) != codes.NotFound {
return "", markAsRetriableConnectorError(err)
}
// Fallback to old resolution rule.
Expand Down
23 changes: 23 additions & 0 deletions pkg/ccl/sqlproxyccl/connector_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -605,6 +605,29 @@ func TestConnector_lookupAddr(t *testing.T) {
require.Equal(t, "", addr)
require.Equal(t, 1, ensureTenantAddrFnCount)
})

t.Run("directory with FailedPrecondition error", func(t *testing.T) {
var ensureTenantAddrFnCount int
c := &connector{
ClusterName: "my-foo",
TenantID: roachpb.MakeTenantID(10),
RoutingRule: "foo.bar",
}
c.Directory = &testTenantResolver{
ensureTenantAddrFn: func(fnCtx context.Context, tenantID roachpb.TenantID, clusterName string) (string, error) {
ensureTenantAddrFnCount++
require.Equal(t, ctx, fnCtx)
require.Equal(t, c.TenantID, tenantID)
require.Equal(t, c.ClusterName, clusterName)
return "", status.Errorf(codes.FailedPrecondition, "foo")
},
}

addr, err := c.lookupAddr(ctx)
require.EqualError(t, err, "foo")
require.Equal(t, "", addr)
require.Equal(t, 1, ensureTenantAddrFnCount)
})
}

func TestConnector_dialSQLServer(t *testing.T) {
Expand Down
4 changes: 4 additions & 0 deletions pkg/ccl/sqlproxyccl/error.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,10 @@ const (
// codeIdleDisconnect indicates that the connection was disconnected for
// being idle for longer than the specified timeout.
codeIdleDisconnect

// codeUnavailable indicates that the backend SQL server exists but is not
// accepting connections. For example, a tenant cluster that has maxPods set to 0.
codeUnavailable
)

// codeError is combines an error with one of the above codes to ease
Expand Down
2 changes: 1 addition & 1 deletion pkg/ccl/sqlproxyccl/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ func (metrics *metrics) updateForError(err error) {
case codeProxyRefusedConnection:
metrics.RefusedConnCount.Inc(1)
metrics.BackendDownCount.Inc(1)
case codeParamsRoutingFailed:
case codeParamsRoutingFailed, codeUnavailable:
metrics.RoutingErrCount.Inc(1)
metrics.BackendDownCount.Inc(1)
case codeBackendDown:
Expand Down
3 changes: 2 additions & 1 deletion pkg/ccl/sqlproxyccl/proxy.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ func toPgError(err error) *pgproto3.ErrorResponse {
codeBackendDisconnected,
codeAuthFailed,
codeProxyRefusedConnection,
codeIdleDisconnect:
codeIdleDisconnect,
codeUnavailable:
msg = codeErr.Error()
// The rest - the message sent back is sanitized.
case codeUnexpectedInsecureStartupMessage:
Expand Down

0 comments on commit 14b8cf8

Please sign in to comment.