Skip to content

Commit

Permalink
Merge #98601 #98715
Browse files Browse the repository at this point in the history
98601: ui: add PGCode to admin api errors r=j82w a=j82w

Adding the PGCode makes it easy to identify
the underlying issue of why the request failed.

old error:
```
RequestError: An internal server error has occurred.
Please check your CockroachDB logs for more details.
```

new error
```
RequestError: An internal server error has occurred. 
Please check your CockroachDB logs for more details. Error Code: 53200
```

<img width="2058" alt="Screenshot 2023-03-14 at 2 51 57 PM" src="https://user-images.githubusercontent.com/8868107/225108075-5db442da-699f-487b-852c-732eb030f644.png">


Epic: none
Closes: #98596

Release note: none

98715: kvserver: deflake and unskip split race uninit rhs r=nvanbenschoten a=kvoli

`TestStoreRangeSplitRaceUninitializedRHS` was skipped some time ago, in mid 2021 (#67082). The test was skipped due to flakes that appeared related to untimely test cluster startup.

This commit unskips the test and makes minor adjustments in order to be current with semantics of Raft transport. Without these adjustments, the `MsgVote` sent every microsecond with the intention of triggering a race, would completely fill up  the Raft transport send queue. Once the queue was full, the test would fail as requests are dropped.

This commit updates the `MsgVote` send loop logic to not require every `MsgVote` request to be sent for the test to succeed.

Resolves: #66480

Release note: None

Co-authored-by: j82w <jwilley@cockroachlabs.com>
Co-authored-by: Austen McClernon <austen@cockroachlabs.com>
  • Loading branch information
3 people committed Mar 16, 2023
3 parents 683545c + b6e0f76 + 4be1c75 commit 81a9f1d
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 9 deletions.
13 changes: 10 additions & 3 deletions pkg/kv/kvserver/client_split_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
"github.com/cockroachdb/cockroach/pkg/testutils"
"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
"github.com/cockroachdb/cockroach/pkg/testutils/skip"
"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
"github.com/cockroachdb/cockroach/pkg/testutils/testcluster"
"github.com/cockroachdb/cockroach/pkg/ts"
Expand Down Expand Up @@ -2025,7 +2024,6 @@ func TestStoreSplitGCHint(t *testing.T) {
// and the uninitialized replica reacting to messages.
func TestStoreRangeSplitRaceUninitializedRHS(t *testing.T) {
defer leaktest.AfterTest(t)()
skip.WithIssue(t, 66480, "flaky test")
defer log.Scope(t).Close(t)

currentTrigger := make(chan *roachpb.SplitTrigger, 1)
Expand Down Expand Up @@ -2103,6 +2101,7 @@ func TestStoreRangeSplitRaceUninitializedRHS(t *testing.T) {

for i := 0; i < 10; i++ {
errChan := make(chan *kvpb.Error)
failedSendLog := log.Every(time.Second)

// Closed when the split goroutine is done.
splitDone := make(chan struct{})
Expand Down Expand Up @@ -2147,7 +2146,15 @@ func TestStoreRangeSplitRaceUninitializedRHS(t *testing.T) {
Term: term,
},
}, rpc.DefaultClass); !sent {
t.Error("transport failed to send vote request")
// SendAsync can return false, indicating the message didn't send.
// The most likely reason this test encounters a message failing to
// send is the outgoing message queue being full. The queue filling
// up is expected given it has fixed capacity and this loop is
// attempting to sending 1 MsgVote every microsecond. See comments
// below and above for the frequency rationale.
if failedSendLog.ShouldLog() {
log.Infof(ctx, "transport failed to send vote request")
}
}
select {
case <-splitDone:
Expand Down
3 changes: 3 additions & 0 deletions pkg/server/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,7 @@ go_library(
"//pkg/sql/optionalnodeliveness",
"//pkg/sql/parser",
"//pkg/sql/pgwire",
"//pkg/sql/pgwire/pgcode",
"//pkg/sql/pgwire/pgerror",
"//pkg/sql/pgwire/pgwirecancel",
"//pkg/sql/physicalplan",
Expand Down Expand Up @@ -482,6 +483,8 @@ go_test(
"//pkg/sql/clusterunique",
"//pkg/sql/execinfrapb",
"//pkg/sql/idxusage",
"//pkg/sql/pgwire/pgcode",
"//pkg/sql/pgwire/pgerror",
"//pkg/sql/roleoption",
"//pkg/sql/sem/catconstants",
"//pkg/sql/sem/tree",
Expand Down
26 changes: 20 additions & 6 deletions pkg/server/admin.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/isql"
"github.com/cockroachdb/cockroach/pkg/sql/optionalnodeliveness"
"github.com/cockroachdb/cockroach/pkg/sql/parser"
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
"github.com/cockroachdb/cockroach/pkg/sql/privilege"
"github.com/cockroachdb/cockroach/pkg/sql/roleoption"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
Expand Down Expand Up @@ -109,10 +111,6 @@ func nonTableDescriptorRangeCount() int64 {
}))
}

// apiServerMessage is the standard body for all HTTP 500 responses.
var errAdminAPIError = grpcstatus.Errorf(codes.Internal, "An internal server error "+
"has occurred. Please check your CockroachDB logs for more details.")

// A adminServer provides a RESTful HTTP API to administration of
// the cockroach cluster.
type adminServer struct {
Expand Down Expand Up @@ -309,14 +307,30 @@ func (s *adminServer) RegisterGateway(
// the RPC endpoint method.
func serverError(ctx context.Context, err error) error {
log.ErrorfDepth(ctx, 1, "%+v", err)
return errAdminAPIError

// Include the PGCode in the message for easier troubleshooting
errCode := pgerror.GetPGCode(err).String()
if errCode != pgcode.Uncategorized.String() {
errMessage := fmt.Sprintf("%s Error Code: %s", errAPIInternalErrorString, errCode)
return grpcstatus.Errorf(codes.Internal, errMessage)
}

// The error is already grpcstatus formatted error.
// Likely calling serverError multiple times on same error.
grpcCode := grpcstatus.Code(err)
if grpcCode != codes.Unknown {
return err
}

// Fallback to generic message
return errAPIInternalError
}

// serverErrorf logs the provided error and returns an error that should be returned by
// the RPC endpoint method.
func serverErrorf(ctx context.Context, format string, args ...interface{}) error {
log.ErrorfDepth(ctx, 1, format, args...)
return errAdminAPIError
return errAPIInternalError
}

// isNotFoundError returns true if err is a table/database not found error.
Expand Down
18 changes: 18 additions & 0 deletions pkg/server/admin_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb"
"github.com/cockroachdb/cockroach/pkg/sql/idxusage"
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
"github.com/cockroachdb/cockroach/pkg/sql/sem/catconstants"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/sql/sessiondatapb"
Expand Down Expand Up @@ -3249,6 +3251,22 @@ func TestAdminPrivilegeChecker(t *testing.T) {
}
}

func TestServerError(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)
ctx := context.Background()
pgError := pgerror.New(pgcode.OutOfMemory, "TestServerError.OutOfMemory")
err := serverError(ctx, pgError)
require.Equal(t, "rpc error: code = Internal desc = An internal server error has occurred. Please check your CockroachDB logs for more details. Error Code: 53200", err.Error())

err = serverError(ctx, err)
require.Equal(t, "rpc error: code = Internal desc = An internal server error has occurred. Please check your CockroachDB logs for more details. Error Code: 53200", err.Error())

err = fmt.Errorf("random error that is not pgerror or grpcstatus")
err = serverError(ctx, err)
require.Equal(t, "rpc error: code = Internal desc = An internal server error has occurred. Please check your CockroachDB logs for more details.", err.Error())
}

func TestDatabaseAndTableIndexRecommendations(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)
Expand Down

0 comments on commit 81a9f1d

Please sign in to comment.