From b6e0f769d1cb6e4503d84b52d1e7168f4d61f840 Mon Sep 17 00:00:00 2001 From: j82w Date: Tue, 14 Mar 2023 13:37:23 -0400 Subject: [PATCH 1/2] ui: add PGCode to admin api errors Adding the Error Code makes it easy to identify the underlying issue of why the request failed. old error: ```RequestError: An internal server error has occurred. Please check your CockroachDB logs for more details.``` new error ```RequestError: An internal server error has occurred. Please check your CockroachDB logs for more details. Error Code: 53200``` Epic: none Closes: #98596 Release note: none --- pkg/server/BUILD.bazel | 3 +++ pkg/server/admin.go | 26 ++++++++++++++++++++------ pkg/server/admin_test.go | 18 ++++++++++++++++++ 3 files changed, 41 insertions(+), 6 deletions(-) diff --git a/pkg/server/BUILD.bazel b/pkg/server/BUILD.bazel index e901b1ed8eb8..22f788aea569 100644 --- a/pkg/server/BUILD.bazel +++ b/pkg/server/BUILD.bazel @@ -219,6 +219,7 @@ go_library( "//pkg/sql/optionalnodeliveness", "//pkg/sql/parser", "//pkg/sql/pgwire", + "//pkg/sql/pgwire/pgcode", "//pkg/sql/pgwire/pgerror", "//pkg/sql/pgwire/pgwirecancel", "//pkg/sql/physicalplan", @@ -482,6 +483,8 @@ go_test( "//pkg/sql/clusterunique", "//pkg/sql/execinfrapb", "//pkg/sql/idxusage", + "//pkg/sql/pgwire/pgcode", + "//pkg/sql/pgwire/pgerror", "//pkg/sql/roleoption", "//pkg/sql/sem/catconstants", "//pkg/sql/sem/tree", diff --git a/pkg/server/admin.go b/pkg/server/admin.go index a1a07c3fa14e..13c4c604099b 100644 --- a/pkg/server/admin.go +++ b/pkg/server/admin.go @@ -51,6 +51,8 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/isql" "github.com/cockroachdb/cockroach/pkg/sql/optionalnodeliveness" "github.com/cockroachdb/cockroach/pkg/sql/parser" + "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode" + "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror" "github.com/cockroachdb/cockroach/pkg/sql/privilege" "github.com/cockroachdb/cockroach/pkg/sql/roleoption" "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" @@ -109,10 +111,6 @@ func nonTableDescriptorRangeCount() int64 { })) } -// apiServerMessage is the standard body for all HTTP 500 responses. -var errAdminAPIError = grpcstatus.Errorf(codes.Internal, "An internal server error "+ - "has occurred. Please check your CockroachDB logs for more details.") - // A adminServer provides a RESTful HTTP API to administration of // the cockroach cluster. type adminServer struct { @@ -309,14 +307,30 @@ func (s *adminServer) RegisterGateway( // the RPC endpoint method. func serverError(ctx context.Context, err error) error { log.ErrorfDepth(ctx, 1, "%+v", err) - return errAdminAPIError + + // Include the PGCode in the message for easier troubleshooting + errCode := pgerror.GetPGCode(err).String() + if errCode != pgcode.Uncategorized.String() { + errMessage := fmt.Sprintf("%s Error Code: %s", errAPIInternalErrorString, errCode) + return grpcstatus.Errorf(codes.Internal, errMessage) + } + + // The error is already grpcstatus formatted error. + // Likely calling serverError multiple times on same error. + grpcCode := grpcstatus.Code(err) + if grpcCode != codes.Unknown { + return err + } + + // Fallback to generic message + return errAPIInternalError } // serverErrorf logs the provided error and returns an error that should be returned by // the RPC endpoint method. func serverErrorf(ctx context.Context, format string, args ...interface{}) error { log.ErrorfDepth(ctx, 1, format, args...) - return errAdminAPIError + return errAPIInternalError } // isNotFoundError returns true if err is a table/database not found error. diff --git a/pkg/server/admin_test.go b/pkg/server/admin_test.go index 0f6548e4e033..631b4c13173c 100644 --- a/pkg/server/admin_test.go +++ b/pkg/server/admin_test.go @@ -47,6 +47,8 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql" "github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb" "github.com/cockroachdb/cockroach/pkg/sql/idxusage" + "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode" + "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror" "github.com/cockroachdb/cockroach/pkg/sql/sem/catconstants" "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" "github.com/cockroachdb/cockroach/pkg/sql/sessiondatapb" @@ -3249,6 +3251,22 @@ func TestAdminPrivilegeChecker(t *testing.T) { } } +func TestServerError(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + ctx := context.Background() + pgError := pgerror.New(pgcode.OutOfMemory, "TestServerError.OutOfMemory") + err := serverError(ctx, pgError) + require.Equal(t, "rpc error: code = Internal desc = An internal server error has occurred. Please check your CockroachDB logs for more details. Error Code: 53200", err.Error()) + + err = serverError(ctx, err) + require.Equal(t, "rpc error: code = Internal desc = An internal server error has occurred. Please check your CockroachDB logs for more details. Error Code: 53200", err.Error()) + + err = fmt.Errorf("random error that is not pgerror or grpcstatus") + err = serverError(ctx, err) + require.Equal(t, "rpc error: code = Internal desc = An internal server error has occurred. Please check your CockroachDB logs for more details.", err.Error()) +} + func TestDatabaseAndTableIndexRecommendations(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) From 4be1c75a3a7bdd2f16a7450f237fc460f930e1fe Mon Sep 17 00:00:00 2001 From: Austen McClernon Date: Wed, 15 Mar 2023 21:02:57 +0000 Subject: [PATCH 2/2] kvserver: deflake and unskip split race uninit rhs `TestStoreRangeSplitRaceUninitializedRHS` was skipped some time ago, in mid 2021 (#67082). The test was skipped due to flakes that appeared related to untimely test cluster startup. This commit unskips the test and makes minor adjustments in order to be current with semantics of Raft transport. Without these adjustments, the `MsgVote` sent every microsecond with the intention of triggering a race, would completely fill up the Raft transport send queue. Once the queue was full, the test would fail as requests are dropped. This commit updates the `MsgVote` send loop logic to not require every `MsgVote` request to be sent for the test to succeed. Resolves: #66480 Release note: None --- pkg/kv/kvserver/client_split_test.go | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/pkg/kv/kvserver/client_split_test.go b/pkg/kv/kvserver/client_split_test.go index 6b97f1fd3f88..28f1f978c983 100644 --- a/pkg/kv/kvserver/client_split_test.go +++ b/pkg/kv/kvserver/client_split_test.go @@ -50,7 +50,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/storage/enginepb" "github.com/cockroachdb/cockroach/pkg/testutils" "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" - "github.com/cockroachdb/cockroach/pkg/testutils/skip" "github.com/cockroachdb/cockroach/pkg/testutils/sqlutils" "github.com/cockroachdb/cockroach/pkg/testutils/testcluster" "github.com/cockroachdb/cockroach/pkg/ts" @@ -2025,7 +2024,6 @@ func TestStoreSplitGCHint(t *testing.T) { // and the uninitialized replica reacting to messages. func TestStoreRangeSplitRaceUninitializedRHS(t *testing.T) { defer leaktest.AfterTest(t)() - skip.WithIssue(t, 66480, "flaky test") defer log.Scope(t).Close(t) currentTrigger := make(chan *roachpb.SplitTrigger, 1) @@ -2103,6 +2101,7 @@ func TestStoreRangeSplitRaceUninitializedRHS(t *testing.T) { for i := 0; i < 10; i++ { errChan := make(chan *kvpb.Error) + failedSendLog := log.Every(time.Second) // Closed when the split goroutine is done. splitDone := make(chan struct{}) @@ -2147,7 +2146,15 @@ func TestStoreRangeSplitRaceUninitializedRHS(t *testing.T) { Term: term, }, }, rpc.DefaultClass); !sent { - t.Error("transport failed to send vote request") + // SendAsync can return false, indicating the message didn't send. + // The most likely reason this test encounters a message failing to + // send is the outgoing message queue being full. The queue filling + // up is expected given it has fixed capacity and this loop is + // attempting to sending 1 MsgVote every microsecond. See comments + // below and above for the frequency rationale. + if failedSendLog.ShouldLog() { + log.Infof(ctx, "transport failed to send vote request") + } } select { case <-splitDone: