From ee1e9cd60eec2b33bface429aaeeb6c4d9750e50 Mon Sep 17 00:00:00 2001 From: irfan sharif Date: Thu, 20 Jan 2022 09:31:51 -0500 Subject: [PATCH 1/5] kvserver: de-flake TestReplicateQueueUpAndDownReplicateNonVoters Fixes #75135. This test asserted on span configs applying to a scratch range. When stressing, it appeared that some time we were not seeing the scratch range adopt the prescribed number of voters/non-voters. Staring at the test itself, we were only nudging the replication queues for the first node in the three node test. It's possible for the scratch range to have been housed on a node other than the first; this commit makes it so that the test nudges queues on all nodes. For good measure, lets also ensure that the split queues process everything, ditto for the snapshot queues. To repro: dev test pkg/kv/kvserver \ -f TestReplicateQueueUpAndDownReplicateNonVoters \ -v --show-logs --timeout 2m --stress Release note: None --- pkg/kv/kvserver/replicate_queue_test.go | 31 +++++++++++-------------- 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/pkg/kv/kvserver/replicate_queue_test.go b/pkg/kv/kvserver/replicate_queue_test.go index 51f17bfc3b92..942f1b28f475 100644 --- a/pkg/kv/kvserver/replicate_queue_test.go +++ b/pkg/kv/kvserver/replicate_queue_test.go @@ -301,23 +301,22 @@ func TestReplicateQueueDownReplicate(t *testing.T) { } func scanAndGetNumNonVoters( - ctx context.Context, - t *testing.T, - tc *testcluster.TestCluster, - store *kvserver.Store, - scratchKey roachpb.Key, + t *testing.T, tc *testcluster.TestCluster, scratchKey roachpb.Key, ) (numNonVoters int) { - // Nudge the replicateQueue to up/down-replicate our scratch range. - if err := store.ForceReplicationScanAndProcess(); err != nil { - t.Fatal(err) + for _, s := range tc.Servers { + // Nudge internal queues to up/down-replicate our scratch range. + require.NoError(t, s.Stores().VisitStores(func(s *kvserver.Store) error { + require.NoError(t, s.ForceSplitScanAndProcess()) + require.NoError(t, s.ForceReplicationScanAndProcess()) + require.NoError(t, s.ForceRaftSnapshotQueueProcess()) + return nil + })) } scratchRange := tc.LookupRangeOrFatal(t, scratchKey) row := tc.ServerConn(0).QueryRow( - `SELECT array_length(non_voting_replicas, 1) FROM crdb_internal.ranges_no_leases WHERE range_id=$1`, + `SELECT coalesce(max(array_length(non_voting_replicas, 1)),0) FROM crdb_internal.ranges_no_leases WHERE range_id=$1`, scratchRange.GetRangeID()) - err := row.Scan(&numNonVoters) - log.Warningf(ctx, "error while retrieving the number of non-voters: %s", err) - + require.NoError(t, row.Scan(&numNonVoters)) return numNonVoters } @@ -329,7 +328,6 @@ func TestReplicateQueueUpAndDownReplicateNonVoters(t *testing.T) { skip.UnderRace(t) defer log.Scope(t).Close(t) - ctx := context.Background() tc := testcluster.StartTestCluster(t, 1, base.TestClusterArgs{ ReplicationMode: base.ReplicationAuto, @@ -360,13 +358,10 @@ func TestReplicateQueueUpAndDownReplicateNonVoters(t *testing.T) { // Add two new servers and expect that 2 non-voters are added to the range. tc.AddAndStartServer(t, base.TestServerArgs{}) tc.AddAndStartServer(t, base.TestServerArgs{}) - store, err := tc.Server(0).GetStores().(*kvserver.Stores).GetStore( - tc.Server(0).GetFirstStoreID()) - require.NoError(t, err) var expectedNonVoterCount = 2 testutils.SucceedsSoon(t, func() error { - if found := scanAndGetNumNonVoters(ctx, t, tc, store, scratchKey); found != expectedNonVoterCount { + if found := scanAndGetNumNonVoters(t, tc, scratchKey); found != expectedNonVoterCount { return errors.Errorf("expected upreplication to %d non-voters; found %d", expectedNonVoterCount, found) } @@ -379,7 +374,7 @@ func TestReplicateQueueUpAndDownReplicateNonVoters(t *testing.T) { require.NoError(t, err) expectedNonVoterCount = 0 testutils.SucceedsSoon(t, func() error { - if found := scanAndGetNumNonVoters(ctx, t, tc, store, scratchKey); found != expectedNonVoterCount { + if found := scanAndGetNumNonVoters(t, tc, scratchKey); found != expectedNonVoterCount { return errors.Errorf("expected downreplication to %d non-voters; found %d", expectedNonVoterCount, found) } From bd72f751c6c3dca1b20b042a594c56a4ec7fbfbe Mon Sep 17 00:00:00 2001 From: Tobias Grieger Date: Thu, 20 Jan 2022 17:08:52 +0100 Subject: [PATCH 2/5] kvserver: disable sendWithRangeID call stack Sadly, this on longer embeds the RangeID in the stack trace (likely culprit: Go adopting a register-based calling convention) Instead, you get garbage values that often are obviously garbage, but this may not always be true. We want to avoid being misled, so for now remove the rangeID here and explain when it can come back. See: https://cockroachlabs.slack.com/archives/G01G8LK77DK/p1641478596004700 Release note: None --- pkg/kv/kvserver/replica_send.go | 15 ++++++++++++--- pkg/kv/kvserver/replica_test.go | 2 +- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/pkg/kv/kvserver/replica_send.go b/pkg/kv/kvserver/replica_send.go index ad67decb0862..a0b4394a7a03 100644 --- a/pkg/kv/kvserver/replica_send.go +++ b/pkg/kv/kvserver/replica_send.go @@ -93,7 +93,7 @@ var optimisticEvalLimitedScans = settings.RegisterBoolSetting( func (r *Replica) Send( ctx context.Context, ba roachpb.BatchRequest, ) (*roachpb.BatchResponse, *roachpb.Error) { - return r.sendWithRangeID(ctx, r.RangeID, &ba) + return r.sendWithoutRangeID(ctx, &ba) } // checkCircuitBreaker takes a cancelable context and its cancel function. The @@ -163,6 +163,15 @@ func maybeAdjustWithBreakerError(pErr *roachpb.Error, brErr error) *roachpb.Erro return pErr } +// sendWithoutRangeID used to be called sendWithRangeID, accepted a `_forStacks +// roachpb.RangeID` argument, and had the description below. Ever since Go +// switched to the register-based calling convention though, this stopped +// working, giving essentially random numbers in the goroutine dumps that were +// misleading. It has thus been "disarmed" until Go produces useful values +// again. +// +// See (internal): https://cockroachlabs.slack.com/archives/G01G8LK77DK/p1641478596004700 +// // sendWithRangeID takes an unused rangeID argument so that the range // ID will be accessible in stack traces (both in panics and when // sampling goroutines from a live server). This line is subject to @@ -174,8 +183,8 @@ func maybeAdjustWithBreakerError(pErr *roachpb.Error, brErr error) *roachpb.Erro // within the portion printed in the stack trace): // // github.com/cockroachdb/cockroach/pkg/storage.(*Replica).sendWithRangeID(0xc420d1a000, 0x64bfb80, 0xc421564b10, 0x15, 0x153fd4634aeb0193, 0x0, 0x100000001, 0x1, 0x15, 0x0, ...) -func (r *Replica) sendWithRangeID( - ctx context.Context, _forStacks roachpb.RangeID, ba *roachpb.BatchRequest, +func (r *Replica) sendWithoutRangeID( + ctx context.Context, ba *roachpb.BatchRequest, ) (_ *roachpb.BatchResponse, rErr *roachpb.Error) { var br *roachpb.BatchResponse if r.leaseholderStats != nil && ba.Header.GatewayNodeID != 0 { diff --git a/pkg/kv/kvserver/replica_test.go b/pkg/kv/kvserver/replica_test.go index 2b6a7d52143e..dc97b746c73d 100644 --- a/pkg/kv/kvserver/replica_test.go +++ b/pkg/kv/kvserver/replica_test.go @@ -12667,7 +12667,7 @@ func TestProposalNotAcknowledgedOrReproposedAfterApplication(t *testing.T) { // Round trip another proposal through the replica to ensure that previously // committed entries have been applied. - _, pErr = tc.repl.sendWithRangeID(ctx, tc.repl.RangeID, &ba) + _, pErr = tc.repl.sendWithoutRangeID(ctx, &ba) if pErr != nil { t.Fatal(pErr) } From 9c6e09b66f2457c8ec3ced50a1ff1d795b14d355 Mon Sep 17 00:00:00 2001 From: richardjcai Date: Thu, 20 Jan 2022 10:53:45 -0500 Subject: [PATCH 3/5] backupccl: disable span configs for full cluster restore jobs test Release note: None --- pkg/ccl/backupccl/full_cluster_backup_restore_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/ccl/backupccl/full_cluster_backup_restore_test.go b/pkg/ccl/backupccl/full_cluster_backup_restore_test.go index 5791a2b1be58..4c3061419a7c 100644 --- a/pkg/ccl/backupccl/full_cluster_backup_restore_test.go +++ b/pkg/ccl/backupccl/full_cluster_backup_restore_test.go @@ -48,6 +48,7 @@ func TestFullClusterBackup(t *testing.T) { params := base.TestClusterArgs{ ServerArgs: base.TestServerArgs{ + DisableSpanConfigs: true, // TODO(irfansharif): #75060. Knobs: base.TestingKnobs{ SpanConfig: &spanconfig.TestingKnobs{ // We compare job progress before and after a restore. Disable From 9e91b5ad34287507c6152831d2bd6d7b20ac395f Mon Sep 17 00:00:00 2001 From: Ricky Stewart Date: Wed, 19 Jan 2022 14:03:22 -0600 Subject: [PATCH 4/5] vendor: pull in latest version of `stress` Pull in the latest version of `stress` including these changes: ``` 43d99a9 Merge pull request #13 from cockroachdb/bazelsharding 01690a1 stress: add `-bazel` support, support for sharding artifacts ``` Release note: None --- DEPS.bzl | 6 +++--- go.mod | 2 +- go.sum | 4 ++-- vendor | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/DEPS.bzl b/DEPS.bzl index 1aa5cd1875eb..9e9e6eb55bd4 100644 --- a/DEPS.bzl +++ b/DEPS.bzl @@ -1258,10 +1258,10 @@ def go_deps(): name = "com_github_cockroachdb_stress", build_file_proto_mode = "disable_global", importpath = "github.com/cockroachdb/stress", - sha256 = "2b7b584a4cafacd0d971adf1e150fe9c1f770eb2b56993af06a4ae7fa329a521", - strip_prefix = "github.com/cockroachdb/stress@v0.0.0-20170808184505-29b5d31b4c3a", + sha256 = "bd0dea0ebea9ea71aa12e5918fa8b61a78a548d54e8e9c126aa285c1ae84d3e4", + strip_prefix = "github.com/cockroachdb/stress@v0.0.0-20220119200057-43d99a9e6d7f", urls = [ - "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/stress/com_github_cockroachdb_stress-v0.0.0-20170808184505-29b5d31b4c3a.zip", + "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/stress/com_github_cockroachdb_stress-v0.0.0-20220119200057-43d99a9e6d7f.zip", ], ) go_repository( diff --git a/go.mod b/go.mod index 731022bd037e..1c76536cf11e 100644 --- a/go.mod +++ b/go.mod @@ -46,7 +46,7 @@ require ( github.com/cockroachdb/redact v1.1.3 github.com/cockroachdb/returncheck v0.0.0-20200612231554-92cdbca611dd github.com/cockroachdb/sentry-go v0.6.1-cockroachdb.2 - github.com/cockroachdb/stress v0.0.0-20170808184505-29b5d31b4c3a + github.com/cockroachdb/stress v0.0.0-20220119200057-43d99a9e6d7f github.com/cockroachdb/tools v0.0.0-20211112185054-642e51449b40 github.com/cockroachdb/ttycolor v0.0.0-20210902133924-c7d7dcdde4e8 github.com/codahale/hdrhistogram v0.0.0-20161010025455-3a0bb77429bd diff --git a/go.sum b/go.sum index 9b47a8f22692..13b011598db7 100644 --- a/go.sum +++ b/go.sum @@ -426,8 +426,8 @@ github.com/cockroachdb/returncheck v0.0.0-20200612231554-92cdbca611dd h1:KFOt5I9 github.com/cockroachdb/returncheck v0.0.0-20200612231554-92cdbca611dd/go.mod h1:AN708GD2FFeLgUHMbD58YPe4Nw8GG//3rwgyG4L9gR0= github.com/cockroachdb/sentry-go v0.6.1-cockroachdb.2 h1:IKgmqgMQlVJIZj19CdocBeSfSaiCbEBZGKODaixqtHM= github.com/cockroachdb/sentry-go v0.6.1-cockroachdb.2/go.mod h1:8BT+cPK6xvFOcRlk0R8eg+OTkcqI6baNH4xAkpiYVvQ= -github.com/cockroachdb/stress v0.0.0-20170808184505-29b5d31b4c3a h1:7bnDlMxIJCg3o3vIILG2COsbNZpiYHgI4UkCYmeAWnQ= -github.com/cockroachdb/stress v0.0.0-20170808184505-29b5d31b4c3a/go.mod h1:oapRqABg5KA/TaAikQH53fpP5nvCe9sftYmYV/F/yVE= +github.com/cockroachdb/stress v0.0.0-20220119200057-43d99a9e6d7f h1:jXa4+uPllulalBUwZlptnW177YokSScljz6TpsMGld8= +github.com/cockroachdb/stress v0.0.0-20220119200057-43d99a9e6d7f/go.mod h1:oapRqABg5KA/TaAikQH53fpP5nvCe9sftYmYV/F/yVE= github.com/cockroachdb/tablewriter v0.0.5-0.20200105123400-bd15540e8847 h1:c7yLgqcm/3c9lYtpWeVD9NYqA9cKsKHdpQM62PHtTUM= github.com/cockroachdb/tablewriter v0.0.5-0.20200105123400-bd15540e8847/go.mod h1:zq6QwlOf5SlnkVbMSr5EoBv3636FWnp+qbPhuoO21uA= github.com/cockroachdb/teamcity v0.0.0-20180905144921-8ca25c33eb11 h1:UAqRo5xPCyTtZznAJ9iPVpDUFxGI0a6QWtQ8E+zwJRg= diff --git a/vendor b/vendor index efb0b2036523..4c70094c23a6 160000 --- a/vendor +++ b/vendor @@ -1 +1 @@ -Subproject commit efb0b2036523326e3f041e3149f75909c5295936 +Subproject commit 4c70094c23a64ef3211ebaa4c541734f310b3102 From 9b3efa2d449966642239dabcc0b21f31cece803c Mon Sep 17 00:00:00 2001 From: Rafi Shamim Date: Wed, 19 Jan 2022 23:16:39 -0500 Subject: [PATCH 5/5] cli: stop ignoring user arg in insecure mode Release note (bug fix): The --user argument is no longer ignored when using `cockroach sql` in --insecure mode. --- pkg/cli/democluster/demo_cluster.go | 4 ++-- pkg/cli/interactive_tests/test_style_enabled.tcl | 4 ++-- pkg/cli/interactive_tests/test_url_db_override.tcl | 5 +++++ pkg/server/pgurl/pgurl.go | 1 - pkg/server/pgurl/testdata/url | 12 ++++++------ 5 files changed, 15 insertions(+), 11 deletions(-) diff --git a/pkg/cli/democluster/demo_cluster.go b/pkg/cli/democluster/demo_cluster.go index 54a3320ba487..8cae0258bb78 100644 --- a/pkg/cli/democluster/demo_cluster.go +++ b/pkg/cli/democluster/demo_cluster.go @@ -1147,7 +1147,8 @@ func (c *transientCluster) getNetworkURLForServer( host, port, _ := addr.SplitHostPort(sqlAddr, "") u. WithNet(pgurl.NetTCP(host, port)). - WithDatabase(database) + WithDatabase(database). + WithUsername(c.adminUser.Normalized()) // For a demo cluster we don't use client TLS certs and instead use // password-based authentication with the password pre-filled in the @@ -1156,7 +1157,6 @@ func (c *transientCluster) getNetworkURLForServer( u.WithInsecure() } else { u. - WithUsername(c.adminUser.Normalized()). WithAuthn(pgurl.AuthnPassword(true, c.adminPassword)). WithTransport(pgurl.TransportTLS(pgurl.TLSRequire, "")) } diff --git a/pkg/cli/interactive_tests/test_style_enabled.tcl b/pkg/cli/interactive_tests/test_style_enabled.tcl index b421e36d3ec5..3d25660b0d46 100644 --- a/pkg/cli/interactive_tests/test_style_enabled.tcl +++ b/pkg/cli/interactive_tests/test_style_enabled.tcl @@ -19,7 +19,7 @@ eexpect eof # Try connect and check the session variables match. -spawn $argv sql --url "postgresql://test@localhost:26257?options=-cintervalstyle%3Diso_8601" +spawn $argv sql --url "postgresql://root@localhost:26257?options=-cintervalstyle%3Diso_8601" eexpect root@ send "SHOW intervalstyle;\r" eexpect "iso_8601" @@ -28,7 +28,7 @@ interrupt eexpect eof # TODO(#72065): uncomment -#spawn $argv sql --url "postgresql://test@localhost:26257?options=-cdatestyle%3Dymd" +#spawn $argv sql --url "postgresql://root@localhost:26257?options=-cdatestyle%3Dymd" #eexpect root@ #send "SHOW datestyle;\r" #eexpect "ISO, YMD" diff --git a/pkg/cli/interactive_tests/test_url_db_override.tcl b/pkg/cli/interactive_tests/test_url_db_override.tcl index a77daa303010..75e053d51daf 100644 --- a/pkg/cli/interactive_tests/test_url_db_override.tcl +++ b/pkg/cli/interactive_tests/test_url_db_override.tcl @@ -23,6 +23,11 @@ spawn $argv sql --url "postgresql://test@localhost:26257" --insecure -e "select eexpect "1 row" eexpect eof +# Make sure --insecure does not override --user +spawn $argv sql --user=test --insecure -e "select 'user=' || current_user()" +eexpect "user=test" +eexpect eof + set ::env(COCKROACH_INSECURE) "true" end_test diff --git a/pkg/server/pgurl/pgurl.go b/pkg/server/pgurl/pgurl.go index 1fac0d1f1bfb..82b03dccd1fb 100644 --- a/pkg/server/pgurl/pgurl.go +++ b/pkg/server/pgurl/pgurl.go @@ -133,7 +133,6 @@ func (u *URL) GetOption(opt string) string { // all security controls disabled. func (u *URL) WithInsecure() *URL { return u. - WithUsername("root"). WithAuthn(AuthnNone()). WithTransport(TransportNone()) } diff --git a/pkg/server/pgurl/testdata/url b/pkg/server/pgurl/testdata/url index ae10dfecd757..5eef68ca545a 100644 --- a/pkg/server/pgurl/testdata/url +++ b/pkg/server/pgurl/testdata/url @@ -90,13 +90,13 @@ JDBC: jdbc:postgresql://somehost:26257/somedb?application_name=myapp&password= insecure ---- -pq URL: postgresql://root@/?sslmode=disable -DSN: user=root sslmode=disable -JDBC: jdbc:postgresql:///?sslmode=disable&user=root +pq URL: postgresql:///?sslmode=disable +DSN: sslmode=disable +JDBC: jdbc:postgresql:///?sslmode=disable --defaults filled-- -pq URL: postgresql://root@defaulthost:26257/defaultdb?sslmode=disable -DSN: database=defaultdb user=root host=defaulthost port=26257 sslmode=disable -JDBC: jdbc:postgresql://defaulthost:26257/defaultdb?sslmode=disable&user=root +pq URL: postgresql://defaultuser@defaulthost:26257/defaultdb?sslmode=disable +DSN: database=defaultdb user=defaultuser host=defaulthost port=26257 sslmode=disable +JDBC: jdbc:postgresql://defaulthost:26257/defaultdb?sslmode=disable&user=defaultuser subtest redundant