statusccl: speed up unit test by reducing concurrency
Previously, we spun up 1 host cluster and 2 tenant clusters to test the
tenant status server. Each cluster contained 3 nodes, so we effectively
spun up 9 nodes for the test. This is very expensive and can cause
errors like "race: limit on 8128 simultaneously alive goroutines is
exceeded, dying" due to the underlying LLVM limitation.

This commit reduces the total number of nodes that are spun up. The
host cluster and the second tenant cluster are now reduced to 1 node
each to lower the cost of the tests. In addition, a special
`randomServer` index value (of type `serverIdx`) is introduced so the
tests no longer rely on hard-coded server indexes.

Release note: None
Azhng committed Jan 4, 2022
1 parent e45f601 commit 5f7c9ae
Showing 2 changed files with 49 additions and 26 deletions.
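
For context before the diff itself, here is a minimal, self-contained sketch of the sentinel-index idea the commit introduces. The `cluster` and `server` names below are stand-ins for illustration only; `serverIdx` and `randomServer` match the identifiers added in pkg/ccl/serverccl/statusccl/tenant_test_utils.go.

package main

import (
	"fmt"
	"math/rand"
)

// serverIdx indexes a node within a test cluster. The sentinel value
// randomServer asks the helper to pick a node at random instead of a
// hard-coded one.
type serverIdx int

const randomServer serverIdx = -1

// cluster is a stand-in for the tenantCluster slice used by the real tests.
type cluster []string

// server resolves an index to a concrete node, choosing one at random
// when the sentinel is passed in.
func (c cluster) server(idx serverIdx) string {
	if idx == randomServer {
		return c[rand.Intn(len(c))]
	}
	return c[idx]
}

func main() {
	c := cluster{"tenant-0", "tenant-1", "tenant-2"}
	fmt.Println(c.server(randomServer)) // any of the three nodes
	fmt.Println(c.server(0))            // always tenant-0
}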
35 changes: 19 additions & 16 deletions pkg/ccl/serverccl/statusccl/tenant_status_test.go
@@ -279,21 +279,21 @@ func testResetSQLStatsRPCForTenant(
t.Run(fmt.Sprintf("flushed=%t", flushed), func(t *testing.T) {
// Clears the SQL Stats at the end of each test via builtin.
defer func() {
testCluster.tenantConn(0 /* idx */).Exec(t, "SELECT crdb_internal.reset_sql_stats()")
controlCluster.tenantConn(0 /* idx */).Exec(t, "SELECT crdb_internal.reset_sql_stats()")
testCluster.tenantConn(randomServer).Exec(t, "SELECT crdb_internal.reset_sql_stats()")
controlCluster.tenantConn(randomServer).Exec(t, "SELECT crdb_internal.reset_sql_stats()")
}()

for _, stmt := range stmts {
-testCluster.tenantConn(0 /* idx */).Exec(t, stmt)
-controlCluster.tenantConn(0 /* idx */).Exec(t, stmt)
+testCluster.tenantConn(randomServer).Exec(t, stmt)
+controlCluster.tenantConn(randomServer).Exec(t, stmt)
}

if flushed {
-testCluster.tenantSQLStats(0 /* idx */).Flush(ctx)
-controlCluster.tenantSQLStats(0 /* idx */).Flush(ctx)
+testCluster.tenantSQLStats(randomServer).Flush(ctx)
+controlCluster.tenantSQLStats(randomServer).Flush(ctx)
}

-status := testCluster.tenantStatusSrv(1 /* idx */)
+status := testCluster.tenantStatusSrv(randomServer)

statsPreReset, err := status.Statements(ctx, &serverpb.StatementsRequest{
Combined: true,
@@ -334,7 +334,7 @@ func testResetSQLStatsRPCForTenant(

// Ensures that sql stats reset is isolated by tenant boundary.
statsFromControlCluster, err :=
-	controlCluster.tenantStatusSrv(1 /* idx */).Statements(ctx, &serverpb.StatementsRequest{
+	controlCluster.tenantStatusSrv(randomServer).Statements(ctx, &serverpb.StatementsRequest{
Combined: true,
})
require.NoError(t, err)
@@ -396,11 +396,14 @@ VALUES (1, 10, 100), (2, 20, 200), (3, 30, 300)
`)

// Record scan on primary index.
-cluster.tenantConn(0).Exec(t, "SELECT * FROM test")
+cluster.tenantConn(randomServer).
+	Exec(t, "SELECT * FROM test")

// Record scan on secondary index.
-cluster.tenantConn(1).Exec(t, "SELECT * FROM test@test_a_idx")
-testTableIDStr := cluster.tenantConn(2).QueryStr(t, "SELECT 'test'::regclass::oid")[0][0]
+cluster.tenantConn(randomServer).
+	Exec(t, "SELECT * FROM test@test_a_idx")
+testTableIDStr := cluster.tenantConn(randomServer).
+	QueryStr(t, "SELECT 'test'::regclass::oid")[0][0]
testTableID, err := strconv.Atoi(testTableIDStr)
require.NoError(t, err)

@@ -440,12 +443,12 @@ WHERE
{testTableIDStr, "1", "1", "true"},
{testTableIDStr, "2", "1", "true"},
}
-cluster.tenantConn(2).CheckQueryResults(t, query, expected)
+cluster.tenantConn(randomServer).CheckQueryResults(t, query, expected)
}

// Reset index usage stats.
timePreReset := timeutil.Now()
-status := testingCluster.tenantStatusSrv(1 /* idx */)
+status := testingCluster.tenantStatusSrv(randomServer)

// Reset index usage stats.
testCase.resetFn(testHelper)
@@ -457,7 +460,7 @@

// Ensure tenant data isolation.
// Check that last reset time was not updated for control cluster.
-status = controlCluster.tenantStatusSrv(1 /* idx */)
+status = controlCluster.tenantStatusSrv(randomServer)
resp, err = status.IndexUsageStatistics(ctx, &serverpb.IndexUsageStatisticsRequest{})
require.NoError(t, err)
require.Equal(t, resp.LastReset, time.Time{})
@@ -486,7 +489,7 @@ WHERE
}

// Ensure tenant data isolation.
-rows = controlCluster.tenantConn(2).QueryStr(t, query, controlTableID)
+rows = controlCluster.tenantConn(0).QueryStr(t, query, controlTableID)
require.NotNil(t, rows)
for _, row := range rows {
require.NotEqual(t, row[1], "0", "expected total reads for table %s to not be reset, but got %s", row[0], row[1])
@@ -659,7 +662,7 @@ WHERE
require.Equal(t, expected, actual)

// Ensure tenant data isolation.
-actual = controlledCluster.tenantConn(2).QueryStr(t, query, testTableID)
+actual = controlledCluster.tenantConn(0).QueryStr(t, query, testTableID)
expected = [][]string{}

require.Equal(t, expected, actual)
40 changes: 30 additions & 10 deletions pkg/ccl/serverccl/statusccl/tenant_test_utils.go
@@ -11,6 +11,7 @@ package statusccl
import (
"context"
gosql "database/sql"
"math/rand"
"net/http"
"testing"

@@ -29,6 +30,13 @@ import (
"github.com/stretchr/testify/require"
)

+// serverIdx is the index of the node within a test cluster. A special value
+// `randomServer` can be used to let the test helper randomly choose a
+// server from the test cluster.
+type serverIdx int
+
+const randomServer serverIdx = -1

type testTenant struct {
tenant serverutils.TestTenantInterface
tenantConn *gosql.DB
@@ -85,7 +93,7 @@ func newTestTenantHelper(
t.Helper()

params, _ := tests.CreateTestServerParams()
-testCluster := serverutils.StartNewTestCluster(t, 3 /* numNodes */, base.TestClusterArgs{
+testCluster := serverutils.StartNewTestCluster(t, 1 /* numNodes */, base.TestClusterArgs{
ServerArgs: params,
})
server := testCluster.Server(0)
@@ -99,10 +107,12 @@
security.EmbeddedTenantIDs()[0],
knobs,
),
+// Spin up a small tenant cluster under a different tenant ID to test
+// tenant isolation.
tenantControlCluster: newTenantCluster(
t,
server,
-tenantClusterSize,
+1, /* tenantClusterSize */
security.EmbeddedTenantIDs()[1],
knobs,
),
@@ -146,22 +156,22 @@ func newTenantCluster(
return cluster
}

-func (c tenantCluster) tenantConn(idx int) *sqlutils.SQLRunner {
-	return c[idx].tenantDB
+func (c tenantCluster) tenantConn(idx serverIdx) *sqlutils.SQLRunner {
+	return c.tenant(idx).tenantDB
}

-func (c tenantCluster) tenantHTTPClient(t *testing.T, idx int) *httpClient {
-	client, err := c[idx].tenant.RPCContext().GetHTTPClient()
+func (c tenantCluster) tenantHTTPClient(t *testing.T, idx serverIdx) *httpClient {
+	client, err := c.tenant(idx).tenant.RPCContext().GetHTTPClient()
require.NoError(t, err)
return &httpClient{t: t, client: client, baseURL: "https://" + c[idx].tenant.HTTPAddr()}
}

-func (c tenantCluster) tenantSQLStats(idx int) *persistedsqlstats.PersistedSQLStats {
-	return c[idx].tenantSQLStats
+func (c tenantCluster) tenantSQLStats(idx serverIdx) *persistedsqlstats.PersistedSQLStats {
+	return c.tenant(idx).tenantSQLStats
}

-func (c tenantCluster) tenantStatusSrv(idx int) serverpb.SQLStatusServer {
-	return c[idx].tenantStatus
+func (c tenantCluster) tenantStatusSrv(idx serverIdx) serverpb.SQLStatusServer {
+	return c.tenant(idx).tenantStatus
}

func (c tenantCluster) cleanup(t *testing.T) {
@@ -170,6 +180,16 @@ func (c tenantCluster) cleanup(t *testing.T) {
}
}

+// tenant selects a tenant node from the tenant cluster. If randomServer
+// is passed in, then a random node is selected.
+func (c tenantCluster) tenant(idx serverIdx) *testTenant {
+	if idx == randomServer {
+		return c[rand.Intn(len(c))]
+	}
+
+	return c[idx]
+}

type httpClient struct {
t *testing.T
client http.Client
