Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
77609: lease: Remove descriptors from cache if not found during lease refresh r=Xiang-Gu a=Xiang-Gu

Previously, when a descriptor is not found, we just log it and move on. This is inadequate because the no-longer-exist descriptor still exists in cache (i.e. in-memory objects managed by `lease.Manager`). To address this, this PR added logic that removes the descriptor when this situation occurred.

Fixes cockroachdb#67364

Release note: None

80806: sql: do not drop physical shard column if not cascade. r=Xiang-Gu a=Xiang-Gu

Release note (sql change): Previously, when we drop a hash-sharded
index, we will also drop the accompanying shard column, if no other
index uses this shard column.

For hash-sharded index created in 21.2 and prior, this shard
column is a physical, `STORED` column. Dropping such a physical column
can be very expensive since it requires a full table rewrite. For
hash-sharded index craeted in 22.1 and later, this shard column is a
virtual, computed column and dropping a virtual column is no problem.

This PR introduces the sql change that, if the to-be-dropped
sharded index has a physical shard column (and no other index uses that
column), we will drop only the index if not CASCADE; we will drop both
the index and the column if CASCADE.

Fixed: cockroachdb#80181

Co-authored-by: Xiang Gu <xiang@cockroachlabs.com>
  • Loading branch information
craig[bot] and Xiang-Gu committed May 5, 2022
3 parents da04bc2 + 91b6324 + ef61dfe commit f2d5708
Show file tree
Hide file tree
Showing 7 changed files with 219 additions and 11 deletions.
8 changes: 8 additions & 0 deletions pkg/sql/catalog/lease/helpers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -286,3 +286,11 @@ func (m *Manager) Publish(
}
return results[id], nil
}

func (m *Manager) TestingRefreshSomeLeases(ctx context.Context) {
m.refreshSomeLeases(ctx)
}

func (m *Manager) TestingDescriptorStateIsNil(id descpb.ID) bool {
return m.findDescriptorState(id, false /* create */) == nil
}
19 changes: 19 additions & 0 deletions pkg/sql/catalog/lease/lease.go
Original file line number Diff line number Diff line change
Expand Up @@ -1201,8 +1201,27 @@ func (m *Manager) refreshSomeLeases(ctx context.Context) {
},
func(ctx context.Context) {
defer wg.Done()

if evFunc := m.testingKnobs.TestingBeforeAcquireLeaseDuringRefresh; evFunc != nil {
if err := evFunc(id); err != nil {
log.Infof(ctx, "knob failed for desc (%v): %v", id, err)
return
}
}

if _, err := acquireNodeLease(ctx, m, id); err != nil {
log.Infof(ctx, "refreshing descriptor: %d lease failed: %s", id, err)

if errors.Is(err, catalog.ErrDescriptorNotFound) {
// Lease renewal failed due to removed descriptor; Remove this descriptor from cache.
if err := purgeOldVersions(ctx, m.DB(), id, true /* dropped */, 0 /* minVersion */, m); err != nil {
log.Warningf(ctx, "error purging leases for descriptor %d: %s",
id, err)
}
m.mu.Lock()
delete(m.mu.descriptors, id)
m.mu.Unlock()
}
}
}); err != nil {
log.Infof(ctx, "didnt refresh descriptor: %d lease: %s", id, err)
Expand Down
82 changes: 82 additions & 0 deletions pkg/sql/catalog/lease/lease_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3244,3 +3244,85 @@ func TestAmbiguousResultIsRetried(t *testing.T) {
// Ensure that the query completed successfully.
require.NoError(t, <-selectErr)
}

// TestDescriptorRemovedFromCacheWhenLeaseRenewalForThisDescriptorFails makes sure that, during a lease
// periodical refresh, if the descriptor, whose lease we intend to refresh, does not exist anymore, we delete
// this descriptor from "cache" (i.e. manager.mu.descriptor).
func TestDescriptorRemovedFromCacheWhenLeaseRenewalForThisDescriptorFails(t *testing.T) {
defer leaktest.AfterTest(t)()
ctx := context.Background()

// typeDescID will be set to id of the created type `typ` later.
mu := syncutil.Mutex{}
typeDescID := descpb.InvalidID
typeDescName := ""
var tdb *sqlutils.SQLRunner
dropCompleted := make(chan bool)

// The overall testing strategy is
// 1. Add a testing knob immediately before the acquire a node lease inside refreshSomeLeases;
// 2. Create a new type `typ` and acquire a lease of it;
// 3. When the lease manager attempts to refresh the lease on `typ`, the testing knob is trigger which removes
// `typ` from storage;
// 4. This allows refreshSomeLeases fail with a DescriptorNotFound error and trigger the logic that removes this
// descriptor entry from the lease manager's cache (namely, manager.mu.descriptor).
// 5. Finally, we assert that the entry for `typ` is no longer in the cache.
params := createTestServerParams()
params.Knobs = base.TestingKnobs{
SQLLeaseManager: &lease.ManagerTestingKnobs{
TestingBeforeAcquireLeaseDuringRefresh: func(id descpb.ID) error {
mu.Lock()
defer mu.Unlock()
if typeDescID != descpb.InvalidID && id == typeDescID {
// Drop this type to trigger the logic that remove unfound descriptor from lease manager cache.
tdb.Exec(t, fmt.Sprintf("DROP TYPE %v", typeDescName))
dropCompleted <- true
}
return nil
},
},
}

// Set lease duration to something small so that the periodical lease refresh is kicked off often where the testing
// knob will be invoked, and eventually the logic to remove unfound descriptor from cache will be triggered.
lease.LeaseDuration.Override(ctx, &params.SV, time.Second)

s, sqlDB, kvDB := serverutils.StartServer(t, params)
defer s.Stopper().Stop(ctx)
tdb = sqlutils.MakeSQLRunner(sqlDB)

sql := `
CREATE DATABASE test;
USE test;
CREATE TYPE typ as enum ('a', 'b');
`
tdb.Exec(t, sql)

// Ensure `typ` is present in the lease manger by acquiring a lease on it.
typeDesc := desctestutils.TestingGetTypeDescriptor(kvDB, keys.SystemSQLCodec,
"test", "public", "typ")
lm := s.LeaseManager().(*lease.Manager)
_, err := lm.Acquire(ctx, s.Clock().Now(), typeDesc.GetID())
require.NoError(t, err)

// Set typeDescID such that the next periodical lease refresh will trigger the testing knob that drops `typ`.
mu.Lock()
typeDescID = typeDesc.GetID()
typeDescName = typeDesc.GetName()
mu.Unlock()

// Wait until the testing knob drops `typ`
<-dropCompleted

// Assert that soon (when the next periodical lease refresh happens) the testing knob will drop `typ`,
// and consequently trigger the logic to remove the descriptor from lease manager due to a failure
// to acquire a lease on this descriptor.
testutils.SucceedsSoon(t, func() error {
if lm.TestingDescriptorStateIsNil(typeDesc.GetID()) {
return nil
}

return errors.Errorf("descriptor %v(#%v) is still there. Expected: descriptor removed from cache.",
typeDesc.GetName(), typeDesc.GetID())
})
}
4 changes: 4 additions & 0 deletions pkg/sql/catalog/lease/testutils.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,10 @@ type ManagerTestingKnobs struct {
// ignored.
TestingDescriptorUpdateEvent func(descriptor *descpb.Descriptor) error

// TestingBeforeAcquireLeaseDuringRefresh is a callback right before
// the lease manager attempts to acquire a lease for descriptor `id`.
TestingBeforeAcquireLeaseDuringRefresh func(id descpb.ID) error

// To disable the deletion of orphaned leases at server startup.
DisableDeleteOrphanedLeases bool

Expand Down
32 changes: 22 additions & 10 deletions pkg/sql/drop_index.go
Original file line number Diff line number Diff line change
Expand Up @@ -167,11 +167,30 @@ func (n *dropIndexNode) startExec(params runParams) error {
}

if shardColName != "" {
ok, err := n.maybeQueueDropShardColumn(tableDesc, shardColName)
// Retrieve the sharded column descriptor by name.
shardColDesc, err := tableDesc.FindColumnWithName(tree.Name(shardColName))
if err != nil {
return err
}
columnsDropped = columnsDropped || ok
if !shardColDesc.Dropped() {
// Only drop this shard column if it's not a physical column (as is the case for all hash-sharded index in 22.1
// and after), or, CASCADE is set.
if shardColDesc.IsVirtual() || n.n.DropBehavior == tree.DropCascade {
ok, err := n.maybeQueueDropShardColumn(tableDesc, shardColDesc)
if err != nil {
return err
}
columnsDropped = columnsDropped || ok
} else {
params.p.BufferClientNotice(
ctx,
pgnotice.Newf("The accompanying shard column %q is a physical column and dropping it can be "+
"expensive, so, we dropped the index %q but skipped dropping %q. Issue another "+
"'ALTER TABLE %v DROP COLUMN %v' query if you want to drop column %q.",
shardColName, idx.GetName(), shardColName, tableDesc.GetName(), shardColName, shardColName),
)
}
}
}

if columnsDropped {
Expand Down Expand Up @@ -205,15 +224,8 @@ func (n *dropIndexNode) queueDropColumn(tableDesc *tabledesc.Mutable, col catalo
//
// Assumes that the given index is sharded.
func (n *dropIndexNode) maybeQueueDropShardColumn(
tableDesc *tabledesc.Mutable, shardColName string,
tableDesc *tabledesc.Mutable, shardColDesc catalog.Column,
) (bool, error) {
shardColDesc, err := tableDesc.FindColumnWithName(tree.Name(shardColName))
if err != nil {
return false, err
}
if shardColDesc.Dropped() {
return false, nil
}
if catalog.FindNonDropIndex(tableDesc, func(otherIdx catalog.Index) bool {
colIDs := otherIdx.CollectKeyColumnIDs()
if !otherIdx.Primary() {
Expand Down
83 changes: 83 additions & 0 deletions pkg/sql/drop_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
gosql "database/sql"
"fmt"
"math/rand"
"net/url"
"strings"
"testing"

Expand Down Expand Up @@ -56,6 +57,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/cockroachdb/cockroach/pkg/util/uuid"
"github.com/cockroachdb/errors"
"github.com/lib/pq"
"github.com/stretchr/testify/require"
)

Expand Down Expand Up @@ -1165,6 +1167,87 @@ WHERE
require.NoError(t, txn.Rollback())
}

// TestDropIndexOnHashShardedIndexWithStoredShardColumn tests the case when attempt to drop a hash-sharded index with
// a stored shard column (all hash-sharded index created in 21.2 and prior will have a stored shard column) without
// cascade, we skip dropping the shard column.
func TestDropIndexOnHashShardedIndexWithStoredShardColumn(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)

// Start a test server and connect to it with notice handler (so we can get and check notices from running queries).
ctx := context.Background()
params, _ := tests.CreateTestServerParams()
s, _, _ := serverutils.StartServer(t, params)
defer s.Stopper().Stop(ctx)
url, cleanup := sqlutils.PGUrl(t, s.ServingSQLAddr(), t.Name(), url.User(username.RootUser))
defer cleanup()
base, err := pq.NewConnector(url.String())
if err != nil {
t.Fatal(err)
}
actualNotices := make([]string, 0)
connector := pq.ConnectorWithNoticeHandler(base, func(n *pq.Error) {
actualNotices = append(actualNotices, n.Message)
})
dbWithHandler := gosql.OpenDB(connector)
defer dbWithHandler.Close()
tdb := sqlutils.MakeSQLRunner(dbWithHandler)

// Create a table with a stored column with the same name as the shard column so that the hash-sharded
// index will just use that column. This is the trick we use to be able to create a hash-shard index with
// a STORED column.
query :=
`
CREATE TABLE tbl (
a INT,
crdb_internal_a_shard_7 INT NOT VISIBLE AS (mod(fnv32(crdb_internal.datums_to_bytes(a)), 7:::INT)) STORED,
INDEX idx (a) USING HASH WITH BUCKET_COUNT=7
)
`
tdb.Exec(t, query)

// Assert that the table has two indexes and the shard index uses the stored column as its shard column.
var tableID descpb.ID
var tableDesc catalog.TableDescriptor
query = `SELECT id FROM system.namespace WHERE name = 'tbl'`
tdb.QueryRow(t, query).Scan(&tableID)
require.NoError(t, sql.TestingDescsTxn(ctx, s,
func(ctx context.Context, txn *kv.Txn, col *descs.Collection) (err error) {
tableDesc, err = col.Direct().MustGetTableDescByID(ctx, txn, tableID)
return err
}))
shardIdx, err := tableDesc.FindIndexWithName("idx")
require.NoError(t, err)
require.True(t, shardIdx.IsSharded())
require.Equal(t, "crdb_internal_a_shard_7", shardIdx.GetShardColumnName())
shardCol, err := tableDesc.FindColumnWithName("crdb_internal_a_shard_7")
require.NoError(t, err)
require.False(t, shardCol.IsVirtual())

// Drop the hash-sharded index.
query = `DROP INDEX idx`
tdb.Exec(t, query)

// Assert that the index is dropped but the shard column remains after dropping the index.
require.NoError(t, sql.TestingDescsTxn(ctx, s,
func(ctx context.Context, txn *kv.Txn, col *descs.Collection) (err error) {
tableDesc, err = col.Direct().MustGetTableDescByID(ctx, txn, tableID)
return err
}))
_, err = tableDesc.FindIndexWithName("idx")
require.Error(t, err)
shardCol, err = tableDesc.FindColumnWithName("crdb_internal_a_shard_7")
require.NoError(t, err)
require.False(t, shardCol.IsVirtual())

// Assert that we get the expected notice.
expectedNotice := "The accompanying shard column \"crdb_internal_a_shard_7\" is a physical column and dropping it " +
"can be expensive, so, we dropped the index \"idx\" but skipped dropping \"crdb_internal_a_shard_7\". Issue " +
"another 'ALTER TABLE tbl DROP COLUMN crdb_internal_a_shard_7' query if you want to drop column" +
" \"crdb_internal_a_shard_7\"."
require.Contains(t, actualNotices, expectedNotice)
}

// TestDropDatabaseWithForeignKeys tests that databases containing tables with
// foreign key relationships can be dropped and GC'ed. This is a regression test
// for #50344, which is a bug ultimately caused by the fact that when we remove
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/pgwire/testdata/pgtest/notice
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ Query {"String": "DROP INDEX t_x_idx"}
until crdb_only
CommandComplete
----
{"Severity":"NOTICE","SeverityUnlocalized":"","Code":"00000","Message":"the data for dropped indexes is reclaimed asynchronously","Detail":"","Hint":"The reclamation delay can be customized in the zone configuration for the table.","Position":0,"InternalPosition":0,"InternalQuery":"","Where":"","SchemaName":"","TableName":"","ColumnName":"","DataTypeName":"","ConstraintName":"","File":"drop_index.go","Line":560,"Routine":"dropIndexByName","UnknownFields":null}
{"Severity":"NOTICE","SeverityUnlocalized":"","Code":"00000","Message":"the data for dropped indexes is reclaimed asynchronously","Detail":"","Hint":"The reclamation delay can be customized in the zone configuration for the table.","Position":0,"InternalPosition":0,"InternalQuery":"","Where":"","SchemaName":"","TableName":"","ColumnName":"","DataTypeName":"","ConstraintName":"","File":"drop_index.go","Line":572,"Routine":"dropIndexByName","UnknownFields":null}
{"Type":"CommandComplete","CommandTag":"DROP INDEX"}

until noncrdb_only
Expand Down

0 comments on commit f2d5708

Please sign in to comment.