Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

infoschema: optimize 'select count(*) from information_schema.tables' for v2 #55574

Merged
merged 9 commits into from
Sep 3, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 89 additions & 7 deletions pkg/executor/infoschema_reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -689,15 +689,25 @@ func (e *memtableRetriever) setDataFromOneTable(
return rows, nil
}

func (e *memtableRetriever) setDataFromTables(ctx context.Context, sctx sessionctx.Context) error {
useStatsCache := e.updateStatsCacheIfNeed()
checker := privilege.GetPrivilegeManager(sctx)
// onlySchemaOrTableColumns reports whether columns contains at most three
// entries and every entry's Name.L is one of "table_schema", "table_name"
// or "table_catalog". An empty (or nil) slice yields true.
func onlySchemaOrTableColumns(columns []*model.ColumnInfo) bool {
	// More than three columns can never qualify, so bail out before looking
	// at any name.
	if len(columns) > 3 {
		return false
	}
	for _, col := range columns {
		name := col.Name.L
		if name != "table_schema" && name != "table_name" && name != "table_catalog" {
			return false
		}
	}
	return true
}

func (e *memtableRetriever) setDataFromTables(ctx context.Context, sctx sessionctx.Context) error {
var rows [][]types.Datum
loc := sctx.GetSessionVars().TimeZone
if loc == nil {
loc = time.Local
}
checker := privilege.GetPrivilegeManager(sctx)
ex, ok := e.extractor.(*plannercore.InfoSchemaTablesExtractor)
if !ok {
return errors.Errorf("wrong extractor type: %T, expected InfoSchemaTablesExtractor", e.extractor)
Expand All @@ -706,10 +716,82 @@ func (e *memtableRetriever) setDataFromTables(ctx context.Context, sctx sessionc
return nil
}

// Special optimize for queries on infoschema v2 like:
// select count(table_schema) from INFORMATION_SCHEMA.TABLES
// select count(*) from INFORMATION_SCHEMA.TABLES
// select table_schema, table_name from INFORMATION_SCHEMA.TABLES
// column pruning in general is not supported here.
if onlySchemaOrTableColumns(e.columns) {
is := e.is
if raw, ok := is.(*infoschema.SessionExtendedInfoSchema); ok {
is = raw.InfoSchema
}
v2, ok := is.(interface {
IterateAllTableItems(visit func(infoschema.TableItem) bool)
})
if ok {
if x := ctx.Value("cover-check"); x != nil {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe use a failpoint

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMO, failpoint is sometimes not as convenient.

  • It works globally, so there's no good way for precise control with a specific query.
  • It requires make failpoint-enable, an extra step to enable the test

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It requires make failpoint-enable, an extra step to enable the test

I have added this feature to failpoint; it can be configured alongside our other testing command-line arguments such as --tags=intest or -race: https://github.com/pingcap/failpoint?tab=readme-ov-file#quick-start-use-failpoint-toolexec

// The interface assertion is too tricky, so we add test to cover here.
// To ensure that if implementation changes one day, we can catch it.
slot := x.(*bool)
*slot = true
}
v2.IterateAllTableItems(func(t infoschema.TableItem) bool {
if !ex.HasTableName(t.TableName.L) {
return true
}
if !ex.HasTableSchema(t.DBName.L) {
return true
}
if checker != nil && !checker.RequestVerification(sctx.GetSessionVars().ActiveRoles, t.DBName.L, t.TableName.L, "", mysql.SelectPriv) {
return true
}

record := types.MakeDatums(
infoschema.CatalogVal, // TABLE_CATALOG
t.DBName.O, // TABLE_SCHEMA
t.TableName.O, // TABLE_NAME
nil, // TABLE_TYPE
nil, // ENGINE
nil, // VERSION
nil, // ROW_FORMAT
nil, // TABLE_ROWS
nil, // AVG_ROW_LENGTH
nil, // DATA_LENGTH
nil, // MAX_DATA_LENGTH
nil, // INDEX_LENGTH
nil, // DATA_FREE
nil, // AUTO_INCREMENT
nil, // CREATE_TIME
nil, // UPDATE_TIME
nil, // CHECK_TIME
nil, // TABLE_COLLATION
nil, // CHECKSUM
nil, // CREATE_OPTIONS
nil, // TABLE_COMMENT
nil, // TIDB_TABLE_ID
nil, // TIDB_ROW_ID_SHARDING_INFO
nil, // TIDB_PK_TYPE
nil, // TIDB_PLACEMENT_POLICY_NAME
)
rows = append(rows, record)
return true
})
e.rows = rows
return nil
}
}

// Normal code path.
schemas, tables, err := ex.ListSchemasAndTables(ctx, e.is)
if err != nil {
return errors.Trace(err)
}
useStatsCache := e.updateStatsCacheIfNeed()
loc := sctx.GetSessionVars().TimeZone
if loc == nil {
loc = time.Local
}
for i, table := range tables {
rows, err = e.setDataFromOneTable(sctx, loc, checker, schemas[i], table, rows, useStatsCache)
if err != nil {
Expand Down
29 changes: 29 additions & 0 deletions pkg/executor/infoschema_reader_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -894,3 +894,32 @@ func TestInfoSchemaConditionWorks(t *testing.T) {
rows = tk.MustQuery("select * from information_schema.partitions where table_schema = 'db_no_partition' and (partition_name is NULL or partition_name = 'p0');").Rows()
require.Equal(t, 2, len(rows))
}

// TestInfoschemaTablesSpecialOptimizationCovered runs a list of queries
// against information_schema.tables and, for each one, checks whether the
// *bool stored in the query context under the "cover-check" key was set to
// true while the query executed.
func TestInfoschemaTablesSpecialOptimizationCovered(t *testing.T) {
	type coverCase struct {
		sql    string
		expect bool
	}
	cases := []coverCase{
		{"select table_name, table_schema from information_schema.tables", true},
		{"select table_name from information_schema.tables", true},
		{"select table_name from information_schema.tables where table_schema = 'test'", true},
		{"select table_schema from information_schema.tables", true},
		{"select count(table_schema) from information_schema.tables", true},
		{"select count(table_name) from information_schema.tables", true},
		{"select count(table_rows) from information_schema.tables", false},
		{"select count(1) from information_schema.tables", true},
		{"select count(*) from information_schema.tables", true},
		{"select count(1) from (select table_name from information_schema.tables) t", true},
		{"select * from information_schema.tables", false},
		{"select table_name, table_catalog from information_schema.tables", true},
		{"select table_name, table_rows from information_schema.tables", false},
	}

	store := testkit.CreateMockStore(t)
	tk := testkit.NewTestKit(t, store)

	for _, tc := range cases {
		// A fresh flag per query, so one query's result cannot leak into
		// the next.
		hit := false
		ctx := context.WithValue(context.Background(), "cover-check", &hit)
		tk.MustQueryWithContext(ctx, tc.sql)
		require.Equal(t, tc.expect, hit, tc.sql)
	}
}
28 changes: 28 additions & 0 deletions pkg/infoschema/infoschema_v2.go
Original file line number Diff line number Diff line change
Expand Up @@ -645,6 +645,34 @@ func (is *infoschemaV2) TableByID(ctx context.Context, id int64) (val table.Tabl
return ret, true
}

// TableItem is the exported counterpart of the package-private tableItem.
// It carries only a database name and a table name, and is the value that
// IterateAllTableItems passes to its visit callback.
type TableItem struct {
// DBName is the name of the database the table belongs to.
DBName model.CIStr
// TableName is the name of the table.
TableName model.CIStr
}

// IterateAllTableItems is used for special performance optimization.
// Used by executor/infoschema_reader.go to handle reading from INFORMATION_SCHEMA.TABLES.
//
// It walks the byName index downwards starting at its maximum item and calls
// visit for each distinct (database, table) pair, using the first item seen
// for that pair; pairs whose first item is a tombstone are not visited.
// Iteration stops as soon as visit returns false. Nothing is visited when the
// index is empty.
//
// NOTE(review): presumably byName orders items of the same table adjacently
// with the newest version first, so the "first item seen" is the latest one —
// confirm against the byName comparator.
// NOTE(review): items are not filtered by schema version here; confirm that
// byName cannot hold items newer than this infoschema snapshot.
func (is *infoschemaV2) IterateAllTableItems(visit func(TableItem) bool) {
	pivot, ok := is.byName.Max()
	if !ok {
		return
	}
	// The maximum item must obey the same tombstone rule as every other item;
	// otherwise a dropped table that sorts last would still be reported.
	if !pivot.tomb && !visit(TableItem{DBName: pivot.dbName, TableName: pivot.tableName}) {
		return
	}
	is.byName.Descend(pivot, func(item tableItem) bool {
		if pivot.dbName == item.dbName && pivot.tableName == item.tableName {
			// Same table as the one handled last: skip MVCC version.
			return true
		}
		pivot = item
		if item.tomb {
			return true
		}
		return visit(TableItem{DBName: item.dbName, TableName: item.tableName})
	})
}

// IsSpecialDB tells whether the database is a special database.
func IsSpecialDB(dbName string) bool {
return dbName == util.InformationSchemaName.L ||
Expand Down
10 changes: 10 additions & 0 deletions pkg/planner/core/memtable_infoschema_extractor.go
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,16 @@ func NewInfoSchemaTablesExtractor() *InfoSchemaTablesExtractor {
return e
}

// HasTableName reports whether name is accepted for the _tableName column:
// it returns true exactly when e.filter(_tableName, name) returns false.
// NOTE(review): presumably filter also returns false when there is no
// predicate on the table name, in which case every name is accepted —
// confirm against filter.
func (e *InfoSchemaTablesExtractor) HasTableName(name string) bool {
	filteredOut := e.filter(_tableName, name)
	return !filteredOut
}

// HasTableSchema reports whether name is accepted for the _tableSchema
// column: it returns true exactly when e.filter(_tableSchema, name) returns
// false.
// NOTE(review): presumably filter also returns false when there is no
// predicate on the table schema, in which case every name is accepted —
// confirm against filter.
func (e *InfoSchemaTablesExtractor) HasTableSchema(name string) bool {
	filteredOut := e.filter(_tableSchema, name)
	return !filteredOut
}

// InfoSchemaViewsExtractor is the predicate extractor for information_schema.views.
type InfoSchemaViewsExtractor struct {
InfoSchemaBaseExtractor
Expand Down
21 changes: 19 additions & 2 deletions tests/integrationtest/r/executor/infoschema_reader.result
Original file line number Diff line number Diff line change
Expand Up @@ -420,5 +420,22 @@ select * from information_schema.table_constraints where table_schema = 'executo
CONSTRAINT_CATALOG CONSTRAINT_SCHEMA CONSTRAINT_NAME TABLE_SCHEMA TABLE_NAME CONSTRAINT_TYPE
select * from information_schema.table_constraints where table_schema = 'executor__infoschema_reader' and CONSTRAINT_NAME = 'c1';
CONSTRAINT_CATALOG CONSTRAINT_SCHEMA CONSTRAINT_NAME TABLE_SCHEMA TABLE_NAME CONSTRAINT_TYPE
def executor__infoschema_reader PRIMARY executor__infoschema_reader t PRIMARY KEY
def executor__infoschema_reader PRIMARY executor__infoschema_reader t_int PRIMARY KEY
select TABLE_CATALOG,TABLE_SCHEMA,TABLE_NAME,TABLE_TYPE,ENGINE,VERSION from information_schema.tables where table_name = 't';
TABLE_CATALOG TABLE_SCHEMA TABLE_NAME TABLE_TYPE ENGINE VERSION
def executor__infoschema_reader t BASE TABLE InnoDB 10
select table_name, table_schema from information_schema.tables where table_name = 't';
table_name table_schema
t executor__infoschema_reader
select table_name from information_schema.tables where table_name = 't';
table_name
t
explain format='brief' select table_name, table_schema from information_schema.tables;
id estRows task access object operator info
Projection 10000.00 root Column#3, Column#2
└─MemTableScan 10000.00 root table:TABLES
select count(*) from information_schema.tables where table_name = 't';
count(*)
1
select count(table_name) from information_schema.tables where table_name = 't';
count(table_name)
1
8 changes: 8 additions & 0 deletions tests/integrationtest/t/executor/infoschema_reader.test
Original file line number Diff line number Diff line change
Expand Up @@ -307,3 +307,11 @@ select * from information_schema.table_constraints where table_schema = 'executo
select * from information_schema.table_constraints where table_schema = 'executor__infoschema_reader' and table_name = 'non_exist';
--sorted_result
select * from information_schema.table_constraints where table_schema = 'executor__infoschema_reader' and CONSTRAINT_NAME = 'c1';

# TestTables
select TABLE_CATALOG,TABLE_SCHEMA,TABLE_NAME,TABLE_TYPE,ENGINE,VERSION from information_schema.tables where table_name = 't';
select table_name, table_schema from information_schema.tables where table_name = 't';
select table_name from information_schema.tables where table_name = 't';
explain format='brief' select table_name, table_schema from information_schema.tables;
select count(*) from information_schema.tables where table_name = 't';
select count(table_name) from information_schema.tables where table_name = 't';
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
select count(table_name) from information_schema.tables where table_name = 't';
select count(table_name) from information_schema.tables where table_name = 't';