From 51741ed69941ca3f3400e9bebaf8c8e7f2ab8ca5 Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Thu, 24 Oct 2024 22:01:40 -0400 Subject: [PATCH 01/11] Improve Schema engine's TablesWithSize80 query This avoids the table scan on stage from the query execution in the middle here: EXPLAIN: -> Filter: (`TABLE_NAME` like 'trb3%') (actual time=1030..1030 rows=1 loops=1) -> Table scan on (actual time=1030..1030 rows=3005 loops=1) -> Aggregate using temporary table (actual time=1030..1030 rows=3005 loops=1) ... Signed-off-by: Matt Lord --- go/mysql/flavor_mysql.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/mysql/flavor_mysql.go b/go/mysql/flavor_mysql.go index 5d92694e9f2..e142994a26f 100644 --- a/go/mysql/flavor_mysql.go +++ b/go/mysql/flavor_mysql.go @@ -430,7 +430,7 @@ const TablesWithSize80 = `SELECT t.table_name, SUM(i.allocated_size) FROM information_schema.tables t LEFT JOIN information_schema.innodb_tablespaces i - ON i.name LIKE CONCAT(t.table_schema, '/', t.table_name, IF(t.create_options <=> 'partitioned', '#p#%', '')) COLLATE utf8mb3_general_ci + ON LEFT(i.name, char_length(t.table_schema)+1+char_length(table_name)) = CONCAT(t.table_schema, '/', t.table_name) COLLATE utf8mb3_general_ci WHERE t.table_schema = database() GROUP BY From 5da3b82e74cb5c2644745404286d9fe6d9d14e81 Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Thu, 24 Oct 2024 22:51:44 -0400 Subject: [PATCH 02/11] Use a longer timeout when waiting for vrep pos prior to locks Signed-off-by: Matt Lord --- go/vt/vttablet/onlineddl/executor.go | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/go/vt/vttablet/onlineddl/executor.go b/go/vt/vttablet/onlineddl/executor.go index 22dd9447bb9..f3be3c81f03 100644 --- a/go/vt/vttablet/onlineddl/executor.go +++ b/go/vt/vttablet/onlineddl/executor.go @@ -891,8 +891,8 @@ func (e *Executor) cutOverVReplMigration(ctx context.Context, s *VReplStream, sh migrationCutOverThreshold := getMigrationCutOverThreshold(onlineDDL) - waitForPos := func(s *VReplStream, pos replication.Position) error { - ctx, cancel := context.WithTimeout(ctx, migrationCutOverThreshold) + waitForPos := func(s *VReplStream, pos replication.Position, timeout time.Duration) error { + ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() // Wait for target to reach the up-to-date pos if err := tmClient.VReplicationWaitForPos(ctx, tablet.Tablet, s.id, replication.EncodePosition(pos)); err != nil { @@ -950,7 +950,11 @@ func (e *Executor) cutOverVReplMigration(ctx context.Context, s *VReplStream, sh return err } e.updateMigrationStage(ctx, onlineDDL.UUID, "waiting for post-sentry pos: %v", replication.EncodePosition(postSentryPos)) - if err := waitForPos(s, postSentryPos); err != nil { + // We have not locked anything, stopped anything, or done anything that otherwise impacts + // query serving so we wait for a multiple of the cutover threshold here, with that variable + // primarily serving to limit the max time we later spend waiting for the position AFTER + // we've taken the locks and table access is blocked. + if err := waitForPos(s, postSentryPos, migrationCutOverThreshold*5); err != nil { return err } e.updateMigrationStage(ctx, onlineDDL.UUID, "post-sentry pos reached") @@ -1148,7 +1152,7 @@ func (e *Executor) cutOverVReplMigration(ctx context.Context, s *VReplStream, sh } e.updateMigrationStage(ctx, onlineDDL.UUID, "waiting for post-lock pos: %v", replication.EncodePosition(postWritesPos)) - if err := waitForPos(s, postWritesPos); err != nil { + if err := waitForPos(s, postWritesPos, migrationCutOverThreshold); err != nil { e.updateMigrationStage(ctx, onlineDDL.UUID, "timeout while waiting for post-lock pos: %v", err) return err } From 4558c4e420702b25c8531108465b789ee39a10d3 Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Fri, 25 Oct 2024 00:01:08 -0400 Subject: [PATCH 03/11] Correct query so that we're not matching table name prefixes Signed-off-by: Matt Lord --- go/mysql/flavor_mysql.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/mysql/flavor_mysql.go b/go/mysql/flavor_mysql.go index e142994a26f..b5398a523dc 100644 --- a/go/mysql/flavor_mysql.go +++ b/go/mysql/flavor_mysql.go @@ -430,7 +430,7 @@ const TablesWithSize80 = `SELECT t.table_name, SUM(i.allocated_size) FROM information_schema.tables t LEFT JOIN information_schema.innodb_tablespaces i - ON LEFT(i.name, char_length(t.table_schema)+1+char_length(table_name)) = CONCAT(t.table_schema, '/', t.table_name) COLLATE utf8mb3_general_ci + ON ((i.name = CONCAT(t.table_schema, '/', t.table_name)) OR LEFT(i.name, CHAR_LENGTH(t.table_schema)+1+CHAR_LENGTH(table_name)+3) = CONCAT(t.table_schema, '/', t.table_name, '#p#') COLLATE utf8mb3_general_ci) WHERE t.table_schema = database() GROUP BY From 5d71b8e7f20b285a6c74991f8f4f4cf9b9018f52 Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Fri, 25 Oct 2024 00:25:54 -0400 Subject: [PATCH 04/11] Make query exponentially faster Signed-off-by: Matt Lord --- go/mysql/flavor_mysql.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/mysql/flavor_mysql.go b/go/mysql/flavor_mysql.go index b5398a523dc..c5e8526d223 100644 --- a/go/mysql/flavor_mysql.go +++ b/go/mysql/flavor_mysql.go @@ -430,7 +430,7 @@ const TablesWithSize80 = `SELECT t.table_name, SUM(i.allocated_size) FROM information_schema.tables t LEFT JOIN information_schema.innodb_tablespaces i - ON ((i.name = CONCAT(t.table_schema, '/', t.table_name)) OR LEFT(i.name, CHAR_LENGTH(t.table_schema)+1+CHAR_LENGTH(table_name)+3) = CONCAT(t.table_schema, '/', t.table_name, '#p#') COLLATE utf8mb3_general_ci) + ON SUBSTRING_INDEX(i.name, '#p#', 1) = CONCAT(t.table_schema, '/', t.table_name) COLLATE utf8mb3_general_ci WHERE t.table_schema = database() GROUP BY From 94d4c96527639cc6e4ee909dab7e4386a60d476f Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Fri, 25 Oct 2024 00:54:16 -0400 Subject: [PATCH 05/11] Minor change from self review Signed-off-by: Matt Lord --- go/vt/vttablet/onlineddl/executor.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/go/vt/vttablet/onlineddl/executor.go b/go/vt/vttablet/onlineddl/executor.go index f3be3c81f03..27a0c59fb30 100644 --- a/go/vt/vttablet/onlineddl/executor.go +++ b/go/vt/vttablet/onlineddl/executor.go @@ -950,10 +950,10 @@ func (e *Executor) cutOverVReplMigration(ctx context.Context, s *VReplStream, sh return err } e.updateMigrationStage(ctx, onlineDDL.UUID, "waiting for post-sentry pos: %v", replication.EncodePosition(postSentryPos)) - // We have not locked anything, stopped anything, or done anything that otherwise impacts - // query serving so we wait for a multiple of the cutover threshold here, with that variable - // primarily serving to limit the max time we later spend waiting for the position AFTER - // we've taken the locks and table access is blocked. + // We have not yet locked anything, stopped anything, or done anything that otherwise + // impacts query serving so we wait for a multiple of the cutover threshold here, with + // that variable primarily serving to limit the max time we later spend waiting for + // a position again AFTER we've taken the locks and table access is blocked. if err := waitForPos(s, postSentryPos, migrationCutOverThreshold*5); err != nil { return err } From bed84fa8aa87a4f490da417cc3b5055f7b9b0b0d Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Fri, 25 Oct 2024 01:01:25 -0400 Subject: [PATCH 06/11] Be less aggressive in timeout extension Signed-off-by: Matt Lord --- go/vt/vttablet/onlineddl/executor.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/vt/vttablet/onlineddl/executor.go b/go/vt/vttablet/onlineddl/executor.go index 27a0c59fb30..041d3ace467 100644 --- a/go/vt/vttablet/onlineddl/executor.go +++ b/go/vt/vttablet/onlineddl/executor.go @@ -954,7 +954,7 @@ func (e *Executor) cutOverVReplMigration(ctx context.Context, s *VReplStream, sh // impacts query serving so we wait for a multiple of the cutover threshold here, with // that variable primarily serving to limit the max time we later spend waiting for // a position again AFTER we've taken the locks and table access is blocked. - if err := waitForPos(s, postSentryPos, migrationCutOverThreshold*5); err != nil { + if err := waitForPos(s, postSentryPos, migrationCutOverThreshold*3); err != nil { return err } e.updateMigrationStage(ctx, onlineDDL.UUID, "post-sentry pos reached") From 1b3511b1347f0c0ec2955da8ce7358e22c51d011 Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Fri, 25 Oct 2024 11:42:36 -0400 Subject: [PATCH 07/11] Use full partition name indicator (second p before the num) Signed-off-by: Matt Lord --- go/mysql/flavor_mysql.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/mysql/flavor_mysql.go b/go/mysql/flavor_mysql.go index c5e8526d223..e614a867186 100644 --- a/go/mysql/flavor_mysql.go +++ b/go/mysql/flavor_mysql.go @@ -430,7 +430,7 @@ const TablesWithSize80 = `SELECT t.table_name, SUM(i.allocated_size) FROM information_schema.tables t LEFT JOIN information_schema.innodb_tablespaces i - ON SUBSTRING_INDEX(i.name, '#p#', 1) = CONCAT(t.table_schema, '/', t.table_name) COLLATE utf8mb3_general_ci + ON SUBSTRING_INDEX(i.name, '#p#p', 1) = CONCAT(t.table_schema, '/', t.table_name) COLLATE utf8mb3_general_ci WHERE t.table_schema = database() GROUP BY From ea56d4ff05e4bb219621023c5aa9eb82577d3476 Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Sun, 27 Oct 2024 10:28:08 -0400 Subject: [PATCH 08/11] Use safer query that's only ~ 50% slower. Signed-off-by: Matt Lord --- go/mysql/flavor_mysql.go | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/go/mysql/flavor_mysql.go b/go/mysql/flavor_mysql.go index e614a867186..0690706b380 100644 --- a/go/mysql/flavor_mysql.go +++ b/go/mysql/flavor_mysql.go @@ -415,10 +415,6 @@ const BaseShowTables = `SELECT t.table_name, // TablesWithSize80 is a query to select table along with size for mysql 8.0 // // Note the following: -// - We use a single query to fetch both partitioned and non-partitioned tables. This is because -// accessing `information_schema.innodb_tablespaces` is expensive on servers with many tablespaces, -// and every query that loads the table needs to perform full table scans on it. Doing a single -// table scan is more efficient than doing more than one. // - We utilize `INFORMATION_SCHEMA`.`TABLES`.`CREATE_OPTIONS` column to do early pruning before the JOIN. // - `TABLES`.`TABLE_NAME` has `utf8mb4_0900_ai_ci` collation. `INNODB_TABLESPACES`.`NAME` has `utf8mb3_general_ci`. // We normalize the collation to get better query performance (we force the casting at the time of our choosing) @@ -429,10 +425,24 @@ const TablesWithSize80 = `SELECT t.table_name, SUM(i.file_size), SUM(i.allocated_size) FROM information_schema.tables t - LEFT JOIN information_schema.innodb_tablespaces i - ON SUBSTRING_INDEX(i.name, '#p#p', 1) = CONCAT(t.table_schema, '/', t.table_name) COLLATE utf8mb3_general_ci + LEFT JOIN information_schema.innodb_tablespaces i + ON i.name = CONCAT(t.table_schema, '/', t.table_name) COLLATE utf8mb3_general_ci WHERE - t.table_schema = database() + t.table_schema = database() AND t.create_options NOT LIKE '%partitioned%' + GROUP BY + t.table_schema, t.table_name, t.table_type, t.create_time, t.table_comment +UNION ALL +SELECT t.table_name, + t.table_type, + UNIX_TIMESTAMP(t.create_time), + t.table_comment, + SUM(i.file_size), + SUM(i.allocated_size) + FROM information_schema.tables t + LEFT JOIN information_schema.innodb_tablespaces i + ON i.name LIKE (CONCAT(t.table_schema, '/', t.table_name, '#p#%') COLLATE utf8mb3_general_ci) + WHERE + t.table_schema = database() AND t.create_options LIKE '%partitioned%' GROUP BY t.table_schema, t.table_name, t.table_type, t.create_time, t.table_comment ` From 01b08a102578fb3dea8fc5668f4cb5ceb877ef05 Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Sun, 27 Oct 2024 10:31:17 -0400 Subject: [PATCH 09/11] Remove no longer relevant comments Signed-off-by: Matt Lord --- go/mysql/flavor_mysql.go | 5 ----- 1 file changed, 5 deletions(-) diff --git a/go/mysql/flavor_mysql.go b/go/mysql/flavor_mysql.go index 0690706b380..4c49a48583e 100644 --- a/go/mysql/flavor_mysql.go +++ b/go/mysql/flavor_mysql.go @@ -413,11 +413,6 @@ const BaseShowTables = `SELECT t.table_name, ` // TablesWithSize80 is a query to select table along with size for mysql 8.0 -// -// Note the following: -// - We utilize `INFORMATION_SCHEMA`.`TABLES`.`CREATE_OPTIONS` column to do early pruning before the JOIN. -// - `TABLES`.`TABLE_NAME` has `utf8mb4_0900_ai_ci` collation. `INNODB_TABLESPACES`.`NAME` has `utf8mb3_general_ci`. -// We normalize the collation to get better query performance (we force the casting at the time of our choosing) const TablesWithSize80 = `SELECT t.table_name, t.table_type, UNIX_TIMESTAMP(t.create_time), From 0467caa13b98384c5ce634bdf368ef0e58650801 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Sun, 27 Oct 2024 18:11:36 +0200 Subject: [PATCH 10/11] support NULL for TABLE_OPTIONS Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/mysql/flavor_mysql.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/mysql/flavor_mysql.go b/go/mysql/flavor_mysql.go index 4c49a48583e..0cfa41ad548 100644 --- a/go/mysql/flavor_mysql.go +++ b/go/mysql/flavor_mysql.go @@ -423,7 +423,7 @@ const TablesWithSize80 = `SELECT t.table_name, LEFT JOIN information_schema.innodb_tablespaces i ON i.name = CONCAT(t.table_schema, '/', t.table_name) COLLATE utf8mb3_general_ci WHERE - t.table_schema = database() AND t.create_options NOT LIKE '%partitioned%' + t.table_schema = database() AND IFNULL(t.create_options, '') NOT LIKE '%partitioned%' GROUP BY t.table_schema, t.table_name, t.table_type, t.create_time, t.table_comment UNION ALL From 8df90408be68beed8e8cfef771367e17a30b6792 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Mon, 28 Oct 2024 16:23:01 +0200 Subject: [PATCH 11/11] using subselect. Adding comments Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/mysql/flavor_mysql.go | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/go/mysql/flavor_mysql.go b/go/mysql/flavor_mysql.go index 0cfa41ad548..b840c3cca36 100644 --- a/go/mysql/flavor_mysql.go +++ b/go/mysql/flavor_mysql.go @@ -413,6 +413,20 @@ const BaseShowTables = `SELECT t.table_name, ` // TablesWithSize80 is a query to select table along with size for mysql 8.0 +// Note the following: +// - `TABLES`.`TABLE_NAME` has `utf8mb4_0900_ai_ci` collation. `INNODB_TABLESPACES`.`NAME` has `utf8mb3_general_ci`. +// We normalize the collation to get better query performance (we force the casting at the time of our choosing) +// - InnoDB has different table names than MySQL does, in particular for partitioned tables. As far as InnoDB +// is concerned, each partition is its own table. +// - We use a `UNION ALL` approach to handle two distinct scenarios: tables that are partitioned and those that are not. +// Since we `LEFT JOIN` from `TABLES` to `INNODB_TABLESPACES`, we know we already do full table scan on `TABLES`. We therefore +// don't mind spending some extra computation time (as in `CONCAT(t.table_schema, '/', t.table_name, '#p#%') COLLATE utf8mb3_general_ci`) +// to make things easier for the JOIN. +// - We utilize `INFORMATION_SCHEMA`.`TABLES`.`CREATE_OPTIONS` column to tell if the table is partitioned or not. The column +// may be `NULL` or may have multiple attributes, one of which is "partitioned", which we are looking for. +// - In a partitioned table, InnoDB will return multiple rows for the same table name, one for each partition, which we successively SUM. +// We also `SUM` the sizes in the non-partitioned case. This is not because we need to, but because it makes the query +// symmetric and less prone to future edit errors. const TablesWithSize80 = `SELECT t.table_name, t.table_type, UNIX_TIMESTAMP(t.create_time), @@ -420,7 +434,7 @@ const TablesWithSize80 = `SELECT t.table_name, SUM(i.file_size), SUM(i.allocated_size) FROM information_schema.tables t - LEFT JOIN information_schema.innodb_tablespaces i + LEFT JOIN (SELECT name, file_size, allocated_size FROM information_schema.innodb_tablespaces WHERE name LIKE CONCAT(database(), '/%')) i ON i.name = CONCAT(t.table_schema, '/', t.table_name) COLLATE utf8mb3_general_ci WHERE t.table_schema = database() AND IFNULL(t.create_options, '') NOT LIKE '%partitioned%' @@ -434,7 +448,7 @@ SELECT t.table_name, SUM(i.file_size), SUM(i.allocated_size) FROM information_schema.tables t - LEFT JOIN information_schema.innodb_tablespaces i + LEFT JOIN (SELECT name, file_size, allocated_size FROM information_schema.innodb_tablespaces WHERE name LIKE CONCAT(database(), '/%')) i ON i.name LIKE (CONCAT(t.table_schema, '/', t.table_name, '#p#%') COLLATE utf8mb3_general_ci) WHERE t.table_schema = database() AND t.create_options LIKE '%partitioned%'