diff --git a/DEPS.bzl b/DEPS.bzl index 09739e4aba3bb..6ed169827a27f 100644 --- a/DEPS.bzl +++ b/DEPS.bzl @@ -5338,26 +5338,26 @@ def go_deps(): name = "com_github_onsi_ginkgo_v2", build_file_proto_mode = "disable_global", importpath = "github.com/onsi/ginkgo/v2", - sha256 = "f41e92baa52ec53d482603e4585c0906ca0c02e05004dca78a62bf1de88833ad", - strip_prefix = "github.com/onsi/ginkgo/v2@v2.9.4", + sha256 = "4865aab6c56b0d29a93cfe56206b586f1c9f36fde5a66e85650576344861b7cc", + strip_prefix = "github.com/onsi/ginkgo/v2@v2.13.0", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/onsi/ginkgo/v2/com_github_onsi_ginkgo_v2-v2.9.4.zip", - "http://ats.apps.svc/gomod/github.com/onsi/ginkgo/v2/com_github_onsi_ginkgo_v2-v2.9.4.zip", - "https://cache.hawkingrei.com/gomod/github.com/onsi/ginkgo/v2/com_github_onsi_ginkgo_v2-v2.9.4.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/onsi/ginkgo/v2/com_github_onsi_ginkgo_v2-v2.9.4.zip", + "http://bazel-cache.pingcap.net:8080/gomod/github.com/onsi/ginkgo/v2/com_github_onsi_ginkgo_v2-v2.13.0.zip", + "http://ats.apps.svc/gomod/github.com/onsi/ginkgo/v2/com_github_onsi_ginkgo_v2-v2.13.0.zip", + "https://cache.hawkingrei.com/gomod/github.com/onsi/ginkgo/v2/com_github_onsi_ginkgo_v2-v2.13.0.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/github.com/onsi/ginkgo/v2/com_github_onsi_ginkgo_v2-v2.13.0.zip", ], ) go_repository( name = "com_github_onsi_gomega", build_file_proto_mode = "disable_global", importpath = "github.com/onsi/gomega", - sha256 = "ea2b22782cc15569645dfdfc066a651e1335626677ad92d7ba4358a0885bf369", - strip_prefix = "github.com/onsi/gomega@v1.20.1", + sha256 = "923e8d0a1f95b3989f31c45142dee0b80a0aaa00cfa210bbd4d059f7046d12a8", + strip_prefix = "github.com/onsi/gomega@v1.29.0", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/onsi/gomega/com_github_onsi_gomega-v1.20.1.zip", - "http://ats.apps.svc/gomod/github.com/onsi/gomega/com_github_onsi_gomega-v1.20.1.zip", - "https://cache.hawkingrei.com/gomod/github.com/onsi/gomega/com_github_onsi_gomega-v1.20.1.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/onsi/gomega/com_github_onsi_gomega-v1.20.1.zip", + "http://bazel-cache.pingcap.net:8080/gomod/github.com/onsi/gomega/com_github_onsi_gomega-v1.29.0.zip", + "http://ats.apps.svc/gomod/github.com/onsi/gomega/com_github_onsi_gomega-v1.29.0.zip", + "https://cache.hawkingrei.com/gomod/github.com/onsi/gomega/com_github_onsi_gomega-v1.29.0.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/github.com/onsi/gomega/com_github_onsi_gomega-v1.29.0.zip", ], ) go_repository( @@ -6807,13 +6807,13 @@ def go_deps(): name = "com_github_tikv_client_go_v2", build_file_proto_mode = "disable_global", importpath = "github.com/tikv/client-go/v2", - sha256 = "6701afd9ef373b22010ff1c3aeb91fca8a6165341c6a38dd31a00ed10d24f314", - strip_prefix = "github.com/tikv/client-go/v2@v2.0.8-0.20240913090512-3777c384feb1", + sha256 = "1fd861359a159d20435b7e4ed81f520b7235941aa2c5e059e05f338f1f781664", + strip_prefix = "github.com/tikv/client-go/v2@v2.0.8-0.20241125064226-08d0b3b826b3", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/tikv/client-go/v2/com_github_tikv_client_go_v2-v2.0.8-0.20240913090512-3777c384feb1.zip", - "http://ats.apps.svc/gomod/github.com/tikv/client-go/v2/com_github_tikv_client_go_v2-v2.0.8-0.20240913090512-3777c384feb1.zip", - "https://cache.hawkingrei.com/gomod/github.com/tikv/client-go/v2/com_github_tikv_client_go_v2-v2.0.8-0.20240913090512-3777c384feb1.zip", - 
"https://storage.googleapis.com/pingcapmirror/gomod/github.com/tikv/client-go/v2/com_github_tikv_client_go_v2-v2.0.8-0.20240913090512-3777c384feb1.zip", + "http://bazel-cache.pingcap.net:8080/gomod/github.com/tikv/client-go/v2/com_github_tikv_client_go_v2-v2.0.8-0.20241125064226-08d0b3b826b3.zip", + "http://ats.apps.svc/gomod/github.com/tikv/client-go/v2/com_github_tikv_client_go_v2-v2.0.8-0.20241125064226-08d0b3b826b3.zip", + "https://cache.hawkingrei.com/gomod/github.com/tikv/client-go/v2/com_github_tikv_client_go_v2-v2.0.8-0.20241125064226-08d0b3b826b3.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/github.com/tikv/client-go/v2/com_github_tikv_client_go_v2-v2.0.8-0.20241125064226-08d0b3b826b3.zip", ], ) go_repository( @@ -9268,26 +9268,26 @@ def go_deps(): name = "io_k8s_api", build_file_proto_mode = "disable_global", importpath = "k8s.io/api", - sha256 = "2255428d2347df0b3a9cf6ac2791f5be6653b3c642359736e46733584d917335", - strip_prefix = "k8s.io/api@v0.28.6", + sha256 = "ae7b519f36431bc55fa56c47a51c1c37cf9e0df7e9d23741b3e839426d2627ff", + strip_prefix = "k8s.io/api@v0.29.11", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/k8s.io/api/io_k8s_api-v0.28.6.zip", - "http://ats.apps.svc/gomod/k8s.io/api/io_k8s_api-v0.28.6.zip", - "https://cache.hawkingrei.com/gomod/k8s.io/api/io_k8s_api-v0.28.6.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/k8s.io/api/io_k8s_api-v0.28.6.zip", + "http://bazel-cache.pingcap.net:8080/gomod/k8s.io/api/io_k8s_api-v0.29.11.zip", + "http://ats.apps.svc/gomod/k8s.io/api/io_k8s_api-v0.29.11.zip", + "https://cache.hawkingrei.com/gomod/k8s.io/api/io_k8s_api-v0.29.11.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/k8s.io/api/io_k8s_api-v0.29.11.zip", ], ) go_repository( name = "io_k8s_apimachinery", build_file_proto_mode = "disable_global", importpath = "k8s.io/apimachinery", - sha256 = "efc7e38cb4662d0b6c5648772e1ae92040a4d03af0a3a7731aedf17f8eab7359", - strip_prefix = "k8s.io/apimachinery@v0.28.6", + sha256 = "8dd5f53bf72f7bd6323bcc8f9f45823b30fc350daee4ab2d9e27cf1960d06b25", + strip_prefix = "k8s.io/apimachinery@v0.29.11", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/k8s.io/apimachinery/io_k8s_apimachinery-v0.28.6.zip", - "http://ats.apps.svc/gomod/k8s.io/apimachinery/io_k8s_apimachinery-v0.28.6.zip", - "https://cache.hawkingrei.com/gomod/k8s.io/apimachinery/io_k8s_apimachinery-v0.28.6.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/k8s.io/apimachinery/io_k8s_apimachinery-v0.28.6.zip", + "http://bazel-cache.pingcap.net:8080/gomod/k8s.io/apimachinery/io_k8s_apimachinery-v0.29.11.zip", + "http://ats.apps.svc/gomod/k8s.io/apimachinery/io_k8s_apimachinery-v0.29.11.zip", + "https://cache.hawkingrei.com/gomod/k8s.io/apimachinery/io_k8s_apimachinery-v0.29.11.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/k8s.io/apimachinery/io_k8s_apimachinery-v0.29.11.zip", ], ) go_repository( diff --git a/br/pkg/backup/client.go b/br/pkg/backup/client.go index 3e9574e2dad51..5060aeba30ae8 100644 --- a/br/pkg/backup/client.go +++ b/br/pkg/backup/client.go @@ -75,12 +75,26 @@ type Checksum struct { // ProgressUnit represents the unit of progress. type ProgressUnit string +type StoreBasedErr struct { + storeID uint64 + err error +} + +func (e *StoreBasedErr) Error() string { + return fmt.Sprintf("Store ID '%d': %v", e.storeID, e.err.Error()) +} + +func (e *StoreBasedErr) Unwrap() error { + return e.err +} + const ( // backupFineGrainedMaxBackoff is 1 hour. 
// given it begins the fine-grained backup, there must be some problems in the cluster. // We need to be more patient. backupFineGrainedMaxBackoff = 3600000 backupRetryTimes = 5 + disconnectRetryTimeout = 20000 // RangeUnit represents the progress updated counter when a range finished. RangeUnit ProgressUnit = "range" // RegionUnit represents the progress updated counter when a region finished. @@ -1121,6 +1135,7 @@ func (bc *Client) fineGrainedBackup( }) bo := utils.AdaptTiKVBackoffer(ctx, backupFineGrainedMaxBackoff, berrors.ErrUnknown) + maxDisconnect := make(map[uint64]uint) for { // Step1, check whether there is any incomplete range incomplete := pr.Res.GetIncompleteRange(req.StartKey, req.EndKey) @@ -1168,8 +1183,19 @@ func (bc *Client) fineGrainedBackup( for { select { case err := <-errCh: - // TODO: should we handle err here? - return errors.Trace(err) + if !berrors.Is(err, berrors.ErrFailedToConnect) { + return errors.Trace(err) + } + storeErr, ok := err.(*StoreBasedErr) + if !ok { + return errors.Trace(err) + } + + storeID := storeErr.storeID + maxDisconnect[storeID]++ + if maxDisconnect[storeID] > backupRetryTimes { + return errors.Annotatef(err, "Failed to connect to store %d more than %d times", storeID, backupRetryTimes) + } case resp, ok := <-respCh: if !ok { // Finished. @@ -1270,12 +1296,22 @@ func (bc *Client) handleFineGrained( storeID := targetPeer.GetStoreId() lockResolver := bc.mgr.GetLockResolver() client, err := bc.mgr.GetBackupClient(ctx, storeID) + + // inject a disconnect failpoint + failpoint.Inject("disconnect", func(_ failpoint.Value) { + logutil.CL(ctx).Warn("This is a injected disconnection error") + err = berrors.ErrFailedToConnect + }) + if err != nil { if berrors.Is(err, berrors.ErrFailedToConnect) { // When the leader store is died, // 20s for the default max duration before the raft election timer fires. logutil.CL(ctx).Warn("failed to connect to store, skipping", logutil.ShortError(err), zap.Uint64("storeID", storeID)) - return 20000, nil + return disconnectRetryTimeout, &StoreBasedErr{ + storeID: storeID, + err: err, + } } logutil.CL(ctx).Error("fail to connect store", zap.Uint64("StoreID", storeID)) @@ -1314,7 +1350,10 @@ func (bc *Client) handleFineGrained( // When the leader store is died, // 20s for the default max duration before the raft election timer fires. logutil.CL(ctx).Warn("failed to connect to store, skipping", logutil.ShortError(err), zap.Uint64("storeID", storeID)) - return 20000, nil + return disconnectRetryTimeout, &StoreBasedErr{ + storeID: storeID, + err: err, + } } logutil.CL(ctx).Error("failed to send fine-grained backup", zap.Uint64("storeID", storeID), logutil.ShortError(err)) return 0, errors.Annotatef(err, "failed to send fine-grained backup [%s, %s)", diff --git a/br/pkg/checkpoint/checkpoint.go b/br/pkg/checkpoint/checkpoint.go index 1d9f309843b88..4b397a60e5eeb 100644 --- a/br/pkg/checkpoint/checkpoint.go +++ b/br/pkg/checkpoint/checkpoint.go @@ -43,7 +43,7 @@ import ( "golang.org/x/sync/errgroup" ) -const CheckpointDir = "/checkpoints" +const CheckpointDir = "checkpoints" type flushPosition struct { CheckpointDataDir string diff --git a/br/pkg/task/common.go b/br/pkg/task/common.go index 343f1c0e84b16..c2432bcbaf042 100644 --- a/br/pkg/task/common.go +++ b/br/pkg/task/common.go @@ -732,7 +732,8 @@ func ReadBackupMeta( // flagToZapField checks whether this flag can be logged, // if need to log, return its zap field. Or return a field with hidden value. 
func flagToZapField(f *pflag.Flag) zap.Field { - if f.Name == flagStorage { + switch f.Name { + case flagStorage, FlagStreamFullBackupStorage: hiddenQuery, err := url.Parse(f.Value.String()) if err != nil { return zap.String(f.Name, "") @@ -740,8 +741,11 @@ func flagToZapField(f *pflag.Flag) zap.Field { // hide all query here. hiddenQuery.RawQuery = "" return zap.Stringer(f.Name, hiddenQuery) + case flagCipherKey, "azblob.encryption-key": + return zap.String(f.Name, "") + default: + return zap.Stringer(f.Name, f.Value) } - return zap.Stringer(f.Name, f.Value) } // LogArguments prints origin command arguments. diff --git a/br/pkg/task/common_test.go b/br/pkg/task/common_test.go index c942b96bc531e..79bd235fe1b45 100644 --- a/br/pkg/task/common_test.go +++ b/br/pkg/task/common_test.go @@ -33,13 +33,56 @@ func (f fakeValue) Type() string { } func TestUrlNoQuery(t *testing.T) { - flag := &pflag.Flag{ - Name: flagStorage, - Value: fakeValue("s3://some/what?secret=a123456789&key=987654321"), + testCases := []struct { + inputName string + expectedName string + inputValue string + expectedValue string + }{ + { + inputName: flagSendCreds, + expectedName: "send-credentials-to-tikv", + inputValue: "true", + expectedValue: "true", + }, + { + inputName: flagStorage, + expectedName: "storage", + inputValue: "s3://some/what?secret=a123456789&key=987654321", + expectedValue: "s3://some/what", + }, + { + inputName: FlagStreamFullBackupStorage, + expectedName: "full-backup-storage", + inputValue: "s3://bucket/prefix/?access-key=1&secret-key=2", + expectedValue: "s3://bucket/prefix/", + }, + { + inputName: flagCipherKey, + expectedName: "crypter.key", + inputValue: "537570657253656372657456616C7565", + expectedValue: "", + }, + { + inputName: "azblob.encryption-key", + expectedName: "azblob.encryption-key", + inputValue: "SUPERSECRET_AZURE_ENCRYPTION_KEY", + expectedValue: "", + }, + } + + for _, tc := range testCases { + flag := pflag.Flag{ + Name: tc.inputName, + Value: fakeValue(tc.inputValue), + } + field := flagToZapField(&flag) + require.Equal(t, tc.expectedName, field.Key, `test-case [%s="%s"]`, tc.expectedName, tc.expectedValue) + if stringer, ok := field.Interface.(fmt.Stringer); ok { + field.String = stringer.String() + } + require.Equal(t, tc.expectedValue, field.String, `test-case [%s="%s"]`, tc.expectedName, tc.expectedValue) } - field := flagToZapField(flag) - require.Equal(t, flagStorage, field.Key) - require.Equal(t, "s3://some/what", field.Interface.(fmt.Stringer).String()) } func TestTiDBConfigUnchanged(t *testing.T) { diff --git a/br/pkg/task/stream.go b/br/pkg/task/stream.go index 7462ace17616e..8dcc75aff38dc 100644 --- a/br/pkg/task/stream.go +++ b/br/pkg/task/stream.go @@ -74,6 +74,12 @@ const ( "You may check the metadata and continue by wait other task finish or manually delete the lock file " + truncateLockPath + " at the external storage." 
)
+const (
+    waitInfoSchemaReloadCheckInterval = 1 * time.Second
+    // loading all DDL changes for a million tables should take a few minutes, so use 15 minutes to make sure we don't exit early
+    waitInfoSchemaReloadTimeout = 15 * time.Minute
+)
+
 var (
     StreamStart = "log start"
     StreamStop = "log stop"
@@ -1445,6 +1451,21 @@ func restoreStream(
         return errors.Annotate(err, "failed to restore kv files")
     }
+    // failpoint to stop for a while after restoring kvs
+    // this mimics the scenario where the restore takes a long time and the lease in schemaInfo has expired and needs a refresh
+    failpoint.Inject("post-restore-kv-pending", func(val failpoint.Value) {
+        if val.(bool) {
+            // not ideal to use sleep, but there is no obvious better way right now
+            log.Info("sleep after restoring kv")
+            time.Sleep(2 * time.Second)
+        }
+    })
+
+    // make sure schema reload finishes before proceeding
+    if err = waitUntilSchemaReload(ctx, client); err != nil {
+        return errors.Trace(err)
+    }
+
     if err = client.CleanUpKVFiles(ctx); err != nil {
         return errors.Annotate(err, "failed to clean up")
     }
@@ -1869,3 +1890,16 @@ func checkPiTRTaskInfo(
     return curTaskInfo, doFullRestore, nil
 }
+
+func waitUntilSchemaReload(ctx context.Context, client *restore.Client) error {
+    log.Info("waiting for schema info to finish reloading")
+    reloadStart := time.Now()
+    conditionFunc := func() bool {
+        return !client.GetDomain().IsLeaseExpired()
+    }
+    if err := utils.WaitUntil(ctx, conditionFunc, waitInfoSchemaReloadCheckInterval, waitInfoSchemaReloadTimeout); err != nil {
+        return errors.Annotate(err, "failed to wait until schema reload")
+    }
+    log.Info("reloading schema finished", zap.Duration("timeTaken", time.Since(reloadStart)))
+    return nil
+}
diff --git a/br/pkg/utils/BUILD.bazel b/br/pkg/utils/BUILD.bazel
index d776456ca914b..808f9638e3616 100644
--- a/br/pkg/utils/BUILD.bazel
+++ b/br/pkg/utils/BUILD.bazel
@@ -19,6 +19,7 @@ go_library(
         "safe_point.go",
         "schema.go",
         "store_manager.go",
+        "wait.go",
         "worker.go",
     ],
     importpath = "github.com/pingcap/tidb/br/pkg/utils",
diff --git a/br/pkg/utils/safe_point.go b/br/pkg/utils/safe_point.go
index 93fa9415aa369..93491425e6a27 100644
--- a/br/pkg/utils/safe_point.go
+++ b/br/pkg/utils/safe_point.go
@@ -85,7 +85,7 @@ func UpdateServiceSafePoint(ctx context.Context, pdClient pd.Client, sp BRServic
     log.Debug("update PD safePoint limit with TTL", zap.Object("safePoint", sp))
     lastSafePoint, err := pdClient.UpdateServiceGCSafePoint(ctx, sp.ID, sp.TTL, sp.BackupTS-1)
-    if lastSafePoint > sp.BackupTS-1 {
+    if lastSafePoint > sp.BackupTS-1 && sp.TTL > 0 {
         log.Warn("service GC safe point lost, we may fail to back up if GC lifetime isn't long enough",
             zap.Uint64("lastSafePoint", lastSafePoint),
             zap.Object("safePoint", sp),
diff --git a/br/pkg/utils/wait.go b/br/pkg/utils/wait.go
new file mode 100644
index 0000000000000..5e5616eafb9eb
--- /dev/null
+++ b/br/pkg/utils/wait.go
@@ -0,0 +1,49 @@
+// Copyright 2024 PingCAP, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +package utils + +import ( + "context" + "time" + + "github.com/pingcap/errors" +) + +func WaitUntil(ctx context.Context, condition func() bool, checkInterval, maxTimeout time.Duration) error { + // do a quick check before starting the ticker + if condition() { + return nil + } + + timeoutCtx, cancel := context.WithTimeout(ctx, maxTimeout) + defer cancel() + + ticker := time.NewTicker(checkInterval) + defer ticker.Stop() + + for { + select { + case <-timeoutCtx.Done(): + if ctx.Err() != nil { + return ctx.Err() + } + return errors.Errorf("waitUntil timed out after waiting for %v", maxTimeout) + case <-ticker.C: + if condition() { + return nil + } + } + } +} diff --git a/br/tests/br_fine_grained_disconnect/run.sh b/br/tests/br_fine_grained_disconnect/run.sh new file mode 100755 index 0000000000000..bdbeaed651716 --- /dev/null +++ b/br/tests/br_fine_grained_disconnect/run.sh @@ -0,0 +1,49 @@ +#!/bin/sh +# +# Copyright 2022 PingCAP, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -eu +DB="$TEST_NAME" +TABLE="usertable" +DB_COUNT=3 +CUR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) + +function create_db_with_table(){ + for i in $(seq $DB_COUNT); do + run_sql "CREATE DATABASE $DB${i};" + go-ycsb load mysql -P $CUR/workload -p mysql.host=$TIDB_IP -p mysql.port=$TIDB_PORT -p mysql.user=root -p mysql.db=$DB${i} + done +} + +function drop_db(){ + for i in $(seq $DB_COUNT); do + run_sql "DROP DATABASE $DB${i};" + done +} + +# Create dbs with table +create_db_with_table + +export GO_FAILPOINTS="github.com/pingcap/tidb/br/pkg/backup/noop-backup=100*return(1)" +export GO_FAILPOINTS="github.com/pingcap/tidb/br/pkg/backup/disconnect=100" + +output=$(run_br --pd $PD_ADDR backup full -s "local://$TEST_DIR/$DB/${CRYPTER_METHOD}_file" 2>&1) + +if ! echo "$output" | grep -q "Failed to connect to store"; then + exit 1 +fi + +# Drop dbs finally +drop_db diff --git a/br/tests/br_fine_grained_disconnect/workload b/br/tests/br_fine_grained_disconnect/workload new file mode 100644 index 0000000000000..448ca3c1a477f --- /dev/null +++ b/br/tests/br_fine_grained_disconnect/workload @@ -0,0 +1,12 @@ +recordcount=1000 +operationcount=0 +workload=core + +readallfields=true + +readproportion=0 +updateproportion=0 +scanproportion=0 +insertproportion=0 + +requestdistribution=uniform \ No newline at end of file diff --git a/br/tests/br_pitr_long_running_schema_loading/run.sh b/br/tests/br_pitr_long_running_schema_loading/run.sh new file mode 100644 index 0000000000000..e6f32c08bcdce --- /dev/null +++ b/br/tests/br_pitr_long_running_schema_loading/run.sh @@ -0,0 +1,51 @@ +#!/bin/bash +# +# Copyright 2024 PingCAP, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -eu +. run_services +CUR=$(cd `dirname $0`; pwd) + +TASK_NAME="pitr_long_running_schema_loading" +res_file="$TEST_DIR/sql_res.$TEST_NAME.txt" +DB="$TEST_NAME" + +restart_services + +run_sql "CREATE SCHEMA $DB;" + +# start the log backup +run_br --pd $PD_ADDR log start --task-name $TASK_NAME -s "local://$TEST_DIR/$TASK_NAME/log" + +run_sql "USE $DB; CREATE TABLE t1 (id INT PRIMARY KEY, value VARCHAR(255));" +run_sql "USE $DB; INSERT INTO t1 VALUES (1, 'before-backup-1'), (2, 'before-backup-2');" + + +# do a full backup +run_br --pd "$PD_ADDR" backup full -s "local://$TEST_DIR/$TASK_NAME/full" + +run_sql "USE $DB; INSERT INTO t1 VALUES (3, 'after-backup-1'), (4, 'after-backup-2');" +run_sql "USE $DB; DROP TABLE t1;" +run_sql "USE $DB; CREATE TABLE t2 (id INT PRIMARY KEY, data TEXT);" +run_sql "USE $DB; INSERT INTO t2 VALUES (1, 'new-table-data');" + +echo "wait checkpoint advance" +. "$CUR/../br_test_utils.sh" && wait_log_checkpoint_advance $TASK_NAME + +restart_services + +export GO_FAILPOINTS="github.com/pingcap/tidb/pkg/domain/mock-load-schema-long-time=return(true);github.com/pingcap/tidb/br/pkg/task/post-restore-kv-pending=return(true)" +run_br --pd "$PD_ADDR" restore point -s "local://$TEST_DIR/$TASK_NAME/log" --full-backup-storage "local://$TEST_DIR/$TASK_NAME/full" +export GO_FAILPOINTS="" diff --git a/br/tests/run_group_br_tests.sh b/br/tests/run_group_br_tests.sh index 2379549727f4a..008b0e63d9fbe 100755 --- a/br/tests/run_group_br_tests.sh +++ b/br/tests/run_group_br_tests.sh @@ -22,7 +22,7 @@ declare -A groups groups=( ["G00"]="br_300_small_tables br_backup_empty br_backup_version br_cache_table br_case_sensitive br_charset_gbk br_check_new_collocation_enable br_history br_gcs br_rawkv" ["G01"]="br_autoid br_crypter2 br_db br_db_online br_db_online_newkv br_db_skip br_debug_meta br_ebs br_foreign_key br_full br_table_partition br_full_ddl" - ["G02"]="br_full_cluster_restore br_full_index br_incremental_ddl br_pitr_failpoint" + ["G02"]="br_full_cluster_restore br_full_index br_incremental_ddl br_pitr_failpoint br_pitr_long_running_schema_loading" ["G03"]='br_incompatible_tidb_config br_incremental br_incremental_index br_incremental_only_ddl br_incremental_same_table br_insert_after_restore br_key_locked br_log_test br_move_backup br_mv_index br_other br_partition_add_index br_tidb_placement_policy br_tiflash' ["G04"]='br_range br_replica_read br_restore_TDE_enable br_restore_log_task_enable br_s3 br_shuffle_leader br_shuffle_region br_single_table' ["G05"]='br_skip_checksum br_small_batch_size br_split_region_fail br_systables br_table_filter br_txn br_stats br_clustered_index br_crypter' diff --git a/go.mod b/go.mod index d3d560683e859..1bec350c0be39 100644 --- a/go.mod +++ b/go.mod @@ -106,7 +106,7 @@ require ( github.com/tdakkota/asciicheck v0.2.0 github.com/tiancaiamao/appdash v0.0.0-20181126055449-889f96f722a2 github.com/tidwall/btree v1.7.0 - github.com/tikv/client-go/v2 v2.0.8-0.20240913090512-3777c384feb1 + github.com/tikv/client-go/v2 v2.0.8-0.20241125064226-08d0b3b826b3 github.com/tikv/pd/client v0.0.0-20240806105739-10ecdbe92b55 
github.com/timakin/bodyclose v0.0.0-20240125160201-f835fa56326a github.com/twmb/murmur3 v1.1.6 @@ -140,7 +140,7 @@ require ( google.golang.org/grpc v1.62.1 gopkg.in/yaml.v2 v2.4.0 honnef.co/go/tools v0.4.7 - k8s.io/api v0.28.6 + k8s.io/api v0.29.11 sourcegraph.com/sourcegraph/appdash v0.0.0-20190731080439-ebfcffb1b5c0 sourcegraph.com/sourcegraph/appdash-data v0.0.0-20151005221446-73f23eafcf67 ) @@ -302,7 +302,7 @@ require ( gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect - k8s.io/apimachinery v0.28.6 // indirect + k8s.io/apimachinery v0.29.11 // indirect k8s.io/klog/v2 v2.120.1 // indirect k8s.io/utils v0.0.0-20230726121419-3b25d923346b // indirect sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect diff --git a/go.sum b/go.sum index 52f609134c162..0ee6bdf1727b3 100644 --- a/go.sum +++ b/go.sum @@ -601,8 +601,8 @@ github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY= github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE= github.com/onsi/ginkgo v1.16.5/go.mod h1:+E8gABHa3K6zRBolWtd+ROzc/U5bkGt0FwiG042wbpU= -github.com/onsi/gomega v1.20.1 h1:PA/3qinGoukvymdIDV8pii6tiZgC8kbmJO6Z5+b002Q= -github.com/onsi/gomega v1.20.1/go.mod h1:DtrZpjmvpn2mPm4YWQa0/ALMDj9v4YxLgojwPeREyVo= +github.com/onsi/gomega v1.29.0 h1:KIA/t2t5UBzoirT4H9tsML45GEbo3ouUnBHsCfD2tVg= +github.com/onsi/gomega v1.29.0/go.mod h1:9sxs+SwGrKI0+PWe4Fxa9tFQQBG5xSsSbMXOI8PPpoQ= github.com/opentracing/basictracer-go v1.0.0 h1:YyUAhaEfjoWXclZVJ9sGoNct7j4TVk7lZWlQw5UXuoo= github.com/opentracing/basictracer-go v1.0.0/go.mod h1:QfBfYuafItcjQuMwinw9GhYKwFXS9KnPs5lxoYwgW74= github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= @@ -783,8 +783,8 @@ github.com/tiancaiamao/gp v0.0.0-20221230034425-4025bc8a4d4a h1:J/YdBZ46WKpXsxsW github.com/tiancaiamao/gp v0.0.0-20221230034425-4025bc8a4d4a/go.mod h1:h4xBhSNtOeEosLJ4P7JyKXX7Cabg7AVkWCK5gV2vOrM= github.com/tidwall/btree v1.7.0 h1:L1fkJH/AuEh5zBnnBbmTwQ5Lt+bRJ5A8EWecslvo9iI= github.com/tidwall/btree v1.7.0/go.mod h1:twD9XRA5jj9VUQGELzDO4HPQTNJsoWWfYEL+EUQ2cKY= -github.com/tikv/client-go/v2 v2.0.8-0.20240913090512-3777c384feb1 h1:KmLTcRXq+sMZ5tKKLLX6H+bs1fUQB6uYaRg52Wkt6n0= -github.com/tikv/client-go/v2 v2.0.8-0.20240913090512-3777c384feb1/go.mod h1:+vXk4Aex17GnI8gfSMPxrL0SQLbBYgP3Db4FvHiImwM= +github.com/tikv/client-go/v2 v2.0.8-0.20241125064226-08d0b3b826b3 h1:M15i12ypNUsxuOPJ/zcCxajncJSs4yz0+mYB2RSJS0w= +github.com/tikv/client-go/v2 v2.0.8-0.20241125064226-08d0b3b826b3/go.mod h1:+vXk4Aex17GnI8gfSMPxrL0SQLbBYgP3Db4FvHiImwM= github.com/tikv/pd/client v0.0.0-20240806105739-10ecdbe92b55 h1:+1unfy0TcJJtud3d7BuYsvNG6tPVuXIH+WiIFhOx1Kc= github.com/tikv/pd/client v0.0.0-20240806105739-10ecdbe92b55/go.mod h1:1zqLOMhnkZIpBLj2oXOO2bWvtXhb12OmYr+cPkjQ6tI= github.com/timakin/bodyclose v0.0.0-20240125160201-f835fa56326a h1:A6uKudFIfAEpoPdaal3aSqGxBzLyU8TqyXImLwo6dIo= @@ -1362,10 +1362,10 @@ honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9 honnef.co/go/tools v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= honnef.co/go/tools v0.4.7 h1:9MDAWxMoSnB6QoSqiVr7P5mtkT9pOc1kSxchzPCnqJs= honnef.co/go/tools v0.4.7/go.mod h1:+rnGS1THNh8zMwnd2oVOTL9QF6vmfyG6ZXBULae2uc0= -k8s.io/api v0.28.6 h1:yy6u9CuIhmg55YvF/BavPBBXB+5QicB64njJXxVnzLo= -k8s.io/api v0.28.6/go.mod 
h1:AM6Ys6g9MY3dl/XNaNfg/GePI0FT7WBGu8efU/lirAo= -k8s.io/apimachinery v0.28.6 h1:RsTeR4z6S07srPg6XYrwXpTJVMXsjPXn0ODakMytSW0= -k8s.io/apimachinery v0.28.6/go.mod h1:QFNX/kCl/EMT2WTSz8k4WLCv2XnkOLMaL8GAVRMdpsA= +k8s.io/api v0.29.11 h1:6FwDo33f1WX5Yu0RQTX9YAd3wth8Ik0B4SXQKsoQfbk= +k8s.io/api v0.29.11/go.mod h1:3TDAW1OpFbz/Yx5r0W06b6eiAfHEwtH61VYDzpTU4Ng= +k8s.io/apimachinery v0.29.11 h1:55+6ue9advpA7T0sX2ZJDHCLKuiFfrAAR/39VQN9KEQ= +k8s.io/apimachinery v0.29.11/go.mod h1:i3FJVwhvSp/6n8Fl4K97PJEP8C+MM+aoDq4+ZJBf70Y= k8s.io/client-go v0.28.6 h1:Gge6ziyIdafRchfoBKcpaARuz7jfrK1R1azuwORIsQI= k8s.io/client-go v0.28.6/go.mod h1:+nu0Yp21Oeo/cBCsprNVXB2BfJTV51lFfe5tXl2rUL8= k8s.io/klog/v2 v2.120.1 h1:QXU6cPEOIslTGvZaXvFWiP9VKyeet3sawzTOvdXb4Vw= diff --git a/lightning/tidb-lightning.toml b/lightning/tidb-lightning.toml index e7945b1c4eafb..8884d59c38b8d 100644 --- a/lightning/tidb-lightning.toml +++ b/lightning/tidb-lightning.toml @@ -116,6 +116,7 @@ backend = "importer" addr = "127.0.0.1:8287" # Maximum KV size of SST files produced in the 'local' backend. This should be the same as # the TiKV region size to avoid further region splitting. The default value is 96 MiB. +# If the configuration value is less than TiKV's region size, it will be adjusted to TiKV's region size. #region-split-size = '96MiB' # write key-values pairs to tikv batch size #send-kv-pairs = 32768 diff --git a/pkg/bindinfo/OWNERS b/pkg/bindinfo/OWNERS new file mode 100644 index 0000000000000..904863f05e7fc --- /dev/null +++ b/pkg/bindinfo/OWNERS @@ -0,0 +1,7 @@ +# See the OWNERS docs at https://go.k8s.io/owners +options: + no_parent_owners: true +approvers: + - sig-approvers-planner +labels: + - sig/planner diff --git a/pkg/ddl/backfilling_dist_executor.go b/pkg/ddl/backfilling_dist_executor.go index c6588333e3b79..4126364735b09 100644 --- a/pkg/ddl/backfilling_dist_executor.go +++ b/pkg/ddl/backfilling_dist_executor.go @@ -56,6 +56,7 @@ type BackfillSubTaskMeta struct { RowEnd []byte `json:"row_end"` // Used by global sort write & ingest step. + RangeJobKeys [][]byte `json:"range_job_keys,omitempty"` RangeSplitKeys [][]byte `json:"range_split_keys,omitempty"` DataFiles []string `json:"data-files,omitempty"` StatFiles []string `json:"stat-files,omitempty"` diff --git a/pkg/ddl/backfilling_dist_scheduler.go b/pkg/ddl/backfilling_dist_scheduler.go index 0f59055940b45..53c71aef65607 100644 --- a/pkg/ddl/backfilling_dist_scheduler.go +++ b/pkg/ddl/backfilling_dist_scheduler.go @@ -431,7 +431,7 @@ func splitSubtaskMetaForOneKVMetaGroup( startKey := kvMeta.StartKey var endKey kv.Key for { - endKeyOfGroup, dataFiles, statFiles, rangeSplitKeys, err := splitter.SplitOneRangesGroup() + endKeyOfGroup, dataFiles, statFiles, interiorRangeJobKeys, regionSplitKeys, err := splitter.SplitOneRangesGroup() if err != nil { return nil, err } @@ -448,6 +448,10 @@ func splitSubtaskMetaForOneKVMetaGroup( return nil, errors.Errorf("invalid range, startKey: %s, endKey: %s", hex.EncodeToString(startKey), hex.EncodeToString(endKey)) } + rangeJobKeys := make([][]byte, 0, len(interiorRangeJobKeys)+2) + rangeJobKeys = append(rangeJobKeys, startKey) + rangeJobKeys = append(rangeJobKeys, interiorRangeJobKeys...) 
+ rangeJobKeys = append(rangeJobKeys, endKey) m := &BackfillSubTaskMeta{ MetaGroups: []*external.SortedKVMeta{{ StartKey: startKey, @@ -456,7 +460,8 @@ func splitSubtaskMetaForOneKVMetaGroup( }}, DataFiles: dataFiles, StatFiles: statFiles, - RangeSplitKeys: rangeSplitKeys, + RangeJobKeys: rangeJobKeys, + RangeSplitKeys: regionSplitKeys, TS: ts, } metaBytes, err := json.Marshal(m) @@ -568,16 +573,16 @@ func getRangeSplitter( rangeGroupSize := totalSize / instanceCnt rangeGroupKeys := int64(math.MaxInt64) - var maxSizePerRange = int64(config.SplitRegionSize) - var maxKeysPerRange = int64(config.SplitRegionKeys) + var regionSplitSize = int64(config.SplitRegionSize) + var regionSplitKeys = int64(config.SplitRegionKeys) if store != nil { pdCli := store.GetPDClient() tls, err := ingest.NewDDLTLS() if err == nil { size, keys, err := local.GetRegionSplitSizeKeys(ctx, pdCli, tls) if err == nil { - maxSizePerRange = max(maxSizePerRange, size) - maxKeysPerRange = max(maxKeysPerRange, keys) + regionSplitSize = max(regionSplitSize, size) + regionSplitKeys = max(regionSplitKeys, keys) } else { logger.Warn("fail to get region split keys and size", zap.Error(err)) } @@ -586,8 +591,11 @@ func getRangeSplitter( } } + // no matter region split size and keys, we always split range jobs by 96MB return external.NewRangeSplitter(ctx, multiFileStat, extStore, - rangeGroupSize, rangeGroupKeys, maxSizePerRange, maxKeysPerRange) + rangeGroupSize, rangeGroupKeys, + int64(config.SplitRegionSize), int64(config.SplitRegionKeys), + regionSplitSize, regionSplitKeys) } func forEachBackfillSubtaskMeta( diff --git a/pkg/ddl/backfilling_import_cloud.go b/pkg/ddl/backfilling_import_cloud.go index 16d9c872e4152..637cd623d7ad3 100644 --- a/pkg/ddl/backfilling_import_cloud.go +++ b/pkg/ddl/backfilling_import_cloud.go @@ -83,6 +83,11 @@ func (m *cloudImportExecutor) RunSubtask(ctx context.Context, subtask *proto.Sub all.Merge(g) } + // compatible with old version task meta + jobKeys := sm.RangeJobKeys + if jobKeys == nil { + jobKeys = sm.RangeSplitKeys + } err = local.CloseEngine(ctx, &backend.EngineConfig{ External: &backend.ExternalEngineConfig{ StorageURI: m.cloudStoreURI, @@ -90,6 +95,7 @@ func (m *cloudImportExecutor) RunSubtask(ctx context.Context, subtask *proto.Sub StatFiles: sm.StatFiles, StartKey: all.StartKey, EndKey: all.EndKey, + JobKeys: jobKeys, SplitKeys: sm.RangeSplitKeys, TotalFileSize: int64(all.TotalKVSize), TotalKVCount: 0, diff --git a/pkg/ddl/ingest/backend.go b/pkg/ddl/ingest/backend.go index f11849f70b11a..48f9a57c10f6e 100644 --- a/pkg/ddl/ingest/backend.go +++ b/pkg/ddl/ingest/backend.go @@ -296,8 +296,7 @@ func (bc *litBackendCtx) unsafeImportAndReset(ei *engineInfo) error { if err != nil { return errors.Trace(err) } - ei.openedEngine.SetTS(newTS) - return nil + return bc.backend.SetTSAfterResetEngine(ei.uuid, newTS) } // ForceSyncFlagForTest is a flag to force sync only for test. 
diff --git a/pkg/disttask/importinto/planner.go b/pkg/disttask/importinto/planner.go index 8fda8686facbb..ff4add8bd3d88 100644 --- a/pkg/disttask/importinto/planner.go +++ b/pkg/disttask/importinto/planner.go @@ -387,7 +387,7 @@ func generateWriteIngestSpecs(planCtx planner.PlanCtx, p *LogicalPlan) ([]planne startKey := tidbkv.Key(kvMeta.StartKey) var endKey tidbkv.Key for { - endKeyOfGroup, dataFiles, statFiles, rangeSplitKeys, err2 := splitter.SplitOneRangesGroup() + endKeyOfGroup, dataFiles, statFiles, interiorRangeJobKeys, regionSplitKeys, err2 := splitter.SplitOneRangesGroup() if err2 != nil { return err2 } @@ -404,6 +404,10 @@ func generateWriteIngestSpecs(planCtx planner.PlanCtx, p *LogicalPlan) ([]planne return errors.Errorf("invalid kv range, startKey: %s, endKey: %s", hex.EncodeToString(startKey), hex.EncodeToString(endKey)) } + rangeJobKeys := make([][]byte, 0, len(interiorRangeJobKeys)+2) + rangeJobKeys = append(rangeJobKeys, startKey) + rangeJobKeys = append(rangeJobKeys, interiorRangeJobKeys...) + rangeJobKeys = append(rangeJobKeys, endKey) // each subtask will write and ingest one range group m := &WriteIngestStepMeta{ KVGroup: kvGroup, @@ -415,8 +419,8 @@ func generateWriteIngestSpecs(planCtx planner.PlanCtx, p *LogicalPlan) ([]planne }, DataFiles: dataFiles, StatFiles: statFiles, - RangeSplitKeys: rangeSplitKeys, - RangeSplitSize: splitter.GetRangeSplitSize(), + RangeJobKeys: rangeJobKeys, + RangeSplitKeys: regionSplitKeys, TS: ts, } specs = append(specs, &WriteIngestSpec{m}) @@ -516,12 +520,15 @@ func getRangeSplitter(ctx context.Context, store storage.ExternalStorage, kvMeta zap.Int64("region-split-size", regionSplitSize), zap.Int64("region-split-keys", regionSplitKeys)) + // no matter region split size and keys, we always split range jobs by 96MB return external.NewRangeSplitter( ctx, kvMeta.MultipleFilesStats, store, int64(config.DefaultBatchSize), int64(math.MaxInt64), + int64(config.SplitRegionSize), + int64(config.SplitRegionKeys), regionSplitSize, regionSplitKeys, ) diff --git a/pkg/disttask/importinto/proto.go b/pkg/disttask/importinto/proto.go index 1545bcdf446a6..161e482db9472 100644 --- a/pkg/disttask/importinto/proto.go +++ b/pkg/disttask/importinto/proto.go @@ -86,8 +86,8 @@ type WriteIngestStepMeta struct { external.SortedKVMeta `json:"sorted-kv-meta"` DataFiles []string `json:"data-files"` StatFiles []string `json:"stat-files"` + RangeJobKeys [][]byte `json:"range-job-keys"` RangeSplitKeys [][]byte `json:"range-split-keys"` - RangeSplitSize int64 `json:"range-split-size"` TS uint64 `json:"ts"` Result Result diff --git a/pkg/disttask/importinto/task_executor.go b/pkg/disttask/importinto/task_executor.go index 78f41ff03c0f7..8a6540a5bb31e 100644 --- a/pkg/disttask/importinto/task_executor.go +++ b/pkg/disttask/importinto/task_executor.go @@ -404,18 +404,23 @@ func (e *writeAndIngestStepExecutor) RunSubtask(ctx context.Context, subtask *pr _, engineUUID := backend.MakeUUID("", subtask.ID) localBackend := e.tableImporter.Backend() localBackend.WorkerConcurrency = subtask.Concurrency * 2 + // compatible with old version task meta + jobKeys := sm.RangeJobKeys + if jobKeys == nil { + jobKeys = sm.RangeSplitKeys + } err = localBackend.CloseEngine(ctx, &backend.EngineConfig{ External: &backend.ExternalEngineConfig{ - StorageURI: e.taskMeta.Plan.CloudStorageURI, - DataFiles: sm.DataFiles, - StatFiles: sm.StatFiles, - StartKey: sm.StartKey, - EndKey: sm.EndKey, - SplitKeys: sm.RangeSplitKeys, - RegionSplitSize: sm.RangeSplitSize, - TotalFileSize: 
int64(sm.TotalKVSize), - TotalKVCount: 0, - CheckHotspot: false, + StorageURI: e.taskMeta.Plan.CloudStorageURI, + DataFiles: sm.DataFiles, + StatFiles: sm.StatFiles, + StartKey: sm.StartKey, + EndKey: sm.EndKey, + JobKeys: jobKeys, + SplitKeys: sm.RangeSplitKeys, + TotalFileSize: int64(sm.TotalKVSize), + TotalKVCount: 0, + CheckHotspot: false, }, TS: sm.TS, }, engineUUID) diff --git a/pkg/domain/domain.go b/pkg/domain/domain.go index 17d6e81d18f68..40eafa98a2a64 100644 --- a/pkg/domain/domain.go +++ b/pkg/domain/domain.go @@ -291,6 +291,16 @@ func (do *Domain) loadInfoSchema(startTS uint64) (infoschema.InfoSchema, bool, i // We can fall back to full load, don't need to return the error. logutil.BgLogger().Error("failed to load schema diff", zap.Error(err)) } + + // add failpoint to simulate long-running schema loading scenario + failpoint.Inject("mock-load-schema-long-time", func(val failpoint.Value) { + if val.(bool) { + // not ideal to use sleep, but not sure if there is a better way + logutil.BgLogger().Error("sleep before doing a full load") + time.Sleep(15 * time.Second) + } + }) + // full load. schemas, err := do.fetchAllSchemasWithTables(m) if err != nil { @@ -1328,6 +1338,11 @@ func (do *Domain) Init( return nil } +// IsLeaseExpired returns whether lease has expired +func (do *Domain) IsLeaseExpired() bool { + return do.SchemaValidator.IsLeaseExpired() +} + // InitInfo4Test init infosync for distributed execution test. func (do *Domain) InitInfo4Test() { infosync.MockGlobalServerInfoManagerEntry.Add(do.ddl.GetID(), do.ServerID) diff --git a/pkg/domain/schema_validator.go b/pkg/domain/schema_validator.go index c018085d0141f..0104b11f54b76 100644 --- a/pkg/domain/schema_validator.go +++ b/pkg/domain/schema_validator.go @@ -57,6 +57,8 @@ type SchemaValidator interface { Reset() // IsStarted indicates whether SchemaValidator is started. IsStarted() bool + // IsLeaseExpired checks whether the current lease has expired + IsLeaseExpired() bool } type deltaSchemaInfo struct { @@ -172,6 +174,10 @@ func (s *schemaValidator) Update(leaseGrantTS uint64, oldVer, currVer int64, cha } } +func (s *schemaValidator) IsLeaseExpired() bool { + return time.Now().After(s.latestSchemaExpire) +} + // isRelatedTablesChanged returns the result whether relatedTableIDs is changed // from usedVer to the latest schema version. // NOTE, this function should be called under lock! diff --git a/pkg/executor/adapter.go b/pkg/executor/adapter.go index 9be17cfec5c43..c3d66ba08c77c 100644 --- a/pkg/executor/adapter.go +++ b/pkg/executor/adapter.go @@ -603,7 +603,7 @@ func (a *ExecStmt) handleStmtForeignKeyTrigger(ctx context.Context, e exec.Execu if stmtCtx.ForeignKeyTriggerCtx.HasFKCascades { // If the ExecStmt has foreign key cascade to be executed, we need call `StmtCommit` to commit the ExecStmt itself // change first. - // Since `UnionScanExec` use `SnapshotIter` and `SnapshotGetter` to read txn mem-buffer, if we don't do `StmtCommit`, + // Since `UnionScanExec` use `SnapshotIter` and `SnapshotGetter` to read txn mem-buffer, if we don't do `StmtCommit`, // then the fk cascade executor can't read the mem-buffer changed by the ExecStmt. 
         a.Ctx.StmtCommit(ctx)
     }
diff --git a/pkg/executor/builder.go b/pkg/executor/builder.go
index 297506380aae0..de6fcd8675f44 100644
--- a/pkg/executor/builder.go
+++ b/pkg/executor/builder.go
@@ -798,12 +798,15 @@ func (b *executorBuilder) buildLimit(v *plannercore.PhysicalLimit) exec.Executor
         end: v.Offset + v.Count,
     }
-    childUsedSchemaLen := v.Children()[0].Schema().Len()
+    childSchemaLen := v.Children()[0].Schema().Len()
     childUsedSchema := markChildrenUsedCols(v.Schema().Columns, v.Children()[0].Schema())[0]
     e.columnIdxsUsedByChild = make([]int, 0, len(childUsedSchema))
     e.columnIdxsUsedByChild = append(e.columnIdxsUsedByChild, childUsedSchema...)
-    if len(e.columnIdxsUsedByChild) == childUsedSchemaLen {
+    if len(e.columnIdxsUsedByChild) == childSchemaLen {
         e.columnIdxsUsedByChild = nil // indicates that all columns are used. LimitExec will improve performance for this condition.
+    } else {
+        // construct a column swap helper to do the inline projection
+        e.columnSwapHelper = chunk.NewColumnSwapHelper(e.columnIdxsUsedByChild)
     }
     return e
 }
diff --git a/pkg/executor/cte.go b/pkg/executor/cte.go
index 1b358c0e18064..346c13bbe6aa0 100644
--- a/pkg/executor/cte.go
+++ b/pkg/executor/cte.go
@@ -89,16 +89,16 @@ func (e *CTEExec) Open(ctx context.Context) (err error) {
     defer e.producer.resTbl.Unlock()
     if e.producer.checkAndUpdateCorColHashCode() {
-        e.producer.reset()
-        if err = e.producer.reopenTbls(); err != nil {
+        err = e.producer.reset()
+        if err != nil {
             return err
         }
     }
     if e.producer.openErr != nil {
         return e.producer.openErr
     }
-    if !e.producer.opened {
-        if err = e.producer.openProducer(ctx, e); err != nil {
+    if !e.producer.hasCTEResult() && !e.producer.executorOpened {
+        if err = e.producer.openProducerExecutor(ctx, e); err != nil {
             return err
         }
     }
@@ -109,8 +109,14 @@ func (e *CTEExec) Next(ctx context.Context, req *chunk.Chunk) (err error) {
     e.producer.resTbl.Lock()
     defer e.producer.resTbl.Unlock()
-    if !e.producer.resTbl.Done() {
-        if err = e.producer.produce(ctx); err != nil {
+    if !e.producer.hasCTEResult() {
+        // in case another CTEExec called Close without generating the CTE result.
+        if !e.producer.executorOpened {
+            if err = e.producer.openProducerExecutor(ctx, e); err != nil {
+                return err
+            }
+        }
+        if err = e.producer.genCTEResult(ctx); err != nil {
             return err
         }
     }
@@ -132,7 +138,7 @@ func (e *CTEExec) Close() (firstErr error) {
     func() {
         e.producer.resTbl.Lock()
         defer e.producer.resTbl.Unlock()
-        if !e.producer.closed {
+        if e.producer.executorOpened {
             failpoint.Inject("mock_cte_exec_panic_avoid_deadlock", func(v failpoint.Value) {
                 ok := v.(bool)
                 if ok {
@@ -140,12 +146,17 @@ func (e *CTEExec) Close() (firstErr error) {
                     panic(exeerrors.ErrMemoryExceedForQuery)
                 }
             })
-            // closeProducer() only close seedExec and recursiveExec, will not touch resTbl.
-            // It means you can still read resTbl after call closeProducer().
-            // You can even call all three functions(openProducer/produce/closeProducer) in CTEExec.Next().
+            // closeProducerExecutor() only closes seedExec and recursiveExec; it will not touch resTbl.
+            // It means you can still read resTbl after calling closeProducerExecutor().
+            // You can even call all three functions (openProducerExecutor/genCTEResult/closeProducerExecutor) in CTEExec.Next().
             // Separating these three function calls is only to follow the abstraction of the volcano model.
- err := e.producer.closeProducer() + err := e.producer.closeProducerExecutor() firstErr = setFirstErr(firstErr, err, "close cte producer error") + if !e.producer.hasCTEResult() { + // CTE result is not generated, in this case, we reset it + err = e.producer.reset() + firstErr = setFirstErr(firstErr, err, "close cte producer error") + } } }() err := e.BaseExecutor.Close() @@ -160,10 +171,10 @@ func (e *CTEExec) reset() { } type cteProducer struct { - // opened should be false when not open or open fail(a.k.a. openErr != nil) - opened bool - produced bool - closed bool + // executorOpened is used to indicate whether the executor(seedExec/recursiveExec) is opened. + // when executorOpened is true, the executor is opened, otherwise it means the executor is + // not opened or is already closed. + executorOpened bool // cteProducer is shared by multiple operators, so if the first operator tries to open // and got error, the second should return open error directly instead of open again. @@ -202,14 +213,10 @@ type cteProducer struct { corColHashCodes [][]byte } -func (p *cteProducer) openProducer(ctx context.Context, cteExec *CTEExec) (err error) { +func (p *cteProducer) openProducerExecutor(ctx context.Context, cteExec *CTEExec) (err error) { defer func() { p.openErr = err - if err == nil { - p.opened = true - } else { - p.opened = false - } + p.executorOpened = true }() if p.seedExec == nil { return errors.New("seedExec for CTEExec is nil") @@ -252,7 +259,7 @@ func (p *cteProducer) openProducer(ctx context.Context, cteExec *CTEExec) (err e return nil } -func (p *cteProducer) closeProducer() (firstErr error) { +func (p *cteProducer) closeProducerExecutor() (firstErr error) { err := exec.Close(p.seedExec) firstErr = setFirstErr(firstErr, err, "close seedExec err") @@ -271,7 +278,7 @@ func (p *cteProducer) closeProducer() (firstErr error) { // because ExplainExec still needs tracker to get mem usage info. p.memTracker = nil p.diskTracker = nil - p.closed = true + p.executorOpened = false return } @@ -338,7 +345,13 @@ func (p *cteProducer) nextChunkLimit(cteExec *CTEExec, req *chunk.Chunk) error { return nil } -func (p *cteProducer) produce(ctx context.Context) (err error) { +func (p *cteProducer) hasCTEResult() bool { + return p.resTbl.Done() +} + +// genCTEResult generates the result of CTE, and stores the result in resTbl. +// This is a synchronous function, which means it will block until the result is generated. +func (p *cteProducer) genCTEResult(ctx context.Context) (err error) { if p.resTbl.Error() != nil { return p.resTbl.Error() } @@ -531,14 +544,18 @@ func (p *cteProducer) setupTblsForNewIteration() (err error) { return nil } -func (p *cteProducer) reset() { +func (p *cteProducer) reset() error { p.curIter = 0 p.hashTbl = nil - - p.opened = false + p.executorOpened = false p.openErr = nil - p.produced = false - p.closed = false + + // Normally we need to setup tracker after calling Reopen(), + // But reopen resTbl means we need to call genCTEResult() again, it will setup tracker. + if err := p.resTbl.Reopen(); err != nil { + return err + } + return p.iterInTbl.Reopen() } func (p *cteProducer) resetTracker() { @@ -552,18 +569,6 @@ func (p *cteProducer) resetTracker() { } } -func (p *cteProducer) reopenTbls() (err error) { - if p.isDistinct { - p.hashTbl = newConcurrentMapHashTable() - } - // Normally we need to setup tracker after calling Reopen(), - // But reopen resTbl means we need to call produce() again, it will setup tracker. 
- if err := p.resTbl.Reopen(); err != nil { - return err - } - return p.iterInTbl.Reopen() -} - // Check if tbl meets the requirement of limit. func (p *cteProducer) limitDone(tbl cteutil.Storage) bool { return p.hasLimit && uint64(tbl.NumRows()) >= p.limitEnd diff --git a/pkg/executor/executor.go b/pkg/executor/executor.go index 71feeb2086dc2..9da2a016aea25 100644 --- a/pkg/executor/executor.go +++ b/pkg/executor/executor.go @@ -1332,6 +1332,7 @@ type LimitExec struct { // columnIdxsUsedByChild keep column indexes of child executor used for inline projection columnIdxsUsedByChild []int + columnSwapHelper *chunk.ColumnSwapHelper // Log the close time when opentracing is enabled. span opentracing.Span @@ -1393,10 +1394,9 @@ func (e *LimitExec) Next(ctx context.Context, req *chunk.Chunk) error { e.cursor += batchSize if e.columnIdxsUsedByChild != nil { - for i, childIdx := range e.columnIdxsUsedByChild { - if err = req.SwapColumn(i, e.childResult, childIdx); err != nil { - return err - } + err = e.columnSwapHelper.SwapColumns(e.childResult, req) + if err != nil { + return err } } else { req.SwapColumns(e.childResult) diff --git a/pkg/executor/infoschema_reader.go b/pkg/executor/infoschema_reader.go index f100600b0ad70..6586413cbcd41 100644 --- a/pkg/executor/infoschema_reader.go +++ b/pkg/executor/infoschema_reader.go @@ -454,6 +454,11 @@ func (e *memtableRetriever) setDataForStatisticsInTable(schema model.CIStr, tabl expression = tblCol.GeneratedExprString } + var subPart any + if key.Length != types.UnspecifiedLength { + subPart = key.Length + } + record := types.MakeDatums( infoschema.CatalogVal, // TABLE_CATALOG schema.O, // TABLE_SCHEMA @@ -465,7 +470,7 @@ func (e *memtableRetriever) setDataForStatisticsInTable(schema model.CIStr, tabl colName, // COLUMN_NAME "A", // COLLATION 0, // CARDINALITY - nil, // SUB_PART + subPart, // SUB_PART nil, // PACKED nullable, // NULLABLE "BTREE", // INDEX_TYPE @@ -862,7 +867,7 @@ func (e *hugeMemTableRetriever) dataForColumnsInTable(ctx context.Context, sctx // Build plan is not thread safe, there will be concurrency on sessionctx. 
if err := runWithSystemSession(internalCtx, sctx, func(s sessionctx.Context) error { is := sessiontxn.GetTxnManager(s).GetTxnInfoSchema() - planBuilder, _ := plannercore.NewPlanBuilder().Init(s.GetPlanCtx(), is, hint.NewQBHintHandler(nil)) + planBuilder, _ := plannercore.NewPlanBuilder(plannercore.PlanBuilderOptNoExecution{}).Init(s.GetPlanCtx(), is, hint.NewQBHintHandler(nil)) var err error viewLogicalPlan, err = planBuilder.BuildDataSourceFromView(ctx, schema.Name, tbl, nil, nil) return errors.Trace(err) @@ -3246,9 +3251,10 @@ func (e *TiFlashSystemTableRetriever) dataForTiFlashSystemTables(ctx context.Con if !ok { return nil, errors.New("Get tiflash system tables can only run with tikv compatible storage") } - // send request to tiflash, timeout is 1s + // send request to tiflash, use 5 minutes as per-request timeout instanceID := e.instanceIDs[e.instanceIdx] - resp, err := tikvStore.GetTiKVClient().SendRequest(ctx, instanceID, &request, time.Second) + timeout := time.Duration(5*60) * time.Second + resp, err := tikvStore.GetTiKVClient().SendRequest(ctx, instanceID, &request, timeout) if err != nil { return nil, errors.Trace(err) } diff --git a/pkg/executor/infoschema_reader_test.go b/pkg/executor/infoschema_reader_test.go index 598f573b91ac7..632b71dd28616 100644 --- a/pkg/executor/infoschema_reader_test.go +++ b/pkg/executor/infoschema_reader_test.go @@ -31,6 +31,7 @@ import ( "github.com/pingcap/tidb/pkg/sessionctx/variable" "github.com/pingcap/tidb/pkg/store/mockstore" "github.com/pingcap/tidb/pkg/testkit" + "github.com/pingcap/tidb/pkg/testkit/testfailpoint" "github.com/pingcap/tidb/pkg/util/stringutil" "github.com/stretchr/testify/require" "github.com/tikv/client-go/v2/tikv" @@ -552,21 +553,23 @@ func TestShowColumnsWithSubQueryView(t *testing.T) { tk := testkit.NewTestKit(t, store) tk.MustExec("use test") - if tk.MustQuery("select @@tidb_schema_cache_size > 0").Equal(testkit.Rows("1")) { - // infoschema v2 requires network, so it cannot be tested this way. - t.Skip() - } + tk.MustExec("set @@global.tidb_schema_cache_size = 0;") + t.Cleanup(func() { + tk.MustExec("set @@global.tidb_schema_cache_size = default;") + }) tk.MustExec("CREATE TABLE added (`id` int(11), `name` text, `some_date` timestamp);") tk.MustExec("CREATE TABLE incremental (`id` int(11), `name`text, `some_date` timestamp);") tk.MustExec("create view temp_view as (select * from `added` where id > (select max(id) from `incremental`));") // Show columns should not send coprocessor request to the storage. - require.NoError(t, failpoint.Enable("tikvclient/tikvStoreSendReqResult", `return("timeout")`)) + testfailpoint.Enable(t, "tikvclient/tikvStoreSendReqResult", `return("timeout")`) + testfailpoint.Enable(t, "github.com/pingcap/tidb/pkg/planner/core/BuildDataSourceFailed", "panic") + tk.MustQuery("show columns from temp_view;").Check(testkit.Rows( "id int(11) YES ", "name text YES ", "some_date timestamp YES ")) - require.NoError(t, failpoint.Disable("tikvclient/tikvStoreSendReqResult")) + tk.MustQuery("select COLUMN_NAME from information_schema.columns where table_name = 'temp_view';").Check(testkit.Rows("id", "name", "some_date")) } // Code below are helper utilities for the test cases. 
diff --git a/pkg/executor/test/issuetest/BUILD.bazel b/pkg/executor/test/issuetest/BUILD.bazel index 584acbb753be7..d825d3bd34733 100644 --- a/pkg/executor/test/issuetest/BUILD.bazel +++ b/pkg/executor/test/issuetest/BUILD.bazel @@ -8,7 +8,7 @@ go_test( "main_test.go", ], flaky = True, - shard_count = 21, + shard_count = 22, deps = [ "//pkg/autoid_service", "//pkg/config", diff --git a/pkg/executor/test/issuetest/executor_issue_test.go b/pkg/executor/test/issuetest/executor_issue_test.go index b9e5e04a0c926..5855c1ee17e4a 100644 --- a/pkg/executor/test/issuetest/executor_issue_test.go +++ b/pkg/executor/test/issuetest/executor_issue_test.go @@ -693,3 +693,22 @@ func TestIssue53867(t *testing.T) { // Need no panic tk.MustQuery("select /*+ STREAM_AGG() */ (ref_4.c_k3kss19 / ref_4.c_k3kss19) as c2 from t_bhze93f as ref_4 where (EXISTS (select ref_5.c_wp7o_0sstj as c0 from t_bhze93f as ref_5 where (207007502 < (select distinct ref_6.c_weg as c0 from t_xf1at0 as ref_6 union all (select ref_7.c_xb as c0 from t_b0t as ref_7 where (-16090 != ref_4.c_x393ej_)) limit 1)) limit 1));") } + +func TestIssue55881(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test;") + tk.MustExec("drop table if exists aaa;") + tk.MustExec("drop table if exists bbb;") + tk.MustExec("create table aaa(id int, value int);") + tk.MustExec("create table bbb(id int, value int);") + tk.MustExec("insert into aaa values(1,2),(2,3)") + tk.MustExec("insert into bbb values(1,2),(2,3),(3,4)") + // set tidb_executor_concurrency to 1 to let the issue happens with high probability. + tk.MustExec("set tidb_executor_concurrency=1;") + // this is a random issue, so run it 100 times to increase the probability of the issue. + for i := 0; i < 100; i++ { + tk.MustQuery("with cte as (select * from aaa) select id, (select id from (select * from aaa where aaa.id != bbb.id union all select * from cte union all select * from cte) d limit 1)," + + "(select max(value) from (select * from cte union all select * from cte union all select * from aaa where aaa.id > bbb.id)) from bbb;") + } +} diff --git a/pkg/executor/union_scan_test.go b/pkg/executor/union_scan_test.go index 08478e87b5aee..3a5f2dcb206e2 100644 --- a/pkg/executor/union_scan_test.go +++ b/pkg/executor/union_scan_test.go @@ -295,6 +295,21 @@ func TestIssue32422(t *testing.T) { tk.MustExec("rollback") } +func TestSnapshotWithConcurrentWrite(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("create table t1 (id int auto_increment key, b int, index(b));") + + tk.MustExec("begin") + tk.MustExec("insert into t1 (b) values (1),(2),(3),(4),(5),(6),(7),(8);") + for j := 0; j < 16; j++ { + tk.MustExec("insert into t1 (b) select /*+ use_index(t1, b) */ id from t1;") + } + tk.MustQuery("select count(1) from t1").Check(testkit.Rows("524288")) // 8 * 2^16 rows + tk.MustExec("rollback") +} + func BenchmarkUnionScanRead(b *testing.B) { store := testkit.CreateMockStore(b) diff --git a/pkg/expression/bench_test.go b/pkg/expression/bench_test.go index 87bfe9d7905df..f9cbfcdc43522 100644 --- a/pkg/expression/bench_test.go +++ b/pkg/expression/bench_test.go @@ -1449,7 +1449,7 @@ func genVecBuiltinFuncBenchCase(ctx BuildContext, funcName string, testCase vecE case types.ETJson: fc = &castAsJSONFunctionClass{baseFunctionClass{ast.Cast, 1, 1}, tp} case types.ETString: - fc = &castAsStringFunctionClass{baseFunctionClass{ast.Cast, 1, 1}, tp} + fc = 
&castAsStringFunctionClass{baseFunctionClass{ast.Cast, 1, 1}, tp, false} } baseFunc, err = fc.getFunction(ctx, cols) } else if funcName == ast.GetVar { diff --git a/pkg/expression/builtin.go b/pkg/expression/builtin.go index 5db08b06cb151..b18a037d4d9b4 100644 --- a/pkg/expression/builtin.go +++ b/pkg/expression/builtin.go @@ -448,6 +448,35 @@ func newBaseBuiltinCastFunc(builtinFunc baseBuiltinFunc, inUnion bool) baseBuilt } } +func newBaseBuiltinCastFunc4String(ctx BuildContext, funcName string, args []Expression, tp *types.FieldType, isExplicitCharset bool) (baseBuiltinFunc, error) { + var bf baseBuiltinFunc + var err error + if isExplicitCharset { + bf = baseBuiltinFunc{ + bufAllocator: newLocalColumnPool(), + childrenVectorizedOnce: new(sync.Once), + + args: args, + tp: tp, + } + bf.SetCharsetAndCollation(tp.GetCharset(), tp.GetCollate()) + bf.setCollator(collate.GetCollator(tp.GetCollate())) + bf.SetCoercibility(CoercibilityExplicit) + bf.SetExplicitCharset(true) + if tp.GetCharset() == charset.CharsetASCII { + bf.SetRepertoire(ASCII) + } else { + bf.SetRepertoire(UNICODE) + } + } else { + bf, err = newBaseBuiltinFunc(ctx, funcName, args, tp) + if err != nil { + return baseBuiltinFunc{}, err + } + } + return bf, nil +} + // vecBuiltinFunc contains all vectorized methods for a builtin function. type vecBuiltinFunc interface { // vectorized returns if this builtin function itself supports vectorized evaluation. diff --git a/pkg/expression/builtin_cast.go b/pkg/expression/builtin_cast.go index 33ee29323f73a..3eeba359aa43e 100644 --- a/pkg/expression/builtin_cast.go +++ b/pkg/expression/builtin_cast.go @@ -270,14 +270,15 @@ func (c *castAsDecimalFunctionClass) getFunction(ctx BuildContext, args []Expres type castAsStringFunctionClass struct { baseFunctionClass - tp *types.FieldType + tp *types.FieldType + isExplicitCharset bool } func (c *castAsStringFunctionClass) getFunction(ctx BuildContext, args []Expression) (sig builtinFunc, err error) { if err := c.verifyArgs(args); err != nil { return nil, err } - bf, err := newBaseBuiltinFunc(ctx, c.funcName, args, c.tp) + bf, err := newBaseBuiltinCastFunc4String(ctx, c.funcName, args, c.tp, c.isExplicitCharset) if err != nil { return nil, err } @@ -2057,7 +2058,9 @@ func BuildCastFunction4Union(ctx BuildContext, expr Expression, tp *types.FieldT defer func() { ctx.SetValue(inUnionCastContext, nil) }() - return BuildCastFunction(ctx, expr, tp) + res, err := BuildCastFunctionWithCheck(ctx, expr, tp, false) + terror.Log(err) + return } // BuildCastCollationFunction builds a ScalarFunction which casts the collation. @@ -2092,13 +2095,13 @@ func BuildCastCollationFunction(ctx BuildContext, expr Expression, ec *ExprColla // BuildCastFunction builds a CAST ScalarFunction from the Expression. func BuildCastFunction(ctx BuildContext, expr Expression, tp *types.FieldType) (res Expression) { - res, err := BuildCastFunctionWithCheck(ctx, expr, tp) + res, err := BuildCastFunctionWithCheck(ctx, expr, tp, false) terror.Log(err) return } // BuildCastFunctionWithCheck builds a CAST ScalarFunction from the Expression and return error if any. 
-func BuildCastFunctionWithCheck(ctx BuildContext, expr Expression, tp *types.FieldType) (res Expression, err error) { +func BuildCastFunctionWithCheck(ctx BuildContext, expr Expression, tp *types.FieldType, isExplicitCharset bool) (res Expression, err error) { argType := expr.GetType() // If source argument's nullable, then target type should be nullable if !mysql.HasNotNullFlag(argType.GetFlag()) { @@ -2124,7 +2127,7 @@ func BuildCastFunctionWithCheck(ctx BuildContext, expr Expression, tp *types.Fie fc = &castAsJSONFunctionClass{baseFunctionClass{ast.Cast, 1, 1}, tp} } case types.ETString: - fc = &castAsStringFunctionClass{baseFunctionClass{ast.Cast, 1, 1}, tp} + fc = &castAsStringFunctionClass{baseFunctionClass{ast.Cast, 1, 1}, tp, isExplicitCharset} if expr.GetType().GetType() == mysql.TypeBit { tp.SetFlen((expr.GetType().GetFlen() + 7) / 8) } diff --git a/pkg/expression/builtin_cast_test.go b/pkg/expression/builtin_cast_test.go index d2c54affc2d1b..eeeeb32aca27c 100644 --- a/pkg/expression/builtin_cast_test.go +++ b/pkg/expression/builtin_cast_test.go @@ -655,7 +655,7 @@ func TestCastFuncSig(t *testing.T) { tp := types.NewFieldType(mysql.TypeVarString) tp.SetCharset(charset.CharsetBin) args := []Expression{c.before} - stringFunc, err := newBaseBuiltinFunc(ctx, "", args, tp) + stringFunc, err := newBaseBuiltinCastFunc4String(ctx, "", args, tp, false) require.NoError(t, err) switch i { case 0: @@ -742,7 +742,7 @@ func TestCastFuncSig(t *testing.T) { tp := types.NewFieldType(mysql.TypeVarString) tp.SetFlen(c.flen) tp.SetCharset(charset.CharsetBin) - stringFunc, err := newBaseBuiltinFunc(ctx, "", args, tp) + stringFunc, err := newBaseBuiltinCastFunc4String(ctx, "", args, tp, false) require.NoError(t, err) switch i { case 0: @@ -1099,7 +1099,7 @@ func TestCastFuncSig(t *testing.T) { // null case args := []Expression{&Column{RetType: types.NewFieldType(mysql.TypeDouble), Index: 0}} row := chunk.MutRowFromDatums([]types.Datum{types.NewDatum(nil)}) - bf, err := newBaseBuiltinFunc(ctx, "", args, types.NewFieldType(mysql.TypeVarString)) + bf, err := newBaseBuiltinCastFunc4String(ctx, "", args, types.NewFieldType(mysql.TypeVarString), false) require.NoError(t, err) sig = &builtinCastRealAsStringSig{bf} sRes, err := evalBuiltinFunc(sig, ctx, row.ToRow()) @@ -1694,7 +1694,7 @@ func TestCastArrayFunc(t *testing.T) { }, } for _, tt := range tbl { - f, err := BuildCastFunctionWithCheck(ctx, datumsToConstants(types.MakeDatums(types.CreateBinaryJSON(tt.input)))[0], tt.tp) + f, err := BuildCastFunctionWithCheck(ctx, datumsToConstants(types.MakeDatums(types.CreateBinaryJSON(tt.input)))[0], tt.tp, false) if !tt.buildFuncSuccess { require.Error(t, err, tt.input) continue diff --git a/pkg/expression/collation.go b/pkg/expression/collation.go index bd96f34bd41d6..a4d5b905d4bfd 100644 --- a/pkg/expression/collation.go +++ b/pkg/expression/collation.go @@ -43,6 +43,8 @@ type collationInfo struct { charset string collation string + + isExplicitCharset bool } func (c *collationInfo) HasCoercibility() bool { @@ -75,6 +77,14 @@ func (c *collationInfo) CharsetAndCollation() (string, string) { return c.charset, c.collation } +func (c *collationInfo) IsExplicitCharset() bool { + return c.isExplicitCharset +} + +func (c *collationInfo) SetExplicitCharset(explicit bool) { + c.isExplicitCharset = explicit +} + // CollationInfo contains all interfaces about dealing with collation. type CollationInfo interface { // HasCoercibility returns if the Coercibility value is initialized. 
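Note on the cast changes above: the new `isExplicitCharset` flag rides on `castAsStringFunctionClass` and, through `newBaseBuiltinCastFunc4String`, pins the target charset/collation with explicit coercibility (and an ASCII vs UNICODE repertoire) instead of re-deriving them; when the flag is false, the generic `newBaseBuiltinFunc` path is used as before. The standalone Go sketch below only models that two-path construction with simplified stand-in types; `stringCastBase` and `buildStringCast` are illustrative names, not TiDB APIs.

```go
package main

import "fmt"

// Coercibility and Repertoire mirror the expression package's enums in a
// simplified form; the names here are illustrative only.
type Coercibility int
type Repertoire int

const (
	CoercibilityImplicit Coercibility = iota
	CoercibilityExplicit
)

const (
	ASCII Repertoire = iota
	UNICODE
)

// stringCastBase is a stand-in for the base builtin function that carries
// collation metadata.
type stringCastBase struct {
	charset, collation string
	coercibility       Coercibility
	repertoire         Repertoire
	explicitCharset    bool
}

// buildStringCast models the two paths of newBaseBuiltinCastFunc4String: with
// an explicit charset the target charset/collation is kept and marked
// explicit; otherwise the connection-level defaults apply.
func buildStringCast(targetCharset, targetCollation string, explicit bool,
	connCharset, connCollation string) stringCastBase {
	if explicit {
		rep := UNICODE
		if targetCharset == "ascii" {
			rep = ASCII
		}
		return stringCastBase{
			charset:         targetCharset,
			collation:       targetCollation,
			coercibility:    CoercibilityExplicit,
			repertoire:      rep,
			explicitCharset: true,
		}
	}
	return stringCastBase{
		charset:      connCharset,
		collation:    connCollation,
		coercibility: CoercibilityImplicit,
		repertoire:   UNICODE,
	}
}

func main() {
	explicit := buildStringCast("gbk", "gbk_chinese_ci", true, "utf8mb4", "utf8mb4_bin")
	implicit := buildStringCast("", "", false, "utf8mb4", "utf8mb4_bin")
	fmt.Printf("explicit: %+v\n", explicit)
	fmt.Printf("implicit: %+v\n", implicit)
}
```

As the `util.go` hunk further down shows, the flag is read back via `IsExplicitCharset()` when casts are rebuilt during column substitution, so an explicitly requested charset is not silently replaced by the connection charset.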
@@ -97,6 +107,12 @@ type CollationInfo interface { // SetCharsetAndCollation sets charset and collation. SetCharsetAndCollation(chs, coll string) + + // IsExplicitCharset return the charset is explicit set or not. + IsExplicitCharset() bool + + // SetExplicitCharset set the charset is explicit or not. + SetExplicitCharset(bool) } // Coercibility values are used to check whether the collation of one item can be coerced to @@ -245,9 +261,8 @@ func deriveCollation(ctx BuildContext, funcName string, args []Expression, retTy case ast.Cast: // We assume all the cast are implicit. ec = &ExprCollation{args[0].Coercibility(), args[0].Repertoire(), args[0].GetType().GetCharset(), args[0].GetType().GetCollate()} - // Non-string type cast to string type should use @@character_set_connection and @@collation_connection. - // String type cast to string type should keep its original charset and collation. It should not happen. - if retType == types.ETString && argTps[0] != types.ETString { + // Cast to string type should use @@character_set_connection and @@collation_connection. + if retType == types.ETString { ec.Charset, ec.Collation = ctx.GetCharsetInfo() } return ec, nil diff --git a/pkg/expression/evaluator.go b/pkg/expression/evaluator.go index 904db0ab4bfdc..00a8dc670db4f 100644 --- a/pkg/expression/evaluator.go +++ b/pkg/expression/evaluator.go @@ -18,26 +18,6 @@ import ( "github.com/pingcap/tidb/pkg/util/chunk" ) -type columnEvaluator struct { - inputIdxToOutputIdxes map[int][]int -} - -// run evaluates "Column" expressions. -// NOTE: It should be called after all the other expressions are evaluated -// -// since it will change the content of the input Chunk. -func (e *columnEvaluator) run(ctx EvalContext, input, output *chunk.Chunk) error { - for inputIdx, outputIdxes := range e.inputIdxToOutputIdxes { - if err := output.SwapColumn(outputIdxes[0], input, inputIdx); err != nil { - return err - } - for i, length := 1, len(outputIdxes); i < length; i++ { - output.MakeRef(outputIdxes[0], outputIdxes[i]) - } - } - return nil -} - type defaultEvaluator struct { outputIdxes []int exprs []Expression @@ -78,8 +58,8 @@ func (e *defaultEvaluator) run(ctx EvalContext, vecEnabled bool, input, output * // It separates them to "column" and "other" expressions and evaluates "other" // expressions before "column" expressions. type EvaluatorSuite struct { - *columnEvaluator // Evaluator for column expressions. - *defaultEvaluator // Evaluator for other expressions. + ColumnSwapHelper *chunk.ColumnSwapHelper // Evaluator for column expressions. + *defaultEvaluator // Evaluator for other expressions. } // NewEvaluatorSuite creates an EvaluatorSuite to evaluate all the exprs. 
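On the evaluator refactor above: the private `columnEvaluator` is replaced by the exported `chunk.ColumnSwapHelper`, but the mechanism stays the same. Pure column projections are served by swapping whole columns from the input chunk into the output chunk and aliasing any extra output positions fed by the same input column, which is also why this step must run after every other expression. Below is a minimal sketch of that idea using plain slices instead of the chunk package; `columnSwapHelper` and `swapColumns` are simplified stand-ins, not the real implementation.

```go
package main

import "fmt"

// column is a toy stand-in for chunk.Column: just a slice of values.
type column []int64

// columnSwapHelper models chunk.ColumnSwapHelper: one input column may feed
// several output positions. The field name mirrors the exported
// InputIdxToOutputIdxes in the patch.
type columnSwapHelper struct {
	inputIdxToOutputIdxes map[int][]int
}

// swapColumns must run after all other output columns are filled, because it
// hands input columns over to the output (same caveat as the NOTE in the patch).
func (h *columnSwapHelper) swapColumns(input, output []column) {
	for inputIdx, outputIdxes := range h.inputIdxToOutputIdxes {
		// Move the input column into the first output slot...
		output[outputIdxes[0]], input[inputIdx] = input[inputIdx], nil
		// ...and alias every other output slot to the same column,
		// mirroring MakeRef in the original code.
		for _, outIdx := range outputIdxes[1:] {
			output[outIdx] = output[outputIdxes[0]]
		}
	}
}

func main() {
	input := []column{{1, 2, 3}, {10, 20, 30}}
	output := make([]column, 3)
	h := &columnSwapHelper{inputIdxToOutputIdxes: map[int][]int{
		0: {0, 2}, // e.g. SELECT a, b, a
		1: {1},
	}}
	h.swapColumns(input, output)
	fmt.Println(output) // [[1 2 3] [10 20 30] [1 2 3]]
}
```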
@@ -89,11 +69,11 @@ func NewEvaluatorSuite(exprs []Expression, avoidColumnEvaluator bool) *Evaluator for i := 0; i < len(exprs); i++ { if col, isCol := exprs[i].(*Column); isCol && !avoidColumnEvaluator { - if e.columnEvaluator == nil { - e.columnEvaluator = &columnEvaluator{inputIdxToOutputIdxes: make(map[int][]int)} + if e.ColumnSwapHelper == nil { + e.ColumnSwapHelper = &chunk.ColumnSwapHelper{InputIdxToOutputIdxes: make(map[int][]int)} } inputIdx, outputIdx := col.Index, i - e.columnEvaluator.inputIdxToOutputIdxes[inputIdx] = append(e.columnEvaluator.inputIdxToOutputIdxes[inputIdx], outputIdx) + e.ColumnSwapHelper.InputIdxToOutputIdxes[inputIdx] = append(e.ColumnSwapHelper.InputIdxToOutputIdxes[inputIdx], outputIdx) continue } if e.defaultEvaluator == nil { @@ -127,8 +107,10 @@ func (e *EvaluatorSuite) Run(ctx EvalContext, vecEnabled bool, input, output *ch } } - if e.columnEvaluator != nil { - return e.columnEvaluator.run(ctx, input, output) + // NOTE: It should be called after all the other expressions are evaluated + // since it will change the content of the input Chunk. + if e.ColumnSwapHelper != nil { + return e.ColumnSwapHelper.SwapColumns(input, output) } return nil } diff --git a/pkg/expression/integration_test/BUILD.bazel b/pkg/expression/integration_test/BUILD.bazel index b7b517b8762fb..eab016ca24fa1 100644 --- a/pkg/expression/integration_test/BUILD.bazel +++ b/pkg/expression/integration_test/BUILD.bazel @@ -8,7 +8,7 @@ go_test( "main_test.go", ], flaky = True, - shard_count = 25, + shard_count = 26, deps = [ "//pkg/config", "//pkg/domain", diff --git a/pkg/expression/integration_test/integration_test.go b/pkg/expression/integration_test/integration_test.go index 889d97d9545f4..3ca9ca44e833c 100644 --- a/pkg/expression/integration_test/integration_test.go +++ b/pkg/expression/integration_test/integration_test.go @@ -3002,3 +3002,52 @@ func TestIssue43527(t *testing.T) { "SELECT @total := @total + d FROM (SELECT d FROM test) AS temp, (SELECT @total := b FROM test) AS T1 where @total >= 100", ).Check(testkit.Rows("200", "300", "400", "500")) } + +func TestIssue55885(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("create table t_jg8o (c_s int not null unique ,c__qy double ,c_z int not null ,c_a90ol text not null);") + tk.MustExec("insert into t_jg8o (c_s, c__qy, c_z, c_a90ol) values" + + "(-975033779, 85.65, -355481284, 'gnip' ),(-2018599732, 85.86, 1617093413, 'm' )," + + "(-960107027, 4.6, -2042358076, 'y1q')," + + "(-3, 38.1, -1528586343, 'ex_2')," + + "(69386953, 32768.0, -62220810, 'tfkxjj5c')," + + "(587181689, -9223372036854775806.3, -1666156943, 'queemvgj')," + + "(-218561796, 85.2, -670390288, 'nf990nol')," + + "(858419954, 2147483646.0, -1649362344, 'won_9')," + + "(-1120115215, 22.100, 1509989939, 'w')," + + "(-1388119356, 94.32, -1694148464, 'gu4i4knyhm')," + + "(-1016230734, -4294967295.8, 1430313391, 's')," + + "(-1861825796, 36.52, -1457928755, 'j')," + + "(1963621165, 88.87, 18928603, 'gxbsloff' )," + + "(1492879828, cast(null as double), 759883041, 'zwue')," + + "(-1607192175, 12.36, 1669523024, 'qt5zch71a')," + + "(1534068569, 46.79, -392085130, 'bc')," + + "(155707446, 9223372036854775809.4, 1727199557, 'qyghenu9t6')," + + "(-1524976778, 75.99, 335492222, 'sdgde0z')," + + "(175403335, cast(null as double), -69711503, 'ja')," + + "(-272715456, 48.62, 753928713, 'ur')," + + "(-2035825967, 257.3, -1598426762, 'lmqmn')," + + "(-1178957955, 2147483648.100000, 1432554380, 
'dqpb210')," + + "(-2056628646, 254.5, -1476177588, 'k41ajpt7x')," + + "(-914210874, 126.7, -421919910, 'x57ud7oy1')," + + "(-88586773, 1.2, 1568247510, 'drmxi8')," + + "(-834563269, -4294967296.7, 1163133933, 'wp')," + + "(-84490060, 54.13, -630289437, '_3_twecg5h')," + + " (267700893, 54.75, 370343042, 'n72')," + + "(552106333, 32766.2, 2365745, 's7tt')," + + "(643440707, 65536.8, -850412592, 'wmluxa9a')," + + "(1709853766, -4294967296.5, -21041749, 'obqj0uu5v')," + + "(-7, 80.88, 528792379, 'n5qr9m26i')," + + "(-456431629, 28.43, 1958788149, 'b')," + + "(-28841240, 11.86, -1089765168, 'pqg')," + + "(-807839288, 25.89, 504535500, 'cs3tkhs')," + + "(-52910064, 85.16, 354032882, '_ffjo67yxe')," + + "(1919869830, 81.81, -272247558, 'aj')," + + "(165434725, -2147483648.0, 11, 'xxnsf5')," + + "(3, -2147483648.7, 1616632952, 'g7t8tqyi')," + + "(1851859144, 70.73, -1105664209, 'qjfhjr');") + + tk.MustQuery("SELECT subq_0.c3 as c1 FROM (select c_a90ol as c3, c_a90ol as c4, var_pop(cast(c__qy as double)) over (partition by c_a90ol, c_s order by c_z) as c5 from t_jg8o limit 65) as subq_0 LIMIT 37") +} diff --git a/pkg/expression/scalar_function.go b/pkg/expression/scalar_function.go index 438e8b359a1b0..bb8520f4f8287 100644 --- a/pkg/expression/scalar_function.go +++ b/pkg/expression/scalar_function.go @@ -824,6 +824,16 @@ func (sf *ScalarFunction) SetRepertoire(r Repertoire) { sf.Function.SetRepertoire(r) } +// IsExplicitCharset return the charset is explicit set or not. +func (sf *ScalarFunction) IsExplicitCharset() bool { + return sf.Function.IsExplicitCharset() +} + +// SetExplicitCharset set the charset is explicit or not. +func (sf *ScalarFunction) SetExplicitCharset(explicit bool) { + sf.Function.SetExplicitCharset(explicit) +} + const emptyScalarFunctionSize = int64(unsafe.Sizeof(ScalarFunction{})) // MemoryUsage return the memory usage of ScalarFunction diff --git a/pkg/expression/util.go b/pkg/expression/util.go index c3f92d69feabe..f1eb5b14e7c5c 100644 --- a/pkg/expression/util.go +++ b/pkg/expression/util.go @@ -455,8 +455,10 @@ func ColumnSubstituteImpl(ctx BuildContext, expr Expression, schema *Schema, new if substituted { flag := v.RetType.GetFlag() var e Expression + var err error if v.FuncName.L == ast.Cast { - e = BuildCastFunction(ctx, newArg, v.RetType) + e, err = BuildCastFunctionWithCheck(ctx, newArg, v.RetType, v.Function.IsExplicitCharset()) + terror.Log(err) } else { // for grouping function recreation, use clone (meta included) instead of newFunction e = v.Clone() diff --git a/pkg/expression/util_test.go b/pkg/expression/util_test.go index 25186011246d6..efd6a96cc1da4 100644 --- a/pkg/expression/util_test.go +++ b/pkg/expression/util_test.go @@ -593,6 +593,8 @@ func (m *MockExpr) Coercibility() Coercibility { return func (m *MockExpr) SetCoercibility(Coercibility) {} func (m *MockExpr) Repertoire() Repertoire { return UNICODE } func (m *MockExpr) SetRepertoire(Repertoire) {} +func (m *MockExpr) IsExplicitCharset() bool { return false } +func (m *MockExpr) SetExplicitCharset(bool) {} func (m *MockExpr) CharsetAndCollation() (string, string) { return "", "" diff --git a/pkg/lightning/backend/backend.go b/pkg/lightning/backend/backend.go index d72e92e20b316..b562f6b7858f6 100644 --- a/pkg/lightning/backend/backend.go +++ b/pkg/lightning/backend/backend.go @@ -90,7 +90,10 @@ type EngineConfig struct { // when opening the engine, instead of removing it. KeepSortDir bool // TS is the preset timestamp of data in the engine. When it's 0, the used TS - // will be set lazily. 
+ // will be set lazily. This is used by local backend. This field will be written + // to engineMeta.TS and take effect in below cases: + // - engineManager.openEngine + // - engineManager.closeEngine only for an external engine TS uint64 } @@ -109,13 +112,13 @@ type LocalEngineConfig struct { // ExternalEngineConfig is the configuration used for local backend external engine. type ExternalEngineConfig struct { - StorageURI string - DataFiles []string - StatFiles []string - StartKey []byte - EndKey []byte - SplitKeys [][]byte - RegionSplitSize int64 + StorageURI string + DataFiles []string + StatFiles []string + StartKey []byte + EndKey []byte + JobKeys [][]byte + SplitKeys [][]byte // TotalFileSize can be an estimated value. TotalFileSize int64 // TotalKVCount can be an estimated value. @@ -307,13 +310,6 @@ func (engine *OpenedEngine) LocalWriter(ctx context.Context, cfg *LocalWriterCon return engine.backend.LocalWriter(ctx, cfg, engine.uuid) } -// SetTS sets the TS of the engine. In most cases if the caller wants to specify -// TS it should use the TS field in EngineConfig. This method is only used after -// a ResetEngine. -func (engine *OpenedEngine) SetTS(ts uint64) { - engine.config.TS = ts -} - // UnsafeCloseEngine closes the engine without first opening it. // This method is "unsafe" as it does not follow the normal operation sequence // (Open -> Write -> Close -> Import). This method should only be used when one diff --git a/pkg/lightning/backend/external/BUILD.bazel b/pkg/lightning/backend/external/BUILD.bazel index eabceb6a5fb1e..4b39e3c41b08a 100644 --- a/pkg/lightning/backend/external/BUILD.bazel +++ b/pkg/lightning/backend/external/BUILD.bazel @@ -30,7 +30,6 @@ go_library( "//pkg/lightning/backend/encode", "//pkg/lightning/backend/kv", "//pkg/lightning/common", - "//pkg/lightning/config", "//pkg/lightning/log", "//pkg/metrics", "//pkg/util", diff --git a/pkg/lightning/backend/external/engine.go b/pkg/lightning/backend/external/engine.go index 7a42354e20acc..b562f3f5bc81b 100644 --- a/pkg/lightning/backend/external/engine.go +++ b/pkg/lightning/backend/external/engine.go @@ -29,7 +29,6 @@ import ( "github.com/pingcap/tidb/br/pkg/storage" "github.com/pingcap/tidb/pkg/kv" "github.com/pingcap/tidb/pkg/lightning/common" - "github.com/pingcap/tidb/pkg/lightning/config" "github.com/pingcap/tidb/pkg/lightning/log" "github.com/pingcap/tidb/pkg/metrics" "github.com/pingcap/tidb/pkg/util/logutil" @@ -97,6 +96,7 @@ type Engine struct { statsFiles []string startKey []byte endKey []byte + jobKeys [][]byte splitKeys [][]byte regionSplitSize int64 smallBlockBufPool *membuf.Pool @@ -127,6 +127,8 @@ type Engine struct { importedKVCount *atomic.Int64 } +var _ common.Engine = (*Engine)(nil) + const ( memLimit = 12 * units.GiB smallBlockSize = units.MiB @@ -139,8 +141,8 @@ func NewExternalEngine( statsFiles []string, startKey []byte, endKey []byte, + jobKeys [][]byte, splitKeys [][]byte, - regionSplitSize int64, keyAdapter common.KeyAdapter, duplicateDetection bool, duplicateDB *pebble.DB, @@ -153,13 +155,13 @@ func NewExternalEngine( ) common.Engine { memLimiter := membuf.NewLimiter(memLimit) return &Engine{ - storage: storage, - dataFiles: dataFiles, - statsFiles: statsFiles, - startKey: startKey, - endKey: endKey, - splitKeys: splitKeys, - regionSplitSize: regionSplitSize, + storage: storage, + dataFiles: dataFiles, + statsFiles: statsFiles, + startKey: startKey, + endKey: endKey, + jobKeys: jobKeys, + splitKeys: splitKeys, smallBlockBufPool: membuf.NewPool( membuf.WithBlockNum(0), 
membuf.WithPoolMemoryLimiter(memLimiter), @@ -257,7 +259,7 @@ func getFilesReadConcurrency( return result, startOffs, nil } -func (e *Engine) loadBatchRegionData(ctx context.Context, startKey, endKey []byte, outCh chan<- common.DataAndRange) error { +func (e *Engine) loadBatchRegionData(ctx context.Context, jobKeys [][]byte, outCh chan<- common.DataAndRanges) error { readAndSortRateHist := metrics.GlobalSortReadFromCloudStorageRate.WithLabelValues("read_and_sort") readAndSortDurHist := metrics.GlobalSortReadFromCloudStorageDuration.WithLabelValues("read_and_sort") readRateHist := metrics.GlobalSortReadFromCloudStorageRate.WithLabelValues("read") @@ -265,16 +267,16 @@ func (e *Engine) loadBatchRegionData(ctx context.Context, startKey, endKey []byt sortRateHist := metrics.GlobalSortReadFromCloudStorageRate.WithLabelValues("sort") sortDurHist := metrics.GlobalSortReadFromCloudStorageDuration.WithLabelValues("sort") + startKey := jobKeys[0] + endKey := jobKeys[len(jobKeys)-1] readStart := time.Now() - readDtStartKey := e.keyAdapter.Encode(nil, startKey, common.MinRowID) - readDtEndKey := e.keyAdapter.Encode(nil, endKey, common.MinRowID) err := readAllData( ctx, e.storage, e.dataFiles, e.statsFiles, - readDtStartKey, - readDtEndKey, + startKey, + endKey, e.smallBlockBufPool, e.largeBlockBufPool, &e.memKVsAndBuffers, @@ -329,21 +331,50 @@ func (e *Engine) loadBatchRegionData(ctx context.Context, startKey, endKey []byt e.memKVsAndBuffers.memKVBuffers = nil e.memKVsAndBuffers.size = 0 - sendFn := func(dr common.DataAndRange) error { - select { - case <-ctx.Done(): - return ctx.Err() - case outCh <- dr: + ranges := make([]common.Range, 0, len(jobKeys)-1) + prev, err2 := e.keyAdapter.Decode(nil, jobKeys[0]) + if err2 != nil { + return err + } + for i := 1; i < len(jobKeys)-1; i++ { + cur, err3 := e.keyAdapter.Decode(nil, jobKeys[i]) + if err3 != nil { + return err3 } - return nil + ranges = append(ranges, common.Range{ + Start: prev, + End: cur, + }) + prev = cur + } + // last range key may be a nextKey so we should try to remove the trailing 0 if decoding failed + nextKey := false + lastKey := jobKeys[len(jobKeys)-1] + cur, err4 := e.keyAdapter.Decode(nil, lastKey) + if err4 != nil && lastKey[len(lastKey)-1] == 0 { + nextKey = true + cur, err4 = e.keyAdapter.Decode(nil, lastKey[:len(lastKey)-1]) + } + if err4 != nil { + return err4 + } + if nextKey { + cur = kv.Key(cur).Next() } - return sendFn(common.DataAndRange{ - Data: data, - Range: common.Range{ - Start: startKey, - End: endKey, - }, + ranges = append(ranges, common.Range{ + Start: prev, + End: cur, }) + + select { + case <-ctx.Done(): + return ctx.Err() + case outCh <- common.DataAndRanges{ + Data: data, + SortedRanges: ranges, + }: + } + return nil } // LoadIngestData loads the data from the external storage to memory in [start, @@ -352,16 +383,17 @@ func (e *Engine) loadBatchRegionData(ctx context.Context, startKey, endKey []byt // MemoryIngestData.DecRef(). 
func (e *Engine) LoadIngestData( ctx context.Context, - regionRanges []common.Range, - outCh chan<- common.DataAndRange, + outCh chan<- common.DataAndRanges, ) error { // try to make every worker busy for each batch regionBatchSize := e.workerConcurrency failpoint.Inject("LoadIngestDataBatchSize", func(val failpoint.Value) { regionBatchSize = val.(int) }) - for i := 0; i < len(regionRanges); i += regionBatchSize { - err := e.loadBatchRegionData(ctx, regionRanges[i].Start, regionRanges[min(i+regionBatchSize, len(regionRanges))-1].End, outCh) + for start := 0; start < len(e.jobKeys)-1; start += regionBatchSize { + // want to generate N ranges, so we need N+1 keys + end := min(1+start+regionBatchSize, len(e.jobKeys)) + err := e.loadBatchRegionData(ctx, e.jobKeys[start:end], outCh) if err != nil { return err } @@ -385,9 +417,6 @@ func (e *Engine) buildIngestData(keys, values [][]byte, buf []*membuf.Buffer) *M } } -// LargeRegionSplitDataThreshold is exposed for test. -var LargeRegionSplitDataThreshold = int(config.SplitRegionSize) - // KVStatistics returns the total kv size and total kv count. func (e *Engine) KVStatistics() (totalKVSize int64, totalKVCount int64) { return e.totalKVSize, e.totalKVCount @@ -433,13 +462,9 @@ func (e *Engine) GetKeyRange() (startKey []byte, endKey []byte, err error) { return start, kv.Key(end).Next(), nil } -// SplitRanges split the ranges by split keys provided by external engine. -func (e *Engine) SplitRanges( - startKey, endKey []byte, - _, _ int64, - _ log.Logger, -) ([]common.Range, error) { - splitKeys := e.splitKeys +// GetRegionSplitKeys implements common.Engine. +func (e *Engine) GetRegionSplitKeys() ([][]byte, error) { + splitKeys := make([][]byte, len(e.splitKeys)) for i, k := range e.splitKeys { var err error splitKeys[i], err = e.keyAdapter.Decode(nil, k) @@ -447,18 +472,7 @@ func (e *Engine) SplitRanges( return nil, err } } - ranges := make([]common.Range, 0, len(splitKeys)+1) - ranges = append(ranges, common.Range{Start: startKey}) - for i := 0; i < len(splitKeys); i++ { - ranges[len(ranges)-1].End = splitKeys[i] - var endK []byte - if i < len(splitKeys)-1 { - endK = splitKeys[i+1] - } - ranges = append(ranges, common.Range{Start: splitKeys[i], End: endK}) - } - ranges[len(ranges)-1].End = endKey - return ranges, nil + return splitKeys, nil } // Close implements common.Engine. 
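This concludes the external engine changes: `LoadIngestData` now walks `e.jobKeys` directly, where N+1 sorted boundary keys describe N job ranges, and hands each worker-sized slice of keys to `loadBatchRegionData`, which also special-cases a trailing 0x00 "next key" when decoding the last boundary. The sketch below reproduces only the slicing arithmetic: each batch carries up to batchSize+1 keys and shares its last key with the next batch's first key. `batchJobKeys` and `keysToRanges` are illustrative helpers, not the lightning API; `min` is Go 1.21's builtin, as used in the patch.

```go
package main

import "fmt"

// batchJobKeys mirrors the loop in the patched LoadIngestData: to produce
// batchSize ranges per batch we need batchSize+1 keys, and consecutive
// batches share one boundary key.
func batchJobKeys(jobKeys [][]byte, batchSize int) [][][]byte {
	var batches [][][]byte
	for start := 0; start < len(jobKeys)-1; start += batchSize {
		end := min(1+start+batchSize, len(jobKeys))
		batches = append(batches, jobKeys[start:end])
	}
	return batches
}

// keysToRanges turns N+1 sorted boundary keys into N [start, end) ranges,
// the shape loadBatchRegionData rebuilds before sending DataAndRanges.
func keysToRanges(keys [][]byte) [][2][]byte {
	ranges := make([][2][]byte, 0, len(keys)-1)
	for i := 1; i < len(keys); i++ {
		ranges = append(ranges, [2][]byte{keys[i-1], keys[i]})
	}
	return ranges
}

func main() {
	keys := [][]byte{[]byte("a"), []byte("c"), []byte("e"), []byte("g"), []byte("i")}
	for _, b := range batchJobKeys(keys, 3) {
		fmt.Printf("batch keys=%q ranges=%q\n", b, keysToRanges(b))
	}
}
```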
diff --git a/pkg/lightning/backend/external/merge_v2.go b/pkg/lightning/backend/external/merge_v2.go index af32569d6745d..68dc4ac57ddab 100644 --- a/pkg/lightning/backend/external/merge_v2.go +++ b/pkg/lightning/backend/external/merge_v2.go @@ -79,6 +79,8 @@ func MergeOverlappingFilesV2( math.MaxInt64, int64(4*size.GB), math.MaxInt64, + math.MaxInt64, + math.MaxInt64, ) if err != nil { return err @@ -114,7 +116,7 @@ func MergeOverlappingFilesV2( var curEnd kv.Key for { - endKeyOfGroup, dataFilesOfGroup, statFilesOfGroup, _, err1 := splitter.SplitOneRangesGroup() + endKeyOfGroup, dataFilesOfGroup, statFilesOfGroup, _, _, err1 := splitter.SplitOneRangesGroup() if err1 != nil { logutil.Logger(ctx).Warn("split one ranges group failed", zap.Error(err1)) return diff --git a/pkg/lightning/backend/external/reader.go b/pkg/lightning/backend/external/reader.go index 28b3392353460..45065bd053767 100644 --- a/pkg/lightning/backend/external/reader.go +++ b/pkg/lightning/backend/external/reader.go @@ -17,6 +17,7 @@ package external import ( "bytes" "context" + "encoding/hex" "io" "time" @@ -43,8 +44,8 @@ func readAllData( task.Info("arguments", zap.Int("data-file-count", len(dataFiles)), zap.Int("stat-file-count", len(statsFiles)), - zap.Binary("start-key", startKey), - zap.Binary("end-key", endKey), + zap.String("start-key", hex.EncodeToString(startKey)), + zap.String("end-key", hex.EncodeToString(endKey)), ) defer func() { if err != nil { diff --git a/pkg/lightning/backend/external/split.go b/pkg/lightning/backend/external/split.go index 66ad46862ab62..18e5f6344b0bd 100644 --- a/pkg/lightning/backend/external/split.go +++ b/pkg/lightning/backend/external/split.go @@ -57,60 +57,67 @@ func (h *exhaustedHeap) Pop() any { return x } -// RangeSplitter is used to split key ranges of an external engine. It will -// return one group of ranges by invoking `SplitOneRangesGroup` once. +// RangeSplitter is used to split key ranges of an external engine. Please see +// NewRangeSplitter and SplitOneRangesGroup for more details. type RangeSplitter struct { rangesGroupSize int64 rangesGroupKeys int64 - rangeSize int64 - rangeKeys int64 + + rangeJobSize int64 + rangeJobKeyCnt int64 + rangeJobKeys [][]byte + + regionSplitSize int64 + regionSplitKeyCnt int64 + regionSplitKeys [][]byte propIter *MergePropIter multiFileStat []MultipleFilesStat // filename -> 2 level index in dataFiles/statFiles - activeDataFiles map[string][2]int - activeStatFiles map[string][2]int - curGroupSize int64 - curGroupKeys int64 - curRangeSize int64 - curRangeKeys int64 - recordSplitKeyAfterNextProp bool - lastDataFile string - lastStatFile string - lastRangeProperty *rangeProperty - willExhaustHeap exhaustedHeap - - rangeSplitKeysBuf [][]byte + activeDataFiles map[string][2]int + activeStatFiles map[string][2]int + curGroupSize int64 + curGroupKeyCnt int64 + curRangeJobSize int64 + curRangeJobKeyCnt int64 + recordRangeJobAfterNextProp bool + curRegionSplitSize int64 + curRegionSplitKeyCnt int64 + recordRegionSplitAfterNextProp bool + lastDataFile string + lastStatFile string + lastRangeProperty *rangeProperty + willExhaustHeap exhaustedHeap logger *zap.Logger } -// NewRangeSplitter creates a new RangeSplitter. -// `rangesGroupSize` and `rangesGroupKeys` controls the total range group -// size of one `SplitOneRangesGroup` invocation, while `rangeSize` and -// `rangeKeys` controls the size of one range. +// NewRangeSplitter creates a new RangeSplitter to process the stat files of +// `multiFileStat` stored in `externalStorage`. 
+// +// `rangesGroupSize` and `rangesGroupKeyCnt` controls the total size and key +// count limit of the ranges group returned by one `SplitOneRangesGroup` +// invocation. The ranges group may contain multiple range jobs and region split +// keys. The size and keys limit of one range job are controlled by +// `rangeJobSize` and `rangeJobKeyCnt`. The size and keys limit of intervals of +// region split keys are controlled by `regionSplitSize` and `regionSplitKeyCnt`. func NewRangeSplitter( ctx context.Context, multiFileStat []MultipleFilesStat, externalStorage storage.ExternalStorage, - rangesGroupSize, rangesGroupKeys int64, - maxRangeSize, maxRangeKeys int64, + rangesGroupSize, rangesGroupKeyCnt int64, + rangeJobSize, rangeJobKeyCnt int64, + regionSplitSize, regionSplitKeyCnt int64, ) (*RangeSplitter, error) { logger := logutil.Logger(ctx) - overlaps := make([]int64, 0, len(multiFileStat)) - fileNums := make([]int, 0, len(multiFileStat)) - for _, m := range multiFileStat { - overlaps = append(overlaps, m.MaxOverlappingNum) - fileNums = append(fileNums, len(m.Filenames)) - } logger.Info("create range splitter", - zap.Int64s("overlaps", overlaps), - zap.Ints("fileNums", fileNums), zap.Int64("rangesGroupSize", rangesGroupSize), - zap.Int64("rangesGroupKeys", rangesGroupKeys), - zap.Int64("maxRangeSize", maxRangeSize), - zap.Int64("maxRangeKeys", maxRangeKeys), + zap.Int64("rangesGroupKeyCnt", rangesGroupKeyCnt), + zap.Int64("rangeJobSize", rangeJobSize), + zap.Int64("rangeJobKeyCnt", rangeJobKeyCnt), + zap.Int64("regionSplitSize", regionSplitSize), + zap.Int64("regionSplitKeyCnt", regionSplitKeyCnt), ) propIter, err := NewMergePropIter(ctx, multiFileStat, externalStorage) if err != nil { @@ -119,15 +126,19 @@ func NewRangeSplitter( return &RangeSplitter{ rangesGroupSize: rangesGroupSize, - rangesGroupKeys: rangesGroupKeys, + rangesGroupKeys: rangesGroupKeyCnt, propIter: propIter, multiFileStat: multiFileStat, activeDataFiles: make(map[string][2]int), activeStatFiles: make(map[string][2]int), - rangeSize: maxRangeSize, - rangeKeys: maxRangeKeys, - rangeSplitKeysBuf: make([][]byte, 0, 16), + rangeJobSize: rangeJobSize, + rangeJobKeyCnt: rangeJobKeyCnt, + rangeJobKeys: make([][]byte, 0, 16), + + regionSplitSize: regionSplitSize, + regionSplitKeyCnt: regionSplitKeyCnt, + regionSplitKeys: make([][]byte, 0, 16), logger: logger, }, nil @@ -140,21 +151,19 @@ func (r *RangeSplitter) Close() error { return err } -// GetRangeSplitSize returns the expected size of one range. -func (r *RangeSplitter) GetRangeSplitSize() int64 { - return r.rangeSize -} - -// SplitOneRangesGroup splits one group of ranges. `endKeyOfGroup` represents the -// end key of the group, but it will be nil when the group is the last one. -// `dataFiles` and `statFiles` are all the files that have overlapping key ranges -// in this group. -// `rangeSplitKeys` are the internal split keys of the ranges in this group. +// SplitOneRangesGroup splits one ranges group may contain multiple range jobs +// and region split keys. `endKeyOfGroup` represents the end key of the group, +// but it will be nil when the group is the last one. `dataFiles` and `statFiles` +// are all the files that have overlapping key ranges in this group. +// `interiorRangeJobKeys` are the interior boundary keys of the range jobs, the +// range can be constructed with start/end key at caller. `regionSplitKeys` are +// the split keys that will be used later to split regions. 
func (r *RangeSplitter) SplitOneRangesGroup() ( endKeyOfGroup []byte, dataFiles []string, statFiles []string, - rangeSplitKeys [][]byte, + interiorRangeJobKeys [][]byte, + regionSplitKeys [][]byte, err error, ) { var ( @@ -165,13 +174,15 @@ func (r *RangeSplitter) SplitOneRangesGroup() ( for r.propIter.Next() { if err = r.propIter.Error(); err != nil { - return nil, nil, nil, nil, err + return nil, nil, nil, nil, nil, err } prop := r.propIter.prop() r.curGroupSize += int64(prop.size) - r.curRangeSize += int64(prop.size) - r.curGroupKeys += int64(prop.keys) - r.curRangeKeys += int64(prop.keys) + r.curRangeJobSize += int64(prop.size) + r.curRegionSplitSize += int64(prop.size) + r.curGroupKeyCnt += int64(prop.keys) + r.curRangeJobKeyCnt += int64(prop.keys) + r.curRegionSplitKeyCnt += int64(prop.keys) // if this Next call will close the last reader if *r.propIter.baseCloseReaderFlag { @@ -208,24 +219,34 @@ func (r *RangeSplitter) SplitOneRangesGroup() ( delete(r.activeStatFiles, p) } exhaustedStatFiles = exhaustedStatFiles[:0] - return prop.firstKey, retDataFiles, retStatFiles, r.takeSplitKeys(), nil + return prop.firstKey, retDataFiles, retStatFiles, r.takeRangeJobKeys(), r.takeRegionSplitKeys(), nil } - if r.recordSplitKeyAfterNextProp { - r.rangeSplitKeysBuf = append(r.rangeSplitKeysBuf, slices.Clone(prop.firstKey)) - r.recordSplitKeyAfterNextProp = false + if r.recordRangeJobAfterNextProp { + r.rangeJobKeys = append(r.rangeJobKeys, slices.Clone(prop.firstKey)) + r.recordRangeJobAfterNextProp = false + } + if r.recordRegionSplitAfterNextProp { + r.regionSplitKeys = append(r.regionSplitKeys, slices.Clone(prop.firstKey)) + r.recordRegionSplitAfterNextProp = false + } + + if r.curRangeJobSize >= r.rangeJobSize || r.curRangeJobKeyCnt >= r.rangeJobKeyCnt { + r.curRangeJobSize = 0 + r.curRangeJobKeyCnt = 0 + r.recordRangeJobAfterNextProp = true } - if r.curRangeSize >= r.rangeSize || r.curRangeKeys >= r.rangeKeys { - r.curRangeSize = 0 - r.curRangeKeys = 0 - r.recordSplitKeyAfterNextProp = true + if r.curRegionSplitSize >= r.regionSplitSize || r.curRegionSplitKeyCnt >= r.regionSplitKeyCnt { + r.curRegionSplitSize = 0 + r.curRegionSplitKeyCnt = 0 + r.recordRegionSplitAfterNextProp = true } - if r.curGroupSize >= r.rangesGroupSize || r.curGroupKeys >= r.rangesGroupKeys { + if r.curGroupSize >= r.rangesGroupSize || r.curGroupKeyCnt >= r.rangesGroupKeys { retDataFiles, retStatFiles = r.cloneActiveFiles() r.curGroupSize = 0 - r.curGroupKeys = 0 + r.curGroupKeyCnt = 0 returnAfterNextProp = true } } @@ -233,7 +254,7 @@ func (r *RangeSplitter) SplitOneRangesGroup() ( retDataFiles, retStatFiles = r.cloneActiveFiles() r.activeDataFiles = make(map[string][2]int) r.activeStatFiles = make(map[string][2]int) - return nil, retDataFiles, retStatFiles, r.takeSplitKeys(), r.propIter.Error() + return nil, retDataFiles, retStatFiles, r.takeRangeJobKeys(), r.takeRegionSplitKeys(), r.propIter.Error() } func (r *RangeSplitter) cloneActiveFiles() (data []string, stat []string) { @@ -264,9 +285,16 @@ func (r *RangeSplitter) cloneActiveFiles() (data []string, stat []string) { return dataFiles, statFiles } -func (r *RangeSplitter) takeSplitKeys() [][]byte { - ret := make([][]byte, len(r.rangeSplitKeysBuf)) - copy(ret, r.rangeSplitKeysBuf) - r.rangeSplitKeysBuf = r.rangeSplitKeysBuf[:0] +func (r *RangeSplitter) takeRangeJobKeys() [][]byte { + ret := make([][]byte, len(r.rangeJobKeys)) + copy(ret, r.rangeJobKeys) + r.rangeJobKeys = r.rangeJobKeys[:0] + return ret +} + +func (r *RangeSplitter) takeRegionSplitKeys() [][]byte { 
+ ret := make([][]byte, len(r.regionSplitKeys)) + copy(ret, r.regionSplitKeys) + r.regionSplitKeys = r.regionSplitKeys[:0] return ret } diff --git a/pkg/lightning/backend/external/split_test.go b/pkg/lightning/backend/external/split_test.go index 2c5b2ad608e3f..c98747cb0ed30 100644 --- a/pkg/lightning/backend/external/split_test.go +++ b/pkg/lightning/backend/external/split_test.go @@ -64,11 +64,12 @@ func TestGeneralProperties(t *testing.T) { require.NoError(t, err) multiFileStat := mockOneMultiFileStat(dataFiles, statFiles) splitter, err := NewRangeSplitter( - ctx, multiFileStat, memStore, 1000, 30, 1000, 1, + ctx, multiFileStat, memStore, 1000, 30, 1000, 1, 35, 1000, ) + require.NoError(t, err) var lastEndKey []byte notExhausted: - endKey, dataFiles, statFiles, splitKeys, err := splitter.SplitOneRangesGroup() + endKey, dataFiles, statFiles, rangeJobKeys, regionSplitKeys, err := splitter.SplitOneRangesGroup() require.NoError(t, err) // endKey should be strictly greater than lastEndKey @@ -82,19 +83,25 @@ notExhausted: lenStatFiles := len(statFiles) require.Equal(t, lenDataFiles, lenStatFiles) require.Greater(t, lenDataFiles, 0) - if len(splitKeys) > 0 { - // splitKeys should be strictly increasing - for i := 1; i < len(splitKeys); i++ { - cmp := bytes.Compare(splitKeys[i], splitKeys[i-1]) - require.Equal(t, 1, cmp, "splitKeys: %v", splitKeys) - } - // first splitKeys should be strictly greater than lastEndKey - cmp := bytes.Compare(splitKeys[0], lastEndKey) - require.Equal(t, 1, cmp, "splitKeys: %v, lastEndKey: %v", splitKeys, lastEndKey) - // last splitKeys should be strictly less than endKey - if endKey != nil { - cmp = bytes.Compare(splitKeys[len(splitKeys)-1], endKey) - require.Equal(t, -1, cmp, "splitKeys: %v, endKey: %v", splitKeys, endKey) + + for _, toCheck := range []struct { + varName string + keys [][]byte + }{{"rangeJobKeys", rangeJobKeys}, {"regionSplitKeys", regionSplitKeys}} { + if len(toCheck.keys) > 0 { + // keys should be strictly increasing + for i := 1; i < len(toCheck.keys); i++ { + cmp := bytes.Compare(toCheck.keys[i], toCheck.keys[i-1]) + require.Equal(t, 1, cmp, "%s: %v", toCheck.varName, toCheck.keys) + } + // first splitKeys should be strictly greater than lastEndKey + cmp := bytes.Compare(toCheck.keys[0], lastEndKey) + require.Equal(t, 1, cmp, "%s: %v, lastEndKey: %v", toCheck.varName, toCheck.keys, lastEndKey) + // last splitKeys should be strictly less than endKey + if endKey != nil { + cmp = bytes.Compare(toCheck.keys[len(toCheck.keys)-1], endKey) + require.Equal(t, -1, cmp, "%s: %v, endKey: %v", toCheck.varName, toCheck.keys, endKey) + } } } @@ -124,27 +131,29 @@ func TestOnlyOneGroup(t *testing.T) { multiFileStat := mockOneMultiFileStat(dataFiles, statFiles) splitter, err := NewRangeSplitter( - ctx, multiFileStat, memStore, 1000, 30, 1000, 10, + ctx, multiFileStat, memStore, 1000, 30, 1000, 10, int64(math.MaxInt64), int64(math.MaxInt64), ) require.NoError(t, err) - endKey, dataFiles, statFiles, splitKeys, err := splitter.SplitOneRangesGroup() + endKey, dataFiles, statFiles, rangeJobKeys, regionSplitKeys, err := splitter.SplitOneRangesGroup() require.NoError(t, err) require.Nil(t, endKey) require.Len(t, dataFiles, 1) require.Len(t, statFiles, 1) - require.Len(t, splitKeys, 0) + require.Len(t, rangeJobKeys, 0) + require.Len(t, regionSplitKeys, 0) require.NoError(t, splitter.Close()) splitter, err = NewRangeSplitter( - ctx, multiFileStat, memStore, 1000, 30, 1000, 1, + ctx, multiFileStat, memStore, 1000, 30, 1000, 1, int64(math.MaxInt64), 
int64(math.MaxInt64), ) require.NoError(t, err) - endKey, dataFiles, statFiles, splitKeys, err = splitter.SplitOneRangesGroup() + endKey, dataFiles, statFiles, rangeJobKeys, regionSplitKeys, err = splitter.SplitOneRangesGroup() require.NoError(t, err) require.Nil(t, endKey) require.Len(t, dataFiles, 1) require.Len(t, statFiles, 1) - require.Equal(t, [][]byte{{2}}, splitKeys) + require.Equal(t, [][]byte{{2}}, rangeJobKeys) + require.Len(t, regionSplitKeys, 0) require.NoError(t, splitter.Close()) } @@ -170,12 +179,12 @@ func TestSortedData(t *testing.T) { multiFileStat := mockOneMultiFileStat(dataFiles, statFiles) splitter, err := NewRangeSplitter( - ctx, multiFileStat, memStore, 1000, int64(rangesGroupKV), 1000, 10, + ctx, multiFileStat, memStore, 1000, int64(rangesGroupKV), 1000, 10, int64(math.MaxInt64), int64(math.MaxInt64), ) require.NoError(t, err) notExhausted: - endKey, dataFiles, statFiles, _, err := splitter.SplitOneRangesGroup() + endKey, dataFiles, statFiles, _, _, err := splitter.SplitOneRangesGroup() require.NoError(t, err) require.LessOrEqual(t, len(dataFiles), groupFileNumUpperBound) require.LessOrEqual(t, len(statFiles), groupFileNumUpperBound) @@ -254,60 +263,66 @@ func TestRangeSplitterStrictCase(t *testing.T) { multiFileStat := []MultipleFilesStat{multi[0], multi2[0]} // group keys = 2, region keys = 1 splitter, err := NewRangeSplitter( - ctx, multiFileStat, memStore, 1000, 2, 1000, 1, + ctx, multiFileStat, memStore, 1000, 2, 1000, 1, 1000, 1, ) require.NoError(t, err) // [key01, key03), split at key02 - endKey, dataFiles, statFiles, splitKeys, err := splitter.SplitOneRangesGroup() + endKey, dataFiles, statFiles, rangeJobKeys, regionSplitKeys, err := splitter.SplitOneRangesGroup() require.NoError(t, err) require.EqualValues(t, kv.Key("key03"), endKey) require.Equal(t, []string{"/mock-test/1/0", "/mock-test/2/0"}, dataFiles) require.Equal(t, []string{"/mock-test/1_stat/0", "/mock-test/2_stat/0"}, statFiles) - require.Equal(t, [][]byte{[]byte("key02")}, splitKeys) + require.Equal(t, [][]byte{[]byte("key02")}, rangeJobKeys) + require.Equal(t, [][]byte{[]byte("key02")}, regionSplitKeys) // [key03, key12), split at key11 - endKey, dataFiles, statFiles, splitKeys, err = splitter.SplitOneRangesGroup() + endKey, dataFiles, statFiles, rangeJobKeys, regionSplitKeys, err = splitter.SplitOneRangesGroup() require.NoError(t, err) require.EqualValues(t, kv.Key("key12"), endKey) require.Equal(t, []string{"/mock-test/1/0", "/mock-test/2/0", "/mock-test/3/0"}, dataFiles) require.Equal(t, []string{"/mock-test/1_stat/0", "/mock-test/2_stat/0", "/mock-test/3_stat/0"}, statFiles) - require.Equal(t, [][]byte{[]byte("key11")}, splitKeys) + require.Equal(t, [][]byte{[]byte("key11")}, rangeJobKeys) + require.Equal(t, [][]byte{[]byte("key11")}, regionSplitKeys) // [key12, key21), split at key13. the last key of "/mock-test/1/0" is "key11", // so it's not used - endKey, dataFiles, statFiles, splitKeys, err = splitter.SplitOneRangesGroup() + endKey, dataFiles, statFiles, rangeJobKeys, regionSplitKeys, err = splitter.SplitOneRangesGroup() require.NoError(t, err) require.EqualValues(t, kv.Key("key21"), endKey) require.Equal(t, []string{"/mock-test/2/0", "/mock-test/3/0"}, dataFiles) require.Equal(t, []string{"/mock-test/2_stat/0", "/mock-test/3_stat/0"}, statFiles) - require.Equal(t, [][]byte{[]byte("key13")}, splitKeys) + require.Equal(t, [][]byte{[]byte("key13")}, rangeJobKeys) + require.Equal(t, [][]byte{[]byte("key13")}, regionSplitKeys) // [key21, key23), split at key22. 
// the last key of "/mock-test/2/0" is "key12", and the last key of "/mock-test/3/0" is "key13", // so they are not used - endKey, dataFiles, statFiles, splitKeys, err = splitter.SplitOneRangesGroup() + endKey, dataFiles, statFiles, rangeJobKeys, regionSplitKeys, err = splitter.SplitOneRangesGroup() require.NoError(t, err) require.EqualValues(t, kv.Key("key23"), endKey) require.Equal(t, []string{"/mock-test/1/1", "/mock-test/2/1"}, dataFiles) require.Equal(t, []string{"/mock-test/1_stat/1", "/mock-test/2_stat/1"}, statFiles) - require.Equal(t, [][]byte{[]byte("key22")}, splitKeys) + require.Equal(t, [][]byte{[]byte("key22")}, rangeJobKeys) + require.Equal(t, [][]byte{[]byte("key22")}, regionSplitKeys) // [key23, nil), no split key - endKey, dataFiles, statFiles, splitKeys, err = splitter.SplitOneRangesGroup() + endKey, dataFiles, statFiles, rangeJobKeys, regionSplitKeys, err = splitter.SplitOneRangesGroup() require.NoError(t, err) require.Nil(t, endKey) require.Equal(t, []string{"/mock-test/3/1"}, dataFiles) require.Equal(t, []string{"/mock-test/3_stat/1"}, statFiles) - require.Len(t, splitKeys, 0) + require.Len(t, rangeJobKeys, 0) + require.Len(t, regionSplitKeys, 0) // read after drain all data - endKey, dataFiles, statFiles, splitKeys, err = splitter.SplitOneRangesGroup() + endKey, dataFiles, statFiles, rangeJobKeys, regionSplitKeys, err = splitter.SplitOneRangesGroup() require.NoError(t, err) require.Nil(t, endKey) require.Len(t, dataFiles, 0) require.Len(t, statFiles, 0) - require.Len(t, splitKeys, 0) + require.Len(t, rangeJobKeys, 0) + require.Len(t, regionSplitKeys, 0) require.NoError(t, splitter.Close()) } @@ -337,27 +352,29 @@ func TestExactlyKeyNum(t *testing.T) { // maxRangeKeys = 3 splitter, err := NewRangeSplitter( - ctx, multiFileStat, memStore, 1000, 100, 1000, 3, + ctx, multiFileStat, memStore, 1000, 100, 1000, 3, 1000, 3, ) require.NoError(t, err) - endKey, splitDataFiles, splitStatFiles, splitKeys, err := splitter.SplitOneRangesGroup() + endKey, splitDataFiles, splitStatFiles, rangeJobKeys, regionSplitKeys, err := splitter.SplitOneRangesGroup() require.NoError(t, err) require.Nil(t, endKey) require.Equal(t, dataFiles, splitDataFiles) require.Equal(t, statFiles, splitStatFiles) - require.Len(t, splitKeys, 0) + require.Len(t, rangeJobKeys, 0) + require.Len(t, regionSplitKeys, 0) // rangesGroupKeys = 3 splitter, err = NewRangeSplitter( - ctx, multiFileStat, memStore, 1000, 3, 1000, 1, + ctx, multiFileStat, memStore, 1000, 3, 1000, 1, 1000, 2, ) require.NoError(t, err) - endKey, splitDataFiles, splitStatFiles, splitKeys, err = splitter.SplitOneRangesGroup() + endKey, splitDataFiles, splitStatFiles, rangeJobKeys, regionSplitKeys, err = splitter.SplitOneRangesGroup() require.NoError(t, err) require.Nil(t, endKey) require.Equal(t, dataFiles, splitDataFiles) require.Equal(t, statFiles, splitStatFiles) - require.Equal(t, [][]byte{[]byte("key001"), []byte("key002")}, splitKeys) + require.Equal(t, [][]byte{[]byte("key001"), []byte("key002")}, rangeJobKeys) + require.Equal(t, [][]byte{[]byte("key002")}, regionSplitKeys) } func Test3KFilesRangeSplitter(t *testing.T) { @@ -481,11 +498,13 @@ func Test3KFilesRangeSplitter(t *testing.T) { int64(math.MaxInt64), int64(config.SplitRegionSize), int64(config.SplitRegionKeys), + int64(math.MaxInt64), + int64(math.MaxInt64), ) require.NoError(t, err) var lastEndKey []byte for { - endKey, _, statFiles, _, err := splitter.SplitOneRangesGroup() + endKey, _, statFiles, _, _, err := splitter.SplitOneRangesGroup() require.NoError(t, err) 
require.Greater(t, len(statFiles), 0) if endKey == nil { diff --git a/pkg/lightning/backend/external/testutil.go b/pkg/lightning/backend/external/testutil.go index 0851a83015919..1e5c5ad33760e 100644 --- a/pkg/lightning/backend/external/testutil.go +++ b/pkg/lightning/backend/external/testutil.go @@ -54,6 +54,8 @@ func testReadAndCompare( math.MaxInt64, 4*1024*1024*1024, math.MaxInt64, + math.MaxInt64, + math.MaxInt64, ) require.NoError(t, err) @@ -63,7 +65,7 @@ func testReadAndCompare( kvIdx := 0 for { - endKeyOfGroup, dataFilesOfGroup, statFilesOfGroup, _, err := splitter.SplitOneRangesGroup() + endKeyOfGroup, dataFilesOfGroup, statFilesOfGroup, _, _, err := splitter.SplitOneRangesGroup() require.NoError(t, err) curEnd := dbkv.Key(endKeyOfGroup).Clone() if len(endKeyOfGroup) == 0 { diff --git a/pkg/lightning/backend/local/BUILD.bazel b/pkg/lightning/backend/local/BUILD.bazel index e0978e0b2b2ee..eff91332048d6 100644 --- a/pkg/lightning/backend/local/BUILD.bazel +++ b/pkg/lightning/backend/local/BUILD.bazel @@ -60,6 +60,7 @@ go_library( "//pkg/util/compress", "//pkg/util/engine", "//pkg/util/hack", + "//pkg/util/intest", "//pkg/util/logutil", "//pkg/util/mathutil", "//pkg/util/ranger", diff --git a/pkg/lightning/backend/local/engine.go b/pkg/lightning/backend/local/engine.go index 27bba1f826071..9d4cd5ee42ec7 100644 --- a/pkg/lightning/backend/local/engine.go +++ b/pkg/lightning/backend/local/engine.go @@ -43,6 +43,7 @@ import ( "github.com/pingcap/tidb/pkg/lightning/backend/kv" "github.com/pingcap/tidb/pkg/lightning/checkpoints" "github.com/pingcap/tidb/pkg/lightning/common" + "github.com/pingcap/tidb/pkg/lightning/config" "github.com/pingcap/tidb/pkg/lightning/log" "github.com/pingcap/tidb/pkg/parser/model" "github.com/pingcap/tidb/pkg/util/hack" @@ -112,6 +113,10 @@ type Engine struct { UUID uuid.UUID localWriters sync.Map + regionSplitSize int64 + regionSplitKeyCnt int64 + regionSplitKeysCache [][]byte + // isImportingAtomic is an atomic variable indicating whether this engine is importing. // This should not be used as a "spin lock" indicator. isImportingAtomic atomic.Uint32 @@ -152,6 +157,8 @@ type Engine struct { logger log.Logger } +var _ common.Engine = (*Engine)(nil) + func (e *Engine) setError(err error) { if err != nil { e.ingestErr.Set(err) @@ -301,13 +308,17 @@ func (e *Engine) GetKeyRange() (startKey []byte, endKey []byte, err error) { return firstLey, nextKey(lastKey), nil } -// SplitRanges gets size properties from pebble and split ranges according to size/keys limit. -func (e *Engine) SplitRanges( - startKey, endKey []byte, - sizeLimit, keysLimit int64, - logger log.Logger, -) ([]common.Range, error) { - sizeProps, err := getSizePropertiesFn(logger, e.getDB(), e.keyAdapter) +// GetRegionSplitKeys implements common.Engine. 
+func (e *Engine) GetRegionSplitKeys() ([][]byte, error) { + return e.getRegionSplitKeys(e.regionSplitSize, e.regionSplitKeyCnt) +} + +func (e *Engine) getRegionSplitKeys(regionSplitSize, regionSplitKeyCnt int64) ([][]byte, error) { + sizeProps, err := getSizePropertiesFn(e.logger, e.getDB(), e.keyAdapter) + if err != nil { + return nil, errors.Trace(err) + } + startKey, endKey, err := e.GetKeyRange() if err != nil { return nil, errors.Trace(err) } @@ -315,10 +326,16 @@ func (e *Engine) SplitRanges( ranges := splitRangeBySizeProps( common.Range{Start: startKey, End: endKey}, sizeProps, - sizeLimit, - keysLimit, + regionSplitSize, + regionSplitKeyCnt, ) - return ranges, nil + keys := make([][]byte, 0, len(ranges)+1) + for _, r := range ranges { + keys = append(keys, r.Start) + } + keys = append(keys, ranges[len(ranges)-1].End) + e.regionSplitKeysCache = keys + return keys, nil } type rangeOffsets struct { @@ -1078,15 +1095,33 @@ func (e *Engine) Finish(totalBytes, totalCount int64) { // IngestData interface. func (e *Engine) LoadIngestData( ctx context.Context, - regionRanges []common.Range, - outCh chan<- common.DataAndRange, -) error { - for _, r := range regionRanges { + outCh chan<- common.DataAndRanges, +) (err error) { + jobRangeKeys := e.regionSplitKeysCache + // when the region is large, we need to split to smaller job ranges to increase + // the concurrency. + if jobRangeKeys == nil || e.regionSplitSize > 2*int64(config.SplitRegionSize) { + e.regionSplitKeysCache = nil + jobRangeKeys, err = e.getRegionSplitKeys( + int64(config.SplitRegionSize), int64(config.SplitRegionKeys), + ) + if err != nil { + return errors.Trace(err) + } + } + + prev := jobRangeKeys[0] + for i := 1; i < len(jobRangeKeys); i++ { + cur := jobRangeKeys[i] select { case <-ctx.Done(): return ctx.Err() - case outCh <- common.DataAndRange{Data: e, Range: r}: + case outCh <- common.DataAndRanges{ + Data: e, + SortedRanges: []common.Range{{Start: prev, End: cur}}, + }: } + prev = cur } return nil } diff --git a/pkg/lightning/backend/local/engine_mgr.go b/pkg/lightning/backend/local/engine_mgr.go index dfaaa62feaa7e..6a7e95447b4d2 100644 --- a/pkg/lightning/backend/local/engine_mgr.go +++ b/pkg/lightning/backend/local/engine_mgr.go @@ -312,8 +312,8 @@ func (em *engineManager) closeEngine( externalCfg.StatFiles, externalCfg.StartKey, externalCfg.EndKey, + externalCfg.JobKeys, externalCfg.SplitKeys, - externalCfg.RegionSplitSize, em.keyAdapter, em.DupeDetectEnabled, em.duplicateDB, diff --git a/pkg/lightning/backend/local/engine_test.go b/pkg/lightning/backend/local/engine_test.go index e4bfe65a76e20..59f21b3b2bf59 100644 --- a/pkg/lightning/backend/local/engine_test.go +++ b/pkg/lightning/backend/local/engine_test.go @@ -23,6 +23,7 @@ import ( "path/filepath" "sync" "testing" + "time" "github.com/cockroachdb/pebble" "github.com/cockroachdb/pebble/objstorage/objstorageprovider" @@ -34,6 +35,7 @@ import ( "github.com/pingcap/tidb/pkg/lightning/common" "github.com/pingcap/tidb/pkg/lightning/log" "github.com/stretchr/testify/require" + "github.com/tikv/client-go/v2/oracle" ) func makePebbleDB(t *testing.T, opt *pebble.Options) (*pebble.DB, string) { @@ -68,6 +70,7 @@ func TestGetEngineSizeWhenImport(t *testing.T) { keyAdapter: common.NoopKeyAdapter{}, logger: log.L(), } + f.TS = oracle.GoTimeToTS(time.Now()) f.db.Store(db) // simulate import f.lock(importMutexStateImport) @@ -106,6 +109,7 @@ func TestIngestSSTWithClosedEngine(t *testing.T) { keyAdapter: common.NoopKeyAdapter{}, logger: log.L(), } + f.TS = 
oracle.GoTimeToTS(time.Now()) f.db.Store(db) f.sstIngester = dbSSTIngester{e: f} sstPath := path.Join(tmpPath, uuid.New().String()+".sst") @@ -142,6 +146,7 @@ func TestGetFirstAndLastKey(t *testing.T) { f := &Engine{ sstDir: tmpPath, } + f.TS = oracle.GoTimeToTS(time.Now()) f.db.Store(db) err := db.Set([]byte("a"), []byte("a"), nil) require.NoError(t, err) @@ -184,6 +189,7 @@ func TestIterOutputHasUniqueMemorySpace(t *testing.T) { f := &Engine{ sstDir: tmpPath, } + f.TS = oracle.GoTimeToTS(time.Now()) f.db.Store(db) err := db.Set([]byte("a"), []byte("a"), nil) require.NoError(t, err) diff --git a/pkg/lightning/backend/local/local.go b/pkg/lightning/backend/local/local.go index 63b04cf8d68f5..01d1ce56054c2 100644 --- a/pkg/lightning/backend/local/local.go +++ b/pkg/lightning/backend/local/local.go @@ -56,6 +56,7 @@ import ( "github.com/pingcap/tidb/pkg/util" "github.com/pingcap/tidb/pkg/util/codec" "github.com/pingcap/tidb/pkg/util/engine" + "github.com/pingcap/tidb/pkg/util/intest" tikvclient "github.com/tikv/client-go/v2/tikv" pd "github.com/tikv/pd/client" pdhttp "github.com/tikv/pd/client/http" @@ -74,9 +75,6 @@ const ( dialTimeout = 5 * time.Minute maxRetryTimes = 20 defaultRetryBackoffTime = 3 * time.Second - // maxWriteAndIngestRetryTimes is the max retry times for write and ingest. - // A large retry times is for tolerating tikv cluster failures. - maxWriteAndIngestRetryTimes = 30 gRPCKeepAliveTime = 10 * time.Minute gRPCKeepAliveTimeout = 5 * time.Minute @@ -110,6 +108,10 @@ var ( errorEngineClosed = errors.New("engine is closed") maxRetryBackoffSecond = 30 + + // MaxWriteAndIngestRetryTimes is the max retry times for write and ingest. + // A large retry times is for tolerating tikv cluster failures. + MaxWriteAndIngestRetryTimes = 30 ) // ImportClientFactory is factory to create new import client for specific store. 
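The next hunks rename `readAndSplitIntoRange` to `getRegionSplitKeys` and change the representation the local backend passes around: instead of N `common.Range` values it now carries N+1 boundary keys, so the "everything fits in one region" case collapses to exactly two keys, which is why `needSplit` later becomes `len(regionSplitKeys) > 2`. Below is a minimal sketch of the two shapes and their conversion, with `keyRange` as a simplified stand-in for `common.Range`; the helper names are illustrative only.

```go
package main

import "fmt"

// keyRange is a simplified stand-in for common.Range in the patch.
type keyRange struct {
	Start, End []byte
}

// rangesToSplitKeys mirrors what the patched Engine.getRegionSplitKeys does:
// collect every range start and append the final end, yielding N+1 boundary
// keys for N contiguous ranges.
func rangesToSplitKeys(ranges []keyRange) [][]byte {
	keys := make([][]byte, 0, len(ranges)+1)
	for _, r := range ranges {
		keys = append(keys, r.Start)
	}
	return append(keys, ranges[len(ranges)-1].End)
}

// splitKeysToRanges is the inverse: the shape LoadIngestData walks when it
// emits one job per consecutive key pair.
func splitKeysToRanges(keys [][]byte) []keyRange {
	ranges := make([]keyRange, 0, len(keys)-1)
	for i := 1; i < len(keys); i++ {
		ranges = append(ranges, keyRange{Start: keys[i-1], End: keys[i]})
	}
	return ranges
}

func main() {
	ranges := []keyRange{
		{Start: []byte("a"), End: []byte("m")},
		{Start: []byte("m"), End: []byte("z")},
	}
	keys := rangesToSplitKeys(ranges)
	fmt.Printf("keys=%q\n", keys) // 3 boundary keys for 2 ranges
	fmt.Printf("roundtrip=%q\n", splitKeysToRanges(keys))
	// A single range collapses to exactly two keys, matching the patched
	// needSplit condition len(regionSplitKeys) > 2.
}
```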
@@ -823,12 +825,12 @@ func splitRangeBySizeProps(fullRange common.Range, sizeProps *sizeProperties, si return ranges } -func readAndSplitIntoRange( +func getRegionSplitKeys( ctx context.Context, engine common.Engine, sizeLimit int64, keysLimit int64, -) ([]common.Range, error) { +) ([][]byte, error) { startKey, endKey, err := engine.GetKeyRange() if err != nil { return nil, err @@ -840,17 +842,17 @@ func readAndSplitIntoRange( engineFileTotalSize, engineFileLength := engine.KVStatistics() if engineFileTotalSize <= sizeLimit && engineFileLength <= keysLimit { - ranges := []common.Range{{Start: startKey, End: endKey}} - return ranges, nil + return [][]byte{startKey, endKey}, nil } logger := log.FromContext(ctx).With(zap.String("engine", engine.ID())) - ranges, err := engine.SplitRanges(startKey, endKey, sizeLimit, keysLimit, logger) + keys, err := engine.GetRegionSplitKeys() logger.Info("split engine key ranges", - zap.Int64("totalSize", engineFileTotalSize), zap.Int64("totalCount", engineFileLength), + zap.Int64("totalSize", engineFileTotalSize), + zap.Int64("totalCount", engineFileLength), logutil.Key("startKey", startKey), logutil.Key("endKey", endKey), - zap.Int("ranges", len(ranges)), zap.Error(err)) - return ranges, err + zap.Int("len(keys)", len(keys)), zap.Error(err)) + return keys, err } // prepareAndSendJob will read the engine to get estimated key range, @@ -862,20 +864,17 @@ func readAndSplitIntoRange( func (local *Backend) prepareAndSendJob( ctx context.Context, engine common.Engine, - initialSplitRanges []common.Range, - regionSplitSize, regionSplitKeys int64, + regionSplitKeys [][]byte, + regionSplitSize, regionSplitKeyCnt int64, jobToWorkerCh chan<- *regionJob, jobWg *sync.WaitGroup, ) error { lfTotalSize, lfLength := engine.KVStatistics() - log.FromContext(ctx).Info("import engine ranges", zap.Int("count", len(initialSplitRanges))) - if len(initialSplitRanges) == 0 { - return nil - } + log.FromContext(ctx).Info("import engine ranges", zap.Int("len(regionSplitKeyCnt)", len(regionSplitKeys))) // if all the kv can fit in one region, skip split regions. TiDB will split one region for // the table when table is created. 
- needSplit := len(initialSplitRanges) > 1 || lfTotalSize > regionSplitSize || lfLength > regionSplitKeys + needSplit := len(regionSplitKeys) > 2 || lfTotalSize > regionSplitSize || lfLength > regionSplitKeyCnt // split region by given ranges failpoint.Inject("failToSplit", func(_ failpoint.Value) { needSplit = true @@ -890,7 +889,7 @@ func (local *Backend) prepareAndSendJob( failpoint.Break() }) - err = local.SplitAndScatterRegionInBatches(ctx, initialSplitRanges, maxBatchSplitRanges) + err = local.splitAndScatterRegionInBatches(ctx, regionSplitKeys, maxBatchSplitRanges) if err == nil || common.IsContextCanceledError(err) { break } @@ -916,9 +915,8 @@ func (local *Backend) prepareAndSendJob( return local.generateAndSendJob( ctx, engine, - initialSplitRanges, regionSplitSize, - regionSplitKeys, + regionSplitKeyCnt, jobToWorkerCh, jobWg, ) @@ -928,34 +926,13 @@ func (local *Backend) prepareAndSendJob( func (local *Backend) generateAndSendJob( ctx context.Context, engine common.Engine, - jobRanges []common.Range, regionSplitSize, regionSplitKeys int64, jobToWorkerCh chan<- *regionJob, jobWg *sync.WaitGroup, ) error { - logger := log.FromContext(ctx) - // for external engine, it will split into smaller data inside LoadIngestData - if localEngine, ok := engine.(*Engine); ok { - // when use dynamic region feature, the region may be very big, we need - // to split to smaller ranges to increase the concurrency. - if regionSplitSize > 2*int64(config.SplitRegionSize) { - start := jobRanges[0].Start - end := jobRanges[len(jobRanges)-1].End - sizeLimit := int64(config.SplitRegionSize) - keysLimit := int64(config.SplitRegionKeys) - jrs, err := localEngine.SplitRanges(start, end, sizeLimit, keysLimit, logger) - if err != nil { - return errors.Trace(err) - } - jobRanges = jrs - } - } - - logger.Debug("the ranges length write to tikv", zap.Int("length", len(jobRanges))) - eg, egCtx := util.NewErrorGroupWithRecoverWithCtx(ctx) - dataAndRangeCh := make(chan common.DataAndRange) + dataAndRangeCh := make(chan common.DataAndRanges) conn := local.WorkerConcurrency if _, ok := engine.(*external.Engine); ok { // currently external engine will generate a large IngestData, se we lower the @@ -980,7 +957,7 @@ func (local *Backend) generateAndSendJob( jobToWorkerCh <- ®ionJob{} time.Sleep(5 * time.Second) }) - jobs, err := local.generateJobForRange(egCtx, p.Data, p.Range, regionSplitSize, regionSplitKeys) + jobs, err := local.generateJobForRange(egCtx, p.Data, p.SortedRanges, regionSplitSize, regionSplitKeys) if err != nil { if common.IsContextCanceledError(err) { return nil @@ -1006,7 +983,7 @@ func (local *Backend) generateAndSendJob( } eg.Go(func() error { - err := engine.LoadIngestData(egCtx, jobRanges, dataAndRangeCh) + err := engine.LoadIngestData(egCtx, dataAndRangeCh) if err != nil { return errors.Trace(err) } @@ -1028,16 +1005,18 @@ var fakeRegionJobs map[[2]string]struct { func (local *Backend) generateJobForRange( ctx context.Context, data common.IngestData, - keyRange common.Range, + sortedJobRanges []common.Range, regionSplitSize, regionSplitKeys int64, ) ([]*regionJob, error) { + startOfAllRanges, endOfAllRanges := sortedJobRanges[0].Start, sortedJobRanges[len(sortedJobRanges)-1].End + failpoint.Inject("fakeRegionJobs", func() { if ctx.Err() != nil { failpoint.Return(nil, ctx.Err()) } - key := [2]string{string(keyRange.Start), string(keyRange.End)} + key := [2]string{string(startOfAllRanges), string(endOfAllRanges)} injected := fakeRegionJobs[key] - // overwrite the stage to regionScanned, because some 
time same keyRange + // overwrite the stage to regionScanned, because some time same sortedJobRanges // will be generated more than once. for _, job := range injected.jobs { job.stage = regionScanned @@ -1045,8 +1024,7 @@ func (local *Backend) generateJobForRange( failpoint.Return(injected.jobs, injected.err) }) - start, end := keyRange.Start, keyRange.End - pairStart, pairEnd, err := data.GetFirstAndLastKey(start, end) + pairStart, pairEnd, err := data.GetFirstAndLastKey(startOfAllRanges, endOfAllRanges) if err != nil { return nil, err } @@ -1056,8 +1034,8 @@ func (local *Backend) generateJobForRange( logFn = log.FromContext(ctx).Warn } logFn("There is no pairs in range", - logutil.Key("start", start), - logutil.Key("end", end)) + logutil.Key("startOfAllRanges", startOfAllRanges), + logutil.Key("endOfAllRanges", endOfAllRanges)) // trigger cleanup data.IncRef() data.DecRef() @@ -1075,27 +1053,14 @@ func (local *Backend) generateJobForRange( return nil, err } - jobs := make([]*regionJob, 0, len(regions)) - for _, region := range regions { - log.FromContext(ctx).Debug("get region", - zap.Binary("startKey", startKey), - zap.Binary("endKey", endKey), - zap.Uint64("id", region.Region.GetId()), - zap.Stringer("epoch", region.Region.GetRegionEpoch()), - zap.Binary("start", region.Region.GetStartKey()), - zap.Binary("end", region.Region.GetEndKey()), - zap.Reflect("peers", region.Region.GetPeers())) - - jobs = append(jobs, ®ionJob{ - keyRange: intersectRange(region.Region, common.Range{Start: start, End: end}), - region: region, - stage: regionScanned, - ingestData: data, - regionSplitSize: regionSplitSize, - regionSplitKeys: regionSplitKeys, - metrics: local.metrics, - }) - } + jobs := newRegionJobs(regions, data, sortedJobRanges, regionSplitSize, regionSplitKeys, local.metrics) + log.FromContext(ctx).Info("generate region jobs", + zap.Int("len(jobs)", len(jobs)), + zap.String("startOfAllRanges", hex.EncodeToString(startOfAllRanges)), + zap.String("endOfAllRanges", hex.EncodeToString(endOfAllRanges)), + zap.String("startKeyOfFirstRegion", hex.EncodeToString(regions[0].Region.GetStartKey())), + zap.String("endKeyOfLastRegion", hex.EncodeToString(regions[len(regions)-1].Region.GetEndKey())), + ) return jobs, nil } @@ -1130,7 +1095,7 @@ func (local *Backend) startWorker( jobs, err2 := local.generateJobForRange( ctx, job.ingestData, - job.keyRange, + []common.Range{job.keyRange}, job.regionSplitSize, job.regionSplitKeys, ) @@ -1275,6 +1240,18 @@ func (local *Backend) ImportEngine( engineUUID uuid.UUID, regionSplitSize, regionSplitKeys int64, ) error { + kvRegionSplitSize, kvRegionSplitKeys, err := GetRegionSplitSizeKeys(ctx, local.pdCli, local.tls) + if err == nil { + if kvRegionSplitSize > regionSplitSize { + regionSplitSize = kvRegionSplitSize + } + if kvRegionSplitKeys > regionSplitKeys { + regionSplitKeys = kvRegionSplitKeys + } + } else { + log.FromContext(ctx).Warn("fail to get region split keys and size", zap.Error(err)) + } + var e common.Engine if externalEngine, ok := local.engineMgr.getExternalEngine(engineUUID); ok { e = externalEngine @@ -1285,44 +1262,34 @@ func (local *Backend) ImportEngine( return nil } defer localEngine.unlock() + localEngine.regionSplitSize = regionSplitSize + localEngine.regionSplitKeyCnt = regionSplitKeys e = localEngine } - lfTotalSize, lfLength := e.KVStatistics() if lfTotalSize == 0 { // engine is empty, this is likes because it's a index engine but the table contains no index log.FromContext(ctx).Info("engine contains no kv, skip import", 
zap.Stringer("engine", engineUUID)) return nil } - kvRegionSplitSize, kvRegionSplitKeys, err := GetRegionSplitSizeKeys(ctx, local.pdCli, local.tls) - if err == nil { - if kvRegionSplitSize > regionSplitSize { - regionSplitSize = kvRegionSplitSize - } - if kvRegionSplitKeys > regionSplitKeys { - regionSplitKeys = kvRegionSplitKeys - } - } else { - log.FromContext(ctx).Warn("fail to get region split keys and size", zap.Error(err)) - } // split sorted file into range about regionSplitSize per file - regionRanges, err := readAndSplitIntoRange(ctx, e, regionSplitSize, regionSplitKeys) + splitKeys, err := getRegionSplitKeys(ctx, e, regionSplitSize, regionSplitKeys) if err != nil { return err } - if len(regionRanges) > 0 && local.PausePDSchedulerScope == config.PausePDSchedulerScopeTable { + if len(splitKeys) > 0 && local.PausePDSchedulerScope == config.PausePDSchedulerScopeTable { log.FromContext(ctx).Info("pause pd scheduler of table scope") subCtx, cancel := context.WithCancel(ctx) defer cancel() var startKey, endKey []byte - if len(regionRanges[0].Start) > 0 { - startKey = codec.EncodeBytes(nil, regionRanges[0].Start) + if len(splitKeys[0]) > 0 { + startKey = codec.EncodeBytes(nil, splitKeys[0]) } - if len(regionRanges[len(regionRanges)-1].End) > 0 { - endKey = codec.EncodeBytes(nil, regionRanges[len(regionRanges)-1].End) + if len(splitKeys[len(splitKeys)-1]) > 0 { + endKey = codec.EncodeBytes(nil, splitKeys[len(splitKeys)-1]) } done, err := pdutil.PauseSchedulersByKeyRange(subCtx, local.pdHTTPCli, startKey, endKey) if err != nil { @@ -1334,14 +1301,14 @@ func (local *Backend) ImportEngine( }() } - if len(regionRanges) > 0 && local.BackendConfig.RaftKV2SwitchModeDuration > 0 { + if len(splitKeys) > 0 && local.BackendConfig.RaftKV2SwitchModeDuration > 0 { log.FromContext(ctx).Info("switch import mode of ranges", - zap.String("startKey", hex.EncodeToString(regionRanges[0].Start)), - zap.String("endKey", hex.EncodeToString(regionRanges[len(regionRanges)-1].End))) + zap.String("startKey", hex.EncodeToString(splitKeys[0])), + zap.String("endKey", hex.EncodeToString(splitKeys[len(splitKeys)-1]))) subCtx, cancel := context.WithCancel(ctx) defer cancel() - done, err := local.SwitchModeByKeyRanges(subCtx, regionRanges) + done, err := local.switchModeBySplitKeys(subCtx, splitKeys) if err != nil { return errors.Trace(err) } @@ -1353,13 +1320,13 @@ func (local *Backend) ImportEngine( log.FromContext(ctx).Info("start import engine", zap.Stringer("uuid", engineUUID), - zap.Int("region ranges", len(regionRanges)), + zap.Int("region ranges", len(splitKeys)), zap.Int64("count", lfLength), zap.Int64("size", lfTotalSize)) failpoint.Inject("ReadyForImportEngine", func() {}) - err = local.doImport(ctx, e, regionRanges, regionSplitSize, regionSplitKeys) + err = local.doImport(ctx, e, splitKeys, regionSplitSize, regionSplitKeys) if err == nil { importedSize, importedLength := e.ImportedStatistics() log.FromContext(ctx).Info("import engine success", @@ -1378,7 +1345,12 @@ var ( testJobWg *sync.WaitGroup ) -func (local *Backend) doImport(ctx context.Context, engine common.Engine, regionRanges []common.Range, regionSplitSize, regionSplitKeys int64) error { +func (local *Backend) doImport( + ctx context.Context, + engine common.Engine, + regionSplitKeys [][]byte, + regionSplitSize, regionSplitKeyCnt int64, +) error { /* [prepareAndSendJob]-----jobToWorkerCh--->[workers] ^ | @@ -1432,8 +1404,20 @@ func (local *Backend) doImport(ctx context.Context, engine common.Engine, region switch job.stage { case regionScanned, 
wrote: job.retryCount++ - if job.retryCount > maxWriteAndIngestRetryTimes { - firstErr.Set(job.lastRetryableErr) + if job.retryCount > MaxWriteAndIngestRetryTimes { + lastErr := job.lastRetryableErr + intest.Assert(lastErr != nil, "lastRetryableErr should not be nil") + if lastErr == nil { + lastErr = errors.New("retry limit exceeded") + log.FromContext(ctx).Error( + "lastRetryableErr should not be nil", + logutil.Key("startKey", job.keyRange.Start), + logutil.Key("endKey", job.keyRange.End), + zap.Stringer("stage", job.stage), + zap.Error(lastErr)) + } + + firstErr.Set(lastErr) workerCancel() job.done(&jobWg) continue @@ -1478,9 +1462,9 @@ func (local *Backend) doImport(ctx context.Context, engine common.Engine, region err := local.prepareAndSendJob( workerCtx, engine, - regionRanges, - regionSplitSize, regionSplitKeys, + regionSplitSize, + regionSplitKeyCnt, jobToWorkerCh, &jobWg, ) @@ -1518,12 +1502,25 @@ func (local *Backend) ResetEngine(ctx context.Context, engineUUID uuid.UUID) err } // ResetEngineSkipAllocTS is like ResetEngine but the inner TS of the engine is -// invalid. Caller must use OpenedEngine.SetTS to set a valid TS before import +// invalid. Caller must use SetTSAfterResetEngine to set a valid TS before import // the engine. func (local *Backend) ResetEngineSkipAllocTS(ctx context.Context, engineUUID uuid.UUID) error { return local.engineMgr.resetEngine(ctx, engineUUID, true) } +// SetTSAfterResetEngine allocates a new TS for the engine after it's reset. +// This is typically called after persisting the chosen TS of the engine to make +// sure TS is not changed after task failover. +func (local *Backend) SetTSAfterResetEngine(engineUUID uuid.UUID, ts uint64) error { + e := local.engineMgr.lockEngine(engineUUID, importMutexStateClose) + if e == nil { + return errors.Errorf("engine %s not found in SetTSAfterResetEngine", engineUUID.String()) + } + defer e.unlock() + e.engineMeta.TS = ts + return e.saveEngineMeta() +} + // CleanupEngine cleanup the engine and reclaim the space. func (local *Backend) CleanupEngine(ctx context.Context, engineUUID uuid.UUID) error { return local.engineMgr.cleanupEngine(ctx, engineUUID) @@ -1572,47 +1569,43 @@ func (local *Backend) LocalWriter(ctx context.Context, cfg *backend.LocalWriterC return local.engineMgr.localWriter(ctx, cfg, engineUUID) } -// SwitchModeByKeyRanges will switch tikv mode for regions in the specific key range for multirocksdb. -// This function will spawn a goroutine to keep switch mode periodically until the context is done. -// The return done channel is used to notify the caller that the background goroutine is exited. -func (local *Backend) SwitchModeByKeyRanges(ctx context.Context, ranges []common.Range) (<-chan struct{}, error) { +// switchModeBySplitKeys will switch tikv mode for regions in the specific keys +// for multirocksdb. This function will spawn a goroutine to keep switch mode +// periodically until the context is done. The return done channel is used to +// notify the caller that the background goroutine is exited. 
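Before switchModeBySplitKeys below, one note on the TS handling added above: ResetEngineSkipAllocTS deliberately leaves the engine without a valid TS, and SetTSAfterResetEngine restores one, so the two are meant to be called in that order with a TS that was persisted beforehand. A minimal, hypothetical sketch of that flow (the helper name and persistedTS are illustrative, not part of this patch):

package local

import (
	"context"

	"github.com/google/uuid"
)

// resetEngineKeepTS is an illustrative helper, not part of this patch. It shows
// the intended order of the two calls: reset the engine without allocating a
// fresh TS, then restore the TS that was persisted before a possible failover,
// so the KV pairs written afterwards keep the same commit TS across restarts.
func resetEngineKeepTS(ctx context.Context, b *Backend, engineUUID uuid.UUID, persistedTS uint64) error {
	if err := b.ResetEngineSkipAllocTS(ctx, engineUUID); err != nil {
		return err
	}
	return b.SetTSAfterResetEngine(engineUUID, persistedTS)
}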
+func (local *Backend) switchModeBySplitKeys( + ctx context.Context, + splitKeys [][]byte, +) (<-chan struct{}, error) { switcher := NewTiKVModeSwitcher(local.tls.TLSConfig(), local.pdHTTPCli, log.FromContext(ctx).Logger) done := make(chan struct{}) - keyRanges := make([]*sst.Range, 0, len(ranges)) - for _, r := range ranges { - startKey := r.Start - if len(r.Start) > 0 { - startKey = codec.EncodeBytes(nil, r.Start) - } - endKey := r.End - if len(r.End) > 0 { - endKey = codec.EncodeBytes(nil, r.End) - } - keyRanges = append(keyRanges, &sst.Range{ - Start: startKey, - End: endKey, - }) + keyRange := &sst.Range{} + if len(splitKeys[0]) > 0 { + keyRange.Start = codec.EncodeBytes(nil, splitKeys[0]) + } + if len(splitKeys[len(splitKeys)-1]) > 0 { + keyRange.End = codec.EncodeBytes(nil, splitKeys[len(splitKeys)-1]) } go func() { defer close(done) ticker := time.NewTicker(local.BackendConfig.RaftKV2SwitchModeDuration) defer ticker.Stop() - switcher.ToImportMode(ctx, keyRanges...) + switcher.ToImportMode(ctx, keyRange) loop: for { select { case <-ctx.Done(): break loop case <-ticker.C: - switcher.ToImportMode(ctx, keyRanges...) + switcher.ToImportMode(ctx, keyRange) } } // Use a new context to avoid the context is canceled by the caller. recoverCtx, cancel := context.WithTimeout(context.Background(), time.Second*5) defer cancel() - switcher.ToNormalMode(recoverCtx, keyRanges...) + switcher.ToNormalMode(recoverCtx, keyRange) }() return done, nil } diff --git a/pkg/lightning/backend/local/local_test.go b/pkg/lightning/backend/local/local_test.go index d1060a188612e..d3a8275c68d01 100644 --- a/pkg/lightning/backend/local/local_test.go +++ b/pkg/lightning/backend/local/local_test.go @@ -59,6 +59,7 @@ import ( "github.com/pingcap/tidb/pkg/util/engine" "github.com/pingcap/tidb/pkg/util/hack" "github.com/stretchr/testify/require" + "github.com/tikv/client-go/v2/oracle" pd "github.com/tikv/pd/client" "github.com/tikv/pd/client/http" "google.golang.org/grpc" @@ -343,6 +344,7 @@ func testLocalWriter(t *testing.T, needSort bool, partitialSort bool) { keyAdapter: common.NoopKeyAdapter{}, logger: log.L(), } + f.TS = oracle.GoTimeToTS(time.Now()) f.db.Store(db) f.sstIngester = dbSSTIngester{e: f} f.wg.Add(1) @@ -433,20 +435,6 @@ func TestEngineLocalWriter(t *testing.T) { testLocalWriter(t, true, true) } -type mockSplitClient struct { - split.SplitClient -} - -func (c *mockSplitClient) GetRegion(ctx context.Context, key []byte) (*split.RegionInfo, error) { - return &split.RegionInfo{ - Leader: &metapb.Peer{Id: 1}, - Region: &metapb.Region{ - Id: 1, - StartKey: key, - }, - }, nil -} - type testIngester struct{} func (i testIngester) mergeSSTs(metas []*sstMeta, dir string, blockSize int) (*sstMeta, error) { @@ -589,6 +577,7 @@ func testMergeSSTs(t *testing.T, kvs [][]common.KvPair, meta *sstMeta) { }, logger: log.L(), } + f.TS = oracle.GoTimeToTS(time.Now()) f.db.Store(db) createSSTWriter := func() (*sstWriter, error) { @@ -1211,7 +1200,7 @@ func (m mockIngestData) NewIter(_ context.Context, lowerBound, upperBound []byte return &mockIngestIter{data: m, startIdx: i, endIdx: j, curIdx: i} } -func (m mockIngestData) GetTS() uint64 { return 0 } +func (m mockIngestData) GetTS() uint64 { return oracle.GoTimeToTS(time.Now()) } func (m mockIngestData) IncRef() {} @@ -1352,6 +1341,7 @@ func TestCheckPeersBusy(t *testing.T) { } func TestNotLeaderErrorNeedUpdatePeers(t *testing.T) { + log.InitLogger(&log.Config{Level: "debug"}, "") ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -1398,7 +1388,7 @@ 
func TestNotLeaderErrorNeedUpdatePeers(t *testing.T) { jobCh := make(chan *regionJob, 10) staleJob := ®ionJob{ - keyRange: common.Range{Start: []byte("a"), End: []byte("")}, + keyRange: common.Range{Start: []byte("a"), End: []byte("b")}, region: &split.RegionInfo{ Region: &metapb.Region{ Id: 1, @@ -1599,6 +1589,7 @@ func TestPartialWriteIngestBusy(t *testing.T) { keyAdapter: common.NoopKeyAdapter{}, logger: log.L(), } + f.TS = oracle.GoTimeToTS(time.Now()) f.db.Store(db) err = db.Set([]byte("a"), []byte("a"), nil) require.NoError(t, err) @@ -1732,14 +1723,17 @@ func TestSplitRangeAgain4BigRegion(t *testing.T) { ctx := context.Background() engineCtx, cancel := context.WithCancel(context.Background()) f := &Engine{ - UUID: engineUUID, - sstDir: tmpPath, - ctx: engineCtx, - cancel: cancel, - sstMetasChan: make(chan metaOrFlush, 64), - keyAdapter: common.NoopKeyAdapter{}, - logger: log.L(), - } + UUID: engineUUID, + sstDir: tmpPath, + ctx: engineCtx, + cancel: cancel, + sstMetasChan: make(chan metaOrFlush, 64), + keyAdapter: common.NoopKeyAdapter{}, + logger: log.L(), + regionSplitKeysCache: [][]byte{{1}, {11}}, + regionSplitSize: 1 << 30, + } + f.TS = oracle.GoTimeToTS(time.Now()) f.db.Store(db) // keys starts with 0 is meta keys, so we start with 1. for i := byte(1); i <= 10; i++ { @@ -1749,13 +1743,11 @@ func TestSplitRangeAgain4BigRegion(t *testing.T) { require.NoError(t, err) } - bigRegionRange := []common.Range{{Start: []byte{1}, End: []byte{11}}} jobCh := make(chan *regionJob, 10) jobWg := sync.WaitGroup{} err := local.generateAndSendJob( ctx, f, - bigRegionRange, 10*units.GB, 1<<30, jobCh, @@ -1763,24 +1755,23 @@ func TestSplitRangeAgain4BigRegion(t *testing.T) { ) require.NoError(t, err) require.Len(t, jobCh, 10) - for i := 0; i < 10; i++ { + for i := 0; i < 9; i++ { job := <-jobCh require.Equal(t, []byte{byte(i + 1)}, job.keyRange.Start) require.Equal(t, []byte{byte(i + 2)}, job.keyRange.End) jobWg.Done() } + // the end key of the last job is different, it's the nextKey of the last key + job := <-jobCh + require.Equal(t, []byte{10}, job.keyRange.Start) + require.Equal(t, []byte{10, 1, 0}, job.keyRange.End) + jobWg.Done() + jobWg.Wait() require.NoError(t, f.Close()) } func TestSplitRangeAgain4BigRegionExternalEngine(t *testing.T) { - t.Skip("skip due to the delay of dynamic region feature, and external engine changed its behaviour") - backup := external.LargeRegionSplitDataThreshold - external.LargeRegionSplitDataThreshold = 1 - t.Cleanup(func() { - external.LargeRegionSplitDataThreshold = backup - }) - ctx := context.Background() local := &Backend{ splitCli: initTestSplitClient( @@ -1789,7 +1780,6 @@ func TestSplitRangeAgain4BigRegionExternalEngine(t *testing.T) { ), } local.BackendConfig.WorkerConcurrency = 1 - bigRegionRange := []common.Range{{Start: []byte{1}, End: []byte{11}}} keys := make([][]byte, 0, 10) value := make([][]byte, 0, 10) @@ -1808,8 +1798,8 @@ func TestSplitRangeAgain4BigRegionExternalEngine(t *testing.T) { statFiles, []byte{1}, []byte{10}, + keys, [][]byte{{1}, {11}}, - 1<<30, common.NoopKeyAdapter{}, false, nil, @@ -1821,24 +1811,23 @@ func TestSplitRangeAgain4BigRegionExternalEngine(t *testing.T) { true, ) - jobCh := make(chan *regionJob, 10) + jobCh := make(chan *regionJob, 9) jobWg := sync.WaitGroup{} err = local.generateAndSendJob( ctx, extEngine, - bigRegionRange, 10*units.GB, 1<<30, jobCh, &jobWg, ) require.NoError(t, err) - require.Len(t, jobCh, 10) - for i := 0; i < 10; i++ { + require.Len(t, jobCh, 9) + for i := 0; i < 9; i++ { job := <-jobCh 
require.Equal(t, []byte{byte(i + 1)}, job.keyRange.Start) require.Equal(t, []byte{byte(i + 2)}, job.keyRange.End) - firstKey, lastKey, err := job.ingestData.GetFirstAndLastKey(nil, nil) + firstKey, lastKey, err := job.ingestData.GetFirstAndLastKey(job.keyRange.Start, job.keyRange.End) require.NoError(t, err) require.Equal(t, []byte{byte(i + 1)}, firstKey) require.Equal(t, []byte{byte(i + 1)}, lastKey) @@ -1900,11 +1889,7 @@ func TestDoImport(t *testing.T) { // - one job need rescan when ingest // - one job need retry when write - initRanges := []common.Range{ - {Start: []byte{'a'}, End: []byte{'b'}}, - {Start: []byte{'b'}, End: []byte{'c'}}, - {Start: []byte{'c'}, End: []byte{'d'}}, - } + initRegionKeys := [][]byte{{'a'}, {'b'}, {'c'}, {'d'}} fakeRegionJobs = map[[2]string]struct { jobs []*regionJob err error @@ -1999,8 +1984,8 @@ func TestDoImport(t *testing.T) { WorkerConcurrency: 2, }, } - e := &Engine{} - err := l.doImport(ctx, e, initRanges, int64(config.SplitRegionSize), int64(config.SplitRegionKeys)) + e := &Engine{regionSplitKeysCache: initRegionKeys} + err := l.doImport(ctx, e, initRegionKeys, int64(config.SplitRegionSize), int64(config.SplitRegionKeys)) require.NoError(t, err) for _, v := range fakeRegionJobs { for _, job := range v.jobs { @@ -2027,7 +2012,7 @@ func TestDoImport(t *testing.T) { err: errors.New("meet error when generateJobForRange"), }, } - err = l.doImport(ctx, e, initRanges, int64(config.SplitRegionSize), int64(config.SplitRegionKeys)) + err = l.doImport(ctx, e, initRegionKeys, int64(config.SplitRegionSize), int64(config.SplitRegionKeys)) require.ErrorContains(t, err, "meet error when generateJobForRange") // test second call to generateJobForRange (needRescan) meet error @@ -2072,7 +2057,7 @@ func TestDoImport(t *testing.T) { err: errors.New("meet error when generateJobForRange again"), }, } - err = l.doImport(ctx, e, initRanges, int64(config.SplitRegionSize), int64(config.SplitRegionKeys)) + err = l.doImport(ctx, e, initRegionKeys, int64(config.SplitRegionSize), int64(config.SplitRegionKeys)) require.ErrorContains(t, err, "meet error when generateJobForRange again") // test write meet unretryable error @@ -2087,7 +2072,7 @@ func TestDoImport(t *testing.T) { { keyRange: common.Range{Start: []byte{'a'}, End: []byte{'b'}}, ingestData: &Engine{}, - retryCount: maxWriteAndIngestRetryTimes - 1, + retryCount: MaxWriteAndIngestRetryTimes - 1, injected: getSuccessInjectedBehaviour(), }, }, @@ -2097,7 +2082,7 @@ func TestDoImport(t *testing.T) { { keyRange: common.Range{Start: []byte{'b'}, End: []byte{'c'}}, ingestData: &Engine{}, - retryCount: maxWriteAndIngestRetryTimes - 1, + retryCount: MaxWriteAndIngestRetryTimes - 1, injected: getSuccessInjectedBehaviour(), }, }, @@ -2107,7 +2092,7 @@ func TestDoImport(t *testing.T) { { keyRange: common.Range{Start: []byte{'c'}, End: []byte{'d'}}, ingestData: &Engine{}, - retryCount: maxWriteAndIngestRetryTimes - 2, + retryCount: MaxWriteAndIngestRetryTimes - 2, injected: []injectedBehaviour{ { write: injectedWriteBehaviour{ @@ -2120,7 +2105,7 @@ func TestDoImport(t *testing.T) { }, }, } - err = l.doImport(ctx, e, initRanges, int64(config.SplitRegionSize), int64(config.SplitRegionKeys)) + err = l.doImport(ctx, e, initRegionKeys, int64(config.SplitRegionSize), int64(config.SplitRegionKeys)) require.ErrorContains(t, err, "fatal error") for _, v := range fakeRegionJobs { for _, job := range v.jobs { @@ -2145,9 +2130,7 @@ func TestRegionJobResetRetryCounter(t *testing.T) { // test that job need rescan when ingest - initRanges := 
[]common.Range{ - {Start: []byte{'c'}, End: []byte{'d'}}, - } + initRegionKeys := [][]byte{{'c'}, {'d'}} fakeRegionJobs = map[[2]string]struct { jobs []*regionJob err error @@ -2158,13 +2141,13 @@ func TestRegionJobResetRetryCounter(t *testing.T) { keyRange: common.Range{Start: []byte{'c'}, End: []byte{'c', '2'}}, ingestData: &Engine{}, injected: getNeedRescanWhenIngestBehaviour(), - retryCount: maxWriteAndIngestRetryTimes, + retryCount: MaxWriteAndIngestRetryTimes, }, { keyRange: common.Range{Start: []byte{'c', '2'}, End: []byte{'d'}}, ingestData: &Engine{}, injected: getSuccessInjectedBehaviour(), - retryCount: maxWriteAndIngestRetryTimes, + retryCount: MaxWriteAndIngestRetryTimes, }, }, }, @@ -2185,8 +2168,8 @@ func TestRegionJobResetRetryCounter(t *testing.T) { WorkerConcurrency: 2, }, } - e := &Engine{} - err := l.doImport(ctx, e, initRanges, int64(config.SplitRegionSize), int64(config.SplitRegionKeys)) + e := &Engine{regionSplitKeysCache: initRegionKeys} + err := l.doImport(ctx, e, initRegionKeys, int64(config.SplitRegionSize), int64(config.SplitRegionKeys)) require.NoError(t, err) for _, v := range fakeRegionJobs { for _, job := range v.jobs { @@ -2213,10 +2196,7 @@ func TestCtxCancelIsIgnored(t *testing.T) { _ = failpoint.Disable("github.com/pingcap/tidb/pkg/lightning/backend/local/WriteToTiKVNotEnoughDiskSpace") }) - initRanges := []common.Range{ - {Start: []byte{'c'}, End: []byte{'d'}}, - {Start: []byte{'d'}, End: []byte{'e'}}, - } + initRegionKeys := [][]byte{{'c'}, {'d'}, {'e'}} fakeRegionJobs = map[[2]string]struct { jobs []*regionJob err error @@ -2247,8 +2227,8 @@ func TestCtxCancelIsIgnored(t *testing.T) { WorkerConcurrency: 1, }, } - e := &Engine{} - err := l.doImport(ctx, e, initRanges, int64(config.SplitRegionSize), int64(config.SplitRegionKeys)) + e := &Engine{regionSplitKeysCache: initRegionKeys} + err := l.doImport(ctx, e, initRegionKeys, int64(config.SplitRegionSize), int64(config.SplitRegionKeys)) require.ErrorContains(t, err, "the remaining storage capacity of TiKV") } @@ -2270,9 +2250,7 @@ func TestWorkerFailedWhenGeneratingJobs(t *testing.T) { _ = failpoint.Disable("github.com/pingcap/tidb/pkg/lightning/backend/local/WriteToTiKVNotEnoughDiskSpace") }) - initRanges := []common.Range{ - {Start: []byte{'c'}, End: []byte{'d'}}, - } + initRegionKeys := [][]byte{{'c'}, {'d'}} ctx := context.Background() l := &Backend{ @@ -2284,11 +2262,32 @@ func TestWorkerFailedWhenGeneratingJobs(t *testing.T) { panicSplitRegionClient{}, ), } - e := &Engine{} - err := l.doImport(ctx, e, initRanges, int64(config.SplitRegionSize), int64(config.SplitRegionKeys)) + e := &Engine{regionSplitKeysCache: initRegionKeys} + err := l.doImport(ctx, e, initRegionKeys, int64(config.SplitRegionSize), int64(config.SplitRegionKeys)) require.ErrorContains(t, err, "the remaining storage capacity of TiKV") } +type recordScanRegionsHook struct { + beforeScanRegions [][2][]byte +} + +func (r *recordScanRegionsHook) BeforeSplitRegion(ctx context.Context, regionInfo *split.RegionInfo, keys [][]byte) (*split.RegionInfo, [][]byte) { + return regionInfo, keys +} + +func (r *recordScanRegionsHook) AfterSplitRegion(ctx context.Context, info *split.RegionInfo, i [][]byte, infos []*split.RegionInfo, err error) ([]*split.RegionInfo, error) { + return infos, err +} + +func (r *recordScanRegionsHook) BeforeScanRegions(ctx context.Context, key, endKey []byte, limit int) ([]byte, []byte, int) { + r.beforeScanRegions = append(r.beforeScanRegions, [2][]byte{key, endKey}) + return key, endKey, limit +} + +func (r 
*recordScanRegionsHook) AfterScanRegions(infos []*split.RegionInfo, err error) ([]*split.RegionInfo, error) { + return infos, err +} + func TestExternalEngine(t *testing.T) { _ = failpoint.Enable("github.com/pingcap/tidb/pkg/lightning/backend/local/skipSplitAndScatter", "return()") _ = failpoint.Enable("github.com/pingcap/tidb/pkg/lightning/backend/local/skipStartWorker", "return()") @@ -2326,11 +2325,13 @@ func TestExternalEngine(t *testing.T) { StatFiles: statFiles, StartKey: keys[0], EndKey: endKey, - SplitKeys: [][]byte{keys[20], keys[30], keys[50], keys[60], keys[80], keys[90]}, + JobKeys: [][]byte{keys[0], keys[20], keys[30], keys[50], keys[60], keys[80], keys[90], endKey}, + SplitKeys: [][]byte{keys[0], keys[50], endKey}, TotalFileSize: int64(config.SplitRegionSize) + 1, TotalKVCount: int64(config.SplitRegionKeys) + 1, } engineUUID := uuid.New() + hook := &recordScanRegionsHook{} local := &Backend{ BackendConfig: BackendConfig{ WorkerConcurrency: 2, @@ -2338,7 +2339,7 @@ func TestExternalEngine(t *testing.T) { }, splitCli: initTestSplitClient([][]byte{ keys[0], keys[50], endKey, - }, nil), + }, hook), pdCli: &mockPdClient{}, } local.engineMgr, err = newEngineManager(local.BackendConfig, local, local.logger) @@ -2350,7 +2351,7 @@ func TestExternalEngine(t *testing.T) { done := make(chan struct{}) go func() { - for i := 0; i < 5; i++ { + for i := 0; i < 7; i++ { jobs = append(jobs, <-jobToWorkerCh) testJobWg.Done() } @@ -2376,10 +2377,12 @@ func TestExternalEngine(t *testing.T) { return bytes.Compare(jobs[i].keyRange.Start, jobs[j].keyRange.Start) < 0 }) expectedKeyRanges := []common.Range{ - {Start: keys[0], End: keys[30]}, + {Start: keys[0], End: keys[20]}, + {Start: keys[20], End: keys[30]}, {Start: keys[30], End: keys[50]}, {Start: keys[50], End: keys[60]}, - {Start: keys[60], End: keys[90]}, + {Start: keys[60], End: keys[80]}, + {Start: keys[80], End: keys[90]}, {Start: keys[90], End: endKey}, } kvIdx := 0 @@ -2395,6 +2398,13 @@ func TestExternalEngine(t *testing.T) { require.NoError(t, iter.Close()) } require.Equal(t, 100, kvIdx) + + require.Equal(t, [][2][]byte{ + {codec.EncodeBytes(nil, keys[0]), codec.EncodeBytes(nil, nextKey(keys[29]))}, + {codec.EncodeBytes(nil, keys[30]), codec.EncodeBytes(nil, nextKey(keys[59]))}, + {codec.EncodeBytes(nil, keys[60]), codec.EncodeBytes(nil, nextKey(keys[89]))}, + {codec.EncodeBytes(nil, keys[90]), codec.EncodeBytes(nil, nextKey(keys[99]))}, + }, hook.beforeScanRegions) } func TestCheckDiskAvail(t *testing.T) { diff --git a/pkg/lightning/backend/local/localhelper.go b/pkg/lightning/backend/local/localhelper.go index e7783e4ab2397..e042a78c5cdf5 100644 --- a/pkg/lightning/backend/local/localhelper.go +++ b/pkg/lightning/backend/local/localhelper.go @@ -26,11 +26,9 @@ import ( sst "github.com/pingcap/kvproto/pkg/import_sstpb" "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/tidb/pkg/lightning/checkpoints" - "github.com/pingcap/tidb/pkg/lightning/common" "github.com/pingcap/tidb/pkg/lightning/log" "github.com/pingcap/tidb/pkg/lightning/metric" "github.com/pingcap/tidb/pkg/lightning/mydump" - "github.com/pingcap/tidb/pkg/util/codec" "github.com/pingcap/tidb/pkg/util/mathutil" "go.uber.org/zap" "golang.org/x/time/rate" @@ -43,34 +41,30 @@ var ( splitRegionBaseBackOffTime = time.Second ) -// SplitAndScatterRegionInBatches splits&scatter regions in batches. +// splitAndScatterRegionInBatches splits&scatter regions in batches. // Too many split&scatter requests may put a lot of pressure on TiKV and PD. 
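To make the batching helper defined just below concrete: with batchCnt set to maxBatchSplitRanges, it issues one SplitKeysAndScatter call per chunk of split keys, so a long key list never turns into a single oversized request. A standalone sketch of the same chunking (the numbers are only an example, not values from this patch):

package main

import "fmt"

func main() {
	splitKeys := make([][]byte, 10) // stand-ins for a sorted split-key list
	batchCnt := 4                   // stand-in for maxBatchSplitRanges
	for i := 0; i < len(splitKeys); i += batchCnt {
		batch := splitKeys[i:]
		if len(batch) > batchCnt {
			batch = batch[:batchCnt]
		}
		// Each batch would be passed to splitCli.SplitKeysAndScatter.
		fmt.Printf("batch %d..%d: %d keys\n", i, i+len(batch)-1, len(batch))
	}
	// Prints: batch 0..3: 4 keys, batch 4..7: 4 keys, batch 8..9: 2 keys.
}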
-func (local *Backend) SplitAndScatterRegionInBatches( +func (local *Backend) splitAndScatterRegionInBatches( ctx context.Context, - ranges []common.Range, + splitKeys [][]byte, batchCnt int, ) error { - for i := 0; i < len(ranges); i += batchCnt { - batch := ranges[i:] + for i := 0; i < len(splitKeys); i += batchCnt { + batch := splitKeys[i:] if len(batch) > batchCnt { batch = batch[:batchCnt] } - if err := local.SplitAndScatterRegionByRanges(ctx, batch); err != nil { + if err := local.splitAndScatterRegionByRanges(ctx, batch); err != nil { return errors.Trace(err) } } return nil } -// SplitAndScatterRegionByRanges include region split & scatter operation just like br. -// we can simply call br function, but we need to change some function signature of br -// When the ranges total size is small, we can skip the split to avoid generate empty regions. -// TODO: remove this file and use br internal functions -func (local *Backend) SplitAndScatterRegionByRanges( +func (local *Backend) splitAndScatterRegionByRanges( ctx context.Context, - ranges []common.Range, + splitKeys [][]byte, ) (err error) { - if len(ranges) == 0 { + if len(splitKeys) == 0 { return nil } @@ -83,7 +77,7 @@ func (local *Backend) SplitAndScatterRegionByRanges( }() } - scatterRegions, err := local.splitCli.SplitKeysAndScatter(ctx, getSplitKeysByRanges(ranges)) + scatterRegions, err := local.splitCli.SplitKeysAndScatter(ctx, splitKeys) if err != nil { return errors.Trace(err) } @@ -111,19 +105,6 @@ func (local *Backend) hasRegion(ctx context.Context, regionID uint64) (bool, err return regionInfo != nil, nil } -func getSplitKeysByRanges(ranges []common.Range) [][]byte { - checkKeys := make([][]byte, 0) - var lastEnd []byte - for _, rg := range ranges { - if !bytes.Equal(lastEnd, rg.Start) { - checkKeys = append(checkKeys, rg.Start) - } - checkKeys = append(checkKeys, rg.End) - lastEnd = rg.End - } - return checkKeys -} - func beforeEnd(key []byte, end []byte) bool { return bytes.Compare(key, end) < 0 || len(end) == 0 } @@ -141,22 +122,11 @@ func keyInsideRegion(region *metapb.Region, key []byte) bool { return bytes.Compare(key, region.GetStartKey()) >= 0 && (beforeEnd(key, region.GetEndKey())) } -func intersectRange(region *metapb.Region, rg common.Range) common.Range { - var startKey, endKey []byte - if len(region.StartKey) > 0 { - _, startKey, _ = codec.DecodeBytes(region.StartKey, []byte{}) +func largerStartKey(a, b []byte) []byte { + if bytes.Compare(a, b) > 0 { + return a } - if bytes.Compare(startKey, rg.Start) < 0 { - startKey = rg.Start - } - if len(region.EndKey) > 0 { - _, endKey, _ = codec.DecodeBytes(region.EndKey, []byte{}) - } - if beforeEnd(rg.End, endKey) { - endKey = rg.End - } - - return common.Range{Start: startKey, End: endKey} + return b } // StoreWriteLimiter is used to limit the write rate of a store. 
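Before moving on to region_job.go: the removed getSplitKeysByRanges turned a sorted range list into a de-duplicated key list; after this patch the sorted split-key list is the primary representation, and a job-range list is recovered simply by pairing adjacent keys, which is the shape newRegionJobs below consumes as sortedJobRanges. A minimal sketch of that pairing (the helper name is illustrative, not part of this patch):

package local

import "github.com/pingcap/tidb/pkg/lightning/common"

// pairAdjacentKeys is illustrative, not part of this patch: n sorted keys yield
// the n-1 half-open ranges [keys[i], keys[i+1]) that each become one job range.
func pairAdjacentKeys(sortedKeys [][]byte) []common.Range {
	ranges := make([]common.Range, 0, len(sortedKeys)-1)
	for i := 0; i+1 < len(sortedKeys); i++ {
		ranges = append(ranges, common.Range{Start: sortedKeys[i], End: sortedKeys[i+1]})
	}
	return ranges
}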
diff --git a/pkg/lightning/backend/local/region_job.go b/pkg/lightning/backend/local/region_job.go index 7bc812e4b9bb6..60a51d6aa0367 100644 --- a/pkg/lightning/backend/local/region_job.go +++ b/pkg/lightning/backend/local/region_job.go @@ -15,6 +15,7 @@ package local import ( + "bytes" "container/heap" "context" "fmt" @@ -39,6 +40,8 @@ import ( "github.com/pingcap/tidb/pkg/lightning/log" "github.com/pingcap/tidb/pkg/lightning/metric" "github.com/pingcap/tidb/pkg/util/codec" + "github.com/pingcap/tidb/pkg/util/intest" + "github.com/tikv/client-go/v2/oracle" "github.com/tikv/client-go/v2/util" "go.uber.org/zap" "google.golang.org/grpc" @@ -138,6 +141,113 @@ type injectedIngestBehaviour struct { err error } +func newRegionJob( + region *split.RegionInfo, + data common.IngestData, + jobStart []byte, + jobEnd []byte, + regionSplitSize int64, + regionSplitKeys int64, + metrics *metric.Common, +) *regionJob { + log.L().Debug("new region job", + zap.Binary("jobStart", jobStart), + zap.Binary("jobEnd", jobEnd), + zap.Uint64("id", region.Region.GetId()), + zap.Stringer("epoch", region.Region.GetRegionEpoch()), + zap.Binary("regionStart", region.Region.GetStartKey()), + zap.Binary("regionEnd", region.Region.GetEndKey()), + zap.Reflect("peers", region.Region.GetPeers())) + return ®ionJob{ + keyRange: common.Range{Start: jobStart, End: jobEnd}, + region: region, + stage: regionScanned, + ingestData: data, + regionSplitSize: regionSplitSize, + regionSplitKeys: regionSplitKeys, + metrics: metrics, + } +} + +// newRegionJobs creates a list of regionJob from the given regions and job +// ranges. +// +// pre-condition: +// - sortedRegions must be non-empty, sorted and continuous +// - sortedJobRanges must be non-empty, sorted and continuous +// - sortedRegions can cover sortedJobRanges +func newRegionJobs( + sortedRegions []*split.RegionInfo, + data common.IngestData, + sortedJobRanges []common.Range, + regionSplitSize int64, + regionSplitKeys int64, + metrics *metric.Common, +) []*regionJob { + var ( + lenRegions = len(sortedRegions) + lenJobRanges = len(sortedJobRanges) + ret = make([]*regionJob, 0, max(lenRegions, lenJobRanges)*2) + + curRegionIdx = 0 + curRegion = sortedRegions[curRegionIdx].Region + curRegionStart []byte + curRegionEnd []byte + ) + + _, curRegionStart, _ = codec.DecodeBytes(curRegion.StartKey, nil) + _, curRegionEnd, _ = codec.DecodeBytes(curRegion.EndKey, nil) + + for _, jobRange := range sortedJobRanges { + // build the job and move to next region for these cases: + // + // --region--) or -----region--) + // -------job range--) --job range--) + for !beforeEnd(jobRange.End, curRegionEnd) { + ret = append(ret, newRegionJob( + sortedRegions[curRegionIdx], + data, + largerStartKey(jobRange.Start, curRegionStart), + curRegionEnd, + regionSplitSize, + regionSplitKeys, + metrics, + )) + + curRegionIdx++ + if curRegionIdx >= lenRegions { + return ret + } + curRegion = sortedRegions[curRegionIdx].Region + _, curRegionStart, _ = codec.DecodeBytes(curRegion.StartKey, nil) + _, curRegionEnd, _ = codec.DecodeBytes(curRegion.EndKey, nil) + } + + // now we can make sure + // + // --region--) + // --job range--) + // + // only need to handle the case that job range has remaining part after above loop: + // + // [----region--) + // --job range--) + if bytes.Compare(curRegionStart, jobRange.End) < 0 { + ret = append(ret, newRegionJob( + sortedRegions[curRegionIdx], + data, + largerStartKey(jobRange.Start, curRegionStart), + jobRange.End, + regionSplitSize, + regionSplitKeys, + metrics, + )) + } + } + 
+ return ret +} + func (j *regionJob) convertStageTo(stage jobStageTp) { j.stage = stage switch stage { @@ -320,6 +430,20 @@ func (local *Backend) doWrite(ctx context.Context, j *regionJob) error { allPeers = append(allPeers, peer) } dataCommitTS := j.ingestData.GetTS() + intest.AssertFunc(func() bool { + timeOfTS := oracle.GetTimeFromTS(dataCommitTS) + now := time.Now() + if timeOfTS.After(now) { + return false + } + if now.Sub(timeOfTS) > 24*time.Hour { + return false + } + return true + }, "TS used in import should in [now-1d, now], but got %d", dataCommitTS) + if dataCommitTS == 0 { + return errors.New("data commitTS is 0") + } req.Chunk = &sst.WriteRequest_Batch{ Batch: &sst.WriteBatch{ CommitTs: dataCommitTS, @@ -522,6 +646,7 @@ func (local *Backend) ingest(ctx context.Context, j *regionJob) (err error) { log.FromContext(ctx).Warn("meet underlying error, will retry ingest", log.ShortError(err), logutil.SSTMetas(j.writeResult.sstMeta), logutil.Region(j.region.Region), logutil.Leader(j.region.Leader)) + j.lastRetryableErr = err continue } canContinue, err := j.convertStageOnIngestError(resp) @@ -572,6 +697,9 @@ func (local *Backend) checkWriteStall( // doIngest send ingest commands to TiKV based on regionJob.writeResult.sstMeta. // When meet error, it will remove finished sstMetas before return. func (local *Backend) doIngest(ctx context.Context, j *regionJob) (*sst.IngestResponse, error) { + failpoint.Inject("doIngestFailed", func() { + failpoint.Return(nil, errors.New("injected error")) + }) clientFactory := local.importClientFactory supportMultiIngest := local.supportMultiIngest shouldCheckWriteStall := local.ShouldCheckWriteStall diff --git a/pkg/lightning/backend/local/region_job_test.go b/pkg/lightning/backend/local/region_job_test.go index beb50fa55d8ea..c3ed8048863ec 100644 --- a/pkg/lightning/backend/local/region_job_test.go +++ b/pkg/lightning/backend/local/region_job_test.go @@ -25,6 +25,7 @@ import ( "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/tidb/br/pkg/restore/split" "github.com/pingcap/tidb/pkg/lightning/common" + "github.com/pingcap/tidb/pkg/util/codec" "github.com/stretchr/testify/require" ) @@ -258,3 +259,83 @@ func TestRegionJobRetryer(t *testing.T) { cancel() jobWg.Wait() } + +func TestNewRegionJobs(t *testing.T) { + buildRegion := func(regionKeys [][]byte) []*split.RegionInfo { + ret := make([]*split.RegionInfo, 0, len(regionKeys)-1) + for i := 0; i < len(regionKeys)-1; i++ { + ret = append(ret, &split.RegionInfo{ + Region: &metapb.Region{ + StartKey: codec.EncodeBytes(nil, regionKeys[i]), + EndKey: codec.EncodeBytes(nil, regionKeys[i+1]), + }, + }) + } + return ret + } + buildJobRanges := func(jobRangeKeys [][]byte) []common.Range { + ret := make([]common.Range, 0, len(jobRangeKeys)-1) + for i := 0; i < len(jobRangeKeys)-1; i++ { + ret = append(ret, common.Range{ + Start: jobRangeKeys[i], + End: jobRangeKeys[i+1], + }) + } + return ret + } + + cases := []struct { + regionKeys [][]byte + jobRangeKeys [][]byte + jobKeys [][]byte + }{ + { + regionKeys: [][]byte{{1}, nil}, + jobRangeKeys: [][]byte{{2}, {3}, {4}}, + jobKeys: [][]byte{{2}, {3}, {4}}, + }, + { + regionKeys: [][]byte{{1}, {4}}, + jobRangeKeys: [][]byte{{1}, {2}, {3}, {4}}, + jobKeys: [][]byte{{1}, {2}, {3}, {4}}, + }, + { + + regionKeys: [][]byte{{1}, {2}, {3}, {4}}, + jobRangeKeys: [][]byte{{1}, {4}}, + jobKeys: [][]byte{{1}, {2}, {3}, {4}}, + }, + { + + regionKeys: [][]byte{{1}, {3}, {5}, {7}}, + jobRangeKeys: [][]byte{{2}, {4}, {6}}, + jobKeys: [][]byte{{2}, {3}, {4}, {5}, {6}}, + }, + 
{ + + regionKeys: [][]byte{{1}, {4}, {7}}, + jobRangeKeys: [][]byte{{2}, {3}, {4}, {5}, {6}}, + jobKeys: [][]byte{{2}, {3}, {4}, {5}, {6}}, + }, + { + + regionKeys: [][]byte{{1}, {5}, {6}, {7}, {8}, {12}}, + jobRangeKeys: [][]byte{{1}, {2}, {3}, {4}, {9}, {10}, {12}}, + jobKeys: [][]byte{{1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}, {9}, {10}, {12}}, + }, + } + + for caseIdx, c := range cases { + jobs := newRegionJobs( + buildRegion(c.regionKeys), + nil, + buildJobRanges(c.jobRangeKeys), + 0, 0, nil, + ) + require.Len(t, jobs, len(c.jobKeys)-1, "case %d", caseIdx) + for i, j := range jobs { + require.Equal(t, c.jobKeys[i], j.keyRange.Start, "case %d", caseIdx) + require.Equal(t, c.jobKeys[i+1], j.keyRange.End, "case %d", caseIdx) + } + } +} diff --git a/pkg/lightning/common/engine.go b/pkg/lightning/common/engine.go index e5b91693c832d..c11eac98a1ae2 100644 --- a/pkg/lightning/common/engine.go +++ b/pkg/lightning/common/engine.go @@ -16,11 +16,10 @@ package common import ( "context" - - "github.com/pingcap/tidb/pkg/lightning/log" ) -// Range contains a start key and an end key. +// Range contains a start key and an end key. The Range's key should not be +// encoded by duplicate detection. type Range struct { Start []byte End []byte // end is always exclusive except import_sstpb.SSTMeta @@ -31,9 +30,8 @@ type Range struct { type Engine interface { // ID is the identifier of an engine. ID() string - // LoadIngestData sends DataAndRange to outCh. Implementation may choose smaller - // ranges than given regionRanges, and data is contained in its range. - LoadIngestData(ctx context.Context, regionRanges []Range, outCh chan<- DataAndRange) error + // LoadIngestData sends DataAndRanges to outCh. + LoadIngestData(ctx context.Context, outCh chan<- DataAndRanges) error // KVStatistics returns the total kv size and total kv count. KVStatistics() (totalKVSize int64, totalKVCount int64) // ImportedStatistics returns the imported kv size and imported kv count. @@ -42,9 +40,10 @@ type Engine interface { // duplicate detection is enabled, the keys in engine are encoded by duplicate // detection but the returned keys should not be encoded. GetKeyRange() (startKey []byte, endKey []byte, err error) - // SplitRanges splits the range [startKey, endKey) into multiple ranges. If the - // duplicate detection is enabled, the keys in engine are encoded by duplicate - // detection but the returned keys should not be encoded. - SplitRanges(startKey, endKey []byte, sizeLimit, keysLimit int64, logger log.Logger) ([]Range, error) + // GetRegionSplitKeys checks the KV distribution of the Engine and returns the + // keys that can be used as region split keys. If the duplicate detection is + // enabled, the keys stored in engine are encoded by duplicate detection but the + // returned keys should not be encoded. + GetRegionSplitKeys() ([][]byte, error) Close() error } diff --git a/pkg/lightning/common/ingest_data.go b/pkg/lightning/common/ingest_data.go index d9a360be47861..d4fdfe021dde1 100644 --- a/pkg/lightning/common/ingest_data.go +++ b/pkg/lightning/common/ingest_data.go @@ -73,8 +73,9 @@ type ForwardIter interface { ReleaseBuf() } -// DataAndRange is a pair of IngestData and Range. -type DataAndRange struct { - Data IngestData - Range Range +// DataAndRanges is a pair of IngestData and list of Range. Each Range will +// become a regionJob, and the regionJob will read data from Data field. 
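Before the declaration just below, a producer-side sketch of this type: Engine.LoadIngestData no longer receives region ranges from the caller; an engine chooses its own sorted ranges and sends them together with the IngestData that backs them, while generateAndSendJob drains the channel on the other side. The function here is illustrative, not an actual implementation:

package common

import "context"

// exampleLoadIngestData is illustrative, not part of this patch: it shows the
// contract behind the new Engine.LoadIngestData signature, namely pushing
// DataAndRanges values into outCh until done or the context is canceled.
func exampleLoadIngestData(ctx context.Context, data IngestData, sortedRanges []Range, outCh chan<- DataAndRanges) error {
	select {
	case outCh <- DataAndRanges{Data: data, SortedRanges: sortedRanges}:
		return nil
	case <-ctx.Done():
		return ctx.Err()
	}
}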
+type DataAndRanges struct { + Data IngestData + SortedRanges []Range } diff --git a/pkg/lightning/config/const.go b/pkg/lightning/config/const.go index e01f1cd4f6e8b..47a8ffe40a33f 100644 --- a/pkg/lightning/config/const.go +++ b/pkg/lightning/config/const.go @@ -27,7 +27,7 @@ const ( DefaultBatchImportRatio = 0.75 ReadBlockSize ByteSize = 64 * units.KiB - // SplitRegionSize See: + // SplitRegionSize is the default coprocessor.region-split-keys value before TiKV v8.4.0 See: // https://github.com/tikv/tikv/blob/e030a0aae9622f3774df89c62f21b2171a72a69e/etc/config-template.toml#L360 // lower the max-key-count to avoid tikv trigger region auto split SplitRegionSize ByteSize = 96 * units.MiB diff --git a/pkg/parser/ast/dml.go b/pkg/parser/ast/dml.go index 3a124060ef2b5..092e36dc76d75 100644 --- a/pkg/parser/ast/dml.go +++ b/pkg/parser/ast/dml.go @@ -285,6 +285,11 @@ type TableName struct { TableSample *TableSample // AS OF is used to see the data as it was at a specific point in time. AsOf *AsOfClause + // IsAlias is true if this table name is an alias. + // sometime, we need to distinguish the table name is an alias or not. + // for example ```delete tt1 from t1 tt1,(select max(id) id from t2)tt2 where tt1.id<=tt2.id``` + // ```tt1``` is a alias name. so we need to set IsAlias to true and restore the table name without database name. + IsAlias bool } func (*TableName) resultSet() {} @@ -296,7 +301,7 @@ func (n *TableName) restoreName(ctx *format.RestoreCtx) { if n.Schema.String() != "" { ctx.WriteName(n.Schema.String()) ctx.WritePlain(".") - } else if ctx.DefaultDB != "" { + } else if ctx.DefaultDB != "" && !n.IsAlias { // Try CTE, for a CTE table name, we shouldn't write the database name. if !ctx.IsCTETableName(n.Name.L) { ctx.WriteName(ctx.DefaultDB) diff --git a/pkg/planner/OWNERS b/pkg/planner/OWNERS new file mode 100644 index 0000000000000..904863f05e7fc --- /dev/null +++ b/pkg/planner/OWNERS @@ -0,0 +1,7 @@ +# See the OWNERS docs at https://go.k8s.io/owners +options: + no_parent_owners: true +approvers: + - sig-approvers-planner +labels: + - sig/planner diff --git a/pkg/planner/core/expression_rewriter.go b/pkg/planner/core/expression_rewriter.go index 8a278e53fa508..d1575324d7173 100644 --- a/pkg/planner/core/expression_rewriter.go +++ b/pkg/planner/core/expression_rewriter.go @@ -1485,7 +1485,7 @@ func (er *expressionRewriter) Leave(originInNode ast.Node) (retNode ast.Node, ok return retNode, false } - castFunction, err := expression.BuildCastFunctionWithCheck(er.sctx, arg, v.Tp) + castFunction, err := expression.BuildCastFunctionWithCheck(er.sctx, arg, v.Tp, v.ExplicitCharSet) if err != nil { er.err = err return retNode, false diff --git a/pkg/planner/core/logical_plan_builder.go b/pkg/planner/core/logical_plan_builder.go index efec0862b0b7c..489a58c62efe4 100644 --- a/pkg/planner/core/logical_plan_builder.go +++ b/pkg/planner/core/logical_plan_builder.go @@ -5456,6 +5456,7 @@ func (b *PlanBuilder) BuildDataSourceFromView(ctx context.Context, dbName model. 
terror.ErrorNotEqual(err, plannererrors.ErrNotSupportedYet) { err = plannererrors.ErrViewInvalid.GenWithStackByArgs(dbName.O, tableInfo.Name.O) } + failpoint.Inject("BuildDataSourceFailed", func() {}) return nil, err } pm := privilege.GetPrivilegeManager(b.ctx) diff --git a/pkg/planner/core/point_get_plan.go b/pkg/planner/core/point_get_plan.go index 65da16445b2ca..c584246473bba 100644 --- a/pkg/planner/core/point_get_plan.go +++ b/pkg/planner/core/point_get_plan.go @@ -2006,7 +2006,11 @@ func buildOrderedList(ctx PlanContext, plan Plan, list []*ast.Assignment, if err != nil { return nil, true } - expr = expression.BuildCastFunction(ctx.GetExprCtx(), expr, col.GetType()) + castToTP := col.GetType() + if castToTP.GetType() == mysql.TypeEnum && assign.Expr.GetType().EvalType() == types.ETInt { + castToTP.AddFlag(mysql.EnumSetAsIntFlag) + } + expr = expression.BuildCastFunction(ctx.GetExprCtx(), expr, castToTP) if allAssignmentsAreConstant { _, isConst := expr.(*expression.Constant) allAssignmentsAreConstant = isConst diff --git a/pkg/planner/core/point_get_plan_test.go b/pkg/planner/core/point_get_plan_test.go index cde9eb6be3f26..97d194062e138 100644 --- a/pkg/planner/core/point_get_plan_test.go +++ b/pkg/planner/core/point_get_plan_test.go @@ -224,3 +224,14 @@ func TestIssue18042(t *testing.T) { require.Equal(t, uint64(100), tk.Session().GetSessionVars().StmtCtx.MaxExecutionTime) tk.MustExec("drop table t") } + +func TestIssue56832(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("drop table if exists t") + tk.MustExec("create table t (id int primary key, c enum('0', '1', '2'));") + tk.MustExec("insert into t values (0,'0'), (1,'1'), (2,'2');") + tk.MustExec("update t set c = 2 where id = 0;") + tk.MustQuery("select c from t where id = 0").Check(testkit.Rows("1")) +} diff --git a/pkg/planner/core/preprocess.go b/pkg/planner/core/preprocess.go index 3a4efbcd2d136..7deb50160eaaa 100644 --- a/pkg/planner/core/preprocess.go +++ b/pkg/planner/core/preprocess.go @@ -554,7 +554,9 @@ func (p *preprocessor) checkBindGrammar(originNode, hintedNode ast.StmtNode, def tn.TableInfo = tableInfo tn.DBInfo = dbInfo } - + aliasChecker := &aliasChecker{} + originNode.Accept(aliasChecker) + hintedNode.Accept(aliasChecker) originSQL := parser.NormalizeForBinding(utilparser.RestoreWithDefaultDB(originNode, defaultDB, originNode.Text()), false) hintedSQL := parser.NormalizeForBinding(utilparser.RestoreWithDefaultDB(hintedNode, defaultDB, hintedNode.Text()), false) if originSQL != hintedSQL { @@ -1931,3 +1933,59 @@ func (p *preprocessor) skipLockMDL() bool { // skip lock mdl for ANALYZE statement. return p.flag&inImportInto > 0 || p.flag&inAnalyze > 0 } + +// aliasChecker is used to check the alias of the table in delete statement. +// +// for example: delete tt1 from t1 tt1,(select max(id) id from t2)tt2 where tt1.id<=tt2.id +// `delete tt1` will be transformed to `delete current_database.t1` by default. +// because `tt1` cannot be used as alias in delete statement. +// so we have to set `tt1` as alias by aliasChecker. +type aliasChecker struct{} + +func (*aliasChecker) Enter(in ast.Node) (ast.Node, bool) { + if deleteStmt, ok := in.(*ast.DeleteStmt); ok { + // 1. 
check the tableRefs of deleteStmt to find the alias + var aliases []*model.CIStr + if deleteStmt.TableRefs != nil && deleteStmt.TableRefs.TableRefs != nil { + tableRefs := deleteStmt.TableRefs.TableRefs + if val := getTableRefsAlias(tableRefs.Left); val != nil { + aliases = append(aliases, val) + } + if val := getTableRefsAlias(tableRefs.Right); val != nil { + aliases = append(aliases, val) + } + } + // 2. check the Tables to tag the alias + if deleteStmt.Tables != nil && deleteStmt.Tables.Tables != nil { + for _, table := range deleteStmt.Tables.Tables { + if table.Schema.String() != "" { + continue + } + for _, alias := range aliases { + if table.Name.L == alias.L { + table.IsAlias = true + break + } + } + } + } + return in, true + } + return in, false +} + +func getTableRefsAlias(tableRefs ast.ResultSetNode) *model.CIStr { + switch v := tableRefs.(type) { + case *ast.Join: + if v.Left != nil { + return getTableRefsAlias(v.Left) + } + case *ast.TableSource: + return &v.AsName + } + return nil +} + +func (*aliasChecker) Leave(in ast.Node) (ast.Node, bool) { + return in, true +} diff --git a/pkg/planner/core/preprocess_test.go b/pkg/planner/core/preprocess_test.go index c508cda3d120b..d518fb4472701 100644 --- a/pkg/planner/core/preprocess_test.go +++ b/pkg/planner/core/preprocess_test.go @@ -411,3 +411,14 @@ func TestPreprocessCTE(t *testing.T) { require.Equal(t, tc.after, rs.String()) } } + +func TestPreprocessDeleteFromWithAlias(t *testing.T) { + // https://github.com/pingcap/tidb/issues/56726 + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("create table t1(id int);") + tk.MustExec(" create table t2(id int);") + tk.MustExec("delete tt1 from t1 tt1,(select max(id) id from t2)tt2 where tt1.id<=tt2.id;") + tk.MustExec("create global binding for delete tt1 from t1 tt1,(select max(id) id from t2)tt2 where tt1.id<=tt2.id using delete /*+ MAX_EXECUTION_TIME(10)*/ tt1 from t1 tt1,(select max(id) id from t2)tt2 where tt1.id<=tt2.id;") +} diff --git a/pkg/planner/core/runtime_filter_generator.go b/pkg/planner/core/runtime_filter_generator.go index aa55c4324fa9d..4f003eef69370 100644 --- a/pkg/planner/core/runtime_filter_generator.go +++ b/pkg/planner/core/runtime_filter_generator.go @@ -81,9 +81,6 @@ func (generator *RuntimeFilterGenerator) GenerateRuntimeFilter(plan PhysicalPlan func (generator *RuntimeFilterGenerator) generateRuntimeFilterInterval(hashJoinPlan *PhysicalHashJoin) { // precondition: the storage type of hash join must be TiFlash if hashJoinPlan.storeTp != kv.TiFlash { - logutil.BgLogger().Warn("RF only support TiFlash compute engine while storage type of hash join node is not TiFlash", - zap.Int("PhysicalHashJoinId", hashJoinPlan.ID()), - zap.String("StoreTP", hashJoinPlan.storeTp.Name())) return } // check hash join pattern diff --git a/pkg/server/handler/optimizor/plan_replayer_test.go b/pkg/server/handler/optimizor/plan_replayer_test.go index 0748d0e12359a..a43165be6a6f0 100644 --- a/pkg/server/handler/optimizor/plan_replayer_test.go +++ b/pkg/server/handler/optimizor/plan_replayer_test.go @@ -185,7 +185,7 @@ func TestDumpPlanReplayerAPI(t *testing.T) { tk.MustExec(`plan replayer load "/tmp/plan_replayer.zip"`) // 3-3. 
assert that the count and modify count in the stats is as expected - rows := tk.MustQuery("show stats_meta") + rows := tk.MustQuery(`show stats_meta where table_name="t"`) require.True(t, rows.Next(), "unexpected data") var dbName, tableName string var modifyCount, count int64 @@ -216,6 +216,7 @@ func prepareData4PlanReplayer(t *testing.T, client *testserverclient.TestServerC tk.MustExec("create table t(a int)") tk.MustExec("CREATE TABLE authors (id INT PRIMARY KEY AUTO_INCREMENT,name VARCHAR(100) NOT NULL,email VARCHAR(100) UNIQUE NOT NULL);") tk.MustExec("CREATE TABLE books (id INT PRIMARY KEY AUTO_INCREMENT,title VARCHAR(200) NOT NULL,publication_date DATE NOT NULL,author_id INT,FOREIGN KEY (author_id) REFERENCES authors(id) ON DELETE CASCADE);") + tk.MustExec("create table tt(a int, b varchar(10)) PARTITION BY HASH(a) PARTITIONS 4;") err = h.HandleDDLEvent(<-h.DDLEventCh()) require.NoError(t, err) tk.MustExec("insert into t values(1), (2), (3), (4)") @@ -223,6 +224,9 @@ func prepareData4PlanReplayer(t *testing.T, client *testserverclient.TestServerC tk.MustExec("analyze table t") tk.MustExec("insert into t values(5), (6), (7), (8)") require.NoError(t, h.DumpStatsDeltaToKV(true)) + tk.MustExec("INSERT INTO tt (a, b) VALUES (1, 'str1'), (2, 'str2'), (3, 'str3'), (4, 'str4'),(5, 'str5'), (6, 'str6'), (7, 'str7'), (8, 'str8'),(9, 'str9'), (10, 'str10'), (11, 'str11'), (12, 'str12'),(13, 'str13'), (14, 'str14'), (15, 'str15'), (16, 'str16'),(17, 'str17'), (18, 'str18'), (19, 'str19'), (20, 'str20'),(21, 'str21'), (22, 'str22'), (23, 'str23'), (24, 'str24'),(25, 'str25'), (26, 'str26'), (27, 'str27'), (28, 'str28'),(29, 'str29'), (30, 'str30'), (31, 'str31'), (32, 'str32'),(33, 'str33'), (34, 'str34'), (35, 'str35'), (36, 'str36'),(37, 'str37'), (38, 'str38'), (39, 'str39'), (40, 'str40'),(41, 'str41'), (42, 'str42'), (43, 'str43'), (44, 'str44'),(45, 'str45'), (46, 'str46'), (47, 'str47'), (48, 'str48'),(49, 'str49'), (50, 'str50'), (51, 'str51'), (52, 'str52'),(53, 'str53'), (54, 'str54'), (55, 'str55'), (56, 'str56'),(57, 'str57'), (58, 'str58'), (59, 'str59'), (60, 'str60'),(61, 'str61'), (62, 'str62'), (63, 'str63'), (64, 'str64'),(65, 'str65'), (66, 'str66'), (67, 'str67'), (68, 'str68'),(69, 'str69'), (70, 'str70'), (71, 'str71'), (72, 'str72'),(73, 'str73'), (74, 'str74'), (75, 'str75'), (76, 'str76'),(77, 'str77'), (78, 'str78'), (79, 'str79'), (80, 'str80'),(81, 'str81'), (82, 'str82'), (83, 'str83'), (84, 'str84'),(85, 'str85'), (86, 'str86'), (87, 'str87'), (88, 'str88'),(89, 'str89'), (90, 'str90'), (91, 'str91'), (92, 'str92'),(93, 'str93'), (94, 'str94'), (95, 'str95'), (96, 'str96'),(97, 'str97'), (98, 'str98'), (99, 'str99'), (100, 'str100');") + require.NoError(t, h.DumpStatsDeltaToKV(true)) + tk.MustExec("analyze table tt") rows := tk.MustQuery("plan replayer dump explain select * from t") require.True(t, rows.Next(), "unexpected data") var filename string @@ -423,8 +427,12 @@ func prepareData4Issue43192(t *testing.T, client *testserverclient.TestServerCli tk.MustExec("create database planReplayer") tk.MustExec("use planReplayer") - tk.MustExec("create table t(a int, b int, INDEX ia (a), INDEX ib (b));") + tk.MustExec("create table t(a int, b int, INDEX ia (a), INDEX ib (b)) PARTITION BY HASH(a) PARTITIONS 4;") err = h.HandleDDLEvent(<-h.DDLEventCh()) + tk.MustExec("INSERT INTO t (a, b) VALUES (1, 1), (2, 2), (3, 3), (4, 4),(5, 5), (6, 6), (7, 7), (8, 8),(9, 9), (10, 10), (11, 11), (12, 12),(13, 13), (14, 14), (15, 15), (16, 16),(17, 17), (18, 18), (19, 19), (20, 20),(21, 
21), (22, 22), (23, 23), (24, 24),(25, 25), (26, 26), (27, 27), (28, 28),(29, 29), (30, 30), (31, 31), (32, 32),(33, 33), (34, 34), (35, 35), (36, 36),(37, 37), (38, 38), (39, 39), (40, 40),(41, 41), (42, 42), (43, 43), (44, 44),(45, 45), (46, 46), (47, 47), (48, 48),(49, 49), (50, 50), (51, 51), (52, 52),(53, 53), (54, 54), (55, 55), (56, 56),(57, 57), (58, 58), (59, 59), (60, 60),(61, 61), (62, 62), (63, 63), (64, 64),(65, 65), (66, 66), (67, 67), (68, 68),(69, 69), (70, 70), (71, 71), (72, 72),(73, 73), (74, 74), (75, 75), (76, 76),(77, 77), (78, 78), (79, 79), (80, 80),(81, 81), (82, 82), (83, 83), (84, 84),(85, 85), (86, 86), (87, 87), (88, 88),(89, 89), (90, 90), (91, 91), (92, 92),(93, 93), (94, 94), (95, 95), (96, 96),(97, 97), (98, 98), (99, 99), (100, 100);") + require.NoError(t, h.DumpStatsDeltaToKV(true)) + tk.MustExec("analyze table t") + require.NoError(t, err) tk.MustExec("create global binding for select a, b from t where a in (1, 2, 3) using select a, b from t use index (ib) where a in (1, 2, 3)") rows := tk.MustQuery("plan replayer dump explain select a, b from t where a in (1, 2, 3)") diff --git a/pkg/session/test/txn/BUILD.bazel b/pkg/session/test/txn/BUILD.bazel index df41965e0377d..456c700f3123a 100644 --- a/pkg/session/test/txn/BUILD.bazel +++ b/pkg/session/test/txn/BUILD.bazel @@ -9,7 +9,7 @@ go_test( ], flaky = True, race = "on", - shard_count = 8, + shard_count = 10, deps = [ "//pkg/config", "//pkg/kv", diff --git a/pkg/session/test/txn/txn_test.go b/pkg/session/test/txn/txn_test.go index 42732cc26dba3..37196a1c71ec5 100644 --- a/pkg/session/test/txn/txn_test.go +++ b/pkg/session/test/txn/txn_test.go @@ -508,3 +508,74 @@ func TestInTrans(t *testing.T) { tk.MustExec("rollback") require.False(t, txn.Valid()) } + +func TestMemBufferSnapshotRead(t *testing.T) { + store := testkit.CreateMockStore(t) + + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + + tk.MustExec("drop table if exists t;") + tk.MustExec("create table t(a int primary key, b int, index i(b));") + + tk.MustExec("set session tidb_distsql_scan_concurrency = 1;") + tk.MustExec("set session tidb_index_lookup_join_concurrency = 1;") + tk.MustExec("set session tidb_projection_concurrency=1;") + tk.MustExec("set session tidb_init_chunk_size=1;") + tk.MustExec("set session tidb_max_chunk_size=40;") + tk.MustExec("set session tidb_index_join_batch_size = 10") + + tk.MustExec("begin;") + // write (0, 0), (1, 1), ... ,(100, 100) into membuffer + var sb strings.Builder + sb.WriteString("insert into t values ") + for i := 0; i <= 100; i++ { + if i > 0 { + sb.WriteString(", ") + } + sb.WriteString(fmt.Sprintf("(%d, %d)", i, i)) + } + tk.MustExec(sb.String()) + + // insert on duplicate key statement should update the table to (0, 100), (1, 99), ... (100, 0) + // This statement will create UnionScan dynamically during execution, and some UnionScan will see staging data(should be bypassed), + // so it relies on correct snapshot read to get the expected result. 
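Why the statement just below produces (0, 100), (1, 99), ..., (100, 0): every b value is unique before the statement, so the join t1.b = t2.b matches each row only with itself and the subquery yields (100 - a, b) for every row (a, b); each produced row conflicts on the primary key 100 - a, and ON DUPLICATE KEY UPDATE b = values(b) rewrites that existing row's b to the produced b, so every key j ends up with b = 100 - j. A hedged sketch of an equivalent direct check that could run after the statement, inside the same transaction (illustrative only, not part of the test; it reuses the test's tk and the fmt/testkit helpers already imported in this file):

// Illustrative only; assumes the (a, b) layout described above.
for i := 0; i <= 100; i++ {
	tk.MustQuery(fmt.Sprintf("select b from t where a = %d", i)).
		Check(testkit.Rows(fmt.Sprintf("%d", 100-i)))
}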
+ tk.MustExec("insert into t (select /*+ INL_JOIN(t1) */ 100 - t1.a as a, t1.b from t t1, (select a, b from t) t2 where t1.b = t2.b) on duplicate key update b = values(b)") + + require.Empty(t, tk.MustQuery("select a, b from t where a + b != 100;").Rows()) + tk.MustExec("commit;") + require.Empty(t, tk.MustQuery("select a, b from t where a + b != 100;").Rows()) + + tk.MustExec("set session tidb_distsql_scan_concurrency = default;") + tk.MustExec("set session tidb_index_lookup_join_concurrency = default;") + tk.MustExec("set session tidb_projection_concurrency=default;") + tk.MustExec("set session tidb_init_chunk_size=default;") + tk.MustExec("set session tidb_max_chunk_size=default;") + tk.MustExec("set session tidb_index_join_batch_size = default") +} + +func TestMemBufferCleanupMemoryLeak(t *testing.T) { + // Test if cleanup memory will cause a memory leak. + // When an in-txn statement fails, TiDB cleans up the mutations from this statement. + // If there's a memory leak, the memory usage could increase uncontrollably with retries. + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("create table t(a varchar(255) primary key)") + key1 := strings.Repeat("a", 255) + key2 := strings.Repeat("b", 255) + tk.MustExec(`set global tidb_mem_oom_action='cancel'`) + tk.MustExec("set session tidb_mem_quota_query=10240") + tk.MustExec("begin") + tk.MustExec("insert into t values(?)", key2) + for i := 0; i < 100; i++ { + // The insert statement will fail because of the duplicate key error. + err := tk.ExecToErr("insert into t values(?), (?)", key1, key2) + require.Error(t, err) + if strings.Contains(err.Error(), "Duplicate") { + continue + } + require.NoError(t, err) + } + tk.MustExec("commit") +} diff --git a/pkg/statistics/BUILD.bazel b/pkg/statistics/BUILD.bazel index 27c87046f4f8d..1fb5ca04f3a69 100644 --- a/pkg/statistics/BUILD.bazel +++ b/pkg/statistics/BUILD.bazel @@ -80,7 +80,7 @@ go_test( data = glob(["testdata/**"]), embed = [":statistics"], flaky = True, - shard_count = 37, + shard_count = 38, deps = [ "//pkg/config", "//pkg/parser/ast", diff --git a/pkg/statistics/OWNERS b/pkg/statistics/OWNERS new file mode 100644 index 0000000000000..904863f05e7fc --- /dev/null +++ b/pkg/statistics/OWNERS @@ -0,0 +1,7 @@ +# See the OWNERS docs at https://go.k8s.io/owners +options: + no_parent_owners: true +approvers: + - sig-approvers-planner +labels: + - sig/planner diff --git a/pkg/statistics/column.go b/pkg/statistics/column.go index fff29c7ab571a..3637ed867c942 100644 --- a/pkg/statistics/column.go +++ b/pkg/statistics/column.go @@ -263,3 +263,13 @@ func (c *Column) StatsAvailable() bool { // StatsVer, so we check NDV > 0 || NullCount > 0 for the case. return c.IsAnalyzed() || c.NDV > 0 || c.NullCount > 0 } + +// EmptyColumn creates an empty column object. It may be used for pseudo estimation or to stop loading unexisting stats. 
+func EmptyColumn(tid int64, pkIsHandle bool, colInfo *model.ColumnInfo) *Column { + return &Column{ + PhysicalID: tid, + Info: colInfo, + Histogram: *NewHistogram(colInfo.ID, 0, 0, 0, &colInfo.FieldType, 0, 0), + IsHandle: pkIsHandle && mysql.HasPriKeyFlag(colInfo.GetFlag()), + } +} diff --git a/pkg/statistics/handle/autoanalyze/priorityqueue/dynamic_partitioned_table_analysis_job.go b/pkg/statistics/handle/autoanalyze/priorityqueue/dynamic_partitioned_table_analysis_job.go index e51ca51f2ec43..4d22b0321a77c 100644 --- a/pkg/statistics/handle/autoanalyze/priorityqueue/dynamic_partitioned_table_analysis_job.go +++ b/pkg/statistics/handle/autoanalyze/priorityqueue/dynamic_partitioned_table_analysis_job.go @@ -202,7 +202,6 @@ func (j *DynamicPartitionedTableAnalysisJob) analyzePartitionIndexes( ) { analyzePartitionBatchSize := int(variable.AutoAnalyzePartitionBatchSize.Load()) -OnlyPickOneIndex: for indexName, partitionNames := range j.PartitionIndexes { needAnalyzePartitionNames := make([]any, 0, len(partitionNames)) for _, partition := range partitionNames { @@ -219,11 +218,11 @@ OnlyPickOneIndex: params := append([]any{j.TableSchema, j.GlobalTableName}, needAnalyzePartitionNames[start:end]...) params = append(params, indexName) exec.AutoAnalyze(sctx, statsHandle, sysProcTracker, j.TableStatsVer, sql, params...) - // Halt execution after analyzing one index. - // This is because analyzing a single index also analyzes all other indexes and columns. - // Therefore, to avoid redundancy, we prevent multiple analyses of the same partition. - break OnlyPickOneIndex } + // Halt execution after analyzing one index. + // This is because analyzing a single index also analyzes all other indexes and columns. + // Therefore, to avoid redundancy, we prevent multiple analyses of the same partition. 
+ break } } diff --git a/pkg/statistics/handle/handletest/BUILD.bazel b/pkg/statistics/handle/handletest/BUILD.bazel index 505a2c5737c70..81a73e7f19ef3 100644 --- a/pkg/statistics/handle/handletest/BUILD.bazel +++ b/pkg/statistics/handle/handletest/BUILD.bazel @@ -9,7 +9,7 @@ go_test( ], flaky = True, race = "on", - shard_count = 33, + shard_count = 34, deps = [ "//pkg/config", "//pkg/domain", diff --git a/pkg/statistics/handle/handletest/handle_test.go b/pkg/statistics/handle/handletest/handle_test.go index ce1b8d2d1f97b..93453f903e1d2 100644 --- a/pkg/statistics/handle/handletest/handle_test.go +++ b/pkg/statistics/handle/handletest/handle_test.go @@ -1462,3 +1462,30 @@ func TestSkipMissingPartitionStats(t *testing.T) { require.True(t, idx.IsStatsInitialized()) } } + +func TestStatsCacheUpdateTimeout(t *testing.T) { + store, dom := testkit.CreateMockStoreAndDomain(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("set @@tidb_partition_prune_mode = 'dynamic'") + tk.MustExec("set @@tidb_skip_missing_partition_stats = 1") + tk.MustExec("create table t (a int, b int, c int, index idx_b(b)) partition by range (a) (partition p0 values less than (100), partition p1 values less than (200), partition p2 values less than (300))") + tk.MustExec("insert into t values (1,1,1), (2,2,2), (101,101,101), (102,102,102), (201,201,201), (202,202,202)") + h := dom.StatsHandle() + require.NoError(t, h.DumpStatsDeltaToKV(true)) + tk.MustExec("analyze table t partition p0, p1") + tbl, err := dom.InfoSchema().TableByName(model.NewCIStr("test"), model.NewCIStr("t")) + require.NoError(t, err) + tblInfo := tbl.Meta() + globalStats := h.GetTableStats(tblInfo) + require.Equal(t, 6, int(globalStats.RealtimeCount)) + require.Equal(t, 2, int(globalStats.ModifyCount)) + require.NoError(t, failpoint.Enable("github.com/pingcap/tidb/pkg/statistics/handle/util/ExecRowsTimeout", "return()")) + defer func() { + require.NoError(t, failpoint.Disable("github.com/pingcap/tidb/pkg/statistics/handle/util/ExecRowsTimeout")) + }() + require.Error(t, h.Update(dom.InfoSchema())) + globalStats2 := h.GetTableStats(tblInfo) + require.Equal(t, 6, int(globalStats2.RealtimeCount)) + require.Equal(t, 2, int(globalStats2.ModifyCount)) +} diff --git a/pkg/statistics/handle/storage/read.go b/pkg/statistics/handle/storage/read.go index 49a023905a028..63b34737b4b20 100644 --- a/pkg/statistics/handle/storage/read.go +++ b/pkg/statistics/handle/storage/read.go @@ -22,6 +22,7 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/failpoint" "github.com/pingcap/tidb/pkg/config" + "github.com/pingcap/tidb/pkg/infoschema" "github.com/pingcap/tidb/pkg/kv" "github.com/pingcap/tidb/pkg/parser/ast" "github.com/pingcap/tidb/pkg/parser/model" @@ -528,8 +529,11 @@ func TableStatsFromStorage(sctx sessionctx.Context, snapshot uint64, tableInfo * table.RealtimeCount = realtimeCount rows, _, err := util.ExecRows(sctx, "select table_id, is_index, hist_id, distinct_count, version, null_count, tot_col_size, stats_ver, flag, correlation, last_analyze_pos from mysql.stats_histograms where table_id = %?", tableID) + if err != nil { + return nil, err + } // Check deleted table. - if err != nil || len(rows) == 0 { + if len(rows) == 0 { return nil, nil } for _, row := range rows { @@ -576,7 +580,7 @@ func LoadHistogram(sctx sessionctx.Context, tableID int64, isIndex int, histID i } // LoadNeededHistograms will load histograms for those needed columns/indices. 
-func LoadNeededHistograms(sctx sessionctx.Context, statsCache statstypes.StatsCache, loadFMSketch bool) (err error) { +func LoadNeededHistograms(sctx sessionctx.Context, statsCache statstypes.StatsHandle, loadFMSketch bool) (err error) { items := statistics.HistogramNeededItems.AllItems() for _, item := range items { if !item.IsIndex { @@ -619,18 +623,42 @@ func CleanFakeItemsForShowHistInFlights(statsCache statstypes.StatsCache) int { return reallyNeeded } -func loadNeededColumnHistograms(sctx sessionctx.Context, statsCache statstypes.StatsCache, col model.TableItemID, loadFMSketch bool, fullLoad bool) (err error) { - tbl, ok := statsCache.Get(col.TableID) +func loadNeededColumnHistograms(sctx sessionctx.Context, statsCache statstypes.StatsHandle, col model.TableItemID, loadFMSketch bool, fullLoad bool) (err error) { + statsTbl, ok := statsCache.Get(col.TableID) if !ok { return nil } + is := sctx.GetDomainInfoSchema().(infoschema.InfoSchema) + tbl, ok := statsCache.TableInfoByID(is, col.TableID) + if !ok { + return nil + } + tblInfo := tbl.Meta() var colInfo *model.ColumnInfo - _, loadNeeded, analyzed := tbl.ColumnIsLoadNeeded(col.ID, true) + _, loadNeeded, analyzed := statsTbl.ColumnIsLoadNeeded(col.ID, true) + for _, ci := range tblInfo.Columns { + if col.ID == ci.ID { + colInfo = ci + break + } + } + if colInfo == nil { + statistics.HistogramNeededItems.Delete(col) + return nil + } if !loadNeeded || !analyzed { + // If this column is not analyzed yet and we don't have it in memory. + // We create a fake one for the pseudo estimation. + // Otherwise, it will trigger the sync/async load again, even if the column has not been analyzed. + if loadNeeded && !analyzed { + fakeCol := statistics.EmptyColumn(colInfo.ID, tblInfo.PKIsHandle, colInfo) + statsTbl.Columns[col.ID] = fakeCol + statsCache.UpdateStatsCache([]*statistics.Table{statsTbl}, nil) + } statistics.HistogramNeededItems.Delete(col) return nil } - colInfo = tbl.ColAndIdxExistenceMap.GetCol(col.ID) + hg, _, statsVer, _, err := HistMetaFromStorageWithHighPriority(sctx, &col, colInfo) if hg == nil || err != nil { statistics.HistogramNeededItems.Delete(col) @@ -664,29 +692,29 @@ func loadNeededColumnHistograms(sctx sessionctx.Context, statsCache statstypes.S CMSketch: cms, TopN: topN, FMSketch: fms, - IsHandle: tbl.IsPkIsHandle && mysql.HasPriKeyFlag(colInfo.GetFlag()), + IsHandle: tblInfo.PKIsHandle && mysql.HasPriKeyFlag(colInfo.GetFlag()), StatsVer: statsVer, } // Reload the latest stats cache, otherwise the `updateStatsCache` may fail with high probability, because functions // like `GetPartitionStats` called in `fmSketchFromStorage` would have modified the stats cache already. 
- tbl, ok = statsCache.Get(col.TableID) + statsTbl, ok = statsCache.Get(col.TableID) if !ok { return nil } - tbl = tbl.Copy() + statsTbl = statsTbl.Copy() if colHist.StatsAvailable() { if fullLoad { colHist.StatsLoadedStatus = statistics.NewStatsFullLoadStatus() } else { colHist.StatsLoadedStatus = statistics.NewStatsAllEvictedStatus() } - tbl.LastAnalyzeVersion = max(tbl.LastAnalyzeVersion, colHist.LastUpdateVersion) if statsVer != statistics.Version0 { - tbl.StatsVer = int(statsVer) + statsTbl.LastAnalyzeVersion = max(statsTbl.LastAnalyzeVersion, colHist.LastUpdateVersion) + statsTbl.StatsVer = int(statsVer) } } - tbl.Columns[col.ID] = colHist - statsCache.UpdateStatsCache([]*statistics.Table{tbl}, nil) + statsTbl.Columns[col.ID] = colHist + statsCache.UpdateStatsCache([]*statistics.Table{statsTbl}, nil) statistics.HistogramNeededItems.Delete(col) if col.IsSyncLoadFailed { logutil.BgLogger().Warn("Hist for column should already be loaded as sync but not found.", @@ -741,9 +769,9 @@ func loadNeededIndexHistograms(sctx sessionctx.Context, statsCache statstypes.St tbl = tbl.Copy() if idxHist.StatsVer != statistics.Version0 { tbl.StatsVer = int(idxHist.StatsVer) + tbl.LastAnalyzeVersion = max(tbl.LastAnalyzeVersion, idxHist.LastUpdateVersion) } tbl.Indices[idx.ID] = idxHist - tbl.LastAnalyzeVersion = max(tbl.LastAnalyzeVersion, idxHist.LastUpdateVersion) statsCache.UpdateStatsCache([]*statistics.Table{tbl}, nil) if idx.IsSyncLoadFailed { logutil.BgLogger().Warn("Hist for column should already be loaded as sync but not found.", diff --git a/pkg/statistics/handle/syncload/BUILD.bazel b/pkg/statistics/handle/syncload/BUILD.bazel index f0bcd2e4af2ec..c46cc54c4995c 100644 --- a/pkg/statistics/handle/syncload/BUILD.bazel +++ b/pkg/statistics/handle/syncload/BUILD.bazel @@ -33,7 +33,7 @@ go_test( srcs = ["stats_syncload_test.go"], flaky = True, race = "on", - shard_count = 5, + shard_count = 6, deps = [ ":syncload", "//pkg/config", diff --git a/pkg/statistics/handle/syncload/stats_syncload.go b/pkg/statistics/handle/syncload/stats_syncload.go index 04360bc13ee97..952aaf844cb86 100644 --- a/pkg/statistics/handle/syncload/stats_syncload.go +++ b/pkg/statistics/handle/syncload/stats_syncload.go @@ -40,7 +40,7 @@ import ( ) // RetryCount is the max retry count for a sync load task. -const RetryCount = 3 +const RetryCount = 2 type statsSyncLoad struct { statsHandle statstypes.StatsHandle @@ -96,9 +96,13 @@ func (s *statsSyncLoad) SendLoadRequests(sc *stmtctx.StatementContext, neededHis } select { case s.StatsLoad.NeededItemsCh <- task: - result, ok := <-task.ResultCh - intest.Assert(ok, "task.ResultCh cannot be closed") - return result, nil + select { + case <-timer.C: + return nil, errors.New("sync load took too long to return") + case result, ok := <-task.ResultCh: + intest.Assert(ok, "task.ResultCh cannot be closed") + return result, nil + } case <-timer.C: return nil, errors.New("sync load stats channel is full and timeout sending task to channel") } @@ -310,13 +314,9 @@ func (s *statsSyncLoad) handleOneItemTask(task *statstypes.NeededItemTask) (err } // If this column is not analyzed yet and we don't have it in memory. // We create a fake one for the pseudo estimation. + // Otherwise, it will trigger the sync/async load again, even if the column has not been analyzed. 
if loadNeeded && !analyzed { - wrapper.col = &statistics.Column{ - PhysicalID: item.TableID, - Info: wrapper.colInfo, - Histogram: *statistics.NewHistogram(item.ID, 0, 0, 0, &wrapper.colInfo.FieldType, 0, 0), - IsHandle: tbl.IsPkIsHandle && mysql.HasPriKeyFlag(wrapper.colInfo.GetFlag()), - } + wrapper.col = statistics.EmptyColumn(item.TableID, tbl.IsPkIsHandle, wrapper.colInfo) s.updateCachedItem(item, wrapper.col, wrapper.idx, task.Item.FullLoad) return nil } diff --git a/pkg/statistics/handle/syncload/stats_syncload_test.go b/pkg/statistics/handle/syncload/stats_syncload_test.go index 8a8929d9d93e5..3483a94f5225c 100644 --- a/pkg/statistics/handle/syncload/stats_syncload_test.go +++ b/pkg/statistics/handle/syncload/stats_syncload_test.go @@ -344,3 +344,47 @@ func TestRetry(t *testing.T) { } require.NoError(t, failpoint.Disable("github.com/pingcap/tidb/pkg/statistics/handle/syncload/mockReadStatsForOneFail")) } + +func TestSendLoadRequestsWaitTooLong(t *testing.T) { + originConfig := config.GetGlobalConfig() + newConfig := config.NewConfig() + newConfig.Performance.StatsLoadConcurrency = 0 // no worker to consume channel + newConfig.Performance.StatsLoadQueueSize = 10000 + config.StoreGlobalConfig(newConfig) + defer config.StoreGlobalConfig(originConfig) + store, dom := testkit.CreateMockStoreAndDomain(t) + + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("create table t(a int, b int, c int, primary key(a), key idx(b,c))") + tk.MustExec("insert into t values (1,1,1),(2,2,2),(3,3,3)") + + oriLease := dom.StatsHandle().Lease() + dom.StatsHandle().SetLease(1) + defer func() { + dom.StatsHandle().SetLease(oriLease) + }() + tk.MustExec("analyze table t all columns") + h := dom.StatsHandle() + is := dom.InfoSchema() + tbl, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t")) + require.NoError(t, err) + tableInfo := tbl.Meta() + neededColumns := make([]model.StatsLoadItem, 0, len(tableInfo.Columns)) + for _, col := range tableInfo.Columns { + neededColumns = append(neededColumns, model.StatsLoadItem{TableItemID: model.TableItemID{TableID: tableInfo.ID, ID: col.ID, IsIndex: false}, FullLoad: true}) + } + stmtCtx := stmtctx.NewStmtCtx() + timeout := time.Nanosecond * 100 + require.NoError(t, h.SendLoadRequests(stmtCtx, neededColumns, timeout)) + for _, resultCh := range stmtCtx.StatsLoad.ResultCh { + rs1 := <-resultCh + require.Error(t, rs1.Err) + } + stmtCtx1 := stmtctx.NewStmtCtx() + require.NoError(t, h.SendLoadRequests(stmtCtx1, neededColumns, timeout)) + for _, resultCh := range stmtCtx1.StatsLoad.ResultCh { + rs1 := <-resultCh + require.Error(t, rs1.Err) + } +} diff --git a/pkg/statistics/handle/util/BUILD.bazel b/pkg/statistics/handle/util/BUILD.bazel index 51be207e062e5..b41c62030485e 100644 --- a/pkg/statistics/handle/util/BUILD.bazel +++ b/pkg/statistics/handle/util/BUILD.bazel @@ -29,6 +29,7 @@ go_library( "//pkg/util/sqlexec/mock", "@com_github_ngaut_pools//:pools", "@com_github_pingcap_errors//:errors", + "@com_github_pingcap_failpoint//:failpoint", "@com_github_pingcap_tipb//go-tipb", "@com_github_tiancaiamao_gp//:gp", "@com_github_tikv_client_go_v2//oracle", diff --git a/pkg/statistics/handle/util/util.go b/pkg/statistics/handle/util/util.go index 1db8addc5fae3..c546b410623e0 100644 --- a/pkg/statistics/handle/util/util.go +++ b/pkg/statistics/handle/util/util.go @@ -20,6 +20,7 @@ import ( "time" "github.com/pingcap/errors" + "github.com/pingcap/failpoint" "github.com/pingcap/tidb/pkg/kv" "github.com/pingcap/tidb/pkg/parser/ast" 
"github.com/pingcap/tidb/pkg/parser/terror" @@ -220,6 +221,9 @@ func Exec(sctx sessionctx.Context, sql string, args ...any) (sqlexec.RecordSet, // ExecRows is a helper function to execute sql and return rows and fields. func ExecRows(sctx sessionctx.Context, sql string, args ...any) (rows []chunk.Row, fields []*ast.ResultField, err error) { + failpoint.Inject("ExecRowsTimeout", func() { + failpoint.Return(nil, nil, errors.New("inject timeout error")) + }) if intest.InTest { if v := sctx.Value(mock.RestrictedSQLExecutorKey{}); v != nil { return v.(*mock.MockRestrictedSQLExecutor).ExecRestrictedSQL(StatsCtx, diff --git a/pkg/statistics/histogram.go b/pkg/statistics/histogram.go index 000dd80c5be2c..7a498e187b57a 100644 --- a/pkg/statistics/histogram.go +++ b/pkg/statistics/histogram.go @@ -795,8 +795,8 @@ func HistogramToProto(hg *Histogram) *tipb.Histogram { for i := 0; i < hg.Len(); i++ { bkt := &tipb.Bucket{ Count: hg.Buckets[i].Count, - LowerBound: hg.GetLower(i).GetBytes(), - UpperBound: hg.GetUpper(i).GetBytes(), + LowerBound: DeepSlice(hg.GetLower(i).GetBytes()), + UpperBound: DeepSlice(hg.GetUpper(i).GetBytes()), Repeats: hg.Buckets[i].Repeat, Ndv: &hg.Buckets[i].NDV, } @@ -805,6 +805,13 @@ func HistogramToProto(hg *Histogram) *tipb.Histogram { return protoHg } +// DeepSlice sallowly clones a slice. +func DeepSlice[T any](s []T) []T { + r := make([]T, len(s)) + copy(r, s) + return r +} + // HistogramFromProto converts Histogram from its protobuf representation. // Note that we will set BytesDatum for the lower/upper bound in the bucket, the decode will // be after all histograms merged. diff --git a/pkg/statistics/integration_test.go b/pkg/statistics/integration_test.go index 4fa12778dd69b..acc970d3e769f 100644 --- a/pkg/statistics/integration_test.go +++ b/pkg/statistics/integration_test.go @@ -574,3 +574,25 @@ func TestTableLastAnalyzeVersion(t *testing.T) { require.True(t, found) require.NotEqual(t, uint64(0), statsTbl.LastAnalyzeVersion) } + +func TestLastAnalyzeVersionNotChangedWithAsyncStatsLoad(t *testing.T) { + store, dom := testkit.CreateMockStoreAndDomain(t) + tk := testkit.NewTestKit(t, store) + + tk.MustExec("set @@tidb_stats_load_sync_wait = 0;") + tk.MustExec("use test") + tk.MustExec("create table t(a int, b int);") + require.NoError(t, dom.StatsHandle().HandleDDLEvent(<-dom.StatsHandle().DDLEventCh())) + require.NoError(t, dom.StatsHandle().Update(dom.InfoSchema())) + tk.MustExec("insert into t values (1, 1);") + err := dom.StatsHandle().DumpStatsDeltaToKV(true) + require.NoError(t, err) + tk.MustExec("alter table t add column c int default 1;") + dom.StatsHandle().HandleDDLEvent(<-dom.StatsHandle().DDLEventCh()) + tk.MustExec("select * from t where a = 1 or b = 1 or c = 1;") + require.NoError(t, dom.StatsHandle().LoadNeededHistograms()) + result := tk.MustQuery("show stats_meta where table_name = 't'") + require.Len(t, result.Rows(), 1) + // The last analyze time. + require.Equal(t, "", result.Rows()[0][6]) +} diff --git a/pkg/statistics/table.go b/pkg/statistics/table.go index 1f6248d9a74ac..5a0d32999146b 100644 --- a/pkg/statistics/table.go +++ b/pkg/statistics/table.go @@ -644,7 +644,7 @@ func (t *Table) GetStatsHealthy() (int64, bool) { } // ColumnIsLoadNeeded checks whether the column needs trigger the async/sync load. -// The Column should be visible in the table and really has analyzed statistics in the stroage. +// The Column should be visible in the table and really has analyzed statistics in the storage. 
// Also, if the stats has been loaded into the memory, we also don't need to load it. // We return the Column together with the checking result, to avoid accessing the map multiple times. // The first bool is whether we have it in memory. The second bool is whether this column has stats in the system table or not. diff --git a/pkg/ttl/ttlworker/job_manager.go b/pkg/ttl/ttlworker/job_manager.go index 14801989efb78..d8112568d475f 100644 --- a/pkg/ttl/ttlworker/job_manager.go +++ b/pkg/ttl/ttlworker/job_manager.go @@ -576,6 +576,18 @@ func (m *JobManager) rescheduleJobs(se session.Session, now time.Time) { now = now.In(tz) } + // Try to lock HB timeout jobs, to avoid the case that when the `tidb_ttl_job_enable = 'OFF'`, the HB timeout job will + // never be cancelled. + jobTables := m.readyForLockHBTimeoutJobTables(now) + // TODO: also consider to resume tables, but it's fine to left them there, as other nodes will take this job + // when the heart beat is not sent + for _, table := range jobTables { + logutil.Logger(m.ctx).Info("try lock new job", zap.Int64("tableID", table.ID)) + if _, err := m.lockHBTimeoutJob(m.ctx, se, table, now); err != nil { + logutil.Logger(m.ctx).Warn("failed to lock heartbeat timeout job", zap.Error(err)) + } + } + if !variable.EnableTTLJob.Load() || !timeutil.WithinDayTimePeriod(variable.TTLJobScheduleWindowStartTime.Load(), variable.TTLJobScheduleWindowEndTime.Load(), now) { if len(m.runningJobs) > 0 { for _, job := range m.runningJobs { @@ -616,16 +628,6 @@ func (m *JobManager) rescheduleJobs(se session.Session, now time.Time) { } m.removeJob(job) } - - jobTables := m.readyForLockHBTimeoutJobTables(now) - // TODO: also consider to resume tables, but it's fine to left them there, as other nodes will take this job - // when the heart beat is not sent - for _, table := range jobTables { - logutil.Logger(m.ctx).Info("try lock new job", zap.Int64("tableID", table.ID)) - if _, err := m.lockHBTimeoutJob(m.ctx, se, table, now); err != nil { - logutil.Logger(m.ctx).Warn("failed to lock heartbeat timeout job", zap.Error(err)) - } - } } func (m *JobManager) localJobs() []*ttlJob { diff --git a/pkg/ttl/ttlworker/job_manager_integration_test.go b/pkg/ttl/ttlworker/job_manager_integration_test.go index f5e7f3fee164c..66e94e7bbf27e 100644 --- a/pkg/ttl/ttlworker/job_manager_integration_test.go +++ b/pkg/ttl/ttlworker/job_manager_integration_test.go @@ -1490,3 +1490,43 @@ func TestFinishError(t *testing.T) { m.UpdateHeartBeat(context.Background(), se, now) tk.MustQuery("select count(*) from mysql.tidb_ttl_task").Check(testkit.Rows("0")) } + +func TestDisableTTLAfterLoseHeartbeat(t *testing.T) { + store, dom := testkit.CreateMockStoreAndDomain(t) + waitAndStopTTLManager(t, dom) + tk := testkit.NewTestKit(t, store) + + sessionFactory := sessionFactory(t, store) + se := sessionFactory() + + tk.MustExec("use test") + tk.MustExec("CREATE TABLE t (id INT PRIMARY KEY, created_at DATETIME) TTL = created_at + INTERVAL 1 HOUR") + testTable, err := dom.InfoSchema().TableByName(model.NewCIStr("test"), model.NewCIStr("t")) + require.NoError(t, err) + + ctx := context.Background() + m1 := ttlworker.NewJobManager("test-ttl-job-manager-1", nil, store, nil, nil) + require.NoError(t, m1.InfoSchemaCache().Update(se)) + require.NoError(t, m1.TableStatusCache().Update(ctx, se)) + + now := se.Now() + _, err = m1.LockJob(context.Background(), se, m1.InfoSchemaCache().Tables[testTable.Meta().ID], now, uuid.NewString(), false) + require.NoError(t, err) + tk.MustQuery("select current_job_status from 
mysql.tidb_ttl_table_status").Check(testkit.Rows("running")) + + // lose heartbeat. Simulate the situation that m1 doesn't update the hearbeat for 8 hours. + now = now.Add(time.Hour * 8) + + // stop the tidb_ttl_job_enable + tk.MustExec("set global tidb_ttl_job_enable = 'OFF'") + defer tk.MustExec("set global tidb_ttl_job_enable = 'ON'") + + // reschedule and try to get the job + m2 := ttlworker.NewJobManager("test-ttl-job-manager-2", nil, store, nil, nil) + require.NoError(t, m2.InfoSchemaCache().Update(se)) + require.NoError(t, m2.TableStatusCache().Update(ctx, se)) + m2.RescheduleJobs(se, now) + + // the job should have been cancelled + tk.MustQuery("select current_job_status from mysql.tidb_ttl_table_status").Check(testkit.Rows("")) +} diff --git a/pkg/util/chunk/BUILD.bazel b/pkg/util/chunk/BUILD.bazel index 7f3cfca2a24a6..0d8a6e96c52bc 100644 --- a/pkg/util/chunk/BUILD.bazel +++ b/pkg/util/chunk/BUILD.bazel @@ -27,9 +27,11 @@ go_library( "//pkg/parser/terror", "//pkg/types", "//pkg/util/checksum", + "//pkg/util/disjointset", "//pkg/util/disk", "//pkg/util/encrypt", "//pkg/util/hack", + "//pkg/util/intest", "//pkg/util/logutil", "//pkg/util/memory", "//pkg/util/syncutil", diff --git a/pkg/util/chunk/chunk.go b/pkg/util/chunk/chunk.go index 03f2abd0b2ea1..b0070c36b5c6c 100644 --- a/pkg/util/chunk/chunk.go +++ b/pkg/util/chunk/chunk.go @@ -215,10 +215,12 @@ func (c *Chunk) MakeRefTo(dstColIdx int, src *Chunk, srcColIdx int) error { return nil } -// SwapColumn swaps Column "c.columns[colIdx]" with Column +// swapColumn swaps Column "c.columns[colIdx]" with Column // "other.columns[otherIdx]". If there exists columns refer to the Column to be // swapped, we need to re-build the reference. -func (c *Chunk) SwapColumn(colIdx int, other *Chunk, otherIdx int) error { +// this function should not be used directly, if you wants to swap columns between two chunks, +// use ColumnSwapHelper.SwapColumns instead. 
+func (c *Chunk) swapColumn(colIdx int, other *Chunk, otherIdx int) error { if c.sel != nil || other.sel != nil { return errors.New(msgErrSelNotNil) } diff --git a/pkg/util/chunk/chunk_test.go b/pkg/util/chunk/chunk_test.go index 690f831001597..18d6a0c64fffa 100644 --- a/pkg/util/chunk/chunk_test.go +++ b/pkg/util/chunk/chunk_test.go @@ -640,25 +640,25 @@ func TestSwapColumn(t *testing.T) { checkRef() // swap two chunk's columns - require.NoError(t, chk1.SwapColumn(0, chk2, 0)) + require.NoError(t, chk1.swapColumn(0, chk2, 0)) checkRef() - require.NoError(t, chk1.SwapColumn(0, chk2, 0)) + require.NoError(t, chk1.swapColumn(0, chk2, 0)) checkRef() // swap reference and referenced columns - require.NoError(t, chk2.SwapColumn(1, chk2, 0)) + require.NoError(t, chk2.swapColumn(1, chk2, 0)) checkRef() // swap the same column in the same chunk - require.NoError(t, chk2.SwapColumn(1, chk2, 1)) + require.NoError(t, chk2.swapColumn(1, chk2, 1)) checkRef() // swap reference and another column - require.NoError(t, chk2.SwapColumn(1, chk2, 2)) + require.NoError(t, chk2.swapColumn(1, chk2, 2)) checkRef() - require.NoError(t, chk2.SwapColumn(2, chk2, 0)) + require.NoError(t, chk2.swapColumn(2, chk2, 0)) checkRef() } diff --git a/pkg/util/chunk/chunk_util.go b/pkg/util/chunk/chunk_util.go index b0157f2b2bdc7..8761aab7e2c21 100644 --- a/pkg/util/chunk/chunk_util.go +++ b/pkg/util/chunk/chunk_util.go @@ -17,11 +17,14 @@ package chunk import ( "io" "os" + "sync/atomic" "github.com/pingcap/errors" "github.com/pingcap/tidb/pkg/config" "github.com/pingcap/tidb/pkg/util/checksum" + "github.com/pingcap/tidb/pkg/util/disjointset" "github.com/pingcap/tidb/pkg/util/encrypt" + "github.com/pingcap/tidb/pkg/util/intest" ) // CopySelectedJoinRowsDirect directly copies the selected joined rows from the source Chunk @@ -237,3 +240,125 @@ func (l *diskFileReaderWriter) write(writeData []byte) (n int, err error) { l.offWrite += int64(writeNum) return writeNum, err } + +// ColumnSwapHelper is used to help swap columns in a chunk. +type ColumnSwapHelper struct { + // InputIdxToOutputIdxes maps the input column index to the output column indexes. + InputIdxToOutputIdxes map[int][]int + // mergedInputIdxToOutputIdxes is only determined in runtime when saw the input chunk. + mergedInputIdxToOutputIdxes atomic.Pointer[map[int][]int] +} + +// SwapColumns evaluates "Column" expressions. +// it will change the content of the input Chunk. +func (helper *ColumnSwapHelper) SwapColumns(input, output *Chunk) error { + // mergedInputIdxToOutputIdxes only can be determined in runtime when we saw the input chunk structure. + if helper.mergedInputIdxToOutputIdxes.Load() == nil { + helper.mergeInputIdxToOutputIdxes(input, helper.InputIdxToOutputIdxes) + } + for inputIdx, outputIdxes := range *helper.mergedInputIdxToOutputIdxes.Load() { + if err := output.swapColumn(outputIdxes[0], input, inputIdx); err != nil { + return err + } + for i, length := 1, len(outputIdxes); i < length; i++ { + output.MakeRef(outputIdxes[0], outputIdxes[i]) + } + } + return nil +} + +// mergeInputIdxToOutputIdxes merges separate inputIdxToOutputIdxes entries when column references +// are detected within the input chunk. This process ensures consistent handling of columns derived +// from the same original source. +// +// Consider the following scenario: +// +// Initial scan operation produces a column 'a': +// +// scan: a (addr: ???) 
+// +// This column 'a' is used in the first projection (proj1) to create two columns a1 and a2, both referencing 'a': +// +// proj1 +// / \ +// / \ +// / \ +// a1 (addr: 0xe) a2 (addr: 0xe) +// / \ +// / \ +// / \ +// proj2 proj2 +// / \ / \ +// / \ / \ +// a3 a4 a5 a6 +// +// (addr: 0xe) (addr: 0xe) (addr: 0xe) (addr: 0xe) +// +// Here, a1 and a2 share the same address (0xe), indicating they reference the same data from the original 'a'. +// +// When moving to the second projection (proj2), the system tries to project these columns further: +// - The first set (left side) consists of a3 and a4, derived from a1, both retaining the address (0xe). +// - The second set (right side) consists of a5 and a6, derived from a2, also starting with address (0xe). +// +// When proj1 is complete, the output chunk contains two columns [a1, a2], both derived from the single column 'a' from the scan. +// Since both a1 and a2 are column references with the same address (0xe), they are treated as referencing the same data. +// +// In proj2, two separate items are created: +// - <0, [0,1]>: This means the 0th input column (a1) is projected twice, into the 0th and 1st columns of the output chunk. +// - <1, [2,3]>: This means the 1st input column (a2) is projected twice, into the 2nd and 3rd columns of the output chunk. +// +// Due to the column swapping logic in each projection, after applying the <0, [0,1]> projection, +// the addresses for a1 and a2 may become swapped or invalid: +// +// proj1: a1 (addr: invalid) a2 (addr: invalid) +// +// This can lead to issues in proj2, where further operations on these columns may be unsafe: +// +// proj2: a3 (addr: 0xe) a4 (addr: 0xe) a5 (addr: ???) a6 (addr: ???) +// +// Therefore, it's crucial to identify and merge the original column references early, ensuring +// the final inputIdxToOutputIdxes mapping accurately reflects the shared origins of the data. +// For instance, <0, [0,1,2,3]> indicates that the 0th input column (original 'a') is referenced +// by all four output columns in the final output. +// +// mergeInputIdxToOutputIdxes merges inputIdxToOutputIdxes based on detected column references. +// This ensures that columns with the same reference are correctly handled in the output chunk. +func (helper *ColumnSwapHelper) mergeInputIdxToOutputIdxes(input *Chunk, inputIdxToOutputIdxes map[int][]int) { + originalDJSet := disjointset.NewSet[int](4) + flag := make([]bool, input.NumCols()) + // Detect self column-references inside the input chunk by comparing column addresses + for i := 0; i < input.NumCols(); i++ { + if flag[i] { + continue + } + for j := i + 1; j < input.NumCols(); j++ { + if input.Column(i) == input.Column(j) { + flag[j] = true + originalDJSet.Union(i, j) + } + } + } + // Merge inputIdxToOutputIdxes based on the detected column references. + newInputIdxToOutputIdxes := make(map[int][]int, len(inputIdxToOutputIdxes)) + for inputIdx := range inputIdxToOutputIdxes { + // Root idx is internal offset, not the right column index. + originalRootIdx := originalDJSet.FindRoot(inputIdx) + originalVal, ok := originalDJSet.FindVal(originalRootIdx) + intest.Assert(ok) + mergedOutputIdxes := newInputIdxToOutputIdxes[originalVal] + mergedOutputIdxes = append(mergedOutputIdxes, inputIdxToOutputIdxes[inputIdx]...) + newInputIdxToOutputIdxes[originalVal] = mergedOutputIdxes + } + // Update the merged inputIdxToOutputIdxes automatically. + // Once failed, it means other worker has done this job at meantime. 
+ helper.mergedInputIdxToOutputIdxes.CompareAndSwap(nil, &newInputIdxToOutputIdxes) +} + +// NewColumnSwapHelper creates a new ColumnSwapHelper. +func NewColumnSwapHelper(usedColumnIndex []int) *ColumnSwapHelper { + helper := &ColumnSwapHelper{InputIdxToOutputIdxes: make(map[int][]int)} + for outputIndex, inputIndex := range usedColumnIndex { + helper.InputIdxToOutputIdxes[inputIndex] = append(helper.InputIdxToOutputIdxes[inputIndex], outputIndex) + } + return helper +} diff --git a/pkg/util/chunk/chunk_util_test.go b/pkg/util/chunk/chunk_util_test.go index 32614ada18703..453c9cdcf28c2 100644 --- a/pkg/util/chunk/chunk_util_test.go +++ b/pkg/util/chunk/chunk_util_test.go @@ -17,8 +17,10 @@ package chunk import ( "math/rand" "reflect" + "slices" "testing" + "github.com/pingcap/tidb/pkg/parser/mysql" "github.com/pingcap/tidb/pkg/types" "github.com/stretchr/testify/require" ) @@ -219,3 +221,41 @@ func BenchmarkAppendSelectedRow(b *testing.B) { } } } + +func TestMergeInputIdxToOutputIdxes(t *testing.T) { + inputIdxToOutputIdxes := make(map[int][]int) + // input 0th should be column referred as 0th and 1st in output columns. + inputIdxToOutputIdxes[0] = []int{0, 1} + // input 1th should be column referred as 2nd and 3rd in output columns. + inputIdxToOutputIdxes[1] = []int{2, 3} + columnEval := ColumnSwapHelper{InputIdxToOutputIdxes: inputIdxToOutputIdxes} + + input := NewEmptyChunk([]*types.FieldType{types.NewFieldType(mysql.TypeLonglong), types.NewFieldType(mysql.TypeLonglong)}) + input.AppendInt64(0, 99) + // input chunk's 0th and 1st are column referred itself. + input.MakeRef(0, 1) + + // chunk: col1 <---(ref) col2 + // ____________/ \___________/ \___ + // proj: col1 col2 col3 col4 + // + // original case after inputIdxToOutputIdxes[0], the original col2 will be nil pointer + // cause consecutive col3,col4 ref projection are invalid. + // + // after fix, the new inputIdxToOutputIdxes should be: inputIdxToOutputIdxes[0]: {0, 1, 2, 3} + + output := NewEmptyChunk([]*types.FieldType{types.NewFieldType(mysql.TypeLonglong), types.NewFieldType(mysql.TypeLonglong), + types.NewFieldType(mysql.TypeLonglong), types.NewFieldType(mysql.TypeLonglong)}) + + err := columnEval.SwapColumns(input, output) + require.NoError(t, err) + // all four columns are column-referred, pointing to the first one. 
+ require.Equal(t, output.Column(0), output.Column(1)) + require.Equal(t, output.Column(1), output.Column(2)) + require.Equal(t, output.Column(2), output.Column(3)) + require.Equal(t, output.GetRow(0).GetInt64(0), int64(99)) + + require.Equal(t, len(*columnEval.mergedInputIdxToOutputIdxes.Load()), 1) + slices.Sort((*columnEval.mergedInputIdxToOutputIdxes.Load())[0]) + require.Equal(t, (*columnEval.mergedInputIdxToOutputIdxes.Load())[0], []int{0, 1, 2, 3}) +} diff --git a/pkg/util/disjointset/BUILD.bazel b/pkg/util/disjointset/BUILD.bazel index 941410ed9d54b..293f0194a0760 100644 --- a/pkg/util/disjointset/BUILD.bazel +++ b/pkg/util/disjointset/BUILD.bazel @@ -2,7 +2,10 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") go_library( name = "disjointset", - srcs = ["int_set.go"], + srcs = [ + "int_set.go", + "set.go", + ], importpath = "github.com/pingcap/tidb/pkg/util/disjointset", visibility = ["//visibility:public"], ) diff --git a/pkg/util/disjointset/int_set.go b/pkg/util/disjointset/int_set.go index 05846e3840850..1396c634965aa 100644 --- a/pkg/util/disjointset/int_set.go +++ b/pkg/util/disjointset/int_set.go @@ -38,6 +38,7 @@ func (m *IntSet) FindRoot(a int) int { if a == m.parent[a] { return a } + // Path compression, which leads the time complexity to the inverse Ackermann function. m.parent[a] = m.FindRoot(m.parent[a]) return m.parent[a] } diff --git a/pkg/util/disjointset/set.go b/pkg/util/disjointset/set.go new file mode 100644 index 0000000000000..9e8eee37f7677 --- /dev/null +++ b/pkg/util/disjointset/set.go @@ -0,0 +1,85 @@ +// Copyright 2024 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package disjointset + +// Set is the universal implementation of a disjoint set. +// It's designed for sparse cases or non-integer types. +// If you are dealing with continuous integers, you should use SimpleIntSet to avoid the cost of a hash map. +// We hash the original value to an integer index and then apply the core disjoint set algorithm. +// Time complexity: the union operation has an inverse Ackermann function time complexity, which is very close to O(1). +type Set[T comparable] struct { + parent []int + val2Idx map[T]int + idx2Val map[int]T + tailIdx int +} + +// NewSet creates a disjoint set. +func NewSet[T comparable](size int) *Set[T] { + return &Set[T]{ + parent: make([]int, 0, size), + val2Idx: make(map[T]int, size), + idx2Val: make(map[int]T, size), + tailIdx: 0, + } +} + +func (s *Set[T]) findRootOriginalVal(a T) int { + idx, ok := s.val2Idx[a] + if !ok { + s.parent = append(s.parent, s.tailIdx) + s.val2Idx[a] = s.tailIdx + s.tailIdx++ + s.idx2Val[s.tailIdx-1] = a + return s.tailIdx - 1 + } + return s.findRootInternal(idx) +} + +// findRoot is an internal implementation. Call it inside findRootOriginalVal. +func (s *Set[T]) findRootInternal(a int) int { + if s.parent[a] != a { + // Path compression, which leads the time complexity to the inverse Ackermann function. 
+ s.parent[a] = s.findRootInternal(s.parent[a]) + } + return s.parent[a] +} + +// InSameGroup checks whether a and b are in the same group. +func (s *Set[T]) InSameGroup(a, b T) bool { + return s.findRootOriginalVal(a) == s.findRootOriginalVal(b) +} + +// Union joins two sets in the disjoint set. +func (s *Set[T]) Union(a, b T) { + rootA := s.findRootOriginalVal(a) + rootB := s.findRootOriginalVal(b) + // take b as successor, respect the rootA as the root of the new set. + if rootA != rootB { + s.parent[rootB] = rootA + } +} + +// FindRoot finds the root of the set that contains a. +func (s *Set[T]) FindRoot(a T) int { + // if a is not in the set, assign a new index to it. + return s.findRootOriginalVal(a) +} + +// FindVal finds the value of the set corresponding to the index. +func (s *Set[T]) FindVal(idx int) (T, bool) { + v, ok := s.idx2Val[s.findRootInternal(idx)] + return v, ok +} diff --git a/tests/integrationtest/r/executor/ddl.result b/tests/integrationtest/r/executor/ddl.result index fcb645e82954a..78ddc972a5003 100644 --- a/tests/integrationtest/r/executor/ddl.result +++ b/tests/integrationtest/r/executor/ddl.result @@ -379,7 +379,10 @@ create or replace definer='root'@'localhost' view v_nested as select * from v_ne Error 1146 (42S02): Table 'executor__ddl.v_nested' doesn't exist drop table test_v_nested; drop view v_nested, v_nested2; -create view v_stale as select * from source_table as of timestamp current_timestamp(3); +select sleep(1); +sleep(1) +0 +create view v_stale as select * from source_table as of timestamp date_sub(current_timestamp(3), interval 1 second); Error 1356 (HY000): View 'executor__ddl.v_stale' references invalid table(s) or column(s) or function(s) or definer/invoker of view lack rights to use them drop view if exists v1,v2; drop table if exists t1; diff --git a/tests/integrationtest/r/executor/executor.result b/tests/integrationtest/r/executor/executor.result index cbb02d5e20027..e2ed62b485521 100644 --- a/tests/integrationtest/r/executor/executor.result +++ b/tests/integrationtest/r/executor/executor.result @@ -4578,4 +4578,5 @@ LOCK TABLE executor__executor.t WRITE, test2.t2 WRITE; LOCK TABLE executor__executor.t WRITE, test2.t2 WRITE; Error 8020 (HY000): Table 't' was locked in WRITE by server: session: unlock tables; +unlock tables; drop user 'testuser'@'localhost'; diff --git a/tests/integrationtest/r/executor/infoschema_reader.result b/tests/integrationtest/r/executor/infoschema_reader.result index 799b7b98eefde..35d17c0d46f76 100644 --- a/tests/integrationtest/r/executor/infoschema_reader.result +++ b/tests/integrationtest/r/executor/infoschema_reader.result @@ -330,3 +330,9 @@ sleep(1) select table_rows, avg_row_length, data_length, index_length from information_schema.tables where table_name='t' AND TABLE_SCHEMA='executor__infoschema_reader'; table_rows avg_row_length data_length index_length 3 18 54 6 +drop table if exists t; +create table t (c text); +alter table t add index idx_t (c(16)); +select SUB_PART from information_schema.statistics where TABLE_SCHEMA = 'executor__infoschema_reader' and TABLE_NAME = 't'; +SUB_PART +16 diff --git a/tests/integrationtest/r/expression/cast.result b/tests/integrationtest/r/expression/cast.result index 76c9977dc52fb..a919c1e53ccf2 100644 --- a/tests/integrationtest/r/expression/cast.result +++ b/tests/integrationtest/r/expression/cast.result @@ -148,3 +148,33 @@ select 1.194192591e9 > t0.c0 from t0; select 1.194192591e9 < t0.c0 from t0; 1.194192591e9 < t0.c0 0 +drop table if exists test; +CREATE TABLE `test` 
( +`id` bigint(20) NOT NULL, +`update_user` varchar(32) DEFAULT NULL, +PRIMARY KEY (`id`) /*T![clustered_index] CLUSTERED */ +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin; +insert into test values(1,'张三'); +insert into test values(2,'李四'); +insert into test values(3,'张三'); +insert into test values(4,'李四'); +select * from test order by cast(update_user as char character set gbk) desc , id limit 3; +id update_user +1 张三 +3 张三 +2 李四 +drop table test; +CREATE TABLE `test` ( +`id` bigint NOT NULL, +`update_user` varchar(32) CHARACTER SET gbk COLLATE gbk_chinese_ci DEFAULT NULL, +PRIMARY KEY (`id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin; +insert into test values(1,'张三'); +insert into test values(2,'李四'); +insert into test values(3,'张三'); +insert into test values(4,'李四'); +select * from test order by cast(update_user as char) desc , id limit 3; +id update_user +2 李四 +4 李四 +1 张三 diff --git a/tests/integrationtest/t/executor/ddl.test b/tests/integrationtest/t/executor/ddl.test index 59bdbcc68cfa1..def024b766479 100644 --- a/tests/integrationtest/t/executor/ddl.test +++ b/tests/integrationtest/t/executor/ddl.test @@ -313,8 +313,9 @@ create or replace definer='root'@'localhost' view v_nested as select * from v_ne drop table test_v_nested; drop view v_nested, v_nested2; ## Refer https://github.com/pingcap/tidb/issues/25876 +select sleep(1); -- error 1356 -create view v_stale as select * from source_table as of timestamp current_timestamp(3); +create view v_stale as select * from source_table as of timestamp date_sub(current_timestamp(3), interval 1 second); ## Refer https://github.com/pingcap/tidb/issues/32682 drop view if exists v1,v2; drop table if exists t1; diff --git a/tests/integrationtest/t/executor/executor.test b/tests/integrationtest/t/executor/executor.test index e177f2cd6df01..8c910e62e517f 100644 --- a/tests/integrationtest/t/executor/executor.test +++ b/tests/integrationtest/t/executor/executor.test @@ -2909,6 +2909,9 @@ connection default; --error 8020 LOCK TABLE executor__executor.t WRITE, test2.t2 WRITE; +connection conn1; +unlock tables; + disconnect conn1; unlock tables; drop user 'testuser'@'localhost'; diff --git a/tests/integrationtest/t/executor/infoschema_reader.test b/tests/integrationtest/t/executor/infoschema_reader.test index 04f1318948268..4a9c9f360db25 100644 --- a/tests/integrationtest/t/executor/infoschema_reader.test +++ b/tests/integrationtest/t/executor/infoschema_reader.test @@ -264,3 +264,9 @@ insert into t(a, b, c) values(1, 2, 'c'), (7, 3, 'd'), (12, 4, 'e'); analyze table t; select sleep(1); select table_rows, avg_row_length, data_length, index_length from information_schema.tables where table_name='t' AND TABLE_SCHEMA='executor__infoschema_reader'; + + +drop table if exists t; +create table t (c text); +alter table t add index idx_t (c(16)); +select SUB_PART from information_schema.statistics where TABLE_SCHEMA = 'executor__infoschema_reader' and TABLE_NAME = 't'; diff --git a/tests/integrationtest/t/expression/cast.test b/tests/integrationtest/t/expression/cast.test index 03843628ab9e6..7af48437383a9 100644 --- a/tests/integrationtest/t/expression/cast.test +++ b/tests/integrationtest/t/expression/cast.test @@ -87,3 +87,29 @@ select t0.c0 > 1.194192591e9 from t0; select t0.c0 < 1.194192591e9 from t0; select 1.194192591e9 > t0.c0 from t0; select 1.194192591e9 < t0.c0 from t0; + +# TestCastAsStringExplicitCharSet +drop table if exists test; +CREATE TABLE `test` ( + `id` bigint(20) NOT NULL, + `update_user` varchar(32) DEFAULT 
NULL, + PRIMARY KEY (`id`) /*T![clustered_index] CLUSTERED */ +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin; +insert into test values(1,'张三'); +insert into test values(2,'李四'); +insert into test values(3,'张三'); +insert into test values(4,'李四'); +select * from test order by cast(update_user as char character set gbk) desc , id limit 3; + +drop table test; +CREATE TABLE `test` ( + `id` bigint NOT NULL, + `update_user` varchar(32) CHARACTER SET gbk COLLATE gbk_chinese_ci DEFAULT NULL, + PRIMARY KEY (`id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin; +insert into test values(1,'张三'); +insert into test values(2,'李四'); +insert into test values(3,'张三'); +insert into test values(4,'李四'); +select * from test order by cast(update_user as char) desc , id limit 3; + diff --git a/tests/realtikvtest/addindextest2/BUILD.bazel b/tests/realtikvtest/addindextest2/BUILD.bazel index e711ecdf094b8..68dcbb28538b0 100644 --- a/tests/realtikvtest/addindextest2/BUILD.bazel +++ b/tests/realtikvtest/addindextest2/BUILD.bazel @@ -26,5 +26,6 @@ go_test( "@com_github_phayes_freeport//:freeport", "@com_github_pingcap_failpoint//:failpoint", "@com_github_stretchr_testify//require", + "@com_github_tikv_client_go_v2//oracle", ], ) diff --git a/tests/realtikvtest/addindextest2/global_sort_test.go b/tests/realtikvtest/addindextest2/global_sort_test.go index 24abda2c699b2..258e856c17f2d 100644 --- a/tests/realtikvtest/addindextest2/global_sort_test.go +++ b/tests/realtikvtest/addindextest2/global_sort_test.go @@ -20,6 +20,7 @@ import ( "strconv" "strings" "testing" + "time" "github.com/fsouza/fake-gcs-server/fakestorage" "github.com/phayes/freeport" @@ -38,6 +39,7 @@ import ( "github.com/pingcap/tidb/pkg/types" "github.com/pingcap/tidb/tests/realtikvtest" "github.com/stretchr/testify/require" + "github.com/tikv/client-go/v2/oracle" ) func init() { @@ -313,7 +315,11 @@ func TestIngestUseGivenTS(t *testing.T) { t.Cleanup(func() { tk.MustExec("set @@global.tidb_cloud_storage_uri = '';") }) - err = failpoint.Enable("github.com/pingcap/tidb/pkg/ddl/mockTSForGlobalSort", `return(123456789)`) + + presetTS := oracle.GoTimeToTS(time.Now()) + failpointTerm := fmt.Sprintf(`return(%d)`, presetTS) + + err = failpoint.Enable("github.com/pingcap/tidb/pkg/ddl/mockTSForGlobalSort", failpointTerm) require.NoError(t, err) tk.MustExec("create table t (a int);") @@ -330,10 +336,10 @@ func TestIngestUseGivenTS(t *testing.T) { require.NoError(t, err) tikvStore := dom.Store().(helper.Storage) newHelper := helper.NewHelper(tikvStore) - mvccResp, err := newHelper.GetMvccByEncodedKeyWithTS(idxKey, 123456789) + mvccResp, err := newHelper.GetMvccByEncodedKeyWithTS(idxKey, presetTS) require.NoError(t, err) require.NotNil(t, mvccResp) require.NotNil(t, mvccResp.Info) require.Greater(t, len(mvccResp.Info.Writes), 0) - require.Equal(t, uint64(123456789), mvccResp.Info.Writes[0].CommitTs) + require.Equal(t, presetTS, mvccResp.Info.Writes[0].CommitTs) } diff --git a/tests/realtikvtest/addindextest4/ingest_test.go b/tests/realtikvtest/addindextest4/ingest_test.go index 2e393ce4f8670..604f6ae18988a 100644 --- a/tests/realtikvtest/addindextest4/ingest_test.go +++ b/tests/realtikvtest/addindextest4/ingest_test.go @@ -391,6 +391,36 @@ func TestAddIndexFinishImportError(t *testing.T) { require.True(t, strings.Contains(jobTp, "ingest"), jobTp) } +func TestAddIndexDiskQuotaTS(t *testing.T) { + store := realtikvtest.CreateMockStoreAndSetup(t) + tk := testkit.NewTestKit(t, store) + + tk.MustExec("set @@global.tidb_enable_dist_task = 0;") 
+ testAddIndexDiskQuotaTS(t, tk) + tk.MustExec("set @@global.tidb_enable_dist_task = 1;") + testAddIndexDiskQuotaTS(t, tk) + // reset changed global variable + tk.MustExec("set @@global.tidb_enable_dist_task = 0;") +} + +func testAddIndexDiskQuotaTS(t *testing.T, tk *testkit.TestKit) { + tk.MustExec("drop database if exists addindexlit;") + tk.MustExec("create database addindexlit;") + tk.MustExec("use addindexlit;") + tk.MustExec(`set global tidb_ddl_enable_fast_reorg=on;`) + tk.MustExec("set global tidb_ddl_reorg_worker_cnt=1;") + + tk.MustExec("create table t(id int primary key, b int, k int);") + tk.MustQuery("split table t by (30000);").Check(testkit.Rows("1 1")) + tk.MustExec("insert into t values(1, 1, 1);") + tk.MustExec("insert into t values(100000, 1, 1);") + + ingest.ForceSyncFlagForTest = true + tk.MustExec("alter table t add index idx_test(b);") + ingest.ForceSyncFlagForTest = false + tk.MustExec("update t set b = b + 1;") +} + func TestAddIndexRemoteDuplicateCheck(t *testing.T) { store := realtikvtest.CreateMockStoreAndSetup(t) tk := testkit.NewTestKit(t, store) @@ -614,3 +644,31 @@ func TestConcFastReorg(t *testing.T) { wg.Wait() } + +func TestIssue55808(t *testing.T) { + store := realtikvtest.CreateMockStoreAndSetup(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("drop database if exists addindexlit;") + tk.MustExec("create database addindexlit;") + tk.MustExec("use addindexlit;") + tk.MustExec(`set global tidb_ddl_enable_fast_reorg=on;`) + tk.MustExec("set global tidb_enable_dist_task = off;") + tk.MustExec("set global tidb_ddl_error_count_limit = 0") + + backup := local.MaxWriteAndIngestRetryTimes + local.MaxWriteAndIngestRetryTimes = 1 + t.Cleanup(func() { + local.MaxWriteAndIngestRetryTimes = backup + }) + + tk.MustExec("create table t (a int primary key, b int);") + for i := 0; i < 4; i++ { + tk.MustExec(fmt.Sprintf("insert into t values (%d, %d);", i*10000, i*10000)) + } + require.NoError(t, failpoint.Enable("github.com/pingcap/tidb/pkg/lightning/backend/local/doIngestFailed", "return()")) + err := tk.ExecToErr("alter table t add index idx(a);") + require.ErrorContains(t, err, "injected error") + require.NoError(t, failpoint.Disable("github.com/pingcap/tidb/pkg/lightning/backend/local/doIngestFailed")) + + tk.MustExec("set global tidb_ddl_error_count_limit = default") +}
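
The pkg/statistics/handle/autoanalyze/priorityqueue change above moves the break out of the inner batch loop: analyzing one index already refreshes all columns and other indexes, but the old labelled break also skipped the remaining partition batches of that first index. Below is a minimal sketch of the corrected control flow; the index and partition names are made up for illustration.

package main

import "fmt"

// issueAnalyzeJobs mimics analyzePartitionIndexes: batch the partitions of one
// index, submit every batch, then stop, because analyzing a single index is
// enough to refresh stats for the whole table.
func issueAnalyzeJobs(partitionIndexes map[string][]string, batchSize int) []string {
	var jobs []string
	for indexName, partitions := range partitionIndexes {
		for start := 0; start < len(partitions); start += batchSize {
			end := min(start+batchSize, len(partitions))
			jobs = append(jobs, fmt.Sprintf("analyze index %s partitions %v", indexName, partitions[start:end]))
			// The old code broke out of both loops here, so partitions beyond
			// the first batch were never analyzed.
		}
		// The fix: stop only after every batch of the first index was submitted.
		break
	}
	return jobs
}

func main() {
	for _, j := range issueAnalyzeJobs(map[string][]string{"idx_b": {"p0", "p1", "p2"}}, 2) {
		fmt.Println(j)
	}
}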
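
The pkg/statistics/handle/storage/read.go hunk separates the error path from the deleted-table path: TableStatsFromStorage used to return nil, nil when ExecRows failed, which is exactly what the new ExecRowsTimeout failpoint and TestStatsCacheUpdateTimeout exercise. A small sketch of that pattern, with a stand-in loader instead of the real util.ExecRows:

package main

import (
	"errors"
	"fmt"
)

type row struct{}

// loadHistRows stands in for util.ExecRows in this sketch.
func loadHistRows(fail bool) ([]row, error) {
	if fail {
		return nil, errors.New("inject timeout error")
	}
	return nil, nil // deleted table: no rows and no error
}

// tableStatsFromStorage mirrors the fixed shape: surface real errors to the
// caller, and only map "zero rows" to a silent nil result.
func tableStatsFromStorage(fail bool) (*row, error) {
	rows, err := loadHistRows(fail)
	if err != nil {
		return nil, err // previously this fell through to "return nil, nil"
	}
	if len(rows) == 0 {
		return nil, nil // the table was deleted, nothing to load
	}
	return &rows[0], nil
}

func main() {
	_, err := tableStatsFromStorage(true)
	fmt.Println(err) // inject timeout error
}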
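
The pkg/statistics/handle/syncload change makes SendLoadRequests reuse one timer for both enqueueing the task and waiting for its result, so a caller no longer blocks forever on ResultCh when no worker drains the queue (the situation TestSendLoadRequestsWaitTooLong sets up with StatsLoadConcurrency = 0). A minimal sketch of that nested select, with a toy task type in place of the real NeededItemTask:

package main

import (
	"errors"
	"fmt"
	"time"
)

type loadTask struct {
	resultCh chan string
}

// sendLoadRequest bounds both the send and the wait with the same timer.
func sendLoadRequest(queue chan loadTask, timeout time.Duration) (string, error) {
	timer := time.NewTimer(timeout)
	defer timer.Stop()
	task := loadTask{resultCh: make(chan string, 1)}
	select {
	case queue <- task:
		select {
		case <-timer.C:
			return "", errors.New("sync load took too long to return")
		case result := <-task.resultCh:
			return result, nil
		}
	case <-timer.C:
		return "", errors.New("sync load stats channel is full and timeout sending task to channel")
	}
}

func main() {
	queue := make(chan loadTask, 1) // room to enqueue, but nobody consumes
	_, err := sendLoadRequest(queue, 50*time.Millisecond)
	fmt.Println(err) // sync load took too long to return
}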
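
The pkg/statistics/histogram.go hunk copies bucket bounds with DeepSlice before placing them in the protobuf, so the message no longer aliases the histogram's internal byte buffers. A self-contained sketch of why the copy matters; copySlice here plays the role of DeepSlice:

package main

import "fmt"

// copySlice clones the slice into a fresh backing array, the same thing
// DeepSlice does for the bucket lower/upper bounds.
func copySlice[T any](s []T) []T {
	r := make([]T, len(s))
	copy(r, s)
	return r
}

func main() {
	lower := []byte("abc")
	aliased := lower           // shares the backing array with lower
	copied := copySlice(lower) // owns its own bytes

	lower[0] = 'z' // the source buffer is reused or mutated later
	fmt.Println(string(aliased)) // "zbc": the mutation leaks through the alias
	fmt.Println(string(copied))  // "abc": the copied bound stays intact
}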
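
The ColumnSwapHelper comment in pkg/util/chunk/chunk_util.go explains why output index lists must be merged when two input columns are really references to the same Column. A compact sketch of that merge, using a plain alias table in place of the pointer comparison and disjoint set used in the real code:

package main

import "fmt"

// mergeMapping folds the output-index lists of aliased input columns into one
// entry keyed by the group's representative input index, mirroring what
// mergeInputIdxToOutputIdxes produces (e.g. <0, [0,1,2,3]> in the doc comment).
func mergeMapping(aliasOf []int, inputIdxToOutputIdxes map[int][]int) map[int][]int {
	merged := make(map[int][]int, len(inputIdxToOutputIdxes))
	for inputIdx, outputIdxes := range inputIdxToOutputIdxes {
		root := aliasOf[inputIdx]
		merged[root] = append(merged[root], outputIdxes...)
	}
	return merged
}

func main() {
	// Input column 1 is a reference to input column 0, as after MakeRef(0, 1).
	aliasOf := []int{0, 0}
	in := map[int][]int{0: {0, 1}, 1: {2, 3}}
	// Prints one merged entry; the order inside the list depends on map iteration.
	fmt.Println(mergeMapping(aliasOf, in))
}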
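
pkg/util/disjointset/set.go adds a generic disjoint set for sparse or non-integer keys. A short usage sketch against the API introduced in this diff (NewSet, Union, InSameGroup); the string values are made up:

package main

import (
	"fmt"

	"github.com/pingcap/tidb/pkg/util/disjointset"
)

func main() {
	s := disjointset.NewSet[string](4)
	s.Union("a1", "a2")
	s.Union("a2", "a3")
	fmt.Println(s.InSameGroup("a1", "a3")) // true: joined through a2
	fmt.Println(s.InSameGroup("a1", "b1")) // false: b1 starts its own group
}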
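
TestIngestUseGivenTS now derives the mocked global-sort TS from the wall clock via oracle.GoTimeToTS instead of the constant 123456789; a plausible reading is that a fixed tiny TS sits far in the past of a real cluster's timestamps, though the diff itself does not state the motivation. A sketch of how the failpoint term is built; oracle.GoTimeToTS is the real client-go helper, the rest is illustration:

package main

import (
	"fmt"
	"time"

	"github.com/tikv/client-go/v2/oracle"
)

func main() {
	// Physical time -> TSO-style timestamp (physical part shifted, logical part zero).
	presetTS := oracle.GoTimeToTS(time.Now())
	failpointTerm := fmt.Sprintf(`return(%d)`, presetTS)
	fmt.Println(failpointTerm)
}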