diff --git a/ddl/backfilling.go b/ddl/backfilling.go index a7c23a545208e..aae3a9b75790e 100644 --- a/ddl/backfilling.go +++ b/ddl/backfilling.go @@ -146,7 +146,7 @@ func GetLeaseGoTime(currTime time.Time, lease time.Duration) types.Time { // Backfilling is time consuming, to accelerate this process, TiDB has built some sub // workers to do this in the DDL owner node. // -// DDL owner thread +// DDL owner thread (also see comments before runReorgJob func) // ^ // | (reorgCtx.doneCh) // | @@ -583,9 +583,10 @@ func (dc *ddlCtx) sendTasksAndWait(scheduler *backfillScheduler, totalAddedCount err = dc.isReorgRunnable(reorgInfo.Job.ID) } + // Update the reorg handle that has been processed. + err1 := reorgInfo.UpdateReorgMeta(nextKey, scheduler.sessPool) + if err != nil { - // Update the reorg handle that has been processed. - err1 := reorgInfo.UpdateReorgMeta(nextKey, scheduler.sessPool) metrics.BatchAddIdxHistogram.WithLabelValues(metrics.LblError).Observe(elapsedTime.Seconds()) logutil.BgLogger().Warn("[ddl] backfill worker handle batch tasks failed", @@ -614,7 +615,8 @@ func (dc *ddlCtx) sendTasksAndWait(scheduler *backfillScheduler, totalAddedCount zap.String("start key", hex.EncodeToString(startKey)), zap.String("next key", hex.EncodeToString(nextKey)), zap.Int64("batch added count", taskAddedCount), - zap.String("take time", elapsedTime.String())) + zap.String("take time", elapsedTime.String()), + zap.NamedError("updateHandleError", err1)) return nil } @@ -1320,15 +1322,15 @@ func GetMaxBackfillJob(sess *session, jobID, currEleID int64, currEleKey []byte) } // MoveBackfillJobsToHistoryTable moves backfill table jobs to the backfill history table. -func MoveBackfillJobsToHistoryTable(sessCtx sessionctx.Context, bfJob *BackfillJob) error { - sess, ok := sessCtx.(*session) +func MoveBackfillJobsToHistoryTable(sctx sessionctx.Context, bfJob *BackfillJob) error { + s, ok := sctx.(*session) if !ok { - return errors.Errorf("sess ctx:%#v convert session failed", sessCtx) + return errors.Errorf("sess ctx:%#v convert session failed", sctx) } - return runInTxn(sess, func(se *session) error { + return s.runInTxn(func(se *session) error { // TODO: Consider batch by batch update backfill jobs and insert backfill history jobs. - bJobs, err := GetBackfillJobs(sess, BackfillTable, fmt.Sprintf("ddl_job_id = %d and ele_id = %d and ele_key = '%s'", + bJobs, err := GetBackfillJobs(se, BackfillTable, fmt.Sprintf("ddl_job_id = %d and ele_id = %d and ele_key = '%s'", bfJob.JobID, bfJob.EleID, bfJob.EleKey), "update_backfill_job") if err != nil { return errors.Trace(err) @@ -1342,13 +1344,13 @@ func MoveBackfillJobsToHistoryTable(sessCtx sessionctx.Context, bfJob *BackfillJ return errors.Trace(err) } startTS := txn.StartTS() - err = RemoveBackfillJob(sess, true, bJobs[0]) + err = RemoveBackfillJob(se, true, bJobs[0]) if err == nil { for _, bj := range bJobs { bj.State = model.JobStateCancelled bj.FinishTS = startTS } - err = AddBackfillHistoryJob(sess, bJobs) + err = AddBackfillHistoryJob(se, bJobs) } logutil.BgLogger().Info("[ddl] move backfill jobs to history table", zap.Int("job count", len(bJobs))) return errors.Trace(err) diff --git a/ddl/column.go b/ddl/column.go index 25ce1f81b9557..9893d6528038b 100644 --- a/ddl/column.go +++ b/ddl/column.go @@ -806,7 +806,13 @@ func doReorgWorkForModifyColumnMultiSchema(w *worker, d *ddlCtx, t *meta.Meta, j func doReorgWorkForModifyColumn(w *worker, d *ddlCtx, t *meta.Meta, job *model.Job, tbl table.Table, oldCol, changingCol *model.ColumnInfo, changingIdxs []*model.IndexInfo) (done bool, ver int64, err error) { job.ReorgMeta.ReorgTp = model.ReorgTypeTxn - rh := newReorgHandler(t, w.sess) + sctx, err1 := w.sessPool.get() + if err1 != nil { + err = errors.Trace(err1) + return + } + defer w.sessPool.put(sctx) + rh := newReorgHandler(newSession(sctx)) dbInfo, err := t.GetDatabase(job.SchemaID) if err != nil { return false, ver, errors.Trace(err) @@ -1291,8 +1297,8 @@ func (w *updateColumnWorker) getRowRecord(handle kv.Handle, recordKey []byte, ra if err != nil { return w.reformatErrors(err) } - if w.sessCtx.GetSessionVars().StmtCtx.GetWarnings() != nil && len(w.sessCtx.GetSessionVars().StmtCtx.GetWarnings()) != 0 { - warn := w.sessCtx.GetSessionVars().StmtCtx.GetWarnings() + warn := w.sessCtx.GetSessionVars().StmtCtx.GetWarnings() + if len(warn) != 0 { //nolint:forcetypeassert recordWarning = errors.Cause(w.reformatErrors(warn[0].Err)).(*terror.Error) } @@ -1376,8 +1382,9 @@ func (w *updateColumnWorker) BackfillDataInTxn(handleRange reorgBackfillTask) (t taskCtx.nextKey = nextKey taskCtx.done = taskDone - warningsMap := make(map[errors.ErrorID]*terror.Error, len(rowRecords)) - warningsCountMap := make(map[errors.ErrorID]int64, len(rowRecords)) + // Optimize for few warnings! + warningsMap := make(map[errors.ErrorID]*terror.Error, 2) + warningsCountMap := make(map[errors.ErrorID]int64, 2) for _, rowRecord := range rowRecords { taskCtx.scanCount++ diff --git a/ddl/column_type_change_test.go b/ddl/column_type_change_test.go index 4f79ce7782368..308a815773ce9 100644 --- a/ddl/column_type_change_test.go +++ b/ddl/column_type_change_test.go @@ -2421,3 +2421,18 @@ func TestColumnTypeChangeTimestampToInt(t *testing.T) { tk.MustExec("alter table t add index idx1(id, c1);") tk.MustExec("admin check table t") } + +func TestFixDDLTxnWillConflictWithReorgTxn(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + + tk.MustExec("create table t (a int)") + tk.MustExec("set global tidb_ddl_enable_fast_reorg = OFF") + tk.MustExec("alter table t add index(a)") + tk.MustExec("set @@sql_mode=''") + tk.MustExec("insert into t values(128),(129)") + tk.MustExec("alter table t modify column a tinyint") + + tk.MustQuery("show warnings").Check(testkit.Rows("Warning 1690 2 warnings with this error code, first warning: constant 128 overflows tinyint")) +} diff --git a/ddl/db_partition_test.go b/ddl/db_partition_test.go index 6be12283c920a..c61eeaf885aa6 100644 --- a/ddl/db_partition_test.go +++ b/ddl/db_partition_test.go @@ -4528,6 +4528,25 @@ func TestPartitionTableWithAnsiQuotes(t *testing.T) { ` PARTITION "pMax" VALUES LESS THAN (MAXVALUE,MAXVALUE))`)) } +func TestAlterModifyPartitionColTruncateWarning(t *testing.T) { + t.Skip("waiting for supporting Modify Partition Column again") + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + schemaName := "truncWarn" + tk.MustExec("create database " + schemaName) + tk.MustExec("use " + schemaName) + tk.MustExec(`set sql_mode = default`) + tk.MustExec(`create table t (a varchar(255)) partition by range columns (a) (partition p1 values less than ("0"), partition p2 values less than ("zzzz"))`) + tk.MustExec(`insert into t values ("123456"),(" 654321")`) + tk.MustContainErrMsg(`alter table t modify a varchar(5)`, "[types:1265]Data truncated for column 'a', value is '") + tk.MustExec(`set sql_mode = ''`) + tk.MustExec(`alter table t modify a varchar(5)`) + // Fix the duplicate warning, see https://github.com/pingcap/tidb/issues/38699 + tk.MustQuery(`show warnings`).Check(testkit.Rows(""+ + "Warning 1265 Data truncated for column 'a', value is ' 654321'", + "Warning 1265 Data truncated for column 'a', value is ' 654321'")) +} + func TestAlterModifyColumnOnPartitionedTableRename(t *testing.T) { store := testkit.CreateMockStore(t) tk := testkit.NewTestKit(t, store) diff --git a/ddl/db_test.go b/ddl/db_test.go index 629316af251a4..46cfe301ec4f4 100644 --- a/ddl/db_test.go +++ b/ddl/db_test.go @@ -618,10 +618,7 @@ func TestAddExpressionIndexRollback(t *testing.T) { // Check whether the reorg information is cleaned up. err := sessiontxn.NewTxn(context.Background(), ctx) require.NoError(t, err) - txn, err := ctx.Txn(true) - require.NoError(t, err) - m := meta.NewMeta(txn) - element, start, end, physicalID, err := ddl.NewReorgHandlerForTest(m, testkit.NewTestKit(t, store).Session()).GetDDLReorgHandle(currJob) + element, start, end, physicalID, err := ddl.NewReorgHandlerForTest(testkit.NewTestKit(t, store).Session()).GetDDLReorgHandle(currJob) require.True(t, meta.ErrDDLReorgElementNotExist.Equal(err)) require.Nil(t, element) require.Nil(t, start) diff --git a/ddl/ddl.go b/ddl/ddl.go index b89c8264fd125..9fa2a9f99cc10 100644 --- a/ddl/ddl.go +++ b/ddl/ddl.go @@ -1343,7 +1343,7 @@ func GetDDLInfo(s sessionctx.Context) (*Info, error) { return info, nil } - _, info.ReorgHandle, _, _, err = newReorgHandler(t, sess).GetDDLReorgHandle(reorgJob) + _, info.ReorgHandle, _, _, err = newReorgHandler(sess).GetDDLReorgHandle(reorgJob) if err != nil { if meta.ErrDDLReorgElementNotExist.Equal(err) { return info, nil @@ -1584,6 +1584,19 @@ func (s *session) session() sessionctx.Context { return s.Context } +func (s *session) runInTxn(f func(*session) error) (err error) { + err = s.begin() + if err != nil { + return err + } + err = f(s) + if err != nil { + s.rollback() + return + } + return errors.Trace(s.commit()) +} + // GetAllHistoryDDLJobs get all the done DDL jobs. func GetAllHistoryDDLJobs(m *meta.Meta) ([]*model.Job, error) { iterator, err := GetLastHistoryDDLJobsIterator(m) diff --git a/ddl/ddl_worker.go b/ddl/ddl_worker.go index cc75cb43e50d7..5c700f6273a3b 100644 --- a/ddl/ddl_worker.go +++ b/ddl/ddl_worker.go @@ -699,6 +699,7 @@ func (w *worker) HandleJobDone(d *ddlCtx, job *model.Job, t *meta.Meta) error { if err != nil { return err } + CleanupDDLReorgHandles(job, w.sess) asyncNotify(d.ddlJobDoneCh) return nil } diff --git a/ddl/index.go b/ddl/index.go index 512c856faa8ef..ae42ad84aba2e 100644 --- a/ddl/index.go +++ b/ddl/index.go @@ -882,7 +882,13 @@ func doReorgWorkForCreateIndex(w *worker, d *ddlCtx, t *meta.Meta, job *model.Jo func runReorgJobAndHandleErr(w *worker, d *ddlCtx, t *meta.Meta, job *model.Job, tbl table.Table, indexInfo *model.IndexInfo, mergingTmpIdx bool) (done bool, ver int64, err error) { elements := []*meta.Element{{ID: indexInfo.ID, TypeKey: meta.IndexElementKey}} - rh := newReorgHandler(t, w.sess) + sctx, err1 := w.sessPool.get() + if err1 != nil { + err = err1 + return + } + defer w.sessPool.put(sctx) + rh := newReorgHandler(newSession(sctx)) dbInfo, err := t.GetDatabase(job.SchemaID) if err != nil { return false, ver, errors.Trace(err) @@ -1274,13 +1280,10 @@ func (w *baseIndexWorker) String() string { } func (w *baseIndexWorker) UpdateTask(bfJob *BackfillJob) error { - sess, ok := w.backfillCtx.sessCtx.(*session) - if !ok { - return errors.Errorf("sess ctx:%#v convert session failed", w.backfillCtx.sessCtx) - } + s := newSession(w.backfillCtx.sessCtx) - return runInTxn(sess, func(se *session) error { - jobs, err := GetBackfillJobs(sess, BackfillTable, fmt.Sprintf("ddl_job_id = %d and ele_id = %d and ele_key = '%s' and id = %d", + return s.runInTxn(func(se *session) error { + jobs, err := GetBackfillJobs(se, BackfillTable, fmt.Sprintf("ddl_job_id = %d and ele_id = %d and ele_key = '%s' and id = %d", bfJob.JobID, bfJob.EleID, bfJob.EleKey, bfJob.ID), "update_backfill_task") if err != nil { return err @@ -1297,26 +1300,23 @@ func (w *baseIndexWorker) UpdateTask(bfJob *BackfillJob) error { return err } bfJob.InstanceLease = GetLeaseGoTime(currTime, InstanceLease) - return updateBackfillJob(sess, BackfillTable, bfJob, "update_backfill_task") + return updateBackfillJob(se, BackfillTable, bfJob, "update_backfill_task") }) } func (w *baseIndexWorker) FinishTask(bfJob *BackfillJob) error { - sess, ok := w.backfillCtx.sessCtx.(*session) - if !ok { - return errors.Errorf("sess ctx:%#v convert session failed", w.backfillCtx.sessCtx) - } - return runInTxn(sess, func(se *session) error { + s := newSession(w.backfillCtx.sessCtx) + return s.runInTxn(func(se *session) error { txn, err := se.txn() if err != nil { return errors.Trace(err) } bfJob.FinishTS = txn.StartTS() - err = RemoveBackfillJob(sess, false, bfJob) + err = RemoveBackfillJob(se, false, bfJob) if err != nil { return err } - return AddBackfillHistoryJob(sess, []*BackfillJob{bfJob}) + return AddBackfillHistoryJob(se, []*BackfillJob{bfJob}) }) } diff --git a/ddl/job_table.go b/ddl/job_table.go index 16dd6fa45f1e3..782abcc8b5765 100644 --- a/ddl/job_table.go +++ b/ddl/job_table.go @@ -432,15 +432,8 @@ func getDDLReorgHandle(sess *session, job *model.Job) (element *meta.Element, st return } -// updateDDLReorgStartHandle update the startKey of the handle. -func updateDDLReorgStartHandle(sess *session, job *model.Job, element *meta.Element, startKey kv.Key) error { - sql := fmt.Sprintf("update mysql.tidb_ddl_reorg set ele_id = %d, ele_type = %s, start_key = %s where job_id = %d", - element.ID, wrapKey2String(element.TypeKey), wrapKey2String(startKey), job.ID) - _, err := sess.execute(context.Background(), sql, "update_start_handle") - return err -} - // updateDDLReorgHandle update startKey, endKey physicalTableID and element of the handle. +// Caller should wrap this in a separate transaction, to avoid conflicts. func updateDDLReorgHandle(sess *session, jobID int64, startKey kv.Key, endKey kv.Key, physicalTableID int64, element *meta.Element) error { sql := fmt.Sprintf("update mysql.tidb_ddl_reorg set ele_id = %d, ele_type = %s, start_key = %s, end_key = %s, physical_id = %d where job_id = %d", element.ID, wrapKey2String(element.TypeKey), wrapKey2String(startKey), wrapKey2String(endKey), physicalTableID, jobID) @@ -449,28 +442,48 @@ func updateDDLReorgHandle(sess *session, jobID int64, startKey kv.Key, endKey kv } // initDDLReorgHandle initializes the handle for ddl reorg. -func initDDLReorgHandle(sess *session, jobID int64, startKey kv.Key, endKey kv.Key, physicalTableID int64, element *meta.Element) error { - sql := fmt.Sprintf("insert into mysql.tidb_ddl_reorg(job_id, ele_id, ele_type, start_key, end_key, physical_id) values (%d, %d, %s, %s, %s, %d)", +func initDDLReorgHandle(s *session, jobID int64, startKey kv.Key, endKey kv.Key, physicalTableID int64, element *meta.Element) error { + del := fmt.Sprintf("delete from mysql.tidb_ddl_reorg where job_id = %d", jobID) + ins := fmt.Sprintf("insert into mysql.tidb_ddl_reorg(job_id, ele_id, ele_type, start_key, end_key, physical_id) values (%d, %d, %s, %s, %s, %d)", jobID, element.ID, wrapKey2String(element.TypeKey), wrapKey2String(startKey), wrapKey2String(endKey), physicalTableID) - _, err := sess.execute(context.Background(), sql, "update_handle") - return err + return s.runInTxn(func(se *session) error { + _, err := se.execute(context.Background(), del, "init_handle") + if err != nil { + logutil.BgLogger().Info("initDDLReorgHandle failed to delete", zap.Int64("jobID", jobID), zap.Error(err)) + } + _, err = se.execute(context.Background(), ins, "init_handle") + return err + }) } // deleteDDLReorgHandle deletes the handle for ddl reorg. -func removeDDLReorgHandle(sess *session, job *model.Job, elements []*meta.Element) error { +func removeDDLReorgHandle(s *session, job *model.Job, elements []*meta.Element) error { if len(elements) == 0 { return nil } sql := fmt.Sprintf("delete from mysql.tidb_ddl_reorg where job_id = %d", job.ID) - _, err := sess.execute(context.Background(), sql, "remove_handle") - return err + return s.runInTxn(func(se *session) error { + _, err := se.execute(context.Background(), sql, "remove_handle") + return err + }) } // removeReorgElement removes the element from ddl reorg, it is the same with removeDDLReorgHandle, only used in failpoint -func removeReorgElement(sess *session, job *model.Job) error { +func removeReorgElement(s *session, job *model.Job) error { sql := fmt.Sprintf("delete from mysql.tidb_ddl_reorg where job_id = %d", job.ID) - _, err := sess.execute(context.Background(), sql, "remove_handle") - return err + return s.runInTxn(func(se *session) error { + _, err := se.execute(context.Background(), sql, "remove_handle") + return err + }) +} + +// cleanDDLReorgHandles removes handles that are no longer needed. +func cleanDDLReorgHandles(s *session, job *model.Job) error { + sql := "delete from mysql.tidb_ddl_reorg where job_id = " + strconv.FormatInt(job.ID, 10) + return s.runInTxn(func(se *session) error { + _, err := se.execute(context.Background(), sql, "clean_handle") + return err + }) } func wrapKey2String(key []byte) string { @@ -532,10 +545,10 @@ func AddBackfillHistoryJob(sess *session, backfillJobs []*BackfillJob) error { } // AddBackfillJobs adds the backfill jobs to the tidb_ddl_backfill table. -func AddBackfillJobs(sess *session, backfillJobs []*BackfillJob) error { +func AddBackfillJobs(s *session, backfillJobs []*BackfillJob) error { label := fmt.Sprintf("add_%s_job", BackfillTable) // Do runInTxn to get StartTS. - return runInTxn(newSession(sess), func(se *session) error { + return s.runInTxn(func(se *session) error { txn, err := se.txn() if err != nil { return errors.Trace(err) @@ -549,26 +562,13 @@ func AddBackfillJobs(sess *session, backfillJobs []*BackfillJob) error { if err != nil { return err } - _, err = sess.execute(context.Background(), sql, label) + _, err = se.execute(context.Background(), sql, label) return errors.Trace(err) }) } -func runInTxn(se *session, f func(*session) error) (err error) { - err = se.begin() - if err != nil { - return err - } - err = f(se) - if err != nil { - se.rollback() - return - } - return errors.Trace(se.commit()) -} - // GetBackfillJobsForOneEle batch gets the backfill jobs in the tblName table that contains only one element. -func GetBackfillJobsForOneEle(sess *session, batch int, excludedJobIDs []int64, lease time.Duration) ([]*BackfillJob, error) { +func GetBackfillJobsForOneEle(s *session, batch int, excludedJobIDs []int64, lease time.Duration) ([]*BackfillJob, error) { eJobIDsBuilder := strings.Builder{} for i, id := range excludedJobIDs { if i == 0 { @@ -584,14 +584,13 @@ func GetBackfillJobsForOneEle(sess *session, batch int, excludedJobIDs []int64, var err error var bJobs []*BackfillJob - s := newSession(sess) - err = runInTxn(s, func(se *session) error { - currTime, err := GetOracleTimeWithStartTS(s) + err = s.runInTxn(func(se *session) error { + currTime, err := GetOracleTimeWithStartTS(se) if err != nil { return err } - bJobs, err = GetBackfillJobs(sess, BackfillTable, + bJobs, err = GetBackfillJobs(se, BackfillTable, fmt.Sprintf("(exec_ID = '' or exec_lease < '%v') %s order by ddl_job_id, ele_key, ele_id limit %d", currTime.Add(-lease), eJobIDsBuilder.String(), batch), "get_backfill_job") return err @@ -614,17 +613,16 @@ func GetBackfillJobsForOneEle(sess *session, batch int, excludedJobIDs []int64, // GetAndMarkBackfillJobsForOneEle batch gets the backfill jobs in the tblName table that contains only one element, // and update these jobs with instance ID and lease. -func GetAndMarkBackfillJobsForOneEle(sess *session, batch int, jobID int64, uuid string, lease time.Duration) ([]*BackfillJob, error) { +func GetAndMarkBackfillJobsForOneEle(s *session, batch int, jobID int64, uuid string, lease time.Duration) ([]*BackfillJob, error) { var validLen int var bJobs []*BackfillJob - s := newSession(sess) - err := runInTxn(s, func(se *session) error { + err := s.runInTxn(func(se *session) error { currTime, err := GetOracleTimeWithStartTS(se) if err != nil { return err } - bJobs, err = GetBackfillJobs(sess, BackfillTable, + bJobs, err = GetBackfillJobs(se, BackfillTable, fmt.Sprintf("(exec_ID = '' or exec_lease < '%v') and ddl_job_id = %d order by ddl_job_id, ele_key, ele_id limit %d", currTime.Add(-lease), jobID, batch), "get_mark_backfill_job") if err != nil { @@ -645,7 +643,7 @@ func GetAndMarkBackfillJobsForOneEle(sess *session, batch int, jobID int64, uuid bJobs[i].InstanceID = uuid bJobs[i].InstanceLease = GetLeaseGoTime(currTime, lease) // TODO: batch update - if err = updateBackfillJob(sess, BackfillTable, bJobs[i], "get_mark_backfill_job"); err != nil { + if err = updateBackfillJob(se, BackfillTable, bJobs[i], "get_mark_backfill_job"); err != nil { return err } } diff --git a/ddl/modify_column_test.go b/ddl/modify_column_test.go index bd9c574970f71..6eb8e633be007 100644 --- a/ddl/modify_column_test.go +++ b/ddl/modify_column_test.go @@ -17,6 +17,7 @@ package ddl_test import ( "context" "fmt" + "strconv" "sync" "testing" "time" @@ -117,14 +118,18 @@ func TestModifyColumnReorgInfo(t *testing.T) { require.NoError(t, checkErr) // Check whether the reorg information is cleaned up when executing "modify column" failed. checkReorgHandle := func(gotElements, expectedElements []*meta.Element) { + require.Equal(t, len(expectedElements), len(gotElements)) for i, e := range gotElements { require.Equal(t, expectedElements[i], e) } + // check the consistency of the tables. + currJobID := strconv.FormatInt(currJob.ID, 10) + tk.MustQuery("select job_id, reorg, schema_ids, table_ids, type, processing from mysql.tidb_ddl_job where job_id = " + currJobID).Check(testkit.Rows()) + tk.MustQuery("select job_id from mysql.tidb_ddl_history where job_id = " + currJobID).Check(testkit.Rows(currJobID)) + tk.MustQuery("select job_id, ele_id, ele_type, physical_id from mysql.tidb_ddl_reorg where job_id = " + currJobID).Check(testkit.Rows()) require.NoError(t, sessiontxn.NewTxn(context.Background(), ctx)) - txn, err := ctx.Txn(true) - require.NoError(t, err) - m := meta.NewMeta(txn) - e, start, end, physicalID, err := ddl.NewReorgHandlerForTest(m, testkit.NewTestKit(t, store).Session()).GetDDLReorgHandle(currJob) + e, start, end, physicalID, err := ddl.NewReorgHandlerForTest(testkit.NewTestKit(t, store).Session()).GetDDLReorgHandle(currJob) + require.Error(t, err, "Error not ErrDDLReorgElementNotExists, found orphan row in tidb_ddl_reorg for job.ID %d: e: '%s', physicalID: %d, start: 0x%x end: 0x%x", currJob.ID, e, physicalID, start, end) require.True(t, meta.ErrDDLReorgElementNotExist.Equal(err)) require.Nil(t, e) require.Nil(t, start) diff --git a/ddl/partition.go b/ddl/partition.go index 5b67c82c5bf8b..1a3cab2e3eb01 100644 --- a/ddl/partition.go +++ b/ddl/partition.go @@ -1756,7 +1756,12 @@ func (w *worker) onDropTablePartition(d *ddlCtx, t *meta.Meta, job *model.Job) ( elements = append(elements, &meta.Element{ID: idxInfo.ID, TypeKey: meta.IndexElementKey}) } } - rh := newReorgHandler(t, w.sess) + sctx, err1 := w.sessPool.get() + if err1 != nil { + return ver, err1 + } + defer w.sessPool.put(sctx) + rh := newReorgHandler(newSession(sctx)) reorgInfo, err := getReorgInfoFromPartitions(d.jobContext(job.ID), d, rh, job, dbInfo, tbl, physicalTableIDs, elements) if err != nil || reorgInfo.first { diff --git a/ddl/reorg.go b/ddl/reorg.go index 7912560499344..e760e43c11221 100644 --- a/ddl/reorg.go +++ b/ddl/reorg.go @@ -141,11 +141,9 @@ func (rc *reorgCtx) increaseRowCount(count int64) { atomic.AddInt64(&rc.rowCount, count) } -func (rc *reorgCtx) getRowCountAndKey() (int64, kv.Key, *meta.Element) { +func (rc *reorgCtx) getRowCount() int64 { row := atomic.LoadInt64(&rc.rowCount) - h, _ := (rc.doneKey.Load()).(nullableKey) - element, _ := (rc.element.Load()).(*meta.Element) - return row, h.key, element + return row } // runReorgJob is used as a portal to do the reorganization work. @@ -232,7 +230,7 @@ func (w *worker) runReorgJob(rh *reorgHandler, reorgInfo *reorgInfo, tblInfo *mo d.removeReorgCtx(job) return dbterror.ErrCancelledDDLJob } - rowCount, _, _ := rc.getRowCountAndKey() + rowCount := rc.getRowCount() if err != nil { logutil.BgLogger().Warn("[ddl] run reorg job done", zap.Int64("handled rows", rowCount), zap.Error(err)) } else { @@ -252,17 +250,13 @@ func (w *worker) runReorgJob(rh *reorgHandler, reorgInfo *reorgInfo, tblInfo *mo } updateBackfillProgress(w, reorgInfo, tblInfo, 0) - if err1 := rh.RemoveDDLReorgHandle(job, reorgInfo.elements); err1 != nil { - logutil.BgLogger().Warn("[ddl] run reorg job done, removeDDLReorgHandle failed", zap.Error(err1)) - return errors.Trace(err1) - } case <-w.ctx.Done(): logutil.BgLogger().Info("[ddl] run reorg job quit") d.removeReorgCtx(job) // We return dbterror.ErrWaitReorgTimeout here too, so that outer loop will break. return dbterror.ErrWaitReorgTimeout case <-time.After(waitTimeout): - rowCount, doneKey, currentElement := rc.getRowCountAndKey() + rowCount := rc.getRowCount() job.SetRowCount(rowCount) updateBackfillProgress(w, reorgInfo, tblInfo, rowCount) @@ -271,17 +265,9 @@ func (w *worker) runReorgJob(rh *reorgHandler, reorgInfo *reorgInfo, tblInfo *mo rc.resetWarnings() - // Update a reorgInfo's handle. - // Since daemon-worker is triggered by timer to store the info half-way. - // you should keep these infos is read-only (like job) / atomic (like doneKey & element) / concurrent safe. - err := updateDDLReorgStartHandle(rh.s, job, currentElement, doneKey) logutil.BgLogger().Info("[ddl] run reorg job wait timeout", zap.Duration("wait time", waitTimeout), - zap.ByteString("element type", currentElement.TypeKey), - zap.Int64("element ID", currentElement.ID), - zap.Int64("total added row count", rowCount), - zap.String("done key", hex.EncodeToString(doneKey)), - zap.Error(err)) + zap.Int64("total added row count", rowCount)) // If timeout, we will return, check the owner and retry to wait job done again. return dbterror.ErrWaitReorgTimeout } @@ -640,10 +626,6 @@ func getReorgInfo(ctx *JobContext, d *ddlCtx, rh *reorgHandler, job *model.Job, failpoint.Inject("errorUpdateReorgHandle", func() (*reorgInfo, error) { return &info, errors.New("occur an error when update reorg handle") }) - err = rh.RemoveDDLReorgHandle(job, elements) - if err != nil { - return &info, errors.Trace(err) - } err = rh.InitDDLReorgHandle(job, start, end, pid, elements[0]) if err != nil { return &info, errors.Trace(err) @@ -750,27 +732,24 @@ func getReorgInfoFromPartitions(ctx *JobContext, d *ddlCtx, rh *reorgHandler, jo return &info, nil } +// UpdateReorgMeta creates a new transaction and updates tidb_ddl_reorg table, +// so the reorg can restart in case of issues. func (r *reorgInfo) UpdateReorgMeta(startKey kv.Key, pool *sessionPool) (err error) { if startKey == nil && r.EndKey == nil { return nil } - se, err := pool.get() + sctx, err := pool.get() if err != nil { return } - defer pool.put(se) + defer pool.put(sctx) - sess := newSession(se) + sess := newSession(sctx) err = sess.begin() if err != nil { return } - txn, err := sess.txn() - if err != nil { - sess.rollback() - return err - } - rh := newReorgHandler(meta.NewMeta(txn), sess) + rh := newReorgHandler(sess) err = updateDDLReorgHandle(rh.s, r.Job.ID, startKey, r.EndKey, r.PhysicalTableID, r.currElement) err1 := sess.commit() if err == nil { @@ -781,17 +760,16 @@ func (r *reorgInfo) UpdateReorgMeta(startKey kv.Key, pool *sessionPool) (err err // reorgHandler is used to handle the reorg information duration reorganization DDL job. type reorgHandler struct { - m *meta.Meta s *session } // NewReorgHandlerForTest creates a new reorgHandler, only used in test. -func NewReorgHandlerForTest(t *meta.Meta, sess sessionctx.Context) *reorgHandler { - return newReorgHandler(t, newSession(sess)) +func NewReorgHandlerForTest(sess sessionctx.Context) *reorgHandler { + return newReorgHandler(newSession(sess)) } -func newReorgHandler(t *meta.Meta, sess *session) *reorgHandler { - return &reorgHandler{m: t, s: sess} +func newReorgHandler(sess *session) *reorgHandler { + return &reorgHandler{s: sess} } // InitDDLReorgHandle initializes the job reorganization information. @@ -809,6 +787,20 @@ func (r *reorgHandler) RemoveDDLReorgHandle(job *model.Job, elements []*meta.Ele return removeDDLReorgHandle(r.s, job, elements) } +// CleanupDDLReorgHandles removes the job reorganization related handles. +func CleanupDDLReorgHandles(job *model.Job, s *session) { + if job != nil && !job.IsFinished() && !job.IsSynced() { + // Job is given, but it is neither finished nor synced; do nothing + return + } + + err := cleanDDLReorgHandles(s, job) + if err != nil { + // ignore error, cleanup is not that critical + logutil.BgLogger().Warn("Failed removing the DDL reorg entry in tidb_ddl_reorg", zap.String("job", job.String()), zap.Error(err)) + } +} + // GetDDLReorgHandle gets the latest processed DDL reorganize position. func (r *reorgHandler) GetDDLReorgHandle(job *model.Job) (element *meta.Element, startKey, endKey kv.Key, physicalTableID int64, err error) { return getDDLReorgHandle(r.s, job) diff --git a/parser/model/ddl.go b/parser/model/ddl.go index d14733d4df317..8eb26ca238d3f 100644 --- a/parser/model/ddl.go +++ b/parser/model/ddl.go @@ -668,6 +668,9 @@ func (job *Job) String() string { rowCount := job.GetRowCount() ret := fmt.Sprintf("ID:%d, Type:%s, State:%s, SchemaState:%s, SchemaID:%d, TableID:%d, RowCount:%d, ArgLen:%d, start time: %v, Err:%v, ErrCount:%d, SnapshotVersion:%v", job.ID, job.Type, job.State, job.SchemaState, job.SchemaID, job.TableID, rowCount, len(job.Args), TSConvert2Time(job.StartTS), job.Error, job.ErrorCount, job.SnapshotVer) + if job.ReorgMeta != nil { + ret += fmt.Sprintf(", UniqueWarnings:%d", len(job.ReorgMeta.Warnings)) + } if job.Type != ActionMultiSchemaChange && job.MultiSchemaInfo != nil { ret += fmt.Sprintf(", Multi-Schema Change:true, Revertible:%v", job.MultiSchemaInfo.Revertible) }