diff --git a/pkg/ccl/backupccl/full_cluster_backup_restore_test.go b/pkg/ccl/backupccl/full_cluster_backup_restore_test.go index fcbcfb908474..04ae6cce272d 100644 --- a/pkg/ccl/backupccl/full_cluster_backup_restore_test.go +++ b/pkg/ccl/backupccl/full_cluster_backup_restore_test.go @@ -87,19 +87,6 @@ func TestFullClusterBackup(t *testing.T) { } } - // The claim_session_id field in jobs is a uuid and so needs to be excluded - // when comparing jobs pre/post restore. The span config reconciliation job - // too is something we exclude; because it's a singleton job, when restored - // into another cluster it self-terminates. - const jobsQuery = ` -SELECT id, status, created, payload, progress, created_by_type, created_by_id, claim_instance_id -FROM system.jobs -WHERE id NOT IN -( - SELECT job_id FROM [SHOW AUTOMATIC JOBS] - WHERE job_type = 'AUTO SPAN CONFIG RECONCILIATION' -) - ` // Pause SQL Stats compaction job to ensure the test is deterministic. sqlDB.Exec(t, `PAUSE SCHEDULES SELECT id FROM [SHOW SCHEDULES FOR SQL STATISTICS]`) @@ -151,7 +138,6 @@ CREATE TABLE data2.foo (a int); // should appear in the restore. // This job will eventually fail since it will run from a new cluster. sqlDB.Exec(t, `BACKUP data.bank TO 'nodelocal://0/throwawayjob'`) - preBackupJobs := sqlDB.QueryStr(t, jobsQuery) // Populate system.settings. sqlDB.Exec(t, `SET CLUSTER SETTING kv.bulk_io_write.concurrent_addsstable_requests = 5`) sqlDB.Exec(t, `INSERT INTO system.ui (key, value, "lastUpdated") VALUES ($1, $2, now())`, "some_key", "some_val") @@ -323,27 +309,6 @@ CREATE TABLE data2.foo (a int); sqlDBRestore.CheckQueryResults(t, grantCheck, sqlDB.QueryStr(t, grantCheck)) }) - t.Run("ensure that jobs are restored", func(t *testing.T) { - // Ensure that the jobs in the RESTORE cluster is a superset of the jobs - // that were in the BACKUP cluster (before the full cluster BACKUP job was - // run). There may be more jobs now because the restore can run jobs of - // its own. - newJobsStr := sqlDBRestore.QueryStr(t, jobsQuery) - newJobs := make(map[string][]string) - - for _, newJob := range newJobsStr { - // The first element of the slice is the job id. - newJobs[newJob[0]] = newJob - } - for _, oldJob := range preBackupJobs { - newJob, ok := newJobs[oldJob[0]] - if !ok { - t.Errorf("Expected to find job %+v in RESTORE cluster, but not found", oldJob) - } - require.Equal(t, oldJob, newJob) - } - }) - t.Run("zone_configs", func(t *testing.T) { // The restored zones should be a superset of the zones in the backed up // cluster. 
@@ -676,7 +641,6 @@ func TestClusterRestoreFailCleanup(t *testing.T) {
 			{"comments"},
 			{"database_role_settings"},
 			{"external_connections"},
-			{"jobs"},
 			{"locations"},
 			{"role_members"},
 			{"role_options"},
@@ -768,7 +732,6 @@ func TestClusterRestoreFailCleanup(t *testing.T) {
 			{"comments"},
 			{"database_role_settings"},
 			{"external_connections"},
-			{"jobs"},
 			{"locations"},
 			{"role_members"},
 			{"role_options"},
@@ -1043,30 +1006,6 @@ func TestReintroduceOfflineSpans(t *testing.T) {
 		expectedCount = srcDB.QueryStr(t, checkQuery)
 		destDB.CheckQueryResults(t, `SELECT count(*) FROM restoredb.bank@new_idx`, expectedCount)
 	})
-
-	t.Run("restore-canceled", func(t *testing.T) {
-		args := base.TestClusterArgs{ServerArgs: base.TestServerArgs{
-			Knobs: base.TestingKnobs{JobsTestingKnobs: jobs.NewTestingKnobsWithShortIntervals()}},
-		}
-		_, destDB, cleanupDst := backupRestoreTestSetupEmpty(t, singleNode, tempDir, InitManualReplication, args)
-		defer cleanupDst()
-
-		destDB.Exec(t, `RESTORE FROM $1 AS OF SYSTEM TIME `+tsMidRestore, clusterBackupLoc)
-
-		// Wait for the cluster restore job to finish, as well as the restored RESTORE TABLE
-		// job to cancel.
-		destDB.CheckQueryResultsRetry(t, `
-		SELECT description, status FROM [SHOW JOBS]
-		WHERE job_type = 'RESTORE' AND status NOT IN ('succeeded', 'canceled')`,
-			[][]string{},
-		)
-		// The cluster restore should succeed, but the table restore should have failed.
-		destDB.CheckQueryResults(t,
-			`SELECT status, count(*) FROM [SHOW JOBS] WHERE job_type = 'RESTORE' GROUP BY status ORDER BY status`,
-			[][]string{{"canceled", "1"}, {"succeeded", "1"}})
-
-		destDB.ExpectErr(t, `relation "restoredb.bank" does not exist`, `SELECT count(*) FROM restoredb.bank`)
-	})
 }

 // TestClusterRevisionDoesNotBackupOptOutSystemTables is a regression test for a
diff --git a/pkg/ccl/backupccl/restore_job.go b/pkg/ccl/backupccl/restore_job.go
index 96907cae8350..af61e4136ca1 100644
--- a/pkg/ccl/backupccl/restore_job.go
+++ b/pkg/ccl/backupccl/restore_job.go
@@ -1825,18 +1825,15 @@ func (r *restoreResumer) publishDescriptors(
 			mutTable.RowLevelTTL.ScheduleID = j.ScheduleID()
 		}
 		newTables = append(newTables, mutTable.TableDesc())
-		// For cluster restores, all the jobs are restored directly from the jobs
-		// table, so there is no need to re-create ongoing schema change jobs,
-		// otherwise we'll create duplicate jobs.
-		if details.DescriptorCoverage != tree.AllDescriptors || len(badIndexes) > 0 {
-			// Convert any mutations that were in progress on the table descriptor
-			// when the backup was taken, and convert them to schema change jobs.
-			if err := createSchemaChangeJobsFromMutations(ctx,
-				r.execCfg.JobRegistry, r.execCfg.Codec, txn, r.job.Payload().UsernameProto.Decode(), mutTable,
-			); err != nil {
-				return err
-			}
+
+		// Convert any mutations that were in progress on the table descriptor
+		// when the backup was taken into schema change jobs.
+		if err := createSchemaChangeJobsFromMutations(ctx,
+			r.execCfg.JobRegistry, r.execCfg.Codec, txn, r.job.Payload().UsernameProto.Decode(), mutTable,
+		); err != nil {
+			return err
 		}
+
 	}

 	// For all of the newly created types, make type schema change jobs for any
 	// type descriptors that were backed up in the middle of a type schema change.
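Aside on the new approach: since cluster restores no longer copy the jobs table over, every restore path now rebuilds in-progress schema changes from the mutations recorded on each restored table descriptor. The following is a minimal, self-contained sketch of that grouping idea (one synthesized job per mutation ID), mirroring what createSchemaChangeJobsFromMutations does. The `mutation`, `tableDesc`, and `job` types and the `jobsFromMutations` helper are simplified stand-ins for illustration, not the actual catalog or jobs APIs.

```go
package main

import "fmt"

// mutation is a simplified stand-in for a descriptor mutation recorded on a
// table descriptor at backup time.
type mutation struct {
	mutationID uint32
	desc       string
}

// tableDesc is a simplified stand-in for a restored table descriptor.
type tableDesc struct {
	name      string
	mutations []mutation
}

// job is a simplified stand-in for a synthesized schema change job record.
type job struct {
	table      string
	mutationID uint32
	details    []string
}

// jobsFromMutations groups a descriptor's mutations by mutation ID and emits
// one schema change job per group; mutations created by the same statement
// share a mutation ID and therefore share a job.
func jobsFromMutations(tbl tableDesc) []job {
	details := map[uint32][]string{}
	var order []uint32
	for _, m := range tbl.mutations {
		if _, ok := details[m.mutationID]; !ok {
			order = append(order, m.mutationID)
		}
		details[m.mutationID] = append(details[m.mutationID], m.desc)
	}
	jobs := make([]job, 0, len(order))
	for _, id := range order {
		jobs = append(jobs, job{table: tbl.name, mutationID: id, details: details[id]})
	}
	return jobs
}

func main() {
	tbl := tableDesc{
		name: "data.bank",
		mutations: []mutation{
			{mutationID: 1, desc: "ADD COLUMN b"},
			{mutationID: 1, desc: "ADD INDEX b_idx"}, // same statement, same ID
			{mutationID: 2, desc: "DROP COLUMN c"},
		},
	}
	for _, j := range jobsFromMutations(tbl) {
		fmt.Printf("synthesized job for %s (mutation %d): %v\n", j.table, j.mutationID, j.details)
	}
}
```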
diff --git a/pkg/ccl/backupccl/restore_mid_schema_change_test.go b/pkg/ccl/backupccl/restore_mid_schema_change_test.go
index 124f1c19c73b..a1f024856142 100644
--- a/pkg/ccl/backupccl/restore_mid_schema_change_test.go
+++ b/pkg/ccl/backupccl/restore_mid_schema_change_test.go
@@ -95,7 +95,7 @@ func TestRestoreMidSchemaChange(t *testing.T) {
 				for _, backupDir := range backupDirs {
 					fullBackupDir, err := filepath.Abs(filepath.Join(fullClusterVersionDir, backupDir.Name()))
 					require.NoError(t, err)
-					t.Run(backupDir.Name(), restoreMidSchemaChange(fullBackupDir, backupDir.Name(), isClusterRestore, blockLocation == "after"))
+					t.Run(backupDir.Name(), restoreMidSchemaChange(fullBackupDir, backupDir.Name(), isClusterRestore))
 				}
 			})
 		}
@@ -107,52 +107,25 @@ func TestRestoreMidSchemaChange(t *testing.T) {

 // expectedSCJobCount returns the expected number of schema change jobs
 // we expect to find.
-func expectedSCJobCount(scName string, isClusterRestore, after bool) int {
+func expectedSCJobCount(scName string) int {
 	// The number of schema change under test. These will be the ones that are
 	// synthesized in database restore.
 	var expNumSCJobs int
-	var numBackgroundSCJobs int

 	// Some test cases may have more than 1 background schema change job.
 	switch scName {
 	case "midmany":
-		numBackgroundSCJobs = 1 // the create table
-		// This test runs 3 schema changes on a single table.
 		expNumSCJobs = 3
 	case "midmultitable":
-		numBackgroundSCJobs = 2 // this test creates 2 tables
-		expNumSCJobs = 2        // this test perform a schema change for each table
+		expNumSCJobs = 2 // this test performs a schema change for each table
 	case "midprimarykeyswap":
-		// Create table + alter column is done in the prep stage of this test.
-		numBackgroundSCJobs = 2 // PK change + PK cleanup
 		expNumSCJobs = 2
-		if isClusterRestore && after {
-			expNumSCJobs = 1
-		}
 	case "midprimarykeyswapcleanup":
-		// This test performs an ALTER COLUMN, and the original ALTER PRIMARY
-		// KEY that is being cleaned up.
-		numBackgroundSCJobs = 3
 		expNumSCJobs = 1
 	default:
 		// Most test cases only have 1 schema change under test.
 		expNumSCJobs = 1
-		// Most test cases have just a CREATE TABLE job that created the table
-		// under test.
-		numBackgroundSCJobs = 1
-	}
-
-	// We drop defaultdb and postgres for full cluster restores
-	numBackgroundDropDatabaseSCJobs := 2
-	// Since we're doing a cluster restore, we need to account for all of
-	// the schema change jobs that existed in the backup.
-	if isClusterRestore {
-		expNumSCJobs += numBackgroundSCJobs + numBackgroundDropDatabaseSCJobs
-
-		// If we're performing a cluster restore, we also need to include the drop
-		// crdb_temp_system job.
-		expNumSCJobs++
 	}

 	return expNumSCJobs
@@ -189,32 +162,16 @@ func getTablesInTest(scName string) (tableNames []string) {
 	return
 }

-func verifyMidSchemaChange(
-	t *testing.T, scName string, kvDB *kv.DB, sqlDB *sqlutils.SQLRunner, isClusterRestore, after bool,
-) {
+func verifyMidSchemaChange(t *testing.T, scName string, kvDB *kv.DB, sqlDB *sqlutils.SQLRunner) {
 	tables := getTablesInTest(scName)

 	// Check that we are left with the expected number of schema change jobs.
- expNumSchemaChangeJobs := expectedSCJobCount(scName, isClusterRestore, after) - schemaChangeJobs := sqlDB.QueryStr(t, "SELECT description FROM crdb_internal.jobs WHERE job_type = 'SCHEMA CHANGE'") - require.Equal(t, expNumSchemaChangeJobs, len(schemaChangeJobs), - "Expected %d schema change jobs but found %v", expNumSchemaChangeJobs, schemaChangeJobs) - if isClusterRestore { - // Cluster restores should be restoring the exact job entries that were - // backed up, and therefore should not create jobs that contains "RESTORING" - // in the description. - schemaChangeJobs := sqlDB.QueryStr(t, - "SELECT description FROM crdb_internal.jobs WHERE job_type = 'SCHEMA CHANGE' AND description NOT LIKE '%RESTORING%'") - require.Equal(t, expNumSchemaChangeJobs, len(schemaChangeJobs), - "Expected %d schema change jobs but found %v", expNumSchemaChangeJobs, schemaChangeJobs) - } else { - // Non-cluster restores should create jobs with "RESTORE" in the job - // description. - schemaChangeJobs := sqlDB.QueryStr(t, - "SELECT description FROM crdb_internal.jobs WHERE job_type = 'SCHEMA CHANGE' AND description LIKE '%RESTORING%'") - require.Equal(t, expNumSchemaChangeJobs, len(schemaChangeJobs), - "Expected %d schema change jobs but found %v", expNumSchemaChangeJobs, schemaChangeJobs) - } + expNumSchemaChangeJobs := expectedSCJobCount(scName) + + synthesizedSchemaChangeJobs := sqlDB.QueryStr(t, + "SELECT description FROM crdb_internal.jobs WHERE job_type = 'SCHEMA CHANGE' AND description LIKE '%RESTORING%'") + require.Equal(t, expNumSchemaChangeJobs, len(synthesizedSchemaChangeJobs), + "Expected %d schema change jobs but found %v", expNumSchemaChangeJobs, synthesizedSchemaChangeJobs) for _, tableName := range tables { validateTable(t, kvDB, sqlDB, "defaultdb", tableName) @@ -226,7 +183,7 @@ func verifyMidSchemaChange( } func restoreMidSchemaChange( - backupDir, schemaChangeName string, isClusterRestore bool, after bool, + backupDir, schemaChangeName string, isClusterRestore bool, ) func(t *testing.T) { return func(t *testing.T) { ctx := context.Background() @@ -265,6 +222,7 @@ func restoreMidSchemaChange( // Wait for all jobs to terminate. Some may fail since we don't restore // adding spans. 
sqlDB.CheckQueryResultsRetry(t, "SELECT * FROM crdb_internal.jobs WHERE job_type = 'SCHEMA CHANGE' AND NOT (status = 'succeeded' OR status = 'failed')", [][]string{}) - verifyMidSchemaChange(t, schemaChangeName, kvDB, sqlDB, isClusterRestore, after) + verifyMidSchemaChange(t, schemaChangeName, kvDB, sqlDB) + sqlDB.CheckQueryResultsRetry(t, "SELECT * from crdb_internal.invalid_objects", [][]string{}) } } diff --git a/pkg/ccl/backupccl/system_schema.go b/pkg/ccl/backupccl/system_schema.go index 17775dc69162..a28e32fa0692 100644 --- a/pkg/ccl/backupccl/system_schema.go +++ b/pkg/ccl/backupccl/system_schema.go @@ -12,19 +12,14 @@ import ( "context" fmt "fmt" "math" - "strings" - "github.com/cockroachdb/cockroach/pkg/jobs" - "github.com/cockroachdb/cockroach/pkg/jobs/jobspb" "github.com/cockroachdb/cockroach/pkg/kv" - "github.com/cockroachdb/cockroach/pkg/security/username" "github.com/cockroachdb/cockroach/pkg/sql" "github.com/cockroachdb/cockroach/pkg/sql/catalog" descpb "github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb" "github.com/cockroachdb/cockroach/pkg/sql/catalog/descs" "github.com/cockroachdb/cockroach/pkg/sql/catalog/systemschema" "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" - "github.com/cockroachdb/cockroach/pkg/sql/sessiondata" "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/errors" ) @@ -155,98 +150,6 @@ func queryTableRowCount( return int64(*count), nil } -// jobsMigrationFunc resets the progress on schema change jobs, and marks all -// other jobs as reverting. -func jobsMigrationFunc( - ctx context.Context, execCfg *sql.ExecutorConfig, txn *kv.Txn, tempTableName string, -) (err error) { - executor := execCfg.InternalExecutor - - const statesToRevert = `('` + string(jobs.StatusRunning) + `', ` + - `'` + string(jobs.StatusPauseRequested) + `', ` + - `'` + string(jobs.StatusPaused) + `')` - - jobsToRevert := make([]int64, 0) - query := `SELECT id, payload FROM ` + tempTableName + ` WHERE status IN ` + statesToRevert - it, err := executor.QueryIteratorEx( - ctx, "restore-fetching-job-payloads", txn, - sessiondata.InternalExecutorOverride{User: username.RootUserName()}, - query) - if err != nil { - return errors.Wrap(err, "fetching job payloads") - } - defer func() { - closeErr := it.Close() - if err == nil { - err = closeErr - } - }() - for { - ok, err := it.Next(ctx) - if !ok { - if err != nil { - return err - } - break - } - - r := it.Cur() - id, payloadBytes := r[0], r[1] - rawJobID, ok := id.(*tree.DInt) - if !ok { - return errors.Errorf("job: failed to read job id as DInt (was %T)", id) - } - jobID := int64(*rawJobID) - - payload, err := jobs.UnmarshalPayload(payloadBytes) - if err != nil { - return errors.Wrap(err, "failed to unmarshal job to restore") - } - if payload.Type() == jobspb.TypeImport || payload.Type() == jobspb.TypeRestore { - jobsToRevert = append(jobsToRevert, jobID) - } - } - - // Update the status for other jobs. 
-	var updateStatusQuery strings.Builder
-	fmt.Fprintf(&updateStatusQuery, "UPDATE %s SET status = $1 WHERE id IN ", tempTableName)
-	fmt.Fprint(&updateStatusQuery, "(")
-	for i, job := range jobsToRevert {
-		if i > 0 {
-			fmt.Fprint(&updateStatusQuery, ", ")
-		}
-		fmt.Fprintf(&updateStatusQuery, "'%d'", job)
-	}
-	fmt.Fprint(&updateStatusQuery, ")")
-
-	if _, err := executor.Exec(ctx, "updating-job-status", txn, updateStatusQuery.String(), jobs.StatusCancelRequested); err != nil {
-		return errors.Wrap(err, "updating restored jobs as reverting")
-	}
-
-	return nil
-}
-
-// When restoring the jobs table we don't want to remove existing jobs, since
-// that includes the restore that we're running.
-func jobsRestoreFunc(
-	ctx context.Context,
-	execCfg *sql.ExecutorConfig,
-	txn *kv.Txn,
-	systemTableName, tempTableName string,
-) error {
-	executor := execCfg.InternalExecutor
-
-	// When restoring jobs, don't clear the existing table.
-
-	restoreQuery := fmt.Sprintf("INSERT INTO system.%s (SELECT * FROM %s) ON CONFLICT DO NOTHING;",
-		systemTableName, tempTableName)
-	opName := systemTableName + "-data-insert"
-	if _, err := executor.Exec(ctx, opName, txn, restoreQuery); err != nil {
-		return errors.Wrapf(err, "inserting data to system.%s", systemTableName)
-	}
-	return nil
-}
-
 // When restoring the settings table, we want to make sure to not override the
 // version.
 func settingsRestoreFunc(
@@ -314,9 +217,7 @@ var systemTableBackupConfiguration = map[string]systemBackupConfiguration{
 		shouldIncludeInClusterBackup: optInToClusterBackup,
 	},
 	systemschema.JobsTable.GetName(): {
-		shouldIncludeInClusterBackup: optInToClusterBackup,
-		migrationFunc:                jobsMigrationFunc,
-		customRestoreFunc:            jobsRestoreFunc,
+		shouldIncludeInClusterBackup: optOutOfClusterBackup,
 	},
 	systemschema.ScheduledJobsTable.GetName(): {
 		shouldIncludeInClusterBackup: optInToClusterBackup,
diff --git a/pkg/kv/kvserver/batcheval/cmd_delete_range.go b/pkg/kv/kvserver/batcheval/cmd_delete_range.go
index f0c0c52cd82e..eb4a0c0eb1f9 100644
--- a/pkg/kv/kvserver/batcheval/cmd_delete_range.go
+++ b/pkg/kv/kvserver/batcheval/cmd_delete_range.go
@@ -58,6 +58,8 @@ func declareKeysDeleteRange(
 	}
 }

+const maxDeleteRangeBatchBytes = 32 << 20
+
 // DeleteRange deletes the range of key/value pairs specified by
 // start and end keys.
 func DeleteRange(
@@ -67,7 +69,14 @@ func DeleteRange(
 	h := cArgs.Header
 	reply := resp.(*roachpb.DeleteRangeResponse)

-	// Use experimental MVCC range tombstone if requested.
+	if args.Predicates != (roachpb.DeleteRangePredicates{}) && !args.UseRangeTombstone {
+		// This ensures predicate based DeleteRange piggybacks on the version gate,
+		// roachpb api flags, and latch declarations used by the UseRangeTombstone flag.
+		return result.Result{}, errors.AssertionFailedf(
+			"UseRangeTombstone must be passed with predicate based DeleteRange")
+	}
+
+	// Use MVCC range tombstone if requested.
 	if args.UseRangeTombstone {
 		if cArgs.Header.Txn != nil {
 			return result.Result{}, ErrTransactionUnsupported
 		}
@@ -79,14 +88,55 @@ func DeleteRange(
 			return result.Result{}, errors.AssertionFailedf(
 				"ReturnKeys can't be used with range tombstones")
 		}
-
 		desc := cArgs.EvalCtx.Desc()
 		leftPeekBound, rightPeekBound := rangeTombstonePeekBounds(
 			args.Key, args.EndKey, desc.StartKey.AsRawKey(), desc.EndKey.AsRawKey())
 		maxIntents := storage.MaxIntentsPerWriteIntentError.Get(&cArgs.EvalCtx.ClusterSettings().SV)

-		err := storage.MVCCDeleteRangeUsingTombstone(ctx, readWriter, cArgs.Stats,
-			args.Key, args.EndKey, h.Timestamp, cArgs.Now, leftPeekBound, rightPeekBound, maxIntents)
+		if args.Predicates == (roachpb.DeleteRangePredicates{}) {
+			// If no predicate parameters are passed, use the fast path.
+			err := storage.MVCCDeleteRangeUsingTombstone(ctx, readWriter, cArgs.Stats,
+				args.Key, args.EndKey, h.Timestamp, cArgs.Now, leftPeekBound, rightPeekBound, maxIntents)
+			return result.Result{}, err
+		}
+
+		if h.MaxSpanRequestKeys == 0 {
+			// In production, MaxSpanRequestKeys must be greater than zero to ensure
+			// the DistSender serializes predicate based DeleteRange requests across
+			// ranges. This ensures that only one resumeSpan gets returned to the
+			// client.
+			//
+			// Also, note that DeleteRangeUsingTombstone requests pass the isAlone
+			// flag in roachpb.api.proto, ensuring the requests do not intermingle with
+			// other types of requests, preventing further resume span muddling.
+			return result.Result{}, errors.AssertionFailedf(
+				"MaxSpanRequestKeys must be greater than zero when using predicate based DeleteRange")
+		}
+		// TODO (msbutler): Tune the threshold once DeleteRange and DeleteRangeUsingTombstone have
+		// been further optimized.
+		defaultRangeTombstoneThreshold := int64(64)
+		resumeSpan, err := storage.MVCCPredicateDeleteRange(ctx, readWriter, cArgs.Stats,
+			args.Key, args.EndKey, h.Timestamp, cArgs.Now, leftPeekBound, rightPeekBound,
+			args.Predicates, h.MaxSpanRequestKeys, maxDeleteRangeBatchBytes,
+			defaultRangeTombstoneThreshold, maxIntents)
+
+		if resumeSpan != nil {
+			reply.ResumeSpan = resumeSpan
+			reply.ResumeReason = roachpb.RESUME_KEY_LIMIT
+
+			// Note: While MVCCPredicateDeleteRange _could_ return reply.NumKeys, as
+			// the number of keys iterated through, doing so could lead to a
+			// significant performance drawback. The DistSender would have used
+			// NumKeys to subtract the number of keys processed by one range from the
+			// MaxSpanRequestKeys limit given to the next range. In this case, that's
+			// specifically not what we want, because this request does not use the
+			// normal meaning of MaxSpanRequestKeys (i.e. number of keys to return).
+			// Here, MaxSpanRequestKeys is used to limit the number of tombstones
+			// written. Thus, setting NumKeys would just reduce the limit available to
+			// the next range for no good reason.
+		}
+		// Return result is always empty, since the reply is populated into the
+		// passed in resp pointer.
 		return result.Result{}, err
 	}
diff --git a/pkg/kv/kvserver/batcheval/cmd_delete_range_test.go b/pkg/kv/kvserver/batcheval/cmd_delete_range_test.go
index 9692e243dc69..4e454a1001ea 100644
--- a/pkg/kv/kvserver/batcheval/cmd_delete_range_test.go
+++ b/pkg/kv/kvserver/batcheval/cmd_delete_range_test.go
@@ -12,6 +12,8 @@ package batcheval

 import (
 	"context"
+	"fmt"
+	"math"
 	"testing"

 	"github.com/cockroachdb/cockroach/pkg/keys"
@@ -27,8 +29,8 @@ import (
 	"github.com/stretchr/testify/require"
 )

-// TestDeleteRangeTombstone tests DeleteRange range tombstones directly, using
-// only a Pebble engine.
+// TestDeleteRangeTombstone tests DeleteRange range tombstones and predicate based DeleteRange
+// directly, using only a Pebble engine.
 //
 // MVCC range tombstone logic is tested exhaustively in the MVCC history tests,
 // this just tests the RPC plumbing.
@@ -76,6 +78,11 @@ func TestDeleteRangeTombstone(t *testing.T) {
 		inline     bool
 		returnKeys bool
 		expectErr  interface{} // error type, substring, or true (any)
+
+		// The fields below test predicate based delete range rpc plumbing.
+		predicateStartTime int64 // if set, the test will only run with predicate based delete range
+		onlyPointKeys      bool  // if set, the UseRangeTombstone arg is set to false
+		maxBatchSize       int64 // if predicateStartTime is set, then MaxBatchSize must be set
 	}{
 		"above points succeed": {
 			start: "a",
 			end:   "f",
 			ts:    10e9,
 		},
@@ -142,130 +149,216 @@ func TestDeleteRangeTombstone(t *testing.T) {
 			ts:        1e9,
 			expectErr: &roachpb.WriteTooOldError{},
 		},
+		"predicate without UseRangeTombstone error": {
+			start:              "a",
+			end:                "f",
+			ts:                 10e9,
+			predicateStartTime: 1,
+			maxBatchSize:       maxDeleteRangeBatchBytes,
+			onlyPointKeys:      true,
+			expectErr:          "UseRangeTombstone must be passed with predicate based DeleteRange",
+		},
+		"predicate maxBatchSize error": {
+			start:              "a",
+			end:                "f",
+			ts:                 10e9,
+			predicateStartTime: 1,
+			maxBatchSize:       0,
+			expectErr:          "MaxSpanRequestKeys must be greater than zero when using predicate based DeleteRange",
+		},
 	}
 	for name, tc := range testcases {
 		t.Run(name, func(t *testing.T) {
-			ctx := context.Background()
-			st := cluster.MakeTestingClusterSettings()
-			engine := storage.NewDefaultInMemForTesting()
-			defer engine.Close()
-
-			writeInitialData(t, ctx, engine)
+			for _, runWithPredicates := range []bool{false, true} {
+				if tc.predicateStartTime > 0 && !runWithPredicates {
+					continue
+				}
+				t.Run(fmt.Sprintf("Predicates=%v", runWithPredicates), func(t *testing.T) {
+					ctx := context.Background()
+					st := cluster.MakeTestingClusterSettings()
+					engine := storage.NewDefaultInMemForTesting()
+					defer engine.Close()

-			rangeKey := storage.MVCCRangeKey{
-				StartKey:  roachpb.Key(tc.start),
-				EndKey:    roachpb.Key(tc.end),
-				Timestamp: hlc.Timestamp{WallTime: tc.ts},
-			}
+					writeInitialData(t, ctx, engine)

-			// Prepare the request and environment.
- evalCtx := &MockEvalCtx{ - ClusterSettings: st, - Desc: &roachpb.RangeDescriptor{ - StartKey: roachpb.RKey(rangeStart), - EndKey: roachpb.RKey(rangeEnd), - }, - } - - h := roachpb.Header{ - Timestamp: rangeKey.Timestamp, - } - if tc.txn { - txn := roachpb.MakeTransaction("txn", nil /* baseKey */, roachpb.NormalUserPriority, rangeKey.Timestamp, 0, 0) - h.Txn = &txn - } + rangeKey := storage.MVCCRangeKey{ + StartKey: roachpb.Key(tc.start), + EndKey: roachpb.Key(tc.end), + Timestamp: hlc.Timestamp{WallTime: tc.ts}, + } - req := &roachpb.DeleteRangeRequest{ - RequestHeader: roachpb.RequestHeader{ - Key: rangeKey.StartKey, - EndKey: rangeKey.EndKey, - }, - UseRangeTombstone: true, - Inline: tc.inline, - ReturnKeys: tc.returnKeys, - } + // Prepare the request and environment. + evalCtx := &MockEvalCtx{ + ClusterSettings: st, + Desc: &roachpb.RangeDescriptor{ + StartKey: roachpb.RKey(rangeStart), + EndKey: roachpb.RKey(rangeEnd), + }, + } - ms := computeStats(t, engine, rangeStart, rangeEnd, rangeKey.Timestamp.WallTime) + h := roachpb.Header{ + Timestamp: rangeKey.Timestamp, + } + if tc.txn { + txn := roachpb.MakeTransaction("txn", nil /* baseKey */, roachpb.NormalUserPriority, rangeKey.Timestamp, 0, 0) + h.Txn = &txn + } + var predicates roachpb.DeleteRangePredicates + if runWithPredicates { + predicates = roachpb.DeleteRangePredicates{ + StartTime: hlc.Timestamp{WallTime: 1}, + } + h.MaxSpanRequestKeys = math.MaxInt64 + } + if tc.predicateStartTime > 0 { + predicates = roachpb.DeleteRangePredicates{ + StartTime: hlc.Timestamp{WallTime: tc.predicateStartTime}, + } + h.MaxSpanRequestKeys = tc.maxBatchSize + } - // Use a spanset batch to assert latching of all accesses. In particular, - // the additional seeks necessary to check for adjacent range keys that we - // may merge with (for stats purposes) which should not cross the range - // bounds. - var latchSpans, lockSpans spanset.SpanSet - declareKeysDeleteRange(evalCtx.Desc, &h, req, &latchSpans, &lockSpans, 0) - batch := spanset.NewBatchAt(engine.NewBatch(), &latchSpans, h.Timestamp) - defer batch.Close() + req := &roachpb.DeleteRangeRequest{ + RequestHeader: roachpb.RequestHeader{ + Key: rangeKey.StartKey, + EndKey: rangeKey.EndKey, + }, + UseRangeTombstone: !tc.onlyPointKeys, + Inline: tc.inline, + ReturnKeys: tc.returnKeys, + Predicates: predicates, + } - // Run the request. - resp := &roachpb.DeleteRangeResponse{} - _, err := DeleteRange(ctx, batch, CommandArgs{ - EvalCtx: evalCtx.EvalContext(), - Stats: &ms, - Now: now, - Header: h, - Args: req, - }, resp) + ms := computeStats(t, engine, rangeStart, rangeEnd, rangeKey.Timestamp.WallTime) - // Check the error. - if tc.expectErr != nil { - require.Error(t, err) - if b, ok := tc.expectErr.(bool); ok && b { - // any error is fine - } else if expectMsg, ok := tc.expectErr.(string); ok { - require.Contains(t, err.Error(), expectMsg) - } else if e, ok := tc.expectErr.(error); ok { - require.True(t, errors.HasType(err, e), "expected %T, got %v", e, err) - } else { - require.Fail(t, "invalid expectErr", "expectErr=%v", tc.expectErr) - } - return - } - require.NoError(t, err) - require.NoError(t, batch.Commit(true)) + // Use a spanset batch to assert latching of all accesses. In particular, + // the additional seeks necessary to check for adjacent range keys that we + // may merge with (for stats purposes) which should not cross the range + // bounds. 
+					var latchSpans, lockSpans spanset.SpanSet
+					declareKeysDeleteRange(evalCtx.Desc, &h, req, &latchSpans, &lockSpans, 0)
+					batch := spanset.NewBatchAt(engine.NewBatch(), &latchSpans, h.Timestamp)
+					defer batch.Close()

-			// Check that the range tombstone was written successfully.
-			iter := engine.NewMVCCIterator(storage.MVCCKeyAndIntentsIterKind, storage.IterOptions{
-				KeyTypes:   storage.IterKeyTypeRangesOnly,
-				LowerBound: rangeKey.StartKey,
-				UpperBound: rangeKey.EndKey,
-			})
-			defer iter.Close()
-			iter.SeekGE(storage.MVCCKey{Key: rangeKey.StartKey})
+					// Run the request.
+					resp := &roachpb.DeleteRangeResponse{}
+					_, err := DeleteRange(ctx, batch, CommandArgs{
+						EvalCtx: evalCtx.EvalContext(),
+						Stats:   &ms,
+						Now:     now,
+						Header:  h,
+						Args:    req,
+					}, resp)

-			var seen storage.MVCCRangeKeyValue
-			for {
-				ok, err := iter.Valid()
-				require.NoError(t, err)
-				if !ok {
-					break
-				}
-				require.True(t, ok)
-				for _, rkv := range iter.RangeKeys() {
-					if rkv.RangeKey.Timestamp.Equal(rangeKey.Timestamp) {
-						if len(seen.RangeKey.StartKey) == 0 {
-							seen = rkv.Clone()
+					// Check the error.
+					if tc.expectErr != nil {
+						require.Error(t, err)
+						if b, ok := tc.expectErr.(bool); ok && b {
+							// any error is fine
+						} else if expectMsg, ok := tc.expectErr.(string); ok {
+							require.Contains(t, err.Error(), expectMsg)
+						} else if e, ok := tc.expectErr.(error); ok {
+							require.True(t, errors.HasType(err, e), "expected %T, got %v", e, err)
 						} else {
-							seen.RangeKey.EndKey = rkv.RangeKey.EndKey.Clone()
-							require.Equal(t, seen.Value, rkv.Value)
+							require.Fail(t, "invalid expectErr", "expectErr=%v", tc.expectErr)
 						}
-						break
+						return
 					}
-				}
-				iter.Next()
-			}
-			require.Equal(t, rangeKey, seen.RangeKey)
+					require.NoError(t, err)
+					require.NoError(t, batch.Commit(true))

-			value, err := storage.DecodeMVCCValue(seen.Value)
-			require.NoError(t, err)
-			require.True(t, value.IsTombstone())
-			require.Equal(t, now, value.LocalTimestamp)
+					if runWithPredicates {
+						checkPredicateDeleteRange(t, engine, rangeKey)
+					} else {
+						checkDeleteRangeTombstone(t, engine, rangeKey, now)
+					}

-			// Check that range tombstone stats were updated correctly.
-			require.Equal(t, computeStats(t, engine, rangeStart, rangeEnd, rangeKey.Timestamp.WallTime), ms)
+					// Check that range tombstone stats were updated correctly.
+					require.Equal(t, computeStats(t, engine, rangeStart, rangeEnd, rangeKey.Timestamp.WallTime), ms)
+				})
+			}
 		})
 	}
 }

+// checkPredicateDeleteRange checks that the span targeted by the predicate
+// based delete range operation only has point tombstones, as the size of the
+// spans in this test is below rangeTombstoneThreshold.
+//
+// The passed-in rKeyInfo contains info on the span PredicateDeleteRange
+// operated on. The command should not have written an actual range key!
+func checkPredicateDeleteRange(t *testing.T, engine storage.Reader, rKeyInfo storage.MVCCRangeKey) {
+
+	iter := engine.NewMVCCIterator(storage.MVCCKeyAndIntentsIterKind, storage.IterOptions{
+		KeyTypes:   storage.IterKeyTypePointsAndRanges,
+		LowerBound: rKeyInfo.StartKey,
+		UpperBound: rKeyInfo.EndKey,
+	})
+	defer iter.Close()
+
+	for iter.SeekGE(storage.MVCCKey{Key: rKeyInfo.StartKey}); ; iter.NextKey() {
+		ok, err := iter.Valid()
+		require.NoError(t, err)
+		if !ok {
+			break
+		}
+		hasPoint, hasRange := iter.HasPointAndRange()
+		if !hasPoint && hasRange {
+			// PredicateDeleteRange should not have written any range tombstones;
+			// therefore, any range key tombstones in the span should have been
+			// written before the request was issued.
+			for _, rKey := range iter.RangeKeys() {
+				require.Equal(t, true, rKey.RangeKey.Timestamp.Less(rKeyInfo.Timestamp))
+			}
+			continue
+		}
+		value, err := storage.DecodeMVCCValue(iter.UnsafeValue())
+		require.NoError(t, err)
+		require.True(t, value.IsTombstone())
+	}
+}
+
+// checkDeleteRangeTombstone checks that the range tombstone was written successfully.
+func checkDeleteRangeTombstone(
+	t *testing.T, engine storage.Reader, rangeKey storage.MVCCRangeKey, now hlc.ClockTimestamp,
+) {
+	iter := engine.NewMVCCIterator(storage.MVCCKeyAndIntentsIterKind, storage.IterOptions{
+		KeyTypes:   storage.IterKeyTypeRangesOnly,
+		LowerBound: rangeKey.StartKey,
+		UpperBound: rangeKey.EndKey,
+	})
+	defer iter.Close()
+	iter.SeekGE(storage.MVCCKey{Key: rangeKey.StartKey})
+
+	var seen storage.MVCCRangeKeyValue
+	for {
+		ok, err := iter.Valid()
+		require.NoError(t, err)
+		if !ok {
+			break
+		}
+		require.True(t, ok)
+		for _, rkv := range iter.RangeKeys() {
+			if rkv.RangeKey.Timestamp.Equal(rangeKey.Timestamp) {
+				if len(seen.RangeKey.StartKey) == 0 {
+					seen = rkv.Clone()
+				} else {
+					seen.RangeKey.EndKey = rkv.RangeKey.EndKey.Clone()
+					require.Equal(t, seen.Value, rkv.Value)
+				}
+				break
+			}
+		}
+		iter.Next()
+	}
+	require.Equal(t, rangeKey, seen.RangeKey)
+
+	value, err := storage.DecodeMVCCValue(seen.Value)
+	require.NoError(t, err)
+	require.True(t, value.IsTombstone())
+	require.Equal(t, now, value.LocalTimestamp)
+}
+
 // computeStats computes MVCC stats for the given range.
 //
 // TODO(erikgrinaker): This, storage.computeStats(), and engineStats() should be
diff --git a/pkg/roachpb/api.proto b/pkg/roachpb/api.proto
index cb94741f68a3..85afc74f43c3 100644
--- a/pkg/roachpb/api.proto
+++ b/pkg/roachpb/api.proto
@@ -356,6 +356,34 @@ message DeleteRangeRequest {
   // The caller must check the MVCCRangeTombstones version gate before using
   // this parameter, as it is new in 22.2.
   bool use_range_tombstone = 5;
+
+  DeleteRangePredicates predicates = 6 [(gogoproto.nullable) = false];
+}
+
+// DeleteRangePredicates, if specified, will conduct a predicate based DeleteRange.
+// Predicate based delete range will issue tombstones on live keys that match the
+// filters provided by the caller. In particular, long runs of matched keys will
+// get deleted with a range tombstone, while smaller runs will get deleted with
+// point tombstones. Note that the keyspaces of the runs do not overlap.
+//
+// To pass DeleteRangePredicates, the client must also pass UseRangeTombstone.
+message DeleteRangePredicates {
+  // StartTime specifies an exclusive lower bound to surface keys
+  // for deletion. If specified, DeleteRange will only issue tombstones to keys
+  // within the span [startKey, endKey) that also have MVCC versions with
+  // timestamps between (startTime, endTime), where endTime is the request timestamp.
+  //
+  // The main application for this is a rollback of IMPORT INTO on a non-empty
+  // table. Here, DeleteRange with startTime = ImportStartTime must only delete
+  // keys written by the import. In other words, older, pre-import data cannot
+  // be touched. Because IMPORT INTO takes a table offline and does not allow
+  // masking an existing key, this operation will not issue tombstones to
+  // pre-import data that were written at or below StartTime.
+  //
+  // More precisely, this operation assumes that for a k@t in the importing table:
+  // - t must be < endTime
+  // - if t in (startTime, endTime), then there is no other k@t' where t' <= startTime.
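+  //
+  // For example, with startTime = 1 and a request timestamp (endTime) of 5, a
+  // version k@3 is eligible for deletion, while k@1 is not, since startTime is
+  // an exclusive lower bound.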
+  util.hlc.Timestamp start_time = 6 [(gogoproto.nullable) = false];
 }

 // A DeleteRangeResponse is the return value from the DeleteRange()
diff --git a/pkg/storage/mvcc.go b/pkg/storage/mvcc.go
index 41c3e00f7f84..705ddc85bde9 100644
--- a/pkg/storage/mvcc.go
+++ b/pkg/storage/mvcc.go
@@ -2321,8 +2321,16 @@ func MVCCClearTimeRange(
 	})
 	defer iter.Close()

+	// clearedMetaKey is the latest surfaced key that will get cleared
 	var clearedMetaKey MVCCKey
-	var clearedMeta, restoredMeta enginepb.MVCCMetadata
+
+	// clearedMeta contains metadata on the clearedMetaKey
+	var clearedMeta enginepb.MVCCMetadata
+
+	// restoredMeta contains metadata on the previous version of the clearedMetaKey.
+	// Once the key in clearedMetaKey is cleared, the key represented in
+	// restoredMeta becomes the latest version of this MVCC key.
+	var restoredMeta enginepb.MVCCMetadata
 	iter.SeekGE(MVCCKey{Key: key})
 	for {
 		if ok, err := iter.Valid(); err != nil {
@@ -2466,6 +2474,297 @@ func MVCCDeleteRange(
 	return keys, res.ResumeSpan, res.NumKeys, nil
 }

+// MVCCPredicateDeleteRange issues MVCC tombstones at endTime to live keys
+// within the span [startKey, endKey) that also have MVCC versions that match
+// the predicate filters. Long runs of matched keys will get deleted with a
+// range tombstone, while smaller runs will get deleted with point tombstones.
+// The keyspaces of each run do not overlap.
+//
+// This operation is non-transactional, but will check for existing intents in
+// the target key span, regardless of timestamp, and return a WriteIntentError
+// containing up to maxIntents intents.
+//
+// MVCCPredicateDeleteRange will return with a resumeSpan if the number of tombstones
+// written exceeds maxBatchSize or the size of the written tombstones exceeds maxBatchByteSize.
+// These constraints prevent overwhelming raft.
+//
+// If an MVCC key surfaced has a timestamp at or above endTime,
+// MVCCPredicateDeleteRange returns a WriteTooOldError without a resumeSpan,
+// even if tombstones were already written to disk. To resolve, the caller
+// should retry the call at a higher timestamp, assuming they have the
+// appropriate level of isolation (e.g. the span covers an offline table, in the
+// case of IMPORT rollbacks).
+//
+// An example of how this works: Issuing DeleteRange[a,e)@3 with
+// Predicate{StartTime=1} on the following keys would issue tombstones at a@3,
+// b@3, and d@3.
+//
+//  t3
+//  t2 a2 b2       d2 e2
+//  t1    b1 c1
+//     a  b  c  d  e
func MVCCPredicateDeleteRange(
+	ctx context.Context,
+	rw ReadWriter,
+	ms *enginepb.MVCCStats,
+	startKey, endKey roachpb.Key,
+	endTime hlc.Timestamp,
+	localTimestamp hlc.ClockTimestamp,
+	leftPeekBound, rightPeekBound roachpb.Key,
+	predicates roachpb.DeleteRangePredicates,
+	maxBatchSize, maxBatchByteSize int64,
+	rangeTombstoneThreshold int64,
+	maxIntents int64,
+) (*roachpb.Span, error) {
+
+	if maxBatchSize == 0 {
+		// Set maxBatchSize to a large number to ensure MVCCPredicateDeleteRange
+		// doesn't return early due to batch size. Note that maxBatchSize is only
+		// set to 0 during testing.
+		maxBatchSize = math.MaxInt64
+	}
+
+	// batchSize is the number of tombstones (point and range) that have been flushed.
+	var batchSize int64
+	var batchByteSize int64
+
+	// runSize is the number of tombstones (point and range) that will get flushed in
+	// the current run.
+ var runSize int64 + var runByteSize int64 + + var runStart, runEnd roachpb.Key + + buf := make([]roachpb.Key, rangeTombstoneThreshold) + + if ms == nil { + return nil, errors.AssertionFailedf( + "MVCCStats passed in to MVCCPredicateDeleteRange must be non-nil to ensure proper stats" + + " computation during Delete operations") + } + + // Check for any overlapping intents, and return them to be resolved. + if intents, err := ScanIntents(ctx, rw, startKey, endKey, maxIntents, 0); err != nil { + return nil, err + } else if len(intents) > 0 { + return nil, &roachpb.WriteIntentError{Intents: intents} + } + + // continueRun returns three bools: the first is true if the current run + // should continue; the second is true if the latest key is a point tombstone; + // the third is true if the latest key is a range tombstone. If a non-nil + // error is returned, the booleans are invalid. The run should continue if: + // + // 1) The latest version of the key is a point or range tombstone, with a + // timestamp below the client provided EndTime. Since the goal is to create + // long runs, any tombstoned key should continue the run. + // + // 2) The latest key is live, matches the predicates, and has a + // timestamp below EndTime. + continueRun := func(k MVCCKey, iter SimpleMVCCIterator, + ) (toContinue bool, isPointTombstone bool, isRangeTombstone bool, err error) { + hasPointKey, hasRangeKey := iter.HasPointAndRange() + if hasRangeKey { + // TODO (msbutler): cache the range keys while the range bounds remain + // constant, since iter.RangeKeys() is expensive. Manual caching may not be necessary if + // https://github.com/cockroachdb/cockroach/issues/84379 lands. + rangeKeys := iter.RangeKeys() + if endTime.LessEq(rangeKeys[0].RangeKey.Timestamp) { + return false, false, false, roachpb.NewWriteTooOldError(endTime, + rangeKeys[0].RangeKey.Timestamp.Next(), k.Key.Clone()) + } + if !hasPointKey { + // landed on bare range key. + return true, false, true, nil + } + if k.Timestamp.Less(rangeKeys[0].RangeKey.Timestamp) { + // The latest range tombstone shadows the point key; ok to continue run. + return true, false, true, nil + } + } + + // At this point, there exists a point key that shadows all range keys, + // if they exist. + vRaw := iter.UnsafeValue() + + if endTime.LessEq(k.Timestamp) { + return false, false, false, roachpb.NewWriteTooOldError(endTime, k.Timestamp.Next(), + k.Key.Clone()) + } + if len(vRaw) == 0 { + // The latest version of the key is a point tombstone. + return true, true, false, nil + } + + // The latest key is a live point key. Conduct predicate filtering. + if k.Timestamp.LessEq(predicates.StartTime) { + return false, false, false, nil + } + + // TODO (msbutler): use MVCCValueHeader to match on job ID predicate + _, err = DecodeMVCCValue(vRaw) + if err != nil { + return false, false, false, err + } + return true, false, false, nil + } + + // Create some reusable machinery for flushing a run with point tombstones + // that is typically used in a single MVCCPut call. 
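+	// (Reusing one prefix iterator and one put buffer for the entire batch,
+	// rather than letting each put allocate its own, keeps the point-tombstone
+	// flush path cheap when a run is written out key by key.)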
+	pointTombstoneIter := newMVCCIterator(rw, endTime, false /* rangeKeyMasking */, IterOptions{
+		KeyTypes: IterKeyTypePointsAndRanges,
+		Prefix:   true,
+	})
+	defer pointTombstoneIter.Close()
+	pointTombstoneBuf := newPutBuffer()
+	defer pointTombstoneBuf.release()
+
+	flushDeleteKeys := func() error {
+		if runSize == 0 {
+			return nil
+		}
+		if runSize >= rangeTombstoneThreshold ||
+			// Even if we didn't get a large enough number of keys to switch to
+			// using range tombstones, the byte size of the keys we did get is now too large to
+			// encode them all within the byte size limit, so use a range tombstone anyway.
+			batchByteSize+runByteSize >= maxBatchByteSize {
+			if err := MVCCDeleteRangeUsingTombstone(ctx, rw, ms,
+				runStart, runEnd.Next(), endTime, localTimestamp, leftPeekBound, rightPeekBound,
+				maxIntents); err != nil {
+				return err
+			}
+			batchByteSize += int64(MVCCRangeKey{StartKey: runStart, EndKey: runEnd, Timestamp: endTime}.EncodedSize())
+			batchSize++
+		} else {
+			// Use point tombstones.
+			for i := int64(0); i < runSize; i++ {
+				if err := mvccPutInternal(ctx, rw, pointTombstoneIter, ms, buf[i], endTime, localTimestamp, noValue,
+					nil, pointTombstoneBuf, nil); err != nil {
+					return err
+				}
+			}
+			batchByteSize += runByteSize
+			batchSize += runSize
+		}
+		runSize = 0
+		runStart = roachpb.Key{}
+		return nil
+	}
+
+	// Using the IncrementalIterator with the time-bound iter optimization could
+	// potentially be a big win here -- the expected use-case for this is to run
+	// over an entire table's span with a very recent timestamp, issuing tombstones to
+	// writes of some failed IMPORT, which could very likely have hit only
+	// some small subset of the table's keyspace.
+	//
+	// The MVCCIncrementalIterator uses a non-time-bound iter as its source
+	// of truth, and only uses the TBI iterator as an optimization when finding
+	// the next KV to iterate over. This pattern allows us to quickly skip over
+	// swaths of uninteresting keys, but then iterates over the latest version of each MVCC key.
+	//
+	// Notice that the iterator's EndTime is set to hlc.MaxTimestamp, in order to
+	// detect and fail on any keys written at or after the client provided
+	// endTime. We don't _expect_ to hit intents or newer keys in the client
+	// provided span since MVCCPredicateDeleteRange is only intended for
+	// non-live key spans, but there could be a leftover intent.
+	iter := NewMVCCIncrementalIterator(rw, MVCCIncrementalIterOptions{
+		EndKey:               endKey,
+		StartTime:            predicates.StartTime,
+		EndTime:              hlc.MaxTimestamp,
+		RangeKeyMaskingBelow: endTime,
+		KeyTypes:             IterKeyTypePointsAndRanges,
+	})
+	defer iter.Close()
+
+	iter.SeekGE(MVCCKey{Key: startKey})
+	for {
+		if ok, err := iter.Valid(); err != nil {
+			return nil, err
+		} else if !ok {
+			break
+		}
+		k := iter.UnsafeKey()
+		toContinue, isPointTombstone, isRangeTombstone, err := continueRun(k, iter)
+		if err != nil {
+			return nil, err
+		}
+
+		// If the latest version of the key is a tombstone at a timestamp < endTime,
+		// the timestamp could be less than predicates.StartTime. In this case, the
+		// run can continue, and since there's no need to issue another tombstone,
+		// runSize and buf are not updated.
+		if isRangeTombstone {
+			// Because range key information can be inferred at point keys,
+			// skip over the surfaced range key, and reason about shadowed keys at
+			// the surfaced point key.
+			//
+			// E.g. Scanning the keys below:
+			//  2  a2
+			//  1  o---o
+			//     a   b
+			//
+			// would result in two surfaced keys:
+			//   {a-b}@1;
+			//   a2, {a-b}@1
+			//
+			// Note that the range key gets surfaced before the point key,
+			// even though the point key shadows it.
+			iter.NextIgnoringTime()
+		} else if isPointTombstone {
+			// Since the latest version of this key is a point tombstone, skip over
+			// older versions of this key, and move the iterator to the next key
+			// even if it lies outside (startTime, endTime), to see if there's a
+			// need to flush.
+			iter.NextKeyIgnoringTime()
+		} else if toContinue {
+			// The latest version of the key is live, matches the predicate filters
+			// -- e.g. has a timestamp between (predicates.StartTime, endTime);
+			// therefore, plan to delete it.
+			if batchSize+runSize >= maxBatchSize || batchByteSize+runByteSize >= maxBatchByteSize {
+				// The matched key will be the start of the resume span.
+				if err := flushDeleteKeys(); err != nil {
+					return nil, err
+				}
+				return &roachpb.Span{Key: k.Key.Clone(), EndKey: endKey}, nil
+			}
+			if runSize == 0 {
+				runStart = append(runStart[:0], k.Key...)
+			}
+			runEnd = append(runEnd[:0], k.Key...)
+
+			if runSize < rangeTombstoneThreshold {
+				// Only buffer keys if there's a possibility of issuing point tombstones.
+				//
+				// To avoid unnecessary memory allocation, overwrite the previous key at
+				// the buffer's current position. No data corruption occurs because the
+				// buffer is flushed up to runSize.
+				buf[runSize] = append(buf[runSize][:0], runEnd...)
+			}
+
+			runSize++
+			runByteSize += int64(k.EncodedSize())
+
+			// Move the iterator to the next key in linear iteration even if it lies
+			// outside (startTime, endTime), to see if there's a need to flush. We can
+			// skip to the next key, as we don't care about older versions of the
+			// current key we're about to delete.
+			iter.NextKeyIgnoringTime()
+		} else {
+			// This key does not match. Flush the run of matching keys,
+			// to prevent issuing tombstones on keys that do not match the predicates.
+			if err := flushDeleteKeys(); err != nil {
+				return nil, err
+			}
+			// Move the incremental iterator to the next valid MVCC key that can be
+			// deleted. If TBI was enabled when initializing the incremental iterator,
+			// this step could jump over large swaths of keys that do not qualify for
+			// clearing.
+			iter.NextKey()
+		}
+	}
+	return nil, flushDeleteKeys()
+}
+
 // MVCCDeleteRangeUsingTombstone deletes the given MVCC keyspan at the given
 // timestamp using an MVCC range tombstone (rather than MVCC point tombstones).
// This operation is non-transactional, but will check for existing intents and diff --git a/pkg/storage/mvcc_history_test.go b/pkg/storage/mvcc_history_test.go index ec8e44e2fe0e..bd753e7a3494 100644 --- a/pkg/storage/mvcc_history_test.go +++ b/pkg/storage/mvcc_history_test.go @@ -13,6 +13,7 @@ package storage import ( "context" "fmt" + "math" "path/filepath" "regexp" "sort" @@ -73,6 +74,7 @@ var sstIterVerify = util.ConstantWithMetamorphicTestBool("mvcc-histories-sst-ite // del [t=] [ts=[,]] [localTs=[,]] [resolve [status=]] k= // del_range [t=] [ts=[,]] [localTs=[,]] [resolve [status=]] k= [end=] [max=] [returnKeys] // del_range_ts [ts=[,]] [localTs=[,]] k= end= +// del_range_pred [ts=[,]] [localTs=[,]] k= end= [startTime=,max=,maxBytes=,rangeThreshold=] // increment [t=] [ts=[,]] [localTs=[,]] [resolve [status=]] k= [inc=] // initput [t=] [ts=[,]] [resolve [status=]] k= v= [raw] [failOnTombstones] // merge [t=] [ts=[,]] [resolve [status=]] k= v= [raw] @@ -659,6 +661,7 @@ var commands = map[string]cmd{ "del": {typDataUpdate, cmdDelete}, "del_range": {typDataUpdate, cmdDeleteRange}, "del_range_ts": {typDataUpdate, cmdDeleteRangeTombstone}, + "del_range_pred": {typDataUpdate, cmdDeleteRangePredicate}, "export": {typReadOnly, cmdExport}, "get": {typReadOnly, cmdGet}, "increment": {typDataUpdate, cmdIncrement}, @@ -1019,6 +1022,40 @@ func cmdDeleteRangeTombstone(e *evalCtx) error { }) } +func cmdDeleteRangePredicate(e *evalCtx) error { + key, endKey := e.getKeyRange() + ts := e.getTs(nil) + localTs := hlc.ClockTimestamp(e.getTsWithName("localTs")) + + max := math.MaxInt64 + if e.hasArg("max") { + e.scanArg("max", &max) + } + + maxBytes := math.MaxInt64 + if e.hasArg("maxBytes") { + e.scanArg("maxBytes", &maxBytes) + } + predicates := roachpb.DeleteRangePredicates{ + StartTime: e.getTsWithName("startTime"), + } + rangeThreshold := 64 + if e.hasArg("rangeThreshold") { + e.scanArg("rangeThreshold", &rangeThreshold) + } + return e.withWriter("del_range_pred", func(rw ReadWriter) error { + resumeSpan, err := MVCCPredicateDeleteRange(e.ctx, rw, e.ms, key, endKey, ts, + localTs, nil, nil, predicates, int64(max), int64(maxBytes), int64(rangeThreshold), 0) + + if resumeSpan != nil { + e.results.buf.Printf("del_range_pred: resume span [%s,%s)\n", resumeSpan.Key, + resumeSpan.EndKey) + } + return err + }, + ) +} + func cmdGet(e *evalCtx) error { txn := e.getTxn(optional) key := e.getKey() diff --git a/pkg/storage/testdata/mvcc_histories/delete_range_predicate b/pkg/storage/testdata/mvcc_histories/delete_range_predicate new file mode 100644 index 000000000000..b3c8e31b4b0d --- /dev/null +++ b/pkg/storage/testdata/mvcc_histories/delete_range_predicate @@ -0,0 +1,382 @@ +# Tests MVCC Del Range with timestamp predicate. +# +# Set up some point keys, point tombstones x, range tombstones o--o, +# and intents []. 
+# +# 7 [i7] +# 6 +# 5 +# 4 x d4 f4 x h4 o-------------------o +# 3 b3 +# 2 a2 e2 g2 +# 1 d1 +# 0 +# a b c d e f g h i j k l m n o p +run ok +put k=a ts=2 v=a2 +del k=a ts=4 +put k=b ts=3 v=b3 +put k=d ts=1 v=d1 +put k=d ts=4 v=d4 +put k=e ts=2 v=e2 +put k=f ts=4 v=f4 +put k=g ts=2 v=g2 +del k=g ts=4 +put k=h ts=4 v=h4 +del_range_ts k=k end=p ts=4 +with t=A + txn_begin ts=7 + put k=i v=i7 +---- +>> at end: +txn: "A" meta={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} lock=true stat=PENDING rts=7.000000000,0 wto=false gul=0,0 +rangekey: {k-p}/[4.000000000,0=/] +data: "a"/4.000000000,0 -> / +data: "a"/2.000000000,0 -> /BYTES/a2 +data: "b"/3.000000000,0 -> /BYTES/b3 +data: "d"/4.000000000,0 -> /BYTES/d4 +data: "d"/1.000000000,0 -> /BYTES/d1 +data: "e"/2.000000000,0 -> /BYTES/e2 +data: "f"/4.000000000,0 -> /BYTES/f4 +data: "g"/4.000000000,0 -> / +data: "g"/2.000000000,0 -> /BYTES/g2 +data: "h"/4.000000000,0 -> /BYTES/h4 +meta: "i"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "i"/7.000000000,0 -> /BYTES/i7 + +# Writing next to or above point keys and tombstones should work. +run stats ok +del_range_pred k=a end=i ts=5 startTime=3 rangeThreshold=2 +---- +>> del_range_pred k=a end=i ts=5 startTime=3 rangeThreshold=2 +stats: key_bytes=+12 val_count=+1 range_key_count=+1 range_key_bytes=+14 range_val_count=+1 live_count=-3 live_bytes=-63 gc_bytes_age=+8455 +>> at end: +rangekey: {f-h\x00}/[5.000000000,0=/] +rangekey: {k-p}/[4.000000000,0=/] +data: "a"/4.000000000,0 -> / +data: "a"/2.000000000,0 -> /BYTES/a2 +data: "b"/3.000000000,0 -> /BYTES/b3 +data: "d"/5.000000000,0 -> / +data: "d"/4.000000000,0 -> /BYTES/d4 +data: "d"/1.000000000,0 -> /BYTES/d1 +data: "e"/2.000000000,0 -> /BYTES/e2 +data: "f"/4.000000000,0 -> /BYTES/f4 +data: "g"/4.000000000,0 -> / +data: "g"/2.000000000,0 -> /BYTES/g2 +data: "h"/4.000000000,0 -> /BYTES/h4 +meta: "i"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "i"/7.000000000,0 -> /BYTES/i7 +stats: key_count=8 key_bytes=160 val_count=12 val_bytes=111 range_key_count=2 range_key_bytes=27 range_val_count=2 live_count=3 live_bytes=111 gc_bytes_age=17863 intent_count=1 intent_bytes=19 separated_intent_count=1 intent_age=93 + +# error on intent, no tombstones should be written +run stats error +del_range_pred k=a end=p ts=6 startTime=1 +---- +>> del_range_pred k=a end=p ts=6 startTime=1 +stats: no change +>> at end: +rangekey: {f-h\x00}/[5.000000000,0=/] +rangekey: {k-p}/[4.000000000,0=/] +data: "a"/4.000000000,0 -> / +data: "a"/2.000000000,0 -> /BYTES/a2 +data: "b"/3.000000000,0 -> /BYTES/b3 +data: "d"/5.000000000,0 -> / +data: "d"/4.000000000,0 -> /BYTES/d4 +data: "d"/1.000000000,0 -> /BYTES/d1 +data: "e"/2.000000000,0 -> /BYTES/e2 +data: "f"/4.000000000,0 -> /BYTES/f4 +data: "g"/4.000000000,0 -> / +data: "g"/2.000000000,0 -> /BYTES/g2 +data: "h"/4.000000000,0 -> /BYTES/h4 +meta: "i"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "i"/7.000000000,0 -> /BYTES/i7 +stats: key_count=8 key_bytes=160 val_count=12 val_bytes=111 range_key_count=2 range_key_bytes=27 range_val_count=2 live_count=3 live_bytes=111 gc_bytes_age=17863 intent_count=1 intent_bytes=19 separated_intent_count=1 intent_age=93 
+error: (*roachpb.WriteIntentError:) conflicting intents on "i" + +# error encountering point key at d5. +# a tombstone should not get written at c5 or e5, since +# DeleteRange didn't flush before reaching d5. +run stats error +put k=c ts=2 v=c2 +del_range_pred k=c end=f ts=5 startTime=1 +---- +>> put k=c ts=2 v=c2 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 +>> del_range_pred k=c end=f ts=5 startTime=1 +stats: no change +>> at end: +rangekey: {f-h\x00}/[5.000000000,0=/] +rangekey: {k-p}/[4.000000000,0=/] +data: "a"/4.000000000,0 -> / +data: "a"/2.000000000,0 -> /BYTES/a2 +data: "b"/3.000000000,0 -> /BYTES/b3 +data: "c"/2.000000000,0 -> /BYTES/c2 +data: "d"/5.000000000,0 -> / +data: "d"/4.000000000,0 -> /BYTES/d4 +data: "d"/1.000000000,0 -> /BYTES/d1 +data: "e"/2.000000000,0 -> /BYTES/e2 +data: "f"/4.000000000,0 -> /BYTES/f4 +data: "g"/4.000000000,0 -> / +data: "g"/2.000000000,0 -> /BYTES/g2 +data: "h"/4.000000000,0 -> /BYTES/h4 +meta: "i"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "i"/7.000000000,0 -> /BYTES/i7 +stats: key_count=9 key_bytes=174 val_count=13 val_bytes=118 range_key_count=2 range_key_bytes=27 range_val_count=2 live_count=4 live_bytes=132 gc_bytes_age=17863 intent_count=1 intent_bytes=19 separated_intent_count=1 intent_age=93 +error: (*roachpb.WriteTooOldError:) WriteTooOldError: write for key "d" at timestamp 5.000000000,0 too old; wrote at 5.000000000,1 + +# error encountering range key at k4. +# a tombstones should not get written to j4 or q4 since +# DeleteRange did not flush before reaching rangekey {k-p}4. +run stats error +put k=j ts=2 v=j2 +put k=q ts=2 v=q2 +del_range_pred k=j end=r ts=4 startTime=1 rangeThreshold=2 +---- +>> put k=j ts=2 v=j2 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 +>> put k=q ts=2 v=q2 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 +>> del_range_pred k=j end=r ts=4 startTime=1 rangeThreshold=2 +stats: no change +>> at end: +rangekey: {f-h\x00}/[5.000000000,0=/] +rangekey: {k-p}/[4.000000000,0=/] +data: "a"/4.000000000,0 -> / +data: "a"/2.000000000,0 -> /BYTES/a2 +data: "b"/3.000000000,0 -> /BYTES/b3 +data: "c"/2.000000000,0 -> /BYTES/c2 +data: "d"/5.000000000,0 -> / +data: "d"/4.000000000,0 -> /BYTES/d4 +data: "d"/1.000000000,0 -> /BYTES/d1 +data: "e"/2.000000000,0 -> /BYTES/e2 +data: "f"/4.000000000,0 -> /BYTES/f4 +data: "g"/4.000000000,0 -> / +data: "g"/2.000000000,0 -> /BYTES/g2 +data: "h"/4.000000000,0 -> /BYTES/h4 +meta: "i"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "i"/7.000000000,0 -> /BYTES/i7 +data: "j"/2.000000000,0 -> /BYTES/j2 +data: "q"/2.000000000,0 -> /BYTES/q2 +stats: key_count=11 key_bytes=202 val_count=15 val_bytes=132 range_key_count=2 range_key_bytes=27 range_val_count=2 live_count=6 live_bytes=174 gc_bytes_age=17863 intent_count=1 intent_bytes=19 separated_intent_count=1 intent_age=93 +error: (*roachpb.WriteTooOldError:) WriteTooOldError: write for key "k" at timestamp 4.000000000,0 too old; wrote at 4.000000000,1 + +# At this point the keyspace looks like this: +# 7 [i7] +# 6 +# 5 x o-----------o +# 4 x d4 f4 x h4 o-------------------o +# 3 b3 +# 2 a2 c2 e2 g2 j2 q2 +# 1 d1 +# 0 +# a b c d e f g h i j k l m n o p q + +# check that del_range 
will not write anything if no live keys are in its span +# and predicate ts. Note that the range keys bounds are [firstMatchingKey,LastMatchingKey.Next()]. +run stats ok +del_range_pred k=j end=r ts=5 startTime=2 rangeThreshold=2 +---- +>> del_range_pred k=j end=r ts=5 startTime=2 rangeThreshold=2 +stats: no change +>> at end: +rangekey: {f-h\x00}/[5.000000000,0=/] +rangekey: {k-p}/[4.000000000,0=/] +data: "a"/4.000000000,0 -> / +data: "a"/2.000000000,0 -> /BYTES/a2 +data: "b"/3.000000000,0 -> /BYTES/b3 +data: "c"/2.000000000,0 -> /BYTES/c2 +data: "d"/5.000000000,0 -> / +data: "d"/4.000000000,0 -> /BYTES/d4 +data: "d"/1.000000000,0 -> /BYTES/d1 +data: "e"/2.000000000,0 -> /BYTES/e2 +data: "f"/4.000000000,0 -> /BYTES/f4 +data: "g"/4.000000000,0 -> / +data: "g"/2.000000000,0 -> /BYTES/g2 +data: "h"/4.000000000,0 -> /BYTES/h4 +meta: "i"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "i"/7.000000000,0 -> /BYTES/i7 +data: "j"/2.000000000,0 -> /BYTES/j2 +data: "q"/2.000000000,0 -> /BYTES/q2 +stats: key_count=11 key_bytes=202 val_count=15 val_bytes=132 range_key_count=2 range_key_bytes=27 range_val_count=2 live_count=6 live_bytes=174 gc_bytes_age=17863 intent_count=1 intent_bytes=19 separated_intent_count=1 intent_age=93 + +# try the same call as above, except with startTime set to 1 +# check that delrange properly continues the run over a range tombstone +run stats ok +del_range_pred k=j end=r ts=5 startTime=1 rangeThreshold=2 +---- +>> del_range_pred k=j end=r ts=5 startTime=1 rangeThreshold=2 +stats: range_key_count=+2 range_key_bytes=+36 range_val_count=+3 live_count=-2 live_bytes=-42 gc_bytes_age=+7406 +>> at end: +rangekey: {f-h\x00}/[5.000000000,0=/] +rangekey: {j-k}/[5.000000000,0=/] +rangekey: {k-p}/[5.000000000,0=/ 4.000000000,0=/] +rangekey: {p-q\x00}/[5.000000000,0=/] +data: "a"/4.000000000,0 -> / +data: "a"/2.000000000,0 -> /BYTES/a2 +data: "b"/3.000000000,0 -> /BYTES/b3 +data: "c"/2.000000000,0 -> /BYTES/c2 +data: "d"/5.000000000,0 -> / +data: "d"/4.000000000,0 -> /BYTES/d4 +data: "d"/1.000000000,0 -> /BYTES/d1 +data: "e"/2.000000000,0 -> /BYTES/e2 +data: "f"/4.000000000,0 -> /BYTES/f4 +data: "g"/4.000000000,0 -> / +data: "g"/2.000000000,0 -> /BYTES/g2 +data: "h"/4.000000000,0 -> /BYTES/h4 +meta: "i"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "i"/7.000000000,0 -> /BYTES/i7 +data: "j"/2.000000000,0 -> /BYTES/j2 +data: "q"/2.000000000,0 -> /BYTES/q2 +stats: key_count=11 key_bytes=202 val_count=15 val_bytes=132 range_key_count=4 range_key_bytes=63 range_val_count=5 live_count=4 live_bytes=132 gc_bytes_age=25269 intent_count=1 intent_bytes=19 separated_intent_count=1 intent_age=93 + +# check that we flush with a range tombstone, if maxBytes is exceeded +# even though range tombstone threshold has not been met. +# Return a resume span. Note that the run extends past key d, since +# its latest value is a point tombstone, and is therefore not counted +# in runByteSize. 
+
+# check that we flush with a range tombstone if maxBytes is exceeded,
+# even though the range tombstone threshold has not been met, and
+# return a resume span. Note that the run extends past key d, since
+# its latest value is a point tombstone and is therefore not counted
+# in runByteSize.
+run stats ok
+del_range_pred k=c end=i ts=6 startTime=1 maxBytes=1
+----
+>> del_range_pred k=c end=i ts=6 startTime=1 maxBytes=1
+del_range_pred: resume span ["e","i")
+stats: range_key_count=+1 range_key_bytes=+14 range_val_count=+1 live_count=-1 live_bytes=-21 gc_bytes_age=+3290
+>> at end:
+rangekey: c{-\x00}/[6.000000000,0=/]
+rangekey: {f-h\x00}/[5.000000000,0=/]
+rangekey: {j-k}/[5.000000000,0=/]
+rangekey: {k-p}/[5.000000000,0=/ 4.000000000,0=/]
+rangekey: {p-q\x00}/[5.000000000,0=/]
+data: "a"/4.000000000,0 -> /
+data: "a"/2.000000000,0 -> /BYTES/a2
+data: "b"/3.000000000,0 -> /BYTES/b3
+data: "c"/2.000000000,0 -> /BYTES/c2
+data: "d"/5.000000000,0 -> /
+data: "d"/4.000000000,0 -> /BYTES/d4
+data: "d"/1.000000000,0 -> /BYTES/d1
+data: "e"/2.000000000,0 -> /BYTES/e2
+data: "f"/4.000000000,0 -> /BYTES/f4
+data: "g"/4.000000000,0 -> /
+data: "g"/2.000000000,0 -> /BYTES/g2
+data: "h"/4.000000000,0 -> /BYTES/h4
+meta: "i"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs=<nil> txnDidNotUpdateMeta=true
+data: "i"/7.000000000,0 -> /BYTES/i7
+data: "j"/2.000000000,0 -> /BYTES/j2
+data: "q"/2.000000000,0 -> /BYTES/q2
+stats: key_count=11 key_bytes=202 val_count=15 val_bytes=132 range_key_count=5 range_key_bytes=77 range_val_count=6 live_count=3 live_bytes=111 gc_bytes_age=28559 intent_count=1 intent_bytes=19 separated_intent_count=1 intent_age=93
+
+# check that we flush properly if maxBatchSize is exceeded.
+# Since max is 1, write a tombstone at e, and as soon as the iterator sees
+# the next eligible key to delete (f), return a resume span.
+# Note that we don't count shadowed tombstones in the batch size.
+run stats ok
+put k=f ts=6 v=f6
+del_range_pred k=c end=i ts=7 startTime=1 max=1
+----
+>> put k=f ts=6 v=f6
+stats: key_bytes=+12 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 gc_bytes_age=-190
+>> del_range_pred k=c end=i ts=7 startTime=1 max=1
+del_range_pred: resume span ["f","i")
+stats: key_bytes=+12 val_count=+1 live_count=-1 live_bytes=-21 gc_bytes_age=+3069
+>> at end:
+rangekey: c{-\x00}/[6.000000000,0=/]
+rangekey: {f-h\x00}/[5.000000000,0=/]
+rangekey: {j-k}/[5.000000000,0=/]
+rangekey: {k-p}/[5.000000000,0=/ 4.000000000,0=/]
+rangekey: {p-q\x00}/[5.000000000,0=/]
+data: "a"/4.000000000,0 -> /
+data: "a"/2.000000000,0 -> /BYTES/a2
+data: "b"/3.000000000,0 -> /BYTES/b3
+data: "c"/2.000000000,0 -> /BYTES/c2
+data: "d"/5.000000000,0 -> /
+data: "d"/4.000000000,0 -> /BYTES/d4
+data: "d"/1.000000000,0 -> /BYTES/d1
+data: "e"/7.000000000,0 -> /
+data: "e"/2.000000000,0 -> /BYTES/e2
+data: "f"/6.000000000,0 -> /BYTES/f6
+data: "f"/4.000000000,0 -> /BYTES/f4
+data: "g"/4.000000000,0 -> /
+data: "g"/2.000000000,0 -> /BYTES/g2
+data: "h"/4.000000000,0 -> /BYTES/h4
+meta: "i"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs=<nil> txnDidNotUpdateMeta=true
+data: "i"/7.000000000,0 -> /BYTES/i7
+data: "j"/2.000000000,0 -> /BYTES/j2
+data: "q"/2.000000000,0 -> /BYTES/q2
+stats: key_count=11 key_bytes=226 val_count=17 val_bytes=139 range_key_count=5 range_key_bytes=77 range_val_count=6 live_count=3 live_bytes=111 gc_bytes_age=31438 intent_count=1 intent_bytes=19 separated_intent_count=1 intent_age=93
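Aside (editor's illustration): a sketch of the resume-span behavior just demonstrated, again a simplified model rather than the real MVCC code: once max live keys have been deleted, the first further eligible key ends the scan and becomes the start of the resume span, while already-tombstoned (shadowed) keys are skipped and never counted. Names and types are hypothetical.

package main

import "fmt"

// latest describes the newest version of each key in scan order; live is
// false when that version is a point or range tombstone.
type latest struct {
	key  string
	live bool
}

// deleteUpToMax deletes at most max live keys in [start, end); once the
// budget is spent, the next eligible key starts the resume span.
func deleteUpToMax(keys []latest, end string, max int) (deleted []string, resume string) {
	for _, k := range keys {
		if !k.live {
			continue // shadowed/tombstoned keys don't count against max
		}
		if len(deleted) == max {
			return deleted, fmt.Sprintf("[%q,%q)", k.key, end)
		}
		deleted = append(deleted, k.key)
	}
	return deleted, "" // scanned to the end: no resume span
}

func main() {
	// Mirrors the max=1 run at ts=7: e is deleted, then f (the next
	// eligible key) triggers resume span ["f","i").
	ts7 := []latest{{"c", false}, {"d", false}, {"e", true}, {"f", true}, {"g", false}, {"h", false}}
	fmt.Println(deleteUpToMax(ts7, "i", 1))
}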
+
+# Run the same DeleteRange as above at ts 8.
+# No resume span should get returned, because the iterator goes through
+# the whole span without encountering a second eligible key to delete.
+run stats ok
+del_range_pred k=c end=i ts=8 startTime=1 max=1
+----
+>> del_range_pred k=c end=i ts=8 startTime=1 max=1
+stats: key_bytes=+12 val_count=+1 live_count=-1 live_bytes=-21 gc_bytes_age=+3036
+>> at end:
+rangekey: c{-\x00}/[6.000000000,0=/]
+rangekey: {f-h\x00}/[5.000000000,0=/]
+rangekey: {j-k}/[5.000000000,0=/]
+rangekey: {k-p}/[5.000000000,0=/ 4.000000000,0=/]
+rangekey: {p-q\x00}/[5.000000000,0=/]
+data: "a"/4.000000000,0 -> /
+data: "a"/2.000000000,0 -> /BYTES/a2
+data: "b"/3.000000000,0 -> /BYTES/b3
+data: "c"/2.000000000,0 -> /BYTES/c2
+data: "d"/5.000000000,0 -> /
+data: "d"/4.000000000,0 -> /BYTES/d4
+data: "d"/1.000000000,0 -> /BYTES/d1
+data: "e"/7.000000000,0 -> /
+data: "e"/2.000000000,0 -> /BYTES/e2
+data: "f"/8.000000000,0 -> /
+data: "f"/6.000000000,0 -> /BYTES/f6
+data: "f"/4.000000000,0 -> /BYTES/f4
+data: "g"/4.000000000,0 -> /
+data: "g"/2.000000000,0 -> /BYTES/g2
+data: "h"/4.000000000,0 -> /BYTES/h4
+meta: "i"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs=<nil> txnDidNotUpdateMeta=true
+data: "i"/7.000000000,0 -> /BYTES/i7
+data: "j"/2.000000000,0 -> /BYTES/j2
+data: "q"/2.000000000,0 -> /BYTES/q2
+stats: key_count=11 key_bytes=238 val_count=18 val_bytes=139 range_key_count=5 range_key_bytes=77 range_val_count=6 live_count=2 live_bytes=90 gc_bytes_age=34474 intent_count=1 intent_bytes=19 separated_intent_count=1 intent_age=93
+
+# Write some new keys at a and b, and ensure a run of point tombstones gets properly written.
+run stats ok
+put k=a ts=5 v=a5
+put k=b ts=5 v=a5
+del_range_pred k=a end=c ts=6 startTime=1
+----
+>> put k=a ts=5 v=a5
+stats: key_bytes=+12 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 gc_bytes_age=-192
+>> put k=b ts=5 v=a5
+stats: key_bytes=+12 val_count=+1 val_bytes=+7 gc_bytes_age=+1805
+>> del_range_pred k=a end=c ts=6 startTime=1
+stats: key_bytes=+24 val_count=+2 live_count=-2 live_bytes=-42 gc_bytes_age=+6204
+>> at end:
+rangekey: c{-\x00}/[6.000000000,0=/]
+rangekey: {f-h\x00}/[5.000000000,0=/]
+rangekey: {j-k}/[5.000000000,0=/]
+rangekey: {k-p}/[5.000000000,0=/ 4.000000000,0=/]
+rangekey: {p-q\x00}/[5.000000000,0=/]
+data: "a"/6.000000000,0 -> /
+data: "a"/5.000000000,0 -> /BYTES/a5
+data: "a"/4.000000000,0 -> /
+data: "a"/2.000000000,0 -> /BYTES/a2
+data: "b"/6.000000000,0 -> /
+data: "b"/5.000000000,0 -> /BYTES/a5
+data: "b"/3.000000000,0 -> /BYTES/b3
+data: "c"/2.000000000,0 -> /BYTES/c2
+data: "d"/5.000000000,0 -> /
+data: "d"/4.000000000,0 -> /BYTES/d4
+data: "d"/1.000000000,0 -> /BYTES/d1
+data: "e"/7.000000000,0 -> /
+data: "e"/2.000000000,0 -> /BYTES/e2
+data: "f"/8.000000000,0 -> /
+data: "f"/6.000000000,0 -> /BYTES/f6
+data: "f"/4.000000000,0 -> /BYTES/f4
+data: "g"/4.000000000,0 -> /
+data: "g"/2.000000000,0 -> /BYTES/g2
+data: "h"/4.000000000,0 -> /BYTES/h4
+meta: "i"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs=<nil> txnDidNotUpdateMeta=true
+data: "i"/7.000000000,0 -> /BYTES/i7
+data: "j"/2.000000000,0 -> /BYTES/j2
+data: "q"/2.000000000,0 -> /BYTES/q2
+stats: key_count=11 key_bytes=286 val_count=22 val_bytes=153 range_key_count=5 range_key_bytes=77 range_val_count=6 live_count=1 live_bytes=69 gc_bytes_age=42291 intent_count=1 intent_bytes=19 separated_intent_count=1 intent_age=93
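Aside (editor's illustration): a hypothetical caller-side loop, not the CockroachDB API, showing how the resume spans above would typically be consumed: the bounded request is re-issued from the returned resume key until none is returned, as in the ts=7 then ts=8 pair of runs over [c, i).

package main

import "fmt"

// delRangePred is a stand-in for issuing one bounded predicate delete;
// it returns the start of the resume span, or "" once the span is done.
// Here it pretends the first call stops at "f" and the second finishes.
func delRangePred(start, end string, max int) (resumeStart string) {
	if start < "f" {
		return "f"
	}
	return ""
}

func main() {
	start, end := "c", "i"
	for {
		resume := delRangePred(start, end, 1)
		if resume == "" {
			break // no resume span: the whole range has been processed
		}
		start = resume // continue exactly where the last batch stopped
	}
	fmt.Printf("deleted all eligible keys in [%q,%q)\n", "c", end)
}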