From 6eec337ea016d49aa602d3924d1f25675da64547 Mon Sep 17 00:00:00 2001
From: Seth Hoenig
Date: Fri, 9 Oct 2020 16:31:38 -0500
Subject: [PATCH] core: implement system batch scheduler

This PR implements a new "System Batch" scheduler type. Jobs can
make use of this new scheduler by setting their type to 'sysbatch'.

As the name implies, sysbatch can be thought of as a hybrid between
system and batch jobs - it is for running short-lived jobs intended
to run on every compatible node in the cluster.

As with batch jobs, sysbatch jobs can also be periodic and/or
parameterized dispatch jobs. A sysbatch job is considered complete
when it has run to a terminal state (success, or failure after
retries) on every compatible node.

Feasibility and preemption are governed the same as with system jobs.
In this PR, the update stanza is not yet supported. The update stanza
is still limited in functionality for the underlying system scheduler,
and is not useful yet for sysbatch jobs. Further work in #4740 will
improve support for the update stanza and deployments.

Closes #2527
---
 CHANGELOG.md | 1 +
 api/operator.go | 7 +-
 .../taskrunner/restarts/restarts.go | 18 +-
 command/agent/operator_endpoint.go | 7 +-
 command/agent/operator_endpoint_test.go | 8 +
 e2e/e2e_test.go | 3 +-
 e2e/e2eutil/utils.go | 24 +
 .../input/sysbatch_dispatch.nomad | 30 +
 .../input/sysbatch_job_fast.nomad | 25 +
 .../input/sysbatch_job_slow.nomad | 25 +
 .../input/sysbatch_periodic.nomad | 30 +
 e2e/scheduler_sysbatch/sysbatch.go | 269 +++
 .../input/system_job0.nomad | 0
 .../input/system_job1.nomad | 0
 .../systemsched.go | 26 +-
 helper/uuid/uuid.go | 6 +
 nomad/config.go | 11 +-
 nomad/core_sched.go | 5 +-
 nomad/mock/mock.go | 84 +-
 nomad/state/schema.go | 13 +-
 nomad/state/state_store.go | 12 +-
 nomad/structs/funcs.go | 69 +-
 nomad/structs/funcs_test.go | 4 +-
 nomad/structs/operator.go | 3 +
 nomad/structs/structs.go | 27 +-
 scheduler/generic_sched.go | 2 +-
 scheduler/rank.go | 4 +-
 scheduler/scheduler.go | 7 +-
 scheduler/stack.go | 28 +-
 scheduler/stack_test.go | 16 +-
 .../{system_sched.go => system_scheduler.go} | 101 +-
 scheduler/system_sysbatch_test.go | 1623 +++++++++++++++++
 ...em_sched_test.go => system_system_test.go} | 610 +++----
 scheduler/util.go | 95 +-
 scheduler/util_test.go | 160 +-
 .../hashicorp/nomad/api/operator.go | 7 +-
 website/pages/docs/configuration/server.mdx | 7 +-
 website/pages/docs/job-specification/job.mdx | 2 +-
 .../docs/job-specification/reschedule.mdx | 4 +-
 .../pages/docs/job-specification/restart.mdx | 12 +-
 website/pages/docs/schedulers.mdx | 30 +-
 41 files changed, 2800 insertions(+), 615 deletions(-)
 create mode 100644 e2e/scheduler_sysbatch/input/sysbatch_dispatch.nomad
 create mode 100644 e2e/scheduler_sysbatch/input/sysbatch_job_fast.nomad
 create mode 100644 e2e/scheduler_sysbatch/input/sysbatch_job_slow.nomad
 create mode 100644 e2e/scheduler_sysbatch/input/sysbatch_periodic.nomad
 create mode 100644 e2e/scheduler_sysbatch/sysbatch.go
 rename e2e/{systemsched => scheduler_system}/input/system_job0.nomad (100%)
 rename e2e/{systemsched => scheduler_system}/input/system_job1.nomad (100%)
 rename e2e/{systemsched => scheduler_system}/systemsched.go (87%)
 rename scheduler/{system_sched.go => system_scheduler.go} (85%)
 create mode 100644 scheduler/system_sysbatch_test.go
 rename scheduler/{system_sched_test.go => system_system_test.go} (84%)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index a07114b0d319..c50f5f5936da 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,7 @@ FEATURES:
 * **Event Stream**:
Subscribe to change events as they occur in real time. [[GH-9013](https://github.com/hashicorp/nomad/issues/9013)] * **Namespaces OSS**: Namespaces are now available in open source Nomad. [[GH-9135](https://github.com/hashicorp/nomad/issues/9135)] * **Topology Visualization**: See all of the clients and allocations in a cluster at once. [[GH-9077](https://github.com/hashicorp/nomad/issues/9077)] +* **System Batch Scheduling**: New `sysbatch` scheduler type for running short lived jobs across all nodes. [[GH-9160](https://github.com/hashicorp/nomad/pull/9160)] IMPROVEMENTS: * core: Improved job deregistration error logging. [[GH-8745](https://github.com/hashicorp/nomad/issues/8745)] diff --git a/api/operator.go b/api/operator.go index d5bc5d061d56..de57bffef4b2 100644 --- a/api/operator.go +++ b/api/operator.go @@ -159,9 +159,10 @@ const ( // PreemptionConfig specifies whether preemption is enabled based on scheduler type type PreemptionConfig struct { - SystemSchedulerEnabled bool - BatchSchedulerEnabled bool - ServiceSchedulerEnabled bool + SystemSchedulerEnabled bool + SysBatchSchedulerEnabled bool + BatchSchedulerEnabled bool + ServiceSchedulerEnabled bool } // SchedulerGetConfiguration is used to query the current Scheduler configuration. diff --git a/client/allocrunner/taskrunner/restarts/restarts.go b/client/allocrunner/taskrunner/restarts/restarts.go index 6ee0056ccd8b..429ee07a0384 100644 --- a/client/allocrunner/taskrunner/restarts/restarts.go +++ b/client/allocrunner/taskrunner/restarts/restarts.go @@ -14,15 +14,19 @@ const ( // jitter is the percent of jitter added to restart delays. jitter = 0.25 - ReasonNoRestartsAllowed = "Policy allows no restarts" - ReasonUnrecoverableErrror = "Error was unrecoverable" - ReasonWithinPolicy = "Restart within policy" - ReasonDelay = "Exceeded allowed attempts, applying a delay" + ReasonNoRestartsAllowed = "Policy allows no restarts" + ReasonUnrecoverableError = "Error was unrecoverable" + ReasonWithinPolicy = "Restart within policy" + ReasonDelay = "Exceeded allowed attempts, applying a delay" ) func NewRestartTracker(policy *structs.RestartPolicy, jobType string, tlc *structs.TaskLifecycleConfig) *RestartTracker { - // Batch jobs should not restart if they exit successfully - onSuccess := jobType != structs.JobTypeBatch + onSuccess := true + + // Batch & SysBatch jobs should not restart if they exit successfully + if jobType == structs.JobTypeBatch || jobType == structs.JobTypeSysBatch { + onSuccess = false + } // Prestart sidecars should get restarted on success if tlc != nil && tlc.Hook == structs.TaskLifecycleHookPrestart { @@ -196,7 +200,7 @@ func (r *RestartTracker) GetState() (string, time.Duration) { if r.startErr != nil { // If the error is not recoverable, do not restart. 
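 		// (a recoverable start error instead falls through to the
 		// attempt-count and delay checks further below)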
if !structs.IsRecoverable(r.startErr) { - r.reason = ReasonUnrecoverableErrror + r.reason = ReasonUnrecoverableError return structs.TaskNotRestarting, 0 } } else if r.exitRes != nil { diff --git a/command/agent/operator_endpoint.go b/command/agent/operator_endpoint.go index ed4a3c4cb732..e008cd506357 100644 --- a/command/agent/operator_endpoint.go +++ b/command/agent/operator_endpoint.go @@ -261,9 +261,10 @@ func (s *HTTPServer) schedulerUpdateConfig(resp http.ResponseWriter, req *http.R args.Config = structs.SchedulerConfiguration{ SchedulerAlgorithm: structs.SchedulerAlgorithm(conf.SchedulerAlgorithm), PreemptionConfig: structs.PreemptionConfig{ - SystemSchedulerEnabled: conf.PreemptionConfig.SystemSchedulerEnabled, - BatchSchedulerEnabled: conf.PreemptionConfig.BatchSchedulerEnabled, - ServiceSchedulerEnabled: conf.PreemptionConfig.ServiceSchedulerEnabled}, + SystemSchedulerEnabled: conf.PreemptionConfig.SystemSchedulerEnabled, + SysBatchSchedulerEnabled: conf.PreemptionConfig.SysBatchSchedulerEnabled, + BatchSchedulerEnabled: conf.PreemptionConfig.BatchSchedulerEnabled, + ServiceSchedulerEnabled: conf.PreemptionConfig.ServiceSchedulerEnabled}, } if err := args.Config.Validate(); err != nil { diff --git a/command/agent/operator_endpoint_test.go b/command/agent/operator_endpoint_test.go index 8814fad4fa25..316c16ca3659 100644 --- a/command/agent/operator_endpoint_test.go +++ b/command/agent/operator_endpoint_test.go @@ -282,6 +282,7 @@ func TestOperator_SchedulerGetConfiguration(t *testing.T) { // Only system jobs can preempt other jobs by default. require.True(out.SchedulerConfig.PreemptionConfig.SystemSchedulerEnabled) + require.False(out.SchedulerConfig.PreemptionConfig.SysBatchSchedulerEnabled) require.False(out.SchedulerConfig.PreemptionConfig.BatchSchedulerEnabled) require.False(out.SchedulerConfig.PreemptionConfig.ServiceSchedulerEnabled) }) @@ -314,6 +315,8 @@ func TestOperator_SchedulerSetConfiguration(t *testing.T) { err = s.RPC("Operator.SchedulerGetConfiguration", &args, &reply) require.Nil(err) require.True(reply.SchedulerConfig.PreemptionConfig.SystemSchedulerEnabled) + require.False(reply.SchedulerConfig.PreemptionConfig.SysBatchSchedulerEnabled) + require.False(reply.SchedulerConfig.PreemptionConfig.BatchSchedulerEnabled) require.True(reply.SchedulerConfig.PreemptionConfig.ServiceSchedulerEnabled) }) } @@ -324,6 +327,7 @@ func TestOperator_SchedulerCASConfiguration(t *testing.T) { require := require.New(t) body := bytes.NewBuffer([]byte(`{"PreemptionConfig": { "SystemSchedulerEnabled": true, + "SysBatchSchedulerEnabled":true, "BatchSchedulerEnabled":true }}`)) req, _ := http.NewRequest("PUT", "/v1/operator/scheduler/configuration", body) @@ -346,7 +350,9 @@ func TestOperator_SchedulerCASConfiguration(t *testing.T) { t.Fatalf("err: %v", err) } require.True(reply.SchedulerConfig.PreemptionConfig.SystemSchedulerEnabled) + require.True(reply.SchedulerConfig.PreemptionConfig.SysBatchSchedulerEnabled) require.True(reply.SchedulerConfig.PreemptionConfig.BatchSchedulerEnabled) + require.False(reply.SchedulerConfig.PreemptionConfig.ServiceSchedulerEnabled) // Create a CAS request, bad index { @@ -387,7 +393,9 @@ func TestOperator_SchedulerCASConfiguration(t *testing.T) { t.Fatalf("err: %v", err) } require.False(reply.SchedulerConfig.PreemptionConfig.SystemSchedulerEnabled) + require.False(reply.SchedulerConfig.PreemptionConfig.SysBatchSchedulerEnabled) require.False(reply.SchedulerConfig.PreemptionConfig.BatchSchedulerEnabled) + 
require.False(reply.SchedulerConfig.PreemptionConfig.ServiceSchedulerEnabled) }) } diff --git a/e2e/e2e_test.go b/e2e/e2e_test.go index 2e6c6db2ce8d..a534d8f38b4e 100644 --- a/e2e/e2e_test.go +++ b/e2e/e2e_test.go @@ -27,8 +27,9 @@ import ( _ "github.com/hashicorp/nomad/e2e/podman" _ "github.com/hashicorp/nomad/e2e/quotas" _ "github.com/hashicorp/nomad/e2e/rescheduling" + _ "github.com/hashicorp/nomad/e2e/scheduler_sysbatch" + _ "github.com/hashicorp/nomad/e2e/scheduler_system" _ "github.com/hashicorp/nomad/e2e/spread" - _ "github.com/hashicorp/nomad/e2e/systemsched" _ "github.com/hashicorp/nomad/e2e/taskevents" _ "github.com/hashicorp/nomad/e2e/vaultsecrets" _ "github.com/hashicorp/nomad/e2e/volumes" diff --git a/e2e/e2eutil/utils.go b/e2e/e2eutil/utils.go index 6cf10d574f42..d042c1743b1d 100644 --- a/e2e/e2eutil/utils.go +++ b/e2e/e2eutil/utils.go @@ -201,6 +201,30 @@ func WaitForAllocStopped(t *testing.T, nomadClient *api.Client, allocID string) }) } +func WaitForAllocStatus(t *testing.T, nomadClient *api.Client, allocID string, status string) { + testutil.WaitForResultRetries(retries, func() (bool, error) { + time.Sleep(time.Millisecond * 100) + alloc, _, err := nomadClient.Allocations().Info(allocID, nil) + if err != nil { + return false, err + } + switch alloc.ClientStatus { + case status: + return true, nil + default: + return false, fmt.Errorf("expected %s alloc, but was: %s", status, alloc.ClientStatus) + } + }, func(err error) { + t.Fatalf("failed to wait on alloc: %v", err) + }) +} + +func WaitForAllocsStatus(t *testing.T, nomadClient *api.Client, allocIDs []string, status string) { + for _, allocID := range allocIDs { + WaitForAllocStatus(t, nomadClient, allocID, status) + } +} + func AllocIDsFromAllocationListStubs(allocs []*api.AllocationListStub) []string { allocIDs := make([]string, 0, len(allocs)) for _, alloc := range allocs { diff --git a/e2e/scheduler_sysbatch/input/sysbatch_dispatch.nomad b/e2e/scheduler_sysbatch/input/sysbatch_dispatch.nomad new file mode 100644 index 000000000000..fcc369efdb6d --- /dev/null +++ b/e2e/scheduler_sysbatch/input/sysbatch_dispatch.nomad @@ -0,0 +1,30 @@ +job "sysbatchjob" { + datacenters = ["dc1"] + + type = "sysbatch" + + constraint { + attribute = "${attr.kernel.name}" + value = "linux" + } + + parameterized { + payload = "forbidden" + meta_required = ["KEY"] + } + + group "sysbatch_job_group" { + count = 1 + + task "sysbatch_task" { + driver = "docker" + + config { + image = "bash:5" + + command = "bash" + args = ["-c", "ping -c 10 example.com"] + } + } + } +} diff --git a/e2e/scheduler_sysbatch/input/sysbatch_job_fast.nomad b/e2e/scheduler_sysbatch/input/sysbatch_job_fast.nomad new file mode 100644 index 000000000000..5aaba9072ba1 --- /dev/null +++ b/e2e/scheduler_sysbatch/input/sysbatch_job_fast.nomad @@ -0,0 +1,25 @@ +job "sysbatchjob" { + datacenters = ["dc1"] + + type = "sysbatch" + + constraint { + attribute = "${attr.kernel.name}" + value = "linux" + } + + group "sysbatch_job_group" { + count = 1 + + task "sysbatch_task" { + driver = "docker" + + config { + image = "bash:5" + + command = "bash" + args = ["-c", "ping -c 10 example.com"] + } + } + } +} diff --git a/e2e/scheduler_sysbatch/input/sysbatch_job_slow.nomad b/e2e/scheduler_sysbatch/input/sysbatch_job_slow.nomad new file mode 100644 index 000000000000..3a0b667eb25f --- /dev/null +++ b/e2e/scheduler_sysbatch/input/sysbatch_job_slow.nomad @@ -0,0 +1,25 @@ +job "sysbatchjob" { + datacenters = ["dc1"] + + type = "sysbatch" + + constraint { + attribute = "${attr.kernel.name}" 
+    value     = "linux"
+  }
+
+  group "sysbatch_job_group" {
+    count = 1
+
+    task "sysbatch_task" {
+      driver = "docker"
+
+      config {
+        image = "bash:5"
+
+        command = "bash"
+        args    = ["-c", "ping -c 100000 example.com"]
+      }
+    }
+  }
+}
diff --git a/e2e/scheduler_sysbatch/input/sysbatch_periodic.nomad b/e2e/scheduler_sysbatch/input/sysbatch_periodic.nomad
new file mode 100644
index 000000000000..d3521a5355f3
--- /dev/null
+++ b/e2e/scheduler_sysbatch/input/sysbatch_periodic.nomad
@@ -0,0 +1,30 @@
+job "sysbatchjob" {
+  datacenters = ["dc1"]
+
+  type = "sysbatch"
+
+  constraint {
+    attribute = "${attr.kernel.name}"
+    value     = "linux"
+  }
+
+  periodic {
+    cron             = "*/15 * * * * *"
+    prohibit_overlap = true
+  }
+
+  group "sysbatch_job_group" {
+    count = 1
+
+    task "sysbatch_task" {
+      driver = "docker"
+
+      config {
+        image = "bash:5"
+
+        command = "bash"
+        args    = ["-c", "ping -c 10 example.com"]
+      }
+    }
+  }
+}
diff --git a/e2e/scheduler_sysbatch/sysbatch.go b/e2e/scheduler_sysbatch/sysbatch.go
new file mode 100644
index 000000000000..26bc979c04e1
--- /dev/null
+++ b/e2e/scheduler_sysbatch/sysbatch.go
@@ -0,0 +1,269 @@
+package scheduler_sysbatch
+
+import (
+	"strings"
+	"time"
+
+	"github.com/hashicorp/nomad/api"
+	"github.com/hashicorp/nomad/e2e/e2eutil"
+	"github.com/hashicorp/nomad/e2e/framework"
+	"github.com/hashicorp/nomad/nomad/structs"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+type SysBatchSchedulerTest struct {
+	framework.TC
+	jobIDs []string
+}
+
+func init() {
+	framework.AddSuites(&framework.TestSuite{
+		Component:   "SysBatchScheduler",
+		CanRunLocal: true,
+		Cases: []framework.TestCase{
+			new(SysBatchSchedulerTest),
+		},
+	})
+}
+
+func (tc *SysBatchSchedulerTest) BeforeAll(f *framework.F) {
+	// Ensure cluster has leader before running tests
+	e2eutil.WaitForLeader(f.T(), tc.Nomad())
+	e2eutil.WaitForNodesReady(f.T(), tc.Nomad(), 4)
+}
+
+func (tc *SysBatchSchedulerTest) TestJobRunBasic(f *framework.F) {
+	t := f.T()
+	nomadClient := tc.Nomad()
+
+	// submit a fast sysbatch job
+	jobID := "sysbatch_run_basic"
+	tc.jobIDs = append(tc.jobIDs, jobID)
+	e2eutil.RegisterAndWaitForAllocs(t, nomadClient, "scheduler_sysbatch/input/sysbatch_job_fast.nomad", jobID, "")
+
+	// get our allocations for this sysbatch job
+	jobs := nomadClient.Jobs()
+	allocs, _, err := jobs.Allocations(jobID, true, nil)
+	require.NoError(t, err)
+
+	// make sure this job is being run on "all" the linux clients
+	// in the future, might be nice to have a way to query that information
+	// during test run time, to create more accurate assertions
+	require.True(t, len(allocs) >= 3)
+
+	// wait for every alloc to reach completion
+	allocIDs := e2eutil.AllocIDsFromAllocationListStubs(allocs)
+	e2eutil.WaitForAllocsStatus(t, nomadClient, allocIDs, structs.AllocClientStatusComplete)
+}
+
+func (tc *SysBatchSchedulerTest) TestJobStopEarly(f *framework.F) {
+	t := f.T()
+	nomadClient := tc.Nomad()
+
+	// submit a slow sysbatch job
+	jobID := "sysbatch_stop_early"
+	tc.jobIDs = append(tc.jobIDs, jobID)
+	e2eutil.RegisterAndWaitForAllocs(t, nomadClient, "scheduler_sysbatch/input/sysbatch_job_slow.nomad", jobID, "")
+
+	// get our allocations for this sysbatch job
+	jobs := nomadClient.Jobs()
+	allocs, _, err := jobs.Allocations(jobID, true, nil)
+	require.NoError(t, err)
+
+	// make sure this job is being run on "all" the linux clients
+	// in the future, might be nice to have a way to query that information
+	// during test run time, to create more accurate assertions
+	require.True(t, len(allocs) >= 3)
+
+	// wait for every alloc to reach running status
+	allocIDs := e2eutil.AllocIDsFromAllocationListStubs(allocs)
+	e2eutil.WaitForAllocsStatus(t, nomadClient, allocIDs, structs.AllocClientStatusRunning)
+
+	// stop the job before allocs reach completion
+	_, _, err = jobs.Deregister(jobID, false, nil)
+	require.NoError(t, err)
+}
+
+func (tc *SysBatchSchedulerTest) TestJobReplaceRunning(f *framework.F) {
+	t := f.T()
+	nomadClient := tc.Nomad()
+
+	// submit a slow sysbatch job
+	jobID := "sysbatch_replace_running"
+	tc.jobIDs = append(tc.jobIDs, jobID)
+	e2eutil.RegisterAndWaitForAllocs(t, nomadClient, "scheduler_sysbatch/input/sysbatch_job_slow.nomad", jobID, "")
+
+	// get our allocations for this sysbatch job
+	jobs := nomadClient.Jobs()
+	allocs, _, err := jobs.Allocations(jobID, true, nil)
+	require.NoError(t, err)
+
+	// make sure this job is being run on "all" the linux clients
+	require.True(t, len(allocs) >= 3)
+
+	// wait for every alloc to reach running status
+	allocIDs := e2eutil.AllocIDsFromAllocationListStubs(allocs)
+	e2eutil.WaitForAllocsStatus(t, nomadClient, allocIDs, structs.AllocClientStatusRunning)
+
+	// replace the slow job with the fast job
+	intermediate := e2eutil.RegisterAndWaitForAllocs(t, nomadClient, "scheduler_sysbatch/input/sysbatch_job_fast.nomad", jobID, "")
+
+	// get the allocs for the new updated job
+	var updated []*api.AllocationListStub
+	for _, alloc := range intermediate {
+		if alloc.JobVersion == 1 {
+			updated = append(updated, alloc)
+		}
+	}
+
+	// collect the IDs of the new allocs
+	newAllocIDs := e2eutil.AllocIDsFromAllocationListStubs(updated)
+
+	// make sure this new job is being run on "all" the linux clients
+	require.True(t, len(updated) >= 3)
+
+	// wait for the allocs of the fast job to complete
+	e2eutil.WaitForAllocsStatus(t, nomadClient, newAllocIDs, structs.AllocClientStatusComplete)
+}
+
+func (tc *SysBatchSchedulerTest) TestJobReplaceDead(f *framework.F) {
+	t := f.T()
+	nomadClient := tc.Nomad()
+
+	// submit a fast sysbatch job
+	jobID := "sysbatch_replace_dead"
+	tc.jobIDs = append(tc.jobIDs, jobID)
+	e2eutil.RegisterAndWaitForAllocs(t, nomadClient, "scheduler_sysbatch/input/sysbatch_job_fast.nomad", jobID, "")
+
+	// get the allocations for this sysbatch job
+	jobs := nomadClient.Jobs()
+	allocs, _, err := jobs.Allocations(jobID, true, nil)
+	require.NoError(t, err)
+
+	// make sure this job is being run on "all" the linux clients
+	require.True(t, len(allocs) >= 3)
+
+	// wait for every alloc to reach complete status
+	allocIDs := e2eutil.AllocIDsFromAllocationListStubs(allocs)
+	e2eutil.WaitForAllocsStatus(t, nomadClient, allocIDs, structs.AllocClientStatusComplete)
+
+	// replace the fast job with the slow job
+	intermediate := e2eutil.RegisterAndWaitForAllocs(t, nomadClient, "scheduler_sysbatch/input/sysbatch_job_slow.nomad", jobID, "")
+
+	// get the allocs for the new updated job
+	var updated []*api.AllocationListStub
+	for _, alloc := range intermediate {
+		if alloc.JobVersion == 1 {
+			updated = append(updated, alloc)
+		}
+	}
+
+	// collect the IDs of the updated allocs
+	upAllocIDs := e2eutil.AllocIDsFromAllocationListStubs(updated)
+
+	// make sure this new job is being run on "all" the linux clients
+	require.True(t, len(updated) >= 3)
+
+	// wait for the allocs of the slow job to be running
+	e2eutil.WaitForAllocsStatus(t, nomadClient, upAllocIDs, structs.AllocClientStatusRunning)
+}
+
+func (tc *SysBatchSchedulerTest) TestJobRunPeriodic(f *framework.F) {
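+	// This test force-launches the periodic job, locates the launched child
+	// job by its "periodic-" ID prefix, and waits for its allocs to complete.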
+ t := f.T() + nomadClient := tc.Nomad() + + // submit a fast sysbatch job + jobID := "sysbatch_job_periodic" + tc.jobIDs = append(tc.jobIDs, jobID) + err := e2eutil.Register(jobID, "scheduler_sysbatch/input/sysbatch_periodic.nomad") + require.NoError(t, err) + + // force the cron job to run + jobs := nomadClient.Jobs() + _, _, err = jobs.PeriodicForce(jobID, nil) + require.NoError(t, err) + + // find the cron job that got launched + jobsList, _, err := jobs.List(nil) + require.NoError(t, err) + cronJobID := "" + for _, job := range jobsList { + if strings.HasPrefix(job.Name, "sysbatch_job_periodic/periodic-") { + cronJobID = job.Name + break + } + } + require.NotEmpty(t, cronJobID) + tc.jobIDs = append(tc.jobIDs, cronJobID) + + // wait for allocs of the cron job + var allocs []*api.AllocationListStub + require.True(t, assert.Eventually(t, func() bool { + var err error + allocs, _, err = jobs.Allocations(cronJobID, false, nil) + require.NoError(t, err) + return len(allocs) >= 3 + }, 30*time.Second, time.Second)) + + // wait for every cron job alloc to reach completion + allocIDs := e2eutil.AllocIDsFromAllocationListStubs(allocs) + e2eutil.WaitForAllocsStatus(t, nomadClient, allocIDs, structs.AllocClientStatusComplete) +} + +func (tc *SysBatchSchedulerTest) TestJobRunDispatch(f *framework.F) { + t := f.T() + nomadClient := tc.Nomad() + + // submit a fast sysbatch dispatch job + jobID := "sysbatch_job_dispatch" + tc.jobIDs = append(tc.jobIDs, jobID) + err := e2eutil.Register(jobID, "scheduler_sysbatch/input/sysbatch_dispatch.nomad") + require.NoError(t, err) + + // dispatch the sysbatch job + jobs := nomadClient.Jobs() + result, _, err := jobs.Dispatch(jobID, map[string]string{ + "KEY": "value", + }, nil, nil) + require.NoError(t, err) + + // grab the new dispatched jobID + dispatchID := result.DispatchedJobID + tc.jobIDs = append(tc.jobIDs, dispatchID) + + // wait for allocs of the dispatched job + var allocs []*api.AllocationListStub + require.True(t, assert.Eventually(t, func() bool { + var err error + allocs, _, err = jobs.Allocations(dispatchID, false, nil) + require.NoError(t, err) + return len(allocs) >= 3 + }, 30*time.Second, time.Second)) + + // wait for every dispatch alloc to reach completion + allocIDs := e2eutil.AllocIDsFromAllocationListStubs(allocs) + e2eutil.WaitForAllocsStatus(t, nomadClient, allocIDs, structs.AllocClientStatusComplete) +} + +func (tc *SysBatchSchedulerTest) AfterEach(f *framework.F) { + nomadClient := tc.Nomad() + + // Mark all nodes eligible + nodesAPI := tc.Nomad().Nodes() + nodes, _, _ := nodesAPI.List(nil) + for _, node := range nodes { + _, _ = nodesAPI.ToggleEligibility(node.ID, true, nil) + } + + jobs := nomadClient.Jobs() + + // Stop all jobs in test + for _, id := range tc.jobIDs { + _, _, _ = jobs.Deregister(id, true, nil) + } + tc.jobIDs = []string{} + + // Garbage collect + _ = nomadClient.System().GarbageCollect() +} diff --git a/e2e/systemsched/input/system_job0.nomad b/e2e/scheduler_system/input/system_job0.nomad similarity index 100% rename from e2e/systemsched/input/system_job0.nomad rename to e2e/scheduler_system/input/system_job0.nomad diff --git a/e2e/systemsched/input/system_job1.nomad b/e2e/scheduler_system/input/system_job1.nomad similarity index 100% rename from e2e/systemsched/input/system_job1.nomad rename to e2e/scheduler_system/input/system_job1.nomad diff --git a/e2e/systemsched/systemsched.go b/e2e/scheduler_system/systemsched.go similarity index 87% rename from e2e/systemsched/systemsched.go rename to 
e2e/scheduler_system/systemsched.go
index 09b3f9141b33..5ec17ef28547 100644
--- a/e2e/systemsched/systemsched.go
+++ b/e2e/scheduler_system/systemsched.go
@@ -1,4 +1,4 @@
-package systemsched
+package scheduler_system
 
 import (
 	"github.com/hashicorp/nomad/api"
@@ -35,16 +35,14 @@ func (tc *SystemSchedTest) TestJobUpdateOnIneligbleNode(f *framework.F) {
 	jobID := "system_deployment"
 	tc.jobIDs = append(tc.jobIDs, jobID)
 
-	e2eutil.RegisterAndWaitForAllocs(t, nomadClient, "systemsched/input/system_job0.nomad", jobID, "")
+	e2eutil.RegisterAndWaitForAllocs(t, nomadClient, "scheduler_system/input/system_job0.nomad", jobID, "")
 
 	jobs := nomadClient.Jobs()
 	allocs, _, err := jobs.Allocations(jobID, true, nil)
 	require.NoError(t, err)
+	require.True(t, len(allocs) >= 3)
 
-	var allocIDs []string
-	for _, alloc := range allocs {
-		allocIDs = append(allocIDs, alloc.ID)
-	}
+	allocIDs := e2eutil.AllocIDsFromAllocationListStubs(allocs)
 
 	// Wait for allocations to get past initial pending state
 	e2eutil.WaitForAllocsNotPending(t, nomadClient, allocIDs)
@@ -58,13 +56,9 @@ func (tc *SystemSchedTest) TestJobUpdateOnIneligbleNode(f *framework.F) {
 	// Assert all jobs still running
 	jobs = nomadClient.Jobs()
 	allocs, _, err = jobs.Allocations(jobID, true, nil)
-
-	allocIDs = nil
-	for _, alloc := range allocs {
-		allocIDs = append(allocIDs, alloc.ID)
-	}
-
 	require.NoError(t, err)
+
+	allocIDs = e2eutil.AllocIDsFromAllocationListStubs(allocs)
 
 	allocForDisabledNode := make(map[string]*api.AllocationListStub)
 
 	// Wait for allocs to run and collect allocs on ineligible node
@@ -89,19 +83,15 @@ func (tc *SystemSchedTest) TestJobUpdateOnIneligbleNode(f *framework.F) {
 	require.Len(t, allocForDisabledNode, 1)
 
 	// Update job
-	e2eutil.RegisterAndWaitForAllocs(t, nomadClient, "systemsched/input/system_job1.nomad", jobID, "")
+	e2eutil.RegisterAndWaitForAllocs(t, nomadClient, "scheduler_system/input/system_job1.nomad", jobID, "")
 
 	// Get updated allocations
 	jobs = nomadClient.Jobs()
 	allocs, _, err = jobs.Allocations(jobID, false, nil)
 	require.NoError(t, err)
 
-	allocIDs = nil
-	for _, alloc := range allocs {
-		allocIDs = append(allocIDs, alloc.ID)
-	}
-
-	// Wait for allocs to start
+	allocIDs = e2eutil.AllocIDsFromAllocationListStubs(allocs)
 	e2eutil.WaitForAllocsNotPending(t, nomadClient, allocIDs)
 
 	// Get latest alloc status now that they are no longer pending
diff --git a/helper/uuid/uuid.go b/helper/uuid/uuid.go
index 145c817803d0..c0eec178ea9d 100644
--- a/helper/uuid/uuid.go
+++ b/helper/uuid/uuid.go
@@ -19,3 +19,9 @@ func Generate() string {
 		buf[8:10],
 		buf[10:16])
 }
+
+// Short is used to generate a random shortened UUID.
+func Short() string {
+	id := Generate()
+	return id[len(id)-8:]
+}
diff --git a/nomad/config.go b/nomad/config.go
index 08e4f562f5c3..9575416c3bde 100644
--- a/nomad/config.go
+++ b/nomad/config.go
@@ -323,8 +323,8 @@ type Config struct {
 	AutopilotInterval time.Duration
 
 	// DefaultSchedulerConfig configures the initial scheduler config to be persisted in Raft.
-	// Once the cluster is bootstrapped, and Raft persists the config (from here or through API),
-	// This value is ignored.
+	// Once the cluster is bootstrapped and Raft persists the config (from here
+	// or through the API), this value is ignored.
 	DefaultSchedulerConfig structs.SchedulerConfiguration `hcl:"default_scheduler_config"`
 
 	// PluginLoader is used to load plugins.
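Like the existing preemption switches, the new sysbatch flag is operator-togglable at
runtime rather than fixed at bootstrap. A minimal sketch of flipping it through the Go
`api` client follows; the client setup and error handling here are illustrative
assumptions, not part of this patch:

```go
package main

import (
	"fmt"

	"github.com/hashicorp/nomad/api"
)

func main() {
	// Connect to a local agent (assumes the default address and no ACL token).
	client, err := api.NewClient(api.DefaultConfig())
	if err != nil {
		panic(err)
	}

	// Read the current scheduler configuration.
	resp, _, err := client.Operator().SchedulerGetConfiguration(nil)
	if err != nil {
		panic(err)
	}

	// Enable preemption for sysbatch jobs; it defaults to false
	// (see DefaultConfig below).
	sc := resp.SchedulerConfig
	sc.PreemptionConfig.SysBatchSchedulerEnabled = true

	if _, _, err := client.Operator().SchedulerSetConfiguration(sc, nil); err != nil {
		panic(err)
	}
	fmt.Println("sysbatch preemption enabled")
}
```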
@@ -433,9 +433,10 @@ func DefaultConfig() *Config { DefaultSchedulerConfig: structs.SchedulerConfiguration{ SchedulerAlgorithm: structs.SchedulerAlgorithmBinpack, PreemptionConfig: structs.PreemptionConfig{ - SystemSchedulerEnabled: true, - BatchSchedulerEnabled: false, - ServiceSchedulerEnabled: false, + SystemSchedulerEnabled: true, + SysBatchSchedulerEnabled: false, + BatchSchedulerEnabled: false, + ServiceSchedulerEnabled: false, }, }, } diff --git a/nomad/core_sched.go b/nomad/core_sched.go index 1ac135d0aaea..eb796f66bcaa 100644 --- a/nomad/core_sched.go +++ b/nomad/core_sched.go @@ -136,9 +136,7 @@ OUTER: gc, allocs, err := c.gcEval(eval, oldThreshold, true) if err != nil { continue OUTER - } - - if gc { + } else if gc { jobEval = append(jobEval, eval.ID) jobAlloc = append(jobAlloc, allocs...) } else { @@ -160,6 +158,7 @@ OUTER: if len(gcEval) == 0 && len(gcAlloc) == 0 && len(gcJob) == 0 { return nil } + c.logger.Debug("job GC found eligible objects", "jobs", len(gcJob), "evals", len(gcEval), "allocs", len(gcAlloc)) diff --git a/nomad/mock/mock.go b/nomad/mock/mock.go index e8c555a10218..6e1a5f283b8e 100644 --- a/nomad/mock/mock.go +++ b/nomad/mock/mock.go @@ -172,6 +172,46 @@ func HCL() string { ` } +func SystemBatchJob() *structs.Job { + job := &structs.Job{ + Region: "global", + ID: fmt.Sprintf("mock-sysbatch-%s", uuid.Short()), + Name: "my-sysbatch", + Namespace: structs.DefaultNamespace, + Type: structs.JobTypeSysBatch, + Priority: 10, + Datacenters: []string{"dc1"}, + Constraints: []*structs.Constraint{ + { + LTarget: "${attr.kernel.name}", + RTarget: "linux", + Operand: "=", + }, + }, + TaskGroups: []*structs.TaskGroup{{ + Count: 1, + Name: "pinger", + Tasks: []*structs.Task{{ + Name: "ping-example", + Driver: "exec", + Config: map[string]interface{}{ + "command": "/usr/bin/ping", + "args": []string{"-c", "5", "example.com"}, + }, + LogConfig: structs.DefaultLogConfig(), + }}, + }}, + + Status: structs.JobStatusPending, + Version: 0, + CreateIndex: 42, + ModifyIndex: 99, + JobModifyIndex: 99, + } + job.Canonicalize() + return job +} + func Job() *structs.Job { job := &structs.Job{ Region: "global", @@ -895,7 +935,7 @@ func Eval() *structs.Evaluation { } func JobSummary(jobID string) *structs.JobSummary { - js := &structs.JobSummary{ + return &structs.JobSummary{ JobID: jobID, Namespace: structs.DefaultNamespace, Summary: map[string]structs.TaskGroupSummary{ @@ -905,7 +945,19 @@ func JobSummary(jobID string) *structs.JobSummary { }, }, } - return js +} + +func JobSysBatchSummary(jobID string) *structs.JobSummary { + return &structs.JobSummary{ + JobID: jobID, + Namespace: structs.DefaultNamespace, + Summary: map[string]structs.TaskGroupSummary{ + "pinger": { + Queued: 0, + Starting: 0, + }, + }, + } } func Alloc() *structs.Allocation { @@ -1191,6 +1243,34 @@ func BatchAlloc() *structs.Allocation { return alloc } +func SysBatchAlloc() *structs.Allocation { + job := SystemBatchJob() + return &structs.Allocation{ + ID: uuid.Generate(), + EvalID: uuid.Generate(), + NodeID: "12345678-abcd-efab-cdef-123456789abc", + Namespace: structs.DefaultNamespace, + TaskGroup: "pinger", + AllocatedResources: &structs.AllocatedResources{ + Tasks: map[string]*structs.AllocatedTaskResources{ + "ping-example": { + Cpu: structs.AllocatedCpuResources{CpuShares: 500}, + Memory: structs.AllocatedMemoryResources{MemoryMB: 256}, + Networks: []*structs.NetworkResource{{ + Device: "eth0", + IP: "192.168.0.100", + }}, + }, + }, + Shared: structs.AllocatedSharedResources{DiskMB: 150}, + }, + Job: job, + 
JobID: job.ID, + DesiredStatus: structs.AllocDesiredStatusRun, + ClientStatus: structs.AllocClientStatusPending, + } +} + func SystemAlloc() *structs.Allocation { alloc := &structs.Allocation{ ID: uuid.Generate(), diff --git a/nomad/state/schema.go b/nomad/state/schema.go index 923b44617139..8178ec515f34 100644 --- a/nomad/state/schema.go +++ b/nomad/state/schema.go @@ -271,13 +271,16 @@ func jobIsGCable(obj interface{}) (bool, error) { return true, nil } - // Otherwise, only batch jobs are eligible because they complete on their - // own without a user stopping them. - if j.Type != structs.JobTypeBatch { + switch j.Type { + // Otherwise, batch and sysbatch jobs are eligible because they complete on + // their own without a user stopping them. + case structs.JobTypeBatch, structs.JobTypeSysBatch: + return true, nil + + default: + // other job types may not be GC until stopped return false, nil } - - return true, nil } // jobIsPeriodic satisfies the ConditionalIndexFunc interface and creates an index diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index d5e5f12f9b0b..f6acd476b6f7 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -1973,7 +1973,7 @@ func (s *StateStore) JobsByScheduler(ws memdb.WatchSet, schedulerType string) (m return iter, nil } -// JobsByGC returns an iterator over all jobs eligible or uneligible for garbage +// JobsByGC returns an iterator over all jobs eligible or ineligible for garbage // collection. func (s *StateStore) JobsByGC(ws memdb.WatchSet, gc bool) (memdb.ResultIterator, error) { txn := s.db.ReadTxn() @@ -4462,13 +4462,15 @@ func (s *StateStore) setJobStatus(index uint64, txn *txn, } func (s *StateStore) getJobStatus(txn *txn, job *structs.Job, evalDelete bool) (string, error) { - // System, Periodic and Parameterized jobs are running until explicitly - // stopped - if job.Type == structs.JobTypeSystem || job.IsParameterized() || job.IsPeriodic() { + // System, SysBatch, Periodic and Parameterized jobs are running until + // explicitly stopped. + if job.Type == structs.JobTypeSysBatch || + job.Type == structs.JobTypeSystem || + job.IsParameterized() || + job.IsPeriodic() { if job.Stop { return structs.JobStatusDead, nil } - return structs.JobStatusRunning, nil } diff --git a/nomad/structs/funcs.go b/nomad/structs/funcs.go index 7d5398133ff4..0c693eae904a 100644 --- a/nomad/structs/funcs.go +++ b/nomad/structs/funcs.go @@ -70,10 +70,11 @@ func RemoveAllocs(alloc []*Allocation, remove []*Allocation) []*Allocation { } // FilterTerminalAllocs filters out all allocations in a terminal state and -// returns the latest terminal allocations +// returns the latest terminal allocations. func FilterTerminalAllocs(allocs []*Allocation) ([]*Allocation, map[string]*Allocation) { terminalAllocsByName := make(map[string]*Allocation) n := len(allocs) + for i := 0; i < n; i++ { if allocs[i].TerminalStatus() { @@ -91,9 +92,75 @@ func FilterTerminalAllocs(allocs []*Allocation) ([]*Allocation, map[string]*Allo n-- } } + return allocs[:n], terminalAllocsByName } +// SplitTerminalAllocs splits allocs into non-terminal and terminal allocs, with +// the terminal allocs indexed by node->alloc.name. 
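+//
+// Unlike FilterTerminalAllocs above, which keeps only the latest terminal
+// alloc per name, this index is kept per node, letting the sysbatch scheduler
+// decide for each node whether the job has already run there to completion.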
+func SplitTerminalAllocs(allocs []*Allocation) ([]*Allocation, TerminalByNodeByName) {
+	var alive []*Allocation
+	var terminal = make(TerminalByNodeByName)
+
+	for _, alloc := range allocs {
+		if alloc.TerminalStatus() {
+			terminal.Set(alloc)
+		} else {
+			alive = append(alive, alloc)
+		}
+	}
+
+	return alive, terminal
+}
+
+// TerminalByNodeByName is a map of NodeID->Allocation.Name->Allocation used by
+// the sysbatch scheduler for locating the most up-to-date terminal allocations.
+type TerminalByNodeByName map[string]map[string]*Allocation
+
+func (a TerminalByNodeByName) Set(allocation *Allocation) {
+	node := allocation.NodeID
+	name := allocation.Name
+
+	if _, exists := a[node]; !exists {
+		a[node] = make(map[string]*Allocation)
+	}
+
+	if previous, exists := a[node][name]; !exists {
+		a[node][name] = allocation
+	} else {
+		// keep the newest version of the terminal alloc for this (node, name) coordinate
+		if previous.CreateIndex < allocation.CreateIndex {
+			a[node][name] = allocation
+		}
+	}
+}
+
+func (a TerminalByNodeByName) Get(nodeID, name string) (*Allocation, bool) {
+	if _, exists := a[nodeID]; !exists {
+		return nil, false
+	}
+
+	if _, exists := a[nodeID][name]; !exists {
+		return nil, false
+	}
+
+	return a[nodeID][name], true
+}
+
+// Any returns an allocation with the given name, if one exists. Used by the
+// system scheduler to substitute a missing allocation that will be updated
+// later.
+func (a TerminalByNodeByName) Any(name string) *Allocation {
+	for _, names := range a {
+		for aName := range names {
+			if name == aName {
+				return names[name]
+			}
+		}
+	}
+	return nil
+}
+
 // AllocsFit checks if a given set of allocations will fit on a node.
 // The netIdx can optionally be provided if it's already been computed.
 // If the netIdx is provided, it is assumed that the client has already
diff --git a/nomad/structs/funcs_test.go b/nomad/structs/funcs_test.go
index 504cc3a8e486..d802274f6f38 100644
--- a/nomad/structs/funcs_test.go
+++ b/nomad/structs/funcs_test.go
@@ -335,8 +335,8 @@ func TestAllocsFit(t *testing.T) {
 			DiskMB: 5000,
 			Networks: Networks{
 				{
-					Mode: "host",
-					IP:   "10.0.0.1",
+					Mode:          "host",
+					IP:            "10.0.0.1",
 					ReservedPorts: []Port{{"main", 8000, 0, ""}},
 				},
 			},
diff --git a/nomad/structs/operator.go b/nomad/structs/operator.go
index 8a3afef9f154..4960369219ec 100644
--- a/nomad/structs/operator.go
+++ b/nomad/structs/operator.go
@@ -205,6 +205,9 @@ type PreemptionConfig struct {
 	// SystemSchedulerEnabled specifies if preemption is enabled for system jobs
 	SystemSchedulerEnabled bool `hcl:"system_scheduler_enabled"`
 
+	// SysBatchSchedulerEnabled specifies if preemption is enabled for sysbatch jobs
+	SysBatchSchedulerEnabled bool `hcl:"sysbatch_scheduler_enabled"`
+
 	// BatchSchedulerEnabled specifies if preemption is enabled for batch jobs
 	BatchSchedulerEnabled bool `hcl:"batch_scheduler_enabled"`
 
diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go
index 481b157ba038..b8b93c568c4e 100644
--- a/nomad/structs/structs.go
+++ b/nomad/structs/structs.go
@@ -3761,10 +3761,11 @@ func (c *ComparableResources) NetIndex(n *NetworkResource) int {
 const (
 	// JobTypeNomad is reserved for internal system tasks and is
 	// always handled by the CoreScheduler.
- JobTypeCore = "_core" - JobTypeService = "service" - JobTypeBatch = "batch" - JobTypeSystem = "system" + JobTypeCore = "_core" + JobTypeService = "service" + JobTypeBatch = "batch" + JobTypeSystem = "system" + JobTypeSysBatch = "sysbatch" ) const ( @@ -4027,7 +4028,7 @@ func (j *Job) Validate() error { mErr.Errors = append(mErr.Errors, errors.New("Job must be in a namespace")) } switch j.Type { - case JobTypeCore, JobTypeService, JobTypeBatch, JobTypeSystem: + case JobTypeCore, JobTypeService, JobTypeBatch, JobTypeSystem, JobTypeSysBatch: case "": mErr.Errors = append(mErr.Errors, errors.New("Missing job type")) default: @@ -4119,11 +4120,12 @@ func (j *Job) Validate() error { } } - // Validate periodic is only used with batch jobs. + // Validate periodic is only used with batch or sysbatch jobs. if j.IsPeriodic() && j.Periodic.Enabled { - if j.Type != JobTypeBatch { - mErr.Errors = append(mErr.Errors, - fmt.Errorf("Periodic can only be used with %q scheduler", JobTypeBatch)) + if j.Type != JobTypeBatch && j.Type != JobTypeSysBatch { + mErr.Errors = append(mErr.Errors, fmt.Errorf( + "Periodic can only be used with %q or %q scheduler", JobTypeBatch, JobTypeSysBatch, + )) } if err := j.Periodic.Validate(); err != nil { @@ -4132,9 +4134,10 @@ func (j *Job) Validate() error { } if j.IsParameterized() { - if j.Type != JobTypeBatch { - mErr.Errors = append(mErr.Errors, - fmt.Errorf("Parameterized job can only be used with %q scheduler", JobTypeBatch)) + if j.Type != JobTypeBatch && j.Type != JobTypeSysBatch { + mErr.Errors = append(mErr.Errors, fmt.Errorf( + "Parameterized job can only be used with %q or %q scheduler", JobTypeBatch, JobTypeSysBatch, + )) } if err := j.ParameterizedJob.Validate(); err != nil { diff --git a/scheduler/generic_sched.go b/scheduler/generic_sched.go index c67eafad870a..b933deb1eb21 100644 --- a/scheduler/generic_sched.go +++ b/scheduler/generic_sched.go @@ -36,7 +36,7 @@ const ( // allocInPlace is the status used when speculating on an in-place update allocInPlace = "alloc updating in-place" - // allocNodeTainted is the status used when stopping an alloc because it's + // allocNodeTainted is the status used when stopping an alloc because its // node is tainted. allocNodeTainted = "alloc not needed as node is tainted" diff --git a/scheduler/rank.go b/scheduler/rank.go index 1653d9cf9067..ec4b2635d423 100644 --- a/scheduler/rank.go +++ b/scheduler/rank.go @@ -24,7 +24,7 @@ type RankedNode struct { TaskLifecycles map[string]*structs.TaskLifecycleConfig AllocResources *structs.AllocatedSharedResources - // Allocs is used to cache the proposed allocations on the + // Proposed is used to cache the proposed allocations on the // node. This can be shared between iterators that require it. Proposed []*structs.Allocation @@ -60,7 +60,7 @@ func (r *RankedNode) SetTaskResources(task *structs.Task, r.TaskLifecycles[task.Name] = task.Lifecycle } -// RankFeasibleIterator is used to iteratively yield nodes along +// RankIterator is used to iteratively yield nodes along // with ranking metadata. The iterators may manage some state for // performance optimizations. 
type RankIterator interface { diff --git a/scheduler/scheduler.go b/scheduler/scheduler.go index a950690db44f..d1bbfa4c3e41 100644 --- a/scheduler/scheduler.go +++ b/scheduler/scheduler.go @@ -21,9 +21,10 @@ const ( // BuiltinSchedulers contains the built in registered schedulers // which are available var BuiltinSchedulers = map[string]Factory{ - "service": NewServiceScheduler, - "batch": NewBatchScheduler, - "system": NewSystemScheduler, + "service": NewServiceScheduler, + "batch": NewBatchScheduler, + "system": NewSystemScheduler, + "sysbatch": NewSysBatchScheduler, } // NewScheduler is used to instantiate and return a new scheduler diff --git a/scheduler/stack.go b/scheduler/stack.go index bccabc7899ab..cf01c2992afe 100644 --- a/scheduler/stack.go +++ b/scheduler/stack.go @@ -198,8 +198,12 @@ type SystemStack struct { scoreNorm *ScoreNormalizationIterator } -// NewSystemStack constructs a stack used for selecting system job placements. -func NewSystemStack(ctx Context) *SystemStack { +// NewSystemStack constructs a stack used for selecting system and sysbatch +// job placements. +// +// sysbatch is used to determine which scheduler config option is used to +// control the use of preemption. +func NewSystemStack(sysbatch bool, ctx Context) *SystemStack { // Create a new stack s := &SystemStack{ctx: ctx} @@ -237,10 +241,13 @@ func NewSystemStack(ctx Context) *SystemStack { // previously been marked as eligible or ineligible. Generally this will be // checks that only needs to examine the single node to determine feasibility. jobs := []FeasibilityChecker{s.jobConstraint} - tgs := []FeasibilityChecker{s.taskGroupDrivers, s.taskGroupConstraint, + tgs := []FeasibilityChecker{ + s.taskGroupDrivers, + s.taskGroupConstraint, s.taskGroupHostVolumes, s.taskGroupDevices, - s.taskGroupNetwork} + s.taskGroupNetwork, + } avail := []FeasibilityChecker{s.taskGroupCSIVolumes} s.wrappedChecks = NewFeasibilityWrapper(ctx, s.quota, jobs, tgs, avail) @@ -257,9 +264,14 @@ func NewSystemStack(ctx Context) *SystemStack { schedulerAlgorithm := schedConfig.EffectiveSchedulerAlgorithm() enablePreemption := true if schedConfig != nil { - enablePreemption = schedConfig.PreemptionConfig.SystemSchedulerEnabled + if sysbatch { + enablePreemption = schedConfig.PreemptionConfig.SysBatchSchedulerEnabled + } else { + enablePreemption = schedConfig.PreemptionConfig.SystemSchedulerEnabled + } } + // Create binpack iterator s.binPack = NewBinPackIterator(ctx, rankSource, enablePreemption, 0, schedulerAlgorithm) // Apply score normalization @@ -360,11 +372,13 @@ func NewGenericStack(batch bool, ctx Context) *GenericStack { // previously been marked as eligible or ineligible. Generally this will be // checks that only needs to examine the single node to determine feasibility. 
jobs := []FeasibilityChecker{s.jobConstraint} - tgs := []FeasibilityChecker{s.taskGroupDrivers, + tgs := []FeasibilityChecker{ + s.taskGroupDrivers, s.taskGroupConstraint, s.taskGroupHostVolumes, s.taskGroupDevices, - s.taskGroupNetwork} + s.taskGroupNetwork, + } avail := []FeasibilityChecker{s.taskGroupCSIVolumes} s.wrappedChecks = NewFeasibilityWrapper(ctx, s.quota, jobs, tgs, avail) diff --git a/scheduler/stack_test.go b/scheduler/stack_test.go index 4650546d32f1..b45d91bc5d16 100644 --- a/scheduler/stack_test.go +++ b/scheduler/stack_test.go @@ -389,7 +389,7 @@ func TestServiceStack_Select_BinPack_Overflow(t *testing.T) { func TestSystemStack_SetNodes(t *testing.T) { _, ctx := testContext(t) - stack := NewSystemStack(ctx) + stack := NewSystemStack(false, ctx) nodes := []*structs.Node{ mock.Node(), @@ -411,7 +411,7 @@ func TestSystemStack_SetNodes(t *testing.T) { func TestSystemStack_SetJob(t *testing.T) { _, ctx := testContext(t) - stack := NewSystemStack(ctx) + stack := NewSystemStack(false, ctx) job := mock.Job() stack.SetJob(job) @@ -427,7 +427,7 @@ func TestSystemStack_SetJob(t *testing.T) { func TestSystemStack_Select_Size(t *testing.T) { _, ctx := testContext(t) nodes := []*structs.Node{mock.Node()} - stack := NewSystemStack(ctx) + stack := NewSystemStack(false, ctx) stack.SetNodes(nodes) job := mock.Job() @@ -455,7 +455,7 @@ func TestSystemStack_Select_MetricsReset(t *testing.T) { mock.Node(), mock.Node(), } - stack := NewSystemStack(ctx) + stack := NewSystemStack(false, ctx) stack.SetNodes(nodes) job := mock.Job() @@ -491,7 +491,7 @@ func TestSystemStack_Select_DriverFilter(t *testing.T) { zero := nodes[0] zero.Attributes["driver.foo"] = "1" - stack := NewSystemStack(ctx) + stack := NewSystemStack(false, ctx) stack.SetNodes(nodes) job := mock.Job() @@ -513,7 +513,7 @@ func TestSystemStack_Select_DriverFilter(t *testing.T) { t.Fatalf("ComputedClass() failed: %v", err) } - stack = NewSystemStack(ctx) + stack = NewSystemStack(false, ctx) stack.SetNodes(nodes) stack.SetJob(job) node = stack.Select(job.TaskGroups[0], selectOptions) @@ -534,7 +534,7 @@ func TestSystemStack_Select_ConstraintFilter(t *testing.T) { t.Fatalf("ComputedClass() failed: %v", err) } - stack := NewSystemStack(ctx) + stack := NewSystemStack(false, ctx) stack.SetNodes(nodes) job := mock.Job() @@ -577,7 +577,7 @@ func TestSystemStack_Select_BinPack_Overflow(t *testing.T) { } one := nodes[1] - stack := NewSystemStack(ctx) + stack := NewSystemStack(false, ctx) stack.SetNodes(nodes) job := mock.Job() diff --git a/scheduler/system_sched.go b/scheduler/system_scheduler.go similarity index 85% rename from scheduler/system_sched.go rename to scheduler/system_scheduler.go index 4b1e5c8cbfaa..53e4b4eefbb0 100644 --- a/scheduler/system_sched.go +++ b/scheduler/system_scheduler.go @@ -14,15 +14,21 @@ const ( // we will attempt to schedule if we continue to hit conflicts for system // jobs. maxSystemScheduleAttempts = 5 + + // maxSysBatchScheduleAttempts is used to limit the number of times we will + // attempt to schedule if we continue to hit conflicts for sysbatch jobs. + maxSysBatchScheduleAttempts = 2 ) -// SystemScheduler is used for 'system' jobs. This scheduler is -// designed for services that should be run on every client. -// One for each job, containing an allocation for each node +// SystemScheduler is used for 'system' and 'sysbatch' jobs. This scheduler is +// designed for jobs that should be run on every client. 
The 'system' mode
+// will ensure those jobs continuously run regardless of successful task exits,
+// whereas 'sysbatch' considers the task complete on success.
 type SystemScheduler struct {
-	logger  log.Logger
-	state   State
-	planner Planner
+	logger   log.Logger
+	state    State
+	planner  Planner
+	sysbatch bool
 
 	eval       *structs.Evaluation
 	job        *structs.Job
@@ -30,8 +36,9 @@ type SystemScheduler struct {
 	planResult *structs.PlanResult
 	ctx        *EvalContext
 	stack      *SystemStack
-	nodes      []*structs.Node
-	nodesByDC  map[string]int
+
+	nodes     []*structs.Node
+	nodesByDC map[string]int
 
 	limitReached bool
 	nextEval     *structs.Evaluation
@@ -44,14 +51,25 @@
 // scheduler.
 func NewSystemScheduler(logger log.Logger, state State, planner Planner) Scheduler {
 	return &SystemScheduler{
-		logger:  logger.Named("system_sched"),
-		state:   state,
-		planner: planner,
+		logger:   logger.Named("system_sched"),
+		state:    state,
+		planner:  planner,
+		sysbatch: false,
+	}
+}
+
+// NewSysBatchScheduler is a factory function to instantiate a new sysbatch
+// scheduler.
+func NewSysBatchScheduler(logger log.Logger, state State, planner Planner) Scheduler {
+	return &SystemScheduler{
+		logger:   logger.Named("sysbatch_sched"),
+		state:    state,
+		planner:  planner,
+		sysbatch: true,
 	}
 }
 
 // Process is used to handle a single evaluation.
 func (s *SystemScheduler) Process(eval *structs.Evaluation) error {
+
 	// Store the evaluation
 	s.eval = eval
 
@@ -59,21 +77,20 @@ func (s *SystemScheduler) Process(eval *structs.Evaluation) error {
 	s.logger = s.logger.With("eval_id", eval.ID, "job_id", eval.JobID, "namespace", eval.Namespace)
 
 	// Verify the evaluation trigger reason is understood
-	switch eval.TriggeredBy {
-	case structs.EvalTriggerJobRegister, structs.EvalTriggerNodeUpdate, structs.EvalTriggerFailedFollowUp,
-		structs.EvalTriggerJobDeregister, structs.EvalTriggerRollingUpdate, structs.EvalTriggerPreemption,
-		structs.EvalTriggerDeploymentWatcher, structs.EvalTriggerNodeDrain, structs.EvalTriggerAllocStop,
-		structs.EvalTriggerQueuedAllocs, structs.EvalTriggerScaling:
-	default:
-		desc := fmt.Sprintf("scheduler cannot handle '%s' evaluation reason",
-			eval.TriggeredBy)
+	if !s.canHandle(eval.TriggeredBy) {
+		desc := fmt.Sprintf("scheduler cannot handle '%s' evaluation reason", eval.TriggeredBy)
 		return setStatus(s.logger, s.planner, s.eval, s.nextEval, nil, s.failedTGAllocs,
 			structs.EvalStatusFailed, desc, s.queuedAllocs, "")
 	}
 
+	limit := maxSystemScheduleAttempts
+	if s.sysbatch {
+		limit = maxSysBatchScheduleAttempts
+	}
+
 	// Retry up to the scheduler-specific limit and reset if progress is made.
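 	// (progressMade treats a plan that submitted any allocation placements or
 	// updates as progress)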
progress := func() bool { return progressMade(s.planResult) } - if err := retryMax(maxSystemScheduleAttempts, s.process, progress); err != nil { + if err := retryMax(limit, s.process, progress); err != nil { if statusErr, ok := err.(*SetStatusError); ok { return setStatus(s.logger, s.planner, s.eval, s.nextEval, nil, s.failedTGAllocs, statusErr.EvalStatus, err.Error(), s.queuedAllocs, "") @@ -94,9 +111,9 @@ func (s *SystemScheduler) process() (bool, error) { ws := memdb.NewWatchSet() s.job, err = s.state.JobByID(ws, s.eval.Namespace, s.eval.JobID) if err != nil { - return false, fmt.Errorf("failed to get job '%s': %v", - s.eval.JobID, err) + return false, fmt.Errorf("failed to get job '%s': %v", s.eval.JobID, err) } + numTaskGroups := 0 if !s.job.Stopped() { numTaskGroups = len(s.job.TaskGroups) @@ -121,7 +138,7 @@ func (s *SystemScheduler) process() (bool, error) { s.ctx = NewEvalContext(s.state, s.plan, s.logger) // Construct the placement stack - s.stack = NewSystemStack(s.ctx) + s.stack = NewSystemStack(s.sysbatch, s.ctx) if !s.job.Stopped() { s.stack.SetJob(s.job) } @@ -185,26 +202,24 @@ func (s *SystemScheduler) computeJobAllocs() error { ws := memdb.NewWatchSet() allocs, err := s.state.AllocsByJob(ws, s.eval.Namespace, s.eval.JobID, true) if err != nil { - return fmt.Errorf("failed to get allocs for job '%s': %v", - s.eval.JobID, err) + return fmt.Errorf("failed to get allocs for job '%s': %v", s.eval.JobID, err) } // Determine the tainted nodes containing job allocs tainted, err := taintedNodes(s.state, allocs) if err != nil { - return fmt.Errorf("failed to get tainted nodes for job '%s': %v", - s.eval.JobID, err) + return fmt.Errorf("failed to get tainted nodes for job '%s': %v", s.eval.JobID, err) } // Update the allocations which are in pending/running state on tainted - // nodes to lost + // nodes to lost. 
	updateNonTerminalAllocsToLost(s.plan, tainted, allocs)
 
-	// Filter out the allocations in a terminal state
-	allocs, terminalAllocs := structs.FilterTerminalAllocs(allocs)
+	// Split out terminal allocations
+	live, term := structs.SplitTerminalAllocs(allocs)
 
 	// Diff the required and existing allocations
-	diff := diffSystemAllocs(s.job, s.nodes, tainted, allocs, terminalAllocs)
+	diff := diffSystemAllocs(s.job, s.nodes, tainted, live, term)
 
 	s.logger.Debug("reconciled current state with desired state",
 		"place", len(diff.place), "update", len(diff.update),
 		"migrate", len(diff.migrate), "stop", len(diff.stop),
@@ -423,3 +438,27 @@ func (s *SystemScheduler) addBlocked(node *structs.Node) error {
 
 	return s.planner.CreateEval(blocked)
 }
+
+func (s *SystemScheduler) canHandle(trigger string) bool {
+	switch trigger {
+	case structs.EvalTriggerJobRegister:
+	case structs.EvalTriggerNodeUpdate:
+	case structs.EvalTriggerFailedFollowUp:
+	case structs.EvalTriggerJobDeregister:
+	case structs.EvalTriggerRollingUpdate:
+	case structs.EvalTriggerPreemption:
+	case structs.EvalTriggerDeploymentWatcher:
+	case structs.EvalTriggerNodeDrain:
+	case structs.EvalTriggerAllocStop:
+	case structs.EvalTriggerQueuedAllocs:
+	case structs.EvalTriggerScaling:
+	default:
+		// only the sysbatch scheduler can additionally handle periodic evaluations
+		if s.sysbatch {
+			return trigger == structs.EvalTriggerPeriodicJob
+		}
+		return false
+	}
+	return true
+}
diff --git a/scheduler/system_sysbatch_test.go b/scheduler/system_sysbatch_test.go
new file mode 100644
index 000000000000..1bbfffc02698
--- /dev/null
+++ b/scheduler/system_sysbatch_test.go
@@ -0,0 +1,1623 @@
+package scheduler
+
+import (
+	"fmt"
+	"sort"
+	"testing"
+
+	"github.com/hashicorp/go-memdb"
+	"github.com/hashicorp/nomad/helper"
+	"github.com/hashicorp/nomad/helper/uuid"
+	"github.com/hashicorp/nomad/nomad/mock"
+	"github.com/hashicorp/nomad/nomad/structs"
+	"github.com/stretchr/testify/require"
+)
+
+func TestSysBatch_JobRegister(t *testing.T) {
+	h := NewHarness(t)
+
+	// Create some nodes
+	_ = createNodes(t, h, 10)
+
+	// Create a job
+	job := mock.SystemBatchJob()
+	require.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), job))
+
+	// Create a mock evaluation to register the job
+	eval := &structs.Evaluation{
+		Namespace:   structs.DefaultNamespace,
+		ID:          uuid.Generate(),
+		Priority:    job.Priority,
+		TriggeredBy: structs.EvalTriggerJobRegister,
+		JobID:       job.ID,
+		Status:      structs.EvalStatusPending,
+	}
+	require.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
+
+	// Process the evaluation
+	err := h.Process(NewSysBatchScheduler, eval)
+	require.NoError(t, err)
+
+	// Ensure a single plan
+	require.Len(t, h.Plans, 1)
+	plan := h.Plans[0]
+
+	// Ensure the plan does not have annotations
+	require.Nil(t, plan.Annotations, "expected no annotations")
+
+	// Ensure the plan allocated
+	var planned []*structs.Allocation
+	for _, allocList := range plan.NodeAllocation {
+		planned = append(planned, allocList...)
+ } + require.Len(t, planned, 10) + + // Lookup the allocations by JobID + ws := memdb.NewWatchSet() + out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) + require.NoError(t, err) + + // Ensure all allocations placed + require.Len(t, out, 10) + + // Check the available nodes + count, ok := out[0].Metrics.NodesAvailable["dc1"] + require.True(t, ok) + require.Equal(t, 10, count, "bad metrics %#v:", out[0].Metrics) + + // Ensure no allocations are queued + queued := h.Evals[0].QueuedAllocations["my-sysbatch"] + require.Equal(t, 0, queued, "unexpected queued allocations") + + h.AssertEvalStatus(t, structs.EvalStatusComplete) +} + +func TestSysBatch_JobRegister_AddNode_Running(t *testing.T) { + h := NewHarness(t) + + // Create some nodes + nodes := createNodes(t, h, 10) + + // Generate a fake sysbatch job with allocations + job := mock.SystemBatchJob() + require.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), job)) + + var allocs []*structs.Allocation + for _, node := range nodes { + alloc := mock.SysBatchAlloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.NodeID = node.ID + alloc.Name = "my-sysbatch.pinger[0]" + alloc.ClientStatus = structs.AllocClientStatusRunning + allocs = append(allocs, alloc) + } + require.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), allocs)) + + // Add a new node. + node := mock.Node() + require.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node)) + + // Create a mock evaluation to deal with the node update + eval := &structs.Evaluation{ + Namespace: structs.DefaultNamespace, + ID: uuid.Generate(), + Priority: 50, + TriggeredBy: structs.EvalTriggerNodeUpdate, + JobID: job.ID, + Status: structs.EvalStatusPending, + } + require.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval})) + + // Process the evaluation + err := h.Process(NewSysBatchScheduler, eval) + require.NoError(t, err) + + // Ensure a single plan + require.Len(t, h.Plans, 1) + plan := h.Plans[0] + + // Ensure the plan had no node updates + var update []*structs.Allocation + for _, updateList := range plan.NodeUpdate { + update = append(update, updateList...) + } + require.Empty(t, update) + + // Ensure the plan allocated on the new node + var planned []*structs.Allocation + for _, allocList := range plan.NodeAllocation { + planned = append(planned, allocList...) 
+ } + require.Len(t, planned, 1) + + // Ensure it allocated on the right node + _, ok := plan.NodeAllocation[node.ID] + require.True(t, ok, "allocated on wrong node: %#v", plan) + + // Lookup the allocations by JobID + ws := memdb.NewWatchSet() + out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) + require.NoError(t, err) + + // Ensure all allocations placed + out, _ = structs.FilterTerminalAllocs(out) + require.Len(t, out, 11) + + h.AssertEvalStatus(t, structs.EvalStatusComplete) +} + +func TestSysBatch_JobRegister_AddNode_Dead(t *testing.T) { + h := NewHarness(t) + + // Create some nodes + nodes := createNodes(t, h, 10) + + // Generate a dead sysbatch job with complete allocations + job := mock.SystemBatchJob() + job.Status = structs.JobStatusDead // job is dead but not stopped + require.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), job)) + + var allocs []*structs.Allocation + for _, node := range nodes { + alloc := mock.SysBatchAlloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.NodeID = node.ID + alloc.Name = "my-sysbatch.pinger[0]" + alloc.ClientStatus = structs.AllocClientStatusComplete + allocs = append(allocs, alloc) + } + require.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), allocs)) + + // Add a new node. + node := mock.Node() + require.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node)) + + // Create a mock evaluation to deal with the node update + eval := &structs.Evaluation{ + Namespace: structs.DefaultNamespace, + ID: uuid.Generate(), + Priority: 50, + TriggeredBy: structs.EvalTriggerNodeUpdate, + JobID: job.ID, + Status: structs.EvalStatusPending, + } + require.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval})) + + // Process the evaluation + err := h.Process(NewSysBatchScheduler, eval) + require.NoError(t, err) + + // Ensure a single plan + require.Len(t, h.Plans, 1) + plan := h.Plans[0] + + // Ensure the plan has no node update + var update []*structs.Allocation + for _, updateList := range plan.NodeUpdate { + update = append(update, updateList...) + } + require.Len(t, update, 0) + + // Ensure the plan allocates on the new node + var planned []*structs.Allocation + for _, allocList := range plan.NodeAllocation { + planned = append(planned, allocList...) 
+	}
+	require.Len(t, planned, 1)
+
+	// Ensure it allocated on the right node
+	_, ok := plan.NodeAllocation[node.ID]
+	require.True(t, ok, "allocated on wrong node: %#v", plan)
+
+	// Lookup the allocations by JobID
+	ws := memdb.NewWatchSet()
+	out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
+	require.NoError(t, err)
+
+	// Ensure 1 non-terminal allocation
+	live, _ := structs.FilterTerminalAllocs(out)
+	require.Len(t, live, 1)
+
+	h.AssertEvalStatus(t, structs.EvalStatusComplete)
+}
+
+func TestSysBatch_JobModify(t *testing.T) {
+	h := NewHarness(t)
+
+	// Create some nodes
+	nodes := createNodes(t, h, 10)
+
+	// Generate a fake sysbatch job with allocations
+	job := mock.SystemBatchJob()
+	require.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), job))
+
+	var allocs []*structs.Allocation
+	for _, node := range nodes {
+		alloc := mock.SysBatchAlloc()
+		alloc.Job = job
+		alloc.JobID = job.ID
+		alloc.NodeID = node.ID
+		alloc.Name = "my-sysbatch.pinger[0]"
+		alloc.ClientStatus = structs.AllocClientStatusPending
+		allocs = append(allocs, alloc)
+	}
+	require.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), allocs))
+
+	// Add a few terminal-status allocations; these should be reinstated
+	var terminal []*structs.Allocation
+	for i := 0; i < 5; i++ {
+		alloc := mock.SysBatchAlloc()
+		alloc.Job = job
+		alloc.JobID = job.ID
+		alloc.NodeID = nodes[i].ID
+		alloc.Name = "my-sysbatch.pinger[0]"
+		alloc.ClientStatus = structs.AllocClientStatusComplete
+		terminal = append(terminal, alloc)
+	}
+	require.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), terminal))
+
+	// Update the job
+	job2 := mock.SystemBatchJob()
+	job2.ID = job.ID
+
+	// Update the task such that the change cannot be done in-place
+	job2.TaskGroups[0].Tasks[0].Config["command"] = "/bin/other"
+	require.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), job2))
+
+	// Create a mock evaluation to deal with the job update
+	eval := &structs.Evaluation{
+		Namespace:   structs.DefaultNamespace,
+		ID:          uuid.Generate(),
+		Priority:    50,
+		TriggeredBy: structs.EvalTriggerJobRegister,
+		JobID:       job.ID,
+		Status:      structs.EvalStatusPending,
+	}
+	require.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
+
+	// Process the evaluation
+	err := h.Process(NewSysBatchScheduler, eval)
+	require.NoError(t, err)
+
+	// Ensure a single plan
+	require.Len(t, h.Plans, 1)
+	plan := h.Plans[0]
+
+	// Ensure the plan evicted all allocs
+	var update []*structs.Allocation
+	for _, updateList := range plan.NodeUpdate {
+		update = append(update, updateList...)
+	}
+	require.Equal(t, len(allocs), len(update))
+
+	// Ensure the plan allocated
+	var planned []*structs.Allocation
+	for _, allocList := range plan.NodeAllocation {
+		planned = append(planned, allocList...)
+ } + require.Len(t, planned, 10) + + // Lookup the allocations by JobID + ws := memdb.NewWatchSet() + out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) + require.NoError(t, err) + + // Ensure all allocations placed + out, _ = structs.FilterTerminalAllocs(out) + require.Len(t, out, 10) + + h.AssertEvalStatus(t, structs.EvalStatusComplete) +} + +func TestSysBatch_JobModify_InPlace(t *testing.T) { + h := NewHarness(t) + + // Create some nodes + nodes := createNodes(t, h, 10) + + job := mock.SystemBatchJob() + require.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), job)) + + var allocs []*structs.Allocation + for _, node := range nodes { + alloc := mock.SysBatchAlloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.NodeID = node.ID + alloc.Name = "my-sysbatch.pinger[0]" + allocs = append(allocs, alloc) + } + require.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), allocs)) + + // Update the job + job2 := mock.SystemBatchJob() + job2.ID = job.ID + require.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), job2)) + + // Create a mock evaluation to deal with update + eval := &structs.Evaluation{ + Namespace: structs.DefaultNamespace, + ID: uuid.Generate(), + Priority: 50, + TriggeredBy: structs.EvalTriggerJobRegister, + JobID: job.ID, + Status: structs.EvalStatusPending, + } + require.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval})) + + // Process the evaluation + err := h.Process(NewSysBatchScheduler, eval) + require.NoError(t, err) + + // Ensure a single plan + require.Len(t, h.Plans, 1) + plan := h.Plans[0] + + // Ensure the plan did not evict any allocs + var update []*structs.Allocation + for _, updateList := range plan.NodeUpdate { + update = append(update, updateList...) + } + require.Empty(t, update) + + // Ensure the plan updated the existing allocs + var planned []*structs.Allocation + for _, allocList := range plan.NodeAllocation { + planned = append(planned, allocList...) 
+ } + require.Len(t, planned, 10) + + for _, p := range planned { + require.Equal(t, job2, p.Job, "should update job") + } + + // Lookup the allocations by JobID + ws := memdb.NewWatchSet() + out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) + require.NoError(t, err) + + // Ensure all allocations placed + require.Len(t, out, 10) + h.AssertEvalStatus(t, structs.EvalStatusComplete) +} + +func TestSysBatch_JobDeregister_Purged(t *testing.T) { + h := NewHarness(t) + + // Create some nodes + nodes := createNodes(t, h, 10) + + // Create a sysbatch job + job := mock.SystemBatchJob() + + var allocs []*structs.Allocation + for _, node := range nodes { + alloc := mock.SysBatchAlloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.NodeID = node.ID + alloc.Name = "my-sysbatch.pinger[0]" + allocs = append(allocs, alloc) + } + for _, alloc := range allocs { + require.NoError(t, h.State.UpsertJobSummary(h.NextIndex(), mock.JobSysBatchSummary(alloc.JobID))) + } + require.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), allocs)) + + // Create a mock evaluation to deregister the job + eval := &structs.Evaluation{ + Namespace: structs.DefaultNamespace, + ID: uuid.Generate(), + Priority: 50, + TriggeredBy: structs.EvalTriggerJobDeregister, + JobID: job.ID, + Status: structs.EvalStatusPending, + } + require.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval})) + + // Process the evaluation + err := h.Process(NewSysBatchScheduler, eval) + require.NoError(t, err) + + // Ensure a single plan + require.Len(t, h.Plans, 1) + plan := h.Plans[0] + + // Ensure the plan evicted the job from all nodes. + for _, node := range nodes { + require.Len(t, plan.NodeUpdate[node.ID], 1) + } + + // Lookup the allocations by JobID + ws := memdb.NewWatchSet() + out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) + require.NoError(t, err) + + // Ensure no remaining allocations + out, _ = structs.FilterTerminalAllocs(out) + require.Empty(t, out) + + h.AssertEvalStatus(t, structs.EvalStatusComplete) +} + +func TestSysBatch_JobDeregister_Stopped(t *testing.T) { + h := NewHarness(t) + + // Create some nodes + nodes := createNodes(t, h, 10) + + // Generate a stopped sysbatch job with allocations + job := mock.SystemBatchJob() + job.Stop = true + require.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), job)) + + var allocs []*structs.Allocation + for _, node := range nodes { + alloc := mock.SysBatchAlloc() + alloc.Job = job + alloc.JobID = job.ID + alloc.NodeID = node.ID + alloc.Name = "my-sysbatch.pinger[0]" + allocs = append(allocs, alloc) + } + for _, alloc := range allocs { + require.NoError(t, h.State.UpsertJobSummary(h.NextIndex(), mock.JobSysBatchSummary(alloc.JobID))) + } + require.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), allocs)) + + // Create a mock evaluation to deregister the job + eval := &structs.Evaluation{ + Namespace: structs.DefaultNamespace, + ID: uuid.Generate(), + Priority: 50, + TriggeredBy: structs.EvalTriggerJobDeregister, + JobID: job.ID, + Status: structs.EvalStatusPending, + } + require.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval})) + + // Process the evaluation + err := h.Process(NewSysBatchScheduler, eval) + require.NoError(t, err) + + // Ensure a single plan + require.Len(t, h.Plans, 1) + plan := h.Plans[0] + + // Ensure the plan evicted the job from all nodes. 
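+	// (a stopped job should be drained of its allocations just like a purged one)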
+	for _, node := range nodes {
+		require.Len(t, plan.NodeUpdate[node.ID], 1)
+	}
+
+	// Lookup the allocations by JobID
+	ws := memdb.NewWatchSet()
+	out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
+	require.NoError(t, err)
+
+	// Ensure no remaining allocations
+	out, _ = structs.FilterTerminalAllocs(out)
+	require.Empty(t, out)
+
+	h.AssertEvalStatus(t, structs.EvalStatusComplete)
+}
+
+func TestSysBatch_NodeDown(t *testing.T) {
+	h := NewHarness(t)
+
+	// Register a down node
+	node := mock.Node()
+	node.Status = structs.NodeStatusDown
+	require.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
+
+	// Generate a sysbatch job allocated on that node
+	job := mock.SystemBatchJob()
+	require.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), job))
+
+	alloc := mock.SysBatchAlloc()
+	alloc.Job = job
+	alloc.JobID = job.ID
+	alloc.NodeID = node.ID
+	alloc.Name = "my-sysbatch.pinger[0]"
+	alloc.DesiredTransition.Migrate = helper.BoolToPtr(true)
+	require.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Allocation{alloc}))
+
+	// Create a mock evaluation to deal with the node going down
+	eval := &structs.Evaluation{
+		Namespace:   structs.DefaultNamespace,
+		ID:          uuid.Generate(),
+		Priority:    50,
+		TriggeredBy: structs.EvalTriggerNodeUpdate,
+		JobID:       job.ID,
+		NodeID:      node.ID,
+		Status:      structs.EvalStatusPending,
+	}
+	require.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
+
+	// Process the evaluation
+	err := h.Process(NewSysBatchScheduler, eval)
+	require.NoError(t, err)
+
+	// Ensure a single plan
+	require.Len(t, h.Plans, 1)
+	plan := h.Plans[0]
+
+	// Ensure the plan evicted all allocs
+	require.Len(t, plan.NodeUpdate[node.ID], 1)
+
+	// Ensure the plan updated the allocation.
+	planned := make([]*structs.Allocation, 0)
+	for _, allocList := range plan.NodeUpdate {
+		planned = append(planned, allocList...)
+	}
+	require.Len(t, planned, 1)
+
+	// Ensure the allocation is stopped
+	p := planned[0]
+	require.Equal(t, structs.AllocDesiredStatusStop, p.DesiredStatus)
+	// note: the old assertion on ClientStatus == lost was incorrect and has
+	// been removed; the actual client status at this point is pending
+
+	h.AssertEvalStatus(t, structs.EvalStatusComplete)
+}
+
+func TestSysBatch_NodeDrain_Down(t *testing.T) {
+	h := NewHarness(t)
+
+	// Register a draining node
+	node := mock.Node()
+	node.Drain = true
+	node.Status = structs.NodeStatusDown
+	require.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
+
+	// Generate a sysbatch job allocated on that node.
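+	// Because the node is down as well as draining, the scheduler should
+	// mark the alloc lost rather than migrate it.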
+	job := mock.SystemBatchJob()
+	require.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), job))
+
+	alloc := mock.SysBatchAlloc()
+	alloc.Job = job
+	alloc.JobID = job.ID
+	alloc.NodeID = node.ID
+	alloc.Name = "my-sysbatch.pinger[0]"
+	require.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Allocation{alloc}))
+
+	// Create a mock evaluation to deal with the node update
+	eval := &structs.Evaluation{
+		Namespace:   structs.DefaultNamespace,
+		ID:          uuid.Generate(),
+		Priority:    50,
+		TriggeredBy: structs.EvalTriggerNodeUpdate,
+		JobID:       job.ID,
+		NodeID:      node.ID,
+		Status:      structs.EvalStatusPending,
+	}
+	require.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
+
+	// Process the evaluation
+	err := h.Process(NewSysBatchScheduler, eval)
+	require.NoError(t, err)
+
+	// Ensure a single plan
+	require.Len(t, h.Plans, 1)
+	plan := h.Plans[0]
+
+	// Ensure the plan evicted non-terminal allocs
+	require.Len(t, plan.NodeUpdate[node.ID], 1)
+
+	// Ensure that the allocation is marked as lost
+	var lost []string
+	for _, alloc := range plan.NodeUpdate[node.ID] {
+		lost = append(lost, alloc.ID)
+	}
+	require.Equal(t, []string{alloc.ID}, lost)
+
+	h.AssertEvalStatus(t, structs.EvalStatusComplete)
+}
+
+func TestSysBatch_NodeDrain(t *testing.T) {
+	h := NewHarness(t)
+
+	// Register a draining node
+	node := mock.Node()
+	node.Drain = true
+	require.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
+
+	// Generate a sysbatch job allocated on that node.
+	job := mock.SystemBatchJob()
+	require.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), job))
+
+	alloc := mock.SysBatchAlloc()
+	alloc.Job = job
+	alloc.JobID = job.ID
+	alloc.NodeID = node.ID
+	alloc.Name = "my-sysbatch.pinger[0]"
+	alloc.DesiredTransition.Migrate = helper.BoolToPtr(true)
+	require.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Allocation{alloc}))
+
+	// Create a mock evaluation to deal with drain
+	eval := &structs.Evaluation{
+		Namespace:   structs.DefaultNamespace,
+		ID:          uuid.Generate(),
+		Priority:    50,
+		TriggeredBy: structs.EvalTriggerNodeUpdate,
+		JobID:       job.ID,
+		NodeID:      node.ID,
+		Status:      structs.EvalStatusPending,
+	}
+	require.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
+
+	// Process the evaluation
+	err := h.Process(NewSysBatchScheduler, eval)
+	require.NoError(t, err)
+
+	// Ensure a single plan
+	require.Len(t, h.Plans, 1)
+	plan := h.Plans[0]
+
+	// Ensure the plan evicted all allocs
+	require.Len(t, plan.NodeUpdate[node.ID], 1)
+
+	// Ensure the plan updated the allocation.
+	planned := make([]*structs.Allocation, 0)
+	for _, allocList := range plan.NodeUpdate {
+		planned = append(planned, allocList...)
+	}
+	require.Len(t, planned, 1)
+
+	// Ensure the allocation is stopped
+	require.Equal(t, structs.AllocDesiredStatusStop, planned[0].DesiredStatus)
+
+	h.AssertEvalStatus(t, structs.EvalStatusComplete)
+}
+
+func TestSysBatch_NodeUpdate(t *testing.T) {
+	h := NewHarness(t)
+
+	// Register a node
+	node := mock.Node()
+	require.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
+
+	// Generate a sysbatch job allocated on that node.
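+	// An update for an otherwise healthy node should not queue any new work.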
+	job := mock.SystemBatchJob()
+	require.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), job))
+
+	alloc := mock.SysBatchAlloc()
+	alloc.Job = job
+	alloc.JobID = job.ID
+	alloc.NodeID = node.ID
+	alloc.Name = "my-sysbatch.pinger[0]"
+	require.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Allocation{alloc}))
+
+	// Create a mock evaluation to deal with the node update
+	eval := &structs.Evaluation{
+		Namespace:   structs.DefaultNamespace,
+		ID:          uuid.Generate(),
+		Priority:    50,
+		TriggeredBy: structs.EvalTriggerNodeUpdate,
+		JobID:       job.ID,
+		NodeID:      node.ID,
+		Status:      structs.EvalStatusPending,
+	}
+	require.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
+
+	// Process the evaluation
+	err := h.Process(NewSysBatchScheduler, eval)
+	require.NoError(t, err)
+
+	// Ensure that queued allocations is zero
+	val, ok := h.Evals[0].QueuedAllocations["pinger"]
+	require.True(t, ok)
+	require.Zero(t, val)
+
+	h.AssertEvalStatus(t, structs.EvalStatusComplete)
+}
+
+func TestSysBatch_RetryLimit(t *testing.T) {
+	h := NewHarness(t)
+	h.Planner = &RejectPlan{h}
+
+	// Create some nodes
+	_ = createNodes(t, h, 10)
+
+	// Create a job
+	job := mock.SystemBatchJob()
+	require.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), job))
+
+	// Create a mock evaluation to register the job
+	eval := &structs.Evaluation{
+		Namespace:   structs.DefaultNamespace,
+		ID:          uuid.Generate(),
+		Priority:    job.Priority,
+		TriggeredBy: structs.EvalTriggerJobRegister,
+		JobID:       job.ID,
+		Status:      structs.EvalStatusPending,
+	}
+	require.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
+
+	// Process the evaluation
+	err := h.Process(NewSysBatchScheduler, eval)
+	require.NoError(t, err)
+
+	// Ensure multiple plans
+	require.NotEmpty(t, h.Plans)
+
+	// Lookup the allocations by JobID
+	ws := memdb.NewWatchSet()
+	out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
+	require.NoError(t, err)
+
+	// Ensure no allocations placed
+	require.Empty(t, out)
+
+	// Should hit the retry limit
+	h.AssertEvalStatus(t, structs.EvalStatusFailed)
+}
+
+// This test ensures that the scheduler doesn't increment the queued allocation
+// count for a task group when allocations can't be created on currently
+// available nodes because of constraint mismatches.
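+// (Here the mismatch is assumed to come from the mock sysbatch job's linux
+// kernel constraint, which the darwin node registered below cannot satisfy.)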
+func TestSysBatch_Queued_With_Constraints(t *testing.T) {
+	h := NewHarness(t)
+
+	// Register a node
+	node := mock.Node()
+	node.Attributes["kernel.name"] = "darwin"
+	require.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
+
+	// Generate a sysbatch job which can't be placed on the node
+	job := mock.SystemBatchJob()
+	require.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), job))
+
+	// Create a mock evaluation to deal with the node update
+	eval := &structs.Evaluation{
+		Namespace:   structs.DefaultNamespace,
+		ID:          uuid.Generate(),
+		Priority:    50,
+		TriggeredBy: structs.EvalTriggerNodeUpdate,
+		JobID:       job.ID,
+		NodeID:      node.ID,
+		Status:      structs.EvalStatusPending,
+	}
+	require.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
+
+	// Process the evaluation
+	err := h.Process(NewSysBatchScheduler, eval)
+	require.NoError(t, err)
+
+	// Ensure that queued allocations is zero
+	val, ok := h.Evals[0].QueuedAllocations["pinger"]
+	require.True(t, ok)
+	require.Zero(t, val)
+}
+
+// This test ensures that the scheduler correctly ignores ineligible
+// nodes when scheduling due to a new node being added. The job has two
+// task groups constrained to a particular node class. The desired behavior
+// should be that the TaskGroup constrained to the newly added node class is
+// added and that the TaskGroup constrained to the ineligible node is ignored.
+func TestSysBatch_JobConstraint_AddNode(t *testing.T) {
+	h := NewHarness(t)
+
+	// Create two nodes
+	node := mock.Node()
+	node.NodeClass = "Class-A"
+	require.NoError(t, node.ComputeClass())
+	require.Nil(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
+
+	nodeB := mock.Node()
+	nodeB.NodeClass = "Class-B"
+	require.NoError(t, nodeB.ComputeClass())
+	require.Nil(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), nodeB))
+
+	// Make a sysbatch job with two task groups, each constrained to a node class
+	job := mock.SystemBatchJob()
+	tgA := job.TaskGroups[0]
+	tgA.Name = "groupA"
+	tgA.Constraints = []*structs.Constraint{{
+		LTarget: "${node.class}",
+		RTarget: node.NodeClass,
+		Operand: "=",
+	}}
+	tgB := job.TaskGroups[0].Copy()
+	tgB.Name = "groupB"
+	tgB.Constraints = []*structs.Constraint{{
+		LTarget: "${node.class}",
+		RTarget: nodeB.NodeClass,
+		Operand: "=",
+	}}
+
+	// Upsert the job
+	job.TaskGroups = []*structs.TaskGroup{tgA, tgB}
+	require.Nil(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), job))
+
+	// Evaluate the job
+	eval := &structs.Evaluation{
+		Namespace:   structs.DefaultNamespace,
+		ID:          uuid.Generate(),
+		Priority:    job.Priority,
+		TriggeredBy: structs.EvalTriggerJobRegister,
+		JobID:       job.ID,
+		Status:      structs.EvalStatusPending,
+	}
+	require.Nil(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
+
+	// Process the evaluation
+	require.Nil(t, h.Process(NewSysBatchScheduler, eval))
+	require.Equal(t, "complete", h.Evals[0].Status)
+
+	// QueuedAllocations is drained
+	val, ok := h.Evals[0].QueuedAllocations["groupA"]
+	require.True(t, ok)
+	require.Equal(t, 0, val)
+
+	val, ok = h.Evals[0].QueuedAllocations["groupB"]
+	require.True(t, ok)
+	require.Equal(t, 0, val)
+
+	// Single plan with two NodeAllocations
+	require.Len(t, h.Plans, 1)
+	require.Len(t, h.Plans[0].NodeAllocation, 2)
+
+	// Mark the node as ineligible
+	node.SchedulingEligibility = structs.NodeSchedulingIneligible
+
+	// Evaluate the node update
+	eval2 := &structs.Evaluation{
+		Namespace:   structs.DefaultNamespace,
+		ID:          uuid.Generate(),
+		Priority:    job.Priority,
+		TriggeredBy: structs.EvalTriggerNodeUpdate,
+		NodeID:      node.ID,
+		JobID:       job.ID,
+		Status:      structs.EvalStatusPending,
+	}
+	require.Nil(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval2}))
+
+	// Process the 2nd evaluation
+	require.Nil(t, h.Process(NewSysBatchScheduler, eval2))
+	require.Equal(t, "complete", h.Evals[1].Status)
+
+	// Ensure no new plans
+	require.Equal(t, 1, len(h.Plans))
+
+	// Ensure all NodeAllocations are from the first eval
+	for _, allocs := range h.Plans[0].NodeAllocation {
+		require.Len(t, allocs, 1)
+		require.Equal(t, eval.ID, allocs[0].EvalID)
+	}
+
+	// Add a new Class-B node
+	nodeBTwo := mock.Node()
+	nodeBTwo.NodeClass = "Class-B"
+	require.NoError(t, nodeBTwo.ComputeClass())
+	require.Nil(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), nodeBTwo))
+
+	// Evaluate the new node
+	eval3 := &structs.Evaluation{
+		Namespace:   structs.DefaultNamespace,
+		ID:          uuid.Generate(),
+		Priority:    50,
+		TriggeredBy: structs.EvalTriggerNodeUpdate,
+		NodeID:      nodeBTwo.ID,
+		JobID:       job.ID,
+		Status:      structs.EvalStatusPending,
+	}
+
+	// Ensure the 3rd eval is complete
+	require.Nil(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval3}))
+	require.Nil(t, h.Process(NewSysBatchScheduler, eval3))
+	require.Equal(t, "complete", h.Evals[2].Status)
+
+	// Ensure no failed TG allocs
+	require.Equal(t, 0, len(h.Evals[2].FailedTGAllocs))
+
+	require.Len(t, h.Plans, 2)
+	require.Len(t, h.Plans[1].NodeAllocation, 1)
+	// Ensure all NodeAllocations are from the third eval
+	for _, allocs := range h.Plans[1].NodeAllocation {
+		require.Len(t, allocs, 1)
+		require.Equal(t, eval3.ID, allocs[0].EvalID)
+	}
+
+	ws := memdb.NewWatchSet()
+
+	allocsNodeOne, err := h.State.AllocsByNode(ws, node.ID)
+	require.NoError(t, err)
+	require.Len(t, allocsNodeOne, 1)
+
+	allocsNodeTwo, err := h.State.AllocsByNode(ws, nodeB.ID)
+	require.NoError(t, err)
+	require.Len(t, allocsNodeTwo, 1)
+
+	allocsNodeThree, err := h.State.AllocsByNode(ws, nodeBTwo.ID)
+	require.NoError(t, err)
+	require.Len(t, allocsNodeThree, 1)
+}
+
+// Ensure no errors are reported when a lack of eligible nodes prevents placement
+func TestSysBatch_ExistingAllocNoNodes(t *testing.T) {
+	h := NewHarness(t)
+
+	// Create a node
+	node := mock.Node()
+	require.NoError(t, node.ComputeClass())
+	require.Nil(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
+
+	// Make a sysbatch job
+	job := mock.SystemBatchJob()
+	job.Meta = map[string]string{"version": "1"}
+	require.Nil(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), job))
+
+	// Evaluate the job
+	eval := &structs.Evaluation{
+		Namespace:   structs.DefaultNamespace,
+		ID:          uuid.Generate(),
+		Priority:    job.Priority,
+		TriggeredBy: structs.EvalTriggerJobRegister,
+		JobID:       job.ID,
+		Status:      structs.EvalStatusPending,
+	}
+
+	require.Nil(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
+	require.Nil(t, h.Process(NewSysBatchScheduler, eval))
+	require.Equal(t, "complete", h.Evals[0].Status)
+
+	// QueuedAllocations is drained
+	val, ok := h.Evals[0].QueuedAllocations["pinger"]
+	require.True(t, ok)
+	require.Equal(t, 0, val)
+
+	// A single plan was created
+	require.Equal(t, 1, len(h.Plans))
+
+	// Mark the node as ineligible
+	node.SchedulingEligibility = structs.NodeSchedulingIneligible
+
+	// Evaluate the job
+	eval2 := &structs.Evaluation{
+		Namespace:   structs.DefaultNamespace,
+		ID:          uuid.Generate(),
+		Priority:    job.Priority,
+		TriggeredBy: structs.EvalTriggerNodeUpdate,
+		JobID:       job.ID,
+		NodeID:      node.ID,
+		Status:      structs.EvalStatusPending,
+	}
+	require.Nil(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval2}))
+	require.Nil(t, h.Process(NewSysBatchScheduler, eval2))
+	require.Equal(t, "complete", h.Evals[1].Status)
+
+	// Create a new job version and deploy it
+	job2 := job.Copy()
+	job2.Meta["version"] = "2"
+	require.Nil(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), job2))
+
+	// Run the evaluation as a plan
+	eval3 := &structs.Evaluation{
+		Namespace:    structs.DefaultNamespace,
+		ID:           uuid.Generate(),
+		Priority:     job2.Priority,
+		TriggeredBy:  structs.EvalTriggerJobRegister,
+		JobID:        job2.ID,
+		Status:       structs.EvalStatusPending,
+		AnnotatePlan: true,
+	}
+
+	// Ensure the new eval is complete
+	require.Nil(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval3}))
+	require.Nil(t, h.Process(NewSysBatchScheduler, eval3))
+	require.Equal(t, "complete", h.Evals[2].Status)
+
+	// Ensure there are no FailedTGAllocs and nothing queued
+	require.Equal(t, 0, len(h.Evals[2].FailedTGAllocs))
+	require.Equal(t, 0, h.Evals[2].QueuedAllocations["pinger"])
+}
+
+func TestSysBatch_ConstraintErrors(t *testing.T) {
+	h := NewHarness(t)
+
+	// Register some nodes; node is declared outside the loop so the last
+	// one can be marked ineligible below.
+	// The tag "aaaaaa" is hashed so that the nodes are processed
+	// in an order other than good, good, bad
+	var node *structs.Node
+	for _, tag := range []string{"aaaaaa", "foo", "foo", "foo"} {
+		node = mock.Node()
+		node.Meta["tag"] = tag
+		require.NoError(t, node.ComputeClass())
+		require.Nil(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
+	}
+
+	// Mark the last node as ineligible
+	node.SchedulingEligibility = structs.NodeSchedulingIneligible
+
+	// Make a job with a constraint that matches a subset of the nodes
+	job := mock.SystemBatchJob()
+	job.Priority = 100
+	job.Constraints = append(job.Constraints,
+		&structs.Constraint{
+			LTarget: "${meta.tag}",
+			RTarget: "foo",
+			Operand: "=",
+		})
+
+	require.Nil(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), job))
+
+	// Evaluate the job
+	eval := &structs.Evaluation{
+		Namespace:   structs.DefaultNamespace,
+		ID:          uuid.Generate(),
+		Priority:    job.Priority,
+		TriggeredBy: structs.EvalTriggerJobRegister,
+		JobID:       job.ID,
+		Status:      structs.EvalStatusPending,
+	}
+
+	require.Nil(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
+	require.Nil(t, h.Process(NewSysBatchScheduler, eval))
+	require.Equal(t, "complete", h.Evals[0].Status)
+
+	// QueuedAllocations is drained
+	val, ok := h.Evals[0].QueuedAllocations["pinger"]
+	require.True(t, ok)
+	require.Equal(t, 0, val)
+
+	// The single plan has two NodeAllocations
+	require.Equal(t, 1, len(h.Plans))
+	require.Nil(t, h.Plans[0].Annotations)
+	require.Equal(t, 2, len(h.Plans[0].NodeAllocation))
+
+	// Two nodes were allocated and are running
+	ws := memdb.NewWatchSet()
+	as, err := h.State.AllocsByJob(ws, structs.DefaultNamespace, job.ID, false)
+	require.Nil(t, err)
+
+	running := 0
+	for _, a := range as {
+		if a.Job.Status == "running" {
+			running++
+		}
+	}
+
+	require.Equal(t, 2, len(as))
+	require.Equal(t, 2, running)
+
+	// Ensure there are no failed allocations
+	require.Equal(t, 0, len(h.Evals[0].FailedTGAllocs))
+}
+
+func TestSysBatch_ChainedAlloc(t *testing.T) {
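+	// This test verifies that replacement allocations for an updated sysbatch
+	// job are chained to the originals via the PreviousAllocation field.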
+	h := NewHarness(t)
+
+	// Create some nodes
+	_ = createNodes(t, h, 10)
+
+	// Create a sysbatch job
+	job := mock.SystemBatchJob()
+	require.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), job))
+
+	// Create a mock evaluation to register the job
+	eval := &structs.Evaluation{
+		Namespace:   structs.DefaultNamespace,
+		ID:          uuid.Generate(),
+		Priority:    job.Priority,
+		TriggeredBy: structs.EvalTriggerJobRegister,
+		JobID:       job.ID,
+		Status:      structs.EvalStatusPending,
+	}
+	require.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
+
+	// Process the evaluation
+	err := h.Process(NewSysBatchScheduler, eval)
+	require.NoError(t, err)
+
+	var allocIDs []string
+	for _, allocList := range h.Plans[0].NodeAllocation {
+		for _, alloc := range allocList {
+			allocIDs = append(allocIDs, alloc.ID)
+		}
+	}
+	sort.Strings(allocIDs)
+
+	// Create a new harness to invoke the scheduler again
+	h1 := NewHarnessWithState(t, h.State)
+	job1 := mock.SystemBatchJob()
+	job1.ID = job.ID
+	job1.TaskGroups[0].Tasks[0].Env = make(map[string]string)
+	job1.TaskGroups[0].Tasks[0].Env["foo"] = "bar"
+	require.NoError(t, h1.State.UpsertJob(structs.MsgTypeTestSetup, h1.NextIndex(), job1))
+
+	// Insert two more nodes
+	for i := 0; i < 2; i++ {
+		node := mock.Node()
+		require.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
+	}
+
+	// Create a mock evaluation to update the job
+	eval1 := &structs.Evaluation{
+		Namespace:   structs.DefaultNamespace,
+		ID:          uuid.Generate(),
+		Priority:    job1.Priority,
+		TriggeredBy: structs.EvalTriggerJobRegister,
+		JobID:       job1.ID,
+		Status:      structs.EvalStatusPending,
+	}
+	require.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval1}))
+
+	// Process the evaluation
+	err = h1.Process(NewSysBatchScheduler, eval1)
+	require.NoError(t, err)
+
+	require.Len(t, h1.Plans, 1)
+	plan := h1.Plans[0]
+
+	// Collect all the chained allocation ids and the new allocations which
+	// don't have any chained allocations
+	var prevAllocs []string
+	var newAllocs []string
+	for _, allocList := range plan.NodeAllocation {
+		for _, alloc := range allocList {
+			if alloc.PreviousAllocation == "" {
+				newAllocs = append(newAllocs, alloc.ID)
+				continue
+			}
+			prevAllocs = append(prevAllocs, alloc.PreviousAllocation)
+		}
+	}
+	sort.Strings(prevAllocs)
+
+	// Ensure that the new allocations have their corresponding original
+	// allocation ids
+	require.Equal(t, allocIDs, prevAllocs)
+
+	// Ensure the two new allocations don't have any chained allocations
+	require.Len(t, newAllocs, 2)
+}
+
+func TestSysBatch_PlanWithDrainedNode(t *testing.T) {
+	h := NewHarness(t)
+
+	// Register two nodes with two different classes
+	node := mock.Node()
+	node.NodeClass = "green"
+	node.Drain = true
+	require.NoError(t, node.ComputeClass())
+	require.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
+
+	node2 := mock.Node()
+	node2.NodeClass = "blue"
+	require.NoError(t, node2.ComputeClass())
+	require.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node2))
+
+	// Create a sysbatch job with two task groups, each constrained on node class
+	job := mock.SystemBatchJob()
+	tg1 := job.TaskGroups[0]
+	tg1.Constraints = append(tg1.Constraints,
+		&structs.Constraint{
+			LTarget: "${node.class}",
+			RTarget: "green",
+			Operand: "==",
+		})
+
+	tg2 := tg1.Copy()
+	tg2.Name = "pinger2"
+	tg2.Constraints[0].RTarget = "blue"
+	job.TaskGroups = append(job.TaskGroups, tg2)
+	require.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), job))
+
+	// Create an allocation on each node
+	alloc := mock.SysBatchAlloc()
+	alloc.Job = job
+	alloc.JobID = job.ID
+	alloc.NodeID = node.ID
+	alloc.Name = "my-sysbatch.pinger[0]"
+	alloc.DesiredTransition.Migrate = helper.BoolToPtr(true)
+	alloc.TaskGroup = "pinger"
+
+	alloc2 := mock.SysBatchAlloc()
+	alloc2.Job = job
+	alloc2.JobID = job.ID
+	alloc2.NodeID = node2.ID
+	alloc2.Name = "my-sysbatch.pinger2[0]"
+	alloc2.TaskGroup = "pinger2"
+	require.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Allocation{alloc, alloc2}))
+
+	// Create a mock evaluation to deal with drain
+	eval := &structs.Evaluation{
+		Namespace:   structs.DefaultNamespace,
+		ID:          uuid.Generate(),
+		Priority:    50,
+		TriggeredBy: structs.EvalTriggerNodeUpdate,
+		JobID:       job.ID,
+		NodeID:      node.ID,
+		Status:      structs.EvalStatusPending,
+	}
+	require.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
+
+	// Process the evaluation
+	err := h.Process(NewSysBatchScheduler, eval)
+	require.NoError(t, err)
+
+	// Ensure a single plan
+	require.Len(t, h.Plans, 1)
+	plan := h.Plans[0]
+
+	// Ensure the plan evicted the alloc on the drained node
+	planned := plan.NodeUpdate[node.ID]
+	require.Len(t, plan.NodeUpdate[node.ID], 1)
+
+	// Ensure the plan didn't place
+	require.Empty(t, plan.NodeAllocation)
+
+	// Ensure the allocation is stopped
+	require.Equal(t, structs.AllocDesiredStatusStop, planned[0].DesiredStatus)
+
+	h.AssertEvalStatus(t, structs.EvalStatusComplete)
+}
+
+func TestSysBatch_QueuedAllocsMultTG(t *testing.T) {
+	h := NewHarness(t)
+
+	// Register two nodes with two different classes
+	node := mock.Node()
+	node.NodeClass = "green"
+	require.NoError(t, node.ComputeClass())
+	require.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
+
+	node2 := mock.Node()
+	node2.NodeClass = "blue"
+	require.NoError(t, node2.ComputeClass())
+	require.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node2))
+
+	// Create a sysbatch job with two task groups, each constrained on node class
+	job := mock.SystemBatchJob()
+	tg1 := job.TaskGroups[0]
+	tg1.Constraints = append(tg1.Constraints,
+		&structs.Constraint{
+			LTarget: "${node.class}",
+			RTarget: "green",
+			Operand: "==",
+		})
+
+	tg2 := tg1.Copy()
+	tg2.Name = "pinger2"
+	tg2.Constraints[0].RTarget = "blue"
+	job.TaskGroups = append(job.TaskGroups, tg2)
+	require.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), job))
+
+	// Create a mock evaluation to deal with the node update
+	eval := &structs.Evaluation{
+		Namespace:   structs.DefaultNamespace,
+		ID:          uuid.Generate(),
+		Priority:    50,
+		TriggeredBy: structs.EvalTriggerNodeUpdate,
+		JobID:       job.ID,
+		NodeID:      node.ID,
+		Status:      structs.EvalStatusPending,
+	}
+	require.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
+
+	// Process the evaluation
+	err := h.Process(NewSysBatchScheduler, eval)
+	require.NoError(t, err)
+
+	// Ensure a single plan
+	require.Len(t, h.Plans, 1)
+
+	qa := h.Evals[0].QueuedAllocations
+	require.Zero(t, qa["pinger"])
+	require.Zero(t, qa["pinger2"])
+
+	h.AssertEvalStatus(t, structs.EvalStatusComplete)
+}
+
+func TestSysBatch_Preemption(t *testing.T) {
+	h := NewHarness(t)
+
+	// Create nodes
+	nodes := make([]*structs.Node, 0)
+	for i := 0; i < 2; i++ {
+		node := mock.Node()
+		// Populate both the legacy Resources struct and the newer NodeResources struct.
+		// TODO: remove in 0.11
+		
node.Resources = &structs.Resources{ + CPU: 3072, + MemoryMB: 5034, + DiskMB: 20 * 1024, + Networks: []*structs.NetworkResource{{ + Device: "eth0", + CIDR: "192.168.0.100/32", + MBits: 1000, + }}, + } + node.NodeResources = &structs.NodeResources{ + Cpu: structs.NodeCpuResources{CpuShares: 3072}, + Memory: structs.NodeMemoryResources{MemoryMB: 5034}, + Disk: structs.NodeDiskResources{DiskMB: 20 * 1024}, + Networks: []*structs.NetworkResource{{ + Device: "eth0", + CIDR: "192.168.0.100/32", + MBits: 1000, + }}, + NodeNetworks: []*structs.NodeNetworkResource{{ + Mode: "host", + Device: "eth0", + Addresses: []structs.NodeNetworkAddress{{ + Family: structs.NodeNetworkAF_IPv4, + Alias: "default", + Address: "192.168.0.100", + }}, + }}, + } + require.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node)) + nodes = append(nodes, node) + } + + // Enable Preemption + err := h.State.SchedulerSetConfig(h.NextIndex(), &structs.SchedulerConfiguration{ + PreemptionConfig: structs.PreemptionConfig{ + SysBatchSchedulerEnabled: true, + }, + }) + require.NoError(t, err) + + // Create some low priority batch jobs and allocations for them + // One job uses a reserved port + job1 := mock.BatchJob() + job1.Type = structs.JobTypeBatch + job1.Priority = 20 + job1.TaskGroups[0].Tasks[0].Resources = &structs.Resources{ + CPU: 512, + MemoryMB: 1024, + Networks: []*structs.NetworkResource{{ + MBits: 200, + ReservedPorts: []structs.Port{{ + Label: "web", + Value: 80, + }}, + }}, + } + + alloc1 := mock.Alloc() + alloc1.Job = job1 + alloc1.JobID = job1.ID + alloc1.NodeID = nodes[0].ID + alloc1.Name = "my-job[0]" + alloc1.TaskGroup = job1.TaskGroups[0].Name + alloc1.AllocatedResources = &structs.AllocatedResources{ + Tasks: map[string]*structs.AllocatedTaskResources{ + "web": { + Cpu: structs.AllocatedCpuResources{CpuShares: 512}, + Memory: structs.AllocatedMemoryResources{MemoryMB: 1024}, + Networks: []*structs.NetworkResource{{ + Device: "eth0", + IP: "192.168.0.100", + ReservedPorts: []structs.Port{{Label: "web", Value: 80}}, + MBits: 200, + }}, + }, + }, + Shared: structs.AllocatedSharedResources{DiskMB: 5 * 1024}, + } + require.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), job1)) + + job2 := mock.BatchJob() + job2.Type = structs.JobTypeBatch + job2.Priority = 20 + job2.TaskGroups[0].Tasks[0].Resources = &structs.Resources{ + CPU: 512, + MemoryMB: 1024, + Networks: []*structs.NetworkResource{{MBits: 200}}, + } + + alloc2 := mock.Alloc() + alloc2.Job = job2 + alloc2.JobID = job2.ID + alloc2.NodeID = nodes[0].ID + alloc2.Name = "my-job[2]" + alloc2.TaskGroup = job2.TaskGroups[0].Name + alloc2.AllocatedResources = &structs.AllocatedResources{ + Tasks: map[string]*structs.AllocatedTaskResources{ + "web": { + Cpu: structs.AllocatedCpuResources{CpuShares: 512}, + Memory: structs.AllocatedMemoryResources{MemoryMB: 1024}, + Networks: []*structs.NetworkResource{{ + Device: "eth0", + IP: "192.168.0.100", + MBits: 200, + }}, + }, + }, + Shared: structs.AllocatedSharedResources{DiskMB: 5 * 1024}, + } + require.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), job2)) + + job3 := mock.Job() + job3.Type = structs.JobTypeBatch + job3.Priority = 40 + job3.TaskGroups[0].Tasks[0].Resources = &structs.Resources{ + CPU: 1024, + MemoryMB: 2048, + Networks: []*structs.NetworkResource{{ + Device: "eth0", + MBits: 400, + }}, + } + + alloc3 := mock.Alloc() + alloc3.Job = job3 + alloc3.JobID = job3.ID + alloc3.NodeID = nodes[0].ID + alloc3.Name = "my-job[0]" + alloc3.TaskGroup = 
job3.TaskGroups[0].Name
+	alloc3.AllocatedResources = &structs.AllocatedResources{
+		Tasks: map[string]*structs.AllocatedTaskResources{
+			"web": {
+				Cpu:    structs.AllocatedCpuResources{CpuShares: 1024},
+				Memory: structs.AllocatedMemoryResources{MemoryMB: 25},
+				Networks: []*structs.NetworkResource{{
+					Device:        "eth0",
+					IP:            "192.168.0.100",
+					ReservedPorts: []structs.Port{{Label: "web", Value: 80}},
+					MBits:         400,
+				}},
+			},
+		},
+		Shared: structs.AllocatedSharedResources{DiskMB: 5 * 1024},
+	}
+	require.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Allocation{alloc1, alloc2, alloc3}))
+
+	// Create a high priority job and allocs for it
+	// These allocs should not be preempted
+
+	job4 := mock.BatchJob()
+	job4.Type = structs.JobTypeBatch
+	job4.Priority = 100
+	job4.TaskGroups[0].Tasks[0].Resources = &structs.Resources{
+		CPU:      1024,
+		MemoryMB: 2048,
+		Networks: []*structs.NetworkResource{{MBits: 100}},
+	}
+
+	alloc4 := mock.Alloc()
+	alloc4.Job = job4
+	alloc4.JobID = job4.ID
+	alloc4.NodeID = nodes[0].ID
+	alloc4.Name = "my-job4[0]"
+	alloc4.TaskGroup = job4.TaskGroups[0].Name
+	alloc4.AllocatedResources = &structs.AllocatedResources{
+		Tasks: map[string]*structs.AllocatedTaskResources{
+			"web": {
+				Cpu: structs.AllocatedCpuResources{
+					CpuShares: 1024,
+				},
+				Memory: structs.AllocatedMemoryResources{
+					MemoryMB: 2048,
+				},
+				Networks: []*structs.NetworkResource{
+					{
+						Device:        "eth0",
+						IP:            "192.168.0.100",
+						ReservedPorts: []structs.Port{{Label: "web", Value: 80}},
+						MBits:         100,
+					},
+				},
+			},
+		},
+		Shared: structs.AllocatedSharedResources{
+			DiskMB: 2 * 1024,
+		},
+	}
+	require.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), job4))
+	require.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Allocation{alloc4}))
+
+	// Create a sysbatch job such that it would need to preempt both allocs to succeed
+	job := mock.SystemBatchJob()
+	job.Priority = 100
+	job.TaskGroups[0].Tasks[0].Resources = &structs.Resources{
+		CPU:      1948,
+		MemoryMB: 256,
+		Networks: []*structs.NetworkResource{{
+			MBits:        800,
+			DynamicPorts: []structs.Port{{Label: "http"}},
+		}},
+	}
+	require.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), job))
+
+	// Create a mock evaluation to register the job
+	eval := &structs.Evaluation{
+		Namespace:   structs.DefaultNamespace,
+		ID:          uuid.Generate(),
+		Priority:    job.Priority,
+		TriggeredBy: structs.EvalTriggerJobRegister,
+		JobID:       job.ID,
+		Status:      structs.EvalStatusPending,
+	}
+	require.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
+
+	// Process the evaluation
+	err = h.Process(NewSysBatchScheduler, eval)
+	require.NoError(t, err)
+
+	// Ensure a single plan
+	require.Equal(t, 1, len(h.Plans))
+	plan := h.Plans[0]
+
+	// Ensure the plan doesn't have annotations
+	require.Nil(t, plan.Annotations)
+
+	// Ensure the plan allocated on both nodes
+	var planned []*structs.Allocation
+	preemptingAllocId := ""
+	require.Equal(t, 2, len(plan.NodeAllocation))
+
+	// The alloc that got placed on node 1 is the preemptor
+	for _, allocList := range plan.NodeAllocation {
+		planned = append(planned, allocList...)
+		for _, alloc := range allocList {
+			if alloc.NodeID == nodes[0].ID {
+				preemptingAllocId = alloc.ID
+			}
+		}
+	}
+
+	// Lookup the allocations by JobID
+	ws := memdb.NewWatchSet()
+	out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
+	require.NoError(t, err)
+
+	// Ensure all allocations placed
+	require.Equal(t, 2, len(out))
+
+	// Verify that one node has preempted allocs
+	require.NotNil(t, plan.NodePreemptions[nodes[0].ID])
+	preemptedAllocs := plan.NodePreemptions[nodes[0].ID]
+
+	// Verify that three jobs have preempted allocs
+	require.Equal(t, 3, len(preemptedAllocs))
+
+	expectedPreemptedJobIDs := []string{job1.ID, job2.ID, job3.ID}
+
+	// We expect job1, job2, and job3 to have preempted allocations;
+	// job4 should not have any allocs preempted
+	for _, alloc := range preemptedAllocs {
+		require.Contains(t, expectedPreemptedJobIDs, alloc.JobID)
+	}
+
+	// Look up the preempted allocs by job ID
+	ws = memdb.NewWatchSet()
+
+	for _, jobId := range expectedPreemptedJobIDs {
+		out, err = h.State.AllocsByJob(ws, structs.DefaultNamespace, jobId, false)
+		require.NoError(t, err)
+		for _, alloc := range out {
+			require.Equal(t, structs.AllocDesiredStatusEvict, alloc.DesiredStatus)
+			require.Equal(t, fmt.Sprintf("Preempted by alloc ID %v", preemptingAllocId), alloc.DesiredDescription)
+		}
+	}
+
+	h.AssertEvalStatus(t, structs.EvalStatusComplete)
+}
+
+func TestSysBatch_canHandle(t *testing.T) {
+	s := SystemScheduler{sysbatch: true}
+	t.Run("sysbatch register", func(t *testing.T) {
+		require.True(t, s.canHandle(structs.EvalTriggerJobRegister))
+	})
+	t.Run("sysbatch scheduled", func(t *testing.T) {
+		require.False(t, s.canHandle(structs.EvalTriggerScheduled))
+	})
+	t.Run("sysbatch periodic", func(t *testing.T) {
+		require.True(t, s.canHandle(structs.EvalTriggerPeriodicJob))
+	})
+}
+
+// createNodes registers n mock nodes with the harness state and returns them.
+func createNodes(t *testing.T, h *Harness, n int) []*structs.Node {
+	nodes := make([]*structs.Node, n)
+	for i := 0; i < n; i++ {
+		node := mock.Node()
+		nodes[i] = node
+		require.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
+	}
+	return nodes
+}
diff --git a/scheduler/system_sched_test.go b/scheduler/system_system_test.go
similarity index 84%
rename from scheduler/system_sched_test.go
rename to scheduler/system_system_test.go
index 35ed1ce5189d..e3cff0e646b7 100644
--- a/scheduler/system_sched_test.go
+++ b/scheduler/system_system_test.go
@@ -19,10 +19,7 @@ func TestSystemSched_JobRegister(t *testing.T) {
 	h := NewHarness(t)
 
 	// Create some nodes
-	for i := 0; i < 10; i++ {
-		node := mock.Node()
-		require.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
-	}
+	_ = createNodes(t, h, 10)
 
 	// Create a job
 	job := mock.SystemJob()
@@ -41,29 +38,21 @@ func TestSystemSched_JobRegister(t *testing.T) {
 
 	// Process the evaluation
 	err := h.Process(NewSystemScheduler, eval)
-	if err != nil {
-		t.Fatalf("err: %v", err)
-	}
+	require.NoError(t, err)
 
 	// Ensure a single plan
-	if len(h.Plans) != 1 {
-		t.Fatalf("bad: %#v", h.Plans)
-	}
+	require.Len(t, h.Plans, 1)
 	plan := h.Plans[0]
 
-	// Ensure the plan doesn't have annotations.
-	if plan.Annotations != nil {
-		t.Fatalf("expected no annotations")
-	}
+	// Ensure the plan does not have annotations
+	require.Nil(t, plan.Annotations, "expected no annotations")
 
 	// Ensure the plan allocated
 	var planned []*structs.Allocation
 	for _, allocList := range plan.NodeAllocation {
 		planned = append(planned, allocList...)
} - if len(planned) != 10 { - t.Fatalf("bad: %#v", plan) - } + require.Len(t, planned, 10) // Lookup the allocations by JobID ws := memdb.NewWatchSet() @@ -71,20 +60,16 @@ func TestSystemSched_JobRegister(t *testing.T) { require.NoError(t, err) // Ensure all allocations placed - if len(out) != 10 { - t.Fatalf("bad: %#v", out) - } + require.Len(t, out, 10) // Check the available nodes - if count, ok := out[0].Metrics.NodesAvailable["dc1"]; !ok || count != 10 { - t.Fatalf("bad: %#v", out[0].Metrics) - } + count, ok := out[0].Metrics.NodesAvailable["dc1"] + require.True(t, ok) + require.Equal(t, 10, count, "bad metrics %#v:", out[0].Metrics) // Ensure no allocations are queued queued := h.Evals[0].QueuedAllocations["web"] - if queued != 0 { - t.Fatalf("expected queued allocations: %v, actual: %v", 0, queued) - } + require.Equal(t, 0, queued, "unexpected queued allocations") h.AssertEvalStatus(t, structs.EvalStatusComplete) } @@ -93,10 +78,7 @@ func TestSystemSched_JobRegister_StickyAllocs(t *testing.T) { h := NewHarness(t) // Create some nodes - for i := 0; i < 10; i++ { - node := mock.Node() - require.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node)) - } + _ = createNodes(t, h, 10) // Create a job job := mock.SystemJob() @@ -168,7 +150,7 @@ func TestSystemSched_JobRegister_StickyAllocs(t *testing.T) { func TestSystemSched_JobRegister_EphemeralDiskConstraint(t *testing.T) { h := NewHarness(t) - // Create a nodes + // Create a node node := mock.Node() require.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node)) @@ -237,7 +219,7 @@ func TestSystemSched_JobRegister_EphemeralDiskConstraint(t *testing.T) { func TestSystemSched_ExhaustResources(t *testing.T) { h := NewHarness(t) - // Create a nodes + // Create a node node := mock.Node() require.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node)) @@ -412,12 +394,7 @@ func TestSystemSched_JobRegister_AddNode(t *testing.T) { h := NewHarness(t) // Create some nodes - var nodes []*structs.Node - for i := 0; i < 10; i++ { - node := mock.Node() - nodes = append(nodes, node) - require.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node)) - } + nodes := createNodes(t, h, 10) // Generate a fake job with allocations job := mock.SystemJob() @@ -455,9 +432,7 @@ func TestSystemSched_JobRegister_AddNode(t *testing.T) { } // Ensure a single plan - if len(h.Plans) != 1 { - t.Fatalf("bad: %#v", h.Plans) - } + require.Len(t, h.Plans, 1) plan := h.Plans[0] // Ensure the plan had no node updates @@ -465,19 +440,14 @@ func TestSystemSched_JobRegister_AddNode(t *testing.T) { for _, updateList := range plan.NodeUpdate { update = append(update, updateList...) } - if len(update) != 0 { - t.Log(len(update)) - t.Fatalf("bad: %#v", plan) - } + require.Empty(t, update) // Ensure the plan allocated on the new node var planned []*structs.Allocation for _, allocList := range plan.NodeAllocation { planned = append(planned, allocList...) 
} - if len(planned) != 1 { - t.Fatalf("bad: %#v", plan) - } + require.Len(t, planned, 1) // Ensure it allocated on the right node if _, ok := plan.NodeAllocation[node.ID]; !ok { @@ -534,12 +504,7 @@ func TestSystemSched_JobModify(t *testing.T) { h := NewHarness(t) // Create some nodes - var nodes []*structs.Node - for i := 0; i < 10; i++ { - node := mock.Node() - nodes = append(nodes, node) - require.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node)) - } + nodes := createNodes(t, h, 10) // Generate a fake job with allocations job := mock.SystemJob() @@ -590,14 +555,10 @@ func TestSystemSched_JobModify(t *testing.T) { // Process the evaluation err := h.Process(NewSystemScheduler, eval) - if err != nil { - t.Fatalf("err: %v", err) - } + require.NoError(t, err) // Ensure a single plan - if len(h.Plans) != 1 { - t.Fatalf("bad: %#v", h.Plans) - } + require.Len(t, h.Plans, 1) plan := h.Plans[0] // Ensure the plan evicted all allocs @@ -605,18 +566,14 @@ func TestSystemSched_JobModify(t *testing.T) { for _, updateList := range plan.NodeUpdate { update = append(update, updateList...) } - if len(update) != len(allocs) { - t.Fatalf("bad: %#v", plan) - } + require.Equal(t, len(allocs), len(update)) // Ensure the plan allocated var planned []*structs.Allocation for _, allocList := range plan.NodeAllocation { planned = append(planned, allocList...) } - if len(planned) != 10 { - t.Fatalf("bad: %#v", plan) - } + require.Len(t, planned, 10) // Lookup the allocations by JobID ws := memdb.NewWatchSet() @@ -625,9 +582,7 @@ func TestSystemSched_JobModify(t *testing.T) { // Ensure all allocations placed out, _ = structs.FilterTerminalAllocs(out) - if len(out) != 10 { - t.Fatalf("bad: %#v", out) - } + require.Len(t, out, 10) h.AssertEvalStatus(t, structs.EvalStatusComplete) } @@ -636,12 +591,7 @@ func TestSystemSched_JobModify_Rolling(t *testing.T) { h := NewHarness(t) // Create some nodes - var nodes []*structs.Node - for i := 0; i < 10; i++ { - node := mock.Node() - nodes = append(nodes, node) - require.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node)) - } + nodes := createNodes(t, h, 10) // Generate a fake job with allocations job := mock.SystemJob() @@ -739,12 +689,7 @@ func TestSystemSched_JobModify_InPlace(t *testing.T) { h := NewHarness(t) // Create some nodes - var nodes []*structs.Node - for i := 0; i < 10; i++ { - node := mock.Node() - nodes = append(nodes, node) - require.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node)) - } + nodes := createNodes(t, h, 10) // Generate a fake job with allocations job := mock.SystemJob() @@ -766,7 +711,7 @@ func TestSystemSched_JobModify_InPlace(t *testing.T) { job2.ID = job.ID require.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), job2)) - // Create a mock evaluation to deal with drain + // Create a mock evaluation to deal with update eval := &structs.Evaluation{ Namespace: structs.DefaultNamespace, ID: uuid.Generate(), @@ -779,14 +724,10 @@ func TestSystemSched_JobModify_InPlace(t *testing.T) { // Process the evaluation err := h.Process(NewSystemScheduler, eval) - if err != nil { - t.Fatalf("err: %v", err) - } + require.NoError(t, err) // Ensure a single plan - if len(h.Plans) != 1 { - t.Fatalf("bad: %#v", h.Plans) - } + require.Len(t, h.Plans, 1) plan := h.Plans[0] // Ensure the plan did not evict any allocs @@ -794,22 +735,17 @@ func TestSystemSched_JobModify_InPlace(t *testing.T) { for _, updateList := range plan.NodeUpdate { update = append(update, 
updateList...) } - if len(update) != 0 { - t.Fatalf("bad: %#v", plan) - } + require.Empty(t, update) // Ensure the plan updated the existing allocs var planned []*structs.Allocation for _, allocList := range plan.NodeAllocation { planned = append(planned, allocList...) } - if len(planned) != 10 { - t.Fatalf("bad: %#v", plan) - } + require.Len(t, planned, 10) + for _, p := range planned { - if p.Job != job2 { - t.Fatalf("should update job") - } + require.Equal(t, job2, p.Job, "should update job") } // Lookup the allocations by JobID @@ -818,18 +754,14 @@ func TestSystemSched_JobModify_InPlace(t *testing.T) { require.NoError(t, err) // Ensure all allocations placed - if len(out) != 10 { - t.Fatalf("bad: %#v", out) - } + require.Len(t, out, 10) h.AssertEvalStatus(t, structs.EvalStatusComplete) // Verify the network did not change rp := structs.Port{Label: "admin", Value: 5000} for _, alloc := range out { for _, resources := range alloc.TaskResources { - if resources.Networks[0].ReservedPorts[0] != rp { - t.Fatalf("bad: %#v", alloc) - } + require.Equal(t, rp, resources.Networks[0].ReservedPorts[0]) } } } @@ -838,12 +770,7 @@ func TestSystemSched_JobDeregister_Purged(t *testing.T) { h := NewHarness(t) // Create some nodes - var nodes []*structs.Node - for i := 0; i < 10; i++ { - node := mock.Node() - nodes = append(nodes, node) - require.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node)) - } + nodes := createNodes(t, h, 10) // Generate a fake job with allocations job := mock.SystemJob() @@ -875,21 +802,15 @@ func TestSystemSched_JobDeregister_Purged(t *testing.T) { // Process the evaluation err := h.Process(NewSystemScheduler, eval) - if err != nil { - t.Fatalf("err: %v", err) - } + require.NoError(t, err) // Ensure a single plan - if len(h.Plans) != 1 { - t.Fatalf("bad: %#v", h.Plans) - } + require.Len(t, h.Plans, 1) plan := h.Plans[0] // Ensure the plan evicted the job from all nodes. for _, node := range nodes { - if len(plan.NodeUpdate[node.ID]) != 1 { - t.Fatalf("bad: %#v", plan) - } + require.Len(t, plan.NodeUpdate[node.ID], 1) } // Lookup the allocations by JobID @@ -899,9 +820,7 @@ func TestSystemSched_JobDeregister_Purged(t *testing.T) { // Ensure no remaining allocations out, _ = structs.FilterTerminalAllocs(out) - if len(out) != 0 { - t.Fatalf("bad: %#v", out) - } + require.Empty(t, out) h.AssertEvalStatus(t, structs.EvalStatusComplete) } @@ -910,12 +829,7 @@ func TestSystemSched_JobDeregister_Stopped(t *testing.T) { h := NewHarness(t) // Create some nodes - var nodes []*structs.Node - for i := 0; i < 10; i++ { - node := mock.Node() - nodes = append(nodes, node) - require.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node)) - } + nodes := createNodes(t, h, 10) // Generate a fake job with allocations job := mock.SystemJob() @@ -949,21 +863,15 @@ func TestSystemSched_JobDeregister_Stopped(t *testing.T) { // Process the evaluation err := h.Process(NewSystemScheduler, eval) - if err != nil { - t.Fatalf("err: %v", err) - } + require.NoError(t, err) // Ensure a single plan - if len(h.Plans) != 1 { - t.Fatalf("bad: %#v", h.Plans) - } + require.Len(t, h.Plans, 1) plan := h.Plans[0] // Ensure the plan evicted the job from all nodes. 
for _, node := range nodes {
-		if len(plan.NodeUpdate[node.ID]) != 1 {
-			t.Fatalf("bad: %#v", plan)
-		}
+		require.Len(t, plan.NodeUpdate[node.ID], 1)
 	}
 
 	// Lookup the allocations by JobID
@@ -973,9 +881,7 @@
 
 	// Ensure no remaining allocations
 	out, _ = structs.FilterTerminalAllocs(out)
-	if len(out) != 0 {
-		t.Fatalf("bad: %#v", out)
-	}
+	require.Empty(t, out)
 
 	h.AssertEvalStatus(t, structs.EvalStatusComplete)
 }
@@ -1014,35 +920,27 @@ func TestSystemSched_NodeDown(t *testing.T) {
 
 	// Process the evaluation
 	err := h.Process(NewSystemScheduler, eval)
-	if err != nil {
-		t.Fatalf("err: %v", err)
-	}
+	require.NoError(t, err)
 
 	// Ensure a single plan
-	if len(h.Plans) != 1 {
-		t.Fatalf("bad: %#v", h.Plans)
-	}
+	require.Len(t, h.Plans, 1)
 	plan := h.Plans[0]
 
 	// Ensure the plan evicted all allocs
-	if len(plan.NodeUpdate[node.ID]) != 1 {
-		t.Fatalf("bad: %#v", plan)
-	}
+	require.Len(t, plan.NodeUpdate[node.ID], 1)
 
 	// Ensure the plan updated the allocation.
-	var planned []*structs.Allocation
+	planned := make([]*structs.Allocation, 0)
 	for _, allocList := range plan.NodeUpdate {
 		planned = append(planned, allocList...)
 	}
-	if len(planned) != 1 {
-		t.Fatalf("bad: %#v", plan)
-	}
+	require.Len(t, planned, 1)
 
 	// Ensure the allocations is stopped
-	if p := planned[0]; p.DesiredStatus != structs.AllocDesiredStatusStop &&
-		p.ClientStatus != structs.AllocClientStatusLost {
-		t.Fatalf("bad: %#v", planned[0])
-	}
+	p := planned[0]
+	require.Equal(t, structs.AllocDesiredStatusStop, p.DesiredStatus)
+	// note: the old assertion on ClientStatus == lost was incorrect and has
+	// been removed; the actual client status at this point is pending
 
 	h.AssertEvalStatus(t, structs.EvalStatusComplete)
 }
@@ -1080,32 +978,23 @@ func TestSystemSched_NodeDrain_Down(t *testing.T) {
 	require.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
 
 	// Process the evaluation
-	err := h.Process(NewServiceScheduler, eval)
-	if err != nil {
-		t.Fatalf("err: %v", err)
-	}
+	err := h.Process(NewSystemScheduler, eval) // note: previously (incorrectly) used the service scheduler
+	require.NoError(t, err)
 
 	// Ensure a single plan
-	if len(h.Plans) != 1 {
-		t.Fatalf("bad: %#v", h.Plans)
-	}
+	require.Len(t, h.Plans, 1)
 	plan := h.Plans[0]
 
 	// Ensure the plan evicted non terminal allocs
-	if len(plan.NodeUpdate[node.ID]) != 1 {
-		t.Fatalf("bad: %#v", plan)
-	}
+	require.Len(t, plan.NodeUpdate[node.ID], 1)
 
 	// Ensure that the allocation is marked as lost
-	var lostAllocs []string
+	var lost []string
 	for _, alloc := range plan.NodeUpdate[node.ID] {
-		lostAllocs = append(lostAllocs, alloc.ID)
+		lost = append(lost, alloc.ID)
 	}
-	expected := []string{alloc.ID}
+	require.Equal(t, []string{alloc.ID}, lost)
 
-	if !reflect.DeepEqual(lostAllocs, expected) {
-		t.Fatalf("expected: %v, actual: %v", expected, lostAllocs)
-	}
 	h.AssertEvalStatus(t, structs.EvalStatusComplete)
 }
@@ -1143,35 +1032,24 @@ func TestSystemSched_NodeDrain(t *testing.T) {
 
 	// Process the evaluation
 	err := h.Process(NewSystemScheduler, eval)
-	if err != nil {
-		t.Fatalf("err: %v", err)
-	}
+	require.NoError(t, err)
 
 	// Ensure a single plan
-	if len(h.Plans) != 1 {
-		t.Fatalf("bad: %#v", h.Plans)
-	}
+	require.Len(t, h.Plans, 1)
 	plan := h.Plans[0]
 
 	// Ensure the plan evicted all allocs
-	if len(plan.NodeUpdate[node.ID]) != 1 {
-		t.Fatalf("bad: %#v", plan)
-	}
+	require.Len(t, plan.NodeUpdate[node.ID], 1)
 
 	// Ensure the plan updated the allocation.
- var planned []*structs.Allocation + planned := make([]*structs.Allocation, 0) for _, allocList := range plan.NodeUpdate { planned = append(planned, allocList...) } - if len(planned) != 1 { - t.Log(len(planned)) - t.Fatalf("bad: %#v", plan) - } + require.Len(t, planned, 1) // Ensure the allocations is stopped - if planned[0].DesiredStatus != structs.AllocDesiredStatusStop { - t.Fatalf("bad: %#v", planned[0]) - } + require.Equal(t, structs.AllocDesiredStatusStop, planned[0].DesiredStatus) h.AssertEvalStatus(t, structs.EvalStatusComplete) } @@ -1194,7 +1072,7 @@ func TestSystemSched_NodeUpdate(t *testing.T) { alloc.Name = "my-job.web[0]" require.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Allocation{alloc})) - // Create a mock evaluation to deal + // Create a mock evaluation to deal with the node update eval := &structs.Evaluation{ Namespace: structs.DefaultNamespace, ID: uuid.Generate(), @@ -1208,14 +1086,12 @@ func TestSystemSched_NodeUpdate(t *testing.T) { // Process the evaluation err := h.Process(NewSystemScheduler, eval) - if err != nil { - t.Fatalf("err: %v", err) - } + require.NoError(t, err) // Ensure that queued allocations is zero - if val, ok := h.Evals[0].QueuedAllocations["web"]; !ok || val != 0 { - t.Fatalf("bad queued allocations: %#v", h.Evals[0].QueuedAllocations) - } + val, ok := h.Evals[0].QueuedAllocations["web"] + require.True(t, ok) + require.Zero(t, val) h.AssertEvalStatus(t, structs.EvalStatusComplete) } @@ -1225,16 +1101,13 @@ func TestSystemSched_RetryLimit(t *testing.T) { h.Planner = &RejectPlan{h} // Create some nodes - for i := 0; i < 10; i++ { - node := mock.Node() - require.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node)) - } + _ = createNodes(t, h, 10) // Create a job job := mock.SystemJob() require.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), job)) - // Create a mock evaluation to deregister the job + // Create a mock evaluation to register the job eval := &structs.Evaluation{ Namespace: structs.DefaultNamespace, ID: uuid.Generate(), @@ -1247,14 +1120,10 @@ func TestSystemSched_RetryLimit(t *testing.T) { // Process the evaluation err := h.Process(NewSystemScheduler, eval) - if err != nil { - t.Fatalf("err: %v", err) - } + require.NoError(t, err) // Ensure multiple plans - if len(h.Plans) == 0 { - t.Fatalf("bad: %#v", h.Plans) - } + require.NotEmpty(t, h.Plans) // Lookup the allocations by JobID ws := memdb.NewWatchSet() @@ -1262,9 +1131,7 @@ func TestSystemSched_RetryLimit(t *testing.T) { require.NoError(t, err) // Ensure no allocations placed - if len(out) != 0 { - t.Fatalf("bad: %#v", out) - } + require.Empty(t, out) // Should hit the retry limit h.AssertEvalStatus(t, structs.EvalStatusFailed) @@ -1272,7 +1139,7 @@ func TestSystemSched_RetryLimit(t *testing.T) { // This test ensures that the scheduler doesn't increment the queued allocation // count for a task group when allocations can't be created on currently -// available nodes because of constrain mismatches. +// available nodes because of constraint mismatches. 
func TestSystemSched_Queued_With_Constraints(t *testing.T) { h := NewHarness(t) @@ -1285,7 +1152,7 @@ func TestSystemSched_Queued_With_Constraints(t *testing.T) { job := mock.SystemJob() require.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), job)) - // Create a mock evaluation to deal + // Create a mock evaluation to deal with the node update eval := &structs.Evaluation{ Namespace: structs.DefaultNamespace, ID: uuid.Generate(), @@ -1299,20 +1166,17 @@ func TestSystemSched_Queued_With_Constraints(t *testing.T) { // Process the evaluation err := h.Process(NewSystemScheduler, eval) - if err != nil { - t.Fatalf("err: %v", err) - } + require.NoError(t, err) // Ensure that queued allocations is zero - if val, ok := h.Evals[0].QueuedAllocations["web"]; !ok || val != 0 { - t.Fatalf("bad queued allocations: %#v", h.Evals[0].QueuedAllocations) - } - + val, ok := h.Evals[0].QueuedAllocations["web"] + require.True(t, ok) + require.Zero(t, val) } // This test ensures that the scheduler correctly ignores ineligible // nodes when scheduling due to a new node being added. The job has two -// task groups contrained to a particular node class. The desired behavior +// task groups constrained to a particular node class. The desired behavior // should be that the TaskGroup constrained to the newly added node class is // added and that the TaskGroup constrained to the ineligible node is ignored. func TestSystemSched_JobConstraint_AddNode(t *testing.T) { @@ -1322,13 +1186,13 @@ func TestSystemSched_JobConstraint_AddNode(t *testing.T) { var node *structs.Node node = mock.Node() node.NodeClass = "Class-A" - node.ComputeClass() + require.NoError(t, node.ComputeClass()) require.Nil(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node)) var nodeB *structs.Node nodeB = mock.Node() nodeB.NodeClass = "Class-B" - nodeB.ComputeClass() + require.NoError(t, nodeB.ComputeClass()) require.Nil(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), nodeB)) // Make a job with two task groups, each constraint to a node class @@ -1365,7 +1229,6 @@ func TestSystemSched_JobConstraint_AddNode(t *testing.T) { JobID: job.ID, Status: structs.EvalStatusPending, } - require.Nil(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval})) require.Nil(t, h.Process(NewSystemScheduler, eval)) @@ -1414,7 +1277,7 @@ func TestSystemSched_JobConstraint_AddNode(t *testing.T) { // Add a new node Class-B var nodeBTwo *structs.Node nodeBTwo = mock.Node() - nodeBTwo.ComputeClass() + require.NoError(t, nodeBTwo.ComputeClass()) nodeBTwo.NodeClass = "Class-B" require.Nil(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), nodeBTwo)) @@ -1467,7 +1330,7 @@ func TestSystemSched_ExistingAllocNoNodes(t *testing.T) { var node *structs.Node // Create a node node = mock.Node() - node.ComputeClass() + require.NoError(t, node.ComputeClass()) require.Nil(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node)) // Make a job @@ -1498,6 +1361,7 @@ func TestSystemSched_ExistingAllocNoNodes(t *testing.T) { // Mark the node as ineligible node.SchedulingEligibility = structs.NodeSchedulingIneligible + // Evaluate the job eval2 := &structs.Evaluation{ Namespace: structs.DefaultNamespace, @@ -1549,7 +1413,7 @@ func TestSystemSched_ConstraintErrors(t *testing.T) { for _, tag := range []string{"aaaaaa", "foo", "foo", "foo"} { node = mock.Node() node.Meta["tag"] = tag - node.ComputeClass() + require.NoError(t, node.ComputeClass()) require.Nil(t, 
h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
  }
@@ -1614,10 +1478,7 @@ func TestSystemSched_ChainedAlloc(t *testing.T) {
  h := NewHarness(t)

  // Create some nodes
- for i := 0; i < 10; i++ {
-   node := mock.Node()
-   require.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))
- }
+ _ = createNodes(t, h, 10)

  // Create a job
  job := mock.SystemJob()
@@ -1633,10 +1494,10 @@ func TestSystemSched_ChainedAlloc(t *testing.T) {
    Status: structs.EvalStatusPending,
  }
  require.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval}))
+
  // Process the evaluation
- if err := h.Process(NewSystemScheduler, eval); err != nil {
-   t.Fatalf("err: %v", err)
- }
+ err := h.Process(NewSystemScheduler, eval)
+ require.NoError(t, err)

  var allocIDs []string
  for _, allocList := range h.Plans[0].NodeAllocation {
@@ -1675,6 +1536,7 @@ func TestSystemSched_ChainedAlloc(t *testing.T) {
    t.Fatalf("err: %v", err)
  }

+ require.Len(t, h1.Plans, 1)
  plan := h1.Plans[0]

  // Collect all the chained allocation ids and the new allocations which
@@ -1694,14 +1556,10 @@ func TestSystemSched_ChainedAlloc(t *testing.T) {

  // Ensure that the new allocations has their corresponding original
  // allocation ids
- if !reflect.DeepEqual(prevAllocs, allocIDs) {
-   t.Fatalf("expected: %v, actual: %v", len(allocIDs), len(prevAllocs))
- }
+ require.Equal(t, allocIDs, prevAllocs)

  // Ensuring two new allocations don't have any chained allocations
- if len(newAllocs) != 2 {
-   t.Fatalf("expected: %v, actual: %v", 2, len(newAllocs))
- }
+ require.Len(t, newAllocs, 2)
}

func TestSystemSched_PlanWithDrainedNode(t *testing.T) {
  h := NewHarness(t)

  // Register two nodes with two different classes
  node := mock.Node()
  node.NodeClass = "green"
  node.Drain = true
- node.ComputeClass()
+ require.NoError(t, node.ComputeClass())
  require.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))

  node2 := mock.Node()
  node2.NodeClass = "blue"
- node2.ComputeClass()
+ require.NoError(t, node2.ComputeClass())
  require.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node2))

  // Create a Job with two task groups, each constrained on node class
@@ -1766,31 +1624,21 @@ func TestSystemSched_PlanWithDrainedNode(t *testing.T) {

  // Process the evaluation
  err := h.Process(NewSystemScheduler, eval)
- if err != nil {
-   t.Fatalf("err: %v", err)
- }
+ require.NoError(t, err)

  // Ensure a single plan
- if len(h.Plans) != 1 {
-   t.Fatalf("bad: %#v", h.Plans)
- }
+ require.Len(t, h.Plans, 1)
  plan := h.Plans[0]

  // Ensure the plan evicted the alloc on the failed node
  planned := plan.NodeUpdate[node.ID]
- if len(planned) != 1 {
-   t.Fatalf("bad: %#v", plan)
- }
+ require.Len(t, plan.NodeUpdate[node.ID], 1)

  // Ensure the plan didn't place
- if len(plan.NodeAllocation) != 0 {
-   t.Fatalf("bad: %#v", plan)
- }
+ require.Empty(t, plan.NodeAllocation)

  // Ensure the allocations is stopped
- if planned[0].DesiredStatus != structs.AllocDesiredStatusStop {
-   t.Fatalf("bad: %#v", planned[0])
- }
+ require.Equal(t, structs.AllocDesiredStatusStop, planned[0].DesiredStatus)

  h.AssertEvalStatus(t, structs.EvalStatusComplete)
}
@@ -1801,12 +1649,12 @@ func TestSystemSched_QueuedAllocsMultTG(t *testing.T) {

  // Register two nodes with two different classes
  node := mock.Node()
  node.NodeClass = "green"
- node.ComputeClass()
+ require.NoError(t, node.ComputeClass())
  require.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))

  node2 := mock.Node()
node2.NodeClass = "blue" - node2.ComputeClass() + require.NoError(t, node2.ComputeClass()) require.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node2)) // Create a Job with two task groups, each constrained on node class @@ -1839,19 +1687,14 @@ func TestSystemSched_QueuedAllocsMultTG(t *testing.T) { // Process the evaluation err := h.Process(NewSystemScheduler, eval) - if err != nil { - t.Fatalf("err: %v", err) - } + require.NoError(t, err) // Ensure a single plan - if len(h.Plans) != 1 { - t.Fatalf("bad: %#v", h.Plans) - } + require.Len(t, h.Plans, 1) qa := h.Evals[0].QueuedAllocations - if qa["web"] != 0 || qa["web2"] != 0 { - t.Fatalf("bad queued allocations %#v", qa) - } + require.Zero(t, qa["pinger"]) + require.Zero(t, qa["pinger2"]) h.AssertEvalStatus(t, structs.EvalStatusComplete) } @@ -1860,63 +1703,50 @@ func TestSystemSched_Preemption(t *testing.T) { h := NewHarness(t) // Create nodes - var nodes []*structs.Node + nodes := make([]*structs.Node, 0) for i := 0; i < 2; i++ { node := mock.Node() - // TODO(preetha): remove in 0.11 + // TODO: remove in 0.11 node.Resources = &structs.Resources{ CPU: 3072, MemoryMB: 5034, DiskMB: 20 * 1024, - Networks: []*structs.NetworkResource{ - { - Device: "eth0", - CIDR: "192.168.0.100/32", - MBits: 1000, - }, - }, + Networks: []*structs.NetworkResource{{ + Device: "eth0", + CIDR: "192.168.0.100/32", + MBits: 1000, + }}, } node.NodeResources = &structs.NodeResources{ - Cpu: structs.NodeCpuResources{ - CpuShares: 3072, - }, - Memory: structs.NodeMemoryResources{ - MemoryMB: 5034, - }, - Disk: structs.NodeDiskResources{ - DiskMB: 20 * 1024, - }, - Networks: []*structs.NetworkResource{ - { - Device: "eth0", - CIDR: "192.168.0.100/32", - MBits: 1000, - }, - }, - NodeNetworks: []*structs.NodeNetworkResource{ - { - Mode: "host", - Device: "eth0", - Addresses: []structs.NodeNetworkAddress{ - { - Family: structs.NodeNetworkAF_IPv4, - Alias: "default", - Address: "192.168.0.100", - }, - }, - }, - }, + Cpu: structs.NodeCpuResources{CpuShares: 3072}, + Memory: structs.NodeMemoryResources{MemoryMB: 5034}, + Disk: structs.NodeDiskResources{DiskMB: 20 * 1024}, + Networks: []*structs.NetworkResource{{ + Device: "eth0", + CIDR: "192.168.0.100/32", + MBits: 1000, + }}, + NodeNetworks: []*structs.NodeNetworkResource{{ + Mode: "host", + Device: "eth0", + Addresses: []structs.NodeNetworkAddress{{ + Family: structs.NodeNetworkAF_IPv4, + Alias: "default", + Address: "192.168.0.100", + }}, + }}, } require.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node)) nodes = append(nodes, node) } // Enable Preemption - h.State.SchedulerSetConfig(h.NextIndex(), &structs.SchedulerConfiguration{ + err := h.State.SchedulerSetConfig(h.NextIndex(), &structs.SchedulerConfiguration{ PreemptionConfig: structs.PreemptionConfig{ SystemSchedulerEnabled: true, }, }) + require.NoError(t, err) // Create some low priority batch jobs and allocations for them // One job uses a reserved port @@ -1926,17 +1756,13 @@ func TestSystemSched_Preemption(t *testing.T) { job1.TaskGroups[0].Tasks[0].Resources = &structs.Resources{ CPU: 512, MemoryMB: 1024, - Networks: []*structs.NetworkResource{ - { - MBits: 200, - ReservedPorts: []structs.Port{ - { - Label: "web", - Value: 80, - }, - }, - }, - }, + Networks: []*structs.NetworkResource{{ + MBits: 200, + ReservedPorts: []structs.Port{{ + Label: "web", + Value: 80, + }}, + }}, } alloc1 := mock.Alloc() @@ -1948,27 +1774,18 @@ func TestSystemSched_Preemption(t *testing.T) { alloc1.AllocatedResources = 
&structs.AllocatedResources{ Tasks: map[string]*structs.AllocatedTaskResources{ "web": { - Cpu: structs.AllocatedCpuResources{ - CpuShares: 512, - }, - Memory: structs.AllocatedMemoryResources{ - MemoryMB: 1024, - }, - Networks: []*structs.NetworkResource{ - { - Device: "eth0", - IP: "192.168.0.100", - ReservedPorts: []structs.Port{{Label: "web", Value: 80}}, - MBits: 200, - }, - }, + Cpu: structs.AllocatedCpuResources{CpuShares: 512}, + Memory: structs.AllocatedMemoryResources{MemoryMB: 1024}, + Networks: []*structs.NetworkResource{{ + Device: "eth0", + IP: "192.168.0.100", + ReservedPorts: []structs.Port{{Label: "web", Value: 80}}, + MBits: 200, + }}, }, }, - Shared: structs.AllocatedSharedResources{ - DiskMB: 5 * 1024, - }, + Shared: structs.AllocatedSharedResources{DiskMB: 5 * 1024}, } - require.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), job1)) job2 := mock.BatchJob() @@ -1977,11 +1794,7 @@ func TestSystemSched_Preemption(t *testing.T) { job2.TaskGroups[0].Tasks[0].Resources = &structs.Resources{ CPU: 512, MemoryMB: 1024, - Networks: []*structs.NetworkResource{ - { - MBits: 200, - }, - }, + Networks: []*structs.NetworkResource{{MBits: 200}}, } alloc2 := mock.Alloc() @@ -1993,24 +1806,16 @@ func TestSystemSched_Preemption(t *testing.T) { alloc2.AllocatedResources = &structs.AllocatedResources{ Tasks: map[string]*structs.AllocatedTaskResources{ "web": { - Cpu: structs.AllocatedCpuResources{ - CpuShares: 512, - }, - Memory: structs.AllocatedMemoryResources{ - MemoryMB: 1024, - }, - Networks: []*structs.NetworkResource{ - { - Device: "eth0", - IP: "192.168.0.100", - MBits: 200, - }, - }, + Cpu: structs.AllocatedCpuResources{CpuShares: 512}, + Memory: structs.AllocatedMemoryResources{MemoryMB: 1024}, + Networks: []*structs.NetworkResource{{ + Device: "eth0", + IP: "192.168.0.100", + MBits: 200, + }}, }, }, - Shared: structs.AllocatedSharedResources{ - DiskMB: 5 * 1024, - }, + Shared: structs.AllocatedSharedResources{DiskMB: 5 * 1024}, } require.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), job2)) @@ -2020,12 +1825,10 @@ func TestSystemSched_Preemption(t *testing.T) { job3.TaskGroups[0].Tasks[0].Resources = &structs.Resources{ CPU: 1024, MemoryMB: 2048, - Networks: []*structs.NetworkResource{ - { - Device: "eth0", - MBits: 400, - }, - }, + Networks: []*structs.NetworkResource{{ + Device: "eth0", + MBits: 400, + }}, } alloc3 := mock.Alloc() @@ -2037,25 +1840,17 @@ func TestSystemSched_Preemption(t *testing.T) { alloc3.AllocatedResources = &structs.AllocatedResources{ Tasks: map[string]*structs.AllocatedTaskResources{ "web": { - Cpu: structs.AllocatedCpuResources{ - CpuShares: 1024, - }, - Memory: structs.AllocatedMemoryResources{ - MemoryMB: 25, - }, - Networks: []*structs.NetworkResource{ - { - Device: "eth0", - IP: "192.168.0.100", - ReservedPorts: []structs.Port{{Label: "web", Value: 80}}, - MBits: 400, - }, - }, + Cpu: structs.AllocatedCpuResources{CpuShares: 1024}, + Memory: structs.AllocatedMemoryResources{MemoryMB: 25}, + Networks: []*structs.NetworkResource{{ + Device: "eth0", + IP: "192.168.0.100", + ReservedPorts: []structs.Port{{Label: "web", Value: 80}}, + MBits: 400, + }}, }, }, - Shared: structs.AllocatedSharedResources{ - DiskMB: 5 * 1024, - }, + Shared: structs.AllocatedSharedResources{DiskMB: 5 * 1024}, } require.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Allocation{alloc1, alloc2, alloc3})) @@ -2068,11 +1863,7 @@ func TestSystemSched_Preemption(t *testing.T) { 
job4.TaskGroups[0].Tasks[0].Resources = &structs.Resources{ CPU: 1024, MemoryMB: 2048, - Networks: []*structs.NetworkResource{ - { - MBits: 100, - }, - }, + Networks: []*structs.NetworkResource{{MBits: 100}}, } alloc4 := mock.Alloc() @@ -2112,12 +1903,10 @@ func TestSystemSched_Preemption(t *testing.T) { job.TaskGroups[0].Tasks[0].Resources = &structs.Resources{ CPU: 1948, MemoryMB: 256, - Networks: []*structs.NetworkResource{ - { - MBits: 800, - DynamicPorts: []structs.Port{{Label: "http"}}, - }, - }, + Networks: []*structs.NetworkResource{{ + MBits: 800, + DynamicPorts: []structs.Port{{Label: "http"}}, + }}, } require.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), job)) @@ -2133,21 +1922,20 @@ func TestSystemSched_Preemption(t *testing.T) { require.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval})) // Process the evaluation - err := h.Process(NewSystemScheduler, eval) - require := require.New(t) - require.Nil(err) + err = h.Process(NewSystemScheduler, eval) + require.Nil(t, err) // Ensure a single plan - require.Equal(1, len(h.Plans)) + require.Equal(t, 1, len(h.Plans)) plan := h.Plans[0] - // Ensure the plan doesn't have annotations. - require.Nil(plan.Annotations) + // Ensure the plan doesn't have annotations + require.Nil(t, plan.Annotations) // Ensure the plan allocated on both nodes var planned []*structs.Allocation preemptingAllocId := "" - require.Equal(2, len(plan.NodeAllocation)) + require.Equal(t, 2, len(plan.NodeAllocation)) // The alloc that got placed on node 1 is the preemptor for _, allocList := range plan.NodeAllocation { @@ -2162,37 +1950,49 @@ func TestSystemSched_Preemption(t *testing.T) { // Lookup the allocations by JobID ws := memdb.NewWatchSet() out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) - require.NoError(err) + require.NoError(t, err) // Ensure all allocations placed - require.Equal(2, len(out)) + require.Equal(t, 2, len(out)) // Verify that one node has preempted allocs - require.NotNil(plan.NodePreemptions[nodes[0].ID]) + require.NotNil(t, plan.NodePreemptions[nodes[0].ID]) preemptedAllocs := plan.NodePreemptions[nodes[0].ID] // Verify that three jobs have preempted allocs - require.Equal(3, len(preemptedAllocs)) + require.Equal(t, 3, len(preemptedAllocs)) expectedPreemptedJobIDs := []string{job1.ID, job2.ID, job3.ID} // We expect job1, job2 and job3 to have preempted allocations // job4 should not have any allocs preempted for _, alloc := range preemptedAllocs { - require.Contains(expectedPreemptedJobIDs, alloc.JobID) + require.Contains(t, expectedPreemptedJobIDs, alloc.JobID) } // Look up the preempted allocs by job ID ws = memdb.NewWatchSet() for _, jobId := range expectedPreemptedJobIDs { out, err = h.State.AllocsByJob(ws, structs.DefaultNamespace, jobId, false) - require.NoError(err) + require.NoError(t, err) for _, alloc := range out { - require.Equal(structs.AllocDesiredStatusEvict, alloc.DesiredStatus) - require.Equal(fmt.Sprintf("Preempted by alloc ID %v", preemptingAllocId), alloc.DesiredDescription) + require.Equal(t, structs.AllocDesiredStatusEvict, alloc.DesiredStatus) + require.Equal(t, fmt.Sprintf("Preempted by alloc ID %v", preemptingAllocId), alloc.DesiredDescription) } } h.AssertEvalStatus(t, structs.EvalStatusComplete) +} +func TestSystemSched_canHandle(t *testing.T) { + s := SystemScheduler{sysbatch: false} + t.Run("system register", func(t *testing.T) { + require.True(t, s.canHandle(structs.EvalTriggerJobRegister)) + }) + t.Run("system 
scheduled", func(t *testing.T) { + require.False(t, s.canHandle(structs.EvalTriggerScheduled)) + }) + t.Run("system periodic", func(t *testing.T) { + require.False(t, s.canHandle(structs.EvalTriggerPeriodicJob)) + }) } diff --git a/scheduler/util.go b/scheduler/util.go index 7261f67deb8f..75f291e6410a 100644 --- a/scheduler/util.go +++ b/scheduler/util.go @@ -60,21 +60,19 @@ func (d *diffResult) Append(other *diffResult) { // need to be migrated (node is draining), the allocs that need to be evicted // (no longer required), those that should be ignored and those that are lost // that need to be replaced (running on a lost node). -// -// job is the job whose allocs is going to be diff-ed. -// taintedNodes is an index of the nodes which are either down or in drain mode -// by name. -// required is a set of allocations that must exist. -// allocs is a list of non terminal allocations. -// terminalAllocs is an index of the latest terminal allocations by name. -func diffSystemAllocsForNode(job *structs.Job, nodeID string, - eligibleNodes, taintedNodes map[string]*structs.Node, - required map[string]*structs.TaskGroup, allocs []*structs.Allocation, - terminalAllocs map[string]*structs.Allocation) *diffResult { - result := &diffResult{} +func diffSystemAllocsForNode( + job *structs.Job, // job whose allocs are going to be diff-ed + nodeID string, + eligibleNodes map[string]*structs.Node, + taintedNodes map[string]*structs.Node, // nodes which are down or in drain (by node name) + required map[string]*structs.TaskGroup, // set of allocations that must exist + allocs []*structs.Allocation, // non-terminal allocations that exist + terminal structs.TerminalByNodeByName, // latest terminal allocations (by node, name) +) *diffResult { + result := new(diffResult) // Scan the existing updates - existing := make(map[string]struct{}) + existing := make(map[string]struct{}) // set of alloc names for _, exist := range allocs { // Index the existing node name := exist.Name @@ -102,6 +100,17 @@ func diffSystemAllocsForNode(job *structs.Job, nodeID string, }) continue } + + // If we are a sysbatch job and terminal, ignore (or stop?) the alloc + if job.Type == structs.JobTypeSysBatch && exist.TerminalStatus() { + result.ignore = append(result.ignore, allocTuple{ + Name: name, + TaskGroup: tg, + Alloc: exist, + }) + continue + } + // If we are on a tainted node, we must migrate if we are a service or // if the batch allocation did not finish if node, ok := taintedNodes[exist.NodeID]; ok { @@ -154,14 +163,38 @@ func diffSystemAllocsForNode(job *structs.Job, nodeID string, // Scan the required groups for name, tg := range required { + // Check for an existing allocation - _, ok := existing[name] + if _, ok := existing[name]; !ok { + + // Check for a terminal sysbatch allocation, which should be not placed + // again unless the job has been updated. + if job.Type == structs.JobTypeSysBatch { + if alloc, termExists := terminal.Get(nodeID, name); termExists { + // the alloc is terminal, but now the job has been updated + if job.JobModifyIndex != alloc.Job.JobModifyIndex { + result.update = append(result.update, allocTuple{ + Name: name, + TaskGroup: tg, + Alloc: alloc, + }) + } else { + // alloc is terminal and job unchanged, leave it alone + result.ignore = append(result.ignore, allocTuple{ + Name: name, + TaskGroup: tg, + Alloc: alloc, + }) + } + continue + } + } + + // Require a placement if no existing allocation. 
If there
+     // is an existing allocation, we would have checked for a potential
+     // update or ignore above. Ignore placements for tainted or
+     // ineligible nodes
-   // Require a placement if no existing allocation. If there
-   // is an existing allocation, we would have checked for a potential
-   // update or ignore above. Ignore placements for tainted or
-   // ineligible nodes
-   if !ok {
      // Tainted and ineligible nodes for a non existing alloc
      // should be filtered out and not count towards ignore or place
      if _, tainted := taintedNodes[nodeID]; tainted {
        continue
      }
@@ -171,10 +204,11 @@ func diffSystemAllocsForNode(job *structs.Job, nodeID string,
        continue
      }

+     termOnNode, _ := terminal.Get(nodeID, name)
      allocTuple := allocTuple{
        Name:      name,
        TaskGroup: tg,
-       Alloc:     terminalAllocs[name],
+       Alloc:     termOnNode,
      }

      // If the new allocation isn't annotated with a previous allocation //
@@ -183,6 +217,7 @@ func diffSystemAllocsForNode(job *structs.Job, nodeID string,
      if allocTuple.Alloc == nil || allocTuple.Alloc.NodeID != nodeID {
        allocTuple.Alloc = &structs.Allocation{NodeID: nodeID}
      }
+
      result.place = append(result.place, allocTuple)
    }
  }
@@ -191,15 +226,13 @@
// diffSystemAllocs is like diffSystemAllocsForNode however, the allocations in the
// diffResult contain the specific nodeID they should be allocated on.
-//
-// job is the job whose allocs is going to be diff-ed.
-// nodes is a list of nodes in ready state.
-// taintedNodes is an index of the nodes which are either down or in drain mode
-// by name.
-// allocs is a list of non terminal allocations.
-// terminalAllocs is an index of the latest terminal allocations by name.
-func diffSystemAllocs(job *structs.Job, nodes []*structs.Node, taintedNodes map[string]*structs.Node,
-   allocs []*structs.Allocation, terminalAllocs map[string]*structs.Allocation) *diffResult {
+func diffSystemAllocs(
+   job *structs.Job, // job whose allocations are going to be diff-ed
+   nodes []*structs.Node, // list of nodes in the ready state
+   taintedNodes map[string]*structs.Node, // nodes which are down or in drain mode (by name)
+   allocs []*structs.Allocation, // non-terminal allocations
+   terminal structs.TerminalByNodeByName, // latest terminal allocations (by node, name)
+) *diffResult {

  // Build a mapping of nodes to all their allocs.
  nodeAllocs := make(map[string][]*structs.Allocation, len(allocs))
@@ -219,9 +252,9 @@ func diffSystemAllocs(job *structs.Job, nodes []*structs.Node, taintedNodes map[
  // Create the required task groups.
  required := materializeTaskGroups(job)

- result := &diffResult{}
+ result := new(diffResult)
  for nodeID, allocs := range nodeAllocs {
-   diff := diffSystemAllocsForNode(job, nodeID, eligibleNodes, taintedNodes, required, allocs, terminalAllocs)
+   diff := diffSystemAllocsForNode(job, nodeID, eligibleNodes, taintedNodes, required, allocs, terminal)
    result.Append(diff)
  }
diff --git a/scheduler/util_test.go b/scheduler/util_test.go
index 5c783d7e59db..865b343fdbd2 100644
--- a/scheduler/util_test.go
+++ b/scheduler/util_test.go
@@ -27,6 +27,76 @@ func TestMaterializeTaskGroups(t *testing.T) {
  }
}

+func newNode(name string) *structs.Node {
+ n := mock.Node()
+ n.Name = name
+ return n
+}
+
+func TestDiffSystemAllocsForNode_Sysbatch_terminal(t *testing.T) {
+ // For a sysbatch job, the scheduler should not re-place an allocation
+ // that has become terminal, unless the job has been updated.
+ + job := mock.SystemBatchJob() + required := materializeTaskGroups(job) + + eligible := map[string]*structs.Node{ + "node1": newNode("node1"), + } + + var live []*structs.Allocation // empty + + tainted := map[string]*structs.Node(nil) + + t.Run("current job", func(t *testing.T) { + terminal := structs.TerminalByNodeByName{ + "node1": map[string]*structs.Allocation{ + "my-sysbatch.pinger[0]": &structs.Allocation{ + ID: uuid.Generate(), + NodeID: "node1", + Name: "my-sysbatch.pinger[0]", + Job: job, + ClientStatus: structs.AllocClientStatusComplete, + }, + }, + } + + diff := diffSystemAllocsForNode(job, "node1", eligible, tainted, required, live, terminal) + require.Empty(t, diff.place) + require.Empty(t, diff.update) + require.Empty(t, diff.stop) + require.Empty(t, diff.migrate) + require.Empty(t, diff.lost) + require.True(t, len(diff.ignore) == 1 && diff.ignore[0].Alloc == terminal["node1"]["my-sysbatch.pinger[0]"]) + }) + + t.Run("outdated job", func(t *testing.T) { + previousJob := job.Copy() + previousJob.JobModifyIndex -= 1 + terminal := structs.TerminalByNodeByName{ + "node1": map[string]*structs.Allocation{ + "my-sysbatch.pinger[0]": &structs.Allocation{ + ID: uuid.Generate(), + NodeID: "node1", + Name: "my-sysbatch.pinger[0]", + Job: previousJob, + }, + }, + } + + expAlloc := terminal["node1"]["my-sysbatch.pinger[0]"] + expAlloc.NodeID = "node1" + + diff := diffSystemAllocsForNode(job, "node1", eligible, tainted, required, live, terminal) + require.Empty(t, diff.place) + require.Equal(t, 1, len(diff.update)) + require.Empty(t, diff.stop) + require.Empty(t, diff.migrate) + require.Empty(t, diff.lost) + require.Empty(t, diff.ignore) + }) +} + func TestDiffSystemAllocsForNode(t *testing.T) { job := mock.Job() required := materializeTaskGroups(job) @@ -99,28 +169,30 @@ func TestDiffSystemAllocsForNode(t *testing.T) { } // Have three terminal allocs - terminalAllocs := map[string]*structs.Allocation{ - "my-job.web[4]": { - ID: uuid.Generate(), - NodeID: "zip", - Name: "my-job.web[4]", - Job: job, - }, - "my-job.web[5]": { - ID: uuid.Generate(), - NodeID: "zip", - Name: "my-job.web[5]", - Job: job, - }, - "my-job.web[6]": { - ID: uuid.Generate(), - NodeID: "zip", - Name: "my-job.web[6]", - Job: job, + terminal := structs.TerminalByNodeByName{ + "zip": map[string]*structs.Allocation{ + "my-job.web[4]": { + ID: uuid.Generate(), + NodeID: "zip", + Name: "my-job.web[4]", + Job: job, + }, + "my-job.web[5]": { + ID: uuid.Generate(), + NodeID: "zip", + Name: "my-job.web[5]", + Job: job, + }, + "my-job.web[6]": { + ID: uuid.Generate(), + NodeID: "zip", + Name: "my-job.web[6]", + Job: job, + }, }, } - diff := diffSystemAllocsForNode(job, "zip", eligible, tainted, required, allocs, terminalAllocs) + diff := diffSystemAllocsForNode(job, "zip", eligible, tainted, required, allocs, terminal) place := diff.place update := diff.update migrate := diff.migrate @@ -147,12 +219,14 @@ func TestDiffSystemAllocsForNode(t *testing.T) { require.Equal(t, 6, len(place)) // Ensure that the allocations which are replacements of terminal allocs are - // annotated - for name, alloc := range terminalAllocs { - for _, allocTuple := range diff.place { - if name == allocTuple.Name { - require.True(t, reflect.DeepEqual(alloc, allocTuple.Alloc), - "expected: %#v, actual: %#v", alloc, allocTuple.Alloc) + // annotated. 
+ for _, m := range terminal {
+   for _, alloc := range m {
+     for _, tuple := range diff.place {
+       if alloc.Name == tuple.Name {
+         require.True(t, reflect.DeepEqual(alloc, tuple.Alloc),
+           "expected: %#v, actual: %#v", alloc, tuple.Alloc)
+       }
      }
    }
  }
@@ -199,9 +273,9 @@ func TestDiffSystemAllocsForNode_ExistingAllocIneligibleNode(t *testing.T) {
  }

  // No terminal allocs
- terminalAllocs := map[string]*structs.Allocation{}
+ terminal := make(structs.TerminalByNodeByName)

- diff := diffSystemAllocsForNode(job, eligibleNode.ID, eligible, tainted, required, allocs, terminalAllocs)
+ diff := diffSystemAllocsForNode(job, eligibleNode.ID, eligible, tainted, required, allocs, terminal)
  place := diff.place
  update := diff.update
  migrate := diff.migrate
@@ -276,17 +350,19 @@ func TestDiffSystemAllocs(t *testing.T) {
    },
  }

- // Have three terminal allocs
- terminalAllocs := map[string]*structs.Allocation{
-   "my-job.web[0]": {
-     ID:     uuid.Generate(),
-     NodeID: "pipe",
-     Name:   "my-job.web[0]",
-     Job:    job,
+ // Have one terminal alloc
+ terminal := structs.TerminalByNodeByName{
+   "pipe": map[string]*structs.Allocation{
+     "my-job.web[0]": {
+       ID:     uuid.Generate(),
+       NodeID: "pipe",
+       Name:   "my-job.web[0]",
+       Job:    job,
+     },
    },
  }

- diff := diffSystemAllocs(job, nodes, tainted, allocs, terminalAllocs)
+ diff := diffSystemAllocs(job, nodes, tainted, allocs, terminal)
  place := diff.place
  update := diff.update
  migrate := diff.migrate
@@ -313,12 +389,14 @@ func TestDiffSystemAllocs(t *testing.T) {
  require.Equal(t, 2, len(place))

  // Ensure that the allocations which are replacements of terminal allocs are
- // annotated
- for _, alloc := range terminalAllocs {
-   for _, allocTuple := range diff.place {
-     if alloc.NodeID == allocTuple.Alloc.NodeID {
-       require.True(t, reflect.DeepEqual(alloc, allocTuple.Alloc),
-         "expected: %#v, actual: %#v", alloc, allocTuple.Alloc)
+ // annotated.
+ for _, m := range terminal {
+   for _, alloc := range m {
+     for _, tuple := range diff.place {
+       if alloc.NodeID == tuple.Alloc.NodeID {
+         require.True(t, reflect.DeepEqual(alloc, tuple.Alloc),
+           "expected: %#v, actual: %#v", alloc, tuple.Alloc)
+       }
      }
    }
  }
diff --git a/vendor/github.com/hashicorp/nomad/api/operator.go b/vendor/github.com/hashicorp/nomad/api/operator.go
index d5bc5d061d56..de57bffef4b2 100644
--- a/vendor/github.com/hashicorp/nomad/api/operator.go
+++ b/vendor/github.com/hashicorp/nomad/api/operator.go
@@ -159,9 +159,10 @@ const (
// PreemptionConfig specifies whether preemption is enabled based on scheduler type
type PreemptionConfig struct {
- SystemSchedulerEnabled  bool
- BatchSchedulerEnabled   bool
- ServiceSchedulerEnabled bool
+ SystemSchedulerEnabled   bool
+ SysBatchSchedulerEnabled bool
+ BatchSchedulerEnabled    bool
+ ServiceSchedulerEnabled  bool
}

// SchedulerGetConfiguration is used to query the current Scheduler configuration.
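The scheduler and test hunks above consult the new `structs.TerminalByNodeByName` index through its `Get` helper, whose definition does not appear in these hunks. As a rough, self-contained sketch of the shape that usage implies (the trimmed-down `Allocation` type and the `main` driver are illustrative stand-ins, not the real `nomad/structs` definitions):

```go
package main

import "fmt"

// Allocation is a cut-down stand-in for structs.Allocation,
// carrying only the fields this sketch needs.
type Allocation struct {
	ID     string
	NodeID string
	Name   string
}

// TerminalByNodeByName indexes the latest terminal allocation
// first by node ID, then by allocation name, which is how
// diffSystemAllocsForNode looks up prior sysbatch runs.
type TerminalByNodeByName map[string]map[string]*Allocation

// Get returns the terminal allocation for the node/name pair,
// and whether such an allocation exists.
func (t TerminalByNodeByName) Get(nodeID, name string) (*Allocation, bool) {
	byName, ok := t[nodeID]
	if !ok {
		return nil, false
	}
	alloc, ok := byName[name]
	return alloc, ok
}

func main() {
	terminal := TerminalByNodeByName{
		"node1": {
			"my-sysbatch.pinger[0]": {ID: "a1", NodeID: "node1", Name: "my-sysbatch.pinger[0]"},
		},
	}

	// The scheduler ignores a terminal alloc when the job is unchanged
	// and updates or places otherwise; here we only demonstrate the lookup.
	if alloc, ok := terminal.Get("node1", "my-sysbatch.pinger[0]"); ok {
		fmt.Println("latest terminal alloc:", alloc.ID)
	}
}
```

Keying by node before name is what lets sysbatch treat "ran to completion on this node" separately per client, instead of the old flat name-keyed `terminalAllocs` map.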
diff --git a/website/pages/docs/configuration/server.mdx b/website/pages/docs/configuration/server.mdx index 619911b7351f..9bec8ddfbeee 100644 --- a/website/pages/docs/configuration/server.mdx +++ b/website/pages/docs/configuration/server.mdx @@ -291,9 +291,10 @@ server { scheduler_algorithm = "spread" preemption_config { - batch_scheduler_enabled = true - system_scheduler_enabled = true - service_scheduler_enabled = true + batch_scheduler_enabled = true + system_scheduler_enabled = true + service_scheduler_enabled = true + sysbatch_scheduler_enabled = true } } } diff --git a/website/pages/docs/job-specification/job.mdx b/website/pages/docs/job-specification/job.mdx index c12b83320dbc..b73a6f2f8dfe 100644 --- a/website/pages/docs/job-specification/job.mdx +++ b/website/pages/docs/job-specification/job.mdx @@ -114,7 +114,7 @@ job "docs" { node if any of its allocation statuses become "failed". - `type` `(string: "service")` - Specifies the [Nomad scheduler][scheduler] to - use. Nomad provides the `service`, `system` and `batch` schedulers. + use. Nomad provides the `service`, `system`, `batch`, and `sysbatch` schedulers. - `update` ([Update][update]: nil) - Specifies the task's update strategy. When omitted, rolling updates are disabled. diff --git a/website/pages/docs/job-specification/reschedule.mdx b/website/pages/docs/job-specification/reschedule.mdx index 9234ca725eb3..96d340f473ea 100644 --- a/website/pages/docs/job-specification/reschedule.mdx +++ b/website/pages/docs/job-specification/reschedule.mdx @@ -47,8 +47,8 @@ job "docs" { } ``` -~> The reschedule stanza does not apply to `system` jobs because they run on -every node. +~> The reschedule stanza does not apply to `system` or `sysbatch` jobs because +they run on every node. ## `reschedule` Parameters diff --git a/website/pages/docs/job-specification/restart.mdx b/website/pages/docs/job-specification/restart.mdx index 6e9e771db7e6..84b53ce9fa66 100644 --- a/website/pages/docs/job-specification/restart.mdx +++ b/website/pages/docs/job-specification/restart.mdx @@ -14,7 +14,7 @@ description: The "restart" stanza configures a group's behavior on task failure. ]} /> -The `restart` stanza configures a tasks's behavior on task failure. Restarts +The `restart` stanza configures a task's behavior on task failure. Restarts happen on the client that is running the task. ```hcl @@ -36,9 +36,9 @@ For example, assuming that the task group restart policy is: ```hcl restart { - interval = "30m" attempts = 2 delay = "15s" + interval = "30m" mode = "fail" } ``` @@ -55,9 +55,9 @@ then the effective restart policy for the task will be: ```hcl restart { - interval = "30m" attempts = 5 delay = "15s" + interval = "30m" mode = "fail" } ``` @@ -87,7 +87,7 @@ restart { The values for many of the `restart` parameters vary by job type. Here are the defaults by job type: -- The default batch restart policy is: +- The default restart policy for `batch` jobs is: ```hcl restart { @@ -98,13 +98,13 @@ defaults by job type: } ``` -- The default service and system job restart policy is: +- The default restart policy for `service`, `system`, and `sysbatch` jobs is: ```hcl restart { - interval = "30m" attempts = 2 delay = "15s" + interval = "30m" mode = "fail" } ``` diff --git a/website/pages/docs/schedulers.mdx b/website/pages/docs/schedulers.mdx index 304f6d60c241..120530e2f198 100644 --- a/website/pages/docs/schedulers.mdx +++ b/website/pages/docs/schedulers.mdx @@ -7,9 +7,9 @@ description: Learn about Nomad's various schedulers. 
# Schedulers

-Nomad has three scheduler types that can be used when creating your job:
-`service`, `batch` and `system`. Here we will describe the differences between
-each of these schedulers.
+Nomad has four scheduler types that can be used when creating your job:
+`service`, `batch`, `system`, and `sysbatch`. Here we will describe the
+differences between each of these schedulers.

## Service

@@ -61,8 +61,30 @@ Systems jobs are intended to run until explicitly stopped either by an operator
or [preemption]. If a system task exits it is considered a failure and handled
according to the job's [restart] stanza; system jobs do not have rescheduling.

+## System Batch
+
+The `sysbatch` scheduler is used to register jobs that should be run to completion
+on all clients that meet the job's constraints. The `sysbatch` scheduler will
+schedule jobs similarly to the `system` scheduler, but, like a `batch` job, once
+a task exits successfully it is not restarted on that client.
+
+This scheduler type is useful for issuing "one-off" commands to be run on every
+node in the cluster. Sysbatch jobs can also be created as [periodic] and [parameterized]
+jobs. Since these tasks are managed by Nomad, they can take advantage of job
+updating, service discovery, monitoring, and more.
+
+The `sysbatch` scheduler will preempt lower priority tasks running on a node if there
+is not enough capacity to place the job. See [preemption] for details on how the
+tasks to preempt are chosen.
+
+Sysbatch jobs are intended to run until successful completion, until explicitly
+stopped by an operator, or until evicted through [preemption]. Sysbatch tasks that
+exit with an error are handled according to the job's [restart] stanza.
+
[borg]: https://research.google.com/pubs/pub43438.html
-[sparrow]: https://cs.stanford.edu/~matei/papers/2013/sosp_sparrow.pdf
+[parameterized]: /docs/job-specification/parameterized
+[periodic]: /docs/job-specification/periodic
[preemption]: /docs/internals/scheduling/preemption
[restart]: /docs/job-specification/restart
[reschedule]: /docs/job-specification/reschedule
+[sparrow]: https://cs.stanford.edu/~matei/papers/2013/sosp_sparrow.pdf
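For illustration, a minimal `sysbatch` job spec might look like the following sketch; only the `type = "sysbatch"` line selects this scheduler, and the job name, driver, image, and command here are made up for the example rather than taken from this patch:

```hcl
job "one-off" {
  datacenters = ["dc1"]

  # Run to completion once on every eligible client.
  type = "sysbatch"

  group "sweep" {
    task "sweep" {
      driver = "docker"

      config {
        image   = "busybox:1"
        command = "/bin/sh"
        args    = ["-c", "echo running one-off task on $(hostname)"]
      }
    }
  }
}
```

Registering such a job places one short-lived allocation per compatible node; once each allocation reaches a terminal state, the scheduler leaves it alone until the job is updated, as implemented in `diffSystemAllocsForNode` above.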