From cf6e988a74350f8dd8e60f0e7335824b018a0510 Mon Sep 17 00:00:00 2001 From: Luiz Aoqui Date: Fri, 2 Dec 2022 15:22:45 -0500 Subject: [PATCH 1/2] scheduler: allow using device ID as attribute Devices are fingerprinted as groups of similar devices. This prevented selecting specific devices by their IDs in constraint and affinity rules. This commit introduces the `${device.ids}` attribute that returns a comma-separated list of IDs that are part of the device group. Users can then use the set operators to write rules. --- scheduler/feasible.go | 7 +++++ scheduler/feasible_test.go | 28 +++++++++++++++++++ .../content/docs/job-specification/device.mdx | 26 +++++++++++++++++ 3 files changed, 61 insertions(+) diff --git a/scheduler/feasible.go b/scheduler/feasible.go index ade5ce3c856c..eb8a1045d590 100644 --- a/scheduler/feasible.go +++ b/scheduler/feasible.go @@ -1373,6 +1373,13 @@ func resolveDeviceTarget(target string, d *structs.NodeDeviceResource) (*psstruc // Handle the interpolations switch { + case "${device.ids}" == target: + ids := make([]string, len(d.Instances)) + for i, device := range d.Instances { + ids[i] = device.ID + } + return psstructs.NewStringAttribute(strings.Join(ids, ",")), true + case "${device.model}" == target: return psstructs.NewStringAttribute(d.Name), true diff --git a/scheduler/feasible_test.go b/scheduler/feasible_test.go index d93ab37a5b06..30fc0df6d3ac 100644 --- a/scheduler/feasible_test.go +++ b/scheduler/feasible_test.go @@ -2682,6 +2682,11 @@ func TestDeviceChecker(t *testing.T) { LTarget: "${device.attr.cores_clock}", RTarget: "800MHz", }, + { + Operand: "set_contains", + LTarget: "${device.ids}", + RTarget: nvidia.Instances[0].ID, + }, }, }, }, @@ -2715,6 +2720,11 @@ func TestDeviceChecker(t *testing.T) { LTarget: "${device.attr.cores_clock}", RTarget: "800MHz", }, + { + Operand: "set_contains", + LTarget: "${device.ids}", + RTarget: fmt.Sprintf("%s,%s", nvidia.Instances[1].ID, nvidia.Instances[0].ID), + }, }, }, }, @@ -2818,6 
+2828,24 @@ func TestDeviceChecker(t *testing.T) { }, }, }, + { + Name: "does not meet ID constraint", + Result: false, + NodeDevices: []*structs.NodeDeviceResource{nvidia}, + RequestedDevices: []*structs.RequestedDevice{ + { + Name: "nvidia/gpu", + Count: 1, + Constraints: []*structs.Constraint{ + { + Operand: "set_contains", + LTarget: "${device.ids}", + RTarget: "not_valid", + }, + }, + }, + }, + }, } for _, c := range cases { diff --git a/website/content/docs/job-specification/device.mdx b/website/content/docs/job-specification/device.mdx index 6a975a390d61..3c97cae109b7 100644 --- a/website/content/docs/job-specification/device.mdx +++ b/website/content/docs/job-specification/device.mdx @@ -103,6 +103,15 @@ follows: + + + {'${device.ids}'} + + Comma-separated list of device IDs in the group + + 9afa5da1-8f39-25a2-48dc-ba31fd7c0023,c248b547-fed7-4d67-ade5-73a27d280ac4 + + {'${device.type}'} @@ -298,6 +307,23 @@ device "nvidia/gpu" { } ``` +### Affinity Towards Specific GPU Devices + +This example uses affinity to indicate scheduling preference towards specific +GPU devices, using their UUIDs as the selection criteria. Since devices are +fingerprinted as a group, you may specify multiple IDs as a comma-separated +list. 
+ +```hcl +device "nvidia/gpu" { + affinity { + attribute = "${device.ids}" + operator = "set_contains" + value = "9afa5da1-8f39-25a2-48dc-ba31fd7c0023,c248b547-fed7-4d67-ade5-73a27d280ac4" + } +} +``` + [affinity]: /docs/job-specification/affinity 'Nomad affinity Job Specification' [constraint]: /docs/job-specification/constraint 'Nomad constraint Job Specification' [devices]: /docs/devices 'Nomad Device Plugins' From 7b5634246f2cbb206f5f6c6d26da6dd904f51fd9 Mon Sep 17 00:00:00 2001 From: Luiz Aoqui Date: Fri, 2 Dec 2022 15:28:48 -0500 Subject: [PATCH 2/2] changelog: add entry for #15455 --- .changelog/15455.txt | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 .changelog/15455.txt diff --git a/.changelog/15455.txt b/.changelog/15455.txt new file mode 100644 index 000000000000..1267340f692b --- /dev/null +++ b/.changelog/15455.txt @@ -0,0 +1,3 @@ +```release-note:improvement +scheduler: allow using device IDs in `affinity` and `constraint` +```