Skip to content

Commit

Permalink
scheduler: allow using device ID as attribute (#15455)
Browse files Browse the repository at this point in the history
Devices are fingerprinted as groups of similar devices. This prevented
specifying individual devices by their ID in constraint and affinity rules.

This commit introduces the `${device.ids}` attribute that returns a
comma-separated list of the IDs that are part of the device group. Users can
then use the set operators (such as `set_contains`) to write rules.
  • Loading branch information
lgfa29 authored and philrenaud committed Jan 23, 2023
1 parent 78ee2a3 commit 2e6979d
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .changelog/15455.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:improvement
scheduler: allow using device IDs in `affinity` and `constraint`
```
7 changes: 7 additions & 0 deletions scheduler/feasible.go
Original file line number Diff line number Diff line change
Expand Up @@ -1373,6 +1373,13 @@ func resolveDeviceTarget(target string, d *structs.NodeDeviceResource) (*psstruc

// Handle the interpolations
switch {
case "${device.ids}" == target:
ids := make([]string, len(d.Instances))
for i, device := range d.Instances {
ids[i] = device.ID
}
return psstructs.NewStringAttribute(strings.Join(ids, ",")), true

case "${device.model}" == target:
return psstructs.NewStringAttribute(d.Name), true

Expand Down
28 changes: 28 additions & 0 deletions scheduler/feasible_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2682,6 +2682,11 @@ func TestDeviceChecker(t *testing.T) {
LTarget: "${device.attr.cores_clock}",
RTarget: "800MHz",
},
{
Operand: "set_contains",
LTarget: "${device.ids}",
RTarget: nvidia.Instances[0].ID,
},
},
},
},
Expand Down Expand Up @@ -2715,6 +2720,11 @@ func TestDeviceChecker(t *testing.T) {
LTarget: "${device.attr.cores_clock}",
RTarget: "800MHz",
},
{
Operand: "set_contains",
LTarget: "${device.ids}",
RTarget: fmt.Sprintf("%s,%s", nvidia.Instances[1].ID, nvidia.Instances[0].ID),
},
},
},
},
Expand Down Expand Up @@ -2818,6 +2828,24 @@ func TestDeviceChecker(t *testing.T) {
},
},
},
{
Name: "does not meet ID constraint",
Result: false,
NodeDevices: []*structs.NodeDeviceResource{nvidia},
RequestedDevices: []*structs.RequestedDevice{
{
Name: "nvidia/gpu",
Count: 1,
Constraints: []*structs.Constraint{
{
Operand: "set_contains",
LTarget: "${device.ids}",
RTarget: "not_valid",
},
},
},
},
},
}

for _, c := range cases {
Expand Down
26 changes: 26 additions & 0 deletions website/content/docs/job-specification/device.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,15 @@ follows:
</tr>
</thead>
<tbody>
<tr>
<td>
<code>{'${device.ids}'}</code>
</td>
<td>Comma-separated list of device IDs in the group</td>
<td>
<code>9afa5da1-8f39-25a2-48dc-ba31fd7c0023,c248b547-fed7-4d67-ade5-73a27d280ac4</code>
</td>
</tr>
<tr>
<td>
<code>{'${device.type}'}</code>
Expand Down Expand Up @@ -298,6 +307,23 @@ device "nvidia/gpu" {
}
```

### Affinity Towards Specific GPU Devices

This example uses affinity to indicate a scheduling preference towards specific
GPU devices, using their UUIDs as the selection criteria. Since devices are
fingerprinted as a group, you may specify multiple IDs as a comma-separated
list.

```hcl
device "nvidia/gpu" {
affinity {
attribute = "${device.ids}"
operator = "set_contains"
value = "9afa5da1-8f39-25a2-48dc-ba31fd7c0023,c248b547-fed7-4d67-ade5-73a27d280ac4"
}
}
```

[affinity]: /docs/job-specification/affinity 'Nomad affinity Job Specification'
[constraint]: /docs/job-specification/constraint 'Nomad constraint Job Specification'
[devices]: /docs/devices 'Nomad Device Plugins'

0 comments on commit 2e6979d

Please sign in to comment.