Skip to content

Commit

Permalink
Propagate NVIDIA driver capabilities to nvproxy and seccomp filters.
Browse files Browse the repository at this point in the history
Updates #10856

PiperOrigin-RevId: 698180763
  • Loading branch information
EtiennePerot authored and gvisor-bot committed Nov 26, 2024
1 parent 19f5519 commit 738351b
Show file tree
Hide file tree
Showing 6 changed files with 77 additions and 31 deletions.
7 changes: 5 additions & 2 deletions pkg/sentry/devices/nvproxy/nvproxy.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ import (
)

// Register registers all devices implemented by this package in vfsObj.
func Register(vfsObj *vfs.VirtualFilesystem, versionStr string, uvmDevMajor uint32) error {
func Register(vfsObj *vfs.VirtualFilesystem, versionStr string, driverCaps nvconf.DriverCaps, uvmDevMajor uint32) error {
// The kernel driver's interface is unstable, so only allow versions of the
// driver that are known to be supported.
log.Infof("NVIDIA driver version: %s", versionStr)
Expand All @@ -42,10 +42,13 @@ func Register(vfsObj *vfs.VirtualFilesystem, versionStr string, uvmDevMajor uint
if !ok {
return fmt.Errorf("unsupported Nvidia driver version: %s", versionStr)
}
if driverCaps == 0 {
log.Warningf("nvproxy: NVIDIA driver capability set is empty; all GPU operations will fail")
}
nvp := &nvproxy{
abi: abiCons.cons(),
version: version,
capsEnabled: nvconf.SupportedDriverCaps, // TODO(gvisor.dev/issues/10856): Let the user specify this.
capsEnabled: driverCaps,
frontendFDs: make(map[*frontendFD]struct{}),
clients: make(map[nvgpu.Handle]*rootClient),
objsFreeSet: make(map[*object]struct{}),
Expand Down
12 changes: 10 additions & 2 deletions runsc/boot/loader.go
Original file line number Diff line number Diff line change
Expand Up @@ -845,8 +845,16 @@ func (l *Loader) installSeccompFilters() error {
var nvproxyCaps nvconf.DriverCaps
nvproxyEnabled := specutils.NVProxyEnabled(l.root.spec, l.root.conf)
if nvproxyEnabled {
// TODO(gvisor.dev/issues/10856): Plumb capabilities here.
nvproxyCaps = nvconf.DefaultDriverCaps
var err error
// We use the set of allowed capabilities here, not the subset of them
// that the root container requests. This is because we need to support
// subsequent containers being able to execute with a wider set than the
// set that the root container requests. Seccomp filters are only
// applied once at sandbox startup, so they need to be as wide as the
// set of capabilities that may ever be requested.
if nvproxyCaps, err = specutils.NVProxyDriverCapsAllowed(l.root.conf); err != nil {
return fmt.Errorf("NVIDIA capabilities: %w", err)
}
}
opts := filter.Options{
Platform: l.k.Platform.SeccompInfo(),
Expand Down
6 changes: 5 additions & 1 deletion runsc/boot/vfs.go
Original file line number Diff line number Diff line change
Expand Up @@ -1395,11 +1395,15 @@ func nvproxyRegisterDevices(info *containerInfo, vfsObj *vfs.VirtualFilesystem)
if !specutils.NVProxyEnabled(info.spec, info.conf) {
return nil
}
driverCaps, err := specutils.NVProxyDriverCapsAllowed(info.conf)
if err != nil {
return fmt.Errorf("NVIDIA driver capabilities: %w", err)
}
uvmDevMajor, err := vfsObj.GetDynamicCharDevMajor()
if err != nil {
return fmt.Errorf("reserving device major number for nvidia-uvm: %w", err)
}
if err := nvproxy.Register(vfsObj, info.nvidiaDriverVersion, uvmDevMajor); err != nil {
if err := nvproxy.Register(vfsObj, info.nvidiaDriverVersion, driverCaps, uvmDevMajor); err != nil {
return fmt.Errorf("registering nvproxy driver: %w", err)
}
info.nvidiaUVMDevMajor = uvmDevMajor
Expand Down
2 changes: 1 addition & 1 deletion runsc/container/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -2029,7 +2029,7 @@ func nvproxySetupAfterGoferUserns(spec *specs.Spec, conf *config.Config, goferCm
fmt.Sprintf("--pid=%d", goferCmd.Process.Pid),
fmt.Sprintf("--device=%s", devices),
}
// Pass driver capabilities specified via NVIDIA_DRIVER_CAPABILITIES as flags. See
// Pass the driver capabilities requested via NVIDIA_DRIVER_CAPABILITIES, limited to those allowed by configuration, as flags. See
// nvidia-container-toolkit/cmd/nvidia-container-runtime-hook/main.go:doPrestart().
driverCaps, err := specutils.NVProxyDriverCapsFromEnv(spec, conf)
if err != nil {
Expand Down
28 changes: 20 additions & 8 deletions runsc/specutils/nvidia.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,20 +144,32 @@ func ParseNvidiaVisibleDevices(spec *specs.Spec) (string, error) {
return nvd, nil
}

// NVProxyDriverCapsFromEnv returns the driver capabilities requested by the
// application via the NVIDIA_DRIVER_CAPABILITIES env var. See
// nvidia-container-toolkit/cmd/nvidia-container-runtime-hook/container_config.go:getDriverCapabilities().
func NVProxyDriverCapsFromEnv(spec *specs.Spec, conf *config.Config) (nvconf.DriverCaps, error) {
// NVProxyDriverCapsAllowed returns the driver capabilities allowed by the
// configuration, irrespective of what a container requests.
// This should be used to determine the bounding set of driver capabilities
// that a container can request.
//
// The set is parsed from conf.NVProxyAllowedDriverCapabilities. If that
// string contains the special "all" value, nvconf.SupportedDriverCaps is
// returned. A parse failure is returned wrapped, together with a zero
// capability set.
func NVProxyDriverCapsAllowed(conf *config.Config) (nvconf.DriverCaps, error) {
	// Construct the set of allowed driver capabilities.
	allowedDriverCaps, hasAll, err := nvconf.DriverCapsFromString(conf.NVProxyAllowedDriverCapabilities)
	if err != nil {
		return 0, fmt.Errorf("invalid set of allowed NVIDIA driver capabilities %q: %w", conf.NVProxyAllowedDriverCapabilities, err)
	}
	// Resolve "all" to `nvconf.SupportedDriverCaps`.
	// allowedDriverCaps is already a subset of `nvconf.SupportedDriverCaps`
	// as this was checked by `config.Config.validate`, so returning the
	// supported set directly is equivalent to OR-ing it in first.
	if hasAll {
		return nvconf.SupportedDriverCaps, nil
	}
	return allowedDriverCaps, nil
}

// NVProxyDriverCapsFromEnv returns the driver capabilities requested by the
// application via the NVIDIA_DRIVER_CAPABILITIES env var. See
// nvidia-container-toolkit/cmd/nvidia-container-runtime-hook/container_config.go:getDriverCapabilities().
func NVProxyDriverCapsFromEnv(spec *specs.Spec, conf *config.Config) (nvconf.DriverCaps, error) {
allowedDriverCaps, err := NVProxyDriverCapsAllowed(conf)
if err != nil {
return 0, err
}

// Extract the set of driver capabilities requested by the application.
Expand Down
53 changes: 36 additions & 17 deletions runsc/specutils/specutils_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -391,80 +391,99 @@ func TestNvidiaDriverCapabilities(t *testing.T) {
for _, tc := range []struct {
name string
allowedCaps string
noEnv bool // If true, no env variable is set.
wantAllowed nvconf.DriverCaps // Capabilities allowed at the sandbox configuration level.
noEnv bool // If true, no env variable is set.
envCaps string
legacy bool
want nvconf.DriverCaps
wantActual nvconf.DriverCaps // Capabilities allowed for the container.
}{
{
name: "unspecified",
allowedCaps: testAllowedCapsFlag,
wantAllowed: testAllowedCaps,
noEnv: true,
want: nvconf.DefaultDriverCaps,
wantActual: nvconf.DefaultDriverCaps,
},
{
name: "unspecified-legacy",
allowedCaps: testAllowedCapsFlag,
wantAllowed: testAllowedCaps,
noEnv: true,
legacy: true,
want: testAllowedCaps,
wantActual: testAllowedCaps,
},
{
name: "empty",
allowedCaps: testAllowedCapsFlag,
wantAllowed: testAllowedCaps,
envCaps: "",
want: nvconf.DefaultDriverCaps,
wantActual: nvconf.DefaultDriverCaps,
},
{
name: "empty-legacy",
allowedCaps: testAllowedCapsFlag,
wantAllowed: testAllowedCaps,
envCaps: "",
legacy: true,
want: nvconf.DefaultDriverCaps,
wantActual: nvconf.DefaultDriverCaps,
},
{
name: "compute",
allowedCaps: testAllowedCapsFlag,
wantAllowed: testAllowedCaps,
envCaps: nvconf.CapCompute.String(),
want: nvconf.CapCompute,
wantActual: nvconf.CapCompute,
},
{
name: "utility,graphics-legacy",
allowedCaps: testAllowedCapsFlag,
wantAllowed: testAllowedCaps,
envCaps: (nvconf.CapUtility | nvconf.CapGraphics).String(),
legacy: true,
want: nvconf.CapUtility | nvconf.CapGraphics,
wantActual: nvconf.CapUtility | nvconf.CapGraphics,
},
{
name: "all",
allowedCaps: testAllowedCapsFlag,
wantAllowed: testAllowedCaps,
envCaps: nvconf.AllCapabilitiesName,
want: testAllowedCaps,
wantActual: testAllowedCaps,
},
{
name: "all-all",
allowedCaps: "all",
allowedCaps: nvconf.AllCapabilitiesName,
wantAllowed: nvconf.SupportedDriverCaps,
envCaps: nvconf.AllCapabilitiesName,
want: nvconf.SupportedDriverCaps,
wantActual: nvconf.SupportedDriverCaps,
},
} {
t.Run(tc.name, func(t *testing.T) {
conf := config.Config{NVProxyAllowedDriverCapabilities: tc.allowedCaps}
allowed, err := NVProxyDriverCapsAllowed(&conf)
if err != nil {
t.Errorf("NVProxyDriverCapsAllowed() failed, err: %v", err)
}
if allowed != tc.wantAllowed {
t.Fatalf("NVProxyDriverCapsAllowed() got: %v, want: %v", allowed, tc.wantAllowed)
}
var env []string
if tc.legacy {
env = append(env, fmt.Sprintf("%s=%s", cudaVersionEnv, "10.2.89"))
}
if tc.envCaps != "" || !tc.noEnv {
env = append(env, fmt.Sprintf("%s=%s", nvidiaDriverCapsEnv, tc.envCaps))
}
got, err := NVProxyDriverCapsFromEnv(
&specs.Spec{Process: &specs.Process{Env: env}},
&config.Config{NVProxyAllowedDriverCapabilities: tc.allowedCaps},
)
spec := specs.Spec{Process: &specs.Process{Env: env}}
got, err := NVProxyDriverCapsFromEnv(&spec, &conf)
if err != nil {
t.Errorf("NVProxyDriverCapsFromEnv() failed, err: %v", err)
}
if got != tc.want {
t.Errorf("NVProxyDriverCapsFromEnv() got: %v, want: %v", got, tc.want)
if got != tc.wantActual {
t.Errorf("NVProxyDriverCapsFromEnv() got: %v, want: %v", got, tc.wantActual)
}
// Check invariant: `got` must always be a subset of `allowed`.
if gotButNotAllowed := got & ^allowed; gotButNotAllowed != 0 {
t.Errorf("caps from env (%v) is not a subset of allowed caps (%v); diff: %v", got, allowed, gotButNotAllowed)
}
})
}
Expand Down

0 comments on commit 738351b

Please sign in to comment.