diff --git a/src/VirtualClient/VirtualClient.Dependencies.UnitTests/CUDAAndNvidiaGPUDriverInstallationTests.cs b/src/VirtualClient/VirtualClient.Dependencies.UnitTests/CUDAAndNvidiaGPUDriverInstallationTests.cs index d5c79dc4f..818ba80a6 100644 --- a/src/VirtualClient/VirtualClient.Dependencies.UnitTests/CUDAAndNvidiaGPUDriverInstallationTests.cs +++ b/src/VirtualClient/VirtualClient.Dependencies.UnitTests/CUDAAndNvidiaGPUDriverInstallationTests.cs @@ -6,6 +6,7 @@ namespace VirtualClient.Dependencies using System; using System.Collections.Generic; using System.Diagnostics; + using System.IO; using System.Net.Http; using System.Threading; using System.Threading.Tasks; @@ -43,6 +44,7 @@ public class CUDAAndNvidiaGPUDriverInstallationTests private TestComponent component; private Mock mockProcessManager; private State mockState; + private DependencyPath mockPackage; [SetUp] public void SetupTests() @@ -56,15 +58,6 @@ public void TearDown() this.component.Dispose(); } - [Test] - public void CUDAAndNvidiaGPUDriverInstallationDependencyThrowsForPlatformsOtherThanUnix() - { - this.SetupDefaultMockBehavior(PlatformID.Win32NT, "11.6"); - - WorkloadException exc = Assert.ThrowsAsync(() => this.component.ExecuteAsync(CancellationToken.None)); - Assert.AreEqual(ErrorReason.PlatformNotSupported, exc.Reason); - } - [Test] public void CUDAAndNvidiaGPUDriverInstallationDependencyThrowsForUnsupportedDistros() { @@ -82,10 +75,9 @@ public void CUDAAndNvidiaGPUDriverInstallationDependencyThrowsForUnsupportedDist } [Test] - [TestCase("11.6")] - public async Task CUDAAndNvidiaGPUDriverInstallationDependencyStartsCorrectProcessesOnExecute(string version) + public async Task CUDAAndNvidiaGPUDriverInstallationDependencyStartsCorrectProcessesOnExecute() { - this.SetupDefaultMockBehavior(PlatformID.Unix, version); + this.SetupDefaultMockBehavior(PlatformID.Unix); this.SetupProcessManager("sudo", UpdateCommand, Environment.CurrentDirectory); this.SetupProcessManager("sudo", 
BuildEssentialInstallationCommand, Environment.CurrentDirectory); @@ -103,10 +95,9 @@ public async Task CUDAAndNvidiaGPUDriverInstallationDependencyStartsCorrectProce } [Test] - [TestCase("11.6")] - public async Task CUDAAndNvidiaGPUDriverInstallationDependencyDoesNotInstallCUDAAndNvidiaGPUDriverIfAlreadyInstalled(string version) + public async Task CUDAAndNvidiaGPUDriverInstallationDependencyDoesNotInstallCUDAAndNvidiaGPUDriverIfAlreadyInstalled() { - this.SetupDefaultMockBehavior(PlatformID.Unix, version); + this.SetupDefaultMockBehavior(PlatformID.Unix); this.fixture.StateManager.OnGetState(nameof(CudaAndNvidiaGPUDriverInstallation)).ReturnsAsync(JObject.FromObject(this.mockState)); @@ -117,10 +108,9 @@ public async Task CUDAAndNvidiaGPUDriverInstallationDependencyDoesNotInstallCUDA } [Test] - [TestCase("11.6")] - public void CUDAAndNvidiaGPUDriverInstallationDependencySurfacesExceptionWhenProcessDoesNotExitSuccessfullyOnExecute(string version) + public void CUDAAndNvidiaGPUDriverInstallationDependencySurfacesExceptionWhenProcessDoesNotExitSuccessfullyOnExecute() { - this.SetupDefaultMockBehavior(PlatformID.Unix, version); + this.SetupDefaultMockBehavior(PlatformID.Unix); this.SetupProcessManager("sudo", UpdateCommand, Environment.CurrentDirectory); this.SetupProcessManager("sudo", BuildEssentialInstallationCommand, Environment.CurrentDirectory); @@ -138,17 +128,45 @@ public void CUDAAndNvidiaGPUDriverInstallationDependencySurfacesExceptionWhenPro Assert.AreEqual(ErrorReason.DependencyInstallationFailed, exc.Reason); } - private void SetupDefaultMockBehavior(PlatformID platformID, string version = "") + [Test] + public async Task CUDAAndNvidiaGPUDriverInstallationDependencyExecutesCorrectInsatllerCommandOnWindows() { - this.fixture.Setup(platformID); + this.SetupDefaultMockBehavior(PlatformID.Win32NT); + this.fixture.Parameters["packageName"] = "NvidiaDrivers"; + this.fixture.Directory.Setup(di => di.Exists(It.IsAny())) + .Returns(true); + + 
this.fixture.FileSystem.Setup(fe => fe.FileStream.Create(It.IsAny(), FileMode.Create, FileAccess.Write, FileShare.None)) + .Returns(Stream.Null); + this.fixture.FileSystem.Setup(fe => fe.Directory.GetFiles(It.IsAny(), It.IsAny(), SearchOption.AllDirectories)) + .Returns(new string[] { this.fixture.Combine(this.mockPackage.Path, "nvidiaDriversInstaller.exe") }); + + this.fixture.FileSystem.Setup(fe => fe.Directory.GetCurrentDirectory()) + .Returns(this.mockPackage.Path); + + this.SetupProcessManager(this.fixture.Combine(this.mockPackage.Path, "nvidiaDriversInstaller.exe"), "-y -s", Environment.CurrentDirectory); + + this.component = new TestComponent(this.fixture.Dependencies, this.fixture.Parameters); + + await this.component.ExecuteAsync(CancellationToken.None); + this.mockProcessManager.Verify(); + } + + private void SetupDefaultMockBehavior(PlatformID platformID) + { + this.fixture.Setup(platformID); + this.mockPackage = new DependencyPath("NvidiaDrivers", this.fixture.GetPackagePath("NvidiaDrivers")); + this.fixture.PackageManager.OnGetPackage("NvidiaDrivers").ReturnsAsync(this.mockPackage); this.mockProcessManager = new Mock(); + this.fixture.Parameters = new Dictionary() { - { "CudaVersion", "11.6" }, - { "DriverVersion", "510" }, + { "LinuxCudaVersion", "11.6" }, + { "LinuxDriverVersion", "510" }, { "Username", "anyuser" }, - { "LocalRunFile", "https://developer.download.nvidia.com/compute/cuda/11.6.0/local_installers/cuda_11.6.0_510.39.01_linux.run" } + { "LinuxLocalRunFile", "https://developer.download.nvidia.com/compute/cuda/11.6.0/local_installers/cuda_11.6.0_510.39.01_linux.run" }, + { "RebootRequired", false } }; this.component = new TestComponent(this.fixture.Dependencies, this.fixture.Parameters); diff --git a/src/VirtualClient/VirtualClient.Dependencies.UnitTests/Packaging/WgetPackageInstallationTests.cs b/src/VirtualClient/VirtualClient.Dependencies.UnitTests/Packaging/WgetPackageInstallationTests.cs index 3f64c752c..9cbf79bbc 100644 --- 
a/src/VirtualClient/VirtualClient.Dependencies.UnitTests/Packaging/WgetPackageInstallationTests.cs +++ b/src/VirtualClient/VirtualClient.Dependencies.UnitTests/Packaging/WgetPackageInstallationTests.cs @@ -5,6 +5,7 @@ namespace VirtualClient.Dependencies.Packaging { using System; using System.Collections.Generic; + using System.IO; using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; @@ -158,6 +159,59 @@ public async Task WgetPackageInstallationExecutesTheExpectedOperationsOnWindowsS } } + [Test] + [TestCase(PlatformID.Unix, Architecture.X64, "wget")] + [TestCase(PlatformID.Unix, Architecture.Arm64, "wget")] + [TestCase(PlatformID.Win32NT, Architecture.X64, "wget.exe")] + [TestCase(PlatformID.Win32NT, Architecture.Arm64, "wget.exe")] + public async Task WgetPackageInstallationExecutesTheExpectedOperationsForNonArchiveFileType(PlatformID platform, Architecture architecture, string wgetBinary) + { + this.SetupDefaults(platform, architecture); + + this.mockFixture.Parameters = new Dictionary + { + { nameof(WgetPackageInstallation.PackageName), "any-package" }, + { nameof(WgetPackageInstallation.PackageUri), "https://any.company.com/files/any-file.1.0.0.exe" } + }; + + using (WgetPackageInstallation installation = new WgetPackageInstallation(this.mockFixture.Dependencies, this.mockFixture.Parameters)) + { + installation.RetryPolicy = Policy.NoOpAsync(); + + string expectedDownloadPath = this.mockFixture.GetPackagePath("any-file.1.0.0.exe"); + string expectedPackageRegistrationPath = this.mockFixture.GetPackagePath("any-package"); + string expectedDownloadedFileCopyPath = this.mockFixture.GetPackagePath("any-package", "any-file.1.0.0.exe"); + + bool downloadPathConfirmed = false; + bool installationPathConfirmed = false; + bool packageRegistrationConfirmed = false; + + // The package will be copied to expectedDownloadedFileCopyPath on the system once it is downloaded. + // e.g. 
+ // /packages/any-file.1.0.0.exe -> /packages/any-package/any-file.1.0.0.exe + this.mockFixture.FileSystem.Setup(fe => fe.File.Copy(It.IsAny(), It.IsAny(), It.IsAny())) + .Callback((downloadFilePath, destinationPath, overWrite) => + { + downloadPathConfirmed = downloadFilePath.Equals(expectedDownloadPath); + installationPathConfirmed = destinationPath.Equals(expectedDownloadedFileCopyPath); + }); + + // Once the package is downloaded and copied, it is registered on the system with Virtual Client. + this.mockFixture.PackageManager.Setup(mgr => mgr.RegisterPackageAsync(It.IsAny(), It.IsAny())) + .Callback((package, token) => + { + packageRegistrationConfirmed = package.Name == installation.PackageName && package.Path == expectedPackageRegistrationPath; + }); + + await installation.ExecuteAsync(CancellationToken.None); + + Assert.IsTrue(this.mockFixture.ProcessManager.CommandsExecuted($"{wgetBinary} {installation.PackageUri}"), "Wget download command incorrect."); + Assert.IsTrue(downloadPathConfirmed, "Archive file path incorrect."); + Assert.IsTrue(installationPathConfirmed, "Package installation path incorrect."); + Assert.IsTrue(packageRegistrationConfirmed, "Package registration incorrect."); + } + } + [Test] [TestCase(PlatformID.Unix, Architecture.X64)] [TestCase(PlatformID.Unix, Architecture.Arm64)] diff --git a/src/VirtualClient/VirtualClient.Dependencies/CUDAAndNvidiaGPUDriverInstallation.cs b/src/VirtualClient/VirtualClient.Dependencies/CUDAAndNvidiaGPUDriverInstallation.cs index 9715e1a76..439c59e26 100644 --- a/src/VirtualClient/VirtualClient.Dependencies/CUDAAndNvidiaGPUDriverInstallation.cs +++ b/src/VirtualClient/VirtualClient.Dependencies/CUDAAndNvidiaGPUDriverInstallation.cs @@ -6,22 +6,27 @@ namespace VirtualClient.Dependencies using System; using System.Collections.Generic; using System.Diagnostics.CodeAnalysis; + using System.IO; using System.IO.Abstractions; using System.Linq; + using System.Net.Http; using System.Threading; using 
System.Threading.Tasks; + using Microsoft.CodeAnalysis; using Microsoft.Extensions.DependencyInjection; using Polly; using VirtualClient.Common; using VirtualClient.Common.Extensions; + using VirtualClient.Common.Rest; using VirtualClient.Common.Telemetry; using VirtualClient.Contracts; /// - /// Provides functionality for installing specific version of CUDA and supported Nvidia GPU driver on linux. + /// Provides functionality for installing specific version of CUDA and supported Nvidia GPU driver on linux and Windows. /// public class CudaAndNvidiaGPUDriverInstallation : VirtualClientComponent { + private IPackageManager packageManager; private IFileSystem fileSystem; private ISystemManagement systemManager; private IStateManager stateManager; @@ -38,53 +43,72 @@ public CudaAndNvidiaGPUDriverInstallation(IServiceCollection dependencies, IDict this.systemManager = dependencies.GetService(); this.stateManager = this.systemManager.StateManager; this.fileSystem = this.systemManager.FileSystem; - } + this.packageManager = this.systemManager.PackageManager; + } /// - /// The version of CUDA to be installed. + /// The version of CUDA to be installed in Linux Systems /// - public string CudaVersion + public string LinuxCudaVersion { get { - return this.Parameters.GetValue(nameof(CudaAndNvidiaGPUDriverInstallation.CudaVersion), string.Empty); + return this.Parameters.GetValue(nameof(CudaAndNvidiaGPUDriverInstallation.LinuxCudaVersion), string.Empty); } set { - this.Parameters[nameof(CudaAndNvidiaGPUDriverInstallation.CudaVersion)] = value; + this.Parameters[nameof(CudaAndNvidiaGPUDriverInstallation.LinuxCudaVersion)] = value; } } /// - /// The version of Nvidia GPU driver to be installed. 
+ /// The version of Nvidia GPU driver to be installed in Linux Systems /// - public string DriverVersion + public string LinuxDriverVersion { get { - return this.Parameters.GetValue(nameof(CudaAndNvidiaGPUDriverInstallation.DriverVersion), string.Empty); + return this.Parameters.GetValue(nameof(CudaAndNvidiaGPUDriverInstallation.LinuxDriverVersion), string.Empty); } set { - this.Parameters[nameof(CudaAndNvidiaGPUDriverInstallation.DriverVersion)] = value; + this.Parameters[nameof(CudaAndNvidiaGPUDriverInstallation.LinuxDriverVersion)] = value; } } /// - /// The local runfile to install Cuda and Nvidia GPU driver. + /// The local runfile to install Cuda and Nvidia GPU driver in Linux Systems /// - public string LocalRunFile + public string LinuxLocalRunFile { get { - return this.Parameters.GetValue(nameof(CudaAndNvidiaGPUDriverInstallation.LocalRunFile), string.Empty); + return this.Parameters.GetValue(nameof(CudaAndNvidiaGPUDriverInstallation.LinuxLocalRunFile), string.Empty); } set { - this.Parameters[nameof(CudaAndNvidiaGPUDriverInstallation.LocalRunFile)] = value; + this.Parameters[nameof(CudaAndNvidiaGPUDriverInstallation.LinuxLocalRunFile)] = value; + } + } + + /// + /// Determines whether Reboot is required or not after Driver installation + /// + public bool RebootRequired + { + get + { + switch (this.Platform) + { + case PlatformID.Win32NT: + return this.Parameters.GetValue(nameof(CudaAndNvidiaGPUDriverInstallation.RebootRequired), false); + + default: + return this.Parameters.GetValue(nameof(CudaAndNvidiaGPUDriverInstallation.RebootRequired), true); + } } } @@ -153,18 +177,17 @@ await this.InstallCudaAndDriversAsync(linuxDistributionInfo.LinuxDistribution, t await this.stateManager.SaveStateAsync(nameof(CudaAndNvidiaGPUDriverInstallation), new State(), cancellationToken) .ConfigureAwait(false); - - VirtualClientRuntime.IsRebootRequested = true; } - else + else if (this.Platform == PlatformID.Win32NT) { - // CUDA and Nvidia driver installation for other 
platforms to be added. - throw new WorkloadException( - $"CUDA and Nvidia GPU driver Installation is not supported by Virtual Client on the current platform '{this.Platform}'." + - $"Supported Platforms include:" + - $" Unix ", - ErrorReason.PlatformNotSupported); + await this.CudaAndNvidiaGPUDriverInstallationOnWindowsAsync(telemetryContext, cancellationToken) + .ConfigureAwait(false); + + await this.stateManager.SaveStateAsync(nameof(this.CudaAndNvidiaGPUDriverInstallationOnWindowsAsync), new State(), cancellationToken) + .ConfigureAwait(false); } + + VirtualClientRuntime.IsRebootRequested = this.RebootRequired; } this.Logger.LogTraceMessage($"{this.TypeName}.ExecutionCompleted", telemetryContext); @@ -212,11 +235,11 @@ await this.fileSystem.File.WriteAllLinesAsync( { foreach (string command in commandsList) { - await this.ExecuteCommandAsync(command, Environment.CurrentDirectory, telemetryContext, cancellationToken) + await this.ExecuteCommandAsync(command, null, Environment.CurrentDirectory, telemetryContext, cancellationToken) .ConfigureAwait(false); } } - } + } private List CleanupCommands(LinuxDistribution linuxDistribution) { @@ -296,10 +319,10 @@ private List PrerequisiteCommands(LinuxDistribution linuxDistribution) private List VersionSpecificInstallationCommands(LinuxDistribution linuxDistribution) { - string runFileName = this.LocalRunFile.Split('/').Last(); - List commands = new List + string runFileName = this.LinuxLocalRunFile.Split('/').Last(); + List commands = new List() { - $"wget {this.LocalRunFile}", + $"wget {this.LinuxLocalRunFile}", $"sh {runFileName} --silent" }; @@ -309,19 +332,19 @@ private List VersionSpecificInstallationCommands(LinuxDistribution linux case LinuxDistribution.Ubuntu: commands.Add("apt update"); commands.Add("apt upgrade -y"); - commands.Add($"apt install nvidia-driver-{this.DriverVersion} nvidia-dkms-{this.DriverVersion} -y"); - commands.Add($"apt install cuda-drivers-fabricmanager-{this.DriverVersion} -y"); + 
commands.Add($"apt install nvidia-driver-{this.LinuxDriverVersion} nvidia-dkms-{this.LinuxDriverVersion} -y"); + commands.Add($"apt install cuda-drivers-fabricmanager-{this.LinuxDriverVersion} -y"); break; case LinuxDistribution.CentOS7: case LinuxDistribution.CentOS8: case LinuxDistribution.RHEL7: - commands.Add($"dnf module install nvidia-driver:{this.DriverVersion}/fm"); + commands.Add($"dnf module install nvidia-driver:{this.LinuxDriverVersion}/fm"); break; case LinuxDistribution.SUSE: - commands.Add($"zypper install cuda-drivers-fabricmanager-{this.DriverVersion}"); + commands.Add($"zypper install cuda-drivers-fabricmanager-{this.LinuxDriverVersion}"); break; } @@ -332,20 +355,41 @@ private List PostInstallationCommands() { return new List { - $"bash -c \"echo 'export PATH=/usr/local/cuda-{this.CudaVersion}/bin${{PATH:+:${{PATH}}}}' | " + + $"bash -c \"echo 'export PATH=/usr/local/cuda-{this.LinuxCudaVersion}/bin${{PATH:+:${{PATH}}}}' | " + $"sudo tee -a /home/{this.Username}/.bashrc\"", - $"bash -c \"echo 'export LD_LIBRARY_PATH=/usr/local/cuda-{this.CudaVersion}/lib64${{LD_LIBRARY_PATH:+:${{LD_LIBRARY_PATH}}}}' | " + + $"bash -c \"echo 'export LD_LIBRARY_PATH=/usr/local/cuda-{this.LinuxCudaVersion}/lib64${{LD_LIBRARY_PATH:+:${{LD_LIBRARY_PATH}}}}' | " + $"sudo tee -a /home/{this.Username}/.bashrc\"" }; } - private Task ExecuteCommandAsync(string commandLine, string workingDirectory, EventContext telemetryContext, CancellationToken cancellationToken) + private async Task CudaAndNvidiaGPUDriverInstallationOnWindowsAsync(EventContext telemetryContext, CancellationToken cancellationToken) + { + string installerPath = string.Empty; + + DependencyPath nvidiaDriverInstallerPackage = await this.packageManager.GetPackageAsync( + this.PackageName, cancellationToken) + .ConfigureAwait(false); + + if (this.fileSystem.Directory.GetFiles(nvidiaDriverInstallerPackage.Path, "*.exe", SearchOption.AllDirectories).Length > 0) + { + installerPath = 
this.fileSystem.Directory.GetFiles(nvidiaDriverInstallerPackage.Path, "*.exe", SearchOption.AllDirectories)[0]; + } + else + { + throw new DependencyException($"The installer file was not found in the directory {nvidiaDriverInstallerPackage.Path}", ErrorReason.DependencyNotFound); + } + + await this.ExecuteCommandAsync(installerPath, "-y -s", Environment.CurrentDirectory, telemetryContext, cancellationToken) + .ConfigureAwait(false); + } + + private Task ExecuteCommandAsync(string commandLine, string commandLineArgs, string workingDirectory, EventContext telemetryContext, CancellationToken cancellationToken) { return this.RetryPolicy.ExecuteAsync(async () => { string output = string.Empty; - using (IProcessProxy process = this.systemManager.ProcessManager.CreateElevatedProcess(this.Platform, commandLine, null, workingDirectory)) + using (IProcessProxy process = this.systemManager.ProcessManager.CreateElevatedProcess(this.Platform, commandLine, commandLineArgs, workingDirectory)) { this.CleanupTasks.Add(() => process.SafeKill()); this.LogProcessTrace(process); diff --git a/src/VirtualClient/VirtualClient.Dependencies/Packaging/WgetPackageInstallation.cs b/src/VirtualClient/VirtualClient.Dependencies/Packaging/WgetPackageInstallation.cs index e7b23bb30..928170cfc 100644 --- a/src/VirtualClient/VirtualClient.Dependencies/Packaging/WgetPackageInstallation.cs +++ b/src/VirtualClient/VirtualClient.Dependencies/Packaging/WgetPackageInstallation.cs @@ -149,6 +149,16 @@ protected override async Task ExecuteAsync(EventContext telemetryContext, Cancel await this.packageManager.ExtractPackageAsync(downloadedPackagePath, installationPath, cancellationToken, archiveType); } + else + { + // This section is for the standalone files which are not zipped. The file will be copied to a location in + // directory 'installationPath' under a name as defined by 'downloadedFileCopyPath' and then + // file at original download path is deleted as in above case for zip file. 
+ string downloadedFileCopyPath = this.GetPackagePath(this.PackageName, Path.GetFileName(this.PackageUri.ToString())); + installationPath = this.GetPackagePath(this.PackageName); + this.fileSystem.Directory.CreateDirectory(installationPath); + this.fileSystem.File.Copy(downloadedPackagePath, downloadedFileCopyPath, true); + } // Note that installation path is the final path even though we are using the packages path above // as the destination. diff --git a/src/VirtualClient/VirtualClient.Main/profiles/PERF-GPU-MLPERF.json b/src/VirtualClient/VirtualClient.Main/profiles/PERF-GPU-MLPERF.json index e63e3efa0..f9d2d775b 100644 --- a/src/VirtualClient/VirtualClient.Main/profiles/PERF-GPU-MLPERF.json +++ b/src/VirtualClient/VirtualClient.Main/profiles/PERF-GPU-MLPERF.json @@ -9,9 +9,9 @@ "Parameters": { "Username": "", "DiskFilter": "SizeGreaterThan:1000gb", - "CudaVersion": "12.0", - "DriverVersion": "525", - "LocalRunFile": "https://developer.download.nvidia.com/compute/cuda/12.0.0/local_installers/cuda_12.0.0_525.60.13_linux.run" + "LinuxCudaVersion": "12.0", + "LinuxDriverVersion": "525", + "LinuxLocalRunFile": "https://developer.download.nvidia.com/compute/cuda/12.0.0/local_installers/cuda_12.0.0_525.60.13_linux.run" }, "Actions": [ { @@ -70,10 +70,10 @@ "Type": "NvidiaCudaInstallation", "Parameters": { "Scenario": "InstallNvidiaCuda", - "CudaVersion": "$.Parameters.CudaVersion", - "DriverVersion": "$.Parameters.DriverVersion", + "LinuxCudaVersion": "$.Parameters.LinuxCudaVersion", + "LinuxDriverVersion": "$.Parameters.LinuxDriverVersion", "Username": "$.Parameters.Username", - "LocalRunFile": "$.Parameters.LocalRunFile" + "LinuxLocalRunFile": "$.Parameters.LinuxLocalRunFile" } }, { diff --git a/src/VirtualClient/VirtualClient.Main/profiles/PERF-GPU-SUPERBENCH.json b/src/VirtualClient/VirtualClient.Main/profiles/PERF-GPU-SUPERBENCH.json index db605a9cb..38730a227 100644 --- a/src/VirtualClient/VirtualClient.Main/profiles/PERF-GPU-SUPERBENCH.json +++ 
b/src/VirtualClient/VirtualClient.Main/profiles/PERF-GPU-SUPERBENCH.json @@ -10,9 +10,9 @@ "Parameters": { "ConfigurationFile": "default.yaml", "Username": "", - "CudaVersion": "12.0", - "DriverVersion": "525", - "LocalRunFile": "https://developer.download.nvidia.com/compute/cuda/12.0.0/local_installers/cuda_12.0.0_525.60.13_linux.run" + "LinuxCudaVersion": "12.0", + "LinuxDriverVersion": "525", + "LinuxLocalRunFile": "https://developer.download.nvidia.com/compute/cuda/12.0.0/local_installers/cuda_12.0.0_525.60.13_linux.run" }, "Actions": [ { @@ -31,10 +31,10 @@ "Type": "NvidiaCudaInstallation", "Parameters": { "Scenario": "InstallNvidiaCuda", - "CudaVersion": "$.Parameters.CudaVersion", - "DriverVersion": "$.Parameters.DriverVersion", + "LinuxCudaVersion": "$.Parameters.LinuxCudaVersion", + "LinuxDriverVersion": "$.Parameters.LinuxDriverVersion", "Username": "$.Parameters.Username", - "LocalRunFile": "$.Parameters.LocalRunFile" + "LinuxLocalRunFile": "$.Parameters.LinuxLocalRunFile" } }, { diff --git a/src/VirtualClient/VirtualClient.Main/profiles/QUAL-GPU-DCGMI.json b/src/VirtualClient/VirtualClient.Main/profiles/QUAL-GPU-DCGMI.json index 0757eb043..151020113 100644 --- a/src/VirtualClient/VirtualClient.Main/profiles/QUAL-GPU-DCGMI.json +++ b/src/VirtualClient/VirtualClient.Main/profiles/QUAL-GPU-DCGMI.json @@ -68,10 +68,10 @@ "Type": "NvidiaCudaInstallation", "Parameters": { "Scenario": "InstallNvidiaCuda", - "CudaVersion": "11.6", - "DriverVersion": "510", + "LinuxCudaVersion": "11.6", + "LinuxDriverVersion": "510", "Username": "$.Parameters.Username", - "LocalRunFile": "https://developer.download.nvidia.com/compute/cuda/11.6.0/local_installers/cuda_11.6.0_510.39.01_linux.run" + "LinuxLocalRunFile": "https://developer.download.nvidia.com/compute/cuda/11.6.0/local_installers/cuda_11.6.0_510.39.01_linux.run" } }, { diff --git a/website/docs/dependencies/0031-install-cuda-and-nvidia-drivers.md b/website/docs/dependencies/0031-install-cuda-and-nvidia-drivers.md new 
file mode 100644 index 000000000..3ef59100c --- /dev/null +++ b/website/docs/dependencies/0031-install-cuda-and-nvidia-drivers.md @@ -0,0 +1,106 @@ +# Install CUDA and NVIDIA GPU Drivers +Virtual Client has a dependency component that can be added to a workload or monitor profile to install CUDA and NVIDIA drivers in Linux and Windows systems. The following section illustrates the +details for integrating this into the profile. + +- [NVIDIA Official Drivers Page](https://www.nvidia.com/Download/index.aspx) +- [CUDA Toolkit Downloads](https://developer.nvidia.com/cuda-downloads) + +## Supported Platform/Architectures +* linux-x64 (Ubuntu, Debian, CentOS7, RHEL7, RHEL8, SUSE) +* win-x64 + +## Profile Component Parameters for Windows +This dependency component can be used to download the drivers on Windows either from Web using Wget, or from a blob storage using the DependencyPackageInstallation. +The following section describes the parameters used by the individual component in the profile in Windows: + +| **Parameter** | **Required** | **Description** | **Default** | +|---------------|--------------|----------------------------|-------------------------------------------------| +| PackageName | Yes | The logical name of the package that will be registered with the Virtual Client runtime. This name can be used by other profile components to reference the installation parent directory location for Drivers. | | +| Scenario | No | A name/identifier for the specific component in the profile. This is used for telemetry purposes only with components in dependency sections of the profile (i.e. cannot be used with --scenarios option on the command line). | | +| RebootRequired | No | Whether or not reboot is required after installing the drivers. 
| false | + +## Profile Component Parameters for Linux +The following section describes the parameters used by the individual component in the profile in Linux: + +| **Parameter** | **Required** | **Description** | **Default** | +|---------------|--------------|----------------------------|-------------------------------------------------| +| PackageName | Yes | The logical name of the package that will be registered with the Virtual Client runtime. This name can be used by other profile components to reference the installation parent directory location for Drivers. | | +| Scenario | No | A name/identifier for the specific component in the profile. This is used for telemetry purposes only with components in dependency sections of the profile (i.e. cannot be used with --scenarios option on the command line). | | +| LinuxCudaVersion | Yes | The version of CUDA to be installed in Linux Systems | | +| LinuxDriverVersion | Yes | The version of Nvidia GPU driver to be installed in Linux Systems | | +| LinuxLocalRunFile | Yes | The link to local runfile to install Cuda and Nvidia GPU driver in Linux Systems | | +| Username | No | The user who has the ssh identity registered for. | | +| RebootRequired | No | Whether or not reboot is required after installing the drivers. | true | + +## Example +The following sections provide examples for how to integrate the component into a profile. + +### Windows example for downloading drivers from Web +A sample URL for NVIDIA Drivers for Windows 10/11 is mentioned in example. The exact URL for the specific OS and Driver Version can be taken from NVIDIA Drivers website, given above. +
+ + ```json + { + "Type": "WgetPackageInstallation", + "Parameters": { + "Scenario": "DownloadCudaAndNvidiaDriverUsingWget", + "PackageUri": "https://us.download.nvidia.com/tesla/528.33/528.33-data-center-tesla-desktop-win10-win11-64bit-dch-international.exe", + "PackageName": "nvidiaDrivers", + "Extract": true + } + }, + { + "Type": "CudaAndNvidiaGPUDriverInstallation", + "Parameters": { + "Scenario": "InstallCudaAndNvidiaGPUDriverForWindows", + "RebootRequired": false, + "PackageName": "nvidiaDrivers" + } + } + ``` +
+ +### Windows example for downloading drivers from blob storage + +
+ + ```json +{ + "Type": "DependencyPackageInstallation", + "Parameters": { + "Scenario": "DownloadCudaAndNvidiaDriverFromBlob", + "BlobContainer": "packages", + "BlobName": "", + "PackageName": "nvidiaDrivers", + "Extract": true + } + }, + { + "Type": "CudaAndNvidiaGPUDriverInstallation", + "Parameters": { + "Scenario": "InstallCudaAndNvidiaGPUDriverForWindows", + "RebootRequired": false, + "PackageName": "nvidiaDrivers" + } + } + ``` +
+ + +### Linux example for downloading drivers +A sample URL for NVIDIA Drivers RunFile for Linux Ubuntu is mentioned in example. The exact URL for the specific OS and Driver Version can be taken from CUDA Toolkit website, given above. +
+ + ```json + { + "Type": "NvidiaCudaInstallation", + "Parameters": { + "Scenario": "InstallNvidiaCuda", + "LinuxCudaVersion": "12.0", + "LinuxDriverVersion": "525", + "Username": "", + "LinuxLocalRunFile": "https://developer.download.nvidia.com/compute/cuda/12.0.0/local_installers/cuda_12.0.0_525.60.13_linux.run" + } + }, + ``` +
\ No newline at end of file diff --git a/website/docs/workloads/mlperf/mlperf-profiles.md b/website/docs/workloads/mlperf/mlperf-profiles.md index 6594a6a77..e065b7e63 100644 --- a/website/docs/workloads/mlperf/mlperf-profiles.md +++ b/website/docs/workloads/mlperf/mlperf-profiles.md @@ -33,9 +33,9 @@ Runs the MLPerf benchmark workload to test GPU performance. |-----------------------|---------|---------------| | Username | Optional. User which needs to be created in container to run MLPerf benchmarks. | testuser | | DiskFilter | Optional. Filter to decide the disk that will be used to download benchmarks. Since benchmarks data is around 800gb, default disk filter is greater than 1000gb. | SizeGreaterThan:1000gb | - | CudaVersion | Optional. Version of CUDA that needs to be installed. | 11.6 | - | DriverVersion | Optional. Version of GPU driver that needs to be installed. | 510 | - | LocalRunFile | Optional. Link to download specified CUDA and GPU driver versions. | https://developer.download.nvidia.com/compute/cuda/12.0.0/local_installers/cuda_12.0.0_525.60.13_linux.run | + | LinuxCudaVersion | Optional. Version of CUDA that needs to be installed. | 12.0 | + | LinuxDriverVersion | Optional. Version of GPU driver that needs to be installed. | 525 | + | LinuxLocalRunFile | Optional. Link to download specified CUDA and GPU driver versions. | https://developer.download.nvidia.com/compute/cuda/12.0.0/local_installers/cuda_12.0.0_525.60.13_linux.run | * **Profile Runtimes** See the 'Metadata' section of the profile for estimated runtimes. These timings represent the length of time required to run a single round of profile