update docs

Signed-off-by: limengxuan <limengxuan@4paradigm.com>
Project-HAMi · Feb 1, 2024 · 772d153
1 parent 2ad91f0
commit 772d153
Show file tree

Hide file tree

Showing 5 changed files with 10 additions and 21 deletions.
diff --git a/charts/vgpu/values.yaml b/charts/vgpu/values.yaml
@@ -27,7 +27,7 @@ podSecurityPolicy:
   enabled: false
 
 global:
-  gpuHookPath: /usr/local
+  gpuHookPath: /usr/local/vgpu
   labels: {}
   annotations: {}
 

diff --git a/cmd/vGPUmonitor/metrics.go b/cmd/vGPUmonitor/metrics.go
@@ -165,17 +165,11 @@ func (cc ClusterManagerCollector) Collect(ch chan<- prometheus.Metric) {
 					klog.Error(nvml.ErrorString(nvret))
 				}
 				memoryUsed := 0
-				memory, ret := hdev.GetMemoryInfo_v2()
+				memory, ret := hdev.GetMemoryInfo()
 				if ret == nvml.SUCCESS {
 					memoryUsed = int(memory.Used)
 				} else {
-					klog.Error("nvml get memory_v2 error ret=", ret)
-					memory_v1, ret := hdev.GetMemoryInfo()
-					if ret != nvml.SUCCESS {
-						klog.Error("nvml get memory error ret=", ret)
-					} else {
-						memoryUsed = int(memory_v1.Used)
-					}
+					klog.Error("nvml get memory error ret=", ret)
 				}
 
 				uuid, nvret := hdev.GetUUID()

diff --git a/hack/build.sh b/hack/build.sh
@@ -23,7 +23,7 @@ export COMMIT_CODE
 export VERSION="${SHORT_VERSION}-${COMMIT_CODE}"
 export LATEST_VERSION="latest"
 export GOLANG_IMAGE="golang:1.21-bullseye"
-export NVIDIA_IMAGE="nvidia/cuda:11.2.2-base-ubuntu20.04"
+export NVIDIA_IMAGE="nvidia/cuda:12.2.0-devel-ubuntu20.04"
 export DEST_DIR=""
 
 IMAGE=${IMAGE-"4pdosc/k8s-vdevice"}

diff --git a/pkg/device-plugin/nvidiadevice/nvinternal/plugin/register.go b/pkg/device-plugin/nvidiadevice/nvinternal/plugin/register.go
@@ -107,17 +107,12 @@ func (r *NvidiaDevicePlugin) getApiDevices() *[]*api.DeviceInfo {
 			panic(0)
 		}
 		memoryTotal := 0
-		memory, ret := ndev.GetMemoryInfo_v2()
+		memory, ret := ndev.GetMemoryInfo()
 		if ret == nvml.SUCCESS {
 			memoryTotal = int(memory.Total)
 		} else {
-			klog.Error("nvml get memory_v2 error ret=", ret)
-			memory_v1, ret := ndev.GetMemoryInfo()
-			if ret != nvml.SUCCESS {
-				klog.Error("nvml get memory_v2 error ret=", ret)
-				panic(0)
-			}
-			memoryTotal = int(memory_v1.Total)
+			klog.Error("nvml get memory error ret=", ret)
+			panic(0)
 		}
 		UUID, ret := ndev.GetUUID()
 		if ret != nvml.SUCCESS {

diff --git a/pkg/device-plugin/nvidiadevice/nvinternal/plugin/server.go b/pkg/device-plugin/nvidiadevice/nvinternal/plugin/server.go
@@ -367,7 +367,7 @@ func (plugin *NvidiaDevicePlugin) Allocate(ctx context.Context, reqs *pluginapi.
 			os.Chmod("/tmp/vgpulock", 0777)
 			response.Mounts = append(response.Mounts,
 				&pluginapi.Mount{ContainerPath: fmt.Sprintf("%s/vgpu/libvgpu.so", hostHookPath),
-					HostPath: hostHookPath + "/libvgpu.so",
+					HostPath: hostHookPath + "/vgpu/libvgpu.so",
 					ReadOnly: true},
 				&pluginapi.Mount{ContainerPath: fmt.Sprintf("%s/vgpu", hostHookPath),
 					HostPath: cacheFileHostDirectory,
@@ -385,7 +385,7 @@ func (plugin *NvidiaDevicePlugin) Allocate(ctx context.Context, reqs *pluginapi.
 			}
 			if !found {
 				response.Mounts = append(response.Mounts, &pluginapi.Mount{ContainerPath: "/etc/ld.so.preload",
-					HostPath: hostHookPath + "/ld.so.preload",
+					HostPath: hostHookPath + "/vgpu/ld.so.preload",
 					ReadOnly: true},
 				)
 			}
@@ -398,7 +398,7 @@ func (plugin *NvidiaDevicePlugin) Allocate(ctx context.Context, reqs *pluginapi.
 				})
 				response.Mounts = append(response.Mounts, &pluginapi.Mount{
 					ContainerPath: "/usr/bin/vgpuvalidator",
-					HostPath:      fmt.Sprintf("%s/vgpuvalidator", hostHookPath),
+					HostPath:      fmt.Sprintf("%s/vgpu/vgpuvalidator", hostHookPath),
 					ReadOnly:      true,
 				})
 			}