Skip to content

Commit

Permalink
Make versioninfo() resilient against NVML EPERM. (#1771)
Browse files Browse the repository at this point in the history
  • Loading branch information
maleadt authored Feb 13, 2023
1 parent 3b20f54 commit 868d33e
Showing 1 changed file with 22 additions and 2 deletions.
24 changes: 22 additions & 2 deletions src/utilities.jl
Original file line number Diff line number Diff line change
Expand Up @@ -84,21 +84,41 @@ function versioninfo(io::IO=stdout)
println(io, length(devs), " devices:")
end
for (i, dev) in enumerate(devs)
# Query name, compute capability and memory usage of `dev` through NVML.
#
# `mig` is true when the device's UUID differs from its parent's UUID
# (presumably a MIG instance -- confirm against `parent_uuid`). The compute
# capability is read from the parent GPU handle, while the name and memory
# information come from the handle of the device itself.
#
# Returns a named tuple `(; str, cap, mem)`. NVML errors are not handled here
# and propagate to the caller.
function query_nvml()
    mig = uuid(dev) != parent_uuid(dev)
    nvml_gpu = NVML.Device(parent_uuid(dev))
    nvml_dev = NVML.Device(uuid(dev); mig)

    str = NVML.name(nvml_dev)
    cap = NVML.compute_capability(nvml_gpu)
    mem = NVML.memory_info(nvml_dev)

    return (; str, cap, mem)
end

# Query name, compute capability and memory usage of `dev` through CUDA itself.
#
# Returns a named tuple with the fields `str`, `cap` and `mem`, where `mem`
# is a named tuple `(free, total)`.
function query_cuda()
    devname = name(dev)
    cc = capability(dev)
    # the memory queries need a device context (which is why NVML is the
    # preferred source), so they run inside `device!(dev)`
    meminfo = device!(dev) do
        (free=available_memory(), total=total_memory())
    end
    return (str=devname, cap=cc, mem=meminfo)
end

# Prefer NVML when it is available, but fall back to querying through CUDA
# when NVML reports that the query is unsupported or not permitted. Any other
# failure is unexpected and is rethrown unchanged.
str, cap, mem = if has_nvml()
    try
        query_nvml()
    catch err
        recoverable = err isa NVML.NVMLError &&
                      err.code in (NVML.ERROR_NOT_SUPPORTED, NVML.ERROR_NO_PERMISSION)
        recoverable || rethrow()
        # keep the failure discoverable without printing to stdout, which
        # would bypass the `io` this report is being written to
        @debug "NVML query failed; falling back to CUDA" err
        query_cuda()
    end
else
    query_cuda()
end
println(io, " $(i-1): $str (sm_$(cap.major)$(cap.minor), $(Base.format_bytes(mem.free)) / $(Base.format_bytes(mem.total)) available)")
end
Expand Down

0 comments on commit 868d33e

Please sign in to comment.