diff --git a/docs/src/lib/driver.md b/docs/src/lib/driver.md index b4247c1cd1..af97a0f0ba 100644 --- a/docs/src/lib/driver.md +++ b/docs/src/lib/driver.md @@ -143,7 +143,7 @@ CUDA.total_memory ```@docs CuStream -CUDA.query(::CuStream) +CUDA.isdone(::CuStream) priority_range priority synchronize(::CuStream) @@ -153,9 +153,9 @@ CUDA.@sync For specific use cases, special streams are available: ```@docs -CuDefaultStream -CuStreamLegacy -CuStreamPerThread +default_stream +legacy_stream +per_thread_stream ``` ## Event Management @@ -164,7 +164,7 @@ CuStreamPerThread CuEvent record synchronize(::CuEvent) -CUDA.query(::CuEvent) +CUDA.isdone(::CuEvent) CUDA.wait(::CuEvent) elapsed CUDA.@elapsed diff --git a/lib/cudadrv/devices.jl b/lib/cudadrv/devices.jl index bc813a7806..350a7e578c 100644 --- a/lib/cudadrv/devices.jl +++ b/lib/cudadrv/devices.jl @@ -63,9 +63,6 @@ const DEVICE_INVALID = _CuDevice(CUdevice(-2)) Base.convert(::Type{CUdevice}, dev::CuDevice) = dev.handle -Base.:(==)(a::CuDevice, b::CuDevice) = a.handle == b.handle -Base.hash(dev::CuDevice, h::UInt) = hash(dev.handle, h) - function Base.show(io::IO, ::MIME"text/plain", dev::CuDevice) print(io, "CuDevice($(dev.handle)): ") if dev == DEVICE_CPU diff --git a/lib/cudadrv/events.jl b/lib/cudadrv/events.jl index d2d1377328..4e896a1161 100644 --- a/lib/cudadrv/events.jl +++ b/lib/cudadrv/events.jl @@ -50,12 +50,12 @@ Waits for an event to complete. synchronize(e::CuEvent) = cuEventSynchronize(e) """ - query(e::CuEvent) + isdone(e::CuEvent) Return `false` if there is outstanding work preceding the most recent call to `record(e)` and `true` if all captured work has been completed. """ -function query(e::CuEvent) +function isdone(e::CuEvent) res = unsafe_cuEventQuery(e) if res == ERROR_NOT_READY return false diff --git a/lib/cudadrv/stream.jl b/lib/cudadrv/stream.jl index dab625d0d5..bd83748a84 100644 --- a/lib/cudadrv/stream.jl +++ b/lib/cudadrv/stream.jl @@ -1,7 +1,7 @@ # Stream management export - CuStream, CuDefaultStream, CuStreamLegacy, CuStreamPerThread, + CuStream, default_stream, legacy_stream, per_thread_stream, priority, priority_range, synchronize, device_synchronize """ @@ -29,15 +29,15 @@ mutable struct CuStream return obj end - global CuDefaultStream() = new(convert(CUstream, C_NULL), nothing) + global default_stream() = new(convert(CUstream, C_NULL), nothing) - global CuStreamLegacy() = new(convert(CUstream, 1), nothing) + global legacy_stream() = new(convert(CUstream, 1), nothing) - global CuStreamPerThread() = new(convert(CUstream, 2), nothing) + global per_thread_stream() = new(convert(CUstream, 2), nothing) end """ - CuDefaultStream() + default_stream() Return the default stream. @@ -46,10 +46,10 @@ Return the default stream. It is generally better to use `stream()` to get a stream object that's local to the current task. That way, operations scheduled in other tasks can overlap. """ -CuDefaultStream() +default_stream() """ - CuStreamLegacy() + legacy_stream() Return a special object to use use an implicit stream with legacy synchronization behavior. @@ -57,10 +57,10 @@ You can use this stream to perform operations that should block on all streams ( exception of streams created with `STREAM_NON_BLOCKING`). This matches the old pre-CUDA 7 global stream behavior. """ -CuStreamLegacy() +legacy_stream() """ - CuStreamPerThread() + per_thread_stream() Return a special object to use an implicit stream with per-thread synchronization behavior. This stream object is normally meant to be used with APIs that do not have per-thread @@ -72,7 +72,7 @@ versions of their APIs (i.e. without a `ptsz` or `ptds` suffix). gets its own non-blocking stream, and multithreading in Julia is typically accomplished using tasks. """ -CuStreamPerThread() +per_thread_stream() Base.unsafe_convert(::Type{CUstream}, s::CuStream) = s.handle @@ -92,12 +92,12 @@ function Base.show(io::IO, stream::CuStream) end """ - query(s::CuStream) + isdone(s::CuStream) Return `false` if a stream is busy (has task running or queued) and `true` if that stream is free. """ -function query(s::CuStream) +function isdone(s::CuStream) res = unsafe_cuStreamQuery(s) if res == ERROR_NOT_READY return false @@ -119,7 +119,7 @@ See also: [`device_synchronize`](@ref) """ function synchronize(stream::CuStream=stream(); blocking::Bool=true) # fast path - query(stream) && @goto(exit) + isdone(stream) && @goto(exit) # minimize latency of short operations by busy-waiting, # initially without even yielding to other tasks @@ -132,7 +132,7 @@ function synchronize(stream::CuStream=stream(); blocking::Bool=true) else yield() end - query(stream) && @goto(exit) + isdone(stream) && @goto(exit) spins += 1 end @@ -155,7 +155,7 @@ Block for the current device's tasks to complete. This is a heavyweight operatio you only need to call [`synchronize`](@ref) which only synchronizes the stream associated with the current task. """ -device_synchronize() = synchronize(CuStreamLegacy()) +device_synchronize() = synchronize(legacy_stream()) """ priority_range() diff --git a/lib/cufft/fft.jl b/lib/cufft/fft.jl index 9b87fd5750..c355548e1a 100644 --- a/lib/cufft/fft.jl +++ b/lib/cufft/fft.jl @@ -27,7 +27,7 @@ function CUDA.unsafe_free!(plan::CuFFTPlan, stream::CuStream=stream()) unsafe_free!(plan.workarea, stream) end -unsafe_finalize!(plan::CuFFTPlan) = unsafe_free!(plan, CuDefaultStream()) +unsafe_finalize!(plan::CuFFTPlan) = unsafe_free!(plan, default_stream()) mutable struct cCuFFTPlan{T<:cufftNumber,K,inplace,N} <: CuFFTPlan{T,K,inplace} handle::cufftHandle diff --git a/lib/nvml/device.jl b/lib/nvml/device.jl index 05d655fc9c..88a673dddd 100644 --- a/lib/nvml/device.jl +++ b/lib/nvml/device.jl @@ -23,6 +23,11 @@ end Base.unsafe_convert(::Type{nvmlDevice_t}, dev::Device) = dev.handle +function Base.show(io::IO, ::MIME"text/plain", dev::Device) + print(io, "NVML.Device($(index(dev))): ") + print(io, "$(name(dev))") +end + # iteration @@ -75,6 +80,12 @@ function serial(dev::Device) return unsafe_string(pointer(buf)) end +function index(dev::Device) + index = Ref{Cuint}() + nvmlDeviceGetIndex(dev, index) + return Int(index[]) +end + # watt function power_usage(dev::Device) ref = Ref{Cuint}() diff --git a/src/array.jl b/src/array.jl index 95b2c1d708..c8ed4a3db5 100644 --- a/src/array.jl +++ b/src/array.jl @@ -63,7 +63,7 @@ earlier to reduce pressure on the memory allocator. By default, the operation is performed on the task-local stream. During task or process finalization however, that stream may be destroyed already, so be sure to specify a safe -stream (i.e. `CuDefaultStream()`, which will ensure the operation will block on other +stream (i.e. `default_stream()`, which will ensure the operation will block on other streams) when calling this function from a finalizer. For simplicity, the `unsafe_finalize!` function does exactly that. """ @@ -98,7 +98,7 @@ function unsafe_finalize!(xs::CuArray) # stream, it synchronizes "too much". we could do better, e.g., by keeping track of all # streams involved, or by refcounting uses and decrementing that refcount after the # operation using `cuLaunchHostFunc`. See CUDA.jl#778 and CUDA.jl#780 for details. - unsafe_free!(xs, CuDefaultStream()) + unsafe_free!(xs, default_stream()) end diff --git a/src/deprecated.jl b/src/deprecated.jl index 26ccb0c2fc..f59993eae4 100644 --- a/src/deprecated.jl +++ b/src/deprecated.jl @@ -5,3 +5,9 @@ @deprecate CuCurrentContext() current_context() @deprecate CuContext(ptr::Union{Ptr,CuPtr}) context(ptr) @deprecate CuDevice(ptr::Union{Ptr,CuPtr}) device(ptr) + +@deprecate CuDefaultStream() default_stream() +@deprecate CuStreamLegacy() legacy_stream() +@deprecate CuStreamPerThread() per_thread_stream() +@deprecate query(s::CuStream) isdone(s) +@deprecate query(e::CuEvent) isdone(e) diff --git a/test/cudadrv.jl b/test/cudadrv.jl index 64a2225e1b..d3da991124 100644 --- a/test/cudadrv.jl +++ b/test/cudadrv.jl @@ -213,8 +213,8 @@ CuEvent(CUDA.EVENT_BLOCKING_SYNC | CUDA.EVENT_DISABLE_TIMING) end @testset "event query" begin - event = CuEvent() - @test CUDA.query(event) == true + event = CuEvent() + @test CUDA.isdone(event) end end @@ -835,7 +835,7 @@ end s = CuStream() synchronize(s) -@test CUDA.query(s) == true +@test CUDA.isdone(s) let s2 = CuStream() @test s != s2 diff --git a/test/nvml.jl b/test/nvml.jl index 1a4329add2..bf342a8696 100644 --- a/test/nvml.jl +++ b/test/nvml.jl @@ -19,6 +19,10 @@ end @testset "devices" begin let dev = NVML.Device(0) @test dev == first(NVML.devices()) + @test NVML.index(dev) == 0 + + str = sprint(io->show(io, "text/plain", dev)) + @test occursin("NVML.Device(0)", str) end cuda_dev = CuDevice(0)