-
Notifications
You must be signed in to change notification settings - Fork 29
/
pointer.jl
97 lines (76 loc) · 3.14 KB
/
pointer.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# pointer.jl
#
# Julia type and memory management for CUDA pointers. Combined
# from CUDA.jl, CUDArt.jl, and Nick Henderson's fork of CUDArt.jl.
# A raw CUDA pointer
type CudaPtr{T}
ptr::Ptr{T}
end
# Type alias for previous name
typealias CudaDevicePtr CudaPtr
#############################
# Low-level memory handling #
#############################
CudaPtr() = CudaPtr(C_NULL)
CudaPtr(T::Type) = CudaPtr(convert(Ptr{T},C_NULL))
convert{T}(::Type{Ptr{T}}, p::CudaPtr{T}) = p.ptr
convert{T}(::Type{Ptr{Void}}, p::CudaPtr{T}) = convert(Ptr{Void}, p.ptr)
copy(p::CudaPtr) = CudaPtr(p.ptr)
rawpointer(p::CudaPtr) = p
# Enable both manual and garbage-collected memory management.
# If you need to free resources, you can call free manually.
# cuda_ptrs keeps track of all memory that needs to be freed,
# and prevents double-free (which otherwise causes serious problems).
# key = ptr, val = device id
const cuda_ptrs = Dict{Any,Int}()
function malloc(T::Type, n::Integer)
p = Ptr{Void}[C_NULL]
nbytes = sizeof(T)*n
rt.cudaMalloc(p, nbytes)
cptr = CudaPtr(convert(Ptr{T},p[1]))
finalizer(cptr, free)
cuda_ptrs[WeakRef(cptr)] = device()
cptr
end
malloc(nbytes::Integer) = malloc(Uint8, nbytes)
function free{T}(p::CudaPtr{T})
cnull = convert(Ptr{T}, C_NULL)
if p.ptr != cnull && haskey(cuda_ptrs, p)
delete!(cuda_ptrs, p)
rt.cudaFree(p)
p.ptr = cnull
end
end
typealias Ptrs Union(Ptr, CudaPtr, rt.cudaPitchedPtr)
typealias CudaPtrs Union(CudaPtr, rt.cudaPitchedPtr)
cudamemcpykind(dstp::Ptr, srcp::Ptr) = rt.cudaMemcpyHostToHost
cudamemcpykind(dstp::CudaPtrs, srcp::Ptr) = rt.cudaMemcpyHostToDevice
cudamemcpykind(dstp::Ptr, srcp::CudaPtrs) = rt.cudaMemcpyDeviceToHost
cudamemcpykind(dstp::CudaPtrs, srcp::CudaPtrs) = rt.cudaMemcpyDeviceToDevice
cudamemcpykind(dst::Ptrs, src::Ptrs) = error("This should never happen") # prevent a useless ambiguity warning
cudamemcpykind(dst, src::Ptrs) = cudamemcpykind(pointer(dst), src)
cudamemcpykind(dst::Ptrs, src) = cudamemcpykind(dst, pointer(src))
cudamemcpykind(dst, src) = cudamemcpykind(pointer(dst), pointer(src))
## converting pointers to an appropriate unsigned ##
const CUDA_NULL = CudaPtr()
# pointer to integer
convert(::Type{Uint}, x::CudaPtr) = convert(Uint,x.ptr)
convert{T<:Integer}(::Type{T}, x::CudaPtr) = convert(T,unsigned(x))
# integer to pointer
convert{T}(::Type{CudaPtr{T}}, x::Integer) = CudaPtr(convert(Ptr{T},x))
# pointer to pointer
convert{T}(::Type{CudaPtr{T}}, p::CudaPtr{T}) = p
convert{T}(::Type{CudaPtr{T}}, p::CudaPtr) = CudaPtr(convert(Ptr{T},p.ptr))
integer(x::CudaPtr) = convert(Uint, x.ptr)
unsigned(x::CudaPtr) = convert(Uint, x.ptr)
eltype{T}(::CudaPtr{T}) = T
## limited pointer arithmetic & comparison ##
==(x::CudaPtr, y::CudaPtr) = uint(x) == uint(y)
-(x::CudaPtr, y::CudaPtr) = uint(x) - uint(y)
+(x::CudaPtr, y::Integer) = oftype(x, uint(uint(x) + y))
-(x::CudaPtr, y::Integer) = oftype(x, uint(uint(x) - y))
+(x::Integer, y::CudaPtr) = y + x
zero{T}(::Type{CudaPtr{T}}) = convert(CudaPtr{T}, 0)
zero{T}(x::CudaPtr{T}) = convert(CudaPtr{T}, 0)
one{T}(::Type{CudaPtr{T}}) = convert(CudaPtr{T}, 1)
one{T}(x::CudaPtr{T}) = convert(CudaPtr{T}, 1)