# Copyright 2017 the Arraymancer contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
  ../laser/dynamic_stack_arrays,
  ../laser/tensor/datatypes,
  ../private/sequninit,
  nimblas,
  # Standard library
  std/[complex, typetraits]

export nimblas.OrderType, complex
export datatypes, dynamic_stack_arrays
type
  CudaStorage*[T: SomeFloat] = object
    ## Opaque seq-like structure for storage on the Cuda backend.
    ##
    ## Nim's garbage collector will automatically ask CUDA to clear the GPU memory if the data becomes unused.
    ##
    # TODO: Forward declaring this and making this completely private prevents assignment in newCudaStorage from working
    Flen*: int
    Fdata*: ptr UncheckedArray[T]
    Fref_tracking*: ref[ptr UncheckedArray[T]] # We keep ref tracking for the GC in a separate field to avoid double indirection.

  CudaTensor*[T: SomeFloat] = object
    ## Tensor data structure stored on an Nvidia GPU (Cuda)
    ## - ``shape``: Dimensions of the CudaTensor
    ## - ``strides``: Number of items to skip to get the next item along a dimension.
    ## - ``offset``: Offset to get the first item of the CudaTensor. Note: the offset can be negative, in particular for slices.
    ## - ``storage``: An opaque data storage for the CudaTensor
    ##
    ## Warning ⚠:
    ##   Assignment ``var a = b`` does not copy the data. Data modification on one CudaTensor will be reflected on the other.
    ##   However modification of metadata (shape, strides or offset) will not affect the other tensor.
    ##   Explicit copies can be made with ``clone``: ``var a = b.clone`` (see the sketch after this type section).
    shape*: Metadata
    strides*: Metadata
    offset*: int
    storage*: CudaStorage[T]
  ClStorage*[T: SomeFloat] = object
    ## Opaque seq-like structure for storage on the OpenCL backend.
    Flen*: int
    Fdata*: ptr UncheckedArray[T]
    Fref_tracking*: ref[ptr UncheckedArray[T]] # We keep ref tracking for the GC in a separate field to avoid double indirection.

  ClTensor*[T: SomeFloat] = object
    ## Tensor data structure stored on an OpenCL device (CPU, GPU, FPGAs or other accelerators)
    ## - ``shape``: Dimensions of the ClTensor
    ## - ``strides``: Number of items to skip to get the next item along a dimension.
    ## - ``offset``: Offset to get the first item of the ClTensor. Note: the offset can be negative, in particular for slices.
    ## - ``storage``: An opaque data storage for the ClTensor
    ##
    ## Warning ⚠:
    ##   Assignment ``var a = b`` does not copy the data. Data modification on one ClTensor will be reflected on the other.
    ##   However modification of metadata (shape, strides or offset) will not affect the other tensor.
    ##   Explicit copies can be made with ``clone``: ``var a = b.clone``
    shape*: Metadata
    strides*: Metadata
    offset*: int
    storage*: ClStorage[T]

  AnyTensor*[T] = Tensor[T] or CudaTensor[T] or ClTensor[T]
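
# A minimal sketch of the shallow-assignment semantics documented above.
# Comments only: constructing a CudaTensor/ClTensor requires a CUDA/OpenCL
# enabled build, so `b` below is a hypothetical, already-initialized tensor.
#
#   var a = b          # `a` and `b` share the same GPU storage: writes through
#                      # one are visible through the other, but each keeps its
#                      # own shape/strides/offset metadata.
#   var c = b.clone()  # `c` owns an independent deep copy of the data.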
# ###############
# Field accessors
# ###############

proc `data=`*[T](t: var Tensor[T], s: seq[T]) {.deprecated: "Use copyFromRaw instead".} =
  # Set the tensor's raw data.
  # This is intended for library writers.
  assert s.len > 0
  when T is KnownSupportsCopyMem:
    t.copyFromRaw(s[0].unsafeAddr, s.len)
  else:
    t.storage.raw_buffer = s
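
# Illustrative replacement for the deprecated setter above, copying a seq `s`
# of mem-copyable elements into an existing tensor `t` (hypothetical names,
# and assuming `t` was already allocated with room for `s.len` elements):
#   t.copyFromRaw(s[0].unsafeAddr, s.len)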
# ################
# Tensor Metadata
# ################

func rank*[T](t: CudaTensor[T] or ClTensor[T]): range[0 .. LASER_MAXRANK] {.inline.} =
  t.shape.len

func size*[T](t: CudaTensor[T] or ClTensor[T]): Natural {.inline.} =
  t.shape.product
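
# For example, a CudaTensor of shape [2, 3, 4] has rank 3 and size 2 * 3 * 4 = 24.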
proc shape_to_strides*(shape: Metadata, layout: OrderType = rowMajor, result: var Metadata) {.noSideEffect.} =
  ## Input:
  ## - A shape (Metadata), for example [3,5] for a 3x5 matrix
  ## - Optionally rowMajor (C layout - default) or colMajor (Fortran)
  ## Returns:
  ## - The strides in C or Fortran order corresponding to this shape and layout
  ##
  ## Arraymancer defaults to rowMajor. Temporarily, CudaTensors are colMajor by default.
  # See the design document for further considerations.
  var accum = 1
  result.len = shape.len

  if layout == rowMajor:
    for i in countdown(shape.len - 1, 0):
      result[i] = accum
      accum *= shape[i]
    return

  for i in 0 ..< shape.len:
    result[i] = accum
    accum *= shape[i]
  return
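
# A minimal runnable sketch of `shape_to_strides` for a 3x5 matrix, using only
# the Metadata operations already exercised above (`len=`, `[]=`, `[]`):
when isMainModule:
  var shape, strides: Metadata
  shape.len = 2
  shape[0] = 3
  shape[1] = 5

  shape.shape_to_strides(rowMajor, strides)
  assert strides[0] == 5 and strides[1] == 1 # C order: last dimension is contiguous

  shape.shape_to_strides(colMajor, strides)
  assert strides[0] == 1 and strides[1] == 3 # Fortran order: first dimension is contiguous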
func is_C_contiguous*(t: CudaTensor or ClTensor): bool =
  ## Check if the tensor follows the C convention / is row major
  var cur_size = 1
  for i in countdown(t.rank - 1, 0):
    # 1. We should ignore strides on dimensions of size 1
    # 2. Each stride must equal the product of the sizes of all following dimensions
    if t.shape[i] != 1 and t.strides[i] != cur_size:
      return false
    cur_size *= t.shape[i]
  return true
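
# Example: shape [2, 3] with strides [3, 1] is C-contiguous. Shape [2, 1, 3]
# with strides [3, 7, 1] is too: the stride of the size-1 dimension is
# ignored (rule 1 above), and the remaining strides match the running product.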
proc is_F_contiguous*(t: AnyTensor): bool {.noSideEffect, inline.} =
  ## Check if the tensor follows the Fortran convention / is column major
  var z = 1
  for i in 0 ..< t.shape.len:
    # 1. We should ignore strides on dimensions of size 1
    # 2. Each stride must equal the product of the sizes of all preceding dimensions
    if t.shape[i] != 1 and t.strides[i] != z:
      return false
    z *= t.shape[i]
  return true

proc isContiguous*(t: AnyTensor): bool {.noSideEffect, inline.} =
  ## Check if the tensor is contiguous (in either C or Fortran order)
  return t.is_C_contiguous or t.is_F_contiguous
# ##################
# Raw pointer access
# ##################

proc get_data_ptr*[T: KnownSupportsCopyMem](t: Tensor[T]): ptr T {.noSideEffect, inline.} =
  ## Input:
  ## - A tensor
  ## Returns:
  ## - A pointer to the real start of its data (no offset)
  cast[ptr T](t.storage.raw_buffer)

proc get_data_ptr*[T: not KnownSupportsCopyMem](t: AnyTensor[T]): ptr T {.error: "`get_data_ptr`" &
  " cannot be safely used for GC'ed types!".}

proc get_offset_ptr*[T: KnownSupportsCopyMem](t: Tensor[T]): ptr T {.noSideEffect, inline.} =
  ## Input:
  ## - A tensor
  ## Returns:
  ## - A pointer to the offset start of its data
  t.storage.raw_buffer[t.offset].unsafeAddr

proc get_offset_ptr*[T: not KnownSupportsCopyMem](t: AnyTensor[T]): ptr T {.error: "`get_offset_ptr`" &
  " cannot be safely used for GC'ed types!".}
proc get_data_ptr*[T](t: CudaTensor[T] or ClTensor[T]): ptr T {.noSideEffect, inline.} =
  ## Input:
  ## - A tensor
  ## Returns:
  ## - A pointer to the real start of its data (no offset)
  cast[ptr T](t.storage.Fdata)

proc get_offset_ptr*[T](t: CudaTensor[T] or ClTensor[T]): ptr T {.noSideEffect, inline.} =
  ## Input:
  ## - A tensor
  ## Returns:
  ## - A pointer to the offset start of its data
  t.storage.Fdata[t.offset].unsafeAddr

proc dataArray*[T: KnownSupportsCopyMem](t: Tensor[T]): ptr UncheckedArray[T] {.noSideEffect, inline, deprecated: "Use toUnsafeView instead".} =
  ## Input:
  ## - A tensor
  ## Returns:
  ## - A pointer to the offset start of the data.
  ##   The return value supports array indexing.
  cast[ptr UncheckedArray[T]](t.unsafe_raw_offset[t.offset].unsafeAddr)

proc dataArray*[T: not KnownSupportsCopyMem](t: Tensor[T]): ptr UncheckedArray[T] {.error: "`dataArray`" &
  " is deprecated for mem-copyable types and not supported for GC'ed types!".}