-
Notifications
You must be signed in to change notification settings - Fork 5
/
alloc_gpu.impala
54 lines (46 loc) · 1.6 KB
/
alloc_gpu.impala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
// Builds a 2D iteration construct that runs `body` on an accelerator.
//
// `iteration(body)` returns a function taking the accelerator and the extent
// of the index space. That function launches work-groups of 128 x 1 x 1 work
// items, calls `body(x, y)` with each work item's `gidx()` / `gidy()` for every
// x in [0, width), and calls `acc.sync()` before returning.
//
// The launch width is rounded up to a multiple of the work-group width so that
// widths which are not a multiple of 128 are still covered completely; the
// surplus work items in the padded region are masked out and never reach `body`.
fn @iteration(body: fn(i32, i32) -> ()) = @|acc: Accelerator, width: i32, height: i32| {
    let block_x = 128;
    let block   = (block_x, 1, 1);
    // NOTE(review): assumes the grid is given in work items (not work-groups) and
    // that the backend needs it to be a multiple of the block size — confirm
    // against the runtime. For widths that already are multiples of 128 this is
    // exactly the original launch configuration.
    let grid_x  = ((width + block_x - 1) / block_x) * block_x;
    let grid    = (grid_x, height, 1);
    for work_item in acc.exec(grid, block) {
        let x = work_item.gidx();
        let y = work_item.gidy();
        // Skip the padding introduced by rounding the launch width up.
        if x < width {
            @body(x, y);
        }
    }
    acc.sync();
};
// Loads the `i`-th f32 element of `buf`, viewing `buf.data` as a read-only
// array of f32 in address space 1.
fn @read(buf: Buffer, i: i32) -> f32 {
    let elems = bitcast[&addrspace(1)[f32]](buf.data);
    elems(i)
}
// Stores `v` into the `i`-th f32 element of `buf`, viewing `buf.data` as a
// mutable array of f32 in address space 1.
fn @write(buf: Buffer, i: i32, v: f32) -> () {
    let elems = bitcast[&mut addrspace(1)[f32]](buf.data);
    elems(i) = v;
}
// Round-trip test for accelerator buffers: fills a buffer obtained from
// `alloc_cpu`, copies it into an accelerator allocation, copies it element by
// element into a second accelerator allocation inside a kernel, copies that
// back, and compares the result with the input.
//
// Prints "Test PASSED!" when every element matches and "Test FAILED!" otherwise.
// Returns the number of mismatching elements, capped at 255 (0 on success).
#[export]
fn main() -> i32 {
    let width  = 2048;
    let height = 2048;
    let count  = width * height;

    let acc = accelerator(device_id);

    // All four buffers hold `count` f32 values.
    let bytes   = count as i64 * sizeof[f32]();
    let arr     = alloc_cpu(bytes);
    let out     = alloc_cpu(bytes);
    let arr_gpu = acc.alloc(bytes);
    let out_gpu = acc.alloc(bytes);

    // Input holds its own index as a float; output starts out zeroed so that a
    // kernel that writes nothing is detected by the comparison below.
    let src_init = bitcast[&mut[f32]](arr.data);
    let dst_init = bitcast[&mut[f32]](out.data);
    for i in range(0, count) {
        src_init(i) = i as f32;
        dst_init(i) = 0:f32;
    }

    copy(arr, arr_gpu);

    // Kernel: plain element-wise copy from `arr_gpu` to `out_gpu`, row-major.
    for x, y in iteration(acc, width, height) {
        let offset = y * width + x;
        let value  = read(arr_gpu, offset);
        write(out_gpu, offset, value);
    }

    copy(out_gpu, out);

    // Count the elements that did not survive the round trip.
    let result   = bitcast[&[f32]](out.data);
    let expected = bitcast[&[f32]](arr.data);
    let mut mismatches = 0;
    for i in range(0, count) {
        if result(i) != expected(i) {
            mismatches += 1;
        }
    }

    if mismatches != 0 {
        print_string("Test FAILED!\n");
    } else {
        print_string("Test PASSED!\n");
    }

    release(out_gpu);
    release(arr_gpu);
    release(arr);
    release(out);

    // Cap the result at 255; presumably so it stays a valid process exit
    // status — TODO confirm.
    if mismatches < 256 { mismatches } else { 255 }
}