-
Notifications
You must be signed in to change notification settings - Fork 0
/
vector_add.cu
65 lines (54 loc) · 1.92 KB
/
vector_add.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#include <iostream>
using namespace std;
// Element-wise integer addition kernel: c[idx] = a[idx] + b[idx].
//
// Each thread derives one flat index from its block and thread coordinates
// (x dimension only) and handles at most that single element. Threads whose
// index is at or beyond vector_size exit without touching memory, so the
// launch may contain more threads than there are elements.
//
// a, b        : input arrays, read at indices [0, vector_size)
// c           : output array, written at indices [0, vector_size)
// vector_size : number of elements to process
__global__ void vectorAdd(int *a, int *b, int *c, int vector_size) {
    const int idx = threadIdx.x + blockDim.x * blockIdx.x;

    // Guard the tail: surplus threads in the last block have nothing to do.
    if (idx >= vector_size) {
        return;
    }

    const int lhs = a[idx];
    const int rhs = b[idx];
    c[idx] = lhs + rhs;
}
// Reports a failed CUDA runtime call on stderr.
// status : value returned by the runtime call being checked
// what   : short label for the step, included in the message
// Returns true when status is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t status, const char *what) {
    if (status == cudaSuccess)
        return true;
    cerr << "CUDA error (" << what << "): " << cudaGetErrorString(status) << endl;
    return false;
}

// Demo driver: fills two host vectors, adds them on the device with vectorAdd,
// copies the sum back and compares it against a + b computed on the host.
//
// Every host allocation and CUDA runtime call is checked; the first failure
// is reported on stderr, the remaining steps are skipped, and all buffers are
// still released. Returns 0 only when the device result matches the host
// reference, 1 otherwise.
int main(void) {
    bool pass = false;

    cout << "Defining variables & allocating memory in HOST..." << endl;
    const int vector_size = 1000000;
    const size_t n_bytes = sizeof(int) * vector_size;
    int *a = (int *) malloc(n_bytes);
    int *b = (int *) malloc(n_bytes);
    int *c = (int *) malloc(n_bytes);

    // Start as NULL so the cleanup at the bottom is safe after any early exit.
    int *device_a = NULL, *device_b = NULL, *device_c = NULL;

    // Single-pass block: `break` jumps straight to the shared cleanup code.
    do {
        if (a == NULL || b == NULL || c == NULL) {
            cerr << "Host allocation of " << n_bytes << " bytes failed" << endl;
            break;
        }

        cout << "Defining variables & allocating memory in DEVICE..." << endl;
        if (!cudaOk(cudaMalloc((void **) &device_a, n_bytes), "cudaMalloc device_a") ||
            !cudaOk(cudaMalloc((void **) &device_b, n_bytes), "cudaMalloc device_b") ||
            !cudaOk(cudaMalloc((void **) &device_c, n_bytes), "cudaMalloc device_c"))
            break;

        cout << "Initializing variables in HOST..." << endl;
        for (int i = 0; i < vector_size; i++) {
            a[i] = i;
            b[i] = vector_size - i;
        }

        cout << "Copying HOST variables to DEVICE variables..." << endl;
        if (!cudaOk(cudaMemcpy(device_a, a, n_bytes, cudaMemcpyHostToDevice), "copy a to device") ||
            !cudaOk(cudaMemcpy(device_b, b, n_bytes, cudaMemcpyHostToDevice), "copy b to device"))
            break;

        cout << "Defining & calling kernel..." << endl;
        const int block_size = 1024;
        // Ceiling division: enough blocks to cover a final partial block.
        const int blocks_count = (vector_size + block_size - 1) / block_size;
        vectorAdd<<<blocks_count, block_size>>>(device_a, device_b, device_c, vector_size);
        // A launch does not return a status; configuration errors are only
        // visible through cudaGetLastError().
        if (!cudaOk(cudaGetLastError(), "vectorAdd launch"))
            break;

        cout << "Copying DEVICE variables to HOST variables..." << endl;
        // This blocking copy also reports errors raised while the kernel ran.
        if (!cudaOk(cudaMemcpy(c, device_c, n_bytes, cudaMemcpyDeviceToHost), "copy c to host"))
            break;

        cout << "Checking result..." << endl;
        pass = true;
        for (int i = 0; i < vector_size; i++) {
            if (c[i] != a[i] + b[i]) {
                pass = false;
                break;
            }
        }
    } while (false);

    if (pass)
        cout << "Passed!" << endl;
    else
        cout << "Failed!" << endl;

    cout << "Freeing memory..." << endl;
    // free(NULL) and cudaFree(NULL) are no-ops, so this is safe on every path.
    free(a);
    free(b);
    free(c);
    cudaOk(cudaFree(device_a), "cudaFree device_a");
    cudaOk(cudaFree(device_b), "cudaFree device_b");
    cudaOk(cudaFree(device_c), "cudaFree device_c");

    // Let callers (scripts, CI) see a verification or runtime failure.
    return pass ? 0 : 1;
}