-
Notifications
You must be signed in to change notification settings - Fork 52
/
Copy pathpython_kernel.py
executable file
·53 lines (41 loc) · 1.74 KB
/
python_kernel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#!/usr/bin/env python
"""This is the minimal example from the README"""
import json
import numpy as np
from kernel_tuner.kernelbuilder import PythonKernel
def kernelbuilder_example():
    """Wrap a vector_add GPU kernel in a PythonKernel, run it once and check it.

    The kernel source is embedded as a string, two random float32 vectors are
    generated with NumPy, and the kernel is invoked through the PythonKernel
    object as if it were an ordinary Python function. The outcome is compared
    against ``a + b`` computed with NumPy.

    Returns:
        The value produced by calling the PythonKernel instance.
    """
    # The kernel source lives in a string so the example is self-contained;
    # pointing to a file that holds the kernel code works as well.
    kernel_source = """
__global__ void vector_add(float *c, float *a, float *b, int n) {
int i = blockIdx.x * block_size_x + threadIdx.x;
if (i<n) {
c[i] = a[i] + b[i];
}
}
"""

    # Host-side data for the kernel: two random operands, a zeroed
    # destination buffer and the element count as a 32-bit integer.
    problem_size = 10000000
    vec_a = np.random.randn(problem_size).astype(np.float32)
    vec_b = np.random.randn(problem_size).astype(np.float32)
    vec_sum = np.zeros_like(vec_b)
    length = np.int32(problem_size)

    # Type and order of the arguments have to match the GPU code's signature.
    kernel_args = [vec_sum, vec_a, vec_b, length]

    # One flag per entry of the argument list: only the first argument (c)
    # is an output, every other argument is an input.
    is_output = [True, False, False, False]
    is_input = [not flag for flag in is_output]

    # A single, specific kernel configuration to instantiate.
    kernel_params = {"block_size_x": 128}

    # Python object representing the kernel; convenient for applications
    # that want to call the same GPU kernel frequently.
    vector_add = PythonKernel(
        "vector_add",
        kernel_source,
        problem_size,
        kernel_args,
        kernel_params,
        inputs=is_input,
        outputs=is_output,
    )

    # The PythonKernel instance is used like a regular Python function.
    kernel_output = vector_add(vec_sum, vec_a, vec_b, length)

    # Verify against the same sum computed on the host with NumPy.
    expected = vec_a + vec_b
    assert np.allclose(kernel_output, expected)
    return kernel_output
# Run the example only when this file is executed as a script, not on import.
if __name__ == "__main__":
    kernelbuilder_example()