Skip to content

Commit

Permalink
Added get_profiling_info method to the Event class
Browse files Browse the repository at this point in the history
  • Loading branch information
ajkxyz committed Mar 18, 2014
1 parent cdf3ce0 commit b9a1b6a
Show file tree
Hide file tree
Showing 6 changed files with 145 additions and 3 deletions.
36 changes: 35 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,39 @@ Python cffi OpenCL bindings and helper classes.

Tested with Python 2.7, Python 3.3 and PyPy on Linux and Windows.

Covered functions:
```
clBuildProgram
clCreateBuffer
clCreateCommandQueue
clCreateContext
clCreateKernel
clCreateProgramWithSource
clEnqueueMapBuffer
clEnqueueNDRangeKernel
clEnqueueReadBuffer
clEnqueueUnmapMemObject
clEnqueueWriteBuffer
clFinish
clFlush
clGetDeviceIDs
clGetDeviceInfo
clGetEventProfilingInfo
clGetPlatformIDs
clGetPlatformInfo
clGetProgramBuildInfo
clReleaseCommandQueue
clReleaseContext
clReleaseEvent
clReleaseKernel
clReleaseMemObject
clReleaseProgram
clSetKernelArg
clWaitForEvents
```

To install the module run:
```bash
python setup.py install
Expand All @@ -29,7 +62,8 @@ for PyPy:
PYTHONPATH=src pypy tests/test_api.py
```

Currently, PyPy numpy support may be incomplete, so tests which use numpy arrays may fail.
Currently, PyPy numpy support may be incomplete,
so tests which use numpy arrays may fail.

Example usage:

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
setup(
name="opencl4py",
description="OpenCL cffi bindings and helper classes",
version="1.0.1",
version="1.0.2",
license="Simplified BSD",
author="Samsung Electronics Co.,Ltd.",
author_email="a.kazantsev@samsung.com",
Expand Down
7 changes: 6 additions & 1 deletion src/opencl4py/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
CL_DEVICE_TYPE_ACCELERATOR,
CL_DEVICE_TYPE_CUSTOM,
CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,
CL_QUEUE_PROFILING_ENABLE,
CL_MAP_READ,
CL_MAP_WRITE,
CL_MAP_WRITE_INVALIDATE_REGION,
Expand All @@ -57,7 +58,11 @@
CL_MEM_READ_ONLY,
CL_MEM_USE_HOST_PTR,
CL_MEM_ALLOC_HOST_PTR,
CL_MEM_COPY_HOST_PTR)
CL_MEM_COPY_HOST_PTR,
CL_PROFILING_COMMAND_QUEUED,
CL_PROFILING_COMMAND_SUBMIT,
CL_PROFILING_COMMAND_START,
CL_PROFILING_COMMAND_END)


def realign_array(a, align, np):
Expand Down
12 changes: 12 additions & 0 deletions src/opencl4py/_cffi.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
CL_DEVICE_GLOBAL_MEM_SIZE = 0x101F
CL_DEVICE_MEM_BASE_ADDR_ALIGN = 0x1019
CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE = 1
CL_QUEUE_PROFILING_ENABLE = 2
CL_PROGRAM_BUILD_LOG = 0x1183
CL_MAP_READ = 1
CL_MAP_WRITE = 2
Expand All @@ -65,6 +66,10 @@
CL_MEM_USE_HOST_PTR = 8
CL_MEM_ALLOC_HOST_PTR = 16
CL_MEM_COPY_HOST_PTR = 32
CL_PROFILING_COMMAND_QUEUED = 0x1280
CL_PROFILING_COMMAND_SUBMIT = 0x1281
CL_PROFILING_COMMAND_START = 0x1282
CL_PROFILING_COMMAND_END = 0x1283


# Create parser
Expand Down Expand Up @@ -97,6 +102,7 @@ def initialize():
typedef uint64_t cl_mem_flags;
typedef uint32_t cl_bool;
typedef uint64_t cl_map_flags;
typedef uint32_t cl_profiling_info;
typedef void* cl_platform_id;
typedef void* cl_device_id;
Expand Down Expand Up @@ -235,6 +241,12 @@ def initialize():
cl_uint num_events_in_wait_list,
const cl_event *event_wait_list,
cl_event *event);
cl_int clGetEventProfilingInfo(cl_event event,
cl_profiling_info param_name,
size_t param_value_size,
void *param_value,
size_t *param_value_size_ret);
"""

# Parse
Expand Down
49 changes: 49 additions & 0 deletions src/opencl4py/_py.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,18 @@ class Event(CL):
Attributes:
event_: cffi OpenCL event handle.
profiling_values:
dictionary of profiling values
if get_profiling_info was ever called;
keys: CL_PROFILING_COMMAND_QUEUED,
CL_PROFILING_COMMAND_SUBMIT,
CL_PROFILING_COMMAND_START,
CL_PROFILING_COMMAND_END;
values: the current device time counter in seconds (float),
or 0 if there was an error; in that case, the corresponding
profiling_errors entry will be set to the error code.
profiling_errors: dictionary of profiling errors
if get_profiling_info was ever called.
"""
def __init__(self, event_):
super(Event, self).__init__()
Expand All @@ -108,6 +120,43 @@ def wait(self):
"""
Event.wait_multi((self,), self.lib_)

def get_profiling_info(self, raise_exception=True):
    """Query the profiling counters recorded for this event.

    The queue that produced the event should have been created with
    the CL_QUEUE_PROFILING_ENABLE flag, and the event should already
    be complete (i.e. it has been waited for).

    Each of CL_PROFILING_COMMAND_QUEUED, CL_PROFILING_COMMAND_SUBMIT,
    CL_PROFILING_COMMAND_START and CL_PROFILING_COMMAND_END is requested
    via clGetEventProfilingInfo. The raw counter is scaled by 1.0e-9;
    a failed query yields 0.0 and its return code is recorded instead.

    Parameters:
        raise_exception: when true, raise CLRuntimeError if any of the
                         queries returned a non-zero code;
                         self.profiling_values and
                         self.profiling_errors are assigned before
                         the exception is raised.

    Returns:
        tuple of (profiling_values, profiling_errors), both being
        dictionaries keyed by the CL_PROFILING_COMMAND_* constants.
    """
    # One-element cl_ulong array reused as the output slot of every query.
    counter = cl.ffi.new("cl_ulong[]", 1)
    counter_size = cl.ffi.sizeof(counter)
    values = {}
    errors = {}
    for param in (cl.CL_PROFILING_COMMAND_QUEUED,
                  cl.CL_PROFILING_COMMAND_SUBMIT,
                  cl.CL_PROFILING_COMMAND_START,
                  cl.CL_PROFILING_COMMAND_END):
        # Clear the slot so a failed call cannot leak a previous reading.
        counter[0] = 0
        status = self.lib_.clGetEventProfilingInfo(
            self.event_, param, counter_size, counter, cl.NULL)
        if status:
            values[param] = 0.0
        else:
            values[param] = 1.0e-9 * counter[0]
        errors[param] = status

    # Publish the results before any exception is raised.
    self.profiling_values = values
    self.profiling_errors = errors

    if raise_exception:
        # First non-zero return code, or 0 when every query succeeded.
        failed = next((code for code in errors.values() if code), 0)
        if failed:
            raise CLRuntimeError("clGetEventProfilingInfo() failed with "
                                 "error %d" % failed, failed)
    return (values, errors)

def release(self):
if self.event_ is not None:
self.lib_.clReleaseEvent(self.event_)
Expand Down
42 changes: 42 additions & 0 deletions tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,48 @@ def test_api_nonumpy(self):
del _b
del _a

def test_event_profiling(self):
    """Run the test kernel on a profiling-enabled queue and verify
    the profiling dictionaries exposed by the resulting event.
    """
    import numpy

    # Platform -> context -> program -> kernel, plus a profiling queue.
    all_platforms = cl.Platforms()
    context = all_platforms.create_some_context()
    program = context.create_program(self.src_test)
    kernel = program.get_kernel("test")
    queue = context.create_queue(context.devices[0],
                                 cl.CL_QUEUE_PROFILING_ENABLE)

    # Host-side input data.
    ramp = numpy.arange(100000, dtype=numpy.float32)
    host_a = numpy.sin(ramp)
    host_b = numpy.cos(ramp)
    scalar = numpy.array([1.2345], dtype=numpy.float32)

    # Device buffers initialised from the host arrays.
    copy_flag = cl.CL_MEM_COPY_HOST_PTR
    dev_a = context.create_buffer(cl.CL_MEM_READ_WRITE | copy_flag, host_a)
    dev_b = context.create_buffer(cl.CL_MEM_READ_ONLY | copy_flag, host_b)

    # Bind the kernel arguments.
    kernel.set_arg(0, dev_a)
    kernel.set_arg(1, dev_b)
    kernel.set_arg(2, scalar[0:1])

    # Launch the kernel and block until it has finished.
    event = queue.execute_kernel(kernel, [host_a.size], None)
    event.wait()

    # A CLRuntimeError is tolerated here; the per-entry checks below
    # still run against whatever the event recorded.
    try:
        values, errors = event.get_profiling_info()
    except cl.CLRuntimeError:
        pass
    else:
        self.assertEqual(values, event.profiling_values)
        self.assertEqual(errors, event.profiling_errors)

    for name in event.profiling_values:
        value = event.profiling_values[name]
        code = event.profiling_errors[name]
        # Exactly one of (non-zero value, non-zero error code) must hold.
        self.assertTrue(bool(value) != bool(code))
        self.assertIs(type(value), float)
        self.assertIs(type(code), int)


if __name__ == "__main__":
unittest.main()

0 comments on commit b9a1b6a

Please sign in to comment.