Skip to content

Commit

Permalink
Merge pull request #1032 from IntelPython/feature/support_sycl_queue_…
Browse files Browse the repository at this point in the history
…in_array_ctors

Feature/support sycl queue in array constructor functions
  • Loading branch information
Diptorup Deb authored May 31, 2023
2 parents c359fa4 + 50dcaec commit f54cdc1
Show file tree
Hide file tree
Showing 20 changed files with 1,637 additions and 529 deletions.
67 changes: 38 additions & 29 deletions numba_dpex/core/runtime/_dpexrt_python.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,15 @@ static NRT_MemInfo *DPEXRT_MemInfo_fill(NRT_MemInfo *mi,
bool dest_is_float,
bool value_is_float,
int64_t value,
const char *device);
const DPCTLSyclQueueRef qref);
static NRT_MemInfo *NRT_MemInfo_new_from_usmndarray(PyObject *ndarrobj,
void *data,
npy_intp nitems,
npy_intp itemsize,
DPCTLSyclQueueRef qref);
static NRT_MemInfo *DPEXRT_MemInfo_alloc(npy_intp size,
size_t usm_type,
const DPCTLSyclQueueRef qref);
static void usmndarray_meminfo_dtor(void *ptr, size_t size, void *info);
static PyObject *box_from_arystruct_parent(arystruct_t *arystruct,
int ndim,
Expand Down Expand Up @@ -477,17 +480,23 @@ static NRT_MemInfo *NRT_MemInfo_new_from_usmndarray(PyObject *ndarrobj,
* @param size The size of memory (data) owned by the NRT_MemInfo
* object.
* @param usm_type The usm type of the memory.
* @param device The device on which the memory was allocated.
* @param qref The sycl queue on which the memory was allocated. Note
* that the ownership of the qref object is passed to
* the NRT_MemInfo. As such, it is the caller's
* responsibility to ensure the qref is nt owned by any
* other object and is not deallocated. For such cases,
* the caller should copy the DpctlSyclQueueRef and
* pass a copy of the original qref.
* @return {return} A new NRT_MemInfo object, NULL if no NRT_MemInfo
* object could be created.
*/
static NRT_MemInfo *
DPEXRT_MemInfo_alloc(npy_intp size, size_t usm_type, const char *device)
static NRT_MemInfo *DPEXRT_MemInfo_alloc(npy_intp size,
size_t usm_type,
const DPCTLSyclQueueRef qref)
{
NRT_MemInfo *mi = NULL;
NRT_ExternalAllocator *ext_alloca = NULL;
MemInfoDtorInfo *midtor_info = NULL;
DPCTLSyclQueueRef qref = NULL;

DPEXRT_DEBUG(drt_debug_print(
"DPEXRT-DEBUG: Inside DPEXRT_MemInfo_alloc %s, line %d\n", __FILE__,
Expand All @@ -499,15 +508,6 @@ DPEXRT_MemInfo_alloc(npy_intp size, size_t usm_type, const char *device)
goto error;
}

if (!(qref = (DPCTLSyclQueueRef)DPEXRTQueue_CreateFromFilterString(device)))
{
DPEXRT_DEBUG(
drt_debug_print("DPEXRT-ERROR: Could not create a sycl::queue from "
"filter string: %s at %s %d.\n",
device, __FILE__, __LINE__));
goto error;
}

// Allocate a new NRT_ExternalAllocator
if (!(ext_alloca = NRT_ExternalAllocator_new_for_usm(qref, usm_type)))
goto error;
Expand All @@ -520,15 +520,22 @@ DPEXRT_MemInfo_alloc(npy_intp size, size_t usm_type, const char *device)
mi->dtor_info = midtor_info;
mi->data = ext_alloca->malloc(size, qref);

DPEXRT_DEBUG(
DPCTLSyclDeviceRef device_ref; device_ref = DPCTLQueue_GetDevice(qref);
drt_debug_print(
"DPEXRT-DEBUG: DPEXRT_MemInfo_alloc, device info in %s at %d:\n%s",
__FILE__, __LINE__, DPCTLDeviceMgr_GetDeviceInfoStr(device_ref));
DPCTLDevice_Delete(device_ref););

if (mi->data == NULL)
goto error;

mi->size = size;
mi->external_allocator = ext_alloca;
DPEXRT_DEBUG(drt_debug_print(
"DPEXRT-DEBUG: DPEXRT_MemInfo_alloc mi=%p "
"external_allocator=%p for usm_type %zu on device %s, %s at %d\n",
mi, ext_alloca, usm_type, device, __FILE__, __LINE__));
"external_allocator=%p for usm_type=%zu on queue=%p, %s at %d\n",
mi, ext_alloca, usm_type, DPCTLQueue_Hash(qref), __FILE__, __LINE__));

return mi;

Expand All @@ -551,7 +558,7 @@ DPEXRT_MemInfo_alloc(npy_intp size, size_t usm_type, const char *device)
* @param dest_is_float True if the destination array's dtype is float.
* @param value_is_float True if the value to be filled is float.
* @param value The value to be used to fill an array.
* @param device The device on which the memory was allocated.
* @param qref The queue on which the memory was allocated.
* @return NRT_MemInfo* A new NRT_MemInfo object, NULL if no NRT_MemInfo
* object could be created.
*/
Expand All @@ -560,9 +567,8 @@ static NRT_MemInfo *DPEXRT_MemInfo_fill(NRT_MemInfo *mi,
bool dest_is_float,
bool value_is_float,
int64_t value,
const char *device)
const DPCTLSyclQueueRef qref)
{
DPCTLSyclQueueRef qref = NULL;
DPCTLSyclEventRef eref = NULL;
size_t count = 0, size = 0, exp = 0;

Expand Down Expand Up @@ -603,9 +609,6 @@ static NRT_MemInfo *DPEXRT_MemInfo_fill(NRT_MemInfo *mi,
goto error;
}

if (!(qref = (DPCTLSyclQueueRef)DPEXRTQueue_CreateFromFilterString(device)))
goto error;

switch (exp) {
case 3:
{
Expand All @@ -621,7 +624,7 @@ static NRT_MemInfo *DPEXRT_MemInfo_fill(NRT_MemInfo *mi,
}
else if (!dest_is_float && value_is_float) {
double *p = (double *)&value;
bc.i64_ = *p;
bc.i64_ = (int64_t)*p;
}
else {
bc.i64_ = value;
Expand All @@ -635,7 +638,7 @@ static NRT_MemInfo *DPEXRT_MemInfo_fill(NRT_MemInfo *mi,
{
if (dest_is_float && value_is_float) {
double *p = (double *)(&value);
bc.f_ = *p;
bc.f_ = (float)*p;
}
else if (dest_is_float && !value_is_float) {
// To stop warning: dereferencing type-punned pointer
Expand All @@ -645,7 +648,7 @@ static NRT_MemInfo *DPEXRT_MemInfo_fill(NRT_MemInfo *mi,
}
else if (!dest_is_float && value_is_float) {
double *p = (double *)&value;
bc.i32_ = *p;
bc.i32_ = (int32_t)*p;
}
else {
bc.i32_ = (int32_t)value;
Expand All @@ -662,7 +665,7 @@ static NRT_MemInfo *DPEXRT_MemInfo_fill(NRT_MemInfo *mi,

if (value_is_float) {
double *p = (double *)&value;
bc.i16_ = *p;
bc.i16_ = (int16_t)*p;
}
else {
bc.i16_ = (int16_t)value;
Expand All @@ -679,7 +682,7 @@ static NRT_MemInfo *DPEXRT_MemInfo_fill(NRT_MemInfo *mi,

if (value_is_float) {
double *p = (double *)&value;
bc.i8_ = *p;
bc.i8_ = (int8_t)*p;
}
else {
bc.i8_ = (int8_t)value;
Expand All @@ -694,8 +697,6 @@ static NRT_MemInfo *DPEXRT_MemInfo_fill(NRT_MemInfo *mi,
}

DPCTLEvent_Wait(eref);

DPCTLQueue_Delete(qref);
DPCTLEvent_Delete(eref);

return mi;
Expand Down Expand Up @@ -1198,6 +1199,14 @@ static int DPEXRT_sycl_queue_from_python(PyObject *obj,
goto error;
}

DPEXRT_DEBUG(DPCTLSyclDeviceRef device_ref;
device_ref = DPCTLQueue_GetDevice(queue_ref);
drt_debug_print("DPEXRT-DEBUG: DPEXRT_sycl_queue_from_python, "
"device info in %s at %d:\n%s",
__FILE__, __LINE__,
DPCTLDeviceMgr_GetDeviceInfoStr(device_ref));
DPCTLDevice_Delete(device_ref););

queue_struct->parent = obj;
queue_struct->queue_ref = queue_ref;

Expand Down
105 changes: 76 additions & 29 deletions numba_dpex/core/runtime/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,20 +20,20 @@ def _check_null_result(func):
@functools.wraps(func)
def wrap(self, builder, *args, **kwargs):
memptr = func(self, builder, *args, **kwargs)
msg = "USM allocation failed. Check the usm_type and filter "
"string values."
msg = "USM allocation failed. Check the usm_type and queue."
cgutils.guard_memory_error(self._context, builder, memptr, msg=msg)
return memptr

return wrap

@_check_null_result
def meminfo_alloc(self, builder, size, usm_type, device):
def meminfo_alloc(self, builder, size, usm_type, queue_ref):
"""
Wrapper to call :func:`~context.DpexRTContext.meminfo_alloc_unchecked`
with null checking of the returned value.
"""
return self.meminfo_alloc_unchecked(builder, size, usm_type, device)

return self.meminfo_alloc_unchecked(builder, size, usm_type, queue_ref)

@_check_null_result
def meminfo_fill(
Expand All @@ -44,7 +44,7 @@ def meminfo_fill(
dest_is_float,
value_is_float,
value,
device,
queue_ref,
):
"""
Wrapper to call :func:`~context.DpexRTContext.meminfo_fill_unchecked`
Expand All @@ -57,28 +57,34 @@ def meminfo_fill(
dest_is_float,
value_is_float,
value,
device,
queue_ref,
)

def meminfo_alloc_unchecked(self, builder, size, usm_type, device):
def meminfo_alloc_unchecked(self, builder, size, usm_type, queue_ref):
"""Allocate a new MemInfo with a data payload of `size` bytes.
The result of the call is checked and if it is NULL, i.e. allocation
failed, then a MemoryError is raised. If the allocation succeeded then
a pointer to the MemInfo is returned.
Args:
builder (_type_): LLVM IR builder
size (_type_): LLVM uint64 Value specifying the size in bytes for
the data payload.
usm_type (_type_): An LLVM Constant Value specifying the type of the
usm allocator. The constant value should match the values in
``dpctl's`` ``libsyclinterface::DPCTLSyclUSMType`` enum.
device (_type_): An LLVM ArrayType storing a const string for a
DPC++ filter selector string.
Returns: A pointer to the MemInfo is returned.
builder (`llvmlite.ir.builder.IRBuilder`): LLVM IR builder.
size (`llvmlite.ir.values.Argument`): LLVM uint64 value specifying
the size in bytes for the data payload, i.e. i64 %"arg.allocsize"
usm_type (`llvmlite.ir.values.Argument`): An LLVM Argument object
specifying the type of the usm allocator. The constant value
should match the values in
``dpctl's`` ``libsyclinterface::DPCTLSyclUSMType`` enum,
i.e. i64 %"arg.usm_type".
queue_ref (`llvmlite.ir.values.Argument`): An LLVM argument value storing
the pointer to the address of the queue object, the object can be
`dpctl.SyclQueue()`, i.e. i8* %"arg.queue".
Returns:
ret (`llvmlite.ir.instructions.CallInstr`): A pointer to the `MemInfo`
is returned from the `DPEXRT_MemInfo_alloc` C function call.
"""

mod = builder.module
u64 = llvmir.IntType(64)
fnty = llvmir.FunctionType(
Expand All @@ -87,7 +93,7 @@ def meminfo_alloc_unchecked(self, builder, size, usm_type, device):
fn = cgutils.get_or_insert_function(mod, fnty, "DPEXRT_MemInfo_alloc")
fn.return_value.add_attribute("noalias")

ret = builder.call(fn, [size, usm_type, device])
ret = builder.call(fn, [size, usm_type, queue_ref])

return ret

Expand All @@ -99,7 +105,7 @@ def meminfo_fill_unchecked(
dest_is_float,
value_is_float,
value,
device,
queue_ref,
):
"""Fills an allocated `MemInfo` with the value specified.
Expand All @@ -108,17 +114,29 @@ def meminfo_fill_unchecked(
is succeeded then a pointer to the `MemInfo` is returned.
Args:
builder (llvmlite.ir.builder.IRBuilder): LLVM IR builder
meminfo (llvmlite.ir.instructions.LoadInstr): LLVM uint64 value
builder (`llvmlite.ir.builder.IRBuilder`): LLVM IR builder.
meminfo (`llvmlite.ir.instructions.LoadInstr`): LLVM uint64 value
specifying the size in bytes for the data payload.
itemsize (llvmlite.ir.values.Constant): An LLVM Constant value
itemsize (`llvmlite.ir.values.Constant`): An LLVM Constant value
specifying the size of the each data item allocated by the
usm allocator.
device (llvmlite.ir.values.FormattedConstant): An LLVM ArrayType
storing a const string for a DPC++ filter selector string.
dest_is_float (`llvmlite.ir.values.Constant`): An LLVM Constant
value specifying if the destination array type is floating
point.
value_is_float (`llvmlite.ir.values.Constant`): An LLVM Constant
value specifying if the input value is a floating point.
value (`llvmlite.ir.values.Constant`): An LLVM Constant value
specifying if the input value that will be used to fill
the array.
queue_ref (`llvmlite.ir.instructions.ExtractValue`): An LLVM ExtractValue
instruction object to extract the pointer to the queue from the
DpctlSyclQueue type, i.e. %".74" = extractvalue {i8*, i8*} %".73", 1.
Returns: A pointer to the `MemInfo` is returned.
Returns:
ret (`llvmlite.ir.instructions.CallInstr`): A pointer to the `MemInfo`
is returned from the `DPEXRT_MemInfo_fill` C function call.
"""

mod = builder.module
u64 = llvmir.IntType(64)
b = llvmir.IntType(1)
Expand All @@ -131,7 +149,14 @@ def meminfo_fill_unchecked(

ret = builder.call(
fn,
[meminfo, itemsize, dest_is_float, value_is_float, value, device],
[
meminfo,
itemsize,
dest_is_float,
value_is_float,
value,
queue_ref,
],
)

return ret
Expand All @@ -154,7 +179,6 @@ def arraystruct_from_python(self, pyapi, obj, ptr):

def queuestruct_from_python(self, pyapi, obj, ptr):
"""Calls the c function DPEXRT_sycl_queue_from_python"""

fnty = llvmir.FunctionType(
llvmir.IntType(32), [pyapi.pyobj, pyapi.voidptr]
)
Expand All @@ -164,7 +188,6 @@ def queuestruct_from_python(self, pyapi, obj, ptr):
fn.args[1].add_attribute("nocapture")

self.error = pyapi.builder.call(fn, (obj, ptr))

return self.error

def queuestruct_to_python(self, pyapi, val):
Expand Down Expand Up @@ -258,7 +281,7 @@ def submit_range(
"""Calls DPEXRTQueue_CreateFromFilterString to create a new sycl::queue
from a given filter string.
Returns: A LLVM IR call inst.
Returns: A DPCTLSyclQueueRef pointer.
"""
mod = builder.module
fnty = llvmir.FunctionType(
Expand Down Expand Up @@ -353,3 +376,27 @@ def submit_ndrange(
)

return ret

def copy_queue(self, builder, queue_ref):
"""Calls DPCTLQueue_Copy to create a copy of the DpctlSyclQueueRef
pointer passed in to the function.
Args:
builder: The llvmlite.IRBuilder used to generate the LLVM IR for the
call.
queue_ref: An LLVM value for a DpctlSyclQueueRef pointer that will
be passed to the DPCTLQueue_Copy function.
Returns: A DPCTLSyclQueueRef pointer.
"""
mod = builder.module
fnty = llvmir.FunctionType(
cgutils.voidptr_t,
[cgutils.voidptr_t],
)
fn = cgutils.get_or_insert_function(mod, fnty, "DPCTLQueue_Copy")
fn.return_value.add_attribute("noalias")

ret = builder.call(fn, [queue_ref])

return ret
Loading

0 comments on commit f54cdc1

Please sign in to comment.