Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/migrate parfor #1424

Merged
merged 3 commits into from
Apr 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions numba_dpex/core/parfors/kernel_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,17 @@ def __init__(
kernel_args,
kernel_arg_types,
queue: dpctl.SyclQueue,
local_accessors=None,
work_group_size=None,
):
self.name = name
self.kernel = kernel
self.signature = signature
self.kernel_args = kernel_args
self.kernel_arg_types = kernel_arg_types
self.queue = queue
self.local_accessors = local_accessors
self.work_group_size = work_group_size


def _print_block(block):
Expand Down
48 changes: 16 additions & 32 deletions numba_dpex/core/parfors/kernel_templates/reduction_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ def __init__(
parfor_args,
parfor_reddict,
redvars_dict,
local_accessors_dict,
typemap,
work_group_size,
) -> None:
self._kernel_name = kernel_name
self._kernel_params = kernel_params
Expand All @@ -44,8 +44,8 @@ def __init__(
self._parfor_args = parfor_args
self._parfor_reddict = parfor_reddict
self._redvars_dict = redvars_dict
self._local_accessors_dict = local_accessors_dict
self._typemap = typemap
self._work_group_size = work_group_size

self._kernel_txt = self._generate_kernel_stub_as_string()
self._kernel_ir = self._generate_kernel_ir()
Expand All @@ -55,7 +55,7 @@ def _generate_kernel_stub_as_string(self):

gufunc_txt = ""
gufunc_txt += "def " + self._kernel_name
gufunc_txt += "(" + (", ".join(self._kernel_params)) + "):\n"
gufunc_txt += "(nd_item, " + (", ".join(self._kernel_params)) + "):\n"
global_id_dim = 0
for_loop_dim = self._parfor_dim

Expand All @@ -64,21 +64,17 @@ def _generate_kernel_stub_as_string(self):
else:
global_id_dim = self._parfor_dim

gufunc_txt += " group = nd_item.get_group()\n"
for dim in range(global_id_dim):
dstr = str(dim)
gufunc_txt += (
f" {self._ivar_names[dim]} = dpex.get_global_id({dstr})\n"
f" {self._ivar_names[dim]} = nd_item.get_global_id({dstr})\n"
)
gufunc_txt += f" local_id{dim} = dpex.get_local_id({dstr})\n"
gufunc_txt += f" local_size{dim} = dpex.get_local_size({dstr})\n"
gufunc_txt += f" group_id{dim} = dpex.get_group_id({dstr})\n"

# Allocate local_sums arrays for each reduction variable.
for redvar in self._redvars:
rtyp = str(self._typemap[redvar])
redvar = self._redvars_dict[redvar]
gufunc_txt += f" local_sums_{redvar} = \
dpex.local.array({self._work_group_size}, dpnp.{rtyp})\n"
gufunc_txt += f" local_id{dim} = nd_item.get_local_id({dstr})\n"
gufunc_txt += (
f" local_size{dim} = group.get_local_range({dstr})\n"
)
gufunc_txt += f" group_id{dim} = group.get_group_id({dstr})\n"

for dim in range(global_id_dim, for_loop_dim):
for indent in range(1 + (dim - global_id_dim)):
Expand Down Expand Up @@ -282,10 +278,13 @@ def _generate_kernel_stub_as_string(self):
)

for redvar in self._redvars:
rtyp = str(self._typemap[redvar])
legal_redvar = self._redvars_dict[redvar]
gufunc_txt += " "
gufunc_txt += legal_redvar + " = "
gufunc_txt += f"{self._parfor_reddict[redvar].init_val}\n"
gufunc_txt += (
f"dpnp.{rtyp}({self._parfor_reddict[redvar].init_val})\n"
)

gufunc_txt += (
" "
Expand All @@ -294,32 +293,17 @@ def _generate_kernel_stub_as_string(self):
+ f"{self._global_size_var_name[0]} + j\n"
)

for redvar in self._redvars:
rtyp = str(self._typemap[redvar])
redvar = self._redvars_dict[redvar]
gufunc_txt += (
" "
+ f"local_sums_{redvar} = "
+ f"dpex.local.array(1, dpnp.{rtyp})\n"
)

gufunc_txt += " " + self._sentinel_name + " = 0\n"

for i, redvar in enumerate(self._redvars):
legal_redvar = self._redvars_dict[redvar]
gufunc_txt += (
" " + f"local_sums_{legal_redvar}[0] = {legal_redvar}\n"
)

for i, redvar in enumerate(self._redvars):
legal_redvar = self._redvars_dict[redvar]
redop = self._parfor_reddict[redvar].redop
if redop == operator.iadd:
gufunc_txt += f" {self._final_sum_var_name[i]}[0] += \
local_sums_{legal_redvar}[0]\n"
{legal_redvar}\n"
elif redop == operator.imul:
gufunc_txt += f" {self._final_sum_var_name[i]}[0] *= \
local_sums_{legal_redvar}[0]\n"
{legal_redvar}\n"
else:
raise NotImplementedError

Expand Down
20 changes: 18 additions & 2 deletions numba_dpex/core/parfors/parfor_lowerer.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,8 +169,24 @@ def _submit_parfor_kernel(
queue_ref = kl_builder.get_queue(exec_queue=kernel_fn.queue)

kernel_args = []
for arg in kernel_fn.kernel_args:
kernel_args.append(_getvar(lowerer, arg))
for i, arg in enumerate(kernel_fn.kernel_args):
if (
kernel_fn.local_accessors is not None
and arg in kernel_fn.local_accessors
):
wg_size = lowerer.context.get_constant(
types.intp, kernel_fn.work_group_size
)
la_shape = cgutils.pack_array(lowerer.builder, [wg_size])
arg_ty = kernel_fn.kernel_arg_types[i]
la = cgutils.create_struct_proxy(arg_ty)(
lowerer.context,
lowerer.builder,
)
la.shape = la_shape
kernel_args.append(la._getvalue())
else:
kernel_args.append(_getvar(lowerer, arg))

kernel_ref_addr = kernel_fn.kernel.addressof_ref()
kernel_ref = lowerer.builder.inttoptr(
Expand Down
49 changes: 41 additions & 8 deletions numba_dpex/core/parfors/reduction_kernel_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@
)
from numba.core.typing import signature

from numba_dpex.core.parfors.reduction_helper import ReductionKernelVariables
from numba_dpex.core.types import DpctlSyclQueue
from numba_dpex.core.types.kernel_api.index_space_ids import NdItemType
from numba_dpex.core.types.kernel_api.local_accessor import LocalAccessorType

from .kernel_builder import _print_body # saved for debug
from .kernel_builder import (
Expand All @@ -40,14 +43,15 @@ def create_reduction_main_kernel_for_parfor(
typemap,
flags,
has_aliases,
reductionKernelVar,
reductionKernelVar: ReductionKernelVariables,
parfor_reddict=None,
):
"""
Creates a numba_dpex.kernel function for reduction main kernel.
"""

loc = parfor_node.init_block.loc
parfor_dim = len(parfor_node.loop_nests)

for race in parfor_node.races:
msg = (
Expand Down Expand Up @@ -77,20 +81,35 @@ def create_reduction_main_kernel_for_parfor(
except KeyError:
pass

parfor_params = reductionKernelVar.parfor_params.copy()
parfor_legalized_params = reductionKernelVar.parfor_legalized_params.copy()
parfor_param_types = reductionKernelVar.param_types.copy()
local_accessors_dict = {}
for k, v in reductionKernelVar.redvars_legal_dict.items():
la_var = "local_sums_" + v
local_accessors_dict[k] = la_var
idx = reductionKernelVar.parfor_params.index(k)
arr_ty = reductionKernelVar.param_types[idx]
la_ty = LocalAccessorType(parfor_dim, arr_ty.dtype)

parfor_params.append(la_var)
parfor_legalized_params.append(la_var)
parfor_param_types.append(la_ty)

kernel_template = TreeReduceIntermediateKernelTemplate(
kernel_name=kernel_name,
kernel_params=reductionKernelVar.parfor_legalized_params,
kernel_params=parfor_legalized_params,
ivar_names=reductionKernelVar.legal_loop_indices,
sentinel_name=sentinel_name,
loop_ranges=loop_ranges,
param_dict=reductionKernelVar.param_dict,
parfor_dim=len(parfor_node.loop_nests),
parfor_dim=parfor_dim,
redvars=reductionKernelVar.parfor_redvars,
parfor_args=reductionKernelVar.parfor_params,
parfor_args=parfor_params,
parfor_reddict=parfor_reddict,
redvars_dict=reductionKernelVar.redvars_legal_dict,
local_accessors_dict=local_accessors_dict,
typemap=typemap,
work_group_size=reductionKernelVar.work_group_size,
)
kernel_ir = kernel_template.kernel_ir

Expand All @@ -114,7 +133,7 @@ def create_reduction_main_kernel_for_parfor(
new_var_dict[name] = mk_unique_var(name)

replace_var_names(kernel_ir.blocks, new_var_dict)
kernel_param_types = reductionKernelVar.param_types
kernel_param_types = parfor_param_types
kernel_stub_last_label = max(kernel_ir.blocks.keys()) + 1
# Add kernel stub last label to each parfor.loop_body label to prevent
# label conflicts.
Expand All @@ -136,6 +155,13 @@ def create_reduction_main_kernel_for_parfor(
if not has_aliases:
flags.noalias = True

# The first argument to a range kernel is a kernel_api.NdItem object. The
# ``NdItem`` object is used by the kernel_api.spirv backend to generate the
# correct SPIR-V indexing instructions. Since, the argument is not something
# available originally in the kernel_param_types, we add it at this point to
# make sure the kernel signature matches the actual generated code.
ty_item = NdItemType(parfor_dim)
kernel_param_types = (ty_item, *kernel_param_types)
kernel_sig = signature(types.none, *kernel_param_types)

# FIXME: A better design is required so that we do not have to create a
Expand All @@ -155,13 +181,20 @@ def create_reduction_main_kernel_for_parfor(

flags.noalias = old_alias

parfor_params = (
reductionKernelVar.parfor_params.copy()
+ parfor_params[len(reductionKernelVar.parfor_params) :] # noqa: $203
)

return ParforKernel(
name=kernel_name,
kernel=sycl_kernel,
signature=kernel_sig,
kernel_args=reductionKernelVar.parfor_params,
kernel_arg_types=reductionKernelVar.func_arg_types,
kernel_args=parfor_params,
kernel_arg_types=parfor_param_types,
queue=exec_queue,
local_accessors=set(local_accessors_dict.values()),
work_group_size=reductionKernelVar.work_group_size,
)


Expand Down
Loading