Merge pull request #1424 from IntelPython/feature/migrate_parfor
Feature/migrate parfor
ZzEeKkAa authored Apr 3, 2024
2 parents 5fb6093 + 40c465b commit e52b423
Showing 4 changed files with 79 additions and 42 deletions.
4 changes: 4 additions & 0 deletions numba_dpex/core/parfors/kernel_builder.py
@@ -44,13 +44,17 @@ def __init__(
kernel_args,
kernel_arg_types,
queue: dpctl.SyclQueue,
local_accessors=None,
work_group_size=None,
):
self.name = name
self.kernel = kernel
self.signature = signature
self.kernel_args = kernel_args
self.kernel_arg_types = kernel_arg_types
self.queue = queue
self.local_accessors = local_accessors
self.work_group_size = work_group_size


def _print_block(block):
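The change to kernel_builder.py only extends the ParforKernel descriptor with two optional fields: the set of kernel arguments that are local accessors and the work-group size used by the reduction. Illustrative sketch, not part of the diff (the class below is a hypothetical stand-in; the real ParforKernel also takes the kernel, signature, argument types, and a dpctl.SyclQueue):

class ParforKernelSketch:
    """Hypothetical stand-in mirroring the two new optional fields."""

    def __init__(self, name, kernel_args, local_accessors=None, work_group_size=None):
        self.name = name
        self.kernel_args = kernel_args
        # New in this change: which arguments are local accessors, and the
        # work-group size the lowerer will use as their shape.
        self.local_accessors = local_accessors
        self.work_group_size = work_group_size


pk = ParforKernelSketch(
    "reduction_main",
    kernel_args=["a", "local_sums_x"],
    local_accessors={"local_sums_x"},
    work_group_size=256,
)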
48 changes: 16 additions & 32 deletions numba_dpex/core/parfors/kernel_templates/reduction_template.py
@@ -30,8 +30,8 @@ def __init__(
parfor_args,
parfor_reddict,
redvars_dict,
local_accessors_dict,
typemap,
work_group_size,
) -> None:
self._kernel_name = kernel_name
self._kernel_params = kernel_params
@@ -44,8 +44,8 @@ def __init__(
self._parfor_args = parfor_args
self._parfor_reddict = parfor_reddict
self._redvars_dict = redvars_dict
self._local_accessors_dict = local_accessors_dict
self._typemap = typemap
self._work_group_size = work_group_size

self._kernel_txt = self._generate_kernel_stub_as_string()
self._kernel_ir = self._generate_kernel_ir()
@@ -55,7 +55,7 @@ def _generate_kernel_stub_as_string(self):

gufunc_txt = ""
gufunc_txt += "def " + self._kernel_name
gufunc_txt += "(" + (", ".join(self._kernel_params)) + "):\n"
gufunc_txt += "(nd_item, " + (", ".join(self._kernel_params)) + "):\n"
global_id_dim = 0
for_loop_dim = self._parfor_dim

@@ -64,21 +64,17 @@ def _generate_kernel_stub_as_string(self):
else:
global_id_dim = self._parfor_dim

gufunc_txt += " group = nd_item.get_group()\n"
for dim in range(global_id_dim):
dstr = str(dim)
gufunc_txt += (
f" {self._ivar_names[dim]} = dpex.get_global_id({dstr})\n"
f" {self._ivar_names[dim]} = nd_item.get_global_id({dstr})\n"
)
gufunc_txt += f" local_id{dim} = dpex.get_local_id({dstr})\n"
gufunc_txt += f" local_size{dim} = dpex.get_local_size({dstr})\n"
gufunc_txt += f" group_id{dim} = dpex.get_group_id({dstr})\n"

# Allocate local_sums arrays for each reduction variable.
for redvar in self._redvars:
rtyp = str(self._typemap[redvar])
redvar = self._redvars_dict[redvar]
gufunc_txt += f" local_sums_{redvar} = \
dpex.local.array({self._work_group_size}, dpnp.{rtyp})\n"
gufunc_txt += f" local_id{dim} = nd_item.get_local_id({dstr})\n"
gufunc_txt += (
f" local_size{dim} = group.get_local_range({dstr})\n"
)
gufunc_txt += f" group_id{dim} = group.get_group_id({dstr})\n"

for dim in range(global_id_dim, for_loop_dim):
for indent in range(1 + (dim - global_id_dim)):
@@ -282,10 +278,13 @@ def _generate_kernel_stub_as_string(self):
)

for redvar in self._redvars:
rtyp = str(self._typemap[redvar])
legal_redvar = self._redvars_dict[redvar]
gufunc_txt += " "
gufunc_txt += legal_redvar + " = "
gufunc_txt += f"{self._parfor_reddict[redvar].init_val}\n"
gufunc_txt += (
f"dpnp.{rtyp}({self._parfor_reddict[redvar].init_val})\n"
)

gufunc_txt += (
" "
@@ -294,32 +293,17 @@ def _generate_kernel_stub_as_string(self):
+ f"{self._global_size_var_name[0]} + j\n"
)

for redvar in self._redvars:
rtyp = str(self._typemap[redvar])
redvar = self._redvars_dict[redvar]
gufunc_txt += (
" "
+ f"local_sums_{redvar} = "
+ f"dpex.local.array(1, dpnp.{rtyp})\n"
)

gufunc_txt += " " + self._sentinel_name + " = 0\n"

for i, redvar in enumerate(self._redvars):
legal_redvar = self._redvars_dict[redvar]
gufunc_txt += (
" " + f"local_sums_{legal_redvar}[0] = {legal_redvar}\n"
)

for i, redvar in enumerate(self._redvars):
legal_redvar = self._redvars_dict[redvar]
redop = self._parfor_reddict[redvar].redop
if redop == operator.iadd:
gufunc_txt += f" {self._final_sum_var_name[i]}[0] += \
local_sums_{legal_redvar}[0]\n"
{legal_redvar}\n"
elif redop == operator.imul:
gufunc_txt += f" {self._final_sum_var_name[i]}[0] *= \
local_sums_{legal_redvar}[0]\n"
{legal_redvar}\n"
else:
raise NotImplementedError

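To make the template change concrete: the stub no longer calls dpex.get_global_id, dpex.get_local_id, dpex.get_local_size, or dpex.get_group_id, and no longer allocates dpex.local.array buffers inside the kernel; the indices come from the kernel_api nd_item and group objects, and the local sums arrive as LocalAccessor arguments. A self-contained sketch of the per-dimension index setup the migrated template now emits is shown below; it is an illustrative helper written for this page (emit_index_setup is not a function in the repository), but every generated line mirrors an f-string from the hunks above.

def emit_index_setup(ivar_names, global_id_dim):
    # Mirror the migrated template: all indexing goes through nd_item/group.
    txt = "    group = nd_item.get_group()\n"
    for dim in range(global_id_dim):
        dstr = str(dim)
        txt += f"    {ivar_names[dim]} = nd_item.get_global_id({dstr})\n"
        txt += f"    local_id{dim} = nd_item.get_local_id({dstr})\n"
        txt += f"    local_size{dim} = group.get_local_range({dstr})\n"
        txt += f"    group_id{dim} = group.get_group_id({dstr})\n"
    return txt


print(emit_index_setup(["i"], 1))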
20 changes: 18 additions & 2 deletions numba_dpex/core/parfors/parfor_lowerer.py
@@ -169,8 +169,24 @@ def _submit_parfor_kernel(
queue_ref = kl_builder.get_queue(exec_queue=kernel_fn.queue)

kernel_args = []
for arg in kernel_fn.kernel_args:
kernel_args.append(_getvar(lowerer, arg))
for i, arg in enumerate(kernel_fn.kernel_args):
if (
kernel_fn.local_accessors is not None
and arg in kernel_fn.local_accessors
):
wg_size = lowerer.context.get_constant(
types.intp, kernel_fn.work_group_size
)
la_shape = cgutils.pack_array(lowerer.builder, [wg_size])
arg_ty = kernel_fn.kernel_arg_types[i]
la = cgutils.create_struct_proxy(arg_ty)(
lowerer.context,
lowerer.builder,
)
la.shape = la_shape
kernel_args.append(la._getvalue())
else:
kernel_args.append(_getvar(lowerer, arg))

kernel_ref_addr = kernel_fn.kernel.addressof_ref()
kernel_ref = lowerer.builder.inttoptr(
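The lowering change above boils down to a new rule for assembling kernel arguments: an argument listed in kernel_fn.local_accessors is not looked up as a lowered variable; instead a local-accessor value is materialised whose shape is the kernel's work_group_size. The sketch below is a hypothetical, pared-down restatement of that rule; lookup_var and make_local_accessor are stand-ins for _getvar and the cgutils struct-proxy construction, not real APIs.

def build_kernel_args(kernel_fn, lookup_var, make_local_accessor):
    # Hypothetical sketch of the branch added in the hunk above.
    args = []
    for i, arg in enumerate(kernel_fn.kernel_args):
        if (
            kernel_fn.local_accessors is not None
            and arg in kernel_fn.local_accessors
        ):
            # Local accessors are built from the work-group size rather than
            # loaded from a lowered variable.
            args.append(
                make_local_accessor(
                    kernel_fn.kernel_arg_types[i],
                    shape=(kernel_fn.work_group_size,),
                )
            )
        else:
            args.append(lookup_var(arg))
    return args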
49 changes: 41 additions & 8 deletions numba_dpex/core/parfors/reduction_kernel_builder.py
@@ -19,7 +19,10 @@
)
from numba.core.typing import signature

from numba_dpex.core.parfors.reduction_helper import ReductionKernelVariables
from numba_dpex.core.types import DpctlSyclQueue
from numba_dpex.core.types.kernel_api.index_space_ids import NdItemType
from numba_dpex.core.types.kernel_api.local_accessor import LocalAccessorType

from .kernel_builder import _print_body # saved for debug
from .kernel_builder import (
@@ -40,14 +43,15 @@ def create_reduction_main_kernel_for_parfor(
typemap,
flags,
has_aliases,
reductionKernelVar,
reductionKernelVar: ReductionKernelVariables,
parfor_reddict=None,
):
"""
Creates a numba_dpex.kernel function for the main reduction kernel.
"""

loc = parfor_node.init_block.loc
parfor_dim = len(parfor_node.loop_nests)

for race in parfor_node.races:
msg = (
@@ -77,20 +81,35 @@
except KeyError:
pass

parfor_params = reductionKernelVar.parfor_params.copy()
parfor_legalized_params = reductionKernelVar.parfor_legalized_params.copy()
parfor_param_types = reductionKernelVar.param_types.copy()
local_accessors_dict = {}
for k, v in reductionKernelVar.redvars_legal_dict.items():
la_var = "local_sums_" + v
local_accessors_dict[k] = la_var
idx = reductionKernelVar.parfor_params.index(k)
arr_ty = reductionKernelVar.param_types[idx]
la_ty = LocalAccessorType(parfor_dim, arr_ty.dtype)

parfor_params.append(la_var)
parfor_legalized_params.append(la_var)
parfor_param_types.append(la_ty)

kernel_template = TreeReduceIntermediateKernelTemplate(
kernel_name=kernel_name,
kernel_params=reductionKernelVar.parfor_legalized_params,
kernel_params=parfor_legalized_params,
ivar_names=reductionKernelVar.legal_loop_indices,
sentinel_name=sentinel_name,
loop_ranges=loop_ranges,
param_dict=reductionKernelVar.param_dict,
parfor_dim=len(parfor_node.loop_nests),
parfor_dim=parfor_dim,
redvars=reductionKernelVar.parfor_redvars,
parfor_args=reductionKernelVar.parfor_params,
parfor_args=parfor_params,
parfor_reddict=parfor_reddict,
redvars_dict=reductionKernelVar.redvars_legal_dict,
local_accessors_dict=local_accessors_dict,
typemap=typemap,
work_group_size=reductionKernelVar.work_group_size,
)
kernel_ir = kernel_template.kernel_ir

@@ -114,7 +133,7 @@ def create_reduction_main_kernel_for_parfor(
new_var_dict[name] = mk_unique_var(name)

replace_var_names(kernel_ir.blocks, new_var_dict)
kernel_param_types = reductionKernelVar.param_types
kernel_param_types = parfor_param_types
kernel_stub_last_label = max(kernel_ir.blocks.keys()) + 1
# Add kernel stub last label to each parfor.loop_body label to prevent
# label conflicts.
@@ -136,6 +155,13 @@ def create_reduction_main_kernel_for_parfor(
if not has_aliases:
flags.noalias = True

# The first argument to the reduction kernel is a kernel_api.NdItem object.
# The ``NdItem`` object is used by the kernel_api.spirv backend to generate
# the correct SPIR-V indexing instructions. Since the argument is not
# originally part of kernel_param_types, it is added here so that the kernel
# signature matches the actual generated code.
ty_item = NdItemType(parfor_dim)
kernel_param_types = (ty_item, *kernel_param_types)
kernel_sig = signature(types.none, *kernel_param_types)

# FIXME: A better design is required so that we do not have to create a
@@ -155,13 +181,20 @@ def create_reduction_main_kernel_for_parfor(

flags.noalias = old_alias

parfor_params = (
reductionKernelVar.parfor_params.copy()
+ parfor_params[len(reductionKernelVar.parfor_params) :]  # noqa: E203
)

return ParforKernel(
name=kernel_name,
kernel=sycl_kernel,
signature=kernel_sig,
kernel_args=reductionKernelVar.parfor_params,
kernel_arg_types=reductionKernelVar.func_arg_types,
kernel_args=parfor_params,
kernel_arg_types=parfor_param_types,
queue=exec_queue,
local_accessors=set(local_accessors_dict.values()),
work_group_size=reductionKernelVar.work_group_size,
)


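Putting the builder changes together: for every reduction variable the main-kernel builder now appends a local_sums_* parameter typed as a LocalAccessorType, and the kernel signature gains an NdItemType as its leading parameter type while kernel_args stays free of the nd_item argument. The sketch below is a hypothetical, simplified restatement of that bookkeeping; extend_reduction_params and make_local_accessor_type are illustrative names, the latter standing in for LocalAccessorType(parfor_dim, arr_ty.dtype) from the hunk above.

def extend_reduction_params(parfor_params, param_types, redvars_legal_dict,
                            make_local_accessor_type, nd_item_type):
    # Hypothetical sketch of the parameter bookkeeping added in this file.
    params = list(parfor_params)
    types_ = list(param_types)
    local_accessors = set()
    for redvar, legal_name in redvars_legal_dict.items():
        la_var = "local_sums_" + legal_name
        local_accessors.add(la_var)
        params.append(la_var)
        types_.append(make_local_accessor_type(redvar))
    # The NdItem type is prepended only to the signature; it does not appear
    # in the kernel_args recorded on the ParforKernel.
    signature_types = (nd_item_type, *types_)
    return params, signature_types, local_accessors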
