Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MLIR][OpenMP] Make omp.taskloop into a loop wrapper #87253

Merged
merged 11 commits into from
Apr 16, 2024
Merged
9 changes: 4 additions & 5 deletions mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h
Original file line number Diff line number Diff line change
Expand Up @@ -284,11 +284,10 @@ using TaskgroupClauseOps =
detail::Clauses<AllocateClauseOps, TaskReductionClauseOps>;

using TaskloopClauseOps =
detail::Clauses<AllocateClauseOps, CollapseClauseOps, FinalClauseOps,
GrainsizeClauseOps, IfClauseOps, InReductionClauseOps,
LoopRelatedOps, MergeableClauseOps, NogroupClauseOps,
NumTasksClauseOps, PriorityClauseOps, PrivateClauseOps,
ReductionClauseOps, UntiedClauseOps>;
detail::Clauses<AllocateClauseOps, FinalClauseOps, GrainsizeClauseOps,
IfClauseOps, InReductionClauseOps, MergeableClauseOps,
NogroupClauseOps, NumTasksClauseOps, PriorityClauseOps,
PrivateClauseOps, ReductionClauseOps, UntiedClauseOps>;

using TaskwaitClauseOps = detail::Clauses<DependClauseOps, NowaitClauseOps>;

Expand Down
39 changes: 16 additions & 23 deletions mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -1030,32 +1030,30 @@ def TaskOp : OpenMP_Op<"task", [AttrSizedOperandSegments,
}

def TaskloopOp : OpenMP_Op<"taskloop", [AttrSizedOperandSegments,
AutomaticAllocationScope, RecursiveMemoryEffects,
AllTypesMatch<["lowerBound", "upperBound", "step"]>,
AutomaticAllocationScope,
DeclareOpInterfaceMethods<LoopWrapperInterface>,
ReductionClauseInterface]> {
RecursiveMemoryEffects, ReductionClauseInterface,
SingleBlockImplicitTerminator<"TerminatorOp">]> {
let summary = "taskloop construct";
let description = [{
The taskloop construct specifies that the iterations of one or more
associated loops will be executed in parallel using explicit tasks. The
iterations are distributed across tasks generated by the construct and
scheduled to be executed.

The `lowerBound` and `upperBound` specify a half-open range: the range
includes the lower bound but does not include the upper bound. If the
`inclusive` attribute is specified then the upper bound is also included.
The `step` specifies the loop step.

The body region can contain any number of blocks.
The body region must contain a single block, which in turn must contain
exactly one operation and a terminator. That operation must be either another
compatible loop wrapper or an `omp.loop_nest`.

```
omp.taskloop <clauses>
for (%i1, %i2) : index = (%c0, %c0) to (%c10, %c10) step (%c1, %c1) {
%a = load %arrA[%i1, %i2] : memref<?x?xf32>
%b = load %arrB[%i1, %i2] : memref<?x?xf32>
%sum = arith.addf %a, %b : f32
store %sum, %arrC[%i1, %i2] : memref<?x?xf32>
omp.terminator
omp.taskloop <clauses> {
omp.loop_nest (%i1, %i2) : index = (%c0, %c0) to (%c10, %c10) step (%c1, %c1) {
%a = load %arrA[%i1, %i2] : memref<?x?xf32>
%b = load %arrB[%i1, %i2] : memref<?x?xf32>
%sum = arith.addf %a, %b : f32
store %sum, %arrC[%i1, %i2] : memref<?x?xf32>
omp.yield
}
}
```

Expand Down Expand Up @@ -1132,11 +1130,7 @@ def TaskloopOp : OpenMP_Op<"taskloop", [AttrSizedOperandSegments,
created.
}];

let arguments = (ins Variadic<IntLikeType>:$lowerBound,
Variadic<IntLikeType>:$upperBound,
Variadic<IntLikeType>:$step,
UnitAttr:$inclusive,
Optional<I1>:$if_expr,
let arguments = (ins Optional<I1>:$if_expr,
Optional<I1>:$final_expr,
UnitAttr:$untied,
UnitAttr:$mergeable,
Expand Down Expand Up @@ -1179,8 +1173,7 @@ def TaskloopOp : OpenMP_Op<"taskloop", [AttrSizedOperandSegments,
|`grain_size` `(` $grain_size `:` type($grain_size) `)`
|`num_tasks` `(` $num_tasks `:` type($num_tasks) `)`
|`nogroup` $nogroup
) `for` custom<LoopControl>($region, $lowerBound, $upperBound, $step,
type($step), $inclusive) attr-dict
) $region attr-dict
}];

let extraClassDeclaration = [{
Expand Down
15 changes: 12 additions & 3 deletions mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1829,9 +1829,8 @@ void TaskloopOp::build(OpBuilder &builder, OperationState &state,
MLIRContext *ctx = builder.getContext();
// TODO Store clauses in op: reductionByRefAttr, privateVars, privatizers.
TaskloopOp::build(
builder, state, clauses.loopLBVar, clauses.loopUBVar, clauses.loopStepVar,
clauses.loopInclusiveAttr, clauses.ifVar, clauses.finalVar,
clauses.untiedAttr, clauses.mergeableAttr, clauses.inReductionVars,
builder, state, clauses.ifVar, clauses.finalVar, clauses.untiedAttr,
clauses.mergeableAttr, clauses.inReductionVars,
makeArrayAttr(ctx, clauses.inReductionDeclSymbols), clauses.reductionVars,
makeArrayAttr(ctx, clauses.reductionDeclSymbols), clauses.priorityVar,
clauses.allocateVars, clauses.allocatorVars, clauses.grainsizeVar,
Expand Down Expand Up @@ -1870,6 +1869,16 @@ LogicalResult TaskloopOp::verify() {
"the grainsize clause and num_tasks clause are mutually exclusive and "
"may not appear on the same taskloop directive");
}

if (!isWrapper())
return emitOpError() << "must be a loop wrapper";

if (LoopWrapperInterface nested = getNestedWrapper()) {
// Check for the allowed leaf constructs that may appear in a composite
// construct directly after TASKLOOP.
if (!isa<SimdLoopOp>(nested))
return emitError() << "only supported nested wrapper is 'omp.simdloop'";
}
return success();
}

Expand Down
92 changes: 62 additions & 30 deletions mlir/test/Dialect/OpenMP/invalid.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -1580,10 +1580,11 @@ func.func @omp_cancellationpoint2() {
func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
%testmemref = "test.memref"() : () -> (memref<i32>)
// expected-error @below {{expected equal sizes for allocate and allocator variables}}
"omp.taskloop"(%lb, %ub, %ub, %lb, %step, %step, %testmemref) ({
^bb0(%arg3: i32, %arg4: i32):
"omp.terminator"() : () -> ()
}) {operandSegmentSizes = array<i32: 2, 2, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0>} : (i32, i32, i32, i32, i32, i32, memref<i32>) -> ()
"omp.taskloop"(%testmemref) ({
omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
omp.yield
}
}) {operandSegmentSizes = array<i32: 0, 0, 0, 0, 0, 1, 0, 0, 0>} : (memref<i32>) -> ()
return
}

Expand All @@ -1593,23 +1594,24 @@ func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
%testf32 = "test.f32"() : () -> (!llvm.ptr)
%testf32_2 = "test.f32"() : () -> (!llvm.ptr)
// expected-error @below {{expected as many reduction symbol references as reduction variables}}
"omp.taskloop"(%lb, %ub, %ub, %lb, %step, %step, %testf32, %testf32_2) ({
^bb0(%arg3: i32, %arg4: i32):
"omp.terminator"() : () -> ()
}) {operandSegmentSizes = array<i32: 2, 2, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0>, reductions = [@add_f32]} : (i32, i32, i32, i32, i32, i32, !llvm.ptr, !llvm.ptr) -> ()
"omp.taskloop"(%testf32, %testf32_2) ({
omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
omp.yield
}
}) {operandSegmentSizes = array<i32: 0, 0, 0, 2, 0, 0, 0, 0, 0>, reductions = [@add_f32]} : (!llvm.ptr, !llvm.ptr) -> ()
return
}

// -----

func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
%testf32 = "test.f32"() : () -> (!llvm.ptr)
%testf32_2 = "test.f32"() : () -> (!llvm.ptr)
// expected-error @below {{expected as many reduction symbol references as reduction variables}}
"omp.taskloop"(%lb, %ub, %ub, %lb, %step, %step, %testf32) ({
^bb0(%arg3: i32, %arg4: i32):
"omp.terminator"() : () -> ()
}) {operandSegmentSizes = array<i32: 2, 2, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0>, reductions = [@add_f32, @add_f32]} : (i32, i32, i32, i32, i32, i32, !llvm.ptr) -> ()
"omp.taskloop"(%testf32) ({
omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
omp.yield
}
}) {operandSegmentSizes = array<i32: 0, 0, 0, 1, 0, 0, 0, 0, 0>, reductions = [@add_f32, @add_f32]} : (!llvm.ptr) -> ()
return
}

Expand All @@ -1619,23 +1621,24 @@ func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
%testf32 = "test.f32"() : () -> (!llvm.ptr)
%testf32_2 = "test.f32"() : () -> (!llvm.ptr)
// expected-error @below {{expected as many reduction symbol references as reduction variables}}
"omp.taskloop"(%lb, %ub, %ub, %lb, %step, %step, %testf32, %testf32_2) ({
^bb0(%arg3: i32, %arg4: i32):
"omp.terminator"() : () -> ()
}) {in_reductions = [@add_f32], operandSegmentSizes = array<i32: 2, 2, 2, 0, 0, 2, 0, 0, 0, 0, 0, 0>} : (i32, i32, i32, i32, i32, i32, !llvm.ptr, !llvm.ptr) -> ()
"omp.taskloop"(%testf32, %testf32_2) ({
omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
omp.yield
}
}) {in_reductions = [@add_f32], operandSegmentSizes = array<i32: 0, 0, 2, 0, 0, 0, 0, 0, 0>} : (!llvm.ptr, !llvm.ptr) -> ()
return
}

// -----

func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
%testf32 = "test.f32"() : () -> (!llvm.ptr)
%testf32_2 = "test.f32"() : () -> (!llvm.ptr)
// expected-error @below {{expected as many reduction symbol references as reduction variables}}
"omp.taskloop"(%lb, %ub, %ub, %lb, %step, %step, %testf32_2) ({
^bb0(%arg3: i32, %arg4: i32):
"omp.terminator"() : () -> ()
}) {in_reductions = [@add_f32, @add_f32], operandSegmentSizes = array<i32: 2, 2, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0>} : (i32, i32, i32, i32, i32, i32, !llvm.ptr) -> ()
"omp.taskloop"(%testf32) ({
omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
omp.yield
}
}) {in_reductions = [@add_f32, @add_f32], operandSegmentSizes = array<i32: 0, 0, 1, 0, 0, 0, 0, 0, 0>} : (!llvm.ptr) -> ()
return
}

Expand All @@ -1657,9 +1660,10 @@ func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
%testf32 = "test.f32"() : () -> (!llvm.ptr)
%testf32_2 = "test.f32"() : () -> (!llvm.ptr)
// expected-error @below {{if a reduction clause is present on the taskloop directive, the nogroup clause must not be specified}}
omp.taskloop reduction(@add_f32 -> %testf32 : !llvm.ptr, @add_f32 -> %testf32_2 : !llvm.ptr) nogroup
for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
omp.terminator
omp.taskloop reduction(@add_f32 -> %testf32 : !llvm.ptr, @add_f32 -> %testf32_2 : !llvm.ptr) nogroup {
omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
omp.yield
}
}
return
}
Expand All @@ -1681,9 +1685,10 @@ combiner {
func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
%testf32 = "test.f32"() : () -> (!llvm.ptr)
// expected-error @below {{the same list item cannot appear in both a reduction and an in_reduction clause}}
omp.taskloop reduction(@add_f32 -> %testf32 : !llvm.ptr) in_reduction(@add_f32 -> %testf32 : !llvm.ptr)
for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
omp.terminator
omp.taskloop reduction(@add_f32 -> %testf32 : !llvm.ptr) in_reduction(@add_f32 -> %testf32 : !llvm.ptr) {
omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
omp.yield
}
}
return
}
Expand All @@ -1693,15 +1698,42 @@ func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
%testi64 = "test.i64"() : () -> (i64)
// expected-error @below {{the grainsize clause and num_tasks clause are mutually exclusive and may not appear on the same taskloop directive}}
omp.taskloop grain_size(%testi64: i64) num_tasks(%testi64: i64)
for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
omp.taskloop grain_size(%testi64: i64) num_tasks(%testi64: i64) {
omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
omp.yield
}
}
return
}

// -----

// Negative test: the omp.taskloop region below holds an arith.constant and a
// terminator instead of a nested loop wrapper or omp.loop_nest, so the op is
// expected to be rejected with the "must be a loop wrapper" diagnostic.
// The function arguments are unused here; the signature matches the other
// @taskloop test cases in this file.
func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
// expected-error @below {{op must be a loop wrapper}}
omp.taskloop {
%0 = arith.constant 0 : i32
omp.terminator
}
return
}

// -----

// Negative test: omp.taskloop wraps an omp.distribute, which itself wraps an
// omp.loop_nest. The expected diagnostic states that 'omp.simdloop' is the
// only supported nested wrapper, so nesting omp.distribute must be rejected.
func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
// expected-error @below {{only supported nested wrapper is 'omp.simdloop'}}
omp.taskloop {
omp.distribute {
omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
omp.yield
}
omp.terminator
}
}
return
}

// -----

func.func @omp_threadprivate() {
%1 = llvm.mlir.addressof @_QFsubEx : !llvm.ptr
// expected-error @below {{op failed to verify that all of {sym_addr, tls_addr} have same type}}
Expand Down
Loading
Loading