Skip to content

Commit

Permalink
Fix remaining issues after merge.
Browse files Browse the repository at this point in the history
- Cherry-pick some unit test fixes from PR #44.
- Cherry-pick reduction processing fix from PR llvm#85807.
- Update argument lists of modified functions.
- Rewrite `ReductionProcessor::addReductionSym` based on new clause structures.
- Remove leftover references to `RIManager`.
- Rename ops to match the new approach.

Co-authored-by: Jan Leyonberg <jan_sjodin@yahoo.com>
  • Loading branch information
skatrak and jsjodin committed Mar 21, 2024
1 parent 21a0a58 commit 25d3695
Show file tree
Hide file tree
Showing 9 changed files with 62 additions and 61 deletions.
26 changes: 18 additions & 8 deletions flang/lib/Lower/OpenMP/ClauseProcessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -907,27 +907,37 @@ bool ClauseProcessor::processMap(
// Collect the semantic symbols referenced by every `reduction` clause attached
// to a target construct into \p reductionSymbols. Returns true if at least one
// such clause was found.
bool ClauseProcessor::processTargetReduction(
    llvm::SmallVector<const Fortran::semantics::Symbol *> &reductionSymbols)
    const {
  return findRepeatableClause<omp::clause::Reduction>(
      [&](const omp::clause::Reduction &clause,
          const Fortran::parser::CharBlock &) {
        // Append the symbols of this clause's object list.
        ReductionProcessor rp;
        rp.addReductionSym(clause, reductionSymbols);
      });
}

bool ClauseProcessor::processReduction(
mlir::Location currentLocation,
llvm::SmallVectorImpl<mlir::Value> &reductionVars,
llvm::SmallVectorImpl<mlir::Attribute> &reductionDeclSymbols,
llvm::SmallVectorImpl<const Fortran::semantics::Symbol *> *reductionSymbols)
const {
llvm::SmallVectorImpl<mlir::Value> &outReductionVars,
llvm::SmallVectorImpl<mlir::Attribute> &outReductionDeclSymbols,
llvm::SmallVectorImpl<const Fortran::semantics::Symbol *>
*outReductionSymbols) const {
return findRepeatableClause<omp::clause::Reduction>(
[&](const omp::clause::Reduction &clause,
const Fortran::parser::CharBlock &) {
llvm::SmallVector<mlir::Value> reductionVars;
llvm::SmallVector<mlir::Attribute> reductionDeclSymbols;
llvm::SmallVector<const Fortran::semantics::Symbol *> reductionSymbols;
ReductionProcessor rp;
rp.addDeclareReduction(currentLocation, converter, clause,
reductionVars, reductionDeclSymbols,
reductionSymbols);
outReductionSymbols ? &reductionSymbols
: nullptr);
llvm::copy(reductionVars, std::back_inserter(outReductionVars));
llvm::copy(reductionDeclSymbols,
std::back_inserter(outReductionDeclSymbols));
if (outReductionSymbols)
llvm::copy(reductionSymbols,
std::back_inserter(*outReductionSymbols));
});
}

Expand Down
7 changes: 3 additions & 4 deletions flang/lib/Lower/OpenMP/OpenMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1339,7 +1339,7 @@ genTargetOp(Fortran::lower::AbstractConverter &converter,
Fortran::parser::OmpClause::Defaultmap>(
currentLocation, llvm::omp::Directive::OMPD_target);

DataSharingProcessor localDSP(converter, clauseList, eval);
DataSharingProcessor localDSP(converter, semaCtx, clauseList, eval);
DataSharingProcessor &actualDSP = dsp ? *dsp : localDSP;
actualDSP.processStep1();

Expand Down Expand Up @@ -1458,8 +1458,7 @@ genTargetOp(Fortran::lower::AbstractConverter &converter,
/*teams_thread_limit=*/nullptr, /*num_threads=*/nullptr);

genBodyOfTargetOp(converter, semaCtx, eval, genNested, targetOp, mapSymTypes,
mapSymLocs, mapSymbols, currentLocation, clauseList,
actualDSP);
mapSymLocs, mapSymbols, currentLocation, actualDSP);

return targetOp;
}
Expand Down Expand Up @@ -2074,7 +2073,7 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
}();

bool validDirective = false;
DataSharingProcessor dsp(converter, loopOpClauseList, eval);
DataSharingProcessor dsp(converter, semaCtx, loopOpClauseList, eval);

if (llvm::omp::topTaskloopSet.test(ompDirective)) {
validDirective = true;
Expand Down
14 changes: 4 additions & 10 deletions flang/lib/Lower/OpenMP/ReductionProcessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,17 +67,11 @@ ReductionProcessor::ReductionIdentifier ReductionProcessor::getReductionType(
}

void ReductionProcessor::addReductionSym(
const Fortran::parser::OmpReductionClause &reduction,
const omp::clause::Reduction &reduction,
llvm::SmallVector<const Fortran::semantics::Symbol *> &symbols) {
const auto &objectList{std::get<Fortran::parser::OmpObjectList>(reduction.t)};

for (const Fortran::parser::OmpObject &ompObject : objectList.v) {
if (const auto *name{
Fortran::parser::Unwrap<Fortran::parser::Name>(ompObject)}) {
if (const Fortran::semantics::Symbol * symbol{name->symbol})
symbols.push_back(symbol);
}
}
const auto &objectList{std::get<omp::ObjectList>(reduction.t)};
llvm::transform(objectList, std::back_inserter(symbols),
[](const Object &object) { return object.id(); });
}

bool ReductionProcessor::supportedIntrinsicProcReduction(
Expand Down
3 changes: 1 addition & 2 deletions flang/lib/Lower/OpenMP/ReductionProcessor.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
#include "Clauses.h"
#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Dialect/FIRType.h"
#include "flang/Parser/parse-tree.h"
#include "flang/Semantics/symbol.h"
#include "flang/Semantics/type.h"
#include "mlir/IR/Location.h"
Expand Down Expand Up @@ -106,7 +105,7 @@ class ReductionProcessor {
mlir::Value op2);

static void addReductionSym(
const Fortran::parser::OmpReductionClause &reduction,
const omp::clause::Reduction &reduction,
llvm::SmallVector<const Fortran::semantics::Symbol *> &symbols);

/// Creates an OpenMP reduction declaration and inserts it into the provided
Expand Down
54 changes: 22 additions & 32 deletions mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1125,22 +1125,19 @@ convertOmpWsloop(
tempTerminator->eraseFromParent();
builder.restoreIP(nextInsertionPoint);

if (!ompBuilder->Config.isGPU())
ompBuilder->RIManager.clear();

return success();
}

// Convenience overload of convertOmpWsloop: sets up fresh reduction state
// (alloca insertion point and reduction generator/info lists) and delegates to
// the full implementation.
static LogicalResult
convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
                 LLVM::ModuleTranslation &moduleTranslation) {
  llvm::OpenMPIRBuilder::InsertPointTy redAllocaIP =
      findAllocaInsertPoint(builder, moduleTranslation);
  SmallVector<OwningReductionGen> owningReductionGens;
  SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;
  SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> reductionInfos;

  return convertOmpWsloop(opInst, builder, moduleTranslation, redAllocaIP,
                          owningReductionGens, owningAtomicReductionGens,
                          reductionInfos);
}
Expand Down Expand Up @@ -1414,9 +1411,6 @@ convertOmpParallel(Operation &opInst1, llvm::IRBuilderBase &builder,
ompBuilder->createParallel(ompLoc, allocaIP, bodyGenCB, privCB, finiCB,
ifCond, numThreads, pbKind, isCancellable));

if (!ompBuilder->Config.isGPU())
ompBuilder->RIManager.clear();

return bodyGenStatus;
}

Expand Down Expand Up @@ -3330,10 +3324,9 @@ class ConversionDispatchList {
}
};

// Forward declaration: lowers a composite `distribute`/`parallel`/`wsloop`
// operation nest (defined later in this file).
static LogicalResult convertOmpDistributeParallelWsloop(
    Operation *op, omp::DistributeOp distribute, omp::ParallelOp parallel,
    omp::WsloopOp wsloop, llvm::IRBuilderBase &builder,
    LLVM::ModuleTranslation &moduleTranslation,
    ConversionDispatchList &dispatchList);

Expand Down Expand Up @@ -3534,10 +3527,10 @@ convertInternalTargetOp(Operation *op, llvm::IRBuilderBase &builder,

omp::DistributeOp distribute;
omp::ParallelOp parallel;
omp::WsLoopOp wsloop;
omp::WsloopOp wsloop;
// Match composite constructs
if (matchOpNest(op, distribute, parallel, wsloop)) {
return convertOmpDistributeParallelWsLoop(op, distribute, parallel, wsloop,
return convertOmpDistributeParallelWsloop(op, distribute, parallel, wsloop,
builder, moduleTranslation,
dispatchList);
}
Expand Down Expand Up @@ -3586,9 +3579,9 @@ class OpenMPDialectLLVMIRTranslationInterface
// Implementation converting a nest of operations in a single function. This
// just overrides the parallel and wsloop dispatches but does the normal
// lowering for now.
static LogicalResult convertOmpDistributeParallelWsLoop(
static LogicalResult convertOmpDistributeParallelWsloop(
Operation *op, omp::DistributeOp distribute, omp::ParallelOp parallel,
omp::WsLoopOp wsloop, llvm::IRBuilderBase &builder,
omp::WsloopOp wsloop, llvm::IRBuilderBase &builder,
LLVM::ModuleTranslation &moduleTranslation,
ConversionDispatchList &dispatchList) {

Expand All @@ -3599,25 +3592,22 @@ static LogicalResult convertOmpDistributeParallelWsLoop(
llvm::OpenMPIRBuilder::InsertPointTy redAllocaIP;

// Convert wsloop alternative implementation
ConvertFunctionTy convertWsLoop = [&redAllocaIP, &owningReductionGens,
&owningAtomicReductionGens,
&reductionInfos](
Operation *op,
llvm::IRBuilderBase &builder,
LLVM::ModuleTranslation
&moduleTranslation) {
if (!isa<omp::WsLoopOp>(op)) {
return std::make_pair(false, failure());
}
ConvertFunctionTy convertWsloop =
[&redAllocaIP, &owningReductionGens, &owningAtomicReductionGens,
&reductionInfos](Operation *op, llvm::IRBuilderBase &builder,
LLVM::ModuleTranslation &moduleTranslation) {
if (!isa<omp::WsloopOp>(op)) {
return std::make_pair(false, failure());
}

LogicalResult result = convertOmpWsLoop(
*op, builder, moduleTranslation, redAllocaIP, owningReductionGens,
owningAtomicReductionGens, reductionInfos);
return std::make_pair(true, result);
};
LogicalResult result = convertOmpWsloop(
*op, builder, moduleTranslation, redAllocaIP, owningReductionGens,
owningAtomicReductionGens, reductionInfos);
return std::make_pair(true, result);
};

// Push the new alternative functions
dispatchList.pushConversionFunction(convertWsLoop);
dispatchList.pushConversionFunction(convertWsloop);

// Lower the current distribute operation
LogicalResult result = convertOmpDistribute(*op, builder, moduleTranslation,
Expand Down
3 changes: 2 additions & 1 deletion mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true } {
llvm.func @target_parallel_wsloop(%arg0: !llvm.ptr) attributes {
target_cpu = "gfx90a",
target_features = #llvm.target_features<["+gfx9-insts", "+wavefrontsize64"]>
target_features = #llvm.target_features<["+gfx9-insts", "+wavefrontsize64"]>,
omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>
} {
omp.parallel {
%loop_ub = llvm.mlir.constant(9 : i32) : i32
Expand Down
4 changes: 3 additions & 1 deletion mlir/test/Target/LLVMIR/omptarget-teams-llvm.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@

module attributes {omp.is_target_device = true} {
llvm.func @foo(i32)
llvm.func @omp_target_teams_shared_simple(%arg0 : i32) {
llvm.func @omp_target_teams_shared_simple(%arg0 : i32) attributes {
omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>
} {
omp.teams {
llvm.call @foo(%arg0) : (i32) -> ()
omp.terminator
Expand Down
4 changes: 3 additions & 1 deletion mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
// for nested omp do loop with collapse clause inside omp target region

module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true } {
llvm.func @target_collapsed_wsloop(%arg0: !llvm.ptr) {
llvm.func @target_collapsed_wsloop(%arg0: !llvm.ptr) attributes {
omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>
} {
%loop_ub = llvm.mlir.constant(99 : i32) : i32
%loop_lb = llvm.mlir.constant(0 : i32) : i32
%loop_step = llvm.mlir.constant(1 : index) : i32
Expand Down
8 changes: 6 additions & 2 deletions mlir/test/Target/LLVMIR/omptarget-wsloop.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
// for nested omp do loop inside omp target region

module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true } {
llvm.func @target_wsloop(%arg0: !llvm.ptr ){
llvm.func @target_wsloop(%arg0: !llvm.ptr) attributes {
omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>
} {
%loop_ub = llvm.mlir.constant(9 : i32) : i32
%loop_lb = llvm.mlir.constant(0 : i32) : i32
%loop_step = llvm.mlir.constant(1 : i32) : i32
Expand All @@ -16,7 +18,9 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
llvm.return
}

llvm.func @target_empty_wsloop(){
llvm.func @target_empty_wsloop() attributes {
omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>
} {
%loop_ub = llvm.mlir.constant(9 : i32) : i32
%loop_lb = llvm.mlir.constant(0 : i32) : i32
%loop_step = llvm.mlir.constant(1 : i32) : i32
Expand Down

0 comments on commit 25d3695

Please sign in to comment.