Skip to content

Commit

Permalink
[flang][OpenMP][DoConcurrent] Support fir.shape_shift values
Browse files Browse the repository at this point in the history
  • Loading branch information
ergawy committed Sep 2, 2024
1 parent 3e674f5 commit e509514
Show file tree
Hide file tree
Showing 3 changed files with 167 additions and 94 deletions.
238 changes: 152 additions & 86 deletions flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,36 @@ mlir::Value calculateTripCount(fir::FirOpBuilder &builder, mlir::Location loc,
return tripCount;
}

/// Makes an outside-defined value usable inside an `omp.target` region.
///
/// Materializes a stack temporary right after `val`'s definition, stores `val`
/// into it, maps the temporary onto `targetOp` (implicit, by-copy), appends a
/// matching entry-block argument to the target region, and loads the value
/// back at the start of that region.
///
/// \param builder  FIR builder; its insertion point is saved/restored.
/// \param targetOp the `omp.target` op to map the value into.
/// \param val      the host-side value to make available inside the region.
/// \param name     optional name recorded on the generated `omp.map.info` op.
/// \return the `fir.load` result to use in place of `val` inside the region.
///         NOTE(review): callers are expected to rewrite in-region uses of
///         `val` to this result themselves.
mlir::Value mapTemporaryValue(fir::FirOpBuilder &builder,
mlir::omp::TargetOp targetOp, mlir::Value val,
std::string name = "") {
// Restore the original insertion point when this function returns.
mlir::OpBuilder::InsertionGuard guard(builder);
// The temporary must be created after `val` is defined so the store below
// sees a dominating definition.
builder.setInsertionPointAfterValue(val);
auto copyVal = builder.createTemporary(val.getLoc(), val.getType());
// Store with conversion in case the temporary's element type differs.
builder.createStoreWithConvert(copyVal.getLoc(), val, copyVal);

// No bounds: the temporary holds a single (scalar-like) value.
llvm::SmallVector<mlir::Value> bounds;
// The map.info op must appear before the target op that consumes it.
builder.setInsertionPoint(targetOp);
mlir::Value mapOp = createMapInfoOp(
builder, copyVal.getLoc(), copyVal,
/*varPtrPtr=*/mlir::Value{}, name, bounds,
/*members=*/llvm::SmallVector<mlir::Value>{},
/*membersIndex=*/mlir::DenseIntElementsAttr{},
static_cast<std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT),
mlir::omp::VariableCaptureKind::ByCopy, copyVal.getType());
targetOp.getMapVarsMutable().append(mapOp);

// Mirror the new map operand with an entry-block argument so the mapped
// temporary is addressable from inside the target region.
mlir::Region &targetRegion = targetOp.getRegion();
mlir::Block *targetEntryBlock = &targetRegion.getBlocks().front();
mlir::Value clonedValArg =
targetRegion.addArgument(copyVal.getType(), copyVal.getLoc());
// Load at the very top of the region so the value dominates all its
// prospective uses there.
builder.setInsertionPointToStart(targetEntryBlock);
auto loadOp =
builder.create<fir::LoadOp>(clonedValArg.getLoc(), clonedValArg);
return loadOp.getResult();
}

/// Check if cloning the bounds introduced any dependency on the outer region.
/// If so, then either clone them as well if they are MemoryEffectFree, or else
/// copy them to a new temporary and add them to the map and block_argument
Expand Down Expand Up @@ -179,31 +209,9 @@ void cloneOrMapRegionOutsiders(fir::FirOpBuilder &builder,
return use.getOwner()->getBlock() == targetEntryBlock;
});
} else {
mlir::OpBuilder::InsertionGuard guard(builder);
builder.setInsertionPointAfter(valOp);
auto copyVal = builder.createTemporary(val.getLoc(), val.getType());
builder.createStoreWithConvert(copyVal.getLoc(), val, copyVal);

llvm::SmallVector<mlir::Value> bounds;
std::stringstream name;
builder.setInsertionPoint(targetOp);
mlir::Value mapOp = createMapInfoOp(
builder, copyVal.getLoc(), copyVal,
/*varPtrPtr=*/mlir::Value{}, name.str(), bounds,
/*members=*/llvm::SmallVector<mlir::Value>{},
/*membersIndex=*/mlir::DenseIntElementsAttr{},
static_cast<
std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT),
mlir::omp::VariableCaptureKind::ByCopy, copyVal.getType());
targetOp.getMapVarsMutable().append(mapOp);
mlir::Value clonedValArg =
targetRegion.addArgument(copyVal.getType(), copyVal.getLoc());
builder.setInsertionPointToStart(targetEntryBlock);
auto loadOp =
builder.create<fir::LoadOp>(clonedValArg.getLoc(), clonedValArg);
mlir::Value mappedTemp = mapTemporaryValue(builder, targetOp, val);
val.replaceUsesWithIf(
loadOp->getResult(0), [targetEntryBlock](mlir::OpOperand &use) {
mappedTemp, [targetEntryBlock](mlir::OpOperand &use) {
return use.getOwner()->getBlock() == targetEntryBlock;
});
}
Expand Down Expand Up @@ -747,17 +755,18 @@ class DoConcurrentConversion : public mlir::OpConversionPattern<fir::DoLoopOp> {

if (mapToDevice) {
mlir::omp::TargetOperands targetClauseOps;
LiveInShapeInfoMap liveInShapeInfoMap;

// The outermost loop will contain all the live-in values in all nested
// loops since live-in values are collected recursively for all nested
// ops.
for (mlir::Value liveIn : loopNestLiveIns) {
targetClauseOps.mapVars.push_back(
genMapInfoOpForLiveIn(rewriter, liveIn, liveInToName));
targetClauseOps.mapVars.push_back(genMapInfoOpForLiveIn(
rewriter, liveIn, liveInToName, liveInShapeInfoMap[liveIn]));
}

targetOp = genTargetOp(doLoop.getLoc(), rewriter, mapper, loopNestLiveIns,
targetClauseOps);
targetClauseOps, liveInShapeInfoMap);
genTeamsOp(doLoop.getLoc(), rewriter);
}

Expand Down Expand Up @@ -809,42 +818,76 @@ class DoConcurrentConversion : public mlir::OpConversionPattern<fir::DoLoopOp> {
}

private:
void genBoundsOps(mlir::ConversionPatternRewriter &rewriter,
mlir::Location loc, mlir::Value shape,
llvm::SmallVectorImpl<mlir::Value> &boundsOps) const {
struct TargetDeclareShapeCreationInfo {
std::vector<mlir::Value> startIndices{};
std::vector<mlir::Value> extents{};

bool isShapedValue() const { return !extents.empty(); }
bool isShapeShiftedValue() const { return !startIndices.empty(); }
};

using LiveInShapeInfoMap =
llvm::DenseMap<mlir::Value, TargetDeclareShapeCreationInfo>;

void
genBoundsOps(mlir::ConversionPatternRewriter &rewriter, mlir::Location loc,
mlir::Value shape, llvm::SmallVectorImpl<mlir::Value> &boundsOps,
TargetDeclareShapeCreationInfo &targetShapeCreationInfo) const {
if (shape == nullptr) {
return;
}

auto shapeOp =
mlir::dyn_cast_if_present<fir::ShapeOp>(shape.getDefiningOp());
auto shapeShiftOp =
mlir::dyn_cast_if_present<fir::ShapeShiftOp>(shape.getDefiningOp());

if (shapeOp == nullptr)
TODO(loc, "Shapes not defined by shape op's are not supported yet.");
if (shapeOp == nullptr && shapeShiftOp == nullptr)
TODO(loc,
"Shapes not defined by `fir.shape` or `fir.shape_shift` op's are "
"not supported yet.");

auto extents = shapeOp.getExtents();
auto extents = shapeOp != nullptr
? std::vector<mlir::Value>(shapeOp.getExtents().begin(),
shapeOp.getExtents().end())
: shapeShiftOp.getExtents();

auto genBoundsOp = [&](mlir::Value extent) {
mlir::Type extentType = extent.getType();
auto lb = rewriter.create<mlir::arith::ConstantOp>(
loc, extentType, rewriter.getIntegerAttr(extentType, 0));
// TODO I think this calculation might not be correct. But this is how
// it is done in PFT->OpenMP lowering. So keeping it like this until we
// double check.
mlir::Value ub = rewriter.create<mlir::arith::SubIOp>(loc, extent, lb);
mlir::Type idxType = extents.front().getType();

auto one = rewriter.create<mlir::arith::ConstantOp>(
loc, idxType, rewriter.getIntegerAttr(idxType, 1));
// For non-shifted values, the starting index is the default Fortran
// value: 1.
std::vector<mlir::Value> startIndices =
shapeOp != nullptr ? std::vector<mlir::Value>(extents.size(), one)
: shapeShiftOp.getOrigins();

auto genBoundsOp = [&](mlir::Value startIndex, mlir::Value extent) {
// We map the entire range of data by default, therefore, we always map
// from the start.
auto normalizedLB = rewriter.create<mlir::arith::ConstantOp>(
loc, idxType, rewriter.getIntegerAttr(idxType, 0));

mlir::Value ub = rewriter.create<mlir::arith::SubIOp>(loc, extent, one);

return rewriter.create<mlir::omp::MapBoundsOp>(
loc, rewriter.getType<mlir::omp::MapBoundsType>(), lb, ub, extent,
mlir::Value{}, false, mlir::Value{});
loc, rewriter.getType<mlir::omp::MapBoundsType>(), normalizedLB, ub,
extent,
/*stride=*/mlir::Value{}, /*stride_in_bytes=*/false, startIndex);
};

for (auto extent : extents)
boundsOps.push_back(genBoundsOp(extent));
for (auto [startIndex, extent] : llvm::zip_equal(startIndices, extents))
boundsOps.push_back(genBoundsOp(startIndex, extent));

if (shapeShiftOp != nullptr)
targetShapeCreationInfo.startIndices = std::move(startIndices);
targetShapeCreationInfo.extents = std::move(extents);
}

mlir::omp::MapInfoOp genMapInfoOpForLiveIn(
mlir::ConversionPatternRewriter &rewriter, mlir::Value liveIn,
const llvm::DenseMap<mlir::Value, std::string> &liveInToName) const {
const llvm::DenseMap<mlir::Value, std::string> &liveInToName,
TargetDeclareShapeCreationInfo &targetShapeCreationInfo) const {
mlir::Value rawAddr = liveIn;
mlir::Value shape = nullptr;
std::string name = "";
Expand Down Expand Up @@ -891,7 +934,8 @@ class DoConcurrentConversion : public mlir::OpConversionPattern<fir::DoLoopOp> {
}

llvm::SmallVector<mlir::Value> boundsOps;
genBoundsOps(rewriter, liveIn.getLoc(), shape, boundsOps);
genBoundsOps(rewriter, liveIn.getLoc(), shape, boundsOps,
targetShapeCreationInfo);

return Fortran::lower::omp ::internal::createMapInfoOp(
rewriter, liveIn.getLoc(), rawAddr,
Expand All @@ -904,11 +948,12 @@ class DoConcurrentConversion : public mlir::OpConversionPattern<fir::DoLoopOp> {
captureKind, rawAddr.getType());
}

mlir::omp::TargetOp genTargetOp(mlir::Location loc,
mlir::ConversionPatternRewriter &rewriter,
mlir::IRMapping &mapper,
llvm::ArrayRef<mlir::Value> liveIns,
mlir::omp::TargetOperands &clauseOps) const {
mlir::omp::TargetOp
genTargetOp(mlir::Location loc, mlir::ConversionPatternRewriter &rewriter,
mlir::IRMapping &mapper,
const llvm::ArrayRef<mlir::Value> liveIns,
const mlir::omp::TargetOperands &clauseOps,
const LiveInShapeInfoMap &liveInShapeInfoMap) const {
auto targetOp = rewriter.create<mlir::omp::TargetOp>(loc, clauseOps);

mlir::Region &region = targetOp.getRegion();
Expand All @@ -923,81 +968,102 @@ class DoConcurrentConversion : public mlir::OpConversionPattern<fir::DoLoopOp> {
}

rewriter.createBlock(&region, {}, liveInTypes, liveInLocs);
fir::FirOpBuilder firBuilder(
fir::FirOpBuilder builder(
rewriter,
fir::getKindMapping(targetOp->getParentOfType<mlir::ModuleOp>()));

for (auto [liveIn, arg, mapInfoOp] :
llvm::zip_equal(liveIns, region.getArguments(), clauseOps.mapVars)) {
size_t argIdx = 0;
for (auto [liveIn, mapInfoOp] :
llvm::zip_equal(liveIns, clauseOps.mapVars)) {
auto miOp = mlir::cast<mlir::omp::MapInfoOp>(mapInfoOp.getDefiningOp());
hlfir::DeclareOp liveInDeclare = genLiveInDeclare(rewriter, arg, miOp);
hlfir::DeclareOp liveInDeclare =
genLiveInDeclare(builder, targetOp, region.getArgument(argIdx), miOp,
liveInShapeInfoMap.at(liveIn));

// TODO If `liveIn.getDefiningOp()` is a `fir::BoxAddrOp`, we probably
// need to "unpack" the box by getting the defining op of it's value.
// However, we did not hit this case in reality yet so leaving it as a
// todo for now.

if (!llvm::isa<mlir::omp::PointerLikeType>(liveIn.getType()))
mapper.map(liveIn,
firBuilder.loadIfRef(liveIn.getLoc(),
liveInDeclare.getOriginalBase()));
mapper.map(liveIn, builder.loadIfRef(liveIn.getLoc(),
liveInDeclare.getOriginalBase()));
else
mapper.map(liveIn, liveInDeclare.getOriginalBase());

if (auto origDeclareOp = mlir::dyn_cast_if_present<hlfir::DeclareOp>(
liveIn.getDefiningOp())) {
mapper.map(origDeclareOp.getBase(), liveInDeclare.getBase());
}
++argIdx;
}

Fortran::lower::omp::internal::cloneOrMapRegionOutsiders(firBuilder,
targetOp);
Fortran::lower::omp::internal::cloneOrMapRegionOutsiders(builder, targetOp);
rewriter.setInsertionPoint(
rewriter.create<mlir::omp::TerminatorOp>(targetOp.getLoc()));

return targetOp;
}

hlfir::DeclareOp
genLiveInDeclare(mlir::ConversionPatternRewriter &rewriter,
mlir::Value liveInArg,
mlir::omp::MapInfoOp liveInMapInfoOp) const {
hlfir::DeclareOp genLiveInDeclare(
fir::FirOpBuilder &builder, mlir::omp::TargetOp targetOp,
mlir::Value liveInArg, mlir::omp::MapInfoOp liveInMapInfoOp,
const TargetDeclareShapeCreationInfo &targetShapeCreationInfo) const {
mlir::Type liveInType = liveInArg.getType();
std::string liveInName = liveInMapInfoOp.getName().has_value()
? liveInMapInfoOp.getName().value().str()
: std::string("");

if (fir::isa_ref_type(liveInType))
liveInType = fir::unwrapRefType(liveInType);

mlir::Value shape = [&]() -> mlir::Value {
if (hlfir::isFortranScalarNumericalType(liveInType))
if (!targetShapeCreationInfo.isShapedValue())
return {};

if (hlfir::isFortranArrayObject(liveInType)) {
llvm::SmallVector<mlir::Value> shapeOpOperands;
llvm::SmallVector<mlir::Value> extentOperands;
llvm::SmallVector<mlir::Value> startIndexOperands;

if (targetShapeCreationInfo.isShapeShiftedValue()) {
llvm::SmallVector<mlir::Value> shapeShiftOperands;

size_t shapeIdx = 0;
for (auto [startIndex, extent] :
llvm::zip_equal(targetShapeCreationInfo.startIndices,
targetShapeCreationInfo.extents)) {
shapeShiftOperands.push_back(
Fortran::lower::omp::internal::mapTemporaryValue(
builder, targetOp, startIndex,
liveInName + ".start_idx.dim" + std::to_string(shapeIdx)));
shapeShiftOperands.push_back(
Fortran::lower::omp::internal::mapTemporaryValue(
builder, targetOp, extent,
liveInName + ".extent.dim" + std::to_string(shapeIdx)));
++shapeIdx;
}

for (auto boundsOperand : liveInMapInfoOp.getBounds()) {
auto boundsOp =
mlir::cast<mlir::omp::MapBoundsOp>(boundsOperand.getDefiningOp());
mlir::Operation *localExtentDef =
boundsOp.getExtent().getDefiningOp()->clone();
rewriter.getInsertionBlock()->push_back(localExtentDef);
assert(localExtentDef->getNumResults() == 1);
auto shapeShiftType = fir::ShapeShiftType::get(
builder.getContext(), shapeShiftOperands.size() / 2);
return builder.create<fir::ShapeShiftOp>(
liveInArg.getLoc(), shapeShiftType, shapeShiftOperands);
}

shapeOpOperands.push_back(localExtentDef->getResult(0));
}
llvm::SmallVector<mlir::Value> shapeOperands;

return rewriter.create<fir::ShapeOp>(liveInArg.getLoc(),
shapeOpOperands);
size_t shapeIdx = 0;
for (auto extent : targetShapeCreationInfo.extents) {
shapeOperands.push_back(
Fortran::lower::omp::internal::mapTemporaryValue(
builder, targetOp, extent,
liveInName + ".extent.dim" + std::to_string(shapeIdx)));
++shapeIdx;
}

std::string opStr;
llvm::raw_string_ostream opOs(opStr);
opOs << "Unsupported type: " << liveInType;
llvm_unreachable(opOs.str().c_str());
return builder.create<fir::ShapeOp>(liveInArg.getLoc(), shapeOperands);
}();

return rewriter.create<hlfir::DeclareOp>(liveInArg.getLoc(), liveInArg,
liveInMapInfoOp.getName().value(),
shape);
return builder.create<hlfir::DeclareOp>(liveInArg.getLoc(), liveInArg,
liveInName, shape);
}

mlir::omp::TeamsOp
Expand Down
10 changes: 8 additions & 2 deletions flang/test/Transforms/DoConcurrent/basic_device.f90
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,14 @@ program do_concurrent_basic
! CHECK-DAG: %[[UB_MAP_INFO:.*]] = omp.map.info {{.*}} !fir.ref<index> {name = "loop.0.ub"}
! CHECK-DAG: %[[STEP_MAP_INFO:.*]] = omp.map.info {{.*}} !fir.ref<index> {name = "loop.0.step"}

! CHECK: %[[C1:.*]] = arith.constant 1 : index
! CHECK: %[[C0:.*]] = arith.constant 0 : index
! CHECK: %[[UPPER_BOUND:.*]] = arith.subi %[[A_EXTENT]], %[[C0]] : index
! CHECK: %[[UPPER_BOUND:.*]] = arith.subi %[[A_EXTENT]], %[[C1]] : index

! CHECK: %[[A_BOUNDS:.*]] = omp.map.bounds lower_bound(%[[C0]] : index)
! CHECK-SAME: upper_bound(%[[UPPER_BOUND]] : index)
! CHECK-SAME: extent(%[[A_EXTENT]] : index)
! CHECK-SAME: start_idx(%[[C1]] : index)

! CHECK-DAG: %[[A_MAP_INFO:.*]] = omp.map.info var_ptr(%[[A_ORIG_DECL]]#1 : {{[^(]+}})
! CHECK-SAME: map_clauses(implicit, tofrom) capture(ByRef) bounds(%[[A_BOUNDS]])
Expand All @@ -44,13 +46,16 @@ program do_concurrent_basic
! CHECK-SAME: %[[STEP_MAP_INFO]] -> %[[STEP_ARG:.[[:alnum:]]+]],
! CHECK-SAME: %[[I_MAP_INFO]] -> %[[I_ARG:[[:alnum:]]+]],
! CHECK-SAME: %[[A_MAP_INFO]] -> %[[A_ARG:.[[:alnum:]]+]]
! CHECK-SAME: %[[A_EXT:.*]] -> %[[A_EXT_ARG:.[[:alnum:]]+]]
! CHECK-SAME: trip_count(%[[TRIP_COUNT]] : i64)

! CHECK-NEXT: ^{{.*}}(%[[LB_ARG]]: !fir.ref<index>,
! CHECK-SAME: %[[UB_ARG]]: !fir.ref<index>, %[[STEP_ARG]]: !fir.ref<index>,
! CHECK-SAME: %[[I_ARG]]: !fir.ref<i32>,
! CHECK-SAME: %[[A_ARG]]: !fir.ref<!fir.array<10xi32>>, %[[A_EXT_ARG]]: !fir.ref<index>):

! CHECK: %[[A_EXT:.*]] = fir.load %[[A_EXT_ARG]] : !fir.ref<index>

! CHECK: %[[LB_DEV_DECL:.*]]:2 = hlfir.declare %[[LB_ARG]]
! CHECK: %[[LB_DEV_VAL:.*]] = fir.load %[[LB_DEV_DECL]]#1

Expand All @@ -60,7 +65,8 @@ program do_concurrent_basic
! CHECK: %[[STEP_DEV_DECL:.*]]:2 = hlfir.declare %[[STEP_ARG]]
! CHECK: %[[STEP_DEV_VAL:.*]] = fir.load %[[STEP_DEV_DECL]]#1

! CHECK: %[[A_DEV_DECL:.*]]:2 = hlfir.declare %[[A_ARG]]
! CHECK: %[[A_SHAPE:.*]] = fir.shape %[[A_EXT]] : (index) -> !fir.shape<1>
! CHECK: %[[A_DEV_DECL:.*]]:2 = hlfir.declare %[[A_ARG]](%[[A_SHAPE]])

! CHECK: omp.teams {
! CHECK-NEXT: omp.parallel {
Expand Down
Loading

0 comments on commit e509514

Please sign in to comment.