diff --git a/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp b/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp index a929b0bdd16943..75392c601789da 100644 --- a/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp +++ b/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp @@ -24,11 +24,13 @@ #include "mlir/IR/IRMapping.h" #include "mlir/Pass/Pass.h" #include "mlir/Transforms/DialectConversion.h" +#include "mlir/Transforms/LoopInvariantCodeMotionUtils.h" #include "mlir/Transforms/RegionUtils.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" #include #include +#include #include namespace flangomp { @@ -36,7 +38,8 @@ namespace flangomp { #include "flang/Optimizer/OpenMP/Passes.h.inc" } // namespace flangomp -#define DEBUG_TYPE "fopenmp-do-concurrent-conversion" +#define DEBUG_TYPE "do-concurrent-conversion" +#define DBGS() (llvm::dbgs() << "[" DEBUG_TYPE << "]: ") namespace Fortran { namespace lower { @@ -148,6 +151,36 @@ mlir::Value calculateTripCount(fir::FirOpBuilder &builder, mlir::Location loc, return tripCount; } +mlir::Value mapTemporaryValue(fir::FirOpBuilder &builder, + mlir::omp::TargetOp targetOp, mlir::Value val, + std::string name = "") { + mlir::OpBuilder::InsertionGuard guard(builder); + builder.setInsertionPointAfterValue(val); + auto copyVal = builder.createTemporary(val.getLoc(), val.getType()); + builder.createStoreWithConvert(copyVal.getLoc(), val, copyVal); + + llvm::SmallVector bounds; + builder.setInsertionPoint(targetOp); + mlir::Value mapOp = createMapInfoOp( + builder, copyVal.getLoc(), copyVal, + /*varPtrPtr=*/mlir::Value{}, name, bounds, + /*members=*/llvm::SmallVector{}, + /*membersIndex=*/mlir::DenseIntElementsAttr{}, + static_cast>( + llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT), + mlir::omp::VariableCaptureKind::ByCopy, copyVal.getType()); + targetOp.getMapVarsMutable().append(mapOp); + + mlir::Region &targetRegion = targetOp.getRegion(); + mlir::Block *targetEntryBlock = &targetRegion.getBlocks().front(); + mlir::Value clonedValArg = + targetRegion.addArgument(copyVal.getType(), copyVal.getLoc()); + builder.setInsertionPointToStart(targetEntryBlock); + auto loadOp = + builder.create(clonedValArg.getLoc(), clonedValArg); + return loadOp.getResult(); +} + /// Check if cloning the bounds introduced any dependency on the outer region. /// If so, then either clone them as well if they are MemoryEffectFree, or else /// copy them to a new temporary and add them to the map and block_argument @@ -176,31 +209,9 @@ void cloneOrMapRegionOutsiders(fir::FirOpBuilder &builder, return use.getOwner()->getBlock() == targetEntryBlock; }); } else { - mlir::OpBuilder::InsertionGuard guard(builder); - builder.setInsertionPointAfter(valOp); - auto copyVal = builder.createTemporary(val.getLoc(), val.getType()); - builder.createStoreWithConvert(copyVal.getLoc(), val, copyVal); - - llvm::SmallVector bounds; - std::stringstream name; - builder.setInsertionPoint(targetOp); - mlir::Value mapOp = createMapInfoOp( - builder, copyVal.getLoc(), copyVal, - /*varPtrPtr=*/mlir::Value{}, name.str(), bounds, - /*members=*/llvm::SmallVector{}, - /*membersIndex=*/mlir::DenseIntElementsAttr{}, - static_cast< - std::underlying_type_t>( - llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT), - mlir::omp::VariableCaptureKind::ByCopy, copyVal.getType()); - targetOp.getMapVarsMutable().append(mapOp); - mlir::Value clonedValArg = - targetRegion.addArgument(copyVal.getType(), copyVal.getLoc()); - builder.setInsertionPointToStart(targetEntryBlock); - auto loadOp = - builder.create(clonedValArg.getLoc(), clonedValArg); + mlir::Value mappedTemp = mapTemporaryValue(builder, targetOp, val); val.replaceUsesWithIf( - loadOp->getResult(0), [targetEntryBlock](mlir::OpOperand &use) { + mappedTemp, [targetEntryBlock](mlir::OpOperand &use) { return use.getOwner()->getBlock() == targetEntryBlock; }); } @@ -248,29 +259,59 @@ bool isIndVarUltimateOperand(mlir::Operation *op, fir::DoLoopOp doLoop) { return false; }; +mlir::Value findLoopIndVar(fir::DoLoopOp doLoop) { + mlir::Value result = nullptr; + mlir::visitUsedValuesDefinedAbove( + doLoop.getRegion(), [&](mlir::OpOperand *operand) { + if (isIndVarUltimateOperand(operand->getOwner(), doLoop)) + result = operand->get(); + }); + + assert(result != nullptr); + return result; +} + /// Collect the list of values used inside the loop but defined outside of it. /// The first item in the returned list is always the loop's induction /// variable. -void collectLoopLiveIns(fir::DoLoopOp doLoop, - llvm::SmallVectorImpl &liveIns) { +void collectLoopNestLiveIns( + LoopNestToIndVarMap &loopNest, llvm::SmallVectorImpl &liveIns, + llvm::DenseMap *liveInToName = nullptr) { llvm::SmallDenseSet seenValues; llvm::SmallDenseSet seenOps; - mlir::visitUsedValuesDefinedAbove( - doLoop.getRegion(), [&](mlir::OpOperand *operand) { - if (!seenValues.insert(operand->get()).second) - return; + auto addValueToLiveIns = [&](mlir::Value liveIn) { + if (!seenValues.insert(liveIn).second) + return false; - mlir::Operation *definingOp = operand->get().getDefiningOp(); - // We want to collect ops corresponding to live-ins only once. - if (definingOp && !seenOps.insert(definingOp).second) - return; + mlir::Operation *definingOp = liveIn.getDefiningOp(); + // We want to collect ops corresponding to live-ins only once. + if (definingOp && !seenOps.insert(definingOp).second) + return false; - liveIns.push_back(operand->get()); + liveIns.push_back(liveIn); + return true; + }; - if (isIndVarUltimateOperand(operand->getOwner(), doLoop)) - std::swap(*liveIns.begin(), *liveIns.rbegin()); - }); + size_t nestLevel = 0; + for (auto [loop, _] : loopNest) { + auto addBoundOrStepToLiveIns = [&](mlir::Value operand, std::string name) { + (*liveInToName)[operand] = name; + addValueToLiveIns(operand); + }; + + addBoundOrStepToLiveIns(loop.getLowerBound(), + "loop." + std::to_string(nestLevel) + ".lb"); + addBoundOrStepToLiveIns(loop.getUpperBound(), + "loop." + std::to_string(nestLevel) + ".ub"); + addBoundOrStepToLiveIns(loop.getStep(), + "loop." + std::to_string(nestLevel) + ".step"); + ++nestLevel; + } + + mlir::visitUsedValuesDefinedAbove( + loopNest.front().first.getRegion(), + [&](mlir::OpOperand *operand) { addValueToLiveIns(operand->get()); }); } /// Collects the op(s) responsible for updating a loop's iteration variable with @@ -359,24 +400,96 @@ void collectIndirectConstOpChain(mlir::Operation *link, opChain.insert(link); } +/// Loop \p innerLoop is considered perfectly-nested inside \p outerLoop iff +/// there are no operations in \p outerloop's other than: +/// +/// 1. those operations needed to setup \p innerLoop's LB, UB, and step values, +/// 2. the operations needed to assing/update \p outerLoop's induction variable. +/// 3. \p innerLoop itself. +/// +/// \p return true if \p innerLoop is perfectly nested inside \p outerLoop +/// according to the above definition. +bool isPerfectlyNested(fir::DoLoopOp outerLoop, fir::DoLoopOp innerLoop) { + mlir::BackwardSliceOptions backwardSliceOptions; + backwardSliceOptions.inclusive = true; + // We will collect the backward slices for innerLoop's LB, UB, and step. + // However, we want to limit the scope of these slices to the scope of + // outerLoop's region. + backwardSliceOptions.filter = [&](mlir::Operation *op) { + return !mlir::areValuesDefinedAbove(op->getResults(), + outerLoop.getRegion()); + }; + + llvm::SetVector lbSlice; + mlir::getBackwardSlice(innerLoop.getLowerBound(), &lbSlice, + backwardSliceOptions); + + llvm::SetVector ubSlice; + mlir::getBackwardSlice(innerLoop.getUpperBound(), &ubSlice, + backwardSliceOptions); + + llvm::SetVector stepSlice; + mlir::getBackwardSlice(innerLoop.getStep(), &stepSlice, backwardSliceOptions); + + mlir::ForwardSliceOptions forwardSliceOptions; + forwardSliceOptions.inclusive = true; + // We don't care of the outer loop's induction variable's uses within the + // inner loop, so we filter out these uses. + forwardSliceOptions.filter = [&](mlir::Operation *op) { + return mlir::areValuesDefinedAbove(op->getResults(), innerLoop.getRegion()); + }; + + llvm::SetVector indVarSlice; + mlir::getForwardSlice(outerLoop.getInductionVar(), &indVarSlice, + forwardSliceOptions); + + llvm::SetVector innerLoopSetupOpsVec; + innerLoopSetupOpsVec.set_union(indVarSlice); + innerLoopSetupOpsVec.set_union(lbSlice); + innerLoopSetupOpsVec.set_union(ubSlice); + innerLoopSetupOpsVec.set_union(stepSlice); + llvm::DenseSet innerLoopSetupOpsSet; + + for (mlir::Operation *op : innerLoopSetupOpsVec) + innerLoopSetupOpsSet.insert(op); + + llvm::DenseSet loopBodySet; + outerLoop.walk([&](mlir::Operation *op) { + if (op == outerLoop) + return mlir::WalkResult::advance(); + + if (op == innerLoop) + return mlir::WalkResult::skip(); + + if (op->hasTrait()) + return mlir::WalkResult::advance(); + + loopBodySet.insert(op); + return mlir::WalkResult::advance(); + }); + + bool result = (loopBodySet == innerLoopSetupOpsSet); + LLVM_DEBUG(DBGS() << "Loop pair starting at location " << outerLoop.getLoc() + << " is" << (result ? "" : " not") + << " perfectly nested\n"); + return result; +} + /// Starting with `outerLoop` collect a perfectly nested loop nest, if any. This /// function collects as much as possible loops in the nest; it case it fails to /// recognize a certain nested loop as part of the nest it just returns the /// parent loops it discovered before. -mlir::LogicalResult collectLoopNest(fir::DoLoopOp outerLoop, +mlir::LogicalResult collectLoopNest(fir::DoLoopOp currentLoop, LoopNestToIndVarMap &loopNest) { - assert(outerLoop.getUnordered()); - llvm::SmallVector outerLoopLiveIns; - collectLoopLiveIns(outerLoop, outerLoopLiveIns); - + assert(currentLoop.getUnordered()); while (true) { loopNest.try_emplace( - outerLoop, + currentLoop, InductionVariableInfo{ - outerLoopLiveIns.front().getDefiningOp(), - std::move(looputils::extractIndVarUpdateOps(outerLoop))}); + findLoopIndVar(currentLoop).getDefiningOp(), + std::move(looputils::extractIndVarUpdateOps(currentLoop))}); - auto directlyNestedLoops = outerLoop.getRegion().getOps(); + auto directlyNestedLoops = currentLoop.getRegion().getOps(); llvm::SmallVector unorderedLoops; for (auto nestedLoop : directlyNestedLoops) @@ -396,64 +509,10 @@ mlir::LogicalResult collectLoopNest(fir::DoLoopOp outerLoop, (nestedUnorderedLoop.getStep().getDefiningOp() == nullptr)) return mlir::failure(); - llvm::SmallVector nestedLiveIns; - collectLoopLiveIns(nestedUnorderedLoop, nestedLiveIns); - - llvm::DenseSet outerLiveInsSet; - llvm::DenseSet nestedLiveInsSet; - - // Returns a "unified" view of an mlir::Value. This utility checks if the - // value is defined by an op, and if so, return the first value defined by - // that op (if there are many), otherwise just returns the value. - // - // This serves the purpose that if, for example, `%op_res#0` is used in the - // outer loop and `%op_res#1` is used in the nested loop (or vice versa), - // that we detect both as the same value. If we did not do so, we might - // falesely detect that the 2 loops are not perfectly nested since they use - // "different" sets of values. - auto getUnifiedLiveInView = [](mlir::Value liveIn) { - return liveIn.getDefiningOp() != nullptr - ? liveIn.getDefiningOp()->getResult(0) - : liveIn; - }; - - // Re-package both lists of live-ins into sets so that we can use set - // equality to compare the values used in the outerloop vs. the nestd one. - - for (auto liveIn : nestedLiveIns) - nestedLiveInsSet.insert(getUnifiedLiveInView(liveIn)); - - mlir::Value outerLoopIV; - for (auto liveIn : outerLoopLiveIns) { - outerLiveInsSet.insert(getUnifiedLiveInView(liveIn)); - - // Keep track of the IV of the outerloop. See `isPerfectlyNested` for more - // info on the reason. - if (outerLoopIV == nullptr) - outerLoopIV = getUnifiedLiveInView(liveIn); - } - - // For the 2 loops to be perfectly nested, either: - // * both would have exactly the same set of live-in values or, - // * the outer loop would have exactly 1 extra live-in value: the outer - // loop's induction variable; this happens when the outer loop's IV is - // *not* referenced in the nested loop. - bool isPerfectlyNested = [&]() { - if (outerLiveInsSet == nestedLiveInsSet) - return true; - - if ((outerLiveInsSet.size() == nestedLiveIns.size() + 1) && - !nestedLiveInsSet.contains(outerLoopIV)) - return true; - - return false; - }(); - - if (!isPerfectlyNested) + if (!isPerfectlyNested(currentLoop, nestedUnorderedLoop)) return mlir::failure(); - outerLoop = nestedUnorderedLoop; - outerLoopLiveIns = std::move(nestedLiveIns); + currentLoop = nestedUnorderedLoop; } return mlir::success(); @@ -627,10 +686,6 @@ class DoConcurrentConversion : public mlir::OpConversionPattern { "defining operation."); } - llvm::SmallVector outermostLoopLiveIns; - looputils::collectLoopLiveIns(doLoop, outermostLoopLiveIns); - assert(!outermostLoopLiveIns.empty()); - looputils::LoopNestToIndVarMap loopNest; bool hasRemainingNestedLoops = failed(looputils::collectLoopNest(doLoop, loopNest)); @@ -639,15 +694,57 @@ class DoConcurrentConversion : public mlir::OpConversionPattern { "Some `do concurent` loops are not perfectly-nested. " "These will be serialzied."); + llvm::DenseMap liveInToName; + llvm::SmallVector loopNestLiveIns; + + // TODO outline into a separete function. This hoists the ops to compute + // bounds of all loops in the entire loop nest outside the outermost loop. + // Without this hoisting, values/variables that are required to compute + // these bounds will be stuck inside the original `fir.do_loop` ops and + // therefore their SSA values won't be visible from within the `target` + // region. + { + fir::DoLoopOp outermostLoop = loopNest.front().first; + + mlir::BackwardSliceOptions backwardSliceOptions; + backwardSliceOptions.inclusive = true; + // We will collect the backward slices for innerLoop's LB, UB, and step. + // However, we want to limit the scope of these slices to the scope of + // outerLoop's region. + backwardSliceOptions.filter = [&](mlir::Operation *op) { + return !mlir::areValuesDefinedAbove(op->getResults(), + outermostLoop.getRegion()); + }; + + for (auto [loop, _] : loopNest) { + auto moveBoundOrStepOutOfLoopNest = [&](mlir::Value operand) { + llvm::SetVector loopOperandSlice; + mlir::getBackwardSlice(operand, &loopOperandSlice, + backwardSliceOptions); + + for (mlir::Operation *sliceOp : loopOperandSlice) { + outermostLoop.moveOutOfLoop(sliceOp); + } + }; + + moveBoundOrStepOutOfLoopNest(loop.getLowerBound()); + moveBoundOrStepOutOfLoopNest(loop.getUpperBound()); + moveBoundOrStepOutOfLoopNest(loop.getStep()); + } + } + + looputils::collectLoopNestLiveIns(loopNest, loopNestLiveIns, &liveInToName); + assert(!loopNestLiveIns.empty()); + llvm::SetVector locals; looputils::collectLoopLocalValues(loopNest.back().first, locals); // We do not want to map "loop-local" values to the device through // `omp.map.info` ops. Therefore, we remove them from the list of live-ins. - outermostLoopLiveIns.erase(llvm::remove_if(outermostLoopLiveIns, - [&](mlir::Value liveIn) { - return locals.contains(liveIn); - }), - outermostLoopLiveIns.end()); + loopNestLiveIns.erase(llvm::remove_if(loopNestLiveIns, + [&](mlir::Value liveIn) { + return locals.contains(liveIn); + }), + loopNestLiveIns.end()); looputils::sinkLoopIVArgs(rewriter, loopNest); @@ -658,16 +755,18 @@ class DoConcurrentConversion : public mlir::OpConversionPattern { if (mapToDevice) { mlir::omp::TargetOperands targetClauseOps; + LiveInShapeInfoMap liveInShapeInfoMap; // The outermost loop will contain all the live-in values in all nested // loops since live-in values are collected recursively for all nested // ops. - for (mlir::Value liveIn : outermostLoopLiveIns) - targetClauseOps.mapVars.push_back( - genMapInfoOpForLiveIn(rewriter, liveIn)); + for (mlir::Value liveIn : loopNestLiveIns) { + targetClauseOps.mapVars.push_back(genMapInfoOpForLiveIn( + rewriter, liveIn, liveInToName, liveInShapeInfoMap[liveIn])); + } - targetOp = genTargetOp(doLoop.getLoc(), rewriter, mapper, - outermostLoopLiveIns, targetClauseOps); + targetOp = genTargetOp(doLoop.getLoc(), rewriter, mapper, loopNestLiveIns, + targetClauseOps, liveInShapeInfoMap); genTeamsOp(doLoop.getLoc(), rewriter); } @@ -719,48 +818,103 @@ class DoConcurrentConversion : public mlir::OpConversionPattern { } private: - void genBoundsOps(mlir::ConversionPatternRewriter &rewriter, - mlir::Location loc, hlfir::DeclareOp declareOp, - llvm::SmallVectorImpl &boundsOps) const { - if (declareOp.getShape() == nullptr) { + struct TargetDeclareShapeCreationInfo { + std::vector startIndices{}; + std::vector extents{}; + + bool isShapedValue() const { return !extents.empty(); } + bool isShapeShiftedValue() const { return !startIndices.empty(); } + }; + + using LiveInShapeInfoMap = + llvm::DenseMap; + + void + genBoundsOps(mlir::ConversionPatternRewriter &rewriter, mlir::Location loc, + mlir::Value shape, llvm::SmallVectorImpl &boundsOps, + TargetDeclareShapeCreationInfo &targetShapeCreationInfo) const { + if (shape == nullptr) { return; } - auto shapeOp = mlir::dyn_cast_if_present( - declareOp.getShape().getDefiningOp()); + auto shapeOp = + mlir::dyn_cast_if_present(shape.getDefiningOp()); + auto shapeShiftOp = + mlir::dyn_cast_if_present(shape.getDefiningOp()); + + if (shapeOp == nullptr && shapeShiftOp == nullptr) + TODO(loc, + "Shapes not defined by `fir.shape` or `fir.shape_shift` op's are " + "not supported yet."); + + auto extents = shapeOp != nullptr + ? std::vector(shapeOp.getExtents().begin(), + shapeOp.getExtents().end()) + : shapeShiftOp.getExtents(); - if (shapeOp == nullptr) - TODO(loc, "Shapes not defined by shape op's are not supported yet."); + mlir::Type idxType = extents.front().getType(); - auto extents = shapeOp.getExtents(); + auto one = rewriter.create( + loc, idxType, rewriter.getIntegerAttr(idxType, 1)); + // For non-shifted values, that starting index is the default Fortran + // value: 1. + std::vector startIndices = + shapeOp != nullptr ? std::vector(extents.size(), one) + : shapeShiftOp.getOrigins(); - auto genBoundsOp = [&](mlir::Value extent) { - mlir::Type extentType = extent.getType(); - auto lb = rewriter.create( - loc, extentType, rewriter.getIntegerAttr(extentType, 0)); - // TODO I think this caluclation might not be correct. But this is how - // it is done in PFT->OpenMP lowering. So keeping it like this until we - // double check. - mlir::Value ub = rewriter.create(loc, extent, lb); + auto genBoundsOp = [&](mlir::Value startIndex, mlir::Value extent) { + // We map the entire range of data by default, therefore, we always map + // from the start. + auto normalizedLB = rewriter.create( + loc, idxType, rewriter.getIntegerAttr(idxType, 0)); + + mlir::Value ub = rewriter.create(loc, extent, one); return rewriter.create( - loc, rewriter.getType(), lb, ub, extent, - mlir::Value{}, false, mlir::Value{}); + loc, rewriter.getType(), normalizedLB, ub, + extent, + /*stride=*/mlir::Value{}, /*stride_in_bytes=*/false, startIndex); }; - for (auto extent : extents) - boundsOps.push_back(genBoundsOp(extent)); + for (auto [startIndex, extent] : llvm::zip_equal(startIndices, extents)) + boundsOps.push_back(genBoundsOp(startIndex, extent)); + + if (shapeShiftOp != nullptr) + targetShapeCreationInfo.startIndices = std::move(startIndices); + targetShapeCreationInfo.extents = std::move(extents); } - mlir::omp::MapInfoOp - genMapInfoOpForLiveIn(mlir::ConversionPatternRewriter &rewriter, - mlir::Value liveIn) const { - auto declareOp = - mlir::dyn_cast_if_present(liveIn.getDefiningOp()); + mlir::omp::MapInfoOp genMapInfoOpForLiveIn( + mlir::ConversionPatternRewriter &rewriter, mlir::Value liveIn, + const llvm::DenseMap &liveInToName, + TargetDeclareShapeCreationInfo &targetShapeCreationInfo) const { + mlir::Value rawAddr = liveIn; + mlir::Value shape = nullptr; + std::string name = ""; - if (declareOp == nullptr) - TODO(liveIn.getLoc(), - "Values not defined by declare op's are not supported yet."); + mlir::Operation *liveInDefiningOp = liveIn.getDefiningOp(); + auto declareOp = + mlir::dyn_cast_if_present(liveInDefiningOp); + + if (declareOp != nullptr) { + // Use the raw address to avoid unboxing `fir.box` values whenever + // possible. Put differently, if we have access to the direct value memory + // reference/address, we use it. + rawAddr = declareOp.getOriginalBase(); + shape = declareOp.getShape(); + name = declareOp.getUniqName().str(); + } else if (liveInToName.contains(liveIn)) + name = liveInToName.at(liveIn); + + if (!llvm::isa(rawAddr.getType())) { + fir::FirOpBuilder builder( + rewriter, fir::getKindMapping( + liveInDefiningOp->getParentOfType())); + builder.setInsertionPointAfter(liveInDefiningOp); + auto copyVal = builder.createTemporary(liveIn.getLoc(), liveIn.getType()); + builder.createStoreWithConvert(copyVal.getLoc(), liveIn, copyVal); + rawAddr = copyVal; + } mlir::Type liveInType = liveIn.getType(); mlir::Type eleType = liveInType; @@ -780,15 +934,12 @@ class DoConcurrentConversion : public mlir::OpConversionPattern { } llvm::SmallVector boundsOps; - genBoundsOps(rewriter, liveIn.getLoc(), declareOp, boundsOps); + genBoundsOps(rewriter, liveIn.getLoc(), shape, boundsOps, + targetShapeCreationInfo); - // Use the raw address to avoid unboxing `fir.box` values whenever possible. - // Put differently, if we have access to the direct value memory - // reference/address, we use it. - mlir::Value rawAddr = declareOp.getOriginalBase(); return Fortran::lower::omp ::internal::createMapInfoOp( rewriter, liveIn.getLoc(), rawAddr, - /*varPtrPtr=*/{}, declareOp.getUniqName().str(), boundsOps, + /*varPtrPtr=*/{}, name, boundsOps, /*members=*/{}, /*membersIndex=*/mlir::DenseIntElementsAttr{}, static_cast< @@ -797,11 +948,12 @@ class DoConcurrentConversion : public mlir::OpConversionPattern { captureKind, rawAddr.getType()); } - mlir::omp::TargetOp genTargetOp(mlir::Location loc, - mlir::ConversionPatternRewriter &rewriter, - mlir::IRMapping &mapper, - llvm::ArrayRef liveIns, - mlir::omp::TargetOperands &clauseOps) const { + mlir::omp::TargetOp + genTargetOp(mlir::Location loc, mlir::ConversionPatternRewriter &rewriter, + mlir::IRMapping &mapper, + const llvm::ArrayRef liveIns, + const mlir::omp::TargetOperands &clauseOps, + const LiveInShapeInfoMap &liveInShapeInfoMap) const { auto targetOp = rewriter.create(loc, clauseOps); mlir::Region ®ion = targetOp.getRegion(); @@ -809,82 +961,109 @@ class DoConcurrentConversion : public mlir::OpConversionPattern { llvm::SmallVector liveInTypes; llvm::SmallVector liveInLocs; - for (mlir::Value liveIn : liveIns) { - liveInTypes.push_back(liveIn.getType()); - liveInLocs.push_back(liveIn.getLoc()); + for (mlir::Value mapInfoOp : clauseOps.mapVars) { + auto miOp = mlir::cast(mapInfoOp.getDefiningOp()); + liveInTypes.push_back(miOp.getVarPtr().getType()); + liveInLocs.push_back(miOp.getVarPtr().getLoc()); } rewriter.createBlock(®ion, {}, liveInTypes, liveInLocs); + fir::FirOpBuilder builder( + rewriter, + fir::getKindMapping(targetOp->getParentOfType())); - for (auto [arg, mapInfoOp] : - llvm::zip_equal(region.getArguments(), clauseOps.mapVars)) { + size_t argIdx = 0; + for (auto [liveIn, mapInfoOp] : + llvm::zip_equal(liveIns, clauseOps.mapVars)) { auto miOp = mlir::cast(mapInfoOp.getDefiningOp()); - hlfir::DeclareOp liveInDeclare = genLiveInDeclare(rewriter, arg, miOp); - mlir::Value miOperand = miOp.getVariableOperand(0); + hlfir::DeclareOp liveInDeclare = + genLiveInDeclare(builder, targetOp, region.getArgument(argIdx), miOp, + liveInShapeInfoMap.at(liveIn)); - // TODO If `miOperand.getDefiningOp()` is a `fir::BoxAddrOp`, we probably + // TODO If `liveIn.getDefiningOp()` is a `fir::BoxAddrOp`, we probably // need to "unpack" the box by getting the defining op of it's value. // However, we did not hit this case in reality yet so leaving it as a // todo for now. - mapper.map(miOperand, liveInDeclare.getOriginalBase()); + if (!llvm::isa(liveIn.getType())) + mapper.map(liveIn, builder.loadIfRef(liveIn.getLoc(), + liveInDeclare.getOriginalBase())); + else + mapper.map(liveIn, liveInDeclare.getOriginalBase()); if (auto origDeclareOp = mlir::dyn_cast_if_present( - miOperand.getDefiningOp())) + liveIn.getDefiningOp())) { mapper.map(origDeclareOp.getBase(), liveInDeclare.getBase()); + } + ++argIdx; } - fir::FirOpBuilder firBuilder( - rewriter, - fir::getKindMapping(targetOp->getParentOfType())); - Fortran::lower::omp::internal::cloneOrMapRegionOutsiders(firBuilder, - targetOp); + Fortran::lower::omp::internal::cloneOrMapRegionOutsiders(builder, targetOp); rewriter.setInsertionPoint( rewriter.create(targetOp.getLoc())); return targetOp; } - hlfir::DeclareOp - genLiveInDeclare(mlir::ConversionPatternRewriter &rewriter, - mlir::Value liveInArg, - mlir::omp::MapInfoOp liveInMapInfoOp) const { + hlfir::DeclareOp genLiveInDeclare( + fir::FirOpBuilder &builder, mlir::omp::TargetOp targetOp, + mlir::Value liveInArg, mlir::omp::MapInfoOp liveInMapInfoOp, + const TargetDeclareShapeCreationInfo &targetShapeCreationInfo) const { mlir::Type liveInType = liveInArg.getType(); + std::string liveInName = liveInMapInfoOp.getName().has_value() + ? liveInMapInfoOp.getName().value().str() + : std::string(""); if (fir::isa_ref_type(liveInType)) liveInType = fir::unwrapRefType(liveInType); mlir::Value shape = [&]() -> mlir::Value { - if (hlfir::isFortranScalarNumericalType(liveInType)) + if (!targetShapeCreationInfo.isShapedValue()) return {}; - if (hlfir::isFortranArrayObject(liveInType)) { - llvm::SmallVector shapeOpOperands; + llvm::SmallVector extentOperands; + llvm::SmallVector startIndexOperands; + + if (targetShapeCreationInfo.isShapeShiftedValue()) { + llvm::SmallVector shapeShiftOperands; + + size_t shapeIdx = 0; + for (auto [startIndex, extent] : + llvm::zip_equal(targetShapeCreationInfo.startIndices, + targetShapeCreationInfo.extents)) { + shapeShiftOperands.push_back( + Fortran::lower::omp::internal::mapTemporaryValue( + builder, targetOp, startIndex, + liveInName + ".start_idx.dim" + std::to_string(shapeIdx))); + shapeShiftOperands.push_back( + Fortran::lower::omp::internal::mapTemporaryValue( + builder, targetOp, extent, + liveInName + ".extent.dim" + std::to_string(shapeIdx))); + ++shapeIdx; + } - for (auto boundsOperand : liveInMapInfoOp.getBounds()) { - auto boundsOp = - mlir::cast(boundsOperand.getDefiningOp()); - mlir::Operation *localExtentDef = - boundsOp.getExtent().getDefiningOp()->clone(); - rewriter.getInsertionBlock()->push_back(localExtentDef); - assert(localExtentDef->getNumResults() == 1); + auto shapeShiftType = fir::ShapeShiftType::get( + builder.getContext(), shapeShiftOperands.size() / 2); + return builder.create( + liveInArg.getLoc(), shapeShiftType, shapeShiftOperands); + } - shapeOpOperands.push_back(localExtentDef->getResult(0)); - } + llvm::SmallVector shapeOperands; - return rewriter.create(liveInArg.getLoc(), - shapeOpOperands); + size_t shapeIdx = 0; + for (auto extent : targetShapeCreationInfo.extents) { + shapeOperands.push_back( + Fortran::lower::omp::internal::mapTemporaryValue( + builder, targetOp, extent, + liveInName + ".extent.dim" + std::to_string(shapeIdx))); + ++shapeIdx; } - std::string opStr; - llvm::raw_string_ostream opOs(opStr); - opOs << "Unsupported type: " << liveInType; - llvm_unreachable(opOs.str().c_str()); + return builder.create(liveInArg.getLoc(), shapeOperands); }(); - return rewriter.create(liveInArg.getLoc(), liveInArg, - liveInMapInfoOp.getName().value(), - shape); + return builder.create(liveInArg.getLoc(), liveInArg, + liveInName, shape); } mlir::omp::TeamsOp @@ -917,24 +1096,31 @@ class DoConcurrentConversion : public mlir::OpConversionPattern { looputils::collectIndirectConstOpChain(operation, opChain); mlir::Operation *result; - for (mlir::Operation *link : opChain) + for (mlir::Operation *link : opChain) { result = rewriter.clone(*link, mapper); + } return result; }; for (auto &[doLoop, _] : loopNest) { - mlir::Operation *lbOp = doLoop.getLowerBound().getDefiningOp(); - loopNestClauseOps.loopLowerBounds.push_back( - cloneBoundOrStepOpChain(lbOp)->getResult(0)); - - mlir::Operation *ubOp = doLoop.getUpperBound().getDefiningOp(); - loopNestClauseOps.loopUpperBounds.push_back( - cloneBoundOrStepOpChain(ubOp)->getResult(0)); - - mlir::Operation *stepOp = doLoop.getStep().getDefiningOp(); - loopNestClauseOps.loopSteps.push_back( - cloneBoundOrStepOpChain(stepOp)->getResult(0)); + auto addBoundsOrStep = + [&](mlir::Value value, + llvm::SmallVectorImpl &boundsOrStepVec) { + if (mapper.contains(value)) + boundsOrStepVec.push_back(mapper.lookup(value)); + else { + mlir::Operation *definingOp = value.getDefiningOp(); + boundsOrStepVec.push_back( + cloneBoundOrStepOpChain(definingOp)->getResult(0)); + } + }; + + addBoundsOrStep(doLoop.getLowerBound(), + loopNestClauseOps.loopLowerBounds); + addBoundsOrStep(doLoop.getUpperBound(), + loopNestClauseOps.loopUpperBounds); + addBoundsOrStep(doLoop.getStep(), loopNestClauseOps.loopSteps); } loopNestClauseOps.loopInclusive = rewriter.getUnitAttr(); diff --git a/flang/test/Transforms/DoConcurrent/basic_device.f90 b/flang/test/Transforms/DoConcurrent/basic_device.f90 index 2f762c003ddf16..e5de687cad758a 100644 --- a/flang/test/Transforms/DoConcurrent/basic_device.f90 +++ b/flang/test/Transforms/DoConcurrent/basic_device.f90 @@ -22,42 +22,62 @@ program do_concurrent_basic ! CHECK-NOT: fir.do_loop ! CHECK-DAG: %[[I_MAP_INFO:.*]] = omp.map.info var_ptr(%[[I_ORIG_DECL]]#1 + + ! CHECK-DAG: %[[LB_MAP_INFO:.*]] = omp.map.info {{.*}} !fir.ref {name = "loop.0.lb"} + ! CHECK-DAG: %[[UB_MAP_INFO:.*]] = omp.map.info {{.*}} !fir.ref {name = "loop.0.ub"} + ! CHECK-DAG: %[[STEP_MAP_INFO:.*]] = omp.map.info {{.*}} !fir.ref {name = "loop.0.step"} + + ! CHECK: %[[C1:.*]] = arith.constant 1 : index ! CHECK: %[[C0:.*]] = arith.constant 0 : index - ! CHECK: %[[UPPER_BOUND:.*]] = arith.subi %[[A_EXTENT]], %[[C0]] : index + ! CHECK: %[[UPPER_BOUND:.*]] = arith.subi %[[A_EXTENT]], %[[C1]] : index ! CHECK: %[[A_BOUNDS:.*]] = omp.map.bounds lower_bound(%[[C0]] : index) ! CHECK-SAME: upper_bound(%[[UPPER_BOUND]] : index) ! CHECK-SAME: extent(%[[A_EXTENT]] : index) + ! CHECK-SAME: start_idx(%[[C1]] : index) ! CHECK-DAG: %[[A_MAP_INFO:.*]] = omp.map.info var_ptr(%[[A_ORIG_DECL]]#1 : {{[^(]+}}) ! CHECK-SAME: map_clauses(implicit, tofrom) capture(ByRef) bounds(%[[A_BOUNDS]]) ! CHECK: %[[TRIP_COUNT:.*]] = arith.muli %{{.*}}, %{{.*}} : i64 - ! CHECK: omp.target - ! CHECK-SAME: map_entries(%[[I_MAP_INFO]] -> %[[I_ARG:[[:alnum:]]+]], + ! CHECK-SAME: map_entries(%[[LB_MAP_INFO]] -> %[[LB_ARG:.[[:alnum:]]+]], + ! CHECK-SAME: %[[UB_MAP_INFO]] -> %[[UB_ARG:.[[:alnum:]]+]], + ! CHECK-SAME: %[[STEP_MAP_INFO]] -> %[[STEP_ARG:.[[:alnum:]]+]], + ! CHECK-SAME: %[[I_MAP_INFO]] -> %[[I_ARG:[[:alnum:]]+]], ! CHECK-SAME: %[[A_MAP_INFO]] -> %[[A_ARG:.[[:alnum:]]+]] + ! CHECK-SAME: %[[A_EXT:.*]] -> %[[A_EXT_ARG:.[[:alnum:]]+]] ! CHECK-SAME: trip_count(%[[TRIP_COUNT]] : i64) - ! CHECK-NEXT: ^{{.*}}(%[[I_ARG]]: !fir.ref, %[[A_ARG]]: !fir.ref>): + ! CHECK-NEXT: ^{{.*}}(%[[LB_ARG]]: !fir.ref, + ! CHECK-SAME: %[[UB_ARG]]: !fir.ref, %[[STEP_ARG]]: !fir.ref, + ! CHECK-SAME: %[[I_ARG]]: !fir.ref, + ! CHECK-SAME: %[[A_ARG]]: !fir.ref>, %[[A_EXT_ARG]]: !fir.ref): + + ! CHECK: %[[A_EXT:.*]] = fir.load %[[A_EXT_ARG]] : !fir.ref + + ! CHECK: %[[LB_DEV_DECL:.*]]:2 = hlfir.declare %[[LB_ARG]] + ! CHECK: %[[LB_DEV_VAL:.*]] = fir.load %[[LB_DEV_DECL]]#1 + + ! CHECK: %[[UB_DEV_DECL:.*]]:2 = hlfir.declare %[[UB_ARG]] + ! CHECK: %[[UB_DEV_VAL:.*]] = fir.load %[[UB_DEV_DECL]]#1 + + ! CHECK: %[[STEP_DEV_DECL:.*]]:2 = hlfir.declare %[[STEP_ARG]] + ! CHECK: %[[STEP_DEV_VAL:.*]] = fir.load %[[STEP_DEV_DECL]]#1 + + ! CHECK: %[[A_SHAPE:.*]] = fir.shape %[[A_EXT]] : (index) -> !fir.shape<1> + ! CHECK: %[[A_DEV_DECL:.*]]:2 = hlfir.declare %[[A_ARG]](%[[A_SHAPE]]) - ! CHECK: %[[A_DEV_DECL:.*]]:2 = hlfir.declare %[[A_ARG]] ! CHECK: omp.teams { ! CHECK-NEXT: omp.parallel { ! CHECK-NEXT: %[[ITER_VAR:.*]] = fir.alloca i32 {bindc_name = "i"} ! CHECK-NEXT: %[[BINDING:.*]]:2 = hlfir.declare %[[ITER_VAR]] {uniq_name = "_QFEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) - ! CHECK: %[[C1:.*]] = arith.constant 1 : i32 - ! CHECK: %[[LB:.*]] = fir.convert %[[C1]] : (i32) -> index - ! CHECK: %[[C10:.*]] = arith.constant 10 : i32 - ! CHECK: %[[UB:.*]] = fir.convert %[[C10]] : (i32) -> index - ! CHECK: %[[STEP:.*]] = arith.constant 1 : index - ! CHECK-NEXT: omp.distribute { ! CHECK-NEXT: omp.wsloop { - ! CHECK-NEXT: omp.loop_nest (%[[ARG0:.*]]) : index = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { + ! CHECK-NEXT: omp.loop_nest (%[[ARG0:.*]]) : index = (%[[LB_DEV_VAL]]) to (%[[UB_DEV_VAL]]) inclusive step (%[[STEP_DEV_VAL]]) { ! CHECK-NEXT: %[[IV_IDX:.*]] = fir.convert %[[ARG0]] : (index) -> i32 ! CHECK-NEXT: fir.store %[[IV_IDX]] to %[[BINDING]]#1 : !fir.ref ! CHECK-NEXT: %[[IV_VAL1:.*]] = fir.load %[[BINDING]]#0 : !fir.ref @@ -78,6 +98,7 @@ program do_concurrent_basic ! CHECK-NEXT: } ! CHECK-NEXT: omp.terminator ! CHECK-NEXT: } + do concurrent (i=1:10) a(i) = i end do diff --git a/flang/test/Transforms/DoConcurrent/loop_nest_test.f90 b/flang/test/Transforms/DoConcurrent/loop_nest_test.f90 new file mode 100644 index 00000000000000..a0463ea6fdb601 --- /dev/null +++ b/flang/test/Transforms/DoConcurrent/loop_nest_test.f90 @@ -0,0 +1,89 @@ +! Tests loop-nest detection algorithm for do-concurrent mapping. + +! REQUIRES: asserts + +! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-parallel=host \ +! RUN: -mmlir -debug %s -o - 2> %t.log || true + +! RUN: FileCheck %s < %t.log + +program main + implicit none + +contains + +subroutine foo(n) + implicit none + integer :: n, m + integer :: i, j, k + integer :: x + integer, dimension(n) :: a + integer, dimension(n, n, n) :: b + + ! NOTE This for sure is a perfect loop nest. However, the way `do-concurrent` + ! loops are now emitted by flang is probably not correct. This is being looked + ! into at the moment and once we have flang emitting proper loop headers, we + ! will revisit this. + ! + ! CHECK: Loop pair starting at location loc("{{.*}}":[[# @LINE + 2]]:{{.*}}) + ! CHECK-SAME: is not perfectly nested + do concurrent(i=1:n, j=1:bar(n*m, n/m)) + a(i) = n + end do + + ! NOTE same as above. + ! + ! CHECK: Loop pair starting at location loc("{{.*}}":[[# @LINE + 2]]:{{.*}}) + ! CHECK-SAME: is not perfectly nested + do concurrent(i=bar(n, x):n, j=1:bar(n*m, n/m)) + a(i) = n + end do + + ! NOTE This is **not** a perfect nest since the inner call to `bar` will allocate + ! memory for the temp results of `n*m` and `n/m` **inside** the outer loop. + ! + ! CHECK: Loop pair starting at location loc("{{.*}}":[[# @LINE + 2]]:{{.*}}) + ! CHECK-SAME: is not perfectly nested + do concurrent(i=bar(n, x):n) + do concurrent(j=1:bar(n*m, n/m)) + a(i) = n + end do + end do + + ! CHECK: Loop pair starting at location loc("{{.*}}":[[# @LINE + 2]]:{{.*}}) + ! CHECK-SAME: is not perfectly nested + do concurrent(i=1:n) + x = 10 + do concurrent(j=1:m) + b(i,j,k) = i * j + k + end do + end do + + ! CHECK: Loop pair starting at location loc("{{.*}}":[[# @LINE + 2]]:{{.*}}) + ! CHECK-SAME: is not perfectly nested + do concurrent(i=1:n) + do concurrent(j=1:m) + b(i,j,k) = i * j + k + end do + x = 10 + end do + + ! CHECK: Loop pair starting at location loc("{{.*}}":[[# @LINE + 2]]:{{.*}}) + ! CHECK-SAME: is perfectly nested + do concurrent(i=1:n) + do concurrent(j=1:m) + b(i,j,k) = i * j + k + x = 10 + end do + end do +end subroutine + +pure function bar(n, m) + implicit none + integer, intent(in) :: n, m + integer :: bar + + bar = n + m +end function + +end program main diff --git a/flang/test/Transforms/DoConcurrent/multiple_iteration_ranges.f90 b/flang/test/Transforms/DoConcurrent/multiple_iteration_ranges.f90 index 17cf27a9b70b27..18758cfc5efcdd 100644 --- a/flang/test/Transforms/DoConcurrent/multiple_iteration_ranges.f90 +++ b/flang/test/Transforms/DoConcurrent/multiple_iteration_ranges.f90 @@ -20,6 +20,9 @@ ! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-parallel=device %t/partially_nested.f90 -o - \ ! RUN: | FileCheck %s --check-prefixes=DEVICE,COMMON +! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-parallel=device %t/dummy_arg_loop_bounds.f90 -o - \ +! RUN: | FileCheck %s --check-prefixes=DUMMY_UBS + !--- multi_range.f90 program main integer, parameter :: n = 10 @@ -76,32 +79,14 @@ program main ! COMMON-NEXT: %[[ITER_VAR_K:.*]] = fir.alloca i32 {bindc_name = "k"} ! COMMON-NEXT: %[[BINDING_K:.*]]:2 = hlfir.declare %[[ITER_VAR_K]] {uniq_name = "_QFEk"} -! COMMON: %[[C1_1:.*]] = arith.constant 1 : i32 -! COMMON: %[[LB_I:.*]] = fir.convert %[[C1_1]] : (i32) -> index -! COMMON: %[[C10:.*]] = arith.constant 10 : i32 -! COMMON: %[[UB_I:.*]] = fir.convert %[[C10]] : (i32) -> index -! COMMON: %[[STEP_I:.*]] = arith.constant 1 : index - -! COMMON: %[[C1_2:.*]] = arith.constant 1 : i32 -! COMMON: %[[LB_J:.*]] = fir.convert %[[C1_2]] : (i32) -> index -! COMMON: %[[C20:.*]] = arith.constant 20 : i32 -! COMMON: %[[UB_J:.*]] = fir.convert %[[C20]] : (i32) -> index -! COMMON: %[[STEP_J:.*]] = arith.constant 1 : index - -! COMMON: %[[C1_3:.*]] = arith.constant 1 : i32 -! COMMON: %[[LB_K:.*]] = fir.convert %[[C1_3]] : (i32) -> index -! COMMON: %[[C30:.*]] = arith.constant 30 : i32 -! COMMON: %[[UB_K:.*]] = fir.convert %[[C30]] : (i32) -> index -! COMMON: %[[STEP_K:.*]] = arith.constant 1 : index - ! DEVICE: omp.distribute ! COMMON: omp.wsloop { ! COMMON-NEXT: omp.loop_nest ! COMMON-SAME: (%[[ARG0:[^[:space:]]+]], %[[ARG1:[^[:space:]]+]], %[[ARG2:[^[:space:]]+]]) -! COMMON-SAME: : index = (%[[LB_I]], %[[LB_J]], %[[LB_K]]) -! COMMON-SAME: to (%[[UB_I]], %[[UB_J]], %[[UB_K]]) inclusive -! COMMON-SAME: step (%[[STEP_I]], %[[STEP_J]], %[[STEP_K]]) { +! COMMON-SAME: : index = (%{{[^[:space:]]+}}, %{{[^[:space:]]+}}, %{{[^[:space:]]+}}) +! COMMON-SAME: to (%{{[^[:space:]]+}}, %{{[^[:space:]]+}}, %{{[^[:space:]]+}}) inclusive +! COMMON-SAME: step (%{{[^[:space:]]+}}, %{{[^[:space:]]+}}, %{{[^[:space:]]+}}) { ! COMMON-NEXT: %[[IV_IDX_I:.*]] = fir.convert %[[ARG0]] ! COMMON-NEXT: fir.store %[[IV_IDX_I]] to %[[BINDING_I]]#1 @@ -119,3 +104,52 @@ program main ! HOST-NEXT: omp.terminator ! HOST-NEXT: } + +!--- dummy_arg_loop_bounds.f90 + +subroutine foo(n, m) + implicit none + integer :: n, m + integer :: i, j + integer :: a(n, m) + + do concurrent(i=1:n, j=1:m) + a(i,j) = i * j + end do +end subroutine + +! DUMMY_UBS-DAG: omp.map.info {{.*}} {name = "loop.0.lb"} +! DUMMY_UBS-DAG: omp.map.info {{.*}} {name = "loop.0.ub"} +! DUMMY_UBS-DAG: omp.map.info {{.*}} {name = "loop.0.step"} + +! DUMMY_UBS-DAG: omp.map.info {{.*}} {name = "loop.1.lb"} +! DUMMY_UBS-DAG: omp.map.info {{.*}} {name = "loop.1.ub"} +! DUMMY_UBS-DAG: omp.map.info {{.*}} {name = "loop.1.step"} + + +! DUMMY_UBS: omp.target {{.*}} { + +! DUMMY_UBS-DAG: %[[LOOP0_LB_DECL:.*]]:2 = hlfir.declare %arg{{.*}} {uniq_name = "loop.0.lb"} +! DUMMY_UBS-DAG: %[[LOOP0_UB_DECL:.*]]:2 = hlfir.declare %arg{{.*}} {uniq_name = "loop.0.ub"} +! DUMMY_UBS-DAG: %[[LOOP0_STEP_DECL:.*]]:2 = hlfir.declare %arg{{.*}} {uniq_name = "loop.0.step"} + +! DUMMY_UBS-DAG: %[[LOOP1_LB_DECL:.*]]:2 = hlfir.declare %arg{{.*}} {uniq_name = "loop.1.lb"} +! DUMMY_UBS-DAG: %[[LOOP1_UB_DECL:.*]]:2 = hlfir.declare %arg{{.*}} {uniq_name = "loop.1.ub"} +! DUMMY_UBS-DAG: %[[LOOP1_STEP_DECL:.*]]:2 = hlfir.declare %arg{{.*}} {uniq_name = "loop.1.step"} + +! DUMMY_UBS-DAG: %[[LOOP0_LB:.*]] = fir.load %[[LOOP0_LB_DECL]]#1 +! DUMMY_UBS-DAG: %[[LOOP0_UB:.*]] = fir.load %[[LOOP0_UB_DECL]]#1 +! DUMMY_UBS-DAG: %[[LOOP0_STEP:.*]] = fir.load %[[LOOP0_STEP_DECL]]#1 + +! DUMMY_UBS-DAG: %[[LOOP1_LB:.*]] = fir.load %[[LOOP1_LB_DECL]]#1 +! DUMMY_UBS-DAG: %[[LOOP1_UB:.*]] = fir.load %[[LOOP1_UB_DECL]]#1 +! DUMMY_UBS-DAG: %[[LOOP1_STEP:.*]] = fir.load %[[LOOP1_STEP_DECL]]#1 + +! DUMMY_UBS: omp.loop_nest (%{{.*}}, %{{.*}}) : index +! DUMMY_UBS-SAME: = (%[[LOOP0_LB]], %[[LOOP1_LB]]) +! DUMMY_UBS-SAME: to (%[[LOOP0_UB]], %[[LOOP1_UB]]) +! DUMMY_UBS-SAME: inclusive step (%[[LOOP0_STEP]], %[[LOOP1_STEP]]) + +! DUMMY_UBS: omp.terminator +! DUMMY_UBS: } + diff --git a/flang/test/Transforms/DoConcurrent/not_perfectly_nested.f90 b/flang/test/Transforms/DoConcurrent/not_perfectly_nested.f90 index f3f2e78f5b3183..9b0c9fa5b25179 100644 --- a/flang/test/Transforms/DoConcurrent/not_perfectly_nested.f90 +++ b/flang/test/Transforms/DoConcurrent/not_perfectly_nested.f90 @@ -31,9 +31,14 @@ program main ! DEVICE: omp.target -! DEVICE: ^bb0(%[[I_ARG:[^[:space:]]+]]: !fir.ref, %[[X_ARG:[^[:space:]]+]]: !fir.ref, +! DEVICE: ^bb0( +! DEVICE-SAME: %{{[^[:space:]]+}}: {{[^[:space:]]+}}, +! DEVICE-SAME: %{{[^[:space:]]+}}: {{[^[:space:]]+}}, +! DEVICE-SAME: %{{[^[:space:]]+}}: {{[^[:space:]]+}}, +! DEVICE-SAME: %[[I_ARG:[^[:space:]]+]]: !fir.ref, +! DEVICE-SAME: %[[X_ARG:[^[:space:]]+]]: !fir.ref, ! DEVICE-SAME: %[[J_ARG:[^[:space:]]+]]: !fir.ref, %[[K_ARG:[^[:space:]]+]]: !fir.ref, -! DEVICE-SAME: %[[A_ARG:[^[:space:]]+]]: !fir.ref>): +! DEVICE-SAME: %[[A_ARG:[^[:space:]]+]]: !fir.ref> ! DEVICE: %[[TARGET_J_DECL:.*]]:2 = hlfir.declare %[[J_ARG]] {uniq_name = "_QFEj"} ! DEVICE: %[[TARGET_K_DECL:.*]]:2 = hlfir.declare %[[K_ARG]] {uniq_name = "_QFEk"} diff --git a/flang/test/Transforms/DoConcurrent/runtime_sized_array.f90 b/flang/test/Transforms/DoConcurrent/runtime_sized_array.f90 index 5420ff4586be60..b7a3bf8e5b6b87 100644 --- a/flang/test/Transforms/DoConcurrent/runtime_sized_array.f90 +++ b/flang/test/Transforms/DoConcurrent/runtime_sized_array.f90 @@ -23,20 +23,24 @@ subroutine foo(n) ! CHECK-DAG: %[[I_MAP:.*]] = omp.map.info var_ptr(%[[I_DECL]]#1 : {{.*}}) ! CHECK-DAG: %[[A_MAP:.*]] = omp.map.info var_ptr(%[[A_DECL]]#1 : {{.*}}) -! CHECK-DAG: %[[N_MAP:.*]] = omp.map.info var_ptr(%[[N_ALLOC]] : {{.*}}) +! CHECK-DAG: %[[LOOP_LB_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : {{.*}}) {{.*}} {name = "loop.0.lb"} +! CHECK-DAG: %[[LOOP_UB_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : {{.*}}) {{.*}} {name = "loop.0.ub"} +! CHECK-DAG: %[[LOOP_STEP_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : {{.*}}) {{.*}} {name = "loop.0.step"} +! CHECK-DAG: %[[N_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : {{.*}}) ! CHECK: omp.target -! CHECK-SAME: map_entries(%[[I_MAP]] -> %[[I_ARG:arg[0-9]*]], +! CHECK-SAME: map_entries(%[[LOOP_LB_MAP]] -> %[[LB_ARG:arg[0-9]*]], +! CHECK-SAME: %[[LOOP_UB_MAP]] -> %[[UB_ARG:arg[0-9]*]], +! CHECK-SAME: %[[LOOP_STEP_MAP]] -> %[[STEP_ARG:arg[0-9]*]], +! CHECK-SAME: %[[I_MAP]] -> %[[I_ARG:arg[0-9]*]], ! CHECK-SAME: %[[A_MAP]] -> %[[A_ARG:arg[0-9]*]], ! CHECK-SAME: %[[N_MAP]] -> %[[N_ARG:arg[0-9]*]] : {{.*}}) ! CHECK-SAME: {{.*}} { +! CHECK-DAG: %[[N_VAL:.*]] = fir.load %[[N_ARG]] +! CHECK-DAG: %[[A_SHAPE:.*]] = fir.shape %[[N_VAL]] : (index) -> !fir.shape<1> ! CHECK-DAG: %{{.*}} = hlfir.declare %[[I_ARG]] -! CHECK-DAG: %{{.*}} = hlfir.declare %[[A_ARG]] -! CHECK-DAG: %{{.*}} = fir.load %[[N_ARG]] +! CHECK-DAG: %{{.*}} = hlfir.declare %[[A_ARG]](%[[A_SHAPE]]) ! CHECK: omp.terminator ! CHECK: } - - - diff --git a/flang/test/Transforms/DoConcurrent/skip_all_nested_loops.f90 b/flang/test/Transforms/DoConcurrent/skip_all_nested_loops.f90 index 429500cead1073..49174fd6d40610 100644 --- a/flang/test/Transforms/DoConcurrent/skip_all_nested_loops.f90 +++ b/flang/test/Transforms/DoConcurrent/skip_all_nested_loops.f90 @@ -31,9 +31,13 @@ program main ! DEVICE: omp.target -! DEVICE: ^bb0(%[[I_ARG:[^[:space:]]+]]: !fir.ref, +! DEVICE: ^bb0( +! DEVICE-SAME: %{{[^[:space:]]+}}: {{[^[:space:]]+}}, +! DEVICE-SAME: %{{[^[:space:]]+}}: {{[^[:space:]]+}}, +! DEVICE-SAME: %{{[^[:space:]]+}}: {{[^[:space:]]+}}, +! DEVICE-SAME: %[[I_ARG:[^[:space:]]+]]: !fir.ref, ! DEVICE-SAME: %[[J_ARG:[^[:space:]]+]]: !fir.ref, %[[K_ARG:[^[:space:]]+]]: !fir.ref, -! DEVICE-SAME: %[[A_ARG:[^[:space:]]+]]: !fir.ref>): +! DEVICE-SAME: %[[A_ARG:[^[:space:]]+]]: !fir.ref> ! DEVICE: %[[TARGET_J_DECL:.*]]:2 = hlfir.declare %[[J_ARG]] {uniq_name = "_QFEj"} ! DEVICE: %[[TARGET_K_DECL:.*]]:2 = hlfir.declare %[[K_ARG]] {uniq_name = "_QFEk"}