From b36a0e74861c3c3e790d94228c04001cabf1ddb6 Mon Sep 17 00:00:00 2001 From: Alex Zinenko Date: Tue, 14 Nov 2023 15:03:39 +0000 Subject: [PATCH 1/2] [mlir] clean up alias analysis * Interpret `readnone` as "not reading or writing" rather than just "not reading", in line with LLVM. * Simplify code in external callee attribute-based transfer function. * Treat all stores as "may" because to meet the expectations of the activity post-analysis that only looks at function exit points and therefore needs conservative results. --- enzyme/Enzyme/MLIR/Analysis/AliasAnalysis.cpp | 266 ++++++++---------- enzyme/Enzyme/MLIR/Analysis/AliasAnalysis.h | 4 + .../MLIR/AliasAnalysis/func_attributes.mlir | 17 ++ 3 files changed, 145 insertions(+), 142 deletions(-) diff --git a/enzyme/Enzyme/MLIR/Analysis/AliasAnalysis.cpp b/enzyme/Enzyme/MLIR/Analysis/AliasAnalysis.cpp index c2154eb59408..c61422174eb1 100644 --- a/enzyme/Enzyme/MLIR/Analysis/AliasAnalysis.cpp +++ b/enzyme/Enzyme/MLIR/Analysis/AliasAnalysis.cpp @@ -286,6 +286,23 @@ ChangeResult enzyme::PointsToSets::markAllPointToUnknown() { return ChangeResult::Change; } +ChangeResult enzyme::PointsToSets::markAllExceptPointToUnknown( + const AliasClassSet &destClasses) { + bool wasOtherPointingToUnknown = otherPointToUnknown; + otherPointToUnknown = true; + + llvm::SmallDenseSet keysToDelete; + for (DistinctAttr key : llvm::make_first_range(pointsTo)) { + if (!destClasses.getAliasClasses().contains(key)) + keysToDelete.insert(key); + } + for (DistinctAttr key : keysToDelete) + pointsTo.erase(key); + return (wasOtherPointingToUnknown && keysToDelete.empty()) + ? ChangeResult::NoChange + : ChangeResult::Change; +} + // TODO: Reduce code duplication with activity analysis std::optional getStored(Operation *op); @@ -325,14 +342,17 @@ void enzyme::PointsToPointerAnalysis::processCapturingStore( // TODO: this should become an interface or be integrated into side effects so // it doesn't depend on the dialect. +// TODO: currently, we should treat all stores is "may store" because of how the +// analysis is used: the points-to of the function exit point is considered as +// the overall state of the function, which may be incorrect for any operation +// post-dominated by a must-store. static bool isMustStore(Operation *op, Value pointer) { - return isa(op); + return false; // isa(op); } void enzyme::PointsToPointerAnalysis::visitOperation(Operation *op, const PointsToSets &before, PointsToSets *after) { - using llvm::errs; join(after, before); // If we know nothing about memory effects, record reaching the pessimistic @@ -385,22 +405,6 @@ void enzyme::PointsToPointerAnalysis::visitOperation(Operation *op, constexpr static llvm::StringLiteral kLLVMMemoryAttrName = "memory"; -static std::pair -isReadWriteOnly(FunctionOpInterface callee, unsigned argNo, - std::optional argMemMRI) { - unsigned numArguments = callee.getNumArguments(); - bool isReadOnly = - (argMemMRI && *argMemMRI == LLVM::ModRefInfo::Ref) || - (argNo < numArguments && - !!callee.getArgAttr(argNo, LLVM::LLVMDialect::getReadonlyAttrName())); - bool isWriteOnly = - (argMemMRI && *argMemMRI == LLVM::ModRefInfo::Mod) || - (argNo < numArguments && - !!callee.getArgAttr(argNo, LLVM::LLVMDialect::getWriteOnlyAttrName())); - assert(!(isReadOnly && isWriteOnly)); - return std::make_pair(isReadOnly, isWriteOnly); -} - static bool modRefMayMod(std::optional modRef) { return modRef ? (*modRef == LLVM::ModRefInfo::Mod || *modRef == LLVM::ModRefInfo::ModRef) @@ -413,6 +417,42 @@ static bool modRefMayRef(std::optional modRef) { : true; } +static bool mayReadArg(FunctionOpInterface callee, unsigned argNo, + std::optional argMemMRI) { + // Function-wide annotation. + bool funcMayRead = modRefMayRef(argMemMRI); + + // Vararg behavior can only be specified by the function. + unsigned numArguments = callee.getNumArguments(); + if (argNo >= numArguments) + return funcMayRead; + + bool hasWriteOnlyAttr = + !!callee.getArgAttr(argNo, LLVM::LLVMDialect::getWriteOnlyAttrName()); + bool hasReadNoneAttr = + !!callee.getArgAttr(argNo, LLVM::LLVMDialect::getReadnoneAttrName()); + return !hasWriteOnlyAttr && !hasReadNoneAttr && funcMayRead; +} + +static bool mayWriteArg(FunctionOpInterface callee, unsigned argNo, + std::optional argMemMRI) { + // Function-wide annotation. + bool funcMayWrite = modRefMayMod(argMemMRI); + + // Vararg behavior can only be specified by the function. + unsigned numArguments = callee.getNumArguments(); + if (argNo >= numArguments) + return funcMayWrite; + + // Individual attributes can further restrict argument writability. Note that + // `readnone` means "no read or write" for LLVM. + bool hasReadOnlyAttr = + !!callee.getArgAttr(argNo, LLVM::LLVMDialect::getReadonlyAttrName()); + bool hasReadNoneAttr = + !!callee.getArgAttr(argNo, LLVM::LLVMDialect::getReadnoneAttrName()); + return !hasReadOnlyAttr && !hasReadNoneAttr && funcMayWrite; +} + void enzyme::PointsToPointerAnalysis::visitCallControlFlowTransfer( CallOpInterface call, CallControlFlowAction action, const PointsToSets &before, PointsToSets *after) { @@ -467,91 +507,75 @@ void enzyme::PointsToPointerAnalysis::visitCallControlFlowTransfer( pointerLikeOperands.push_back(i); } - // If the function may write to "other", that is any potential other - // pointer, record that. - // However, if some argument pointers cannot be written into due to other - // attributes, keep the classes for them from the "before" lattice. - bool funcMayWriteToOther = modRefMayMod(otherModRef); - bool funcMayWriteToArgs = modRefMayMod(argModRef); - auto mayArgBeStoredInto = [&](int arg) { - auto [isReadOnly, isWriteOnly] = - isReadWriteOnly(callee, arg, argModRef); - return (!isReadOnly || isWriteOnly) && funcMayWriteToArgs; - }; - if (funcMayWriteToOther) { - propagateIfChanged(after, after->markAllPointToUnknown()); - - for (int pointerOperand : pointerLikeOperands) { - // TODO(zinenko): FIXME, even if the arg may be stored into, it - // doesn't mean we should pessimize it. Instead, it should be possible - // to join the before state for it with the alias classes the function - // may be storing into it. - // TODO(zinenko): consider monotonicity carefully. - if (mayArgBeStoredInto(pointerOperand)) - continue; - - auto *destClasses = getOrCreateFor( - call, call.getArgOperands()[pointerOperand]); - - ChangeResult result = - destClasses->getAliasClassesObject().foreachClass( - [&](DistinctAttr dest) { - return after->setPointingToClasses( - dest, before.getPointsTo(dest)); - }); - propagateIfChanged(after, result); - } - return; - } - - // If the function may read from "other", it may be storing any pointer - // into the arguments. At this point, we know it shouldn't be also writing - // to "other". + // Precompute the set of alias classes the function may capture. + // TODO: consider a more advanced lattice that can encode "may point to + // any class _except_ the given classes"; this is mathematically possible + // but needs careful programmatic encoding. + AliasClassSet functionMayCapture; bool funcMayReadOther = modRefMayRef(otherModRef); unsigned numArguments = callee.getNumArguments(); if (funcMayReadOther) { - for (int pointerAsAddress : pointerLikeOperands) { - if (!mayArgBeStoredInto(pointerAsAddress)) - continue; - - auto *destClasses = getOrCreateFor( - call, call.getArgOperands()[pointerAsAddress]); - propagateIfChanged(after, after->markPointToUnknown( - destClasses->getAliasClassesObject())); - } + // If a function may read from other, it may be storing pointers from + // unknown alias sets into any writable pointer. + functionMayCapture.markUnknown(); } else { for (int pointerAsData : pointerLikeOperands) { - // If not captured, it cannot be stored in anything. However, another - // pointer that may be stored in this pointer can be stored in another - // writable pointer. - bool isNoCapture = - (pointerAsData < numArguments && + // If not captured, it cannot be stored in anything. + if ((pointerAsData < numArguments && !!callee.getArgAttr(pointerAsData, - LLVM::LLVMDialect::getNoCaptureAttrName())); + LLVM::LLVMDialect::getNoCaptureAttrName()))) + continue; - for (int pointerAsAddress : pointerLikeOperands) { - if (!mayArgBeStoredInto(pointerAsAddress)) - continue; - - // In all cases, we want to record that "destination" may be - // pointing to same classes as "source". - const auto *srcClasses = getOrCreateFor( - call, call.getArgOperands()[pointerAsData]); - const auto *destClasses = getOrCreateFor( - call, call.getArgOperands()[pointerAsAddress]); - propagateIfChanged( - after, after->addSetsFrom(destClasses->getAliasClassesObject(), - srcClasses->getAliasClassesObject())); - - // If "source" is also captured, then "destination" may additionally - // be pointing to the classes of "source" itself. - if (isNoCapture) - continue; - processCapturingStore(call, after, - call.getArgOperands()[pointerAsData], - call.getArgOperands()[pointerAsAddress]); - } + const auto *srcClasses = getOrCreateFor( + call, call.getArgOperands()[pointerAsData]); + functionMayCapture.join(srcClasses->getAliasClassesObject()); + } + } + + AliasClassSet pointerOperandClasses; + ChangeResult changed = ChangeResult::NoChange; + for (int pointerOperand : pointerLikeOperands) { + auto *destClasses = getOrCreateFor( + call, call.getArgOperands()[pointerOperand]); + pointerOperandClasses.join(destClasses->getAliasClassesObject()); + + // If the argument cannot be stored into, just preserve it as is. + if (!mayWriteArg(callee, pointerOperand, argModRef)) + continue; + + // If the destination class is unknown, we reached the pessimistic + // fixpoint. + if (destClasses->isUnknown()) { + pointerOperandClasses.reset(); + changed |= after->markAllPointToUnknown(); + break; } + + // Otherwise, indicate that a pointer that belongs to any of the + // classes captured by this function may be stored into the + // destination class. + changed |= destClasses->getAliasClassesObject().foreachClass( + [&](DistinctAttr dest) { + return after->insert(dest, functionMayCapture); + }); + } + + // If the function may write to "other", that is any potential other + // pointer, record that. + if (modRefMayMod(otherModRef)) { + // All other alias classes that are not present as arguments should + // point to unknown. + // Since: + // - `after` was joined with `before` at the beginning; and + // - pre-existing keys in `after` (and in `before` since no new keys + // were added) have their values: preserved, joined with another + // alias set (->insert is a join), or removed here with default value + // being set to "any" (lattice top); + // this transfer function is monotonic with respect to its input, i.e, + // the `before` lattice. + // TODO(zinenko): consider monotonicity more carefully wrt to + // `destClasses` change. + changed |= after->markAllExceptPointToUnknown(pointerOperandClasses); } // Pointer-typed results may be pointing to any other pointer. The @@ -560,15 +584,14 @@ void enzyme::PointsToPointerAnalysis::visitCallControlFlowTransfer( // "other" memory, the return values may be pointing only to // same alias classes as arguments + arguments themselves + a new // alias class for a potential allocation. - // - Additionally, if any of the arguments are annotated as writeonly, - // the results should not point to alias classes those arguments are - // pointing to. + // - Additionally, if any of the arguments are annotated as writeonly or + // readnone, the results should not point to alias classes those + // arguments are pointing to. // - Additionally, if any of the arguments are annotated as nocapture, // the results should not point to those arguments themselves. // - If the function is marked as not reading from arguments, the // results should not point to any alias classes pointed to by the // arguments. - bool funcMayReadArgs = modRefMayRef(argModRef); for (OpResult result : call->getResults()) { if (!isPointerLike(result.getType())) continue; @@ -583,19 +606,13 @@ void enzyme::PointsToPointerAnalysis::visitCallControlFlowTransfer( continue; } - ChangeResult changed = ChangeResult::NoChange; AliasClassSet commonReturnScope; (void)commonReturnScope.markFresh( StringAttr::get(call->getContext(), "function-return-common")); for (int operandNo : pointerLikeOperands) { const auto *srcClasses = getOrCreateFor( call, call.getArgOperands()[operandNo]); - bool maybeRead = - funcMayReadArgs && - (operandNo >= numArguments || - !callee.getArgAttr(operandNo, - LLVM::LLVMDialect::getWriteOnlyAttrName())); - if (maybeRead) { + if (mayReadArg(callee, operandNo, argModRef)) { changed |= after->addSetsFrom(destClasses->getAliasClassesObject(), srcClasses->getAliasClassesObject()); } @@ -611,10 +628,8 @@ void enzyme::PointsToPointerAnalysis::visitCallControlFlowTransfer( after->insert(destClasses->getAliasClassesObject(), commonReturnScope); } - propagateIfChanged(after, changed); } - - return; + return propagateIfChanged(after, changed); } // Don't know how to handle, record pessimistic fixpoint. @@ -875,36 +890,3 @@ void enzyme::AliasAnalysis::visitExternalCall( } } } - -// // Generic reasoning based on attributes. -// auto callee = SymbolTable::lookupNearestSymbolFrom( -// call, symbol.getLeafReference()); -// if (!callee) -// return populateConservativeCallEffects(call, effects); - -// // A function by default has all possible effects on all pointer-like -// // arguments. Presence of specific attributes removes those effects. -// auto memoryAttr = -// callee->getAttrOfType(kLLVMMemoryAttrName); -// std::optional argMemMRI = -// memoryAttr ? std::make_optional(memoryAttr.getArgMem()) : std::nullopt; - -// unsigned numArguments = callee.getNumArguments(); -// for (auto &&[i, argument] : llvm::enumerate(call.getArgOperands())) { -// if (!isPointerLike(argument.getType())) -// continue; - -// bool isReadNone = -// !modRefMayRef(argMemMRI) || -// (i < numArguments && -// !!callee.getArgAttr(i, LLVM::LLVMDialect::getReadnoneAttrName())); -// auto [isReadOnly, isWriteOnly] = isReadWriteOnly(callee, i, argMemMRI); - -// if (!isReadNone && !isWriteOnly) -// effects.emplace_back(MemoryEffects::Read::get(), argument); - -// if (!isReadOnly) -// effects.emplace_back(MemoryEffects::Write::get(), argument); - -// // TODO: consider having a may-free effect. -// } diff --git a/enzyme/Enzyme/MLIR/Analysis/AliasAnalysis.h b/enzyme/Enzyme/MLIR/Analysis/AliasAnalysis.h index f4eeef9f7aec..be91b1f90705 100644 --- a/enzyme/Enzyme/MLIR/Analysis/AliasAnalysis.h +++ b/enzyme/Enzyme/MLIR/Analysis/AliasAnalysis.h @@ -134,6 +134,10 @@ class PointsToSets : public dataflow::AbstractDenseLattice { /// containing any other pointer. This is the full pessimistic fixpoint. ChangeResult markAllPointToUnknown(); + /// Mark all alias classes except the given ones to point to the "unknown" + /// alias set. + ChangeResult markAllExceptPointToUnknown(const AliasClassSet &destClasses); + const AliasClassSet &getPointsTo(DistinctAttr id) const { auto it = pointsTo.find(id); if (it == pointsTo.end()) diff --git a/enzyme/test/MLIR/AliasAnalysis/func_attributes.mlir b/enzyme/test/MLIR/AliasAnalysis/func_attributes.mlir index 5c962dd6675d..d77a8c0988e0 100644 --- a/enzyme/test/MLIR/AliasAnalysis/func_attributes.mlir +++ b/enzyme/test/MLIR/AliasAnalysis/func_attributes.mlir @@ -271,3 +271,20 @@ func.func @caller() -> !llvm.ptr { %1 = call @callee() {tag = "func-return-2"} : () -> !llvm.ptr return %0 : !llvm.ptr } + + +// ----- + +// CHECK: points-to-pointer sets +// CHECK-NOT: points to {} +// CHECK: other points to unknown: 0 +func.func private @callee(!llvm.ptr {llvm.readnone}) attributes { + memory = #llvm.memory_effects +} + +func.func @caller(%arg0: !llvm.ptr {enzyme.tag = "argument"}) { + call @callee(%arg0) : (!llvm.ptr) -> () + return +} From 8e037efd1825d4d846c908c18612e5c85cca6196 Mon Sep 17 00:00:00 2001 From: Alex Zinenko Date: Tue, 14 Nov 2023 15:29:09 +0000 Subject: [PATCH 2/2] [mlir] don't incorrectly bail from AliasAnalysis::transfer * Alias transfer behavior isn't always completely described by memory effects, be conservative about that. * "Global" effects with no target value should not be ignored. --- enzyme/Enzyme/MLIR/Analysis/AliasAnalysis.cpp | 52 ++++++++++++++----- 1 file changed, 39 insertions(+), 13 deletions(-) diff --git a/enzyme/Enzyme/MLIR/Analysis/AliasAnalysis.cpp b/enzyme/Enzyme/MLIR/Analysis/AliasAnalysis.cpp index c61422174eb1..ac302c6a9ab5 100644 --- a/enzyme/Enzyme/MLIR/Analysis/AliasAnalysis.cpp +++ b/enzyme/Enzyme/MLIR/Analysis/AliasAnalysis.cpp @@ -38,6 +38,9 @@ // The LLVM dialect is only used for attribute names. #include "mlir/Dialect/LLVMIR/LLVMDialect.h" +// TODO: remove this once aliasing interface is factored out. +#include "mlir/Dialect/MemRef/IR/MemRef.h" + using namespace mlir; using namespace mlir::dataflow; @@ -725,14 +728,28 @@ void enzyme::AliasAnalysis::setToEntryState(AliasClassLattice *lattice) { } } +/// Returns `true` if the alias transfer function of the operation is fully +/// described by its memory effects. +// +// We could have an operation that has side effects and loads a pointer from +// another pointer, but also has another result that aliases the operand, which +// would need additional processing. +// +// TODO: turn this into an interface. +static bool isAliasTransferFullyDescribedByMemoryEffects(Operation *op) { + return isa(op); +} + void enzyme::AliasAnalysis::transfer( Operation *op, ArrayRef effects, ArrayRef operands, ArrayRef results) { + bool globalRead = false; for (const auto &effect : effects) { + // If the effect is global read, record that. Value value = effect.getValue(); if (!value) { - // TODO: we can't assume anything about entry states + globalRead |= isa(effect.getEffect()); continue; } @@ -771,23 +788,32 @@ void enzyme::AliasAnalysis::transfer( } } - // TODO(zinenko): this is sketchy. We could have an operation that has side - // effects and loads a pointer from another pointer, but also has another - // result that aliases the operand. So returning here is premature. - if (!effects.empty()) + // If there was a global read effect, the operation may be reading from any + // pointer so we cannot say what the results are pointing to. Can safely exit + // here because all results are now in the fixpoint state. + if (globalRead) { + for (auto *resultLattice : results) + propagateIfChanged(resultLattice, resultLattice->markUnknown()); + return; + } + + // If it was enough to reason about effects, exit here. + if (!effects.empty() && isAliasTransferFullyDescribedByMemoryEffects(op)) return; - // For operations that don't touch memory, conservatively assume all results - // alias all operands. - for (auto *resultLattice : results) { + // Conservatively assume all results alias all operands. + for (AliasClassLattice *resultLattice : results) { // TODO: Setting this flag to true will assume non-pointers don't alias, // which is not true in general but can result in improved analysis speed. // We need a type analysis for full correctness. - const bool prune_non_pointers = false; - if (!prune_non_pointers || - isPointerLike(resultLattice->getPoint().getType())) - for (const auto *operandLattice : operands) - join(resultLattice, *operandLattice); + constexpr bool pruneNonPointers = false; + if (pruneNonPointers && !isPointerLike(resultLattice->getPoint().getType())) + continue; + + ChangeResult r = ChangeResult::NoChange; + for (const AliasClassLattice *operandLattice : operands) + r |= resultLattice->join(*operandLattice); + propagateIfChanged(resultLattice, r); } }