From ed2d7ead81b224875abeee3a635b753c122be4b0 Mon Sep 17 00:00:00 2001
From: Nimish Mishra
Date: Thu, 3 Oct 2024 10:40:05 +0530
Subject: [PATCH] [flang][mlir][llvm][OpenMP] Emit __atomic_load and __atomic_compare_exchange libcalls for complex types in atomic update

---
 flang/lib/Lower/DirectivesCommon.h            |   3 +-
 .../OpenMP/atomic-capture-complex.f90         |  47 ++++
 .../OpenMP/atomic-update-complex.f90          |  42 ++++
 .../test/Lower/OpenMP/Todo/atomic-complex.f90 |   8 -
 llvm/include/llvm/Frontend/Atomic/Atomic.h    | 232 ++++++++++++++++++
 .../llvm/Frontend/OpenMP/OMPIRBuilder.h       |  31 +++
 llvm/lib/Frontend/Atomic/Atomic.cpp           |  19 ++
 llvm/lib/Frontend/Atomic/CMakeLists.txt       |  15 ++
 llvm/lib/Frontend/CMakeLists.txt              |   1 +
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp     |  48 ++++
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      |  31 +--
 mlir/test/Target/LLVMIR/openmp-llvm.mlir      | 114 +++++++++
 12 files changed, 563 insertions(+), 28 deletions(-)
 create mode 100644 flang/test/Integration/OpenMP/atomic-capture-complex.f90
 create mode 100644 flang/test/Integration/OpenMP/atomic-update-complex.f90
 delete mode 100644 flang/test/Lower/OpenMP/Todo/atomic-complex.f90
 create mode 100644 llvm/include/llvm/Frontend/Atomic/Atomic.h
 create mode 100644 llvm/lib/Frontend/Atomic/Atomic.cpp
 create mode 100644 llvm/lib/Frontend/Atomic/CMakeLists.txt

diff --git a/flang/lib/Lower/DirectivesCommon.h b/flang/lib/Lower/DirectivesCommon.h
index a32f0b287e049a..da192ded4aa971 100644
--- a/flang/lib/Lower/DirectivesCommon.h
+++ b/flang/lib/Lower/DirectivesCommon.h
@@ -129,8 +129,7 @@ static void processOmpAtomicTODO(mlir::Type elementType,
     // Based on assertion for supported element types in OMPIRBuilder.cpp
     // createAtomicRead
     mlir::Type unwrappedEleTy = fir::unwrapRefType(elementType);
-    bool supportedAtomicType =
-        (!fir::isa_complex(unwrappedEleTy) && fir::isa_trivial(unwrappedEleTy));
+    bool supportedAtomicType = fir::isa_trivial(unwrappedEleTy);
     if (!supportedAtomicType)
       TODO(loc, "Unsupported atomic type");
   }
diff --git a/flang/test/Integration/OpenMP/atomic-capture-complex.f90 b/flang/test/Integration/OpenMP/atomic-capture-complex.f90
new file mode 100644
index 00000000000000..72329f0b2eb10d
--- /dev/null
+++ b/flang/test/Integration/OpenMP/atomic-capture-complex.f90
@@ -0,0 +1,47 @@
+!===----------------------------------------------------------------------===!
+! This directory can be used to add Integration tests involving multiple
+! stages of the compiler (for eg. from Fortran to LLVM IR). It should not
+! contain executable tests. We should only add tests here sparingly and only
+! if there is no other way to test. Repeat this message in each test that is
+! added to this directory and sub-directories.
+!===----------------------------------------------------------------------===!
+
+!RUN: %flang_fc1 -emit-llvm -fopenmp %s -o - | FileCheck %s
+
+!CHECK: %[[X_NEW_VAL:.*]] = alloca { float, float }, align 8
+!CHECK: %[[VAL_1:.*]] = alloca { float, float }, i64 1, align 8
+!CHECK: %[[ORIG_VAL:.*]] = alloca { float, float }, i64 1, align 8
+!CHECK: store { float, float } { float 2.000000e+00, float 2.000000e+00 }, ptr %[[ORIG_VAL]], align 4
+!CHECK: br label %entry
+
+!CHECK: entry:
+!CHECK: %[[ATOMIC_TEMP_LOAD:.*]] = alloca { float, float }, align 8
+!CHECK: call void @__atomic_load(i64 8, ptr %[[ORIG_VAL]], ptr %[[ATOMIC_TEMP_LOAD]], i32 0)
+!CHECK: %[[PHI_NODE_ENTRY_1:.*]] = load { float, float }, ptr %[[ATOMIC_TEMP_LOAD]], align 8
+!CHECK: br label %.atomic.cont
+
+!CHECK: .atomic.cont
+!CHECK: %[[VAL_4:.*]] = phi { float, float } [ %[[PHI_NODE_ENTRY_1]], %entry ], [ %{{.*}}, %.atomic.cont ]
+!CHECK: %[[VAL_5:.*]] = extractvalue { float, float } %[[VAL_4]], 0
+!CHECK: %[[VAL_6:.*]] = extractvalue { float, float } %[[VAL_4]], 1
+!CHECK: %[[VAL_7:.*]] = fadd contract float %[[VAL_5]], 1.000000e+00
+!CHECK: %[[VAL_8:.*]] = fadd contract float %[[VAL_6]], 1.000000e+00
+!CHECK: %[[VAL_9:.*]] = insertvalue { float, float } undef, float %[[VAL_7]], 0
+!CHECK: %[[VAL_10:.*]] = insertvalue { float, float } %[[VAL_9]], float %[[VAL_8]], 1
+!CHECK: store { float, float } %[[VAL_10]], ptr %[[X_NEW_VAL]], align 4
+!CHECK: %[[VAL_11:.*]] = call i1 @__atomic_compare_exchange(i64 8, ptr %[[ORIG_VAL]], ptr %[[ATOMIC_TEMP_LOAD]], ptr %[[X_NEW_VAL]],
+!i32 2, i32 2)
+!CHECK: %[[VAL_12:.*]] = load { float, float }, ptr %[[ATOMIC_TEMP_LOAD]], align 4
+!CHECK: br i1 %[[VAL_11]], label %.atomic.exit, label %.atomic.cont
+
+!CHECK: .atomic.exit
+!CHECK: store { float, float } %[[VAL_10]], ptr %[[VAL_1]], align 4
+
+program main
+    complex*8 ia, ib
+    ia = (2, 2)
+    !$omp atomic capture
+        ia = ia + (1, 1)
+        ib = ia
+    !$omp end atomic
+end program
diff --git a/flang/test/Integration/OpenMP/atomic-update-complex.f90 b/flang/test/Integration/OpenMP/atomic-update-complex.f90
new file mode 100644
index 00000000000000..827e84a011f53b
--- /dev/null
+++ b/flang/test/Integration/OpenMP/atomic-update-complex.f90
@@ -0,0 +1,42 @@
+!===----------------------------------------------------------------------===!
+! This directory can be used to add Integration tests involving multiple
+! stages of the compiler (for eg. from Fortran to LLVM IR). It should not
+! contain executable tests. We should only add tests here sparingly and only
+! if there is no other way to test. Repeat this message in each test that is
+! added to this directory and sub-directories.
+!===----------------------------------------------------------------------===!
+
+!RUN: %flang_fc1 -emit-llvm -fopenmp %s -o - | FileCheck %s
+
+!CHECK: define void @_QQmain() {
+!CHECK: %[[X_NEW_VAL:.*]] = alloca { float, float }, align 8
+!CHECK: {{.*}} = alloca { float, float }, i64 1, align 8
+!CHECK: %[[ORIG_VAL:.*]] = alloca { float, float }, i64 1, align 8
+!CHECK: store { float, float } { float 2.000000e+00, float 2.000000e+00 }, ptr %[[ORIG_VAL]], align 4
+!CHECK: br label %entry
+
+!CHECK: entry:
+!CHECK: %[[ATOMIC_TEMP_LOAD:.*]] = alloca { float, float }, align 8
+!CHECK: call void @__atomic_load(i64 8, ptr %[[ORIG_VAL]], ptr %[[ATOMIC_TEMP_LOAD]], i32 0)
+!CHECK: %[[PHI_NODE_ENTRY_1:.*]] = load { float, float }, ptr %[[ATOMIC_TEMP_LOAD]], align 8
+!CHECK: br label %.atomic.cont
+
+!CHECK: .atomic.cont
+!CHECK: %[[VAL_4:.*]] = phi { float, float } [ %[[PHI_NODE_ENTRY_1]], %entry ], [ %{{.*}}, %.atomic.cont ]
+!CHECK: %[[VAL_5:.*]] = extractvalue { float, float } %[[VAL_4]], 0
+!CHECK: %[[VAL_6:.*]] = extractvalue { float, float } %[[VAL_4]], 1
+!CHECK: %[[VAL_7:.*]] = fadd contract float %[[VAL_5]], 1.000000e+00
+!CHECK: %[[VAL_8:.*]] = fadd contract float %[[VAL_6]], 1.000000e+00
+!CHECK: %[[VAL_9:.*]] = insertvalue { float, float } undef, float %[[VAL_7]], 0
+!CHECK: %[[VAL_10:.*]] = insertvalue { float, float } %[[VAL_9]], float %[[VAL_8]], 1
+!CHECK: store { float, float } %[[VAL_10]], ptr %[[X_NEW_VAL]], align 4
+!CHECK: %[[VAL_11:.*]] = call i1 @__atomic_compare_exchange(i64 8, ptr %[[ORIG_VAL]], ptr %[[ATOMIC_TEMP_LOAD]], ptr %[[X_NEW_VAL]], i32 2, i32 2)
+!CHECK: %[[VAL_12:.*]] = load { float, float }, ptr %[[ATOMIC_TEMP_LOAD]], align 4
+!CHECK: br i1 %[[VAL_11]], label %.atomic.exit, label %.atomic.cont
+program main
+  complex*8 ia, ib
+  ia = (2, 2)
+  !$omp atomic update
+    ia = ia + (1, 1)
+  !$omp end atomic
+end program
diff --git a/flang/test/Lower/OpenMP/Todo/atomic-complex.f90 b/flang/test/Lower/OpenMP/Todo/atomic-complex.f90
deleted file mode 100644
index 6d6e4399ee192e..00000000000000
--- a/flang/test/Lower/OpenMP/Todo/atomic-complex.f90
+++ /dev/null
@@ -1,8 +0,0 @@
-! RUN: %not_todo_cmd %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
-
-! CHECK: not yet implemented: Unsupported atomic type
-subroutine complex_atomic
-  complex :: l, r
-  !$omp atomic read
-  l = r
-end subroutine
diff --git a/llvm/include/llvm/Frontend/Atomic/Atomic.h b/llvm/include/llvm/Frontend/Atomic/Atomic.h
new file mode 100644
index 00000000000000..3942d06144ce17
--- /dev/null
+++ b/llvm/include/llvm/Frontend/Atomic/Atomic.h
@@ -0,0 +1,232 @@
+//===--- Atomic.h - Codegen of atomic operations
+//---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_FRONTEND_ATOMIC_ATOMIC_H
+#define LLVM_FRONTEND_ATOMIC_ATOMIC_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/RuntimeLibcalls.h"
+
+namespace llvm {
+
+template <typename IRBuilderTy> struct AtomicInfo {
+
+  IRBuilderTy *Builder;
+  Type *Ty;
+  uint64_t AtomicSizeInBits;
+  uint64_t ValueSizeInBits;
+  llvm::Align AtomicAlign;
+  llvm::Align ValueAlign;
+  bool UseLibcall;
+
+public:
+  AtomicInfo(IRBuilderTy *Builder, Type *Ty, uint64_t AtomicSizeInBits,
+             uint64_t ValueSizeInBits, llvm::Align AtomicAlign,
+             llvm::Align ValueAlign, bool UseLibcall)
+      : Builder(Builder), Ty(Ty), AtomicSizeInBits(AtomicSizeInBits),
+        ValueSizeInBits(ValueSizeInBits), AtomicAlign(AtomicAlign),
+        ValueAlign(ValueAlign), UseLibcall(UseLibcall) {}
+
+  virtual ~AtomicInfo() = default;
+
+  llvm::Align getAtomicAlignment() const { return AtomicAlign; }
+  uint64_t getAtomicSizeInBits() const { return AtomicSizeInBits; }
+  uint64_t getValueSizeInBits() const { return ValueSizeInBits; }
+  bool shouldUseLibcall() const { return UseLibcall; }
+  llvm::Type *getAtomicTy() const { return Ty; }
+
+  virtual llvm::Value *getAtomicPointer() const = 0;
+  virtual void decorateWithTBAA(Instruction *I) = 0;
+  virtual llvm::AllocaInst *CreateAlloca(llvm::Type *Ty,
+                                         const llvm::Twine &Name) const = 0;
+
+  /*
+   * Is the atomic size larger than the underlying value type?
+   * Note that the absence of padding does not mean that atomic
+   * objects are completely interchangeable with non-atomic
+   * objects: we might have promoted the alignment of a type
+   * without making it bigger.
+ */ + bool hasPadding() const { return (ValueSizeInBits != AtomicSizeInBits); } + + LLVMContext &getLLVMContext() const { return Builder->getContext(); } + + static bool shouldCastToInt(llvm::Type *ValTy, bool CmpXchg) { + if (ValTy->isFloatingPointTy()) + return ValTy->isX86_FP80Ty() || CmpXchg; + return !ValTy->isIntegerTy() && !ValTy->isPointerTy(); + } + + llvm::Value *EmitAtomicLoadOp(llvm::AtomicOrdering AO, bool IsVolatile, + bool CmpXchg = false) { + Value *Ptr = getAtomicPointer(); + Type *AtomicTy = Ty; + if (shouldCastToInt(Ty, CmpXchg)) + AtomicTy = llvm::IntegerType::get(getLLVMContext(), AtomicSizeInBits); + LoadInst *Load = + Builder->CreateAlignedLoad(AtomicTy, Ptr, AtomicAlign, "atomic-load"); + Load->setAtomic(AO); + if (IsVolatile) + Load->setVolatile(true); + decorateWithTBAA(Load); + return Load; + } + + static CallInst *EmitAtomicLibcall(IRBuilderTy *Builder, StringRef fnName, + Type *ResultType, ArrayRef Args) { + LLVMContext &ctx = Builder->getContext(); + SmallVector ArgTys; + for (Value *Arg : Args) + ArgTys.push_back(Arg->getType()); + FunctionType *FnType = FunctionType::get(ResultType, ArgTys, false); + Module *M = Builder->GetInsertBlock()->getModule(); + + // TODO: Use llvm::TargetLowering for Libcall ABI + llvm::AttrBuilder fnAttrBuilder(ctx); + fnAttrBuilder.addAttribute(llvm::Attribute::NoUnwind); + fnAttrBuilder.addAttribute(llvm::Attribute::WillReturn); + llvm::AttributeList fnAttrs = llvm::AttributeList::get( + ctx, llvm::AttributeList::FunctionIndex, fnAttrBuilder); + FunctionCallee LibcallFn = M->getOrInsertFunction(fnName, FnType, fnAttrs); + CallInst *Call = Builder->CreateCall(LibcallFn, Args); + return Call; + } + + llvm::Value *getAtomicSizeValue() const { + LLVMContext &ctx = getLLVMContext(); + + // TODO: Get from llvm::TargetMachine / clang::TargetInfo + // if clang shares this codegen in future + constexpr uint16_t SizeTBits = 64; + constexpr uint16_t BitsPerByte = 8; + return llvm::ConstantInt::get(llvm::IntegerType::get(ctx, SizeTBits), + AtomicSizeInBits / BitsPerByte); + } + + std::pair EmitAtomicCompareExchangeLibcall( + llvm::Value *ExpectedVal, llvm::Value *DesiredVal, + llvm::AtomicOrdering Success, llvm::AtomicOrdering Failure) { + LLVMContext &ctx = getLLVMContext(); + + // __atomic_compare_exchange's expected and desired are passed by pointers + // FIXME: types + + // TODO: Get from llvm::TargetMachine / clang::TargetInfo + // if clang shares this codegen in future + constexpr uint64_t IntBits = 32; + + // bool __atomic_compare_exchange(size_t size, void *obj, void *expected, + // void *desired, int success, int failure); + llvm::Value *Args[6] = { + getAtomicSizeValue(), + getAtomicPointer(), + ExpectedVal, + DesiredVal, + llvm::Constant::getIntegerValue( + llvm::IntegerType::get(ctx, IntBits), + llvm::APInt(IntBits, static_cast(Success), + /*signed=*/true)), + llvm::Constant::getIntegerValue( + llvm::IntegerType::get(ctx, IntBits), + llvm::APInt(IntBits, static_cast(Failure), + /*signed=*/true)), + }; + auto Result = EmitAtomicLibcall(Builder, "__atomic_compare_exchange", + llvm::IntegerType::getInt1Ty(ctx), Args); + return std::make_pair(ExpectedVal, Result); + } + + Value *castToAtomicIntPointer(Value *addr) const { + return addr; // opaque pointer + } + + Value *getAtomicAddressAsAtomicIntPointer() const { + return castToAtomicIntPointer(getAtomicPointer()); + } + + std::pair + EmitAtomicCompareExchangeOp(llvm::Value *ExpectedVal, llvm::Value *DesiredVal, + llvm::AtomicOrdering Success, + llvm::AtomicOrdering Failure, + bool 
+                              bool IsVolatile = false, bool IsWeak = false) {
+    // Do the atomic store.
+    Value *Addr = getAtomicAddressAsAtomicIntPointer();
+    auto *Inst = Builder->CreateAtomicCmpXchg(Addr, ExpectedVal, DesiredVal,
+                                              getAtomicAlignment(), Success,
+                                              Failure, llvm::SyncScope::System);
+    // Other decoration.
+    Inst->setVolatile(IsVolatile);
+    Inst->setWeak(IsWeak);
+
+    auto *PreviousVal = Builder->CreateExtractValue(Inst, /*Idxs=*/0);
+    auto *SuccessFailureVal = Builder->CreateExtractValue(Inst, /*Idxs=*/1);
+    return std::make_pair(PreviousVal, SuccessFailureVal);
+  }
+
+  std::pair<llvm::Value *, llvm::Value *>
+  EmitAtomicCompareExchange(llvm::Value *ExpectedVal, llvm::Value *DesiredVal,
+                            llvm::AtomicOrdering Success,
+                            llvm::AtomicOrdering Failure, bool IsVolatile,
+                            bool IsWeak) {
+    if (shouldUseLibcall())
+      return EmitAtomicCompareExchangeLibcall(ExpectedVal, DesiredVal, Success,
+                                              Failure);
+
+    auto Res = EmitAtomicCompareExchangeOp(ExpectedVal, DesiredVal, Success,
+                                           Failure, IsVolatile, IsWeak);
+    return Res;
+  }
+
+  // void __atomic_load(size_t size, void *mem, void *return, int order);
+  std::pair<llvm::Value *, llvm::Value *>
+  EmitAtomicLoadLibcall(llvm::AtomicOrdering AO) {
+    LLVMContext &Ctx = getLLVMContext();
+    Type *SizedIntTy = Type::getIntNTy(Ctx, getAtomicSizeInBits());
+    Type *ResultTy;
+    SmallVector<Value *> Args;
+    AttributeList Attr;
+    Module *M = Builder->GetInsertBlock()->getModule();
+    const DataLayout &DL = M->getDataLayout();
+    Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx),
+                                    this->getAtomicSizeInBits() / 8));
+
+    Value *PtrVal = getAtomicPointer();
+    PtrVal = Builder->CreateAddrSpaceCast(PtrVal, PointerType::getUnqual(Ctx));
+    Args.push_back(PtrVal);
+    AllocaInst *AllocaResult =
+        CreateAlloca(Ty, getAtomicPointer()->getName() + "atomic.temp.load");
+    const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);
+    AllocaResult->setAlignment(AllocaAlignment);
+    Args.push_back(AllocaResult);
+    Constant *OrderingVal =
+        ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(AO));
+    Args.push_back(OrderingVal);
+
+    ResultTy = Type::getVoidTy(Ctx);
+    SmallVector<Type *> ArgTys;
+    for (Value *Arg : Args)
+      ArgTys.push_back(Arg->getType());
+    FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
+    FunctionCallee LibcallFn =
+        M->getOrInsertFunction("__atomic_load", FnType, Attr);
+    CallInst *Call = Builder->CreateCall(LibcallFn, Args);
+    Call->setAttributes(Attr);
+    return std::make_pair(
+        Builder->CreateAlignedLoad(Ty, AllocaResult, AllocaAlignment),
+        AllocaResult);
+  }
+};
+} // end namespace llvm
+
+#endif /* LLVM_FRONTEND_ATOMIC_ATOMIC_H */
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 4be0159fb1dd9f..1b8a6e47b3baf8 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -15,6 +15,7 @@
 #define LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
 
 #include "llvm/Analysis/MemorySSAUpdater.h"
+#include "llvm/Frontend/Atomic/Atomic.h"
 #include "llvm/Frontend/OpenMP/OMPConstants.h"
 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
 #include "llvm/IR/DebugLoc.h"
@@ -479,6 +480,27 @@ class OpenMPIRBuilder {
         T(Triple(M.getTargetTriple())) {}
   ~OpenMPIRBuilder();
 
+  class AtomicInfo : public llvm::AtomicInfo<IRBuilder<>> {
+    llvm::Value *AtomicVar;
+
+  public:
+    AtomicInfo(IRBuilder<> *Builder, llvm::Type *Ty, uint64_t AtomicSizeInBits,
+               uint64_t ValueSizeInBits, llvm::Align AtomicAlign,
+               llvm::Align ValueAlign, bool UseLibcall, llvm::Value *AtomicVar)
+        : llvm::AtomicInfo<IRBuilder<>>(Builder, Ty, AtomicSizeInBits,
+                                        ValueSizeInBits, AtomicAlign,
+                                        ValueAlign, UseLibcall),
+          AtomicVar(AtomicVar) {}
+
+    llvm::Value *getAtomicPointer() const override { return AtomicVar; }
+    void decorateWithTBAA(llvm::Instruction *I) override {}
+    llvm::AllocaInst *CreateAlloca(llvm::Type *Ty,
+                                   const llvm::Twine &Name) const override {
+      llvm::AllocaInst *allocaInst = Builder->CreateAlloca(Ty);
+      allocaInst->setName(Name);
+      return allocaInst;
+    }
+  };
   /// Initialize the internal state, this will put structures types and
   /// potentially other helpers into the underlying module. Must be called
   /// before any other method and only once! This internal state includes types
@@ -3039,6 +3061,15 @@ class OpenMPIRBuilder {
                        AtomicUpdateCallbackTy &UpdateOp, bool VolatileX,
                        bool IsXBinopExpr);
 
+  std::pair<llvm::Value *, llvm::Value *>
+  EmitAtomicLoadLibcall(Value *X, Type *XElemTy, llvm::AtomicOrdering AO,
+                        uint64_t AtomicSizeInBits);
+
+  std::pair<llvm::Value *, llvm::Value *> EmitAtomicCompareExchangeLibcall(
+      Value *X, Type *XElemTy, uint64_t AtomicSizeInBits,
+      llvm::Value *ExpectedVal, llvm::Value *DesiredVal,
+      llvm::AtomicOrdering Success, llvm::AtomicOrdering Failure);
+
   /// Emit the binary op. described by \p RMWOp, using \p Src1 and \p Src2 .
   ///
   /// \Return The instruction
diff --git a/llvm/lib/Frontend/Atomic/Atomic.cpp b/llvm/lib/Frontend/Atomic/Atomic.cpp
new file mode 100644
index 00000000000000..03b476d113bd48
--- /dev/null
+++ b/llvm/lib/Frontend/Atomic/Atomic.cpp
@@ -0,0 +1,19 @@
+//===--- Atomic.cpp - Codegen of atomic operations
+//---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Frontend/Atomic/Atomic.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Frontend/Atomic/Atomic.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Operator.h"
+
+namespace {} // namespace
+
+namespace llvm {} // end namespace llvm
diff --git a/llvm/lib/Frontend/Atomic/CMakeLists.txt b/llvm/lib/Frontend/Atomic/CMakeLists.txt
new file mode 100644
index 00000000000000..0d0d3d445b726d
--- /dev/null
+++ b/llvm/lib/Frontend/Atomic/CMakeLists.txt
@@ -0,0 +1,15 @@
+add_llvm_component_library(LLVMFrontendAtomic
+  Atomic.cpp
+
+  ADDITIONAL_HEADER_DIRS
+  ${LLVM_MAIN_INCLUDE_DIR}/llvm/Frontend/Atomic
+
+  DEPENDS
+  LLVMAnalysis
+  LLVMTargetParser
+
+  LINK_COMPONENTS
+  Core
+  Support
+  Analysis
+)
diff --git a/llvm/lib/Frontend/CMakeLists.txt b/llvm/lib/Frontend/CMakeLists.txt
index 62dd0da1e6c2de..b305ce7d771ce7 100644
--- a/llvm/lib/Frontend/CMakeLists.txt
+++ b/llvm/lib/Frontend/CMakeLists.txt
@@ -1,3 +1,4 @@
+add_subdirectory(Atomic)
 add_subdirectory(Driver)
 add_subdirectory(HLSL)
 add_subdirectory(OpenACC)
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 922c65d7fc3f5c..47cc6ff7655caf 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -7977,6 +7977,54 @@ std::pair<Value *, Value *> OpenMPIRBuilder::emitAtomicUpdate(
       Res.second = Res.first;
     else
       Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp);
+  } else if (RMWOp == llvm::AtomicRMWInst::BinOp::BAD_BINOP &&
+             XElemTy->isStructTy()) {
+    LoadInst *OldVal =
+        Builder.CreateLoad(XElemTy, X, X->getName() + ".atomic.load");
+    OldVal->setAtomic(AO);
+    const DataLayout &LoadDL = OldVal->getModule()->getDataLayout();
+    unsigned LoadSize =
+        LoadDL.getTypeStoreSize(OldVal->getPointerOperand()->getType());
+
+    OpenMPIRBuilder::AtomicInfo atomicInfo(
+        &Builder, XElemTy, LoadSize * 8, LoadSize * 8, OldVal->getAlign(),
+        OldVal->getAlign(), true /* UseLibcall */, X);
+    auto AtomicLoadRes = atomicInfo.EmitAtomicLoadLibcall(AO);
+    BasicBlock *CurBB = Builder.GetInsertBlock();
+    Instruction *CurBBTI = CurBB->getTerminator();
+    CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
+    BasicBlock *ExitBB =
+        CurBB->splitBasicBlock(CurBBTI, X->getName() + ".atomic.exit");
+    BasicBlock *ContBB = CurBB->splitBasicBlock(CurBB->getTerminator(),
+                                                X->getName() + ".atomic.cont");
+    ContBB->getTerminator()->eraseFromParent();
+    Builder.restoreIP(AllocaIP);
+    AllocaInst *NewAtomicAddr = Builder.CreateAlloca(XElemTy);
+    NewAtomicAddr->setName(X->getName() + "x.new.val");
+    Builder.SetInsertPoint(ContBB);
+    llvm::PHINode *PHI = Builder.CreatePHI(OldVal->getType(), 2);
+    PHI->addIncoming(AtomicLoadRes.first, CurBB);
+    Value *OldExprVal = PHI;
+    Value *Upd = UpdateOp(OldExprVal, Builder);
+    Builder.CreateStore(Upd, NewAtomicAddr);
+    AtomicOrdering Failure =
+        llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO);
+    auto Result = atomicInfo.EmitAtomicCompareExchangeLibcall(
+        AtomicLoadRes.second, NewAtomicAddr, AO, Failure);
+    LoadInst *PHILoad = Builder.CreateLoad(XElemTy, Result.first);
+    PHI->addIncoming(PHILoad, Builder.GetInsertBlock());
+    Builder.CreateCondBr(Result.second, ExitBB, ContBB);
+    OldVal->eraseFromParent();
+    Res.first = OldExprVal;
+    Res.second = Upd;
+
+    if (UnreachableInst *ExitTI =
+            dyn_cast<UnreachableInst>(ExitBB->getTerminator())) {
+      CurBBTI->eraseFromParent();
+      Builder.SetInsertPoint(ExitBB);
+    } else {
+      Builder.SetInsertPoint(ExitTI);
+    }
   } else {
     IntegerType *IntCastTy =
         IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 9e5f800dca60bd..19d80fbbd699b0 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -2055,28 +2055,23 @@ convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp,
     isPostfixUpdate = atomicCaptureOp.getSecondOp() ==
                       atomicCaptureOp.getAtomicUpdateOp().getOperation();
     auto &innerOpList = atomicUpdateOp.getRegion().front().getOperations();
-    bool isRegionArgUsed{false};
     // Find the binary update operation that uses the region argument
     // and get the expression to update
-    for (Operation &innerOp : innerOpList) {
-      if (innerOp.getNumOperands() == 2) {
-        binop = convertBinOpToAtomic(innerOp);
-        if (!llvm::is_contained(innerOp.getOperands(),
-                                atomicUpdateOp.getRegion().getArgument(0)))
-          continue;
-        isRegionArgUsed = true;
-        isXBinopExpr =
-            innerOp.getNumOperands() > 0 &&
-            innerOp.getOperand(0) == atomicUpdateOp.getRegion().getArgument(0);
-        mlirExpr =
-            (isXBinopExpr ? innerOp.getOperand(1) : innerOp.getOperand(0));
-        break;
+    if (innerOpList.size() == 2) {
+      mlir::Operation &innerOp = *atomicUpdateOp.getRegion().front().begin();
+      if (!llvm::is_contained(innerOp.getOperands(),
+                              atomicUpdateOp.getRegion().getArgument(0))) {
+        return atomicUpdateOp.emitError(
+            "no atomic update operation with region argument"
+            " as operand found inside atomic.update region");
       }
+      binop = convertBinOpToAtomic(innerOp);
+      isXBinopExpr =
+          innerOp.getOperand(0) == atomicUpdateOp.getRegion().getArgument(0);
+      mlirExpr = (isXBinopExpr ? innerOp.getOperand(1) : innerOp.getOperand(0));
+    } else {
+      binop = llvm::AtomicRMWInst::BinOp::BAD_BINOP;
     }
-    if (!isRegionArgUsed)
-      return atomicUpdateOp.emitError(
-          "no atomic update operation with region argument"
-          " as operand found inside atomic.update region");
   }
 
   llvm::Value *llvmExpr = moduleTranslation.lookupValue(mlirExpr);
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index 95e12e5bc4e742..5d76e87472dfe4 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -1450,6 +1450,120 @@ llvm.func @omp_atomic_update(%x:!llvm.ptr, %expr: i32, %xbool: !llvm.ptr, %exprbool: i1) {
 
 // -----
 
+//CHECK: %[[X_NEW_VAL:.*]] = alloca { float, float }, align 8
+//CHECK: {{.*}} = alloca { float, float }, i64 1, align 8
+//CHECK: %[[ORIG_VAL:.*]] = alloca { float, float }, i64 1, align 8
+
+//CHECK: br label %entry
+
+//CHECK: entry:
+//CHECK: %[[ATOMIC_TEMP_LOAD:.*]] = alloca { float, float }, align 8
+//CHECK: call void @__atomic_load(i64 8, ptr %[[ORIG_VAL]], ptr %[[ATOMIC_TEMP_LOAD]], i32 0)
+//CHECK: %[[PHI_NODE_ENTRY_1:.*]] = load { float, float }, ptr %[[ATOMIC_TEMP_LOAD]], align 8
+//CHECK: br label %.atomic.cont
+
+//CHECK: .atomic.cont
+//CHECK: %[[VAL_4:.*]] = phi { float, float } [ %[[PHI_NODE_ENTRY_1]], %entry ], [ %{{.*}}, %.atomic.cont ]
+//CHECK: %[[VAL_5:.*]] = extractvalue { float, float } %[[VAL_4]], 0
+//CHECK: %[[VAL_6:.*]] = extractvalue { float, float } %[[VAL_4]], 1
+//CHECK: %[[VAL_7:.*]] = fadd contract float %[[VAL_5]], 1.000000e+00
+//CHECK: %[[VAL_8:.*]] = fadd contract float %[[VAL_6]], 1.000000e+00
+//CHECK: %[[VAL_9:.*]] = insertvalue { float, float } undef, float %[[VAL_7]], 0
+//CHECK: %[[VAL_10:.*]] = insertvalue { float, float } %[[VAL_9]], float %[[VAL_8]], 1
+//CHECK: store { float, float } %[[VAL_10]], ptr %[[X_NEW_VAL]], align 4
+//CHECK: %[[VAL_11:.*]] = call i1 @__atomic_compare_exchange(i64 8, ptr %[[ORIG_VAL]], ptr %[[ATOMIC_TEMP_LOAD]], ptr %[[X_NEW_VAL]], i32 2, i32 2)
+//CHECK: %[[VAL_12:.*]] = load { float, float }, ptr %[[ATOMIC_TEMP_LOAD]], align 4
+//CHECK: br i1 %[[VAL_11]], label %.atomic.exit, label %.atomic.cont
+
+llvm.func @_QPomp_atomic_update_complex() {
+  %0 = llvm.mlir.constant(1 : i64) : i64
+  %1 = llvm.alloca %0 x !llvm.struct<(f32, f32)> {bindc_name = "ib"} : (i64) -> !llvm.ptr
+  %2 = llvm.mlir.constant(1 : i64) : i64
+  %3 = llvm.alloca %2 x !llvm.struct<(f32, f32)> {bindc_name = "ia"} : (i64) -> !llvm.ptr
+  %4 = llvm.mlir.constant(1.000000e+00 : f32) : f32
+  %5 = llvm.mlir.undef : !llvm.struct<(f32, f32)>
+  %6 = llvm.insertvalue %4, %5[0] : !llvm.struct<(f32, f32)>
+  %7 = llvm.insertvalue %4, %6[1] : !llvm.struct<(f32, f32)>
+  omp.atomic.update %3 : !llvm.ptr {
+  ^bb0(%arg0: !llvm.struct<(f32, f32)>):
+    %8 = llvm.extractvalue %arg0[0] : !llvm.struct<(f32, f32)>
+    %9 = llvm.extractvalue %arg0[1] : !llvm.struct<(f32, f32)>
+    %10 = llvm.extractvalue %7[0] : !llvm.struct<(f32, f32)>
+    %11 = llvm.extractvalue %7[1] : !llvm.struct<(f32, f32)>
+    %12 = llvm.fadd %8, %10 {fastmathFlags = #llvm.fastmath<contract>} : f32
+    %13 = llvm.fadd %9, %11 {fastmathFlags = #llvm.fastmath<contract>} : f32
+    %14 = llvm.mlir.undef : !llvm.struct<(f32, f32)>
+    %15 = llvm.insertvalue %12, %14[0] : !llvm.struct<(f32, f32)>
+    %16 = llvm.insertvalue %13, %15[1] : !llvm.struct<(f32, f32)>
+    omp.yield(%16 : !llvm.struct<(f32, f32)>)
+  }
+  llvm.return
+}
+
+// -----
+
+//CHECK: %[[X_NEW_VAL:.*]] = alloca { float, float }, align 8
+//CHECK: %[[VAL_1:.*]] = alloca { float, float }, i64 1, align 8
+//CHECK: %[[ORIG_VAL:.*]] = alloca { float, float }, i64 1, align 8
+//CHECK: store { float, float } { float 2.000000e+00, float 2.000000e+00 }, ptr %[[ORIG_VAL]], align 4
+//CHECK: br label %entry
+
+//CHECK: entry: ; preds = %0
+//CHECK: %[[ATOMIC_TEMP_LOAD:.*]] = alloca { float, float }, align 8
+//CHECK: call void @__atomic_load(i64 8, ptr %[[ORIG_VAL]], ptr %[[ATOMIC_TEMP_LOAD]], i32 0)
+//CHECK: %[[PHI_NODE_ENTRY_1:.*]] = load { float, float }, ptr %[[ATOMIC_TEMP_LOAD]], align 8
+//CHECK: br label %.atomic.cont
+
+//CHECK: .atomic.cont
+//CHECK: %[[VAL_4:.*]] = phi { float, float } [ %[[PHI_NODE_ENTRY_1]], %entry ], [ %{{.*}}, %.atomic.cont ]
+//CHECK: %[[VAL_5:.*]] = extractvalue { float, float } %[[VAL_4]], 0
+//CHECK: %[[VAL_6:.*]] = extractvalue { float, float } %[[VAL_4]], 1
+//CHECK: %[[VAL_7:.*]] = fadd contract float %[[VAL_5]], 1.000000e+00
+//CHECK: %[[VAL_8:.*]] = fadd contract float %[[VAL_6]], 1.000000e+00
+//CHECK: %[[VAL_9:.*]] = insertvalue { float, float } undef, float %[[VAL_7]], 0
+//CHECK: %[[VAL_10:.*]] = insertvalue { float, float } %[[VAL_9]], float %[[VAL_8]], 1
+//CHECK: store { float, float } %[[VAL_10]], ptr %[[X_NEW_VAL]], align 4
+//CHECK: %[[VAL_11:.*]] = call i1 @__atomic_compare_exchange(i64 8, ptr %[[ORIG_VAL]], ptr %[[ATOMIC_TEMP_LOAD]], ptr %[[X_NEW_VAL]], i32 2, i32 2)
+//CHECK: %[[VAL_12:.*]] = load { float, float }, ptr %[[ATOMIC_TEMP_LOAD]], align 4
+//CHECK: br i1 %[[VAL_11]], label %.atomic.exit, label %.atomic.cont
+//CHECK: .atomic.exit
+//CHECK: store { float, float } %[[VAL_10]], ptr %[[VAL_1]], align 4
+
+llvm.func @_QPomp_atomic_capture_complex() {
+  %0 = llvm.mlir.constant(1 : i64) : i64
+  %1 = llvm.alloca %0 x !llvm.struct<(f32, f32)> {bindc_name = "ib"} : (i64) -> !llvm.ptr
+  %2 = llvm.mlir.constant(1 : i64) : i64
+  %3 = llvm.alloca %2 x !llvm.struct<(f32, f32)> {bindc_name = "ia"} : (i64) -> !llvm.ptr
+  %4 = llvm.mlir.constant(1.000000e+00 : f32) : f32
+  %5 = llvm.mlir.constant(2.000000e+00 : f32) : f32
+  %6 = llvm.mlir.undef : !llvm.struct<(f32, f32)>
+  %7 = llvm.insertvalue %5, %6[0] : !llvm.struct<(f32, f32)>
+  %8 = llvm.insertvalue %5, %7[1] : !llvm.struct<(f32, f32)>
+  llvm.store %8, %3 : !llvm.struct<(f32, f32)>, !llvm.ptr
+  %9 = llvm.mlir.undef : !llvm.struct<(f32, f32)>
+  %10 = llvm.insertvalue %4, %9[0] : !llvm.struct<(f32, f32)>
+  %11 = llvm.insertvalue %4, %10[1] : !llvm.struct<(f32, f32)>
+  omp.atomic.capture {
+    omp.atomic.update %3 : !llvm.ptr {
+    ^bb0(%arg0: !llvm.struct<(f32, f32)>):
+      %12 = llvm.extractvalue %arg0[0] : !llvm.struct<(f32, f32)>
+      %13 = llvm.extractvalue %arg0[1] : !llvm.struct<(f32, f32)>
+      %14 = llvm.extractvalue %11[0] : !llvm.struct<(f32, f32)>
+      %15 = llvm.extractvalue %11[1] : !llvm.struct<(f32, f32)>
+      %16 = llvm.fadd %12, %14 {fastmathFlags = #llvm.fastmath<contract>} : f32
+      %17 = llvm.fadd %13, %15 {fastmathFlags = #llvm.fastmath<contract>} : f32
+      %18 = llvm.mlir.undef : !llvm.struct<(f32, f32)>
+      %19 = llvm.insertvalue %16, %18[0] : !llvm.struct<(f32, f32)>
+      %20 = llvm.insertvalue %17, %19[1] : !llvm.struct<(f32, f32)>
+      omp.yield(%20 : !llvm.struct<(f32, f32)>)
+    }
+    omp.atomic.read %1 = %3 : !llvm.ptr, !llvm.struct<(f32, f32)>
+  }
+  llvm.return
+}
+
+// -----
+
 // Checking an order-dependent operation when the order is `expr binop x`
 // CHECK-LABEL: @omp_atomic_update_ordering
 // CHECK-SAME: (ptr %[[x:.*]], i32 %[[expr:.*]])