diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 458d05d5059db..1f3fb95c339c7 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -690,14 +690,14 @@ inlineOmpRegionCleanup(llvm::SmallVectorImpl &cleanupRegions, : &builder.GetInsertBlock()->back(); if (potentialTerminator && potentialTerminator->isTerminator()) builder.SetInsertPoint(potentialTerminator); - llvm::Value *prviateVarValue = + llvm::Value *privateVarValue = shouldLoadCleanupRegionArg ? builder.CreateLoad( moduleTranslation.convertType(entry.getArgument(0).getType()), privateVariables[i]) : privateVariables[i]; - moduleTranslation.mapValue(entry.getArgument(0), prviateVarValue); + moduleTranslation.mapValue(entry.getArgument(0), privateVarValue); if (failed(inlineConvertOmpRegions(*cleanupRegion, regionName, builder, moduleTranslation))) @@ -1424,35 +1424,106 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, } }; - SmallVector privatizerClones; - SmallVector privateVariables; + SmallVector mlirPrivatizerClones; + SmallVector llvmPrivateVars; // TODO: Perform appropriate actions according to the data-sharing // attribute (shared, private, firstprivate, ...) of variables. // Currently shared and private are supported. auto privCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP, - llvm::Value &, llvm::Value &vPtr, - llvm::Value *&replacementValue) -> InsertPointTy { - replacementValue = &vPtr; + llvm::Value &, llvm::Value &llvmOmpRegionInput, + llvm::Value *&llvmReplacementValue) -> InsertPointTy { + llvmReplacementValue = &llvmOmpRegionInput; // If this is a private value, this lambda will return the corresponding // mlir value and its `PrivateClauseOp`. Otherwise, empty values are // returned. - auto [privVar, privatizerClone] = + auto [mlirPrivVar, mlirPrivatizerClone] = [&]() -> std::pair { if (!opInst.getPrivateVars().empty()) { - auto privateVars = opInst.getPrivateVars(); - auto privateSyms = opInst.getPrivateSyms(); + auto mlirPrivVars = opInst.getPrivateVars(); + auto mlirPrivSyms = opInst.getPrivateSyms(); - for (auto [privVar, privatizerAttr] : - llvm::zip_equal(privateVars, *privateSyms)) { + // Try to find a privatizer that corresponds to the LLVM value being + // privatized. + for (auto [mlirPrivVar, mlirPrivatizerAttr] : + llvm::zip_equal(mlirPrivVars, *mlirPrivSyms)) { // Find the MLIR private variable corresponding to the LLVM value // being privatized. - llvm::Value *llvmPrivVar = moduleTranslation.lookupValue(privVar); - if (llvmPrivVar != &vPtr) + llvm::Value *mlirToLLVMPrivVar = + moduleTranslation.lookupValue(mlirPrivVar); + + // Check if the LLVM value being privatized matches the LLVM value + // mapped to privVar. In some cases, this is not trivial ... + auto isMatch = [&]() { + if (mlirToLLVMPrivVar == nullptr) + return false; + + // If both values are trivially equal, we found a match. + if (mlirToLLVMPrivVar == &llvmOmpRegionInput) + return true; + + // Otherwise, we check if both llvmOmpRegionInputPtr and + // mlirToLLVMPrivVar refer to the same memory (through a load/store + // pair). This happens if a struct (i.e. multi-field value) is being + // privatized. + // + // For example, if the privatized value is defined by: + // ``` + // %priv_val = alloca { ptr, i64 }, align 8 + // ``` + // + // The initialization of this value (outside the omp region) will be + // something like this: + // + // clang-format off + // ``` + // %partially_init_priv_val = insertvalue { ptr, i64 } undef, + // ptr %some_ptr, 0 + // %fully_init_priv_val = insertvalue { ptr, i64 } %partially_init_priv_val, + // i64 %some_i64, 1 + // ... + // store { ptr, i64 } %fully_init_priv_val, ptr %priv_val, align 8 + // ``` + // clang-format on + // + // Now, we enter the OMP region, in order to access this privatized + // value, we need to load from the allocated memory: + // ``` + // omp.par.entry: + // %priv_val_load = load { ptr, i64 }, ptr %priv_val, align 8 + // ``` + // + // The 2 LLVM values tracked here map as follows: + // - `mlirToLLVMPrivVar` -> `%fully_init_priv_val` + // - `llvmOmpRegionInputPtr` -> `%priv_val_load` + // + // Even though they eventually refer to the same memory reference + // (the memory being privatized), they are 2 distinct LLVM values. + // Therefore, we need to discover their correspondence by finding + // out if they store into and load from the same mem ref. + auto *llvmOmpRegionInputPtrLoad = + llvm::dyn_cast_if_present(&llvmOmpRegionInput); + + if (llvmOmpRegionInputPtrLoad == nullptr) + return false; + + for (auto &use : mlirToLLVMPrivVar->uses()) { + auto *mlirToLLVMPrivVarStore = + llvm::dyn_cast_if_present(use.getUser()); + if (mlirToLLVMPrivVarStore && + (llvmOmpRegionInputPtrLoad->getPointerOperand() == + mlirToLLVMPrivVarStore->getPointerOperand())) + return true; + } + + return false; + }; + + if (!isMatch()) continue; - SymbolRefAttr privSym = llvm::cast(privatizerAttr); + SymbolRefAttr privSym = llvm::cast(mlirPrivatizerAttr); omp::PrivateClauseOp privatizer = SymbolTable::lookupNearestSymbolFrom( opInst, privSym); @@ -1480,24 +1551,24 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, counter); clone.setSymName(cloneName); - return {privVar, clone}; + return {mlirPrivVar, clone}; } } return {mlir::Value(), omp::PrivateClauseOp()}; }(); - if (privVar) { - Region &allocRegion = privatizerClone.getAllocRegion(); + if (mlirPrivVar) { + Region &allocRegion = mlirPrivatizerClone.getAllocRegion(); // If this is a `firstprivate` clause, prepare the `omp.private` op by: - if (privatizerClone.getDataSharingType() == + if (mlirPrivatizerClone.getDataSharingType() == omp::DataSharingClauseType::FirstPrivate) { auto oldAllocBackBlock = std::prev(allocRegion.end()); omp::YieldOp oldAllocYieldOp = llvm::cast(oldAllocBackBlock->getTerminator()); - Region ©Region = privatizerClone.getCopyRegion(); + Region ©Region = mlirPrivatizerClone.getCopyRegion(); mlir::IRRewriter copyCloneBuilder(&moduleTranslation.getContext()); // 1. Cloning the `copy` region to the end of the `alloc` region. @@ -1524,7 +1595,7 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, // This way, the body of the privatizer will be changed from using the // region/block argument to the value being privatized. auto allocRegionArg = allocRegion.getArgument(0); - replaceAllUsesInRegionWith(allocRegionArg, privVar, allocRegion); + replaceAllUsesInRegionWith(allocRegionArg, mlirPrivVar, allocRegion); auto oldIP = builder.saveIP(); builder.restoreIP(allocaIP); @@ -1535,15 +1606,15 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, opInst.emitError("failed to inline `alloc` region of an `omp.private` " "op in the parallel region"); bodyGenStatus = failure(); - privatizerClone.erase(); + mlirPrivatizerClone.erase(); } else { assert(yieldedValues.size() == 1); - replacementValue = yieldedValues.front(); + llvmReplacementValue = yieldedValues.front(); // Keep the LLVM replacement value and the op clone in case we need to // emit cleanup (i.e. deallocation) logic. - privateVariables.push_back(replacementValue); - privatizerClones.push_back(privatizerClone); + llvmPrivateVars.push_back(llvmReplacementValue); + mlirPrivatizerClones.push_back(mlirPrivatizerClone); } builder.restoreIP(oldIP); @@ -1571,13 +1642,14 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, bodyGenStatus = failure(); SmallVector privateCleanupRegions; - llvm::transform(privatizerClones, std::back_inserter(privateCleanupRegions), + llvm::transform(mlirPrivatizerClones, + std::back_inserter(privateCleanupRegions), [](omp::PrivateClauseOp privatizer) { return &privatizer.getDeallocRegion(); }); if (failed(inlineOmpRegionCleanup( - privateCleanupRegions, privateVariables, moduleTranslation, builder, + privateCleanupRegions, llvmPrivateVars, moduleTranslation, builder, "omp.private.dealloc", /*shouldLoadCleanupRegionArg=*/false))) bodyGenStatus = failure(); @@ -1604,7 +1676,7 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, ompBuilder->createParallel(ompLoc, allocaIP, bodyGenCB, privCB, finiCB, ifCond, numThreads, pbKind, isCancellable)); - for (mlir::omp::PrivateClauseOp privatizerClone : privatizerClones) + for (mlir::omp::PrivateClauseOp privatizerClone : mlirPrivatizerClones) privatizerClone.erase(); return bodyGenStatus; diff --git a/mlir/test/Target/LLVMIR/openmp-firstprivate.mlir b/mlir/test/Target/LLVMIR/openmp-firstprivate.mlir index 65ae98b2a74c6..b06ad96f4592c 100644 --- a/mlir/test/Target/LLVMIR/openmp-firstprivate.mlir +++ b/mlir/test/Target/LLVMIR/openmp-firstprivate.mlir @@ -114,3 +114,45 @@ omp.private {type = firstprivate} @multi_block.privatizer : !llvm.ptr alloc { llvm.store %arg2, %arg3 : f32, !llvm.ptr omp.yield(%arg3 : !llvm.ptr) } + +// ----- + +// Verifies fix for https://github.com/llvm/llvm-project/issues/102935. +// +// The issue happens since we previously failed to match MLIR values to their +// corresponding LLVM values in some cases (e.g. char strings with non-const +// length). +llvm.func @non_const_len_char_test(%n: !llvm.ptr {fir.bindc_name = "n"}) { + %n_val = llvm.load %n : !llvm.ptr -> i64 + %orig_alloc = llvm.alloca %n_val x i8 {bindc_name = "str"} : (i64) -> !llvm.ptr + %orig_val = llvm.mlir.undef : !llvm.struct<(ptr, i64)> + %orig_val_init = llvm.insertvalue %orig_alloc, %orig_val[0] : !llvm.struct<(ptr, i64)> + omp.parallel private(@non_const_len_char %orig_val_init -> %priv_arg : !llvm.struct<(ptr, i64)>) { + %dummy = llvm.extractvalue %priv_arg[0] : !llvm.struct<(ptr, i64)> + omp.terminator + } + llvm.return +} + +omp.private {type = firstprivate} @non_const_len_char : !llvm.struct<(ptr, i64)> alloc { +^bb0(%orig_val: !llvm.struct<(ptr, i64)>): + %str_len = llvm.extractvalue %orig_val[1] : !llvm.struct<(ptr, i64)> + %priv_alloc = llvm.alloca %str_len x i8 {bindc_name = "str", pinned} : (i64) -> !llvm.ptr + %priv_val = llvm.mlir.undef : !llvm.struct<(ptr, i64)> + %priv_val_init = llvm.insertvalue %priv_alloc, %priv_val[0] : !llvm.struct<(ptr, i64)> + omp.yield(%priv_val_init : !llvm.struct<(ptr, i64)>) +} copy { +^bb0(%orig_val: !llvm.struct<(ptr, i64)>, %priv_val: !llvm.struct<(ptr, i64)>): + llvm.call @foo() : () -> () + omp.yield(%priv_val : !llvm.struct<(ptr, i64)>) +} + +llvm.func @foo() + +// CHECK-LABEL: @non_const_len_char_test..omp_par({{.*}}) +// CHECK-NEXT: omp.par.entry: +// Verify that we found the privatizer by checking that we properly inlined the +// bodies of the alloc and copy regions. +// CHECK: %[[STR_LEN:.*]] = extractvalue { ptr, i64 } %{{.*}}, 1 +// CHECK: %{{.*}} = alloca i8, i64 %[[STR_LEN]], align 1 +// CHECK: call void @foo()