Integrate LLVM at 9fa55ec3 (#18670)
Cherry-picks:
1. llvm/llvm-project#110918
2. llvm/llvm-project#110904
3. llvm/llvm-project#110927

The revision disables the pack/unpack decomposition when any of the inner
tiles is dynamic, because that leads to unbounded stack allocation
(introduced by the tensor.pad op). This was broken by the `Extend the logic
to generalise tensor.pack` commits. See
llvm/llvm-project@66f84c8
and
llvm/llvm-project@1c01bcb.
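
For reference, the guard added to both `setRootConfig` overloads in KernelDispatch.cpp (see the diff below) boils down to the check sketched here. This is a condensed sketch rather than code from the patch; the helper name `wantsDecomposition` and the header notes in the comments are assumptions.

```cpp
// Illustrative sketch, not part of the patch. Assumes the usual LLVM/MLIR
// headers are available (llvm/ADT/STLExtras.h for llvm::any_of,
// mlir/IR/OpDefinition.h for OpFoldResult) along with the IREE helpers
// isX86/isRISCV used elsewhere in this diff.
static bool wantsDecomposition(ArrayRef<OpFoldResult> mixedTiles,
                               IREE::HAL::ExecutableTargetAttr target) {
  // An inner tile is dynamic when its OpFoldResult holds an SSA Value rather
  // than a constant attribute; decomposing such a pack/unpack introduces a
  // tensor.pad whose size is unknown, i.e. unbounded stack allocation.
  bool hasDynamicInnerTile = llvm::any_of(
      mixedTiles, [](OpFoldResult ofr) { return ofr.is<Value>(); });
  // Request decomposition only when every inner tile is static and the
  // backend is neither x86 nor RISC-V, which prefer to keep the ops whole.
  return !hasDynamicInnerTile && !isX86(target) && !isRISCV(target);
}
```

When this condition holds, the pipeline config dictionary gains the `enable_decomposition` unit attribute, which `isDecompositionEnabled` later reads back when configuring the lowering pipeline.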

---------

Signed-off-by: Benoit Jacob <jacob.benoit.1@gmail.com>
Signed-off-by: hanhanW <hanhan0912@gmail.com>
Co-authored-by: hanhanW <hanhan0912@gmail.com>
bjacob and hanhanW authored Oct 2, 2024
1 parent cd48b10 commit 903ab0a
Showing 9 changed files with 114 additions and 16 deletions.
39 changes: 36 additions & 3 deletions compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
@@ -30,6 +30,7 @@
#include "mlir/Dialect/Utils/IndexingUtils.h"
#include "mlir/Dialect/Utils/StaticValueUtils.h"
#include "mlir/IR/Matchers.h"
#include "mlir/IR/OpDefinition.h"
#include "mlir/IR/TypeUtilities.h"
#include "mlir/Interfaces/FunctionInterfaces.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
@@ -272,14 +273,22 @@ getVectorPreProcStrategy(linalg::LinalgOp linalgOp) {
return VectorPreProcStrategy::None;
}

DictionaryAttr getPipelineConfWithPeelingAttr(MLIRContext *context) {
static DictionaryAttr getPipelineConfWithPeelingAttr(MLIRContext *context) {
auto enableLoopPeelingAttrName = getEnableLoopPeelingAttrName(context);
auto unitAttr = UnitAttr::get(context);

return DictionaryAttr::get(
context, ArrayRef<NamedAttribute>({enableLoopPeelingAttrName, unitAttr}));
}

static DictionaryAttr
getPipelineConfWithDecompositionAttr(MLIRContext *context) {
auto attrName = getEnableDecompositionAttrName(context);
auto unitAttr = UnitAttr::get(context);
return DictionaryAttr::get(context,
ArrayRef<NamedAttribute>({attrName, unitAttr}));
}

/// Looks for the `native_vector_size` attribute in the hal.executable.target
/// looked up from this op.
static int64_t
@@ -1690,11 +1699,23 @@ static LogicalResult setRootConfig(mlir::FunctionOpInterface entryPointFn,
distTileSizes[pos] = std::max<int64_t>(distTileSizes[pos], 1);
}

// Dynamic inner tiles lead to unbounded stack allocation (introduced by the
// tensor.pad op), so we do not decompose those cases. The x86 and RISC-V
// backends also prefer not to decompose the ops.
DictionaryAttr pipelineConfig;
auto target = IREE::HAL::ExecutableTargetAttr::lookup(entryPointFn);
bool hasDynamicInnerTile = llvm::any_of(
op.getMixedTiles(), [](OpFoldResult ofr) { return ofr.is<Value>(); });
if (!hasDynamicInnerTile && !isX86(target) && !isRISCV(target)) {
pipelineConfig = getPipelineConfWithDecompositionAttr(op.getContext());
}

SmallVector<int64_t> vecTileSizes = getPackVectorTileSizes(entryPointFn, op);
TileSizesListType tileSizesList = {distTileSizes, vecTileSizes};
return setOpConfigAndEntryPointFnTranslation(
entryPointFn, op, tileSizesList,
DispatchLoweringPassPipeline::CPUDataTiling);
DispatchLoweringPassPipeline::CPUDataTiling, /*workgroupSize=*/{},
/*subgroupSize=*/{}, pipelineConfig);
}

static LogicalResult setRootConfig(mlir::FunctionOpInterface entryPointFn,
@@ -1718,10 +1739,22 @@ static LogicalResult setRootConfig(mlir::FunctionOpInterface entryPointFn,
tileSizes[pos] = ShapedType::isDynamic(size) ? 1 : size;
}

// Dynamic inner tiles lead to unbounded stack allocation (introduced by the
// tensor.pad op), so we do not decompose those cases. The x86 and RISC-V
// backends also prefer not to decompose the ops.
DictionaryAttr pipelineConfig;
auto target = IREE::HAL::ExecutableTargetAttr::lookup(entryPointFn);
bool hasDynamicInnerTile = llvm::any_of(
op.getMixedTiles(), [](OpFoldResult ofr) { return ofr.is<Value>(); });
if (!hasDynamicInnerTile && !isX86(target) && !isRISCV(target)) {
pipelineConfig = getPipelineConfWithDecompositionAttr(op.getContext());
}

TileSizesListType tileSizesList = {distTileSizes, tileSizes};
return setOpConfigAndEntryPointFnTranslation(
entryPointFn, op, tileSizesList,
DispatchLoweringPassPipeline::CPUDataTiling);
DispatchLoweringPassPipeline::CPUDataTiling, /*workgroupSize=*/{},
/*subgroupSize=*/{}, pipelineConfig);
}

static LogicalResult setRootConfig(mlir::FunctionOpInterface entryPointFn,
@@ -100,8 +100,8 @@ void LLVMCPULowerExecutableTargetPass::runOnOperation() {
LLVMCPUPipelineOptions pipelineOpts;
if (isX86(target) || isRISCV(target)) {
pipelineOpts.useConfiguredVectorSizes = false;
pipelineOpts.decomposePackUnPackOps = false;
}
pipelineOpts.decomposePackUnPackOps = isDecompositionEnabled(funcOp);
pipelineOpts.lowerToAVX2 = hasAVX2Feature(target);
pipelineOpts.enableVectorMasking =
isX86(target) || isRISCV(target) ||
@@ -260,7 +260,7 @@ func.func @pack() attributes {hal.executable.target = #executable_target_system_
return
}
// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[1, 16], [1, 1]]>
// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDataTiling>
// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDataTiling, {enable_decomposition}>
// CHECK: func.func @pack()
// CHECK-SAME: translation_info = #[[TRANSLATION]]
// CHECK: tensor.pack
@@ -293,14 +293,48 @@ func.func @unpack_outer_dynamic() attributes {hal.executable.target = #executabl
return
}
// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[64, 64], [32, 16]]>
// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDataTiling>
// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDataTiling, {enable_decomposition}>
// CHECK: func.func @unpack_outer_dynamic()
// CHECK-SAME: translation_info = #[[TRANSLATION]]
// CHECK: tensor.unpack
// CHECK-SAME: lowering_config = #[[CONFIG]]

// -----

#executable_target_system_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "system-elf-arm_64", {data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-linux-android30"}>
#pipeline_layout = #hal.pipeline.layout<constants = 6, bindings = [#hal.pipeline.binding<storage_buffer>, #hal.pipeline.binding<storage_buffer>]>
func.func @unpack_fully_dynamic() attributes {hal.executable.target = #executable_target_system_elf_arm_64_} {
%c131072 = arith.constant 131072 : index
%c0 = arith.constant 0 : index
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : i32
%1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : i32
%2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : i32
%3 = hal.interface.constant.load layout(#pipeline_layout) ordinal(3) : i32
%4 = hal.interface.constant.load layout(#pipeline_layout) ordinal(4) : i32
%5 = hal.interface.constant.load layout(#pipeline_layout) ordinal(5) : i32
%6 = arith.index_castui %0 : i32 to index
%7 = arith.index_castui %1 : i32 to index
%8 = arith.index_castui %2 : i32 to index
%9 = arith.index_castui %3 : i32 to index
%10 = arith.index_castui %4 : i32 to index
%11 = arith.index_castui %5 : i32 to index
%12 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<?x?x?x?xi32>>{%6, %7, %10, %11}
%13 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c131072) : !flow.dispatch.tensor<writeonly:tensor<?x?xi32>>{%8, %9}
%14 = flow.dispatch.tensor.load %12, offsets = [0, 0, 0, 0], sizes = [%6, %7, 32, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?x?x?xi32>>{%6, %7, %10, %11} -> tensor<?x?x?x?xi32>
%15 = tensor.empty(%8, %9) : tensor<?x?xi32>
%unpack = tensor.unpack %14 inner_dims_pos = [0, 1] inner_tiles = [%10, %11] into %15 : tensor<?x?x?x?xi32> -> tensor<?x?xi32>
flow.dispatch.tensor.store %unpack, %13, offsets = [0, 0], sizes = [%8, %9], strides = [1, 1] : tensor<?x?xi32> -> !flow.dispatch.tensor<writeonly:tensor<?x?xi32>>{%8, %9}
return
}
// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[64, 64], [1, 1]]>
// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDataTiling>
// CHECK: func.func @unpack_fully_dynamic()
// CHECK-SAME: translation_info = #[[TRANSLATION]]
// CHECK: tensor.unpack
// CHECK-SAME: lowering_config = #[[CONFIG]]

// -----

#pipeline_layout = #hal.pipeline.layout<bindings = [
#hal.pipeline.binding<storage_buffer>,
#hal.pipeline.binding<storage_buffer>,
11 changes: 11 additions & 0 deletions compiler/src/iree/compiler/Codegen/Utils/CPUUtils.cpp
@@ -20,6 +20,7 @@
namespace mlir::iree_compiler {

static const char kLoopPeelingAttrName[] = "enable_loop_peeling";
static const char kDecompositionAttrName[] = "enable_decomposition";

FailureOr<Operation *> getRootOperation(ArrayRef<Operation *> computeOps) {
Operation *rootOperation = nullptr;
@@ -66,6 +67,16 @@ FailureOr<Operation *> getRootOperation(ArrayRef<Operation *> computeOps) {
return rootOperation;
}

StringAttr getEnableDecompositionAttrName(MLIRContext *ctx) {
return StringAttr::get(ctx, kDecompositionAttrName);
}

bool isDecompositionEnabled(FunctionOpInterface funcOp) {
DictionaryAttr config = getTranslationInfo(funcOp).getConfiguration();

return config && config.contains(kDecompositionAttrName);
}

StringAttr getEnableLoopPeelingAttrName(MLIRContext *ctx) {
return StringAttr::get(ctx, kLoopPeelingAttrName);
}
9 changes: 9 additions & 0 deletions compiler/src/iree/compiler/Codegen/Utils/CPUUtils.h
@@ -19,6 +19,15 @@ namespace mlir::iree_compiler {
/// to the end of the function is the root op.
FailureOr<Operation *> getRootOperation(ArrayRef<Operation *> computeOps);

/// Creates a string attribute containing the name of the attribute that is
/// used to enable decomposition.
StringAttr getEnableDecompositionAttrName(MLIRContext *ctx);

/// Checks whether decomposition has been enabled for the input function. This
/// is inferred from the config dictionary attribute that is part of the
/// translation info corresponding to this function.
bool isDecompositionEnabled(FunctionOpInterface funcOp);

/// Creates a string attribute containing the name of the attribute that is
/// used to enable loop peeling.
StringAttr getEnableLoopPeelingAttrName(MLIRContext *ctx);
18 changes: 13 additions & 5 deletions compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.cpp
@@ -176,7 +176,8 @@ static void addExecutableSubstitutionPasses(OpPassManager &passManager,
}
if (!substitutions.empty()) {
SubstituteExecutablesPassOptions substituteOptions;
substituteOptions.substitutions = substitutions;
substituteOptions.substitutions.assign(substitutions.begin(),
substitutions.end());
passManager.addPass(
IREE::HAL::createSubstituteExecutablesPass(substituteOptions));
}
@@ -197,12 +198,19 @@ void buildHALDeviceAssignmentPassPipeline(
// Today we just assign devices from parameters but we should instead be
// performing analysis at the flow level and then doing magic device
// database lookups here.
passManager.addPass(IREE::HAL::createAssignLegacyTargetDevicesPass(
{&targetRegistry, assignmentOptions.legacyTargetBackends}));
AssignLegacyTargetDevicesPassOptions options;
options.targetRegistry = &targetRegistry;
options.targetBackends.assign(
assignmentOptions.legacyTargetBackends.begin(),
assignmentOptions.legacyTargetBackends.end());
passManager.addPass(
IREE::HAL::createAssignLegacyTargetDevicesPass(options));
}
if (!assignmentOptions.targetDevices.empty()) {
passManager.addPass(IREE::HAL::createAssignTargetDevicesPass(
{assignmentOptions.targetDevices}));
AssignTargetDevicesPassOptions options;
options.targetDevices.assign(assignmentOptions.targetDevices.begin(),
assignmentOptions.targetDevices.end());
passManager.addPass(IREE::HAL::createAssignTargetDevicesPass(options));
}

// Create globals for each device (if needed).
@@ -62,7 +62,7 @@ bool emitEncodeFnDefs(const llvm::RecordKeeper &recordKeeper, raw_ostream &os) {
}

os << " if (";
auto printOneCondition = [&](Record *encodingExpr) {
auto printOneCondition = [&](const Record *encodingExpr) {
StringRef expr = encodingExpr->getValueAsString("expr");
std::vector<StringRef> params =
encodingExpr->getValueAsListOfStrings("params");
9 changes: 6 additions & 3 deletions compiler/src/iree/compiler/GlobalOptimization/Passes.cpp
@@ -81,9 +81,12 @@ void buildGlobalOptimizationPassPipeline(
// parameters are available for folding.
if (!transformOptions.options.parameterImportPaths.empty()) {
IREE::IO::Parameters::ImportParametersPassOptions importParametersOptions;
importParametersOptions.scopePaths =
transformOptions.options.parameterImportPaths;
importParametersOptions.keys = transformOptions.options.parameterImportKeys;
importParametersOptions.scopePaths.assign(
transformOptions.options.parameterImportPaths.begin(),
transformOptions.options.parameterImportPaths.end());
importParametersOptions.keys.assign(
transformOptions.options.parameterImportKeys.begin(),
transformOptions.options.parameterImportKeys.end());
importParametersOptions.maximumSize =
transformOptions.options.parameterImportMaximumSize;
mainPassManager.addPass(IREE::IO::Parameters::createImportParametersPass(
2 changes: 1 addition & 1 deletion third_party/llvm-project
Submodule llvm-project updated 1112 files
