Migrate LLVMCPU vectorization to its own pass. (iree-org#12837)
The related code below is deprecated because it is no longer used by
anyone:

- Deprecate SingleTilingExpert
- Deprecate StrategyVectorize pass

The VectorizationPattern and LinalgVectorizationPattern are not deleted
because they are used by LLVMGPU.
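
Pipelines that previously enabled vectorization through
LinalgSingleTilingExpertPassOptions now add the dedicated pass instead; a
minimal sketch of the new usage (mirroring the Passes.cpp changes below):

    LLVMCPUVectorizationPassOptions options;
    options.enableVectorMasking = enableVectorMasking;
    options.vectorizeGatherAccesses = true;
    nestedModulePM.addNestedPass<func::FuncOp>(
        createLLVMCPUVectorizationPass(options));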
hanhanW authored and NatashaKnk committed Jul 6, 2023
1 parent 55703f2 commit 5bd4dcf
Showing 13 changed files with 272 additions and 442 deletions.
3 changes: 3 additions & 0 deletions compiler/src/iree/compiler/Codegen/LLVMCPU/BUILD.bazel
@@ -34,6 +34,7 @@ iree_compiler_cc_library(
        "LLVMCPUTile.cpp",
        "LLVMCPUTileAndFuse.cpp",
        "LLVMCPUUnfuseFMAOps.cpp",
+       "LLVMCPUVectorization.cpp",
        "Passes.cpp",
        "TargetMLTransformInfo.cpp",
        "Utils.cpp",
@@ -74,6 +75,7 @@ iree_compiler_cc_library(
        "@llvm-project//llvm:TargetParser",
        "@llvm-project//mlir:AffineDialect",
        "@llvm-project//mlir:AffineToStandard",
+       "@llvm-project//mlir:AffineUtils",
        "@llvm-project//mlir:Analysis",
        "@llvm-project//mlir:ArithDialect",
        "@llvm-project//mlir:ArithToLLVM",
@@ -108,6 +110,7 @@ iree_compiler_cc_library(
        "@llvm-project//mlir:SCFDialect",
        "@llvm-project//mlir:SCFToControlFlow",
        "@llvm-project//mlir:SCFTransforms",
+       "@llvm-project//mlir:SCFUtils",
        "@llvm-project//mlir:TensorDialect",
        "@llvm-project//mlir:TensorTransforms",
        "@llvm-project//mlir:TosaDialect",
3 changes: 3 additions & 0 deletions compiler/src/iree/compiler/Codegen/LLVMCPU/CMakeLists.txt
@@ -38,6 +38,7 @@ iree_cc_library(
    "LLVMCPUTile.cpp"
    "LLVMCPUTileAndFuse.cpp"
    "LLVMCPUUnfuseFMAOps.cpp"
+   "LLVMCPUVectorization.cpp"
    "Passes.cpp"
    "TargetMLTransformInfo.cpp"
    "Utils.cpp"
@@ -53,6 +54,7 @@ iree_cc_library(
    LLVMTargetParser
    MLIRAffineDialect
    MLIRAffineToStandard
+   MLIRAffineUtils
    MLIRAnalysis
    MLIRArithDialect
    MLIRArithToLLVM
@@ -86,6 +88,7 @@ iree_cc_library(
    MLIRSCFDialect
    MLIRSCFToControlFlow
    MLIRSCFTransforms
+   MLIRSCFUtils
    MLIRTensorDialect
    MLIRTensorTransforms
    MLIRTosaDialect
230 changes: 230 additions & 0 deletions compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUVectorization.cpp
@@ -0,0 +1,230 @@
// Copyright 2023 The IREE Authors
//
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include "iree/compiler/Codegen/PassDetail.h"
#include "iree/compiler/Codegen/Passes.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/LoopUtils.h"
#include "mlir/Dialect/Linalg/Transforms/Hoisting.h"
#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/MemRef/Transforms/Passes.h"
#include "mlir/Dialect/SCF/Utils/Utils.h"
#include "mlir/Dialect/Vector/Transforms/LoweringPatterns.h"
#include "mlir/Dialect/Vector/Transforms/Passes.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"

#define DEBUG_TYPE "iree-llvmcpu-vectorization"

namespace mlir {
namespace iree_compiler {
namespace {
/// Returns the op that contains the lowering config. Checks whether the
/// provided op contains the lowering config and returns it. Otherwise, tries
/// to find the lowering config across the function. If there are multiple ops
/// with the same lowering config, returns the first one found. Returns
/// failure if there are multiple ops with different lowering configs.
/// TODO(hanchung): This is copied from LinalgTensorCodegenDriver.cpp. We
/// should refactor it to Utils.h.
static FailureOr<Operation *> getRootOp(Operation *op) {
  // Check for self first.
  if (iree_compiler::getLoweringConfig(op)) {
    return op;
  }

  // Get the function op.
  auto funcOp = dyn_cast<func::FuncOp>(op);
  if (!funcOp) {
    funcOp = op->getParentOfType<func::FuncOp>();
  }

  assert(funcOp && "Missing funcOp");

  Operation *rootOp = nullptr;
  mlir::iree_compiler::IREE::Codegen::LoweringConfigAttr rootLoweringConfig;
  auto result = funcOp.walk([&](Operation *op) -> WalkResult {
    auto loweringConfig = iree_compiler::getLoweringConfig(op);
    if (!loweringConfig) {
      return WalkResult::advance();
    }
    if (rootLoweringConfig) {
      if (rootLoweringConfig != loweringConfig) {
        return WalkResult::interrupt();
      }
    } else {
      rootOp = op;
      rootLoweringConfig = loweringConfig;
    }
    return WalkResult::advance();
  });

  if (!rootOp || result.wasInterrupted()) {
    return failure();
  }
  return rootOp;
}

/// Computes the canonical shape used to vectorize this dispatch. Retrieves
/// the vectorization tile sizes (parallel and reduction levels) out of the
/// lowering config and adjusts them to the format expected by the Linalg
/// vectorizer.
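/// For example (illustrative values, not from this commit): with three tiling
/// levels, parallel tile sizes [8, 32, 0] and reduction tile sizes [0, 0, 16]
/// combine into the canonical vector shape [8, 32, 16]; any zeros remaining
/// afterwards are replaced with 1.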
static SmallVector<int64_t> getCanonicalVectorShape(func::FuncOp funcOp) {
  FailureOr<Operation *> rootOp = getRootOp(funcOp);
  if (failed(rootOp)) {
    return {};
  }

  unsigned numTileLevels =
      mlir::iree_compiler::getNumTileLevels(rootOp.value());
  if (numTileLevels < 3) {
    return {};
  }

  // Retrieve the tile sizes from the last two tiling levels (parallel and
  // reduction) used for vectorization.
  SmallVector<int64_t> canonicalVectorShape =
      mlir::iree_compiler::getTileSizes(rootOp.value(), numTileLevels - 2);
  SmallVector<int64_t> reductionTileSizes =
      mlir::iree_compiler::getTileSizes(rootOp.value(), numTileLevels - 1);

  if (!reductionTileSizes.empty()) {
    assert(canonicalVectorShape.size() == reductionTileSizes.size() &&
           "Unexpected tile sizes");

    // Combine the reduction tile sizes with the parallel tile sizes already
    // in the canonical vector shape.
    for (int i = 0, end = canonicalVectorShape.size(); i < end; ++i) {
      if (reductionTileSizes[i] > 0)
        canonicalVectorShape[i] = reductionTileSizes[i];
    }
  }

  // Replace zeros in canonical vector shape to turn it into a valid shape.
  std::replace(canonicalVectorShape.begin(), canonicalVectorShape.end(), 0, 1);
  return canonicalVectorShape;
}

// Given the canonical vector shape of a dispatch, returns the vector sizes
// for a particular linalg op within that dispatch.
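// For example (illustrative values, not from this commit): with canonical
// vector shape [8, 32, 16] and a two-loop linalg op whose static loop ranges
// are [16, ShapedType::kDynamic], the result is [max(8, 16), 32] = [16, 32].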
static SmallVector<int64_t> getVectorSizes(
    linalg::LinalgOp linalgOp, ArrayRef<int64_t> canonicalVectorShape) {
  FailureOr<Operation *> rootOp = getRootOp(linalgOp);
  if (failed(rootOp)) {
    return {};
  }

  // TODO: Infer the tile sizes for an op that is not the root op.
  if (*rootOp != linalgOp.getOperation()) {
    return {};
  }

  if (canonicalVectorShape.empty()) {
    return {};
  }

  assert(canonicalVectorShape.size() >= linalgOp.getNumLoops() &&
         "Unexpected canonical vector shape or number of loops");

  // Return the valid canonical vector shape subset based on the number of
  // loops of the linalg op.
  SmallVector<int64_t> vecSize(
      canonicalVectorShape.take_front(linalgOp.getNumLoops()));
  for (auto [idx, val] : llvm::enumerate(linalgOp.getStaticLoopRanges())) {
    if (ShapedType::isDynamic(val)) continue;
    vecSize[idx] = std::max(vecSize[idx], val);
  }

  return vecSize;
}

class LLVMCPUVectorizationPass
    : public LLVMCPUVectorizationBase<LLVMCPUVectorizationPass> {
 public:
  using LLVMCPUVectorizationBase::LLVMCPUVectorizationBase;
  LLVMCPUVectorizationPass(const LLVMCPUVectorizationPassOptions &options) {
    this->enableVectorMasking.setValue(options.enableVectorMasking);
    this->vectorizePadding.setValue(options.vectorizePadding);
    this->vectorizeGatherAccesses.setValue(options.vectorizeGatherAccesses);
  }

  void getDependentDialects(DialectRegistry &registry) const override {
    registry.insert<tensor::TensorDialect, linalg::LinalgDialect,
                    vector::VectorDialect>();
  }
  void runOnOperation() override;
};

void LLVMCPUVectorizationPass::runOnOperation() {
  MLIRContext *context = &getContext();
  auto funcOp = getOperation();
  SmallVector<int64_t> canonicalVectorShape;
  if (enableVectorMasking) {
    canonicalVectorShape = getCanonicalVectorShape(funcOp);
  }

  IRRewriter rewriter(context);
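  // Collect the candidate ops up front so that vectorization, which rewrites
  // the IR, does not invalidate the walk.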
  SmallVector<linalg::LinalgOp> candidates;
  funcOp.walk(
      [&](linalg::LinalgOp linalgOp) { candidates.push_back(linalgOp); });
  for (auto linalgOp : candidates) {
    SmallVector<int64_t> vectorSizes;
    if (enableVectorMasking) {
      vectorSizes.append(getVectorSizes(linalgOp, canonicalVectorShape));
    }
    (void)linalg::vectorize(rewriter, linalgOp, vectorSizes,
                            vectorizeGatherAccesses);
  }

  // TODO: Move this down the pipeline once we have the ODM-based masking
  // representation.
  RewritePatternSet vectorizationPatterns(funcOp.getContext());
  vector::populateVectorMaskLoweringPatternsForSideEffectingOps(
      vectorizationPatterns);
  vector::populateVectorTransferPermutationMapLoweringPatterns(
      vectorizationPatterns);
  vector::populateVectorReductionToContractPatterns(vectorizationPatterns);
  vectorizationPatterns.add<linalg::LinalgCopyVTRForwardingPattern,
                            linalg::LinalgCopyVTWForwardingPattern>(
      funcOp.getContext(), /*benefit=*/2);
  vector::TransferReadOp::getCanonicalizationPatterns(vectorizationPatterns,
                                                      funcOp.getContext());
  vector::TransferWriteOp::getCanonicalizationPatterns(vectorizationPatterns,
                                                       funcOp.getContext());
  (void)applyPatternsAndFoldGreedily(funcOp, std::move(vectorizationPatterns));

  // Apply the pad tensor op vectorization separately to avoid running the
  // GenericPadOpVectorizationPattern too early.
  // TODO: Improve once we have better infrastructure to control pattern
  // application.
  if (vectorizePadding) {
    RewritePatternSet patterns(funcOp.getContext());
    linalg::populatePadOpVectorizationPatterns(patterns);
    (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns));
  }

  // Fold away single-iteration affine.for and scf.for loops.
  funcOp.walk([](Operation *op) {
    if (auto forOp = dyn_cast<AffineForOp>(op))
      (void)promoteIfSingleIteration(forOp);
    else if (auto forOp = dyn_cast<scf::ForOp>(op))
      (void)promoteIfSingleIteration(forOp);
  });
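  // Hoist loop-invariant vector transfer_read/transfer_write pairs out of
  // their enclosing loops, on both memref and tensor operands.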
  linalg::hoistRedundantVectorTransfers(funcOp);
  linalg::hoistRedundantVectorTransfersOnTensor(funcOp);
}
} // namespace

std::unique_ptr<OperationPass<func::FuncOp>> createLLVMCPUVectorizationPass() {
  return std::make_unique<LLVMCPUVectorizationPass>();
}
std::unique_ptr<OperationPass<func::FuncOp>> createLLVMCPUVectorizationPass(
    const LLVMCPUVectorizationPassOptions &options) {
  return std::make_unique<LLVMCPUVectorizationPass>(options);
}
} // namespace iree_compiler
} // namespace mlir
36 changes: 11 additions & 25 deletions compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.cpp
@@ -339,12 +339,11 @@ void addCPUBufferOpsTileAndVectorizePipeline(OpPassManager &passManager,
      static_cast<int64_t>(StrategyTilingLevel::ParallelTiles)));
  nestedModulePM.addNestedPass<func::FuncOp>(createLLVMCPUPeelPass());
  {
-    LinalgSingleTilingExpertPassOptions options;
-    options.vectorize = true;
+    LLVMCPUVectorizationPassOptions options;
     options.enableVectorMasking = enableVectorMasking;
     options.vectorizeGatherAccesses = true;
     nestedModulePM.addNestedPass<func::FuncOp>(
-        createLinalgSingleTilingExpertPass(options));
+        createLLVMCPUVectorizationPass(options));
     nestedModulePM.addNestedPass<func::FuncOp>(createCanonicalizerPass());
     nestedModulePM.addNestedPass<func::FuncOp>(createCSEPass());
  }
@@ -378,13 +377,12 @@ void addDoubleTilingPadExpertPassPipeline(OpPassManager &passManager,
      createLLVMCPUTensorPadPass(LLVMCPUTensorPadOption::ReductionDims));

  {
-    LinalgSingleTilingExpertPassOptions options;
-    options.vectorize = true;
+    LLVMCPUVectorizationPassOptions options;
     options.enableVectorMasking = enableVectorMasking;
     options.vectorizePadding = true;
     options.vectorizeGatherAccesses = true;
     nestedModulePM.addNestedPass<func::FuncOp>(
-        createLinalgSingleTilingExpertPass(options));
+        createLLVMCPUVectorizationPass(options));
     nestedModulePM.addNestedPass<func::FuncOp>(createCanonicalizerPass());
     nestedModulePM.addNestedPass<func::FuncOp>(createCSEPass());
  }
@@ -478,17 +476,13 @@ void addMultiTilingExpertPassPipeline(OpPassManager &passManager,
        createDecomposePackUnPackOpsPass());
    nestedModulePM.addNestedPass<func::FuncOp>(createCanonicalizerPass());
    nestedModulePM.addNestedPass<func::FuncOp>(createCSEPass());
-    LinalgSingleTilingExpertPassOptions options;
-    options.vectorize = true;
+    LLVMCPUVectorizationPassOptions options;
     options.enableVectorMasking = enableVectorMasking;
     options.vectorizeGatherAccesses = true;
     nestedModulePM.addNestedPass<func::FuncOp>(
-        createLinalgSingleTilingExpertPass(options));
+        createLLVMCPUVectorizationPass(options));
     nestedModulePM.addNestedPass<func::FuncOp>(createCanonicalizerPass());
     nestedModulePM.addNestedPass<func::FuncOp>(createCSEPass());
-    // TODO(hanchung): Merge two vectorization passes into a pass. All the ops
-    // should be vectorized altogether. Otherwise, there would be tensor.empty
-    // ops which becomes a stack allocation in bufferization.
  }

  addBufferizePasses(nestedModulePM);
@@ -538,18 +532,13 @@ void addConvTileAndDecomposeExpertPassPipeline(OpPassManager &passManager,
    nestedModulePM.addNestedPass<func::FuncOp>(createVectorizePadPass());
  }

-  // Add the sandbox single tiling expert to vectorize.
-  // We can't do the vectorization in the tiling expert above due to an issue in
-  // codegen strategy pipeline. Since we are moving to the transform dialect, we
-  // choose to have a workaround here by splitting them into two stages.
  {
-    LinalgSingleTilingExpertPassOptions options;
-    options.vectorize = true;
+    LLVMCPUVectorizationPassOptions options;
     options.enableVectorMasking = enableVectorMasking;
     options.vectorizePadding = true;
     options.vectorizeGatherAccesses = true;
     nestedModulePM.addNestedPass<func::FuncOp>(
-        createLinalgSingleTilingExpertPass(options));
+        createLLVMCPUVectorizationPass(options));
     nestedModulePM.addNestedPass<func::FuncOp>(createCanonicalizerPass());
     nestedModulePM.addNestedPass<func::FuncOp>(createCSEPass());
  }
@@ -582,12 +571,9 @@ void addMmt4dTilingExpertPassPipeline(OpPassManager &passManager) {
  nestedModulePM.addNestedPass<func::FuncOp>(createLLVMCPUTilePass(
      static_cast<int64_t>(StrategyTilingLevel::ReductionTiles)));

-  {
-    LinalgSingleTilingExpertPassOptions options;
-    options.vectorize = true;
-    nestedModulePM.addNestedPass<func::FuncOp>(
-        createLinalgSingleTilingExpertPass(options));
-  }
+  nestedModulePM.addNestedPass<func::FuncOp>(createLLVMCPUVectorizationPass());
+  nestedModulePM.addNestedPass<func::FuncOp>(createCanonicalizerPass());
+  nestedModulePM.addNestedPass<func::FuncOp>(createCSEPass());

  addBufferizePasses(nestedModulePM);

Expand Down
9 changes: 9 additions & 0 deletions compiler/src/iree/compiler/Codegen/Passes.h
@@ -333,6 +333,15 @@ std::unique_ptr<OperationPass<func::FuncOp>> createLLVMCPUPeelPass();
std::unique_ptr<OperationPass<func::FuncOp>> createLLVMCPUSplitReductionPass(
    bool enableReassociateFpReductions = false);

+struct LLVMCPUVectorizationPassOptions {
+  bool enableVectorMasking = false;
+  bool vectorizePadding = false;
+  bool vectorizeGatherAccesses = false;
+};
+std::unique_ptr<OperationPass<func::FuncOp>> createLLVMCPUVectorizationPass();
+std::unique_ptr<OperationPass<func::FuncOp>> createLLVMCPUVectorizationPass(
+    const LLVMCPUVectorizationPassOptions &options);
+
/// Performs the final conversion to LLVM dialect.
std::unique_ptr<OperationPass<ModuleOp>> createConvertToLLVMPass(
    bool reassociateFpReordering = false);