From ae93524006b67741fa9116d184b05b2cc8584cc1 Mon Sep 17 00:00:00 2001
From: Ognjen Djuricic <160603639+odjuricicTT@users.noreply.github.com>
Date: Wed, 6 Nov 2024 16:53:21 +0100
Subject: [PATCH] [Optimizer] Add df and layout overrides (#1148)

Also move optimizer overrides to TTNN/
---
 .../ttmlir/Dialect/TT/Utils/OverrideParams.h  | 196 ------------------
 .../Dialect/TTNN/Analysis/LegalGridAnalysis.h |   2 +-
 .../Dialect/TTNN/Pipelines/TTNNPipelines.h    |  12 +-
 .../Dialect/TTNN/Transforms/Optimizer.h       |   4 +-
 .../ttmlir/Dialect/TTNN/Transforms/Passes.h   |   2 +-
 .../Dialect/TTNN/Utils/OptimizerOverrides.h   |  55 +++++
 .../TTNN/Analysis/LegalGridAnalysis.cpp       |  21 +-
 lib/Dialect/TTNN/Transforms/Optimizer.cpp     |  46 +++-
 lib/Dialect/TTNN/Utils/CMakeLists.txt         |   1 +
 lib/Dialect/TTNN/Utils/OptimizerOverrides.cpp | 192 +++++++++++++++++
 .../TTNN/input_layout_loc_override.mlir       |   2 +-
 ...le_add_with_loc_input_layout_override.mlir |   2 +-
 ...e_add_with_loc_output_layout_override.mlir |   6 +-
 .../TTNN/test_override_reshard_edges.mlir     |   2 +-
 .../TTNN/sharded/mnist_sharding_tiled.mlir    |   6 +-
 15 files changed, 328 insertions(+), 221 deletions(-)
 delete mode 100644 include/ttmlir/Dialect/TT/Utils/OverrideParams.h
 create mode 100644 include/ttmlir/Dialect/TTNN/Utils/OptimizerOverrides.h
 create mode 100644 lib/Dialect/TTNN/Utils/OptimizerOverrides.cpp

diff --git a/include/ttmlir/Dialect/TT/Utils/OverrideParams.h b/include/ttmlir/Dialect/TT/Utils/OverrideParams.h
deleted file mode 100644
index 56cde1c07..000000000
--- a/include/ttmlir/Dialect/TT/Utils/OverrideParams.h
+++ /dev/null
@@ -1,196 +0,0 @@
-// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
-//
-// SPDX-License-Identifier: Apache-2.0
-
-#ifndef TTMLIR_DIALECT_TT_UTILS_OVERRIDEPARAMS_H
-#define TTMLIR_DIALECT_TT_UTILS_OVERRIDEPARAMS_H
-
-#include "ttmlir/Dialect/TT/IR/TTOpsTypes.h"
-#include <llvm/Support/CommandLine.h>
-
-namespace mlir::tt {
-
-struct InputLayoutOverrideParams {
-  SmallVector<int64_t> operandIdxes;
-};
-
-struct InputLayoutOverrideParser
-    : public llvm::cl::parser<llvm::StringMap<InputLayoutOverrideParams>> {
-public:
-  InputLayoutOverrideParser(llvm::cl::Option &opt)
-      : llvm::cl::parser<llvm::StringMap<InputLayoutOverrideParams>>(opt) {}
-
-  bool parse(llvm::cl::Option &opt, StringRef argName, StringRef arg,
-             llvm::StringMap<InputLayoutOverrideParams> &value) {
-    SmallVector<StringRef> opOverrideList;
-    constexpr size_t kvPairSize = 2;
-    constexpr size_t iOpName = 0;
-    constexpr size_t iOperands = 1;
-    constexpr char opSeparator = ',';
-    constexpr char opNameSeparator = '=';
-    constexpr char opParamSeparator = ':';
-
-    arg.split(opOverrideList, opSeparator);
-    for (const StringRef override : opOverrideList) {
-      SmallVector<StringRef> opOverrideParts;
-      override.split(opOverrideParts, opNameSeparator);
-      if (opOverrideParts.size() != kvPairSize) {
-        opt.error("Invalid format for input layouts override: " + override);
-        return true;
-      }
-
-      SmallVector<int64_t> operandIndexes;
-      SmallVector<StringRef> operandIndexParts;
-
-      // Parse operand indexes.
-      opOverrideParts[iOperands].split(operandIndexParts, opParamSeparator);
-      for (const StringRef operandIndexPart : operandIndexParts) {
-        int64_t operandIndexValue;
-        if (operandIndexPart.getAsInteger(10 /*Radix*/, operandIndexValue)) {
-          opt.error("Invalid operand index: " + operandIndexPart);
-          return true;
-        }
-        operandIndexes.push_back(operandIndexValue);
-      }
-
-      // Set parsed op overrides.
-      value[opOverrideParts[iOpName]] =
-          InputLayoutOverrideParams{std::move(operandIndexes)};
-    }
-    return false;
-  }
-
-  static void print(llvm::raw_ostream &os,
-                    const llvm::StringMap<InputLayoutOverrideParams> &value) {
-    os << "insert-memreconfig=";
-    size_t count = 0;
-    for (const auto &entry : value) {
-      os << entry.getKey() << "=";
-      const InputLayoutOverrideParams &params = entry.getValue();
-      for (int64_t operandIdx : params.operandIdxes) {
-        os << operandIdx
-           << (operandIdx < static_cast<int64_t>(params.operandIdxes.size()) - 1
-                   ? ':'
-                   : char());
-      }
-      if (++count < value.size()) {
-        os << ",";
-      }
-    }
-    os << "\n";
-  }
-};
-
-struct OutputLayoutOverrideParams {
-  SmallVector<int64_t> grid;
-  MemorySpace memorySpace;
-  TensorMemoryLayout memoryLayout;
-};
-
-struct OutputLayoutOverrideParser
-    : public llvm::cl::parser<llvm::StringMap<OutputLayoutOverrideParams>> {
-public:
-  OutputLayoutOverrideParser(llvm::cl::Option &opt)
-      : llvm::cl::parser<llvm::StringMap<OutputLayoutOverrideParams>>(opt) {}
-
-  bool parse(llvm::cl::Option &opt, StringRef argName, StringRef arg,
-             llvm::StringMap<OutputLayoutOverrideParams> &value) {
-    SmallVector<StringRef> opOverrideList;
-    constexpr size_t kMaxGridSize = 2;
-    constexpr size_t kvPairSize = 2;
-    constexpr size_t kMaxLayoutOverrideParams = 3;
-    constexpr size_t iOpName = 0;
-    constexpr size_t iLayoutOverrideParams = 1;
-    constexpr size_t iGrid = 0;
-    constexpr size_t iMemorySpace = 1;
-    constexpr size_t iMemoryLayout = 2;
-    constexpr char opSeparator = ',';
-    constexpr char opNameSeparator = '=';
-    constexpr char paramSepataor = ':';
-    constexpr char gridSeparator = 'x';
-
-    arg.split(opOverrideList, opSeparator);
-    for (const StringRef override : opOverrideList) {
-      SmallVector<StringRef> opOverrideParts;
-      override.split(opOverrideParts, opNameSeparator);
-      if (opOverrideParts.size() != kvPairSize) {
-        opt.error("Invalid format for override grid sizes: " + override);
-        return true;
-      }
-
-      SmallVector<StringRef> layoutParamParts;
-      // Split into layout parameters.
-      opOverrideParts[iLayoutOverrideParams].split(layoutParamParts,
-                                                   paramSepataor);
-      if (layoutParamParts.size() != kMaxLayoutOverrideParams) {
-        opt.error("Invalid number of layout parameters: " +
-                  std::to_string(layoutParamParts.size()));
-        return true;
-      }
-
-      // Parse grid.
-      SmallVector<int64_t> grid;
-      SmallVector<StringRef> gridParts;
-      layoutParamParts[iGrid].split(gridParts, gridSeparator);
-      for (const StringRef gridPart : gridParts) {
-        int64_t gridValue;
-        if (gridPart.getAsInteger(10 /*Radix*/, gridValue)) {
-          opt.error("Invalid grid size: " + gridPart);
-          return true;
-        }
-        grid.push_back(gridValue);
-      }
-
-      // Parse memory space.
-      std::optional<MemorySpace> memorySpace =
-          mlir::tt::symbolizeMemorySpace(layoutParamParts[iMemorySpace]);
-      if (!memorySpace.has_value()) {
-        opt.error("Invalid memory space: " + layoutParamParts[iMemorySpace]);
-        return true;
-      }
-
-      // Parse tensor memory layout.
-      std::optional<TensorMemoryLayout> memoryLayout =
-          mlir::tt::symbolizeTensorMemoryLayout(
-              layoutParamParts[iMemoryLayout]);
-      if (!memoryLayout.has_value()) {
-        opt.error("Invalid tensor memory layout: " +
-                  layoutParamParts[iMemoryLayout]);
-        return true;
-      }
-
-      // Set parsed op overrides.
-      value[opOverrideParts[iOpName]] = OutputLayoutOverrideParams{
-          grid, memorySpace.value(), memoryLayout.value()};
-    }
-    return false;
-  }
-
-  static void print(llvm::raw_ostream &os,
-                    const llvm::StringMap<OutputLayoutOverrideParams> &value) {
-    os << "override-output-layout=";
-    size_t count = 0;
-    for (const auto &entry : value) {
-      os << entry.getKey() << "=";
-      const OutputLayoutOverrideParams &params = entry.getValue();
-      // Print grid values
-      for (size_t i = 0; i < params.grid.size(); ++i) {
-        os << params.grid[i];
-        if (i < params.grid.size() - 1) {
-          os << "x";
-        }
-      }
-      // Print memory space and memory layout
-      os << ":" << mlir::tt::stringifyMemorySpace(params.memorySpace);
-      os << ":" << mlir::tt::stringifyTensorMemoryLayout(params.memoryLayout);
-      if (++count < value.size()) {
-        os << ",";
-      }
-    }
-    os << "\n";
-  }
-};
-
-} // namespace mlir::tt
-
-#endif
diff --git a/include/ttmlir/Dialect/TTNN/Analysis/LegalGridAnalysis.h b/include/ttmlir/Dialect/TTNN/Analysis/LegalGridAnalysis.h
index 0f7a3a39c..7d7f4a179 100644
--- a/include/ttmlir/Dialect/TTNN/Analysis/LegalGridAnalysis.h
+++ b/include/ttmlir/Dialect/TTNN/Analysis/LegalGridAnalysis.h
@@ -6,8 +6,8 @@
 #define TTMLIR_DIALECT_TTNN_ANALYSIS_LEGALGRIDANALYSIS_H
 
 #include "ttmlir/Dialect/TT/IR/TTOpsTypes.h"
-#include "ttmlir/Dialect/TT/Utils/OverrideParams.h"
 #include "ttmlir/Dialect/TTNN/Analysis/TTNNAnalysis.h"
+#include "ttmlir/Dialect/TTNN/Utils/OptimizerOverrides.h"
 #include "llvm/ADT/StringMap.h"
 
 namespace mlir::tt::ttnn {
diff --git a/include/ttmlir/Dialect/TTNN/Pipelines/TTNNPipelines.h b/include/ttmlir/Dialect/TTNN/Pipelines/TTNNPipelines.h
index d808613b9..7e5829873 100644
--- a/include/ttmlir/Dialect/TTNN/Pipelines/TTNNPipelines.h
+++ b/include/ttmlir/Dialect/TTNN/Pipelines/TTNNPipelines.h
@@ -7,7 +7,7 @@
 
 #include "mlir/Pass/PassOptions.h"
 #include "ttmlir/Dialect/TT/Utils/MemoryLayoutAnalysisParams.h"
-#include "ttmlir/Dialect/TT/Utils/OverrideParams.h"
+#include "ttmlir/Dialect/TTNN/Utils/OptimizerOverrides.h"
 
 namespace mlir::tt::ttnn {
 
@@ -46,17 +46,19 @@ struct TTIRToTTNNBackendPipelineOptions
  // The format is a comma-separated list of op name = output layout params
  // entries, where the layout params themselves are separated by ":"
  //
- // op_name=grid_size:memory_space:tensor_memory_layout
+ // op_name=grid_size:memory_space:tensor_memory_layout:memory_layout:data_type
  //
  // * grid_size=2x2
  // * memory_space: system, mmio, dram or l1
  // * tensor_memory_layout: none, interleaved, single_bank, height_sharded,
  //   width_sharded or block_sharded
+ // * memory_layout: row_major or tile
+ // * data_type: f32, f16, bf16, bfp_f8, bfp_bf8, bfp_f4, bfp_bf4, bfp_f2,
+ //   bfp_bf2, u32, u16, u8
  //
- // Full Example: "op1=2x2:dram:interleaved,op2=4x4:l1:block_sharded"
+ // Full Example:
+ // "op1=2x2:dram:interleaved:tile:f32,op2=4x4:l1:block_sharded:row_major:f16"
+ //
- // This will set the output layout for op1 to grid 2x2,dram,interleaved and
- // op2 4x4,l1,block_sharded.
+ //
 // Note: This option is only valid if optimizerPassEnabled is true.
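+ //
+ // A full invocation, as exercised by the lit tests in this change:
+ //
+ //   ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true
+ //     override-output-layout=add_1_2=1x1:dram:interleaved:row_major:f32" input.mlir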
 //
diff --git a/include/ttmlir/Dialect/TTNN/Transforms/Optimizer.h b/include/ttmlir/Dialect/TTNN/Transforms/Optimizer.h
index e7d9c459f..2f075165e 100644
--- a/include/ttmlir/Dialect/TTNN/Transforms/Optimizer.h
+++ b/include/ttmlir/Dialect/TTNN/Transforms/Optimizer.h
@@ -103,14 +103,14 @@ class TTNNOptimizerBase : public ::mlir::OperationPass<::mlir::ModuleOp> {
 protected:
   ::mlir::Pass::Option<llvm::StringMap<InputLayoutOverrideParams>,
-                       mlir::tt::InputLayoutOverrideParser>
+                       mlir::tt::ttnn::InputLayoutOverrideParser>
       overrideInputLayout{
           *this, "insert-memreconfig",
           ::llvm::cl::desc(
               "Manually insert memory reconfig op for specific op's operand."),
           ::llvm::cl::init(llvm::StringMap<InputLayoutOverrideParams>())};
   ::mlir::Pass::Option<llvm::StringMap<OutputLayoutOverrideParams>,
-                       mlir::tt::OutputLayoutOverrideParser>
+                       mlir::tt::ttnn::OutputLayoutOverrideParser>
       overrideOutputLayout{
           *this, "override-output-layout",
           ::llvm::cl::desc("Override output tensor layout for specific ops."),
diff --git a/include/ttmlir/Dialect/TTNN/Transforms/Passes.h b/include/ttmlir/Dialect/TTNN/Transforms/Passes.h
index fa05f41de..990fc3e82 100644
--- a/include/ttmlir/Dialect/TTNN/Transforms/Passes.h
+++ b/include/ttmlir/Dialect/TTNN/Transforms/Passes.h
@@ -8,10 +8,10 @@
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Pass/PassManager.h"
-#include "ttmlir/Dialect/TT/Utils/OverrideParams.h"
 #include "ttmlir/Dialect/TTNN/Analysis/MemoryLayoutAnalysis.h"
 #include "ttmlir/Dialect/TTNN/IR/TTNN.h"
 #include "ttmlir/Dialect/TTNN/IR/TTNNOps.h"
+#include "ttmlir/Dialect/TTNN/Utils/OptimizerOverrides.h"
 
 namespace mlir::tt::ttnn {
 
diff --git a/include/ttmlir/Dialect/TTNN/Utils/OptimizerOverrides.h b/include/ttmlir/Dialect/TTNN/Utils/OptimizerOverrides.h
new file mode 100644
index 000000000..6ff9d5445
--- /dev/null
+++ b/include/ttmlir/Dialect/TTNN/Utils/OptimizerOverrides.h
@@ -0,0 +1,55 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef TTMLIR_DIALECT_TTNN_UTILS_OPTIMIZEROVERRIDES_H
+#define TTMLIR_DIALECT_TTNN_UTILS_OPTIMIZEROVERRIDES_H
+
+#include "ttmlir/Dialect/TT/IR/TTOpsTypes.h"
+#include "ttmlir/Dialect/TTNN/IR/TTNNOpsAttrs.h"
+#include <llvm/Support/CommandLine.h>
+#include <cstdint>
+
+namespace mlir::tt::ttnn {
+
+struct OutputLayoutOverrideParams {
+  SmallVector<int64_t> grid;
+  tt::MemorySpace memorySpace;
+  tt::TensorMemoryLayout tensorMemoryLayout; // INTERLEAVED / SHARDED etc.
+  tt::ttnn::Layout memoryLayout; // ROW_MAJOR / TILE
+  tt::DataType dataType;
+};
+
+struct InputLayoutOverrideParams {
+  SmallVector<int64_t> operandIdxes;
+};
+
+struct OutputLayoutOverrideParser
+    : public llvm::cl::parser<llvm::StringMap<OutputLayoutOverrideParams>> {
+public:
+  OutputLayoutOverrideParser(llvm::cl::Option &opt)
+      : llvm::cl::parser<llvm::StringMap<OutputLayoutOverrideParams>>(opt) {}
+
+  bool parse(llvm::cl::Option &opt, StringRef argName, StringRef arg,
+             llvm::StringMap<OutputLayoutOverrideParams> &value);
+
+  static void print(llvm::raw_ostream &os,
+                    const llvm::StringMap<OutputLayoutOverrideParams> &value);
+};
+
+struct InputLayoutOverrideParser
+    : public llvm::cl::parser<llvm::StringMap<InputLayoutOverrideParams>> {
+public:
+  InputLayoutOverrideParser(llvm::cl::Option &opt)
+      : llvm::cl::parser<llvm::StringMap<InputLayoutOverrideParams>>(opt) {}
+
+  bool parse(llvm::cl::Option &opt, StringRef argName, StringRef arg,
+             llvm::StringMap<InputLayoutOverrideParams> &value);
+
+  static void print(llvm::raw_ostream &os,
+                    const llvm::StringMap<InputLayoutOverrideParams> &value);
+};
+
+} // namespace mlir::tt::ttnn
+
+#endif
diff --git a/lib/Dialect/TTNN/Analysis/LegalGridAnalysis.cpp b/lib/Dialect/TTNN/Analysis/LegalGridAnalysis.cpp
index 25997d2b8..a13bb7a19 100644
--- a/lib/Dialect/TTNN/Analysis/LegalGridAnalysis.cpp
+++ b/lib/Dialect/TTNN/Analysis/LegalGridAnalysis.cpp
@@ -6,6 +6,8 @@
 #include "ttmlir/Dialect/TT/IR/TTOpsTypes.h"
 #include "ttmlir/Dialect/TTNN/IR/TTNN.h"
 #include "ttmlir/Dialect/TTNN/IR/TTNNOps.h"
+#include "ttmlir/Dialect/TTNN/IR/TTNNOpsAttrs.h"
+#include "ttmlir/Dialect/TTNN/Utils/Utils.h"
 
 namespace mlir::tt::ttnn {
 
@@ -84,12 +86,21 @@ bool LegalGridAnalysis::applyOverrides() {
       mlir::cast<RankedTensorType>(op->getResult(0).getType());
   tt::LayoutAttr layout =
       mlir::cast<tt::LayoutAttr>(tensorType.getEncoding());
+  GridAttr grid =
+      GridAttr::get(op->getContext(), ArrayRef<int64_t>(override.grid));
+
+  // Create element type for the new layout.
+  Type elementType =
+      utils::createRowMajorTypeFromDtype(op->getContext(), override.dataType);
+  if (override.memoryLayout == Layout::Tile) {
+    elementType = TileType::get(op->getContext(), elementType);
+  }
+
   analysisResult.push_back(
-      layout.withMemorySpace(op->getContext(), override.memorySpace)
-          .withMemoryLayout(op->getContext(), override.memoryLayout)
-          .withGrid(op->getContext(), tensorType,
-                    GridAttr::get(op->getContext(),
-                                  ArrayRef<int64_t>(override.grid))));
+      layout.withGrid(op->getContext(), tensorType, grid)
+          .withMemorySpace(op->getContext(), override.memorySpace)
+          .withMemoryLayout(op->getContext(), override.tensorMemoryLayout)
+          .withElementType(op->getContext(), elementType));
   return true;
 }
diff --git a/lib/Dialect/TTNN/Transforms/Optimizer.cpp b/lib/Dialect/TTNN/Transforms/Optimizer.cpp
index 2af08e9c9..c3d4d304d 100644
--- a/lib/Dialect/TTNN/Transforms/Optimizer.cpp
+++ b/lib/Dialect/TTNN/Transforms/Optimizer.cpp
@@ -25,6 +25,8 @@ class TTNNOptimizer : public impl::TTNNOptimizerBase<TTNNOptimizer> {
     //     Perform final configuration analysis.
     //     Apply graph transformations based on analysis results.
     //
+    assertOverridesValid();
+
    ModuleOp moduleOp = getOperation();

    // Get the max grid size from the system description.
@@ -130,7 +132,7 @@ class TTNNOptimizer : public impl::TTNNOptimizerBase<TTNNOptimizer> {
          mlir::cast<RankedTensorType>(op->getResult(0).getType());
      llvm::ArrayRef<int64_t> tensorShape = tensorType.getShape();

-      // Update the output layout attribute with the new grid size.
+      // Update the output layout attribute with the new one.
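+      // Besides the grid, the chosen config may change the memory space,
+      // tensor memory layout, and element type (row-major vs. tile, and the
+      // data type).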
      //
      if (opConfigAnalysis.getResult().contains(op)) {
        RankedTensorType newTensorType =
@@ -156,7 +158,14 @@ class TTNNOptimizer : public impl::TTNNOptimizerBase<TTNNOptimizer> {
          EmptyOp emptyOp =
              mlir::cast<EmptyOp>(op->getOperands().back().getDefiningOp());

-          emptyOp.setMemoryConfigAttr(MemoryConfigAttr::get(
+          emptyOp.setDtype(
+              utils::getDataTypeFromMemRef(ttLayoutAttr.getMemref()));
+          if (llvm::isa<TileType>(ttLayoutAttr.getElementType())) {
+            emptyOp.setLayout(ttnn::Layout::Tile);
+          } else {
+            emptyOp.setLayout(ttnn::Layout::RowMajor);
+          }
+          emptyOp.setMemoryConfigAttr(ttnn::MemoryConfigAttr::get(
              op->getContext(),
              TensorMemoryLayoutAttr::get(op->getContext(),
                                          tensorMemoryLayout),
@@ -207,6 +216,39 @@ class TTNNOptimizer : public impl::TTNNOptimizerBase<TTNNOptimizer> {
        });
  }

+private:
+  void assertOverridesValid() {
+    // Check if each overridden op exists in the graph.
+    //
+    llvm::StringMap<bool> overriddenOpExists;
+    for (auto &override : overrideOutputLayout) {
+      overriddenOpExists[override.first()] = false;
+    }
+    for (auto &override : overrideInputLayout) {
+      overriddenOpExists[override.first()] = false;
+    }
+
+    ModuleOp moduleOp = getOperation();
+    moduleOp->walk([&](Operation *op) {
+      if (not isa<NameLoc>(op->getLoc())) {
+        return;
+      }
+
+      StringRef opLocName = mlir::cast<NameLoc>(op->getLoc()).getName();
+      if (overriddenOpExists.contains(opLocName)) {
+        overriddenOpExists[opLocName] = true;
+      }
+    });
+
+    for (auto &override : overriddenOpExists) {
+      if (!override.second) {
+        llvm::errs() << "Trying to override non-existent op: "
+                     << override.first() << "\n";
+        assert(false && "Trying to override non-existent op");
+      }
+    }
+  }
+
  void extractReshardEdges(ModuleOp &moduleOp,
                           std::unordered_set<Edge> &overrideReshardEdges) {
    moduleOp->walk([&](Operation *op) {
diff --git a/lib/Dialect/TTNN/Utils/CMakeLists.txt b/lib/Dialect/TTNN/Utils/CMakeLists.txt
index 410e764c4..f49f829e6 100644
--- a/lib/Dialect/TTNN/Utils/CMakeLists.txt
+++ b/lib/Dialect/TTNN/Utils/CMakeLists.txt
@@ -1,5 +1,6 @@
 add_mlir_dialect_library(TTMLIRTTNNUtils
   Utils.cpp
+  OptimizerOverrides.cpp
 
   ADDITIONAL_HEADER_DIRS
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/TTNN
diff --git a/lib/Dialect/TTNN/Utils/OptimizerOverrides.cpp b/lib/Dialect/TTNN/Utils/OptimizerOverrides.cpp
new file mode 100644
index 000000000..8554e1328
--- /dev/null
+++ b/lib/Dialect/TTNN/Utils/OptimizerOverrides.cpp
@@ -0,0 +1,192 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "ttmlir/Dialect/TTNN/Utils/OptimizerOverrides.h"
+
+namespace mlir::tt::ttnn {
+
+bool OutputLayoutOverrideParser::parse(
+    llvm::cl::Option &opt, StringRef argName, StringRef arg,
+    llvm::StringMap<OutputLayoutOverrideParams> &value) {
+  SmallVector<StringRef> opOverrideList;
+  constexpr size_t kMaxGridSize = 2;
+  constexpr size_t kvPairSize = 2;
+  constexpr size_t kMaxLayoutOverrideParams = 5;
+  constexpr size_t iOpName = 0;
+  constexpr size_t iLayoutOverrideParams = 1;
+  constexpr size_t iGrid = 0;
+  constexpr size_t iMemorySpace = 1;
+  constexpr size_t iTensorMemoryLayout = 2;
+  constexpr size_t iMemoryLayout = 3;
+  constexpr size_t iDataType = 4;
+  constexpr char opSeparator = ',';
+  constexpr char opNameSeparator = '=';
+  constexpr char paramSeparator = ':';
+  constexpr char gridSeparator = 'x';
+
+  arg.split(opOverrideList, opSeparator);
+  for (const StringRef override : opOverrideList) {
+    SmallVector<StringRef> opOverrideParts;
+    override.split(opOverrideParts, opNameSeparator);
+    if (opOverrideParts.size() != kvPairSize) {
+      opt.error("Invalid format for output layout override: " + override);
+      return true;
+    }
+
+    SmallVector<StringRef> layoutParamParts;
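+    // Expect exactly five ':'-separated fields:
+    // grid, memory space, tensor memory layout, memory layout, data type.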
+    // Split into layout parameters.
+    opOverrideParts[iLayoutOverrideParams].split(layoutParamParts,
+                                                 paramSeparator);
+    if (layoutParamParts.size() != kMaxLayoutOverrideParams) {
+      opt.error("Invalid number of layout parameters: " +
+                std::to_string(layoutParamParts.size()));
+      return true;
+    }
+
+    // Parse grid.
+    SmallVector<int64_t> grid;
+    SmallVector<StringRef> gridParts;
+    layoutParamParts[iGrid].split(gridParts, gridSeparator);
+    for (const StringRef gridPart : gridParts) {
+      int64_t gridValue;
+      if (gridPart.getAsInteger(10 /*Radix*/, gridValue)) {
+        opt.error("Invalid grid size: " + gridPart);
+        return true;
+      }
+      grid.push_back(gridValue);
+    }
+
+    // Parse memory space.
+    std::optional<mlir::tt::MemorySpace> memorySpace =
+        mlir::tt::symbolizeMemorySpace(layoutParamParts[iMemorySpace]);
+    if (!memorySpace.has_value()) {
+      opt.error("Invalid memory space: " + layoutParamParts[iMemorySpace]);
+      return true;
+    }
+
+    // Parse tensor memory layout.
+    std::optional<mlir::tt::TensorMemoryLayout> tensorMemoryLayout =
+        mlir::tt::symbolizeTensorMemoryLayout(
+            layoutParamParts[iTensorMemoryLayout]);
+    if (!tensorMemoryLayout.has_value()) {
+      opt.error("Invalid tensor memory layout: " +
+                layoutParamParts[iTensorMemoryLayout]);
+      return true;
+    }
+
+    // Parse memory layout.
+    std::optional<mlir::tt::ttnn::Layout> memoryLayout =
+        mlir::tt::ttnn::symbolizeLayout(layoutParamParts[iMemoryLayout]);
+    if (!memoryLayout.has_value()) {
+      opt.error("Invalid memory layout: " + layoutParamParts[iMemoryLayout]);
+      return true;
+    }
+
+    // Parse data type.
+    std::optional<mlir::tt::DataType> dataType =
+        mlir::tt::DataTypeStringToEnum(layoutParamParts[iDataType]);
+    if (!dataType.has_value()) {
+      opt.error("Invalid data type: " + layoutParamParts[iDataType]);
+      return true;
+    }
+
+    // Set parsed op overrides.
+    value[opOverrideParts[iOpName]] = OutputLayoutOverrideParams{
+        std::move(grid), memorySpace.value(), tensorMemoryLayout.value(),
+        memoryLayout.value(), dataType.value()};
+  }
+  return false;
+}
+
+void OutputLayoutOverrideParser::print(
+    llvm::raw_ostream &os,
+    const llvm::StringMap<OutputLayoutOverrideParams> &value) {
+  os << "override-output-layout=";
+  size_t count = 0;
+  for (const auto &entry : value) {
+    os << entry.getKey() << "=";
+    const OutputLayoutOverrideParams &params = entry.getValue();
+    // Print grid values
+    for (size_t i = 0; i < params.grid.size(); ++i) {
+      os << params.grid[i];
+      if (i < params.grid.size() - 1) {
+        os << "x";
+      }
+    }
+    // Print memory space and memory layout
+    os << ":" << mlir::tt::stringifyMemorySpace(params.memorySpace);
+    os << ":"
+       << mlir::tt::stringifyTensorMemoryLayout(params.tensorMemoryLayout);
+    os << ":" << mlir::tt::ttnn::stringifyLayout(params.memoryLayout);
+    os << ":" << mlir::tt::DataTypeEnumToString(params.dataType);
+    if (++count < value.size()) {
+      os << ",";
+    }
+  }
+  os << "\n";
+}
+
+bool InputLayoutOverrideParser::parse(
+    llvm::cl::Option &opt, StringRef argName, StringRef arg,
+    llvm::StringMap<InputLayoutOverrideParams> &value) {
+  SmallVector<StringRef> opOverrideList;
+  constexpr size_t kvPairSize = 2;
+  constexpr size_t iOpName = 0;
+  constexpr size_t iOperands = 1;
+  constexpr char opSeparator = ',';
+  constexpr char opNameSeparator = '=';
+  constexpr char opParamSeparator = ':';
+
+  arg.split(opOverrideList, opSeparator);
+  for (const StringRef override : opOverrideList) {
+    SmallVector<StringRef> opOverrideParts;
+    override.split(opOverrideParts, opNameSeparator);
+    if (opOverrideParts.size() != kvPairSize) {
+      opt.error("Invalid format for input layouts override: " + override);
+      return true;
+    }
+
+    SmallVector<int64_t> operandIndexes;
+    SmallVector<StringRef> operandIndexParts;
+
+    // Parse operand indexes.
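+    // Indexes are ':'-separated, e.g. "op=0:1" selects operands 0 and 1.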
+    opOverrideParts[iOperands].split(operandIndexParts, opParamSeparator);
+    for (const StringRef operandIndexPart : operandIndexParts) {
+      int64_t operandIndexValue;
+      if (operandIndexPart.getAsInteger(10 /*Radix*/, operandIndexValue)) {
+        opt.error("Invalid operand index: " + operandIndexPart);
+        return true;
+      }
+      operandIndexes.push_back(operandIndexValue);
+    }
+
+    // Set parsed op overrides.
+    value[opOverrideParts[iOpName]] =
+        InputLayoutOverrideParams{std::move(operandIndexes)};
+  }
+  return false;
+}
+
+void InputLayoutOverrideParser::print(
+    llvm::raw_ostream &os,
+    const llvm::StringMap<InputLayoutOverrideParams> &value) {
+  os << "insert-memreconfig=";
+  size_t count = 0;
+  for (const auto &entry : value) {
+    os << entry.getKey() << "=";
+    const InputLayoutOverrideParams &params = entry.getValue();
+    // Print ':'-separated operand indexes by position, without a trailing
+    // separator.
+    for (size_t i = 0; i < params.operandIdxes.size(); ++i) {
+      os << params.operandIdxes[i];
+      if (i < params.operandIdxes.size() - 1) {
+        os << ':';
+      }
+    }
+    if (++count < value.size()) {
+      os << ",";
+    }
+  }
+  os << "\n";
+}
+
+} // namespace mlir::tt::ttnn
diff --git a/test/ttmlir/Dialect/TTNN/input_layout_loc_override.mlir b/test/ttmlir/Dialect/TTNN/input_layout_loc_override.mlir
index 44c3d4e4d..8ecaf354d 100644
--- a/test/ttmlir/Dialect/TTNN/input_layout_loc_override.mlir
+++ b/test/ttmlir/Dialect/TTNN/input_layout_loc_override.mlir
@@ -1,4 +1,4 @@
-// RUN: ttmlir-opt --mlir-print-debuginfo --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true override-output-layout=matmul_1_in_1_layout=1x1:l1:interleaved" %s | FileCheck %s
+// RUN: ttmlir-opt --mlir-print-debuginfo --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true override-output-layout=matmul_1_in_1_layout=1x1:l1:interleaved:tile:bf16" %s | FileCheck %s
 #any_device = #tt.operand_constraint<dram|l1|scalar|tile|any_device|any_device_tile>
 #loc = loc("Matmul":4294967295:0)
 // CHECK-DAG: #[[LOC_MATMUL_IN0:.*]] = loc("matmul_1_in_0_layout"(#loc3))
diff --git a/test/ttmlir/Dialect/TTNN/multiple_add_with_loc_input_layout_override.mlir b/test/ttmlir/Dialect/TTNN/multiple_add_with_loc_input_layout_override.mlir
index b1951166a..4048f15fd 100644
--- a/test/ttmlir/Dialect/TTNN/multiple_add_with_loc_input_layout_override.mlir
+++ b/test/ttmlir/Dialect/TTNN/multiple_add_with_loc_input_layout_override.mlir
@@ -1,4 +1,4 @@
-// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memreconfig-enabled=true insert-memreconfig=add_0_1_2=0 override-output-layout=add_1_2=1x1:dram:interleaved" %s | FileCheck %s
+// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memreconfig-enabled=true insert-memreconfig=add_0_1_2=0 override-output-layout=add_1_2=1x1:dram:interleaved:row_major:f32" %s | FileCheck %s
 #any_device = #tt.operand_constraint<dram|l1|scalar|tile|any_device|any_device_tile>
 #loc = loc("test_ops.py:17_0_0":0:0)
 module attributes {} {
diff --git a/test/ttmlir/Dialect/TTNN/multiple_add_with_loc_output_layout_override.mlir b/test/ttmlir/Dialect/TTNN/multiple_add_with_loc_output_layout_override.mlir
index e1a34cc61..027e2ca8b 100644
--- a/test/ttmlir/Dialect/TTNN/multiple_add_with_loc_output_layout_override.mlir
+++ b/test/ttmlir/Dialect/TTNN/multiple_add_with_loc_output_layout_override.mlir
@@ -1,12 +1,12 @@
-// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true override-output-layout=add_1_0=4x4:dram:interleaved,add_2_0=4x4:l1:interleaved" %s | FileCheck %s
+// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true override-output-layout=add_1_0=4x4:dram:interleaved:row_major:bf16,add_2_0=4x4:l1:interleaved:tile:f32" %s | FileCheck %s
 #any_device = #tt.operand_constraint<dram|l1|scalar|tile|any_device|any_device_tile>
 #loc = loc("test_ops.py:17_0_0":0:0)
 module attributes {} {
   func.func @main(%arg0: tensor<1x32x32xf32> loc("test_ops.py:17_0_0":0:0), %arg1: tensor<1x32x32xf32> loc("test_ops.py:17_0_0":0:0), %arg2: tensor<1x32x32xf32> loc("test_ops.py:17_0_0":0:0)) -> (tensor<1x32x32xf32>, tensor<1x32x32xf32>) {
     // CHECK: #[[L1_:.*]] = #tt.memory_space<l1>
     // CHECK: #[[LAYOUT_0:.*]] = #tt.layout<(d0, d1, d2) -> (d0 * 32 + d1, d2), undef, <1x1>, memref<32x32xf32, #system>>
-    // CHECK: #[[LAYOUT_1:.*]] = #tt.layout<(d0, d1, d2) -> (d0 * 32 + d1, d2), undef, <4x4>, memref<8x8xf32, #dram>, interleaved>
-    // CHECK: #[[LAYOUT_2:.*]] = #tt.layout<(d0, d1, d2) -> (d0 * 32 + d1, d2), undef, <4x4>, memref<8x8xf32, #l1_>, interleaved>
+    // CHECK: #[[LAYOUT_1:.*]] = #tt.layout<(d0, d1, d2) -> (d0 * 32 + d1, d2), undef, <4x4>, memref<8x8xbf16, #dram>, interleaved>
+    // CHECK: #[[LAYOUT_2:.*]] = #tt.layout<(d0, d1, d2) -> (d0 * 32 + d1, d2), undef, <4x4>, memref<1x1x!tt.tile<32x32, f32>, #l1_>, interleaved>
     // CHECK: #[[LAYOUT_3:.*]] = #tt.layout<(d0, d1, d2) -> (d0 * 32 + d1, d2), undef, <8x8>, memref<4x4xf32, #dram>, interleaved>
     %0 = tensor.empty() : tensor<1x32x32xf32> loc(#loc5)
     // CHECK: %[[C:.*]] = "ttnn.add"[[C:.*]] -> tensor<1x32x32xf32, #[[LAYOUT_1]]>
diff --git a/test/ttmlir/Dialect/TTNN/test_override_reshard_edges.mlir b/test/ttmlir/Dialect/TTNN/test_override_reshard_edges.mlir
index 07868a8c5..8fa5d2430 100644
--- a/test/ttmlir/Dialect/TTNN/test_override_reshard_edges.mlir
+++ b/test/ttmlir/Dialect/TTNN/test_override_reshard_edges.mlir
@@ -1,4 +1,4 @@
-// RUN: ttmlir-opt --ttnn-optimizer="memory-layout-analysis-enabled=true memreconfig-enabled=true insert-memreconfig=add_0_1_2=0 override-output-layout=add_1_2=1x1:dram:interleaved" %s | FileCheck %s
+// RUN: ttmlir-opt --ttnn-optimizer="memory-layout-analysis-enabled=true memreconfig-enabled=true insert-memreconfig=add_0_1_2=0 override-output-layout=add_1_2=1x1:dram:interleaved:row_major:f32" %s | FileCheck %s
 #device = #tt.device<workerGrid = #tt.grid<8x8, (d0, d1) -> (0, d0, d1)>, l1Map = (d0, d1)[s0, s1] -> (0, d0 floordiv s0, d1 floordiv s1, (d0 mod s0) * s1 + d1 mod s1), dramMap = (d0, d1)[s0, s1] -> (0, 0, ((((d0 floordiv s0) * 8 + d1 floordiv s1) * (s1 * s0) + (d0 mod s0) * s1 + d1 mod s1) floordiv 8192) mod 12, (((d0 floordiv s0) * 8 + d1 floordiv s1) * (s1 * s0) + (d0 mod s0) * s1 + d1 mod s1) floordiv 98304 + (((d0 floordiv s0) * 8 + d1 floordiv s1) * (s1 * s0) + (d0 mod s0) * s1 + d1 mod s1) mod 8192), meshShape = , chipIds = [0]>
 #dram = #tt.memory_space<dram>
 #system = #tt.memory_space<system>
diff --git a/test/ttmlir/Silicon/TTNN/sharded/mnist_sharding_tiled.mlir b/test/ttmlir/Silicon/TTNN/sharded/mnist_sharding_tiled.mlir
index 9fc73e066..5767ded96 100644
--- a/test/ttmlir/Silicon/TTNN/sharded/mnist_sharding_tiled.mlir
+++ b/test/ttmlir/Silicon/TTNN/sharded/mnist_sharding_tiled.mlir
@@ -4,7 +4,7 @@
 #any_device = #tt.operand_constraint<dram|l1|scalar|tile|any_device|any_device_tile>
 #loc = loc("MNISTLinear":4294967295:0)
 module @"tt-forge-graph" attributes {} {
-  func.func @main(%arg0: tensor<32x784xf32> loc("MNISTLinear":4294967295:0), %arg1: tensor<32x32xf32> loc("MNISTLinear":4294967295:0), %arg2: tensor<256x32xf32> loc("MNISTLinear":4294967295:0), %arg3: tensor<32x256xf32> loc("MNISTLinear":4294967295:0), %arg4: tensor<784x256xf32> loc("MNISTLinear":4294967295:0)) -> tensor<32x32xf32> {
loc("MNISTLinear":4294967295:0), %arg2: tensor<256x32xf32> loc("MNISTLinear":4294967295:0), %arg3: tensor<256xf32> loc("MNISTLinear":4294967295:0), %arg4: tensor<784x256xf32> loc("MNISTLinear":4294967295:0)) -> tensor<32x32xf32> { // CHECK-DAG: #[[LAYOUT_1:.*]] = #tt.layout<(d0, d1) -> (d0, d1), undef, <1x8>, memref<32x32xf32, #l1_>, width_sharded> // CHECK-DAG: #[[LAYOUT_2:.*]] = #tt.layout<(d0, d1) -> (d0, d1), undef, <1x1>, memref<32x32xf32, #l1_>, width_sharded> %0 = tensor.empty() : tensor<32x256xf32> loc(#loc8) @@ -12,7 +12,7 @@ module @"tt-forge-graph" attributes {} { %1 = "ttir.matmul"(%arg0, %arg4, %0) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x784xf32>, tensor<784x256xf32>, tensor<32x256xf32>) -> tensor<32x256xf32> loc(#loc8) %2 = tensor.empty() : tensor<32x256xf32> loc(#loc9) // CHECK: %{{.*}} = "ttnn.add"{{.*}} -> tensor<32x256xf32, #[[LAYOUT_1]]> - %3 = "ttir.add"(%1, %arg3, %2) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x256xf32>, tensor<32x256xf32>, tensor<32x256xf32>) -> tensor<32x256xf32> loc(#loc9) + %3 = "ttir.add"(%1, %arg3, %2) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x256xf32>, tensor<256xf32>, tensor<32x256xf32>) -> tensor<32x256xf32> loc(#loc9) %4 = tensor.empty() : tensor<32x256xf32> loc(#loc10) // CHECK: %{{.*}} = "ttnn.relu"{{.*}} -> tensor<32x256xf32, #[[LAYOUT_1]]> %5 = "ttir.relu"(%3, %4) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device]}> : (tensor<32x256xf32>, tensor<32x256xf32>) -> tensor<32x256xf32> loc(#loc10) @@ -21,7 +21,7 @@ module @"tt-forge-graph" attributes {} { %7 = "ttir.matmul"(%5, %arg2, %6) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x256xf32>, tensor<256x32xf32>, tensor<32x32xf32>) -> tensor<32x32xf32> loc(#loc11) %8 = tensor.empty() : tensor<32x32xf32> loc(#loc12) // CHECK: %{{.*}} = "ttnn.add"{{.*}} -> tensor<32x32xf32, #[[LAYOUT_2]]> - %9 = "ttir.add"(%7, %arg1, %8) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x32xf32>, tensor<32x32xf32>, tensor<32x32xf32>) -> tensor<32x32xf32> loc(#loc12) + %9 = "ttir.add"(%7, %arg1, %8) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x32xf32>, tensor<32xf32>, tensor<32x32xf32>) -> tensor<32x32xf32> loc(#loc12) %10 = tensor.empty() : tensor<32x32xf32> loc(#loc13) %11 = "ttir.softmax"(%9, %10) <{dimension = 1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x32xf32>, tensor<32x32xf32>) -> tensor<32x32xf32> loc(#loc13) return %11 : tensor<32x32xf32> loc(#loc7)