diff --git a/docs/src/SUMMARY.md b/docs/src/SUMMARY.md
index 5eae77205..6e150815d 100644
--- a/docs/src/SUMMARY.md
+++ b/docs/src/SUMMARY.md
@@ -6,6 +6,9 @@
 - [Building](./build.md)
   - [Docker Notes](./docker-notes.md)
+- [Testing](./testing.md)
+  - [Lit testing](./lit-testing.md)
+  - [EmitC testing](./emitc-testing.md)
 - [Tools](./tools.md)
   - [ttmlir-opt](./ttmlir-opt.md)
   - [ttmlir-translate](./ttmlir-translate.md)
@@ -13,6 +16,7 @@
 - [tt-explorer](./tt-explorer.md)
   - [Usage & API](./tt-explorer-usage-api.md)
   - [Roadmap](./tt-explorer-roadmap.md)
+  - [ttnn-standalone](./ttnn-standalone.md)
 - [Flatbuffers](./flatbuffers.md)
 - [CI](./ci.md)
 - [Additional Reading](./additional-reading.md)
diff --git a/docs/src/adding-an-op.md b/docs/src/adding-an-op.md
index 2a41397d5..3ba4aaf59 100644
--- a/docs/src/adding-an-op.md
+++ b/docs/src/adding-an-op.md
@@ -23,7 +23,9 @@
   - [`runtime/lib/ttnn/operations/CMakeLists.txt`](#runtimelibttnnoperationscmakeliststxt)
   - [`runtime/lib/ttnn/program.cpp`](#runtimelibttnnprogramcpp)
 - [8. Add a silicon unit test for the Op](#8-add-a-silicon-unit-test-for-the-op)
-  - [`test/ttmlir/Silicon/TTNN/simple_matmul.mlir`](#testttmlirsiliconttnnsimple_matmulmlir)
+  - [`test/ttmlir/Silicon/TTNN/matmul/simple_matmul.mlir`](#testttmlirsiliconttnnmatmulsimple_matmulmlir)
+- [9. Add an EmitC test for the Op](#9-add-an-emitc-test-for-the-op)
+  - [`test/ttmlir/EmitC/TTNN/matmul/matmul.mlir`](#testttmliremitcttnnmatmulmatmulmlir)
 
 ## 1. Define the Op in the TTIR frontend dialect
 
@@ -302,11 +304,11 @@
 After adding runtime support, we're ready to test our Op on silicon. All silicon tests are located under `test/ttmlir/Silicon`.
 The process is similar to [adding a compiler unit test](#4-add-a-compiler-unit-test-for-the-op).
 
-In our specific case, we create a unit test here: `test/ttmlir/Silicon/TTNN/simple_matmul.mlir`:
+In our specific case, we create a unit test here:
 
-#### `test/ttmlir/Silicon/TTNN/simple_matmul.mlir`
+#### `test/ttmlir/Silicon/TTNN/matmul/simple_matmul.mlir`
 ```mlir
-{{#include ../../../test/ttmlir/Silicon/TTNN/simple_matmul.mlir}}
+{{#include ../../../test/ttmlir/Silicon/TTNN/matmul/simple_matmul.mlir}}
 ```
 
 Couple things to point out about this process:
@@ -318,3 +320,15 @@
 If you want the module to run on silicon in CI, the test must be placed under `test/ttmlir/Silicon`.
 Ensuring the system descriptor accurately reflects the target hardware is essential for running the module correctly.
 - `// RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn`: This runs `ttmlir-translate` that serializes the output mlir module to a flatbuffer binary. We added the logic for this serialization in the [Serialize the Op in the flatbuffer format](#6-serialize-the-op-in-the-flatbuffer-format) section.
+
+## 9. Add an EmitC test for the Op
+The Op should be tested in the EmitC (C++ codegen) path as well.
+
+TTNN EmitC tests live under `test/ttmlir/EmitC/TTNN`. In our case, the test is `test/ttmlir/EmitC/TTNN/matmul/matmul.mlir`.
+
+#### `test/ttmlir/EmitC/TTNN/matmul/matmul.mlir`
+```mlir
+{{#include ../../../test/ttmlir/EmitC/TTNN/matmul/matmul.mlir}}
+```
+
+The first two `RUN` lines create a flatbuffer. The third and fourth convert the module to the EmitC dialect, translate it to C++, and write the result to a `matmul.mlir.cpp` file.
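+
+For a quick local iteration loop, this single test can also be run with `llvm-lit` (a sketch, assuming the default `build` directory):
+
+```bash
+# Run only the matmul EmitC test
+llvm-lit -sv ./build/test/ttmlir/EmitC/TTNN/matmul/matmul.mlir
+```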
diff --git a/docs/src/build.md b/docs/src/build.md
index b415ca08d..628776513 100644
--- a/docs/src/build.md
+++ b/docs/src/build.md
@@ -60,24 +60,6 @@
 source env/activate
 cmake --build build -- check-ttmlir
 ```
 
-### llvm-lit
-
-Under the hood the check-ttmlir cmake target is running `llvm-lit`. With it you
-can:
-
-```bash
-# Query which tests are available
-llvm-lit -sv ./build/test --show-tests
-
-# Run an individual test:
-llvm-lit -sv ./build/test/ttmlir/Dialect/TTIR/test_allocate.mlir
-
-# Run a sub-suite:
-llvm-lit -sv ./build/test/ttmlir/Dialect/TTIR
-```
-
-> See the full [llvm-lit documentation](https://llvm.org/docs/CommandGuide/lit.html) for more information.
-
 ## Lint
 
 ```bash
diff --git a/docs/src/emitc-testing.md b/docs/src/emitc-testing.md
new file mode 100644
index 000000000..fe84d45d7
--- /dev/null
+++ b/docs/src/emitc-testing.md
@@ -0,0 +1,14 @@
+# EmitC testing
+
+To run EmitC tests locally:
+
+```bash
+# Generate flatbuffers and .cpp files
+llvm-lit -sv test/ttmlir/EmitC/TTNN
+
+# Compile .cpp files to shared objects
+tools/ttnn-standalone/ci_compile_dylib.py
+
+# Run flatbuffers + shared objects and compare results
+ttrt run --emitc build/test/ttmlir/EmitC/TTNN
+```
diff --git a/docs/src/lit-testing.md b/docs/src/lit-testing.md
new file mode 100644
index 000000000..f1204ad43
--- /dev/null
+++ b/docs/src/lit-testing.md
@@ -0,0 +1,16 @@
+# Lit testing
+
+The `llvm-lit` tool is used for MLIR testing. With it you can:
+
+```bash
+# Query which tests are available
+llvm-lit -sv ./build/test --show-tests
+
+# Run an individual test:
+llvm-lit -sv ./build/test/ttmlir/Dialect/TTIR/test_allocate.mlir
+
+# Run a sub-suite:
+llvm-lit -sv ./build/test/ttmlir/Dialect/TTIR
+```
+
+> See the full [llvm-lit documentation](https://llvm.org/docs/CommandGuide/lit.html) for more information.
diff --git a/docs/src/testing.md b/docs/src/testing.md
new file mode 100644
index 000000000..8a38294a3
--- /dev/null
+++ b/docs/src/testing.md
@@ -0,0 +1,8 @@
+# Testing
+
+To run the test suite:
+
+```bash
+source env/activate
+cmake --build build -- check-ttmlir
+```
diff --git a/docs/src/tools.md b/docs/src/tools.md
index b7fa562ee..bb6e1fbd9 100644
--- a/docs/src/tools.md
+++ b/docs/src/tools.md
@@ -3,5 +3,7 @@
 Currently, there are a few primary tools that are part of the `ttmlir` project:
 
 - `ttmlir-opt`: The `ttmlir` optimizer driver. This tool is used to run the `ttmlir` compiler passes on a `.mlir` source files and is central to developing and testing the compiler.
+- `ttmlir-translate`: The `ttmlir` translation tool. It converts IR to an external representation (and back); for example, IR in the EmitC dialect can be translated into C++ code.
 - `ttrt`: This tool is intended to be a swiss army knife for working with flatbuffers generated by the compiler. Its primary role is to inspect and run flatbuffer files.
 - [`tt-explorer`](https://github.com/vprajapati-tt/tt-explorer): Visualizer tool for `ttmlir`-powered compiler results. Visualizes from emitted `.mlir` files to display compiled model, attributes, performance results, and provide a platform for human-driven overrides to _gameify_ model tuning.
+- `ttnn-standalone`: This tool is used to run compiler-generated C++ TTNN code outside of the compiler environment.
diff --git a/docs/src/ttnn-standalone.md b/docs/src/ttnn-standalone.md
new file mode 100644
index 000000000..30842acc6
--- /dev/null
+++ b/docs/src/ttnn-standalone.md
@@ -0,0 +1,24 @@
+# `ttnn-standalone`
+
+TTNN Standalone is a post-compile tuning/debugging tool.
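+
+To give a taste of what the tool consumes, here is a minimal sketch of compiler-generated TTNN C++ for an element-wise add (abridged from the `ttnn-standalone.cpp` example that ships with the tool; the full version spells out explicit output tensors and deallocations):
+
+```cpp
+// Abridged compiler-generated TTNN code: add two host tensors on device
+ttnn::Tensor add(ttnn::Tensor v1, ttnn::Tensor v2) {
+  ttnn::IDevice *device = ttnn::DeviceGetter::getInstance();
+  ttnn::MemoryConfig memConfig = ttnn::MemoryConfig(ttnn::TensorMemoryLayout::INTERLEAVED, ttnn::BufferType::DRAM);
+  // Move inputs to device DRAM and tile layout
+  ttnn::Tensor lhs = ttnn::to_layout(ttnn::to_device(v1, device, memConfig), ttnn::Layout::TILE, std::nullopt, std::nullopt, static_cast<::ttnn::IDevice *>(nullptr));
+  ttnn::Tensor rhs = ttnn::to_layout(ttnn::to_device(v2, device, memConfig), ttnn::Layout::TILE, std::nullopt, std::nullopt, static_cast<::ttnn::IDevice *>(nullptr));
+  // Run the op on device, then bring the result back to host
+  ttnn::Tensor result = ttnn::add(lhs, rhs);
+  return ttnn::from_device(result);
+}
+```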
+
+Forge and third party ML models (PyTorch, Jax, ONNX, ...) can be compiled to a set of TTNN library calls in C++. This generated code can then be used outside of the compiler environment. The TTNN Standalone tool offers all the scaffolding needed to run the C++ code on device (build & run scripts).
+
+### Usage
+
+```bash
+# Compile a model to EmitC dialect => translate to C++ code => pipe to ttnn-standalone.cpp
+./build/bin/ttmlir-opt --ttir-to-emitc-pipeline test/ttmlir/EmitC/TTNN/sanity_add.mlir \
+| ./build/bin/ttmlir-translate --mlir-to-cpp \
+> tools/ttnn-standalone/ttnn-standalone.cpp
+
+# Change dir to `tools/ttnn-standalone` and use the `run` script to compile and run the ttnn standalone:
+cd tools/ttnn-standalone
+./run
+```
+
+Note: if you receive the following error
+```bash
+-bash: ./run: Permission denied
+```
+run `chmod +x run` to make the script executable.
diff --git a/lib/Dialect/TTNN/Pipelines/TTNNPipelines.cpp b/lib/Dialect/TTNN/Pipelines/TTNNPipelines.cpp
index efe4cc9ec..2e2d01fb4 100644
--- a/lib/Dialect/TTNN/Pipelines/TTNNPipelines.cpp
+++ b/lib/Dialect/TTNN/Pipelines/TTNNPipelines.cpp
@@ -148,6 +148,7 @@ void createTTIRToTTNNBackendPipeline(
 void createTTIRToEmitCPipeline(OpPassManager &pm,
                                const TTIRToEmitCPipelineOptions &options) {
   createTTIRToTTNNBackendPipeline(pm, options);
+  pm.addPass(createTTNNCreateInputGenerators());
   pm.addPass(createConvertTTNNToEmitCPass());
 }
 
diff --git a/test/ttmlir/Dialect/TTNN/pipelines/ttir_to_emitc_add.mlir b/test/ttmlir/Dialect/TTNN/pipelines/ttir_to_emitc_add.mlir
index 21c665ae2..ec2664b44 100644
--- a/test/ttmlir/Dialect/TTNN/pipelines/ttir_to_emitc_add.mlir
+++ b/test/ttmlir/Dialect/TTNN/pipelines/ttir_to_emitc_add.mlir
@@ -1,5 +1,5 @@
 // RUN: ttmlir-opt --ttir-to-emitc-pipeline="system-desc-path=%system_desc_path%" %s > %direct.mlir
-// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path%" --convert-ttnn-to-emitc %s > %indirect.mlir
+// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path%" --ttnn-create-input-gens --convert-ttnn-to-emitc %s > %indirect.mlir
 // RUN: diff %direct.mlir %indirect.mlir
 //
 // This test checks that the (TTIR to EmitC pipeline) is equivalent to (TTIR to TTNN pipeline + dialect conversion from TTNN to EmitC).
diff --git a/test/ttmlir/EmitC/TTNN/matmul/matmul.mlir b/test/ttmlir/EmitC/TTNN/matmul/matmul.mlir
new file mode 100644
index 000000000..0c39bde08
--- /dev/null
+++ b/test/ttmlir/EmitC/TTNN/matmul/matmul.mlir
@@ -0,0 +1,10 @@
+// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path%" %s > %t.mlir
+// RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %basename_t.ttnn
+// RUN: ttmlir-opt --ttnn-modify-signatures-for-dylib --convert-ttnn-to-emitc %t.mlir > %t2.mlir
+// RUN: ttmlir-translate --mlir-to-cpp %t2.mlir > %basename_t.cpp
+
+func.func @forward(%arg0: tensor<64x128xbf16>, %arg1: tensor<128x96xbf16>) -> tensor<64x96xbf16> {
+  %0 = tensor.empty() : tensor<64x96xbf16>
+  %1 = "ttir.matmul"(%arg0, %arg1, %0) : (tensor<64x128xbf16>, tensor<128x96xbf16>, tensor<64x96xbf16>) -> tensor<64x96xbf16>
+  return %1 : tensor<64x96xbf16>
+}
diff --git a/tools/ttnn-standalone/README.md b/tools/ttnn-standalone/README.md
index 619e52d1c..ac6f6c436 100644
--- a/tools/ttnn-standalone/README.md
+++ b/tools/ttnn-standalone/README.md
@@ -1,36 +1,3 @@
-## Table of contents
-
-- [TTNN Standalone](#ttnn-standalone)
-  - [Usage](#usage)
-- [TTNN Dylib](#ttnn-dylib)
-
-## TTNN Standalone
-
-TTNN Standalone is a post-compile tuning tool.
-
-Third party ML models (PyTorch, Jax, ONNX, ...) can be compiled to a set of TTNN library calls in C++. This generated code can then be manually fine-tuned outside of the compiler environment. TTNN Standalone tool offers all the scaffolding needed to run the C++ code on device (build & run scripts).
-
-### Usage
-
-```bash
-# Compile a model to C++ code
-./build/bin/ttmlir-opt --ttir-to-emitc-pipeline test/ttmlir/Silicon/TTNN/emitc/simple_add.mlir | ./build/bin/ttmlir-translate --mlir-to-cpp
-
-# Copy paste the generated function into `ttnn-standalone.cpp`.
-
-# Adapt the `main()` function in `ttnn-standalone.cpp` to feed tensors needed for the model
-
-# Run the following script from within this folder (`tools/ttnn-standalone`) to compile and run the ttnn standalone:
-
-./run
-```
-
-Note: if you receive this error
-```bash
--bash: ./run: Permission denied
-```
-running `chmod +x run` will allow the execution of the script.
-
-## TTNN Dylib
-
-Similarly to the Standalone, this tool offers the ability to compile third party ML models, but to dylibs. Initial intent for compiled dylibs is to be used in testing infrastructure, but sky's the limit :)
+# TTNN Standalone
+
+Please refer to [TTMLIR docs](https://docs.tenstorrent.com/tt-mlir/ttnn-standalone.html).
diff --git a/tools/ttnn-standalone/ttnn-standalone.cpp b/tools/ttnn-standalone/ttnn-standalone.cpp
index c7d90edbb..8be0a466a 100644
--- a/tools/ttnn-standalone/ttnn-standalone.cpp
+++ b/tools/ttnn-standalone/ttnn-standalone.cpp
@@ -1,70 +1,44 @@
-// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+// SPDX-FileCopyrightText: (c) 2025 Tenstorrent AI ULC
 //
 // SPDX-License-Identifier: Apache-2.0
 
 #include "ttnn-precompiled.hpp"
-
-// To generate forward function, run:
-// ./build/bin/ttmlir-opt --ttir-to-emitc-pipeline
-// test/ttmlir/Silicon/TTNN/emitc/simple_add.mlir | ./build/bin/ttmlir-translate
-// --mlir-to-cpp
-
-ttnn::Tensor forward(ttnn::Tensor v1, ttnn::Tensor v2) {
-  ttnn::IDevice *v3 = ttnn::DeviceGetter::getInstance();
-  ttnn::MemoryConfig v4 = ttnn::MemoryConfig(
-      ttnn::TensorMemoryLayout::INTERLEAVED, ttnn::BufferType::DRAM);
+ttnn::Tensor add(ttnn::Tensor v1, ttnn::Tensor v2) {
+  ttnn::IDevice* v3 = ttnn::DeviceGetter::getInstance();
+  ttnn::MemoryConfig v4 = ttnn::MemoryConfig(ttnn::TensorMemoryLayout::INTERLEAVED, ttnn::BufferType::DRAM);
   ttnn::Tensor v5 = ttnn::to_device(v1, v3, v4);
-  ttnn::Tensor v6 =
-      ttnn::to_layout(v5, ttnn::Layout::TILE, std::nullopt, std::nullopt,
-                      static_cast<::ttnn::IDevice *>(nullptr));
+  ttnn::Tensor v6 = ttnn::to_layout(v5, ttnn::Layout::TILE, std::nullopt, std::nullopt, static_cast<::ttnn::IDevice *>(nullptr));
   ttnn::deallocate(v5, false);
-  ttnn::MemoryConfig v7 = ttnn::MemoryConfig(
-      ttnn::TensorMemoryLayout::INTERLEAVED, ttnn::BufferType::DRAM);
+  ttnn::MemoryConfig v7 = ttnn::MemoryConfig(ttnn::TensorMemoryLayout::INTERLEAVED, ttnn::BufferType::DRAM);
   ttnn::Tensor v8 = ttnn::to_device(v2, v3, v7);
-  ttnn::Tensor v9 =
-      ttnn::to_layout(v8, ttnn::Layout::TILE, std::nullopt, std::nullopt,
-                      static_cast<::ttnn::IDevice *>(nullptr));
+  ttnn::Tensor v9 = ttnn::to_layout(v8, ttnn::Layout::TILE, std::nullopt, std::nullopt, static_cast<::ttnn::IDevice *>(nullptr));
   ttnn::deallocate(v8, false);
-  ttnn::SimpleShape v10 = ttnn::SimpleShape({
-      32,
-      32,
-  });
-  ttnn::MemoryConfig v11 = ttnn::MemoryConfig(
-      ttnn::TensorMemoryLayout::INTERLEAVED, ttnn::BufferType::DRAM);
-  ttnn::Tensor v12 =
-      ttnn::empty(v10, ttnn::DataType::BFLOAT16, ttnn::Layout::TILE, v3, v11);
+  ttnn::SimpleShape v10 = ttnn::SimpleShape(tt::tt_metal::LegacyShape({32, 32, }));
+  ttnn::MemoryConfig v11 = ttnn::MemoryConfig(ttnn::TensorMemoryLayout::INTERLEAVED, ttnn::BufferType::DRAM);
+  ttnn::Tensor v12 = ttnn::empty(v10, ttnn::DataType::BFLOAT16, ttnn::Layout::TILE, v3, v11);
   ttnn::Tensor v13 = ttnn::add(v6, v9, std::nullopt, std::nullopt, v12);
   ttnn::deallocate(v9, false);
   ttnn::deallocate(v6, false);
   ttnn::Tensor v14 = ttnn::from_device(v13);
   ttnn::deallocate(v12, false);
-  ttnn::Tensor v15 =
-      ttnn::to_layout(v14, ttnn::Layout::ROW_MAJOR, std::nullopt, std::nullopt,
-                      static_cast<::ttnn::IDevice *>(nullptr));
+  ttnn::Tensor v15 = ttnn::to_layout(v14, ttnn::Layout::ROW_MAJOR, std::nullopt, std::nullopt, static_cast<::ttnn::IDevice *>(nullptr));
   ttnn::deallocate(v14, false);
   return v15;
 }
 
-int main() {
-  // Create shapes
-  //
-  const size_t tensor_height = 32;
-  const size_t tensor_width = 32;
-  ttnn::SimpleShape xs =
-      ttnn::SimpleShape({1, 1, tensor_height, tensor_width});
-  ttnn::SimpleShape ys =
-      ttnn::SimpleShape({1, 1, tensor_height, tensor_width});
-
-  // Create tensors on cpu
-  //
-  auto x = ttnn::ones(xs, ttnn::DataType::BFLOAT16, ttnn::Layout::TILE);
-  auto y = ttnn::ones(ys, ttnn::DataType::BFLOAT16, ttnn::Layout::TILE);
-
-  // Run fwd pass on device
-  //
-  ttnn::Tensor result = forward(x, y);
+std::tuple<ttnn::Tensor, ttnn::Tensor> createInputsFor_add() {
+  ttnn::SimpleShape v1 = ttnn::SimpleShape(tt::tt_metal::LegacyShape({32, 32, }));
+  ttnn::Tensor v2 = ttnn::ones(v1, ttnn::DataType::BFLOAT16, ttnn::Layout::ROW_MAJOR, std::nullopt, std::nullopt);
+  ttnn::SimpleShape v3 = ttnn::SimpleShape(tt::tt_metal::LegacyShape({32, 32, }));
+  ttnn::Tensor v4 = ttnn::ones(v3, ttnn::DataType::BFLOAT16, ttnn::Layout::ROW_MAJOR, std::nullopt, std::nullopt);
+  return std::make_tuple(v2, v4);
+}
 
-  // Print result
-  //
-  result.print();
+int32_t main() {
+  ttnn::Tensor v1;
+  ttnn::Tensor v2;
+  std::tie(v1, v2) = createInputsFor_add();
+  ttnn::Tensor v3 = add(v1, v2);
+  int32_t v4 = 0;
+  return v4;
 }