Commit
Merge branch 'main' into vmilosevic/docker-build
vmilosevic committed Aug 7, 2024
2 parents 89c1d3e + 2bdb825 commit 78caf35
Showing 151 changed files with 5,047 additions and 1,398 deletions.
6 changes: 5 additions & 1 deletion .github/actions/build-toolchain/action.yml
@@ -4,6 +4,10 @@ inputs:
os:
description: 'Operating System'
required: true
sdk:
description: 'MacOS SDK, if applicable'
required: true
default: '0'
runs:
using: 'composite'
steps:
@@ -18,7 +22,7 @@ runs:
uses: actions/cache@v4
with:
path: /opt/ttmlir-toolchain
key: ${{ inputs.os }}-ttmlir-toolchain-${{ hashFiles('env/**') }}
key: ${{ inputs.os }}-ttmlir-toolchain-${{ hashFiles('env/**') }}-${{ inputs.sdk }}

- name: 'Build ttmlir-toolchain'
if: steps.cache-toolchain.outputs.cache-hit != 'true'
18 changes: 17 additions & 1 deletion .github/workflows/macos-build.yml
@@ -4,6 +4,9 @@ on:
workflow_dispatch:
workflow_call:

env:
SDK_VERSION: "0"

jobs:
build:
strategy:
@@ -23,16 +26,23 @@ jobs:
with:
os: ${{ matrix.build.runs-on }}

- name: Get macos sdk version
if: startsWith(matrix.build.runs-on, 'macos')
shell: bash
run: |
echo "SDK_VERSION=$(xcrun --show-sdk-version)" >> $GITHUB_ENV
- name: Build and cache ttmlir-toolchain
uses: ./.github/actions/build-toolchain
with:
os: ${{ matrix.build.runs-on }}
sdk: ${{ env.SDK_VERSION }}

- name: ccache
uses: hendrikmuhs/ccache-action@v1.2
with:
create-symlink: true
key: ${{ matrix.build.runs-on }}-runtime-${{ matrix.build.enable_runtime }}
key: ${{ matrix.build.runs-on }}-runtime-${{ matrix.build.enable_runtime }}-${{ env.SDK_VERSION }}

- name: Set reusable strings
id: strings
@@ -72,6 +82,12 @@ jobs:
source env/activate
cmake --build ${{ steps.strings.outputs.build-output-dir }} --config ${{ matrix.build.build_type }} -- check-ttmlir
- name: Build ttrt
shell: bash
run: |
source env/activate
cmake --build ${{ steps.strings.outputs.build-output-dir }} --config ${{ matrix.build.build_type }} -- ttrt
- name: Upload Test Report
uses: actions/upload-artifact@v4
with:
2 changes: 2 additions & 0 deletions CMakeLists.txt
@@ -5,6 +5,8 @@ if (NOT DEFINED ENV{TTMLIR_ENV_ACTIVATED})
message(FATAL_ERROR "tt-mlir environment not activated. Please run 'source env/activate'.")
endif()

option(TT_RUNTIME_ENABLE_PERF_TRACE "Enable performance mode" OFF)

set(CMAKE_BUILD_WITH_INSTALL_NAME_DIR ON)

set(CMAKE_CXX_STANDARD 17 CACHE STRING "C++ standard to conform to")
2 changes: 1 addition & 1 deletion docs/CMakeLists.txt
@@ -1,6 +1,6 @@
add_custom_target(copy-docs-dir
COMMAND
cp -r ${CMAKE_CURRENT_SOURCE_DIR}/ ${CMAKE_CURRENT_BINARY_DIR}
cp -r ${CMAKE_CURRENT_SOURCE_DIR}/* ${CMAKE_CURRENT_BINARY_DIR}
)

add_custom_target(autogen-summary
1 change: 1 addition & 0 deletions docs/src/SUMMARY.md
@@ -25,3 +25,4 @@
- [Runtime Stitching](./specs/runtime-stitching.md)
- [Tensor Layout](./specs/tensor-layout.md)
- [TTNN Interactive Visualizer](./specs/tensor-layout-interactive.md)
- [Device](./specs/device.md)
47 changes: 26 additions & 21 deletions docs/src/adding-an-op.md
@@ -7,13 +7,19 @@ reference the diff alongside this guide to see the changes in full.

This guide will cover the following steps:

1. [Define the Op in the TTIR frontend dialect](#1-define-the-op-in-the-ttir-frontend-dialect)
2. [Define the Op in the TTNN backend dialect](#2-define-the-op-in-the-ttnn-backend-dialect)
3. [Convert / Implement the Op in the TTNN passes](#3-convert--implement-the-op-in-the-ttnn-passes)
4. [Add a unit test for the Op](#4-add-a-unit-test-for-the-op)
5. [Define flatbuffer schema for the Op](#5-define-flatbuffer-schema-for-the-op)
6. [Serialize the Op in the flatbuffer format](#6-serialize-the-op-in-the-flatbuffer-format)
7. [Add runtime support for the Op](#7-add-runtime-support-for-the-op)
- [Adding an Op](#adding-an-op)
- [1. Define the Op in the TTIR frontend dialect](#1-define-the-op-in-the-ttir-frontend-dialect)
- [2. Define the Op in the TTNN backend dialect](#2-define-the-op-in-the-ttnn-backend-dialect)
- [`TTNNOps.td`](#ttnnopstd)
- [`TTNNOps.cpp`](#ttnnopscpp)
- [3. Convert / Implement the Op in the TTNN passes](#3-convert--implement-the-op-in-the-ttnn-passes)
- [4. Add a unit test for the Op](#4-add-a-unit-test-for-the-op)
- [`test/ttmlir/Dialect/TTNN/simple_matmul.mlir`](#testttmlirdialectttnnsimple_matmulmlir)
- [5. Define flatbuffer schema for the Op](#5-define-flatbuffer-schema-for-the-op)
- [`include/ttmlir/Target/TTNN/program.fbs`](#includettmlirtargetttnnprogramfbs)
- [6. Serialize the Op in the flatbuffer format](#6-serialize-the-op-in-the-flatbuffer-format)
- [7. Add runtime support for the Op](#7-add-runtime-support-for-the-op)
- [`runtime/lib/ttnn/program.cpp`](#runtimelibttnnprogramcpp)

## 1. Define the Op in the TTIR frontend dialect

@@ -99,13 +105,13 @@ section for details, the process is the same.
Next we will implement the conversion from the TTIR `matmul` Op to the TTNN `matmul` Op.
This is a trivial conversion, as the Ops are identical in their semantics, so
the changeset isn't going to be very instructive, but will at least point to the
files involved. The conversion is implemented in the `ConvertTTIRToTNN` pass in
file `lib/Dialect/TTNN/Transforms/Passes.cpp`.
files involved. The conversion is implemented in the `ConvertTTIRToTTNNPass` pass in
file `lib/Conversion/TTIRToTTNN/TTIRToTTNNPass.cpp`.

Zooming into `class ConvertTTIRToTNN` we can see we implement the pass interface
Zooming into `class ConvertTTIRToTTNNPass` we can see we implement the pass interface
via member function `void runOnOperation() final`. This function will be called
for every operation matching the type specified in the pass tablegen file. A
quick look at `include/ttmlir/Dialect/TTNN/Passes.td` we can see:
quick look at `include/ttmlir/Conversion/Passes.td` we can see:

```
def ConvertTTIRToTTNN: Pass<"convert-ttir-to-ttnn", "::mlir::ModuleOp"> {
@@ -121,22 +127,21 @@ can match much more complicated patterns (nested inside of the `ModuleOp`'s
than just a single operation.

```cpp
{{#include ../../../lib/Dialect/TTNN/Transforms/Passes.cpp:adding_an_op_matmul_rewrite_pattern_set}}
{{#include ../../../lib/Conversion/TTIRToTTNN/TTIRToTTNN.cpp:adding_an_op_matmul_rewrite_pattern_set}}
```
> More information on rewrite patterns and their capabilities can be found in the [MLIR
> documentation](https://mlir.llvm.org/docs/PatternRewriter/).
> More information on rewrite patterns and their capabilities can be found in the MLIR documentation [here](https://mlir.llvm.org/docs/PatternRewriter/) and [here](https://mlir.llvm.org/docs/DialectConversion/).
For matmul, we defined a new pattern rewriter that's generic to all binary ops
For matmul, we defined a new conversion pattern that's generic to all binary ops
with arguments named `a` and `b`:
```cpp
{{#include ../../../lib/Dialect/TTNN/Transforms/Passes.cpp:adding_an_op_matmul_op_rewriter}}
{{#include ../../../lib/Conversion/TTIRToTTNN/TTIRToTTNN.cpp:adding_an_op_matmul_op_rewriter}}
```

Invoked as part of the rewrite set:
```cpp
TTIRToTTNNBinaryOpRewriter<ttir::MatmulOp, MatmulOp>
MatmulOpConversionPattern
```
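
For readers following along without the linked source open, here is a minimal sketch of the shape such a conversion pattern usually takes. It follows the standard MLIR `OpConversionPattern` interface; the operand accessors (`getA()`, `getB()`, `getOutput()`) and the `ttnn::MatmulOp` builder arguments are assumptions for illustration, not a copy of the repo's implementation:

```cpp
#include "mlir/Transforms/DialectConversion.h"

using namespace mlir;

// Sketch only: rewrite ttir.matmul into ttnn.matmul during dialect conversion.
// Accessor and builder names are illustrative; see TTIRToTTNN.cpp for the real ones.
class MatmulOpConversionPattern : public OpConversionPattern<ttir::MatmulOp> {
public:
  using OpConversionPattern<ttir::MatmulOp>::OpConversionPattern;

  LogicalResult
  matchAndRewrite(ttir::MatmulOp op, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    // The adaptor hands us the operands after any earlier conversions; a real
    // pattern would also run the result type through the pass's TypeConverter.
    rewriter.replaceOpWithNewOp<ttnn::MatmulOp>(
        op, op.getResult().getType(),
        adaptor.getA(), adaptor.getB(), adaptor.getOutput());
    return success();
  }
};
```

Registering the pattern in the rewrite set shown above is all the pass needs to do; the conversion framework walks the module and applies it wherever a `ttir.matmul` appears.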

We also need to add this op to the C++ emitter,
@@ -149,7 +154,7 @@ So far we have defined the Op in the TTIR and TTNN dialects,
implemented verifiers, and have conversion passes. Now we need to add a unit
test to ensure that the pass is working correctly. The unit tests are located
in `test/ttmlir/Dialect` area. In this case we'll add a test under the `TTNN`
subdirectory since we are testing the `ConvertTTIRToTTNN` pass.
subdirectory since we are testing the `ConvertTTIRToTTNNPass`.

#### `test/ttmlir/Dialect/TTNN/simple_matmul.mlir`

Expand Down Expand Up @@ -215,11 +220,11 @@ to a program called `flatc` which generates C++ code (or any language for that
matter) for serializing and deserializing the schema. This generated code can be
found in `build/include/ttmlir/Target/TTNN/program_generated.h`.
Let's head over to `lib/Dialect/TTNN/Transforms/TTNNToSerializedBinary.cpp` to define
Let's head over to `lib/Target/TTNN/TTNNToFlatbuffer.cpp` to define
a `createOp` overloaded function that does the conversion from MLIR to flatbuffer:
```cpp
{{#include ../../../lib/Dialect/TTNN/Transforms/TTNNToSerializedBinary.cpp:adding_an_op_matmul_serialize_to_binary}}
{{#include ../../../lib/Target/TTNN/TTNNToFlatbuffer.cpp:adding_an_op_matmul_serialize_to_binary}}
```
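
Conceptually, the overload builds flatbuffer offsets for each operand and then calls the builder that `flatc` generated from the schema. Below is a rough sketch of that shape; `CreateMatmulOp`, `TensorRef`, and the tensor-lookup helper are hypothetical stand-ins, not the repo's actual API:

```cpp
// Sketch only: turn a ttnn.matmul MLIR op into its flatbuffer table.
// The generated header path and Create* helper are assumed from the
// flatc output described above.
#include "flatbuffers/flatbuffers.h"
#include "ttmlir/Target/TTNN/program_generated.h"

// Assume each MLIR Value can be resolved to an already-serialized TensorRef.
::flatbuffers::Offset<::tt::target::TensorRef>
tensorValueToFlatbuffer(::flatbuffers::FlatBufferBuilder &fbb, mlir::Value v);

::flatbuffers::Offset<::tt::target::ttnn::MatmulOp>
createOp(::flatbuffers::FlatBufferBuilder &fbb, MatmulOp op) {
  auto in0 = tensorValueToFlatbuffer(fbb, op.getA());
  auto in1 = tensorValueToFlatbuffer(fbb, op.getB());
  auto out = tensorValueToFlatbuffer(fbb, op.getOutput());
  // CreateMatmulOp is the kind of builder flatc emits for a MatmulOp table.
  return ::tt::target::ttnn::CreateMatmulOp(fbb, in0, in1, out);
}
```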

Lots of things are happening here, let's break it down:
@@ -241,7 +246,7 @@ Lots of things are happening here, let's break it down:

We can finally generate a binary with our new Op! We can use the following command:
```bash
./build/bin/ttmlir-opt --ttir-layout --ttnn-open-device --convert-ttir-to-ttnn --ttnn-serialize-to-binary="output=out.ttnn" test/ttmlir/Dialect/TTNN/simple_matmul.mlir
./build/bin/ttmlir-opt --ttir-to-ttnn-backend-pipeline test/ttmlir/Dialect/TTNN/simple_matmul.mlir | ./build/bin/ttmlir-translate --ttnn-to-flatbuffer -o out.ttnn
```

And we can inspect the binary with [`ttrt`](./ttrt.md):
23 changes: 17 additions & 6 deletions docs/src/build.md
@@ -33,6 +33,7 @@ cmake --build build
```
> - To enable the ttnn/metal runtime add `-DTTMLIR_ENABLE_RUNTIME=ON`
> - To enable the ttnn/metal perf runtime, add `-DTT_RUNTIME_ENABLE_PERF_TRACE=ON` and `export ENABLE_TRACY=1` in your environment before building
> - To accelerate the builds with ccache use `-DCMAKE_CXX_COMPILER_LAUNCHER=ccache`
> - To accelerate builds further, if python bindings aren't needed, `-DTTMLIR_ENABLE_BINDINGS_PYTHON=OFF`. For some reason the python bindings link step is very slow.
> - TTNN build is automatically integrated / handled by tt-mlir cmake build system. For debugging and further information regarding the TTNN backend build step, please refer to [TTNN Documentation](https://tenstorrent.github.io/tt-metal/latest/ttnn/ttnn/installing.html).
@@ -45,11 +46,11 @@ cmake --build build
> For more information, please refer to
> [TT-NN and TT-Metalium installation documentation](https://tenstorrent.github.io/tt-metal/latest/ttnn/ttnn/installing.html#step-4-install-and-start-using-tt-nn-and-tt-metalium).
| OS | Offline Compiler Only | Runtime Enabled Build |
|----|-----------------------|-----------------------|
| Ubuntu 22.04 |||
| Ubuntu 20.04 |||
| MacOS |||
| OS | Offline Compiler Only | Runtime Enabled Build | Runtime + Perf Enabled Build |
|----|-----------------------|-----------------------| -----------------------------|
| Ubuntu 22.04 ||||
| Ubuntu 20.04 ||||
| MacOS ||||

## Test

@@ -104,10 +105,11 @@ For more information visit [pre-commit](https://pre-commit.com/)
```bash
source env/activate
cmake --build build -- docs
mdbook serve build/docs/book
mdbook serve build/docs
```

> - `mdbook` can be installed with the system's package manager.
> - `mdbook serve` will start a local server at `http://localhost:3000` by default.
## Dependencies

@@ -119,6 +121,15 @@ Make sure to have Git LFS installed. You can install it with the following comma
sudo apt-get install git-lfs
```

If you are building with the performance trace enabled (`-DTT_RUNTIME_ENABLE_PERF_TRACE=ON`), you will also need to install the following packages:

```bash
pip install loguru
pip install torch
pip install pandas
pip install seaborn
```

### Ubuntu 22.04

We need to install Ninja which can be done with the following command
2 changes: 1 addition & 1 deletion docs/src/dialects-overview.md
@@ -6,7 +6,7 @@ individual dialect documentation for more details.:
- `tt`: Common types such as, `tt.tile`, `tt.layout`, `tt.grid`, etc. and enums such as, data formats, memory spaces, iterator types etc.
- `ttir`: A high level dialect that models the tensor compute graph on tenstorrent devices. Accepts `tosa` and `linalg` input.
- `ttir.generic`: Generically describe compute work.
- `ttir.layout`: Convert between different tensor memory layouts and transfer between different memory spaces.
- `ttir.to_layout`: Convert between different tensor memory layouts and transfer between different memory spaces.
- `tensor.pad`: Pad a tensor with a value (ie. convs)
- `ttir.yield`: return result memref of computation in dispatch region body, lowers to `ttkernel.yield`
- `ttir.kernel`: lowers to some backend kernel