
Commit

#884: Fixed on device perf collection for unary ops and added unit tests for each op for perf mode in ttrt
tapspatel committed Oct 29, 2024
1 parent 648739a commit c342baf
Showing 47 changed files with 899 additions and 195 deletions.
19 changes: 13 additions & 6 deletions .github/workflows/build-and-test.yml
@@ -277,7 +277,14 @@ jobs:
if: matrix.build.enable_perf == 'ON'
run: |
source env/activate
ttrt ${{ matrix.build.name }} ${{ matrix.build.ttrt_flags }} ${{ steps.strings.outputs.build-output-dir }}/test/ttmlir/Silicon --host-only
ttrt ${{ matrix.build.name }} ${{ matrix.build.ttrt_flags }} ${{ steps.strings.outputs.build-output-dir }}/test/ttmlir/Silicon/TTNN/perf_unit
- name: Upload ttrt test report
if: always()
uses: actions/upload-artifact@v4
with:
name: ${{ matrix.build.runs-on }}_${{ matrix.build.name }}_results.json
path: ${{ matrix.build.name }}_results.json

run-ttrt-tests:

@@ -392,8 +399,8 @@ jobs:
source env/activate
pytest -ssv runtime/tools/python/test/test_run.py
#- name: ttrt perf tests
# shell: bash
# run: |
# source env/activate
# pytest -ssv runtime/tools/python/test/test_perf.py
- name: ttrt perf tests
shell: bash
run: |
source env/activate
pytest -ssv runtime/tools/python/test/test_perf.py
4 changes: 4 additions & 0 deletions CMakeLists.txt
@@ -22,6 +22,10 @@ set(TTMLIR_ENABLE_BINDINGS_PYTHON ON CACHE BOOL "Enable Python bindings")

list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_DIR}/cmake/modules)

if (TT_RUNTIME_ENABLE_PERF_TRACE)
add_compile_options(-DTRACY_ENABLE=ON)
endif()

add_compile_options(-Wall -Wextra -Wpedantic -Werror -Wno-unused-parameter --system-header-prefix=/opt/ttmlir-toolchain)

include(TTMLIRBuildTypes)
25 changes: 20 additions & 5 deletions docs/src/ttrt.md
@@ -197,8 +197,14 @@ ttrt query --save-artifacts --artifact-dir /path/to/some/dir
Run performance mode of a binary file or a directory of binary files
Note: This requires a system with silicon and a runtime-enabled build (`-DTTMLIR_ENABLE_RUNTIME=ON`), as well as a perf-enabled build (`-DTT_RUNTIME_ENABLE_PERF_TRACE=ON`).
Note: You can collect host-only performance data via the `--host-only` flag. By default, both host and device side performance data are collected.
Restriction: `/dir/of/flatbuffers` can only be used when collecting with `--host-only` (device performance data is collected when the device is closed; if we run a directory of flatbuffers, the device is only closed at the end of execution, so accurate per-binary device performance data cannot be collected).
Restriction: For now, perf mode can only be run on .mlir files that contain a single function (func.func).
If the save artifacts flag is provided, perf mode will dump the following files in the artifacts directory:
```bash
ops_perf_results.csv : compiled op performance results
profile_log_device.csv : dump of all device side profiled results
tracy_ops_data.csv : op data results dumped in a readable format
tracy_ops_times.csv : op time results dumped in a readable format
tracy_profile_log_host.tracy : Tracy profile results file; this file can be fed into the Tracy GUI
```

```bash
ttrt perf --help
@@ -215,6 +221,14 @@ ttrt perf /dir/of/flatbuffers --log-file ttrt.log --host-only
ttrt perf --save-artifacts --artifact-dir /path/to/some/dir
```

To use the Tracy GUI, run the following commands on your MacBook. You can then load your .tracy file into the GUI to view the profiled dumps.
```bash
git clone https://github.com/tenstorrent-metal/tracy.git
cd tracy/profiler/build/unix
make all
./Tracy-release
```

### check
Check a binary file or a directory of binary files against a system desc (by default, uses the host machine)
Note: It's required to be on a system with silicon and to have a runtime enabled build `-DTTMLIR_ENABLE_RUNTIME=ON`.
@@ -283,9 +297,9 @@ run_instance = API.Run(artifacts=custom_artifacts)
Once all the arguments are set up, you can run your API instance with the arguments you provided. Note: APIs are stateless, so subsequent calls to the same API instance will not preserve artifacts from previous calls. If you wish to call the APIs multiple times, you can, for example, generate a new artifacts directory for each subsequent run (see the sketch after the snippet below).

```python
query_instance()
read_instance()
run_instance()
result_code, results = query_instance()
result_code, results = read_instance()
result_code, results = run_instance()
```
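
For illustration, here is a minimal sketch of the pattern described above: it checks the returned result code and gives each call its own artifacts directory. It assumes that `API`, `Logger`, and `Artifacts` are imported as in the earlier examples and that a result code of 0 indicates success; the binary path is a hypothetical placeholder.

```python
import os

# Minimal sketch: API, Logger, and Artifacts are assumed to be imported from
# the ttrt Python package as in the earlier examples, and a result code of 0
# is assumed to indicate success.
API.initialize_apis()

custom_logger = Logger("ttrt.log")
custom_args = {"binary": "/path/to/binary.ttnn"}  # hypothetical flatbuffer path

for i in range(2):
    # APIs are stateless, so give each call its own artifacts directory.
    custom_artifacts = Artifacts(
        logger=custom_logger,
        artifacts_folder_path=f"{os.getcwd()}/artifacts-run-{i}",
    )
    run_instance = API.Run(args=custom_args, logger=custom_logger, artifacts=custom_artifacts)
    result_code, results = run_instance()
    if result_code != 0:
        print(f"run {i} reported failures: {results}")
```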

### Putting it all together
@@ -312,6 +326,7 @@ artifacts_folder_path = "/opt/folder"
custom_artifacts = Artifacts(logger=custom_logger, artifacts_folder_path=artifacts_folder_path)

run_instance = API.Run(args=custom_args, logger=custom_logger, artifacts=custom_artifacts)
result_code, results = run_instance()

```

4 changes: 0 additions & 4 deletions runtime/lib/ttmetal/CMakeLists.txt
@@ -14,7 +14,3 @@ target_link_libraries(TTRuntimeTTMetal PUBLIC TTMETAL_LIBRARY)
add_dependencies(TTRuntimeTTMetal TTMETAL_LIBRARY tt-metal FBS_GENERATION)

# Optionally compile profiling code and link tracy client for perf profiling.
if (TT_RUNTIME_ENABLE_PERF_TRACE)
target_compile_definitions(TTRuntimeTTMetal PUBLIC TRACY_ENABLE)
target_link_libraries(TTRuntimeTTMetal PUBLIC TRACY_LIBRARY)
endif()
6 changes: 6 additions & 0 deletions runtime/lib/ttnn/operations/CMakeLists.txt
@@ -41,4 +41,10 @@ target_include_directories(TTRuntimeTTNNOps PUBLIC
)
target_include_directories(TTRuntimeTTNNOps PUBLIC "$<BUILD_INTERFACE:${TTMETAL_INCLUDE_DIRS}>")
target_link_libraries(TTRuntimeTTNNOps PUBLIC TTNN_LIBRARY)

if (TT_RUNTIME_ENABLE_PERF_TRACE)
target_link_libraries(TTRuntimeTTNNOps PUBLIC TRACY_LIBRARY)
endif()


add_dependencies(TTRuntimeTTNNOps TTNN_LIBRARY tt-metal FBS_GENERATION)
4 changes: 4 additions & 0 deletions runtime/test/CMakeLists.txt
@@ -48,6 +48,10 @@ target_link_libraries(TTRuntimeTEST INTERFACE
GTest::gtest_main
)

if (TT_RUNTIME_ENABLE_PERF_TRACE)
list(APPEND TTRuntimeTEST TRACY_LIBRARY)
endif()

function(add_runtime_gtest test_name)
add_executable(${test_name} ${ARGN})
set_property(TARGET ${test_name} PROPERTY CXX_STANDARD 20)
84 changes: 23 additions & 61 deletions runtime/tools/python/test/test_perf.py
@@ -27,8 +27,7 @@
def test_flatbuffer():
API.initialize_apis()
custom_args = {}
custom_args["binary"] = BINARY_FILE_PATH
custom_args["--host-only"] = True
custom_args["binary"] = PERF_BINARY_FILE_PATH
perf_instance = API.Perf(args=custom_args)
perf_instance()

@@ -38,41 +37,18 @@ def test_flatbuffer():


def test_flatbuffer_cmd():
command = f"ttrt perf {BINARY_FILE_PATH} --host-only --log-file {test_flatbuffer_cmd.__name__}_perf.log"
command = f"ttrt perf {PERF_BINARY_FILE_PATH} --log-file {test_flatbuffer_cmd.__name__}_perf.log"
sub_process_command(command)

assert (
check_results("perf_results.json") == 0
), f"one of more tests failed in={test_flatbuffer_cmd.__name__}"


def test_dir_flatbuffer():
API.initialize_apis()
custom_args = {}
custom_args["binary"] = DIRECTORY_PATH
custom_args["--host-only"] = True
perf_instance = API.Perf(args=custom_args)
perf_instance()

assert (
check_results("perf_results.json") == 0
), f"one of more tests failed in={test_dir_flatbuffer.__name__}"


def test_dir_flatbuffer_cmd():
command = f"ttrt perf {DIRECTORY_PATH} --host-only --log-file {test_dir_flatbuffer_cmd.__name__}_perf.log"
sub_process_command(command)

assert (
check_results("perf_results.json") == 0
), f"one of more tests failed in={test_dir_flatbuffer_cmd.__name__}"


def test_logger():
API.initialize_apis()
custom_args = {}
custom_args["binary"] = BINARY_FILE_PATH
custom_args["--host-only"] = True
custom_args["binary"] = PERF_BINARY_FILE_PATH
log_file_name = "test.log"
custom_logger = Logger(log_file_name)
perf_instance = API.Perf(args=custom_args, logger=custom_logger)
@@ -86,8 +62,7 @@ def test_logger():
def test_artifacts():
API.initialize_apis()
custom_args = {}
custom_args["binary"] = BINARY_FILE_PATH
custom_args["--host-only"] = True
custom_args["binary"] = PERF_BINARY_FILE_PATH
log_file_name = "test.log"
custom_logger = Logger(log_file_name)
artifacts_folder_path = f"{os.getcwd()}/test-artifacts"
@@ -105,8 +80,7 @@ def test_artifacts():
def test_clean_artifacts():
API.initialize_apis()
custom_args = {}
custom_args["binary"] = BINARY_FILE_PATH
custom_args["--host-only"] = True
custom_args["binary"] = PERF_BINARY_FILE_PATH
custom_args["--clean-artifacts"] = True
perf_instance = API.Perf(args=custom_args)
perf_instance()
@@ -117,7 +91,7 @@ def test_clean_artifacts():


def test_clean_artifacts_cmd():
command = f"ttrt perf {BINARY_FILE_PATH} --host-only --clean-artifacts --log-file {test_clean_artifacts_cmd.__name__}_perf.log"
command = f"ttrt perf {PERF_BINARY_FILE_PATH} --clean-artifacts --log-file {test_clean_artifacts_cmd.__name__}_perf.log"
sub_process_command(command)

assert (
@@ -128,8 +102,7 @@ def test_clean_artifacts_cmd():
def test_save_artifacts():
API.initialize_apis()
custom_args = {}
custom_args["binary"] = BINARY_FILE_PATH
custom_args["--host-only"] = True
custom_args["binary"] = PERF_BINARY_FILE_PATH
custom_args["--clean-artifacts"] = True
custom_args["--save-artifacts"] = True
perf_instance = API.Perf(args=custom_args)
@@ -141,7 +114,7 @@ def test_save_artifacts():


def test_save_artifacts_cmd():
command = f"ttrt perf {BINARY_FILE_PATH} --host-only --clean-artifacts --save-artifacts --log-file {test_save_artifacts_cmd.__name__}_perf.log"
command = f"ttrt perf {PERF_BINARY_FILE_PATH} --clean-artifacts --save-artifacts --log-file {test_save_artifacts_cmd.__name__}_perf.log"
sub_process_command(command)

assert (
@@ -152,8 +125,7 @@ def test_save_artifacts_cmd():
def test_log_file():
API.initialize_apis()
custom_args = {}
custom_args["binary"] = BINARY_FILE_PATH
custom_args["--host-only"] = True
custom_args["binary"] = PERF_BINARY_FILE_PATH
custom_args["--log-file"] = "test.log"
perf_instance = API.Perf(args=custom_args)
perf_instance()
@@ -164,7 +136,7 @@ def test_log_file():


def test_log_file_cmd():
command = f"ttrt perf {BINARY_FILE_PATH} --host-only --log-file test.log --log-file {test_log_file_cmd.__name__}_perf.log"
command = f"ttrt perf {PERF_BINARY_FILE_PATH} --log-file test.log --log-file {test_log_file_cmd.__name__}_perf.log"
sub_process_command(command)

assert (
@@ -175,8 +147,7 @@ def test_log_file_cmd():
def test_artifact_dir():
API.initialize_apis()
custom_args = {}
custom_args["binary"] = BINARY_FILE_PATH
custom_args["--host-only"] = True
custom_args["binary"] = PERF_BINARY_FILE_PATH
custom_args["--clean-artifacts"] = True
custom_args["--save-artifacts"] = True
custom_args["--artifact-dir"] = f"{os.getcwd()}/test-artifacts"
@@ -189,7 +160,7 @@ def test_artifact_dir():


def test_artifact_dir_cmd():
command = f"ttrt perf {BINARY_FILE_PATH} --host-only --clean-artifacts --save-artifacts --artifact-dir {os.getcwd()}/test-artifacts --log-file {test_artifact_dir_cmd.__name__}_perf.log"
command = f"ttrt perf {PERF_BINARY_FILE_PATH} --clean-artifacts --save-artifacts --artifact-dir {os.getcwd()}/test-artifacts --log-file {test_artifact_dir_cmd.__name__}_perf.log"
sub_process_command(command)

assert (
@@ -200,8 +171,7 @@ def test_artifact_dir_cmd():
def test_program_index():
API.initialize_apis()
custom_args = {}
custom_args["binary"] = BINARY_FILE_PATH
custom_args["--host-only"] = True
custom_args["binary"] = PERF_BINARY_FILE_PATH
custom_args["--program-index"] = "0"
perf_instance = API.Perf(args=custom_args)
perf_instance()
@@ -212,7 +182,7 @@ def test_program_index():


def test_program_index_cmd():
command = f"ttrt perf {BINARY_FILE_PATH} --host-only --program-index 0 --log-file {test_program_index_cmd.__name__}_perf.log"
command = f"ttrt perf {PERF_BINARY_FILE_PATH} --program-index 0 --log-file {test_program_index_cmd.__name__}_perf.log"
sub_process_command(command)

assert (
@@ -223,8 +193,7 @@ def test_program_index_cmd():
def test_loops():
API.initialize_apis()
custom_args = {}
custom_args["binary"] = BINARY_FILE_PATH
custom_args["--host-only"] = True
custom_args["binary"] = PERF_BINARY_FILE_PATH
custom_args["--loops"] = 1
perf_instance = API.Perf(args=custom_args)
perf_instance()
@@ -235,38 +204,31 @@ def test_loops():


def test_loops_cmd():
command = f"ttrt perf {BINARY_FILE_PATH} --host-only --loops 1 --log-file {test_loops_cmd.__name__}_perf.log"
command = f"ttrt perf {PERF_BINARY_FILE_PATH} --loops 1 --log-file {test_loops_cmd.__name__}_perf.log"
sub_process_command(command)

assert (
check_results("perf_results.json") == 0
), f"one of more tests failed in={test_loops_cmd.__name__}"


@pytest.mark.skip(
"Issue: 762 - Need to support proper reading of device data. Includes fixing perf mode in ttrt"
)
def test_device():
def test_host_only():
API.initialize_apis()
custom_args = {}
custom_args["binary"] = BINARY_FILE_PATH
custom_args["binary"] = PERF_BINARY_FILE_PATH
custom_args["--host-only"] = True
perf_instance = API.Perf(args=custom_args)
perf_instance()

assert (
check_results("perf_results.json") == 0
), f"one of more tests failed in={test_device.__name__}"
), f"one of more tests failed in={test_host_only.__name__}"


@pytest.mark.skip(
"Issue: 762 - Need to support proper reading of device data. Includes fixing perf mode in ttrt"
)
def test_device_cmd():
command = (
f"ttrt perf {BINARY_FILE_PATH} --log-file {test_device_cmd.__name__}_perf.log"
)
def test_host_only_cmd():
command = f"ttrt perf {PERF_BINARY_FILE_PATH} --host-only --log-file {test_host_only_cmd.__name__}_perf.log"
sub_process_command(command)

assert (
check_results("perf_results.json") == 0
), f"one of more tests failed in={test_device_cmd.__name__}"
), f"one of more tests failed in={test_host_only_cmd.__name__}"
2 changes: 1 addition & 1 deletion runtime/tools/python/test/test_read.py
@@ -205,7 +205,7 @@ def test_section():


def test_section_cmd():
command = f"ttrt read {BINARY_FILE_PATH} --section mlir --log-file {test_section_cmd.__name__}_read.log"
command = f"ttrt read {BINARY_FILE_PATH} --section all --log-file {test_section_cmd.__name__}_read.log"
sub_process_command(command)

assert (