From f4dcb3b8c90a64d51bd7f1553d130894b762517c Mon Sep 17 00:00:00 2001
From: Collin Tod <collintod@tenstorrent.com>
Date: Tue, 4 Feb 2025 22:52:35 +0000
Subject: [PATCH 1/2] Remove dep. on runtime workaround in `binary.cpp`

Drop the `workarounds.h` include and the `defaultStrideComputation`
assertion from `calculateStride`. This avoids an otherwise harmless bug
where `ttrt` cannot find a symbol when it is built in debug mode.
---
 runtime/lib/binary.cpp | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/runtime/lib/binary.cpp b/runtime/lib/binary.cpp
index 9d09b2be67..0156f6a701 100644
--- a/runtime/lib/binary.cpp
+++ b/runtime/lib/binary.cpp
@@ -7,7 +7,6 @@
 #include "flatbuffers/idl.h"
 
 #include "tt/runtime/detail/logger.h"
-#include "tt/runtime/detail/workarounds.h"
 #include "tt/runtime/types.h"
 #include "tt/runtime/utils.h"
 #include "ttmlir/Target/Common/system_desc_bfbs_generated.h"
@@ -39,10 +38,6 @@ static std::string asJson(void const *fbb, uint8_t const *binarySchema,
 
 static std::vector<uint32_t>
 calculateStride(std::vector<uint32_t> const &shape) {
-  LOG_ASSERT(
-      workaround::Env::get().defaultStrideComputation,
-      "Stride currently removed from flatbuffer thus defaultStrideComputation"
-      "workaround must be enabled");
   LOG_ASSERT(!shape.empty());
   std::vector<uint32_t> stride(shape.size(), 1);
   for (size_t i = shape.size() - 1; i > 0; i--) {

From 26a66d2a7bcbff76668e71a96478e2f1f5875bee Mon Sep 17 00:00:00 2001
From: Collin Tod <collintod@tenstorrent.com>
Date: Thu, 6 Feb 2025 16:42:54 +0000
Subject: [PATCH 2/2] Remove all instances of `defaultStrideComputation`

---
 runtime/include/tt/runtime/detail/workarounds.h | 15 ++-------------
 runtime/lib/binary.cpp                          |  5 +++++
 runtime/lib/common/workarounds.cpp              |  3 +--
 runtime/tools/python/ttrt/common/run.py         |  8 --------
 4 files changed, 8 insertions(+), 23 deletions(-)

diff --git a/runtime/include/tt/runtime/detail/workarounds.h b/runtime/include/tt/runtime/detail/workarounds.h
index c5fcca38fe..878e52dc45 100644
--- a/runtime/include/tt/runtime/detail/workarounds.h
+++ b/runtime/include/tt/runtime/detail/workarounds.h
@@ -17,14 +17,13 @@ struct Env {
 #endif
   get(bool maxpool2dPreshard = true, bool swapBinaryOperands = true,
       bool readUpdateIndexFromDeviceForKVCache = true,
-      bool defaultStrideComputation = true,
       bool toLayoutAPIAssumeSingleChip = true,
       bool usePaddingPairSignatureWithQueueId = true)
 #if defined(TT_RUNTIME_WORKAROUNDS) && TT_RUNTIME_WORKAROUNDS == 1
       ;
 #else
   {
-    return Env(true, true, true, true, true, true);
+    return Env(true, true, true, true, true);
   }
 #endif
   // TODO(bug #855): Ideally we should have an op that preshards for maxpool2d
@@ -42,13 +41,6 @@ struct Env {
   // to be able to pluck this update index from a runtime tensor.
   bool readUpdateIndexFromDeviceForKVCache;
 
-  // TODO(bug #2045): Our current stride calculation is incorrect for tilized
-  // tensors. The current solution is to remove stride entirely from the
-  // flatbuffer and calculate the stride in runtime assuming using the default
-  // method ignoring details like grid, layout etc. Once we have a more
-  // sophisticated way for handling this, we can remove this workaround.
-  bool defaultStrideComputation;
-
   // TODO(bug #1778): We currently don't have device grid information (mesh
   // shape, offset) in the flatbuffer TensorDesc nor in the mlir LayoutAttr. We
   // need to add this information to the tensorDesc so that the runtime toLayout
@@ -70,13 +62,12 @@ struct Env {
 private:
   constexpr Env(bool maxpool2dPreshard, bool swapBinaryOperands,
                 bool readUpdateIndexFromDeviceForKVCache,
-                bool defaultStrideComputation, bool toLayoutAPIAssumeSingleChip,
+                bool toLayoutAPIAssumeSingleChip,
                 bool usePaddingPairSignatureWithQueueId)
       : maxpool2dPreshard(maxpool2dPreshard),
         swapBinaryOperands(swapBinaryOperands),
         readUpdateIndexFromDeviceForKVCache(
             readUpdateIndexFromDeviceForKVCache),
-        defaultStrideComputation(defaultStrideComputation),
         toLayoutAPIAssumeSingleChip(toLayoutAPIAssumeSingleChip),
         usePaddingPairSignatureWithQueueId(usePaddingPairSignatureWithQueueId) {
   }
@@ -91,8 +82,6 @@ inline std::ostream &operator<<(std::ostream &os, const Env &env) {
   os << "\t"
      << "readUpdateIndexFromDeviceForKVCache: "
      << env.readUpdateIndexFromDeviceForKVCache << "\n";
-  os << "\t"
-     << "defaultStrideComputation: " << env.defaultStrideComputation << "\n";
   os << "\t"
      << "toLayoutAPIAssumeSingleChip: " << env.toLayoutAPIAssumeSingleChip
      << "\n";
diff --git a/runtime/lib/binary.cpp b/runtime/lib/binary.cpp
index 0156f6a701..fd0037389f 100644
--- a/runtime/lib/binary.cpp
+++ b/runtime/lib/binary.cpp
@@ -38,6 +38,11 @@ static std::string asJson(void const *fbb, uint8_t const *binarySchema,
 
 static std::vector<uint32_t>
 calculateStride(std::vector<uint32_t> const &shape) {
+  // TODO(bug #2045): Our current stride calculation is incorrect for tilized
+  // tensors. The current solution is to remove stride entirely from the
+  // flatbuffer and calculate it at runtime using the default method,
+  // ignoring details like grid, layout, etc. Once we have a more
+  // sophisticated way of handling this, we can remove this workaround.
   LOG_ASSERT(!shape.empty());
   std::vector<uint32_t> stride(shape.size(), 1);
   for (size_t i = shape.size() - 1; i > 0; i--) {
diff --git a/runtime/lib/common/workarounds.cpp b/runtime/lib/common/workarounds.cpp
index e9847eed24..fb1b62b45d 100644
--- a/runtime/lib/common/workarounds.cpp
+++ b/runtime/lib/common/workarounds.cpp
@@ -8,12 +8,11 @@ namespace tt::runtime::workaround {
 #if defined(TT_RUNTIME_WORKAROUNDS) && TT_RUNTIME_WORKAROUNDS == 1
 const Env &Env::get(bool maxpool2dPreshard, bool swapBinaryOperands,
                     bool readUpdateIndexFromDeviceForKVCache,
-                    bool defaultStrideComputation,
                     bool toLayoutAPIAssumeSingleChip,
                     bool usePaddingPairSignatureWithQueueId) {
   static const Env config(maxpool2dPreshard, swapBinaryOperands,
                           readUpdateIndexFromDeviceForKVCache,
-                          defaultStrideComputation, toLayoutAPIAssumeSingleChip,
+                          toLayoutAPIAssumeSingleChip,
                           usePaddingPairSignatureWithQueueId);
   return config;
 }
diff --git a/runtime/tools/python/ttrt/common/run.py b/runtime/tools/python/ttrt/common/run.py
index 1dd8d5d5b0..8851fcc4d2 100644
--- a/runtime/tools/python/ttrt/common/run.py
+++ b/runtime/tools/python/ttrt/common/run.py
@@ -140,13 +140,6 @@ def initialize_api():
             choices=[True, False],
             help="disable read update index for kv cache workaround",
         )
-        Run.register_arg(
-            name="--disable-default-stride-computation",
-            type=bool,
-            default=False,
-            choices=[True, False],
-            help="disable runtime default stride computation workaround",
-        )
         Run.register_arg(
             name="--disable-to-layout-api-assume-single-chip",
             type=bool,
@@ -431,7 +424,6 @@ def convert_input_layouts(device, inputs, fbb, program_index):
                 not self["--disable-maxpool2d-preshard"],
                 not self["--disable-swap-binary-operands"],
                 not self["--disable-read-update-index-for-kv-cache"],
-                not self["--disable-default-stride-computation"],
                 not self["--disable-to-layout-api-assume-single-chip"],
                 not self["--disable-pad-op-padding-pairs-signature"],
             )