From ba352467034b73b0e572882a8af90fc96fac0fa9 Mon Sep 17 00:00:00 2001 From: Stanislav Minakov Date: Wed, 22 Jan 2025 13:04:14 -0800 Subject: [PATCH] Remove deprecated Tensor constructor with Shape (#16955) ### Ticket ### Problem description We're continuing to remove all usages of Shape/LegacyShape from the codebase ### What's changed Removed Tensor constructor which takes Shape Removed all usages ### Checklist - [x] [Post commit CI passes](https://github.com/tenstorrent/tt-metal/actions/runs/12913833779) - [x] [Model regression CI testing passes](https://github.com/tenstorrent/tt-metal/actions/runs/12899489935) - [x] [Device performance regression CI testing passes](https://github.com/tenstorrent/tt-metal/actions/runs/12899491700) - [x] [T3K unit tests CI passes](https://github.com/tenstorrent/tt-metal/actions/runs/12899497750) - [x] [T3K frequent tests CI passes](https://github.com/tenstorrent/tt-metal/actions/runs/12899494794) - [x] [Nightly model and ttnn tests CI passes](https://github.com/tenstorrent/tt-metal/actions/runs/12899505501) - [x] New/Existing tests provide coverage for changes --- tests/tt_eager/ops/test_eltwise_binary_op.cpp | 2 +- tests/tt_eager/ops/test_eltwise_unary_op.cpp | 2 +- .../ops/test_transpose_wh_multi_core.cpp | 2 +- .../ops/test_transpose_wh_single_core.cpp | 2 +- .../tensors/test_raw_host_memory_pointer.cpp | 22 +- .../unit_tests/gtests/test_async_runtime.cpp | 2 +- .../unit_tests/gtests/test_multi_device.cpp | 6 +- tt-train/sources/examples/sample_app/main.cpp | 2 +- .../sources/ttml/core/tt_tensor_utils.cpp | 2 +- ttnn/cpp/ttnn/distributed/api.cpp | 47 ++-- .../operations/core/to_dtype/to_dtype_op.hpp | 119 +++++---- .../pad/device/pad_program_factory.cpp | 4 +- .../reshape_on_device/reshape.cpp | 24 +- .../unary/device/unary_composite_op.cpp | 6 +- .../experimental/reduction/argmax/argmax.cpp | 39 ++- ttnn/cpp/ttnn/operations/functions.hpp | 243 ++++++++++++------ .../upsample_program_factory_multicore.cpp | 2 +- 
.../sliding_window/sliding_window.cpp | 6 +- ttnn/cpp/ttnn/tensor/serialization.cpp | 14 +- ttnn/cpp/ttnn/tensor/tensor.cpp | 6 +- ttnn/cpp/ttnn/tensor/tensor.hpp | 6 - ttnn/cpp/ttnn/tensor/tensor_impl.cpp | 89 +++++-- 22 files changed, 413 insertions(+), 234 deletions(-) diff --git a/tests/tt_eager/ops/test_eltwise_binary_op.cpp b/tests/tt_eager/ops/test_eltwise_binary_op.cpp index e5251876b0a..23263a253ac 100644 --- a/tests/tt_eager/ops/test_eltwise_binary_op.cpp +++ b/tests/tt_eager/ops/test_eltwise_binary_op.cpp @@ -28,7 +28,7 @@ Tensor host_function(const Tensor& input_tensor_a, const Tensor& input_tensor_b) } return Tensor( OwnedStorage{output_buffer}, - input_tensor_a.get_legacy_shape(), + input_tensor_a.get_logical_shape(), input_tensor_a.get_dtype(), input_tensor_a.get_layout()); } diff --git a/tests/tt_eager/ops/test_eltwise_unary_op.cpp b/tests/tt_eager/ops/test_eltwise_unary_op.cpp index 462979b0b38..c3f2b04feba 100644 --- a/tests/tt_eager/ops/test_eltwise_unary_op.cpp +++ b/tests/tt_eager/ops/test_eltwise_unary_op.cpp @@ -50,7 +50,7 @@ Tensor host_function(const Tensor& input_tensor) { return Tensor( OwnedStorage{output_buffer}, - input_tensor.get_legacy_shape(), + input_tensor.get_logical_shape(), input_tensor.get_dtype(), input_tensor.get_layout()); } diff --git a/tests/tt_eager/ops/test_transpose_wh_multi_core.cpp b/tests/tt_eager/ops/test_transpose_wh_multi_core.cpp index 7b332af4e87..2f7e7a8584d 100644 --- a/tests/tt_eager/ops/test_transpose_wh_multi_core.cpp +++ b/tests/tt_eager/ops/test_transpose_wh_multi_core.cpp @@ -23,7 +23,7 @@ using namespace constants; Tensor perform_transpose_wh(Tensor& input_tensor) { TT_FATAL(input_tensor.storage_type() == StorageType::OWNED, "Error"); - auto ashape = input_tensor.get_legacy_shape(); + auto ashape = input_tensor.get_padded_shape(); TT_FATAL(ashape.rank() == 4, "Error"); auto bshape = ashape; bshape[2] = ashape[3]; diff --git a/tests/tt_eager/ops/test_transpose_wh_single_core.cpp 
b/tests/tt_eager/ops/test_transpose_wh_single_core.cpp index 7b332af4e87..2f7e7a8584d 100644 --- a/tests/tt_eager/ops/test_transpose_wh_single_core.cpp +++ b/tests/tt_eager/ops/test_transpose_wh_single_core.cpp @@ -23,7 +23,7 @@ using namespace constants; Tensor perform_transpose_wh(Tensor& input_tensor) { TT_FATAL(input_tensor.storage_type() == StorageType::OWNED, "Error"); - auto ashape = input_tensor.get_legacy_shape(); + auto ashape = input_tensor.get_padded_shape(); TT_FATAL(ashape.rank() == 4, "Error"); auto bshape = ashape; bshape[2] = ashape[3]; diff --git a/tests/tt_eager/tensors/test_raw_host_memory_pointer.cpp b/tests/tt_eager/tensors/test_raw_host_memory_pointer.cpp index bc68c910fe5..74601225461 100644 --- a/tests/tt_eager/tensors/test_raw_host_memory_pointer.cpp +++ b/tests/tt_eager/tensors/test_raw_host_memory_pointer.cpp @@ -44,14 +44,13 @@ template struct NDArray { - tt::tt_metal::LegacyShape shape; + ttnn::SimpleShape shape; void* data; - NDArray(tt::tt_metal::LegacyShape shape) : - shape(shape), data(malloc(tt::tt_metal::compute_volume(shape) * sizeof(DataType))) {} + NDArray(const ttnn::SimpleShape& shape) : shape(shape), data(malloc(shape.volume() * sizeof(DataType))) {} ~NDArray() { free(data); } - std::size_t size() const { return tt::tt_metal::compute_volume(shape); } + std::size_t size() const { return shape.volume(); } }; void test_raw_host_memory_pointer() { @@ -66,14 +65,11 @@ void test_raw_host_memory_pointer() { int device_id = 0; tt::tt_metal::IDevice* device = tt::tt_metal::CreateDevice(device_id); - tt::tt_metal::LegacyShape shape = {1, 1, tt::constants::TILE_HEIGHT, tt::constants::TILE_WIDTH}; + ttnn::SimpleShape shape({1, 1, tt::constants::TILE_HEIGHT, tt::constants::TILE_WIDTH}); // Host tensor to print the output - Tensor tensor_for_printing = Tensor( - OwnedStorage{owned_buffer::create(tt::tt_metal::compute_volume(shape))}, - shape, - DataType::BFLOAT16, - Layout::TILE); + Tensor tensor_for_printing = + 
Tensor(OwnedStorage{owned_buffer::create(shape.volume())}, shape, DataType::BFLOAT16, Layout::TILE); /* Borrow Data from Numpy Start */ // Create some @@ -128,14 +124,12 @@ void test_raw_host_memory_pointer() { /* Alternative Way to Print Start */ // Alternatively, we could allocate memory manually and create Tensors with BorrowedStorage on the fly to print the // data - void* storage_of_alternative_tensor_for_printing = malloc(tt::tt_metal::compute_volume(shape) * sizeof(bfloat16)); + void* storage_of_alternative_tensor_for_printing = malloc(shape.volume() * sizeof(bfloat16)); tt::tt_metal::memcpy(storage_of_alternative_tensor_for_printing, c_dev); Tensor alternative_tensor_for_printing = Tensor( BorrowedStorage{ - borrowed_buffer::Buffer( - static_cast(storage_of_alternative_tensor_for_printing), - tt::tt_metal::compute_volume(shape)), + borrowed_buffer::Buffer(static_cast(storage_of_alternative_tensor_for_printing), shape.volume()), on_creation_callback, on_destruction_callback}, shape, diff --git a/tests/ttnn/unit_tests/gtests/test_async_runtime.cpp b/tests/ttnn/unit_tests/gtests/test_async_runtime.cpp index 5be2e69c90f..21e055b1a10 100644 --- a/tests/ttnn/unit_tests/gtests/test_async_runtime.cpp +++ b/tests/ttnn/unit_tests/gtests/test_async_runtime.cpp @@ -67,7 +67,7 @@ TEST_F(MultiCommandQueueSingleDeviceFixture, TestAsyncPreallocatedOutputs) { Tensor input_tensor = Tensor( input_storage, TensorSpec(input_shape, TensorLayout(DataType::BFLOAT16, PageConfig(Layout::TILE), MemoryConfig{}))); - Tensor output_tensor = Tensor(output_storage, np_out.get_shape(), DataType::BFLOAT16, Layout::TILE); + Tensor output_tensor = Tensor(output_storage, np_out.get_logical_shape(), DataType::BFLOAT16, Layout::TILE); // Populate input_tensor with data ttnn::write_buffer(io_cq, input_tensor, {host_data}); // Record the completion of the write event diff --git a/tests/ttnn/unit_tests/gtests/test_multi_device.cpp b/tests/ttnn/unit_tests/gtests/test_multi_device.cpp index 
f0caefa847b..f543672e9ba 100644 --- a/tests/ttnn/unit_tests/gtests/test_multi_device.cpp +++ b/tests/ttnn/unit_tests/gtests/test_multi_device.cpp @@ -20,11 +20,7 @@ Tensor create_host_multi_device_tensor(const Tensor& tensor, const ReplicateTens specs.push_back(tensor.get_tensor_spec()); } - return Tensor{ - MultiDeviceHostStorage(strategy, owned_buffers, specs), - tensor.get_legacy_shape(), - tensor.get_dtype(), - tensor.get_layout()}; + return Tensor{MultiDeviceHostStorage(strategy, owned_buffers, specs), tensor.get_tensor_spec()}; } TEST_F(T3kMultiDeviceFixture, TestGetTensorsFromMultiDeviceStorage) { diff --git a/tt-train/sources/examples/sample_app/main.cpp b/tt-train/sources/examples/sample_app/main.cpp index c8917fef49c..7ecea3b33b6 100644 --- a/tt-train/sources/examples/sample_app/main.cpp +++ b/tt-train/sources/examples/sample_app/main.cpp @@ -73,7 +73,7 @@ int main() { // Let the tensor take ownership of the buffer OwnedStorage{std::move(buffer)}, // IMPORTANT: SHAPE MUST BE 4D ELSE EVERYTHING WILL BREAK during the PAD operation - {1, 1, tensor_width, tensor_height}, + ttnn::SimpleShape({1, 1, tensor_width, tensor_height}), // The data type of the tensor tt::tt_metal::DataType::BFLOAT16, // The layout of the tensor. We don't care about the layout in this demo. 
But the valid options are TILE and diff --git a/tt-train/sources/ttml/core/tt_tensor_utils.cpp b/tt-train/sources/ttml/core/tt_tensor_utils.cpp index 8ac1141929a..c908ed3034a 100644 --- a/tt-train/sources/ttml/core/tt_tensor_utils.cpp +++ b/tt-train/sources/ttml/core/tt_tensor_utils.cpp @@ -87,7 +87,7 @@ tt::tt_metal::Tensor ttml_create_owned_tensor( std::vector&& data, const ttnn::Shape& shape, tt::tt_metal::DataType data_type, tt::tt_metal::Layout layout) { auto buffer = tt::tt_metal::owned_buffer::create(std::move(data)); auto storage = OwnedStorage{std::move(buffer)}; - return {std::move(storage), shape, data_type, layout}; + return {std::move(storage), shape.logical_shape(), data_type, layout}; } } // namespace diff --git a/ttnn/cpp/ttnn/distributed/api.cpp b/ttnn/cpp/ttnn/distributed/api.cpp index 0cc8e32c87e..6690b490158 100644 --- a/ttnn/cpp/ttnn/distributed/api.cpp +++ b/ttnn/cpp/ttnn/distributed/api.cpp @@ -92,12 +92,7 @@ Tensor aggregate_as_tensor( } } auto storage = MultiDeviceHostStorage{config, std::move(host_owned_buffers), specs}; - return Tensor( - std::move(storage), - reference_shard.get_legacy_shape(), - reference_shard.get_dtype(), - reference_shard.get_layout(), - tile); + return Tensor(std::move(storage), reference_shard.get_tensor_spec()); } else { std::vector ordered_device_ids; std::unordered_map specs; @@ -122,12 +117,7 @@ Tensor aggregate_as_tensor( } } auto storage = MultiDeviceStorage{config, ordered_device_ids, std::move(device_buffers), specs}; - return Tensor( - std::move(storage), - reference_shard.get_legacy_shape(), - reference_shard.get_dtype(), - reference_shard.get_layout(), - tile); + return Tensor(std::move(storage), reference_shard.get_tensor_spec()); } } @@ -194,9 +184,14 @@ Tensor get_device_tensor(const Tensor& multi_device_tensor, const int device_id) tensor_storage != nullptr && tensor_storage->has_buffer_for_device_id(device_id)) { return Tensor{ DeviceStorage{tensor_storage->get_buffer_for_device_id(device_id)}, - 
multi_device_tensor.get_shape(), - multi_device_tensor.get_dtype(), - multi_device_tensor.get_layout()}; + TensorSpec( + multi_device_tensor.get_logical_shape(), + TensorLayout::fromPaddedShape( + multi_device_tensor.get_dtype(), + PageConfig(multi_device_tensor.get_layout()), + MemoryConfig{}, + multi_device_tensor.get_logical_shape(), + multi_device_tensor.get_padded_shape()))}; } else if (std::holds_alternative(multi_device_tensor.get_storage())) { return multi_device_tensor; } @@ -266,9 +261,14 @@ Tensor create_multi_device_tensor( } return Tensor{ MultiDeviceStorage{strategy, ordered_device_ids, device_buffers, specs}, - tensors.at(0).get_legacy_shape(), - tensors.at(0).get_dtype(), - tensors.at(0).get_layout()}; + TensorSpec( + tensors.at(0).get_logical_shape(), + TensorLayout::fromPaddedShape( + tensors.at(0).get_dtype(), + PageConfig(tensors.at(0).get_layout()), + MemoryConfig{}, + tensors.at(0).get_logical_shape(), + tensors.at(0).get_padded_shape()))}; } else if (storage_type == StorageType::MULTI_DEVICE_HOST) { std::vector owned_buffers; std::vector specs; @@ -282,9 +282,14 @@ Tensor create_multi_device_tensor( } return Tensor{ MultiDeviceHostStorage{strategy, owned_buffers, specs}, - tensors.at(0).get_legacy_shape(), - tensors.at(0).get_dtype(), - tensors.at(0).get_layout()}; + TensorSpec( + tensors.at(0).get_logical_shape(), + TensorLayout::fromPaddedShape( + tensors.at(0).get_dtype(), + PageConfig(tensors.at(0).get_layout()), + MemoryConfig{}, + tensors.at(0).get_logical_shape(), + tensors.at(0).get_padded_shape()))}; } else { TT_THROW("Invalid storage type for multi-device tensor"); } diff --git a/ttnn/cpp/ttnn/operations/core/to_dtype/to_dtype_op.hpp b/ttnn/cpp/ttnn/operations/core/to_dtype/to_dtype_op.hpp index acf536bc5f5..fe5829d7341 100644 --- a/ttnn/cpp/ttnn/operations/core/to_dtype/to_dtype_op.hpp +++ b/ttnn/cpp/ttnn/operations/core/to_dtype/to_dtype_op.hpp @@ -74,17 +74,27 @@ inline Tensor convert_to_cpp_supported_dtype(const Tensor& 
input_tensor) { [&](auto&& buffer) -> Tensor { using T = std::decay_t; if constexpr (std::is_same_v) { - return Tensor{ + return Tensor( tt::tt_metal::OwnedStorage{buffer}, - input_tensor.get_shape(), - input_dtype, - input_tensor.get_layout()}; + TensorSpec( + input_tensor.get_logical_shape(), + TensorLayout::fromPaddedShape( + input_dtype, + PageConfig(input_tensor.get_layout()), + MemoryConfig{}, + input_tensor.get_logical_shape(), + input_tensor.get_padded_shape()))); } else if constexpr (std::is_same_v) { return Tensor{ tt::tt_metal::BorrowedStorage{buffer, []() {}, []() {}}, - input_tensor.get_shape(), - input_dtype, - input_tensor.get_layout()}; + TensorSpec( + input_tensor.get_logical_shape(), + TensorLayout::fromPaddedShape( + input_dtype, + PageConfig(input_tensor.get_layout()), + MemoryConfig{}, + input_tensor.get_logical_shape(), + input_tensor.get_padded_shape()))}; } else { TT_THROW("Unsupported buffer type"); } @@ -112,46 +122,65 @@ inline std::vector cast(const tt::tt_metal::borrowed_buffer::Buffer& } template -Tensor create_owned_tensor(std::vector&& data, const Shape& shape, DataType data_type, Layout layout) { +Tensor create_owned_tensor( + std::vector&& data, + const SimpleShape& logical_shape, + const SimpleShape& padded_shape, + DataType data_type, + Layout layout) { auto buffer = tt::tt_metal::owned_buffer::create(std::move(data)); auto storage = tt::tt_metal::OwnedStorage{std::move(buffer)}; - return Tensor(std::move(storage), shape, data_type, layout); + return Tensor( + std::move(storage), + TensorSpec( + logical_shape, + TensorLayout::fromPaddedShape(data_type, PageConfig(layout), MemoryConfig{}, logical_shape, padded_shape))); } template inline Tensor create_tensor_from_buffer( const tt::tt_metal::borrowed_buffer::Buffer& input_buffer, - const Shape& shape, + const SimpleShape& logical_shape, + const SimpleShape& padded_shape, const Layout& input_layout, const DataType& dtype) { switch (dtype) { case DataType::UINT16: { auto data = 
cast(input_buffer); - return create_owned_tensor(std::move(data), shape, dtype, Layout::ROW_MAJOR).to(input_layout); + return create_owned_tensor(std::move(data), logical_shape, padded_shape, dtype, Layout::ROW_MAJOR) + .to(input_layout); } case DataType::INT32: { auto data = cast(input_buffer); - return create_owned_tensor(std::move(data), shape, dtype, Layout::ROW_MAJOR).to(input_layout); + return create_owned_tensor(std::move(data), logical_shape, padded_shape, dtype, Layout::ROW_MAJOR) + .to(input_layout); } case DataType::UINT32: { auto data = cast(input_buffer); - return create_owned_tensor(std::move(data), shape, dtype, Layout::ROW_MAJOR).to(input_layout); + return create_owned_tensor(std::move(data), logical_shape, padded_shape, dtype, Layout::ROW_MAJOR) + .to(input_layout); } case DataType::FLOAT32: { auto data = cast(input_buffer); - return create_owned_tensor(std::move(data), shape, dtype, Layout::ROW_MAJOR).to(input_layout); + return create_owned_tensor(std::move(data), logical_shape, padded_shape, dtype, Layout::ROW_MAJOR) + .to(input_layout); } case DataType::BFLOAT16: { auto data = cast<::bfloat16, T>(input_buffer); - return create_owned_tensor(std::move(data), shape, dtype, Layout::ROW_MAJOR).to(input_layout); + return create_owned_tensor(std::move(data), logical_shape, padded_shape, dtype, Layout::ROW_MAJOR) + .to(input_layout); } case DataType::BFLOAT8_B: case DataType::BFLOAT4_B: { auto data = cast(input_buffer); auto buffer = tt::tt_metal::owned_buffer::create(std::move(data)); - auto tensor = - Tensor(tt::tt_metal::OwnedStorage{std::move(buffer)}, shape, DataType::FLOAT32, Layout::ROW_MAJOR) - .to(Layout::TILE); + auto tensor = Tensor( + tt::tt_metal::OwnedStorage{std::move(buffer)}, + logical_shape, + padded_shape, + DataType::FLOAT32, + Layout::ROW_MAJOR) + .to(Layout::TILE); auto output_float_data = tt::tt_metal::owned_buffer::get_as(tensor).get(); auto output_packed_data = dtype == DataType::BFLOAT8_B @@ -160,7 +189,8 @@ inline Tensor 
create_tensor_from_buffer( auto output_buffer = tt::tt_metal::owned_buffer::create(std::move(output_packed_data)); return Tensor( tt::tt_metal::OwnedStorage{std::move(output_buffer)}, - shape, + logical_shape, + padded_shape, dtype, Layout::TILE); // has to be in tile layout } @@ -173,32 +203,35 @@ inline Tensor create_tensor_from_buffer( inline Tensor convert_to_dtype(const Tensor& input_tensor, const Layout& input_layout, const DataType& dtype) { auto input_dtype = input_tensor.get_dtype(); + const auto& logical_shape = input_tensor.get_logical_shape(); + const auto& padded_shape = input_tensor.get_padded_shape(); - auto convert_dtype = [&input_layout, &input_dtype, &dtype](const Tensor& input_tensor) { - switch (input_dtype) { - case DataType::UINT16: { - auto buffer = host_buffer::get_as(input_tensor); - return create_tensor_from_buffer(buffer, input_tensor.get_shape(), input_layout, dtype); - } - case DataType::INT32: { - auto buffer = host_buffer::get_as(input_tensor); - return create_tensor_from_buffer(buffer, input_tensor.get_shape(), input_layout, dtype); - } - case DataType::UINT32: { - auto buffer = host_buffer::get_as(input_tensor); - return create_tensor_from_buffer(buffer, input_tensor.get_shape(), input_layout, dtype); - } - case DataType::FLOAT32: { - auto buffer = host_buffer::get_as(input_tensor); - return create_tensor_from_buffer(buffer, input_tensor.get_shape(), input_layout, dtype); + auto convert_dtype = + [&input_layout, &input_dtype, &dtype, &logical_shape, &padded_shape](const Tensor& input_tensor) { + switch (input_dtype) { + case DataType::UINT16: { + auto buffer = host_buffer::get_as(input_tensor); + return create_tensor_from_buffer(buffer, logical_shape, padded_shape, input_layout, dtype); + } + case DataType::INT32: { + auto buffer = host_buffer::get_as(input_tensor); + return create_tensor_from_buffer(buffer, logical_shape, padded_shape, input_layout, dtype); + } + case DataType::UINT32: { + auto buffer = 
host_buffer::get_as(input_tensor); + return create_tensor_from_buffer(buffer, logical_shape, padded_shape, input_layout, dtype); + } + case DataType::FLOAT32: { + auto buffer = host_buffer::get_as(input_tensor); + return create_tensor_from_buffer(buffer, logical_shape, padded_shape, input_layout, dtype); + } + case DataType::BFLOAT16: { + auto buffer = host_buffer::get_as<::bfloat16>(input_tensor); + return create_tensor_from_buffer(buffer, logical_shape, padded_shape, input_layout, dtype); + } + default: TT_THROW("Unsupported DataType: {}", input_dtype); break; } - case DataType::BFLOAT16: { - auto buffer = host_buffer::get_as<::bfloat16>(input_tensor); - return create_tensor_from_buffer(buffer, input_tensor.get_shape(), input_layout, dtype); - } - default: TT_THROW("Unsupported DataType: {}", input_dtype); break; - } - }; + }; return distributed::is_multi_device_tensor(input_tensor) ? transform(input_tensor, convert_dtype) : convert_dtype(input_tensor); } diff --git a/ttnn/cpp/ttnn/operations/data_movement/pad/device/pad_program_factory.cpp b/ttnn/cpp/ttnn/operations/data_movement/pad/device/pad_program_factory.cpp index fbd6c78e276..ce6e955fd22 100644 --- a/ttnn/cpp/ttnn/operations/data_movement/pad/device/pad_program_factory.cpp +++ b/ttnn/cpp/ttnn/operations/data_movement/pad/device/pad_program_factory.cpp @@ -43,7 +43,7 @@ operation::ProgramWithCallbacks pad_rm_reader_writer( const Tensor pad_value_const_tensor = Tensor( OwnedStorage{pad_value_const_buffer}, - Shape(std::array{1, 1, 1, pad_value_const_buffer_size}), + ttnn::SimpleShape({1, 1, 1, pad_value_const_buffer_size}), DataType::BFLOAT16, Layout::ROW_MAJOR) .to(device, MemoryConfig{.memory_layout = TensorMemoryLayout::INTERLEAVED, .buffer_type = BufferType::L1}); @@ -474,7 +474,7 @@ operation::ProgramWithCallbacks pad_rm_reader_writer_multi_core( const Tensor pad_value_const_tensor = Tensor( OwnedStorage{pad_value_const_buffer}, - Shape(std::array{1, 1, 1, pad_value_const_buffer_size}), + 
ttnn::SimpleShape({1, 1, 1, pad_value_const_buffer_size}), DataType::BFLOAT16, Layout::ROW_MAJOR) .to(device, MemoryConfig{.memory_layout = TensorMemoryLayout::INTERLEAVED, .buffer_type = BufferType::L1}); diff --git a/ttnn/cpp/ttnn/operations/data_movement/reshape_on_device/reshape.cpp b/ttnn/cpp/ttnn/operations/data_movement/reshape_on_device/reshape.cpp index 40b22b244a0..8702f4b04ff 100644 --- a/ttnn/cpp/ttnn/operations/data_movement/reshape_on_device/reshape.cpp +++ b/ttnn/cpp/ttnn/operations/data_movement/reshape_on_device/reshape.cpp @@ -18,12 +18,16 @@ namespace ttnn::operations::data_movement { namespace detail { static Tensor manual_insertion( - const Tensor& input_tensor, const ttnn::Shape& shape, IDevice* device, const MemoryConfig& output_mem_config) { + const Tensor& input_tensor, + const ttnn::SimpleShape& logical_shape, + const ttnn::SimpleShape& padded_shape, + IDevice* device, + const MemoryConfig& output_mem_config) { TT_ASSERT(input_tensor.get_layout() == Layout::ROW_MAJOR); TT_ASSERT( - shape.logical_shape().volume() == input_tensor.get_logical_volume(), + logical_shape.volume() == input_tensor.get_logical_volume(), "Required shape volume ({}) must match old shape volume ({})", - shape.logical_shape().volume(), + logical_shape.volume(), input_tensor.get_logical_volume()); auto device_buffer = input_tensor.device_buffer(); uint32_t size_in_bytes = device_buffer->size(); @@ -38,7 +42,13 @@ static Tensor manual_insertion( } auto owned_buffer = owned_buffer::create(std::move(data_vec)); auto output = - Tensor(OwnedStorage{owned_buffer}, shape, DataType::BFLOAT16, Layout::ROW_MAJOR).to(Layout::ROW_MAJOR); + Tensor( + OwnedStorage{owned_buffer}, + TensorSpec( + logical_shape, + TensorLayout::fromPaddedShape( + DataType::BFLOAT16, PageConfig(Layout::ROW_MAJOR), MemoryConfig{}, logical_shape, padded_shape))) + .to(Layout::ROW_MAJOR); if (device != nullptr) { output = output.to(device, output_mem_config); } @@ -74,7 +84,11 @@ ttnn::Tensor 
ReshapeOperation::invoke( TT_FATAL(input_tensor.get_dtype() == DataType::BFLOAT16, "Error"); return detail::manual_insertion( - (tt::tt_metal::Tensor)input_tensor, output_shape, input_tensor.device(), output_mem_config); + (tt::tt_metal::Tensor)input_tensor, + output_shape.logical_shape(), + output_shape.padded_shape(), + input_tensor.device(), + output_mem_config); } std::vector output_tensors = {Tensor(tt::tt_metal::operation::get_workers_for_op_output({input_tensor}))}; return operation::run(ReshapeDeviceOperation{output_shape, output_mem_config}, {input_tensor}).at(0); diff --git a/ttnn/cpp/ttnn/operations/eltwise/unary/device/unary_composite_op.cpp b/ttnn/cpp/ttnn/operations/eltwise/unary/device/unary_composite_op.cpp index 8d6198cbc9c..2120f20c26f 100644 --- a/ttnn/cpp/ttnn/operations/eltwise/unary/device/unary_composite_op.cpp +++ b/ttnn/cpp/ttnn/operations/eltwise/unary/device/unary_composite_op.cpp @@ -674,7 +674,8 @@ Tensor _swiglu(const Tensor& input_a, int32_t dim, const std::optional& output_mem_config) { Tensor index_l = ttnn::index_tril<::bfloat16>( - input_a.get_legacy_shape(), + input_a.get_logical_shape(), + input_a.get_padded_shape(), diag, DataType::BFLOAT16, Layout::TILE, @@ -686,7 +687,8 @@ Tensor _tril(const Tensor& input_a, int32_t diag, const std::optional& output_mem_config) { Tensor index_u = ttnn::index_triu<::bfloat16>( - input_a.get_legacy_shape(), + input_a.get_logical_shape(), + input_a.get_padded_shape(), diag, DataType::BFLOAT16, Layout::TILE, diff --git a/ttnn/cpp/ttnn/operations/experimental/reduction/argmax/argmax.cpp b/ttnn/cpp/ttnn/operations/experimental/reduction/argmax/argmax.cpp index ef309401445..856bdac5d77 100644 --- a/ttnn/cpp/ttnn/operations/experimental/reduction/argmax/argmax.cpp +++ b/ttnn/cpp/ttnn/operations/experimental/reduction/argmax/argmax.cpp @@ -15,8 +15,8 @@ namespace ttnn::operations::experimental::reduction { Tensor create_mask(const Tensor& input_a, const std::optional& output_mem_config) { - auto 
padded_shape = input_a.get_legacy_shape(); - auto& unpadded_shape = padded_shape.without_padding(); + auto& padded_shape = input_a.get_padded_shape(); + auto& unpadded_shape = input_a.get_logical_shape(); if (padded_shape == unpadded_shape) { return input_a; } @@ -50,12 +50,22 @@ Tensor ArgmaxOperation::invoke( Tensor max_val = ttnn::max(input_a, (int)dim, true, output_memory_config); Tensor max_tensor = ttnn::zeros_like(input_a); Tensor tindex = ttnn::index_width<::bfloat16>( - input_shape, DataType::BFLOAT16, Layout::TILE, input_a.device(), output_memory_config); + input.get_logical_shape(), + input.get_padded_shape(), + DataType::BFLOAT16, + Layout::TILE, + input_a.device(), + output_memory_config); if (is_width) { max_tensor = ttnn::add(max_tensor, max_val, std::nullopt, output_memory_config); } else { tindex = ttnn::index_height<::bfloat16>( - input_shape, DataType::BFLOAT16, Layout::TILE, input_a.device(), output_memory_config); + input.get_logical_shape(), + input.get_padded_shape(), + DataType::BFLOAT16, + Layout::TILE, + input_a.device(), + output_memory_config); max_tensor = ttnn::add(max_tensor, max_val, std::nullopt, output_memory_config); } tindex = tindex.to(input_a.device()); @@ -94,10 +104,20 @@ Tensor ArgmaxOperation::invoke( Tensor cmp_results = ttnn::eq(input_a, concat_out, std::nullopt, output_memory_config); concat_out.deallocate(); Tensor tindex = ttnn::index_channel<::bfloat16>( - input_shape, DataType::BFLOAT16, Layout::TILE, input_a.device(), output_memory_config); + input.get_logical_shape(), + input.get_padded_shape(), + DataType::BFLOAT16, + Layout::TILE, + input_a.device(), + output_memory_config); if (!is_channel) { tindex = ttnn::index_batch<::bfloat16>( - input_shape, DataType::BFLOAT16, Layout::TILE, input_a.device(), output_memory_config); + input.get_logical_shape(), + input.get_padded_shape(), + DataType::BFLOAT16, + Layout::TILE, + input_a.device(), + output_memory_config); } tindex = tindex.to(input_a.device()); Tensor 
max_indices = ttnn::multiply(cmp_results, tindex, std::nullopt, output_memory_config); @@ -120,7 +140,12 @@ Tensor ArgmaxOperation::invoke( // TODO: Fix the index generation code. With the fix the code will work for argmax that return entire // maximum value index Tensor tindex = ttnn::index_all<::bfloat16>( - input_shape, DataType::BFLOAT16, Layout::TILE, input_a.device(), output_memory_config); + input.get_logical_shape(), + input.get_padded_shape(), + DataType::BFLOAT16, + Layout::TILE, + input_a.device(), + output_memory_config); Tensor max_val = ttnn::max(input_a, std::nullopt, true, output_memory_config); Tensor max_tensor = ttnn::zeros_like(input_a); max_tensor = ttnn::add(max_tensor, max_val, std::nullopt, output_memory_config); diff --git a/ttnn/cpp/ttnn/operations/functions.hpp b/ttnn/cpp/ttnn/operations/functions.hpp index 9d4493c5446..6739db01132 100644 --- a/ttnn/cpp/ttnn/operations/functions.hpp +++ b/ttnn/cpp/ttnn/operations/functions.hpp @@ -27,7 +27,8 @@ using tt::tt_metal::Tensor; template static Tensor index_trilu( - const tt::tt_metal::LegacyShape& shape, + const ttnn::SimpleShape& logical_shape, + const ttnn::SimpleShape& padded_shape, const int32_t diag, DataType data_type, const Layout layout = Layout::ROW_MAJOR, @@ -35,31 +36,37 @@ static Tensor index_trilu( const MemoryConfig& output_mem_config = MemoryConfig{ .memory_layout = tt::tt_metal::TensorMemoryLayout::INTERLEAVED}) { // Current implementation restrictions - auto owned_buffer = tt::tt_metal::owned_buffer::create(tt::tt_metal::compute_volume(shape)); + auto owned_buffer = tt::tt_metal::owned_buffer::create(padded_shape.volume()); auto index = 0; - auto rank = shape.rank(); + auto rank = padded_shape.rank(); auto penultimate = rank - 2; auto ultimate = rank - 1; - auto offset = shape[penultimate] * shape[ultimate]; + auto offset = padded_shape[penultimate] * padded_shape[ultimate]; auto iterations = 1; for (int itr = 0; itr < rank - 2; itr++) { - iterations *= shape[itr]; + iterations 
*= padded_shape[itr]; } for (uint32_t itr = 0; itr < iterations; itr++) { - for (int32_t y = 0; y < shape[penultimate]; y++) { - for (int32_t x = 0; x < shape[ultimate]; x++) { + for (int32_t y = 0; y < padded_shape[penultimate]; y++) { + for (int32_t x = 0; x < padded_shape[ultimate]; x++) { int32_t value = (IS_UPPER) ? (x >= (y + diag)) : (y >= (x - diag)); if constexpr (std::is_same_v) { - owned_buffer[index + y * shape[ultimate] + x] = T(static_cast(value)); + owned_buffer[index + y * padded_shape[ultimate] + x] = T(static_cast(value)); } else { - owned_buffer[index + y * shape[ultimate] + x] = static_cast(value); + owned_buffer[index + y * padded_shape[ultimate] + x] = static_cast(value); } } // dim X } // dim Y index += offset; } - auto output = Tensor(OwnedStorage{owned_buffer}, shape, data_type, Layout::ROW_MAJOR).to(layout); + auto output = Tensor( + OwnedStorage{owned_buffer}, + TensorSpec( + logical_shape, + TensorLayout::fromPaddedShape( + data_type, PageConfig(Layout::ROW_MAJOR), MemoryConfig{}, logical_shape, padded_shape))) + .to(layout); if (device != nullptr) { output = output.to(device, output_mem_config); } @@ -68,34 +75,40 @@ static Tensor index_trilu( template static Tensor index_width( - const tt::tt_metal::LegacyShape& shape, + const ttnn::SimpleShape& logical_shape, + const ttnn::SimpleShape& padded_shape, DataType data_type, const Layout layout = Layout::ROW_MAJOR, IDevice* device = nullptr, const MemoryConfig& output_mem_config = MemoryConfig{ .memory_layout = tt::tt_metal::TensorMemoryLayout::INTERLEAVED}) { - auto owned_buffer = tt::tt_metal::owned_buffer::create(tt::tt_metal::compute_volume(shape)); + auto owned_buffer = tt::tt_metal::owned_buffer::create(padded_shape.volume()); std::fill(owned_buffer.begin(), owned_buffer.end(), -std::numeric_limits::infinity()); - auto& up_shape = shape.without_padding(); auto index = 0; auto value = 0; - auto rank = up_shape.rank(); + auto rank = logical_shape.rank(); auto penultimate = rank - 2; 
auto ultimate = rank - 1; - for (uint32_t b = 0; b < up_shape[rank - 4]; b++) { - for (uint32_t c = 0; c < up_shape[rank - 3]; c++) { - for (uint32_t y = 0; y < up_shape[penultimate]; y++) { - for (uint32_t x = 0; x < up_shape[ultimate]; x++) { + for (uint32_t b = 0; b < logical_shape[rank - 4]; b++) { + for (uint32_t c = 0; c < logical_shape[rank - 3]; c++) { + for (uint32_t y = 0; y < logical_shape[penultimate]; y++) { + for (uint32_t x = 0; x < logical_shape[ultimate]; x++) { owned_buffer[index++] = T(static_cast(value)); value = value + 1; } // dim W value = 0; - index = index + (shape[ultimate] - up_shape[ultimate]); + index = index + (padded_shape[ultimate] - logical_shape[ultimate]); } // dim H - index = index + ((shape[penultimate] - up_shape[penultimate]) * tt::constants::TILE_WIDTH); + index = index + ((padded_shape[penultimate] - logical_shape[penultimate]) * tt::constants::TILE_WIDTH); } // dim c } // dim N - auto output = Tensor(OwnedStorage{owned_buffer}, shape, data_type, Layout::ROW_MAJOR).to(layout); + auto output = Tensor( + OwnedStorage{owned_buffer}, + TensorSpec( + logical_shape, + TensorLayout::fromPaddedShape( + data_type, PageConfig(Layout::ROW_MAJOR), MemoryConfig{}, logical_shape, padded_shape))) + .to(layout); if (device != nullptr) { output = output.to(device, output_mem_config); } @@ -104,34 +117,40 @@ static Tensor index_width( template static Tensor index_height( - const tt::tt_metal::LegacyShape& shape, + const ttnn::SimpleShape& logical_shape, + const ttnn::SimpleShape& padded_shape, DataType data_type, const Layout layout = Layout::ROW_MAJOR, IDevice* device = nullptr, const MemoryConfig& output_mem_config = MemoryConfig{ .memory_layout = tt::tt_metal::TensorMemoryLayout::INTERLEAVED}) { - auto owned_buffer = tt::tt_metal::owned_buffer::create(tt::tt_metal::compute_volume(shape)); + auto owned_buffer = tt::tt_metal::owned_buffer::create(padded_shape.volume()); std::fill(owned_buffer.begin(), owned_buffer.end(), 
-std::numeric_limits::infinity()); - auto& up_shape = shape.without_padding(); auto index = 0; auto value = 0; - auto rank = up_shape.rank(); + auto rank = logical_shape.rank(); auto penultimate = rank - 2; auto ultimate = rank - 1; - for (uint32_t b = 0; b < up_shape[rank - 4]; b++) { - for (uint32_t c = 0; c < up_shape[rank - 3]; c++) { - for (uint32_t y = 0; y < up_shape[penultimate]; y++) { - for (uint32_t x = 0; x < up_shape[ultimate]; x++) { + for (uint32_t b = 0; b < logical_shape[rank - 4]; b++) { + for (uint32_t c = 0; c < logical_shape[rank - 3]; c++) { + for (uint32_t y = 0; y < logical_shape[penultimate]; y++) { + for (uint32_t x = 0; x < logical_shape[ultimate]; x++) { owned_buffer[index++] = T(static_cast(value)); } // dim W value = value + 1; - index = index + (shape[ultimate] - up_shape[ultimate]); + index = index + (padded_shape[ultimate] - logical_shape[ultimate]); } // dim H value = 0; - index = index + ((shape[penultimate] - up_shape[penultimate]) * tt::constants::TILE_WIDTH); + index = index + ((padded_shape[penultimate] - logical_shape[penultimate]) * tt::constants::TILE_WIDTH); } // dim C } // dim N - auto output = Tensor(OwnedStorage{owned_buffer}, shape, data_type, Layout::ROW_MAJOR).to(layout); + auto output = Tensor( + OwnedStorage{owned_buffer}, + TensorSpec( + logical_shape, + TensorLayout::fromPaddedShape( + data_type, PageConfig(Layout::ROW_MAJOR), MemoryConfig{}, logical_shape, padded_shape))) + .to(layout); if (device != nullptr) { output = output.to(device, output_mem_config); } @@ -140,33 +159,39 @@ static Tensor index_height( template static Tensor index_all( - const tt::tt_metal::LegacyShape& shape, + const ttnn::SimpleShape& logical_shape, + const ttnn::SimpleShape& padded_shape, DataType data_type, const Layout layout = Layout::ROW_MAJOR, IDevice* device = nullptr, const MemoryConfig& output_mem_config = MemoryConfig{ .memory_layout = tt::tt_metal::TensorMemoryLayout::INTERLEAVED}) { - auto owned_buffer = 
tt::tt_metal::owned_buffer::create(tt::tt_metal::compute_volume(shape)); + auto owned_buffer = tt::tt_metal::owned_buffer::create(padded_shape.volume()); std::fill(owned_buffer.begin(), owned_buffer.end(), -std::numeric_limits::infinity()); - auto& up_shape = shape.without_padding(); auto index = 0; auto value = 0; - auto rank = up_shape.rank(); + auto rank = logical_shape.rank(); auto penultimate = rank - 2; auto ultimate = rank - 1; - for (uint32_t b = 0; b < up_shape[rank - 4]; b++) { - for (uint32_t c = 0; c < up_shape[rank - 3]; c++) { - for (uint32_t y = 0; y < up_shape[penultimate]; y++) { - for (uint32_t x = 0; x < up_shape[ultimate]; x++) { + for (uint32_t b = 0; b < logical_shape[rank - 4]; b++) { + for (uint32_t c = 0; c < logical_shape[rank - 3]; c++) { + for (uint32_t y = 0; y < logical_shape[penultimate]; y++) { + for (uint32_t x = 0; x < logical_shape[ultimate]; x++) { owned_buffer[index++] = T(static_cast(value)); value = value + 1; } // dim W - index = index + (shape[ultimate] - up_shape[ultimate]); + index = index + (padded_shape[ultimate] - logical_shape[ultimate]); } // dim H - index = index + ((shape[penultimate] - up_shape[penultimate]) * tt::constants::TILE_WIDTH); + index = index + ((padded_shape[penultimate] - logical_shape[penultimate]) * tt::constants::TILE_WIDTH); } // dim C } // dim N - auto output = Tensor(OwnedStorage{owned_buffer}, shape, data_type, Layout::ROW_MAJOR).to(layout); + auto output = Tensor( + OwnedStorage{owned_buffer}, + TensorSpec( + logical_shape, + TensorLayout::fromPaddedShape( + data_type, PageConfig(Layout::ROW_MAJOR), MemoryConfig{}, logical_shape, padded_shape))) + .to(layout); if (device != nullptr) { output = output.to(device, output_mem_config); } @@ -175,14 +200,14 @@ static Tensor index_all( template static Tensor mask_padded_input( - const tt::tt_metal::LegacyShape& padded_shape, - const tt::tt_metal::LegacyShape& unpadded_shape, + const ttnn::SimpleShape& logical_shape, + const ttnn::SimpleShape& 
padded_shape, DataType data_type, const Layout layout = Layout::ROW_MAJOR, IDevice* device = nullptr, const MemoryConfig& output_mem_config = MemoryConfig{ .memory_layout = tt::tt_metal::TensorMemoryLayout::INTERLEAVED}) { - auto owned_buffer = tt::tt_metal::owned_buffer::create(tt::tt_metal::compute_volume(padded_shape)); + auto owned_buffer = tt::tt_metal::owned_buffer::create(padded_shape.volume()); auto index = 0; auto rank = padded_shape.rank(); @@ -192,8 +217,8 @@ static Tensor mask_padded_input( for (uint32_t c = 0; c < padded_shape[rank - 3]; c++) { for (uint32_t y = 0; y < padded_shape[penultimate]; y++) { for (uint32_t x = 0; x < padded_shape[ultimate]; x++) { - if (b < unpadded_shape[rank - 4] && c < unpadded_shape[rank - 3] && - y < unpadded_shape[penultimate] && x < unpadded_shape[ultimate]) { + if (b < logical_shape[rank - 4] && c < logical_shape[rank - 3] && y < logical_shape[penultimate] && + x < logical_shape[ultimate]) { owned_buffer[index++] = T(static_cast(1.0)); } else { owned_buffer[index++] = T(static_cast(0.0)); @@ -235,8 +260,17 @@ static Tensor fill_first_val_into_tensor( for (uint32_t i = 0; i < physical_volume; i++) { owned_buffer[i] = input_buffer[0]; } - const tt::tt_metal::LegacyShape& s_a = input_tensor.get_legacy_shape(); - auto output = Tensor(OwnedStorage{owned_buffer}, s_a, data_type, Layout::ROW_MAJOR).to(layout); + auto output = Tensor( + OwnedStorage{owned_buffer}, + TensorSpec( + input_tensor.get_logical_shape(), + TensorLayout::fromPaddedShape( + data_type, + PageConfig(Layout::ROW_MAJOR), + MemoryConfig{}, + input_tensor.get_logical_shape(), + input_tensor.get_padded_shape()))) + .to(layout); if (device != nullptr) { output = output.to(device, output_mem_config); } @@ -251,7 +285,7 @@ static Tensor prod_result_computation_GS( IDevice* device = nullptr, const MemoryConfig& output_mem_config = MemoryConfig{ .memory_layout = tt::tt_metal::TensorMemoryLayout::INTERLEAVED}) { - const tt::tt_metal::LegacyShape& s_a = 
input_tensor.get_legacy_shape(); + const ttnn::SimpleShape& s_a = input_tensor.get_padded_shape(); auto owned_buffer = tt::tt_metal::owned_buffer::create(input_tensor.volume()); // ouput auto device_buffer = input_tensor.device_buffer(); uint32_t size_in_bytes = device_buffer->size(); @@ -285,7 +319,17 @@ static Tensor prod_result_computation_GS( } owned_buffer[0] = result; // store the result at the first position of the tensor,and the rest of the values as // 0.0f - auto output = Tensor(OwnedStorage{owned_buffer}, s_a, data_type, Layout::ROW_MAJOR).to(layout); + auto output = Tensor( + OwnedStorage{owned_buffer}, + TensorSpec( + input_tensor.get_logical_shape(), + TensorLayout::fromPaddedShape( + data_type, + Layout::ROW_MAJOR, + MemoryConfig{}, + input_tensor.get_logical_shape(), + input_tensor.get_padded_shape()))) + .to(layout); if (device != nullptr) { output = output.to(device, output_mem_config); } @@ -300,8 +344,8 @@ static Tensor prod_result_computation_WH_B0( IDevice* device = nullptr, const MemoryConfig& output_mem_config = MemoryConfig{ .memory_layout = tt::tt_metal::TensorMemoryLayout::INTERLEAVED}) { - const tt::tt_metal::LegacyShape& s_a = input_tensor.get_legacy_shape(); - auto owned_buffer = tt::tt_metal::owned_buffer::create(tt::tt_metal::compute_volume(s_a)); // ouput + const auto& s_a = input_tensor.get_padded_shape(); + auto owned_buffer = tt::tt_metal::owned_buffer::create(s_a.volume()); // ouput auto device_buffer = input_tensor.device_buffer(); uint32_t size_in_bytes = device_buffer->size(); std::vector data_vec; @@ -338,7 +382,17 @@ static Tensor prod_result_computation_WH_B0( } owned_buffer[0] = result; // store the result at the first position of the tensor,and the rest of the values as // 0.0f - auto output = Tensor(OwnedStorage{owned_buffer}, s_a, data_type, Layout::ROW_MAJOR).to(layout); + auto output = Tensor( + OwnedStorage{owned_buffer}, + TensorSpec( + input_tensor.get_logical_shape(), + TensorLayout::fromPaddedShape( + data_type, 
+ PageConfig(Layout::ROW_MAJOR), + MemoryConfig{}, + input_tensor.get_logical_shape(), + input_tensor.get_padded_shape()))) + .to(layout); if (device != nullptr) { output = output.to(device, output_mem_config); } @@ -347,34 +401,40 @@ static Tensor prod_result_computation_WH_B0( template static Tensor index_channel( - const tt::tt_metal::LegacyShape& shape, + const ttnn::SimpleShape& logical_shape, + const ttnn::SimpleShape& padded_shape, DataType data_type, const Layout layout = Layout::ROW_MAJOR, IDevice* device = nullptr, const MemoryConfig& output_mem_config = MemoryConfig{ .memory_layout = tt::tt_metal::TensorMemoryLayout::INTERLEAVED}) { - auto owned_buffer = tt::tt_metal::owned_buffer::create(tt::tt_metal::compute_volume(shape)); + auto owned_buffer = tt::tt_metal::owned_buffer::create(padded_shape.volume()); std::fill(owned_buffer.begin(), owned_buffer.end(), -std::numeric_limits::infinity()); - auto& up_shape = shape.without_padding(); auto index = 0; auto value = 0; - auto rank = up_shape.rank(); + auto rank = logical_shape.rank(); auto penultimate = rank - 2; auto ultimate = rank - 1; - for (uint32_t b = 0; b < up_shape[rank - 4]; b++) { - for (uint32_t c = 0; c < up_shape[rank - 3]; c++) { - for (uint32_t y = 0; y < up_shape[penultimate]; y++) { - for (uint32_t x = 0; x < up_shape[ultimate]; x++) { + for (uint32_t b = 0; b < logical_shape[rank - 4]; b++) { + for (uint32_t c = 0; c < logical_shape[rank - 3]; c++) { + for (uint32_t y = 0; y < logical_shape[penultimate]; y++) { + for (uint32_t x = 0; x < logical_shape[ultimate]; x++) { owned_buffer[index++] = T(static_cast(value)); } // dim W - index = index + (shape[ultimate] - up_shape[ultimate]); + index = index + (padded_shape[ultimate] - logical_shape[ultimate]); } // dim H value = value + 1; - index = index + ((shape[penultimate] - up_shape[penultimate]) * tt::constants::TILE_WIDTH); + index = index + ((padded_shape[penultimate] - logical_shape[penultimate]) * tt::constants::TILE_WIDTH); } // dim C 
value = 0; } // dim N - auto output = Tensor(OwnedStorage{owned_buffer}, shape, data_type, Layout::ROW_MAJOR).to(layout); + auto output = Tensor( + OwnedStorage{owned_buffer}, + TensorSpec( + logical_shape, + TensorLayout::fromPaddedShape( + data_type, PageConfig(Layout::ROW_MAJOR), MemoryConfig{}, logical_shape, padded_shape))) + .to(layout); if (device != nullptr) { output = output.to(device, output_mem_config); } @@ -383,33 +443,39 @@ static Tensor index_channel( template static Tensor index_batch( - const tt::tt_metal::LegacyShape& shape, + const ttnn::SimpleShape& logical_shape, + const ttnn::SimpleShape& padded_shape, DataType data_type, const Layout layout = Layout::ROW_MAJOR, IDevice* device = nullptr, const MemoryConfig& output_mem_config = MemoryConfig{ .memory_layout = tt::tt_metal::TensorMemoryLayout::INTERLEAVED}) { - auto owned_buffer = tt::tt_metal::owned_buffer::create(tt::tt_metal::compute_volume(shape)); + auto owned_buffer = tt::tt_metal::owned_buffer::create(padded_shape.volume()); std::fill(owned_buffer.begin(), owned_buffer.end(), -std::numeric_limits::infinity()); - auto& up_shape = shape.without_padding(); auto index = 0; auto value = 0; - auto rank = up_shape.rank(); + auto rank = logical_shape.rank(); auto penultimate = rank - 2; auto ultimate = rank - 1; - for (uint32_t b = 0; b < up_shape[rank - 4]; b++) { - for (uint32_t c = 0; c < up_shape[rank - 3]; c++) { - for (uint32_t y = 0; y < up_shape[penultimate]; y++) { - for (uint32_t x = 0; x < up_shape[ultimate]; x++) { + for (uint32_t b = 0; b < logical_shape[rank - 4]; b++) { + for (uint32_t c = 0; c < logical_shape[rank - 3]; c++) { + for (uint32_t y = 0; y < logical_shape[penultimate]; y++) { + for (uint32_t x = 0; x < logical_shape[ultimate]; x++) { owned_buffer[index++] = T(static_cast(value)); } // dim W - index = index + (shape[ultimate] - up_shape[ultimate]); + index = index + (padded_shape[ultimate] - logical_shape[ultimate]); } // dim H - index = index + ((shape[penultimate] - 
up_shape[penultimate]) * tt::constants::TILE_WIDTH); + index = index + ((padded_shape[penultimate] - logical_shape[penultimate]) * tt::constants::TILE_WIDTH); } // dim C value = value + 1; } // dim N - auto output = Tensor(OwnedStorage{owned_buffer}, shape, data_type, Layout::ROW_MAJOR).to(layout); + auto output = Tensor( + OwnedStorage{owned_buffer}, + TensorSpec( + logical_shape, + TensorLayout::fromPaddedShape( + data_type, PageConfig(Layout::ROW_MAJOR), MemoryConfig{}, logical_shape, padded_shape))) + .to(layout); if (device != nullptr) { output = output.to(device, output_mem_config); } @@ -419,7 +485,8 @@ static Tensor index_batch( template static Tensor manual_insertion( const Tensor& input_tensor, - const tt::tt_metal::LegacyShape& shape, + const ttnn::SimpleShape& logical_shape, + const ttnn::SimpleShape& padded_shape, DataType data_type, const Layout layout = Layout::ROW_MAJOR, IDevice* device = nullptr, @@ -427,7 +494,7 @@ static Tensor manual_insertion( .memory_layout = tt::tt_metal::TensorMemoryLayout::INTERLEAVED}) { TT_ASSERT(input_tensor.get_layout() == Layout::ROW_MAJOR); TT_ASSERT( - shape[0] * shape[1] * shape[2] * shape[3] == input_tensor.volume(), + padded_shape[0] * padded_shape[1] * padded_shape[2] * padded_shape[3] == input_tensor.volume(), "Required shape volume must match old shape volume"); auto device_buffer = input_tensor.device_buffer(); uint32_t size_in_bytes = device_buffer->size(); @@ -441,7 +508,13 @@ static Tensor manual_insertion( tt::tt_metal::tensor_impl::read_data_from_device_buffer(device_buffer, data_vec); } auto owned_buffer = owned_buffer::create(std::move(data_vec)); - auto output = Tensor(OwnedStorage{owned_buffer}, shape, data_type, Layout::ROW_MAJOR).to(layout); + auto output = Tensor( + OwnedStorage{owned_buffer}, + TensorSpec( + logical_shape, + TensorLayout::fromPaddedShape( + data_type, PageConfig(Layout::ROW_MAJOR), MemoryConfig{}, logical_shape, padded_shape))) + .to(layout); if (device != nullptr) { output = 
output.to(device, output_mem_config); } @@ -450,26 +523,28 @@ static Tensor manual_insertion( template static Tensor index_tril( - const tt::tt_metal::LegacyShape& shape, + const ttnn::SimpleShape& logical_shape, + const ttnn::SimpleShape& padded_shape, const int32_t diag, DataType data_type, const Layout layout = Layout::ROW_MAJOR, IDevice* device = nullptr, const MemoryConfig& output_mem_config = MemoryConfig{ .memory_layout = tt::tt_metal::TensorMemoryLayout::INTERLEAVED}) { - return index_trilu(shape, diag, data_type, layout, device, output_mem_config); + return index_trilu(logical_shape, padded_shape, diag, data_type, layout, device, output_mem_config); } template static Tensor index_triu( - const tt::tt_metal::LegacyShape& shape, + const ttnn::SimpleShape& logical_shape, + const ttnn::SimpleShape& padded_shape, const int32_t diag, DataType data_type, const Layout layout = Layout::ROW_MAJOR, IDevice* device = nullptr, const MemoryConfig& output_mem_config = MemoryConfig{ .memory_layout = tt::tt_metal::TensorMemoryLayout::INTERLEAVED}) { - return index_trilu(shape, diag, data_type, layout, device, output_mem_config); + return index_trilu(logical_shape, padded_shape, diag, data_type, layout, device, output_mem_config); } namespace random { diff --git a/ttnn/cpp/ttnn/operations/pool/upsample/device/upsample_program_factory_multicore.cpp b/ttnn/cpp/ttnn/operations/pool/upsample/device/upsample_program_factory_multicore.cpp index dc61eca5bfc..4442921f603 100644 --- a/ttnn/cpp/ttnn/operations/pool/upsample/device/upsample_program_factory_multicore.cpp +++ b/ttnn/cpp/ttnn/operations/pool/upsample/device/upsample_program_factory_multicore.cpp @@ -118,7 +118,7 @@ static Tensor create_config_tensor( */ const uint32_t config_buffer_entry_size = 2; uint32_t elems_per_core = config_buffer_entry_size * scale_factor_h * input_nsticks_per_core; - Shape config_shape({config_vector.size() / elems_per_core, elems_per_core}); + ttnn::SimpleShape config_shape({config_vector.size() 
/ elems_per_core, elems_per_core}); auto config_buffer = owned_buffer::create(std::move(config_vector)); return Tensor(OwnedStorage{config_buffer}, config_shape, DataType::UINT16, Layout::ROW_MAJOR); } diff --git a/ttnn/cpp/ttnn/operations/sliding_window/sliding_window.cpp b/ttnn/cpp/ttnn/operations/sliding_window/sliding_window.cpp index c7af8f4cf44..06e830b779a 100644 --- a/ttnn/cpp/ttnn/operations/sliding_window/sliding_window.cpp +++ b/ttnn/cpp/ttnn/operations/sliding_window/sliding_window.cpp @@ -628,7 +628,7 @@ Tensor construct_on_host_config_tensor( // we need the last dim of tensors to be multiple of 2, pad if needed uint32_t extend_with_zeroes = config[0].size() % 2; extend_with_zeroes = extend_with_zeroes > 0 ? 2 - extend_with_zeroes : 0; - Shape config_shape = Shape({(uint32_t)config.size(), (uint32_t)config[0].size() + extend_with_zeroes}); + ttnn::SimpleShape config_shape({(uint32_t)config.size(), (uint32_t)config[0].size() + extend_with_zeroes}); std::vector config_vector = flatten(config, extend_with_zeroes); if (p_config.shard_scheme == TensorMemoryLayout::HEIGHT_SHARDED) { auto config_buffer = owned_buffer::create(std::move(config_vector)); @@ -641,7 +641,7 @@ Tensor construct_on_host_config_tensor( repeat_config.insert(repeat_config.end(), config_vector.begin(), config_vector.end()); } auto config_buffer = owned_buffer::create(std::move(repeat_config)); - config_shape = Shape({config_shape[0] * repeat_factor, config_shape[1]}); + config_shape = ttnn::SimpleShape({config_shape[0] * repeat_factor, config_shape[1]}); return Tensor(OwnedStorage{config_buffer}, config_shape, DataType::UINT16, Layout::ROW_MAJOR); } else if (p_config.shard_scheme == TensorMemoryLayout::BLOCK_SHARDED) { TT_ASSERT(p_config.grid.ranges().size() == 1, "BLOCK_SHARDED should have just a single core range"); @@ -671,7 +671,7 @@ Tensor construct_on_host_config_tensor( repeat_config.insert(repeat_config.end(), config_vector.begin(), config_vector.end()); } auto config_buffer = 
owned_buffer::create(std::move(repeat_config)); - config_shape = Shape({config_shape[0] * repeat_factor, config_shape[1]}); + config_shape = ttnn::SimpleShape({config_shape[0] * repeat_factor, config_shape[1]}); return Tensor(OwnedStorage{config_buffer}, config_shape, DataType::UINT16, Layout::ROW_MAJOR); } else { TT_ASSERT(false, "Unsupported shard scheme"); diff --git a/ttnn/cpp/ttnn/tensor/serialization.cpp b/ttnn/cpp/ttnn/tensor/serialization.cpp index 3ce42d8dec7..46db89831e7 100644 --- a/ttnn/cpp/ttnn/tensor/serialization.cpp +++ b/ttnn/cpp/ttnn/tensor/serialization.cpp @@ -297,7 +297,12 @@ Tensor load_tensor_helper(const std::string& file_name, T device) { auto storage = load_storage(input_stream, data_type, layout, storage_type, device); - auto tensor = Tensor(std::move(storage), shape, data_type, layout); + auto tensor = Tensor( + std::move(storage), + TensorSpec( + shape.logical_shape(), + TensorLayout::fromPaddedShape( + data_type, layout, MemoryConfig{}, shape.logical_shape(), shape.padded_shape()))); if (device != nullptr) { tensor = tensor.to(device, memory_config); } else if (has_memory_config) { @@ -316,7 +321,12 @@ Tensor load_tensor_helper(const std::string& file_name, T device) { input_stream.read(reinterpret_cast(&layout), sizeof(Layout)); auto storage = load_owned_storage(input_stream, data_type); - auto tensor = Tensor(std::move(storage), shape, data_type, layout); + auto tensor = Tensor( + std::move(storage), + TensorSpec( + shape.logical_shape(), + TensorLayout::fromPaddedShape( + data_type, layout, MemoryConfig{}, shape.logical_shape(), shape.padded_shape()))); if (device != nullptr) { tensor = tensor.to(device); } diff --git a/ttnn/cpp/ttnn/tensor/tensor.cpp b/ttnn/cpp/ttnn/tensor/tensor.cpp index a326f561bbe..f2bbf008ac8 100644 --- a/ttnn/cpp/ttnn/tensor/tensor.cpp +++ b/ttnn/cpp/ttnn/tensor/tensor.cpp @@ -180,10 +180,6 @@ Tensor::Tensor( dtype, PageConfig(layout, tile), memory_config, logical_shape, padded_shape))); } -Tensor::Tensor( - 
Storage storage, const ttnn::Shape& shape, DataType dtype, Layout layout, const std::optional& tile) : - Tensor(std::move(storage), shape.logical_shape(), shape.padded_shape(), dtype, layout, tile) {} - Tensor::Tensor(Storage storage, TensorSpec tensor_spec) { init(std::move(storage), std::move(tensor_spec)); } void Tensor::init(Storage storage, TensorSpec tensor_spec) { @@ -338,7 +334,7 @@ Tensor::~Tensor() { Tensor::Tensor( Storage storage, const ttnn::SimpleShape& shape, DataType dtype, Layout layout, const std::optional& tile) : - Tensor(std::move(storage), ttnn::Shape(shape.view()), dtype, layout, tile) {} + Tensor(std::move(storage), /* logical_shape */ shape, /* padded_shape */ shape, dtype, layout, tile) {} void Tensor::deallocate(bool force) { ZoneScopedN("TensorDeallocate"); diff --git a/ttnn/cpp/ttnn/tensor/tensor.hpp b/ttnn/cpp/ttnn/tensor/tensor.hpp index 9e770aa3613..a032f331157 100644 --- a/ttnn/cpp/ttnn/tensor/tensor.hpp +++ b/ttnn/cpp/ttnn/tensor/tensor.hpp @@ -88,12 +88,6 @@ struct Tensor { // ====================================================================================== explicit Tensor() = default; - Tensor( - Storage storage, - const ttnn::Shape& shape, - DataType dtype, - Layout layout, - const std::optional& tile = std::nullopt); Tensor( Storage storage, const ttnn::SimpleShape& shape, diff --git a/ttnn/cpp/ttnn/tensor/tensor_impl.cpp b/ttnn/cpp/ttnn/tensor/tensor_impl.cpp index 3d8b2d83ddc..4553c55501c 100644 --- a/ttnn/cpp/ttnn/tensor/tensor_impl.cpp +++ b/ttnn/cpp/ttnn/tensor/tensor_impl.cpp @@ -187,10 +187,17 @@ Tensor pad_bfloat8_b( auto input_float_data = unpack_bfp8_tiles_into_float_vec(input_packed_data, /*row_major_output=*/false, /*is_exp_a=*/false, tile); auto input_float_buffer = owned_buffer::create(std::move(input_float_data)); - auto float_tensor = - Tensor( - OwnedStorage{input_float_buffer}, tensor.get_legacy_shape(), DataType::FLOAT32, tensor.get_layout(), tile) - .pad(output_padded_shape, input_tensor_start, 
pad_value); + auto float_tensor = Tensor( + OwnedStorage{input_float_buffer}, + TensorSpec( + tensor.get_logical_shape(), + TensorLayout::fromPaddedShape( + DataType::FLOAT32, + PageConfig(tensor.get_layout(), tile), + MemoryConfig{}, + tensor.get_logical_shape(), + tensor.get_padded_shape()))) + .pad(output_padded_shape, input_tensor_start, pad_value); // Convert back to BFLOAT8_B auto output_float_data = owned_buffer::get_as(float_tensor).get(); @@ -218,10 +225,17 @@ Tensor unpad_bfloat8_b( auto input_float_data = unpack_bfp8_tiles_into_float_vec(input_packed_data, /*row_major_output=*/false, /*is_exp_a=*/false, tile); auto input_float_buffer = owned_buffer::create(std::move(input_float_data)); - auto float_tensor = - Tensor( - OwnedStorage{input_float_buffer}, tensor.get_legacy_shape(), DataType::FLOAT32, tensor.get_layout(), tile) - .unpad(output_tensor_start, output_tensor_end); + auto float_tensor = Tensor( + OwnedStorage{input_float_buffer}, + TensorSpec( + tensor.get_logical_shape(), + TensorLayout::fromPaddedShape( + DataType::FLOAT32, + PageConfig(tensor.get_layout(), tile), + MemoryConfig{}, + tensor.get_logical_shape(), + tensor.get_padded_shape()))) + .unpad(output_tensor_start, output_tensor_end); // Convert back to BFLOAT8_B auto output_float_data = owned_buffer::get_as(float_tensor).get(); @@ -230,10 +244,14 @@ Tensor unpad_bfloat8_b( auto output_uint32_buffer = owned_buffer::create(std::move(output_packed_data)); return Tensor( std::move(OwnedStorage{std::move(output_uint32_buffer)}), - float_tensor.get_legacy_shape(), - DataType::BFLOAT8_B, - tensor.get_layout(), - tile); + TensorSpec( + float_tensor.get_logical_shape(), + TensorLayout::fromPaddedShape( + DataType::BFLOAT8_B, + PageConfig(tensor.get_layout(), tile), + MemoryConfig{}, + float_tensor.get_logical_shape(), + float_tensor.get_padded_shape()))); } Tensor pad_bfloat4_b( @@ -249,10 +267,17 @@ Tensor pad_bfloat4_b( auto input_float_data = unpack_bfp4_tiles_into_float_vec(input_packed_data, 
/*row_major_output=*/false, /*is_exp_a=*/false, tile); auto input_float_buffer = owned_buffer::create(std::move(input_float_data)); - auto float_tensor = - Tensor( - OwnedStorage{input_float_buffer}, tensor.get_legacy_shape(), DataType::FLOAT32, tensor.get_layout(), tile) - .pad(output_padded_shape, input_tensor_start, pad_value); + auto float_tensor = Tensor( + OwnedStorage{input_float_buffer}, + TensorSpec( + tensor.get_logical_shape(), + TensorLayout::fromPaddedShape( + DataType::FLOAT32, + PageConfig(tensor.get_layout(), tile), + MemoryConfig{}, + tensor.get_logical_shape(), + tensor.get_padded_shape()))) + .pad(output_padded_shape, input_tensor_start, pad_value); // Convert back to BFLOAT4_B auto output_float_data = owned_buffer::get_as(float_tensor).get(); @@ -280,10 +305,17 @@ Tensor unpad_bfloat4_b( auto input_float_data = unpack_bfp4_tiles_into_float_vec(input_packed_data, /*row_major_output=*/false, /*is_exp_a=*/false, tile); auto input_float_buffer = owned_buffer::create(std::move(input_float_data)); - auto float_tensor = - Tensor( - OwnedStorage{input_float_buffer}, tensor.get_legacy_shape(), DataType::FLOAT32, tensor.get_layout(), tile) - .unpad(output_tensor_start, output_tensor_end); + auto float_tensor = Tensor( + OwnedStorage{input_float_buffer}, + TensorSpec( + tensor.get_logical_shape(), + TensorLayout::fromPaddedShape( + DataType::FLOAT32, + PageConfig(tensor.get_layout(), tile), + MemoryConfig{}, + tensor.get_logical_shape(), + tensor.get_padded_shape()))) + .unpad(output_tensor_start, output_tensor_end); // Convert back to BFLOAT4_B auto output_float_data = owned_buffer::get_as(float_tensor).get(); @@ -292,10 +324,14 @@ Tensor unpad_bfloat4_b( auto output_uint32_buffer = owned_buffer::create(std::move(output_packed_data)); return Tensor( std::move(OwnedStorage{std::move(output_uint32_buffer)}), - float_tensor.get_legacy_shape(), - DataType::BFLOAT4_B, - tensor.get_layout(), - tile); + TensorSpec( + float_tensor.get_logical_shape(), + 
TensorLayout::fromPaddedShape( + DataType::BFLOAT4_B, + PageConfig(tensor.get_layout(), tile), + MemoryConfig{}, + float_tensor.get_logical_shape(), + float_tensor.get_padded_shape()))); } // ====================================================================================== @@ -1254,7 +1290,7 @@ Tensor unpad( tensor.get_storage()); return Tensor( OwnedStorage{output_buffer}, - output_shape, + ttnn::SimpleShape(output_shape), tensor.get_dtype(), tensor.get_layout(), tensor.get_tensor_spec().tile()); @@ -1293,8 +1329,7 @@ template Tensor extract_shard(const Tensor& tensor, const uint32_t& core_id) { auto buffer = tensor.buffer(); auto buffer_shard_shape = buffer->shard_spec().shape(); - std::array shard_shape_array = {1, 1, buffer_shard_shape[0], buffer_shard_shape[1]}; - tt::tt_metal::LegacyShape shard_shape(shard_shape_array); + ttnn::SimpleShape shard_shape({1, 1, buffer_shard_shape[0], buffer_shard_shape[1]}); std::vector device_data; ::detail::ReadShard(*buffer, device_data, core_id);