From ba352467034b73b0e572882a8af90fc96fac0fa9 Mon Sep 17 00:00:00 2001 From: Stanislav Minakov Date: Wed, 22 Jan 2025 13:04:14 -0800 Subject: [PATCH] Remove deprecated Tensor constructor with Shape (#16955) ### Ticket ### Problem description We're continuing to remove all usages of Shape/LegacyShape from the codebase ### What's changed Removed Tensor constructor which takes Shape Removed all usages ### Checklist - [x] [Post commit CI passes](https://github.com/tenstorrent/tt-metal/actions/runs/12913833779) - [x] [Model regression CI testing passes](https://github.com/tenstorrent/tt-metal/actions/runs/12899489935) - [x] [Device performance regression CI testing passes](https://github.com/tenstorrent/tt-metal/actions/runs/12899491700) - [x] [T3K unit tests CI passes](https://github.com/tenstorrent/tt-metal/actions/runs/12899497750) - [x] [T3K frequent tests CI passes](https://github.com/tenstorrent/tt-metal/actions/runs/12899494794) - [x] [Nightly model and ttnn tests CI passes](https://github.com/tenstorrent/tt-metal/actions/runs/12899505501) - [x] New/Existing tests provide coverage for changes --- tests/tt_eager/ops/test_eltwise_binary_op.cpp | 2 +- tests/tt_eager/ops/test_eltwise_unary_op.cpp | 2 +- .../ops/test_transpose_wh_multi_core.cpp | 2 +- .../ops/test_transpose_wh_single_core.cpp | 2 +- .../tensors/test_raw_host_memory_pointer.cpp | 22 +- .../unit_tests/gtests/test_async_runtime.cpp | 2 +- .../unit_tests/gtests/test_multi_device.cpp | 6 +- tt-train/sources/examples/sample_app/main.cpp | 2 +- .../sources/ttml/core/tt_tensor_utils.cpp | 2 +- ttnn/cpp/ttnn/distributed/api.cpp | 47 ++-- .../operations/core/to_dtype/to_dtype_op.hpp | 119 +++++---- .../pad/device/pad_program_factory.cpp | 4 +- .../reshape_on_device/reshape.cpp | 24 +- .../unary/device/unary_composite_op.cpp | 6 +- .../experimental/reduction/argmax/argmax.cpp | 39 ++- ttnn/cpp/ttnn/operations/functions.hpp | 243 ++++++++++++------ .../upsample_program_factory_multicore.cpp | 2 +- 
.../sliding_window/sliding_window.cpp | 6 +- ttnn/cpp/ttnn/tensor/serialization.cpp | 14 +- ttnn/cpp/ttnn/tensor/tensor.cpp | 6 +- ttnn/cpp/ttnn/tensor/tensor.hpp | 6 - ttnn/cpp/ttnn/tensor/tensor_impl.cpp | 89 +++++-- 22 files changed, 413 insertions(+), 234 deletions(-) diff --git a/tests/tt_eager/ops/test_eltwise_binary_op.cpp b/tests/tt_eager/ops/test_eltwise_binary_op.cpp index e5251876b0a..23263a253ac 100644 --- a/tests/tt_eager/ops/test_eltwise_binary_op.cpp +++ b/tests/tt_eager/ops/test_eltwise_binary_op.cpp @@ -28,7 +28,7 @@ Tensor host_function(const Tensor& input_tensor_a, const Tensor& input_tensor_b) } return Tensor( OwnedStorage{output_buffer}, - input_tensor_a.get_legacy_shape(), + input_tensor_a.get_logical_shape(), input_tensor_a.get_dtype(), input_tensor_a.get_layout()); } diff --git a/tests/tt_eager/ops/test_eltwise_unary_op.cpp b/tests/tt_eager/ops/test_eltwise_unary_op.cpp index 462979b0b38..c3f2b04feba 100644 --- a/tests/tt_eager/ops/test_eltwise_unary_op.cpp +++ b/tests/tt_eager/ops/test_eltwise_unary_op.cpp @@ -50,7 +50,7 @@ Tensor host_function(const Tensor& input_tensor) { return Tensor( OwnedStorage{output_buffer}, - input_tensor.get_legacy_shape(), + input_tensor.get_logical_shape(), input_tensor.get_dtype(), input_tensor.get_layout()); } diff --git a/tests/tt_eager/ops/test_transpose_wh_multi_core.cpp b/tests/tt_eager/ops/test_transpose_wh_multi_core.cpp index 7b332af4e87..2f7e7a8584d 100644 --- a/tests/tt_eager/ops/test_transpose_wh_multi_core.cpp +++ b/tests/tt_eager/ops/test_transpose_wh_multi_core.cpp @@ -23,7 +23,7 @@ using namespace constants; Tensor perform_transpose_wh(Tensor& input_tensor) { TT_FATAL(input_tensor.storage_type() == StorageType::OWNED, "Error"); - auto ashape = input_tensor.get_legacy_shape(); + auto ashape = input_tensor.get_padded_shape(); TT_FATAL(ashape.rank() == 4, "Error"); auto bshape = ashape; bshape[2] = ashape[3]; diff --git a/tests/tt_eager/ops/test_transpose_wh_single_core.cpp 
b/tests/tt_eager/ops/test_transpose_wh_single_core.cpp index 7b332af4e87..2f7e7a8584d 100644 --- a/tests/tt_eager/ops/test_transpose_wh_single_core.cpp +++ b/tests/tt_eager/ops/test_transpose_wh_single_core.cpp @@ -23,7 +23,7 @@ using namespace constants; Tensor perform_transpose_wh(Tensor& input_tensor) { TT_FATAL(input_tensor.storage_type() == StorageType::OWNED, "Error"); - auto ashape = input_tensor.get_legacy_shape(); + auto ashape = input_tensor.get_padded_shape(); TT_FATAL(ashape.rank() == 4, "Error"); auto bshape = ashape; bshape[2] = ashape[3]; diff --git a/tests/tt_eager/tensors/test_raw_host_memory_pointer.cpp b/tests/tt_eager/tensors/test_raw_host_memory_pointer.cpp index bc68c910fe5..74601225461 100644 --- a/tests/tt_eager/tensors/test_raw_host_memory_pointer.cpp +++ b/tests/tt_eager/tensors/test_raw_host_memory_pointer.cpp @@ -44,14 +44,13 @@ template struct NDArray { - tt::tt_metal::LegacyShape shape; + ttnn::SimpleShape shape; void* data; - NDArray(tt::tt_metal::LegacyShape shape) : - shape(shape), data(malloc(tt::tt_metal::compute_volume(shape) * sizeof(DataType))) {} + NDArray(const ttnn::SimpleShape& shape) : shape(shape), data(malloc(shape.volume() * sizeof(DataType))) {} ~NDArray() { free(data); } - std::size_t size() const { return tt::tt_metal::compute_volume(shape); } + std::size_t size() const { return shape.volume(); } }; void test_raw_host_memory_pointer() { @@ -66,14 +65,11 @@ void test_raw_host_memory_pointer() { int device_id = 0; tt::tt_metal::IDevice* device = tt::tt_metal::CreateDevice(device_id); - tt::tt_metal::LegacyShape shape = {1, 1, tt::constants::TILE_HEIGHT, tt::constants::TILE_WIDTH}; + ttnn::SimpleShape shape({1, 1, tt::constants::TILE_HEIGHT, tt::constants::TILE_WIDTH}); // Host tensor to print the output - Tensor tensor_for_printing = Tensor( - OwnedStorage{owned_buffer::create(tt::tt_metal::compute_volume(shape))}, - shape, - DataType::BFLOAT16, - Layout::TILE); + Tensor tensor_for_printing = + 
Tensor(OwnedStorage{owned_buffer::create(shape.volume())}, shape, DataType::BFLOAT16, Layout::TILE); /* Borrow Data from Numpy Start */ // Create some @@ -128,14 +124,12 @@ void test_raw_host_memory_pointer() { /* Alternative Way to Print Start */ // Alternatively, we could allocate memory manually and create Tensors with BorrowedStorage on the fly to print the // data - void* storage_of_alternative_tensor_for_printing = malloc(tt::tt_metal::compute_volume(shape) * sizeof(bfloat16)); + void* storage_of_alternative_tensor_for_printing = malloc(shape.volume() * sizeof(bfloat16)); tt::tt_metal::memcpy(storage_of_alternative_tensor_for_printing, c_dev); Tensor alternative_tensor_for_printing = Tensor( BorrowedStorage{ - borrowed_buffer::Buffer( - static_cast(storage_of_alternative_tensor_for_printing), - tt::tt_metal::compute_volume(shape)), + borrowed_buffer::Buffer(static_cast(storage_of_alternative_tensor_for_printing), shape.volume()), on_creation_callback, on_destruction_callback}, shape, diff --git a/tests/ttnn/unit_tests/gtests/test_async_runtime.cpp b/tests/ttnn/unit_tests/gtests/test_async_runtime.cpp index 5be2e69c90f..21e055b1a10 100644 --- a/tests/ttnn/unit_tests/gtests/test_async_runtime.cpp +++ b/tests/ttnn/unit_tests/gtests/test_async_runtime.cpp @@ -67,7 +67,7 @@ TEST_F(MultiCommandQueueSingleDeviceFixture, TestAsyncPreallocatedOutputs) { Tensor input_tensor = Tensor( input_storage, TensorSpec(input_shape, TensorLayout(DataType::BFLOAT16, PageConfig(Layout::TILE), MemoryConfig{}))); - Tensor output_tensor = Tensor(output_storage, np_out.get_shape(), DataType::BFLOAT16, Layout::TILE); + Tensor output_tensor = Tensor(output_storage, np_out.get_logical_shape(), DataType::BFLOAT16, Layout::TILE); // Populate input_tensor with data ttnn::write_buffer(io_cq, input_tensor, {host_data}); // Record the completion of the write event diff --git a/tests/ttnn/unit_tests/gtests/test_multi_device.cpp b/tests/ttnn/unit_tests/gtests/test_multi_device.cpp index 
f0caefa847b..f543672e9ba 100644 --- a/tests/ttnn/unit_tests/gtests/test_multi_device.cpp +++ b/tests/ttnn/unit_tests/gtests/test_multi_device.cpp @@ -20,11 +20,7 @@ Tensor create_host_multi_device_tensor(const Tensor& tensor, const ReplicateTens specs.push_back(tensor.get_tensor_spec()); } - return Tensor{ - MultiDeviceHostStorage(strategy, owned_buffers, specs), - tensor.get_legacy_shape(), - tensor.get_dtype(), - tensor.get_layout()}; + return Tensor{MultiDeviceHostStorage(strategy, owned_buffers, specs), tensor.get_tensor_spec()}; } TEST_F(T3kMultiDeviceFixture, TestGetTensorsFromMultiDeviceStorage) { diff --git a/tt-train/sources/examples/sample_app/main.cpp b/tt-train/sources/examples/sample_app/main.cpp index c8917fef49c..7ecea3b33b6 100644 --- a/tt-train/sources/examples/sample_app/main.cpp +++ b/tt-train/sources/examples/sample_app/main.cpp @@ -73,7 +73,7 @@ int main() { // Let the tensor take ownership of the buffer OwnedStorage{std::move(buffer)}, // IMPORTANT: SHAPE MUST BE 4D ELSE EVERYTHING WILL BREAK during the PAD operation - {1, 1, tensor_width, tensor_height}, + ttnn::SimpleShape({1, 1, tensor_width, tensor_height}), // The data type of the tensor tt::tt_metal::DataType::BFLOAT16, // The layout of the tensor. We don't care about the layout in this demo. 
But the valid options are TILE and diff --git a/tt-train/sources/ttml/core/tt_tensor_utils.cpp b/tt-train/sources/ttml/core/tt_tensor_utils.cpp index 8ac1141929a..c908ed3034a 100644 --- a/tt-train/sources/ttml/core/tt_tensor_utils.cpp +++ b/tt-train/sources/ttml/core/tt_tensor_utils.cpp @@ -87,7 +87,7 @@ tt::tt_metal::Tensor ttml_create_owned_tensor( std::vector&& data, const ttnn::Shape& shape, tt::tt_metal::DataType data_type, tt::tt_metal::Layout layout) { auto buffer = tt::tt_metal::owned_buffer::create(std::move(data)); auto storage = OwnedStorage{std::move(buffer)}; - return {std::move(storage), shape, data_type, layout}; + return {std::move(storage), shape.logical_shape(), data_type, layout}; } } // namespace diff --git a/ttnn/cpp/ttnn/distributed/api.cpp b/ttnn/cpp/ttnn/distributed/api.cpp index 0cc8e32c87e..6690b490158 100644 --- a/ttnn/cpp/ttnn/distributed/api.cpp +++ b/ttnn/cpp/ttnn/distributed/api.cpp @@ -92,12 +92,7 @@ Tensor aggregate_as_tensor( } } auto storage = MultiDeviceHostStorage{config, std::move(host_owned_buffers), specs}; - return Tensor( - std::move(storage), - reference_shard.get_legacy_shape(), - reference_shard.get_dtype(), - reference_shard.get_layout(), - tile); + return Tensor(std::move(storage), reference_shard.get_tensor_spec()); } else { std::vector ordered_device_ids; std::unordered_map specs; @@ -122,12 +117,7 @@ Tensor aggregate_as_tensor( } } auto storage = MultiDeviceStorage{config, ordered_device_ids, std::move(device_buffers), specs}; - return Tensor( - std::move(storage), - reference_shard.get_legacy_shape(), - reference_shard.get_dtype(), - reference_shard.get_layout(), - tile); + return Tensor(std::move(storage), reference_shard.get_tensor_spec()); } } @@ -194,9 +184,14 @@ Tensor get_device_tensor(const Tensor& multi_device_tensor, const int device_id) tensor_storage != nullptr && tensor_storage->has_buffer_for_device_id(device_id)) { return Tensor{ DeviceStorage{tensor_storage->get_buffer_for_device_id(device_id)}, - 
multi_device_tensor.get_shape(), - multi_device_tensor.get_dtype(), - multi_device_tensor.get_layout()}; + TensorSpec( + multi_device_tensor.get_logical_shape(), + TensorLayout::fromPaddedShape( + multi_device_tensor.get_dtype(), + PageConfig(multi_device_tensor.get_layout()), + MemoryConfig{}, + multi_device_tensor.get_logical_shape(), + multi_device_tensor.get_padded_shape()))}; } else if (std::holds_alternative(multi_device_tensor.get_storage())) { return multi_device_tensor; } @@ -266,9 +261,14 @@ Tensor create_multi_device_tensor( } return Tensor{ MultiDeviceStorage{strategy, ordered_device_ids, device_buffers, specs}, - tensors.at(0).get_legacy_shape(), - tensors.at(0).get_dtype(), - tensors.at(0).get_layout()}; + TensorSpec( + tensors.at(0).get_logical_shape(), + TensorLayout::fromPaddedShape( + tensors.at(0).get_dtype(), + PageConfig(tensors.at(0).get_layout()), + MemoryConfig{}, + tensors.at(0).get_logical_shape(), + tensors.at(0).get_padded_shape()))}; } else if (storage_type == StorageType::MULTI_DEVICE_HOST) { std::vector owned_buffers; std::vector specs; @@ -282,9 +282,14 @@ Tensor create_multi_device_tensor( } return Tensor{ MultiDeviceHostStorage{strategy, owned_buffers, specs}, - tensors.at(0).get_legacy_shape(), - tensors.at(0).get_dtype(), - tensors.at(0).get_layout()}; + TensorSpec( + tensors.at(0).get_logical_shape(), + TensorLayout::fromPaddedShape( + tensors.at(0).get_dtype(), + PageConfig(tensors.at(0).get_layout()), + MemoryConfig{}, + tensors.at(0).get_logical_shape(), + tensors.at(0).get_padded_shape()))}; } else { TT_THROW("Invalid storage type for multi-device tensor"); } diff --git a/ttnn/cpp/ttnn/operations/core/to_dtype/to_dtype_op.hpp b/ttnn/cpp/ttnn/operations/core/to_dtype/to_dtype_op.hpp index acf536bc5f5..fe5829d7341 100644 --- a/ttnn/cpp/ttnn/operations/core/to_dtype/to_dtype_op.hpp +++ b/ttnn/cpp/ttnn/operations/core/to_dtype/to_dtype_op.hpp @@ -74,17 +74,27 @@ inline Tensor convert_to_cpp_supported_dtype(const Tensor& 
input_tensor) { [&](auto&& buffer) -> Tensor { using T = std::decay_t; if constexpr (std::is_same_v) { - return Tensor{ + return Tensor( tt::tt_metal::OwnedStorage{buffer}, - input_tensor.get_shape(), - input_dtype, - input_tensor.get_layout()}; + TensorSpec( + input_tensor.get_logical_shape(), + TensorLayout::fromPaddedShape( + input_dtype, + PageConfig(input_tensor.get_layout()), + MemoryConfig{}, + input_tensor.get_logical_shape(), + input_tensor.get_padded_shape()))); } else if constexpr (std::is_same_v) { return Tensor{ tt::tt_metal::BorrowedStorage{buffer, []() {}, []() {}}, - input_tensor.get_shape(), - input_dtype, - input_tensor.get_layout()}; + TensorSpec( + input_tensor.get_logical_shape(), + TensorLayout::fromPaddedShape( + input_dtype, + PageConfig(input_tensor.get_layout()), + MemoryConfig{}, + input_tensor.get_logical_shape(), + input_tensor.get_padded_shape()))}; } else { TT_THROW("Unsupported buffer type"); } @@ -112,46 +122,65 @@ inline std::vector cast(const tt::tt_metal::borrowed_buffer::Buffer& } template -Tensor create_owned_tensor(std::vector&& data, const Shape& shape, DataType data_type, Layout layout) { +Tensor create_owned_tensor( + std::vector&& data, + const SimpleShape& logical_shape, + const SimpleShape& padded_shape, + DataType data_type, + Layout layout) { auto buffer = tt::tt_metal::owned_buffer::create(std::move(data)); auto storage = tt::tt_metal::OwnedStorage{std::move(buffer)}; - return Tensor(std::move(storage), shape, data_type, layout); + return Tensor( + std::move(storage), + TensorSpec( + logical_shape, + TensorLayout::fromPaddedShape(data_type, PageConfig(layout), MemoryConfig{}, logical_shape, padded_shape))); } template inline Tensor create_tensor_from_buffer( const tt::tt_metal::borrowed_buffer::Buffer& input_buffer, - const Shape& shape, + const SimpleShape& logical_shape, + const SimpleShape& padded_shape, const Layout& input_layout, const DataType& dtype) { switch (dtype) { case DataType::UINT16: { auto data = 
cast(input_buffer); - return create_owned_tensor(std::move(data), shape, dtype, Layout::ROW_MAJOR).to(input_layout); + return create_owned_tensor(std::move(data), logical_shape, padded_shape, dtype, Layout::ROW_MAJOR) + .to(input_layout); } case DataType::INT32: { auto data = cast(input_buffer); - return create_owned_tensor(std::move(data), shape, dtype, Layout::ROW_MAJOR).to(input_layout); + return create_owned_tensor(std::move(data), logical_shape, padded_shape, dtype, Layout::ROW_MAJOR) + .to(input_layout); } case DataType::UINT32: { auto data = cast(input_buffer); - return create_owned_tensor(std::move(data), shape, dtype, Layout::ROW_MAJOR).to(input_layout); + return create_owned_tensor(std::move(data), logical_shape, padded_shape, dtype, Layout::ROW_MAJOR) + .to(input_layout); } case DataType::FLOAT32: { auto data = cast(input_buffer); - return create_owned_tensor(std::move(data), shape, dtype, Layout::ROW_MAJOR).to(input_layout); + return create_owned_tensor(std::move(data), logical_shape, padded_shape, dtype, Layout::ROW_MAJOR) + .to(input_layout); } case DataType::BFLOAT16: { auto data = cast<::bfloat16, T>(input_buffer); - return create_owned_tensor(std::move(data), shape, dtype, Layout::ROW_MAJOR).to(input_layout); + return create_owned_tensor(std::move(data), logical_shape, padded_shape, dtype, Layout::ROW_MAJOR) + .to(input_layout); } case DataType::BFLOAT8_B: case DataType::BFLOAT4_B: { auto data = cast(input_buffer); auto buffer = tt::tt_metal::owned_buffer::create(std::move(data)); - auto tensor = - Tensor(tt::tt_metal::OwnedStorage{std::move(buffer)}, shape, DataType::FLOAT32, Layout::ROW_MAJOR) - .to(Layout::TILE); + auto tensor = Tensor( + tt::tt_metal::OwnedStorage{std::move(buffer)}, + logical_shape, + padded_shape, + DataType::FLOAT32, + Layout::ROW_MAJOR) + .to(Layout::TILE); auto output_float_data = tt::tt_metal::owned_buffer::get_as(tensor).get(); auto output_packed_data = dtype == DataType::BFLOAT8_B @@ -160,7 +189,8 @@ inline Tensor 
create_tensor_from_buffer( auto output_buffer = tt::tt_metal::owned_buffer::create(std::move(output_packed_data)); return Tensor( tt::tt_metal::OwnedStorage{std::move(output_buffer)}, - shape, + logical_shape, + padded_shape, dtype, Layout::TILE); // has to be in tile layout } @@ -173,32 +203,35 @@ inline Tensor create_tensor_from_buffer( inline Tensor convert_to_dtype(const Tensor& input_tensor, const Layout& input_layout, const DataType& dtype) { auto input_dtype = input_tensor.get_dtype(); + const auto& logical_shape = input_tensor.get_logical_shape(); + const auto& padded_shape = input_tensor.get_padded_shape(); - auto convert_dtype = [&input_layout, &input_dtype, &dtype](const Tensor& input_tensor) { - switch (input_dtype) { - case DataType::UINT16: { - auto buffer = host_buffer::get_as(input_tensor); - return create_tensor_from_buffer(buffer, input_tensor.get_shape(), input_layout, dtype); - } - case DataType::INT32: { - auto buffer = host_buffer::get_as(input_tensor); - return create_tensor_from_buffer(buffer, input_tensor.get_shape(), input_layout, dtype); - } - case DataType::UINT32: { - auto buffer = host_buffer::get_as(input_tensor); - return create_tensor_from_buffer(buffer, input_tensor.get_shape(), input_layout, dtype); - } - case DataType::FLOAT32: { - auto buffer = host_buffer::get_as(input_tensor); - return create_tensor_from_buffer(buffer, input_tensor.get_shape(), input_layout, dtype); + auto convert_dtype = + [&input_layout, &input_dtype, &dtype, &logical_shape, &padded_shape](const Tensor& input_tensor) { + switch (input_dtype) { + case DataType::UINT16: { + auto buffer = host_buffer::get_as(input_tensor); + return create_tensor_from_buffer(buffer, logical_shape, padded_shape, input_layout, dtype); + } + case DataType::INT32: { + auto buffer = host_buffer::get_as(input_tensor); + return create_tensor_from_buffer(buffer, logical_shape, padded_shape, input_layout, dtype); + } + case DataType::UINT32: { + auto buffer = 
host_buffer::get_as(input_tensor); + return create_tensor_from_buffer(buffer, logical_shape, padded_shape, input_layout, dtype); + } + case DataType::FLOAT32: { + auto buffer = host_buffer::get_as(input_tensor); + return create_tensor_from_buffer(buffer, logical_shape, padded_shape, input_layout, dtype); + } + case DataType::BFLOAT16: { + auto buffer = host_buffer::get_as<::bfloat16>(input_tensor); + return create_tensor_from_buffer(buffer, logical_shape, padded_shape, input_layout, dtype); + } + default: TT_THROW("Unsupported DataType: {}", input_dtype); break; } - case DataType::BFLOAT16: { - auto buffer = host_buffer::get_as<::bfloat16>(input_tensor); - return create_tensor_from_buffer(buffer, input_tensor.get_shape(), input_layout, dtype); - } - default: TT_THROW("Unsupported DataType: {}", input_dtype); break; - } - }; + }; return distributed::is_multi_device_tensor(input_tensor) ? transform(input_tensor, convert_dtype) : convert_dtype(input_tensor); } diff --git a/ttnn/cpp/ttnn/operations/data_movement/pad/device/pad_program_factory.cpp b/ttnn/cpp/ttnn/operations/data_movement/pad/device/pad_program_factory.cpp index fbd6c78e276..ce6e955fd22 100644 --- a/ttnn/cpp/ttnn/operations/data_movement/pad/device/pad_program_factory.cpp +++ b/ttnn/cpp/ttnn/operations/data_movement/pad/device/pad_program_factory.cpp @@ -43,7 +43,7 @@ operation::ProgramWithCallbacks pad_rm_reader_writer( const Tensor pad_value_const_tensor = Tensor( OwnedStorage{pad_value_const_buffer}, - Shape(std::array{1, 1, 1, pad_value_const_buffer_size}), + ttnn::SimpleShape({1, 1, 1, pad_value_const_buffer_size}), DataType::BFLOAT16, Layout::ROW_MAJOR) .to(device, MemoryConfig{.memory_layout = TensorMemoryLayout::INTERLEAVED, .buffer_type = BufferType::L1}); @@ -474,7 +474,7 @@ operation::ProgramWithCallbacks pad_rm_reader_writer_multi_core( const Tensor pad_value_const_tensor = Tensor( OwnedStorage{pad_value_const_buffer}, - Shape(std::array{1, 1, 1, pad_value_const_buffer_size}), + 
ttnn::SimpleShape({1, 1, 1, pad_value_const_buffer_size}), DataType::BFLOAT16, Layout::ROW_MAJOR) .to(device, MemoryConfig{.memory_layout = TensorMemoryLayout::INTERLEAVED, .buffer_type = BufferType::L1}); diff --git a/ttnn/cpp/ttnn/operations/data_movement/reshape_on_device/reshape.cpp b/ttnn/cpp/ttnn/operations/data_movement/reshape_on_device/reshape.cpp index 40b22b244a0..8702f4b04ff 100644 --- a/ttnn/cpp/ttnn/operations/data_movement/reshape_on_device/reshape.cpp +++ b/ttnn/cpp/ttnn/operations/data_movement/reshape_on_device/reshape.cpp @@ -18,12 +18,16 @@ namespace ttnn::operations::data_movement { namespace detail { static Tensor manual_insertion( - const Tensor& input_tensor, const ttnn::Shape& shape, IDevice* device, const MemoryConfig& output_mem_config) { + const Tensor& input_tensor, + const ttnn::SimpleShape& logical_shape, + const ttnn::SimpleShape& padded_shape, + IDevice* device, + const MemoryConfig& output_mem_config) { TT_ASSERT(input_tensor.get_layout() == Layout::ROW_MAJOR); TT_ASSERT( - shape.logical_shape().volume() == input_tensor.get_logical_volume(), + logical_shape.volume() == input_tensor.get_logical_volume(), "Required shape volume ({}) must match old shape volume ({})", - shape.logical_shape().volume(), + logical_shape.volume(), input_tensor.get_logical_volume()); auto device_buffer = input_tensor.device_buffer(); uint32_t size_in_bytes = device_buffer->size(); @@ -38,7 +42,13 @@ static Tensor manual_insertion( } auto owned_buffer = owned_buffer::create(std::move(data_vec)); auto output = - Tensor(OwnedStorage{owned_buffer}, shape, DataType::BFLOAT16, Layout::ROW_MAJOR).to(Layout::ROW_MAJOR); + Tensor( + OwnedStorage{owned_buffer}, + TensorSpec( + logical_shape, + TensorLayout::fromPaddedShape( + DataType::BFLOAT16, PageConfig(Layout::ROW_MAJOR), MemoryConfig{}, logical_shape, padded_shape))) + .to(Layout::ROW_MAJOR); if (device != nullptr) { output = output.to(device, output_mem_config); } @@ -74,7 +84,11 @@ ttnn::Tensor 
ReshapeOperation::invoke( TT_FATAL(input_tensor.get_dtype() == DataType::BFLOAT16, "Error"); return detail::manual_insertion( - (tt::tt_metal::Tensor)input_tensor, output_shape, input_tensor.device(), output_mem_config); + (tt::tt_metal::Tensor)input_tensor, + output_shape.logical_shape(), + output_shape.padded_shape(), + input_tensor.device(), + output_mem_config); } std::vector output_tensors = {Tensor(tt::tt_metal::operation::get_workers_for_op_output({input_tensor}))}; return operation::run(ReshapeDeviceOperation{output_shape, output_mem_config}, {input_tensor}).at(0); diff --git a/ttnn/cpp/ttnn/operations/eltwise/unary/device/unary_composite_op.cpp b/ttnn/cpp/ttnn/operations/eltwise/unary/device/unary_composite_op.cpp index 8d6198cbc9c..2120f20c26f 100644 --- a/ttnn/cpp/ttnn/operations/eltwise/unary/device/unary_composite_op.cpp +++ b/ttnn/cpp/ttnn/operations/eltwise/unary/device/unary_composite_op.cpp @@ -674,7 +674,8 @@ Tensor _swiglu(const Tensor& input_a, int32_t dim, const std::optional& output_mem_config) { Tensor index_l = ttnn::index_tril<::bfloat16>( - input_a.get_legacy_shape(), + input_a.get_logical_shape(), + input_a.get_padded_shape(), diag, DataType::BFLOAT16, Layout::TILE, @@ -686,7 +687,8 @@ Tensor _tril(const Tensor& input_a, int32_t diag, const std::optional& output_mem_config) { Tensor index_u = ttnn::index_triu<::bfloat16>( - input_a.get_legacy_shape(), + input_a.get_logical_shape(), + input_a.get_padded_shape(), diag, DataType::BFLOAT16, Layout::TILE, diff --git a/ttnn/cpp/ttnn/operations/experimental/reduction/argmax/argmax.cpp b/ttnn/cpp/ttnn/operations/experimental/reduction/argmax/argmax.cpp index ef309401445..856bdac5d77 100644 --- a/ttnn/cpp/ttnn/operations/experimental/reduction/argmax/argmax.cpp +++ b/ttnn/cpp/ttnn/operations/experimental/reduction/argmax/argmax.cpp @@ -15,8 +15,8 @@ namespace ttnn::operations::experimental::reduction { Tensor create_mask(const Tensor& input_a, const std::optional& output_mem_config) { - auto 
padded_shape = input_a.get_legacy_shape(); - auto& unpadded_shape = padded_shape.without_padding(); + auto& padded_shape = input_a.get_padded_shape(); + auto& unpadded_shape = input_a.get_logical_shape(); if (padded_shape == unpadded_shape) { return input_a; } @@ -50,12 +50,22 @@ Tensor ArgmaxOperation::invoke( Tensor max_val = ttnn::max(input_a, (int)dim, true, output_memory_config); Tensor max_tensor = ttnn::zeros_like(input_a); Tensor tindex = ttnn::index_width<::bfloat16>( - input_shape, DataType::BFLOAT16, Layout::TILE, input_a.device(), output_memory_config); + input.get_logical_shape(), + input.get_padded_shape(), + DataType::BFLOAT16, + Layout::TILE, + input_a.device(), + output_memory_config); if (is_width) { max_tensor = ttnn::add(max_tensor, max_val, std::nullopt, output_memory_config); } else { tindex = ttnn::index_height<::bfloat16>( - input_shape, DataType::BFLOAT16, Layout::TILE, input_a.device(), output_memory_config); + input.get_logical_shape(), + input.get_padded_shape(), + DataType::BFLOAT16, + Layout::TILE, + input_a.device(), + output_memory_config); max_tensor = ttnn::add(max_tensor, max_val, std::nullopt, output_memory_config); } tindex = tindex.to(input_a.device()); @@ -94,10 +104,20 @@ Tensor ArgmaxOperation::invoke( Tensor cmp_results = ttnn::eq(input_a, concat_out, std::nullopt, output_memory_config); concat_out.deallocate(); Tensor tindex = ttnn::index_channel<::bfloat16>( - input_shape, DataType::BFLOAT16, Layout::TILE, input_a.device(), output_memory_config); + input.get_logical_shape(), + input.get_padded_shape(), + DataType::BFLOAT16, + Layout::TILE, + input_a.device(), + output_memory_config); if (!is_channel) { tindex = ttnn::index_batch<::bfloat16>( - input_shape, DataType::BFLOAT16, Layout::TILE, input_a.device(), output_memory_config); + input.get_logical_shape(), + input.get_padded_shape(), + DataType::BFLOAT16, + Layout::TILE, + input_a.device(), + output_memory_config); } tindex = tindex.to(input_a.device()); Tensor 
max_indices = ttnn::multiply(cmp_results, tindex, std::nullopt, output_memory_config); @@ -120,7 +140,12 @@ Tensor ArgmaxOperation::invoke( // TODO: Fix the index generation code. With the fix the code will work for argmax that return entire // maximum value index Tensor tindex = ttnn::index_all<::bfloat16>( - input_shape, DataType::BFLOAT16, Layout::TILE, input_a.device(), output_memory_config); + input.get_logical_shape(), + input.get_padded_shape(), + DataType::BFLOAT16, + Layout::TILE, + input_a.device(), + output_memory_config); Tensor max_val = ttnn::max(input_a, std::nullopt, true, output_memory_config); Tensor max_tensor = ttnn::zeros_like(input_a); max_tensor = ttnn::add(max_tensor, max_val, std::nullopt, output_memory_config); diff --git a/ttnn/cpp/ttnn/operations/functions.hpp b/ttnn/cpp/ttnn/operations/functions.hpp index 9d4493c5446..6739db01132 100644 --- a/ttnn/cpp/ttnn/operations/functions.hpp +++ b/ttnn/cpp/ttnn/operations/functions.hpp @@ -27,7 +27,8 @@ using tt::tt_metal::Tensor; template static Tensor index_trilu( - const tt::tt_metal::LegacyShape& shape, + const ttnn::SimpleShape& logical_shape, + const ttnn::SimpleShape& padded_shape, const int32_t diag, DataType data_type, const Layout layout = Layout::ROW_MAJOR, @@ -35,31 +36,37 @@ static Tensor index_trilu( const MemoryConfig& output_mem_config = MemoryConfig{ .memory_layout = tt::tt_metal::TensorMemoryLayout::INTERLEAVED}) { // Current implementation restrictions - auto owned_buffer = tt::tt_metal::owned_buffer::create(tt::tt_metal::compute_volume(shape)); + auto owned_buffer = tt::tt_metal::owned_buffer::create(padded_shape.volume()); auto index = 0; - auto rank = shape.rank(); + auto rank = padded_shape.rank(); auto penultimate = rank - 2; auto ultimate = rank - 1; - auto offset = shape[penultimate] * shape[ultimate]; + auto offset = padded_shape[penultimate] * padded_shape[ultimate]; auto iterations = 1; for (int itr = 0; itr < rank - 2; itr++) { - iterations *= shape[itr]; + iterations 
*= padded_shape[itr]; } for (uint32_t itr = 0; itr < iterations; itr++) { - for (int32_t y = 0; y < shape[penultimate]; y++) { - for (int32_t x = 0; x < shape[ultimate]; x++) { + for (int32_t y = 0; y < padded_shape[penultimate]; y++) { + for (int32_t x = 0; x < padded_shape[ultimate]; x++) { int32_t value = (IS_UPPER) ? (x >= (y + diag)) : (y >= (x - diag)); if constexpr (std::is_same_v) { - owned_buffer[index + y * shape[ultimate] + x] = T(static_cast(value)); + owned_buffer[index + y * padded_shape[ultimate] + x] = T(static_cast(value)); } else { - owned_buffer[index + y * shape[ultimate] + x] = static_cast(value); + owned_buffer[index + y * padded_shape[ultimate] + x] = static_cast(value); } } // dim X } // dim Y index += offset; } - auto output = Tensor(OwnedStorage{owned_buffer}, shape, data_type, Layout::ROW_MAJOR).to(layout); + auto output = Tensor( + OwnedStorage{owned_buffer}, + TensorSpec( + logical_shape, + TensorLayout::fromPaddedShape( + data_type, PageConfig(Layout::ROW_MAJOR), MemoryConfig{}, logical_shape, padded_shape))) + .to(layout); if (device != nullptr) { output = output.to(device, output_mem_config); } @@ -68,34 +75,40 @@ static Tensor index_trilu( template static Tensor index_width( - const tt::tt_metal::LegacyShape& shape, + const ttnn::SimpleShape& logical_shape, + const ttnn::SimpleShape& padded_shape, DataType data_type, const Layout layout = Layout::ROW_MAJOR, IDevice* device = nullptr, const MemoryConfig& output_mem_config = MemoryConfig{ .memory_layout = tt::tt_metal::TensorMemoryLayout::INTERLEAVED}) { - auto owned_buffer = tt::tt_metal::owned_buffer::create(tt::tt_metal::compute_volume(shape)); + auto owned_buffer = tt::tt_metal::owned_buffer::create(padded_shape.volume()); std::fill(owned_buffer.begin(), owned_buffer.end(), -std::numeric_limits::infinity()); - auto& up_shape = shape.without_padding(); auto index = 0; auto value = 0; - auto rank = up_shape.rank(); + auto rank = logical_shape.rank(); auto penultimate = rank - 2; 
auto ultimate = rank - 1; - for (uint32_t b = 0; b < up_shape[rank - 4]; b++) { - for (uint32_t c = 0; c < up_shape[rank - 3]; c++) { - for (uint32_t y = 0; y < up_shape[penultimate]; y++) { - for (uint32_t x = 0; x < up_shape[ultimate]; x++) { + for (uint32_t b = 0; b < logical_shape[rank - 4]; b++) { + for (uint32_t c = 0; c < logical_shape[rank - 3]; c++) { + for (uint32_t y = 0; y < logical_shape[penultimate]; y++) { + for (uint32_t x = 0; x < logical_shape[ultimate]; x++) { owned_buffer[index++] = T(static_cast(value)); value = value + 1; } // dim W value = 0; - index = index + (shape[ultimate] - up_shape[ultimate]); + index = index + (padded_shape[ultimate] - logical_shape[ultimate]); } // dim H - index = index + ((shape[penultimate] - up_shape[penultimate]) * tt::constants::TILE_WIDTH); + index = index + ((padded_shape[penultimate] - logical_shape[penultimate]) * tt::constants::TILE_WIDTH); } // dim c } // dim N - auto output = Tensor(OwnedStorage{owned_buffer}, shape, data_type, Layout::ROW_MAJOR).to(layout); + auto output = Tensor( + OwnedStorage{owned_buffer}, + TensorSpec( + logical_shape, + TensorLayout::fromPaddedShape( + data_type, PageConfig(Layout::ROW_MAJOR), MemoryConfig{}, logical_shape, padded_shape))) + .to(layout); if (device != nullptr) { output = output.to(device, output_mem_config); } @@ -104,34 +117,40 @@ static Tensor index_width( template static Tensor index_height( - const tt::tt_metal::LegacyShape& shape, + const ttnn::SimpleShape& logical_shape, + const ttnn::SimpleShape& padded_shape, DataType data_type, const Layout layout = Layout::ROW_MAJOR, IDevice* device = nullptr, const MemoryConfig& output_mem_config = MemoryConfig{ .memory_layout = tt::tt_metal::TensorMemoryLayout::INTERLEAVED}) { - auto owned_buffer = tt::tt_metal::owned_buffer::create(tt::tt_metal::compute_volume(shape)); + auto owned_buffer = tt::tt_metal::owned_buffer::create(padded_shape.volume()); std::fill(owned_buffer.begin(), owned_buffer.end(), 
-std::numeric_limits::infinity()); - auto& up_shape = shape.without_padding(); auto index = 0; auto value = 0; - auto rank = up_shape.rank(); + auto rank = logical_shape.rank(); auto penultimate = rank - 2; auto ultimate = rank - 1; - for (uint32_t b = 0; b < up_shape[rank - 4]; b++) { - for (uint32_t c = 0; c < up_shape[rank - 3]; c++) { - for (uint32_t y = 0; y < up_shape[penultimate]; y++) { - for (uint32_t x = 0; x < up_shape[ultimate]; x++) { + for (uint32_t b = 0; b < logical_shape[rank - 4]; b++) { + for (uint32_t c = 0; c < logical_shape[rank - 3]; c++) { + for (uint32_t y = 0; y < logical_shape[penultimate]; y++) { + for (uint32_t x = 0; x < logical_shape[ultimate]; x++) { owned_buffer[index++] = T(static_cast(value)); } // dim W value = value + 1; - index = index + (shape[ultimate] - up_shape[ultimate]); + index = index + (padded_shape[ultimate] - logical_shape[ultimate]); } // dim H value = 0; - index = index + ((shape[penultimate] - up_shape[penultimate]) * tt::constants::TILE_WIDTH); + index = index + ((padded_shape[penultimate] - logical_shape[penultimate]) * tt::constants::TILE_WIDTH); } // dim C } // dim N - auto output = Tensor(OwnedStorage{owned_buffer}, shape, data_type, Layout::ROW_MAJOR).to(layout); + auto output = Tensor( + OwnedStorage{owned_buffer}, + TensorSpec( + logical_shape, + TensorLayout::fromPaddedShape( + data_type, PageConfig(Layout::ROW_MAJOR), MemoryConfig{}, logical_shape, padded_shape))) + .to(layout); if (device != nullptr) { output = output.to(device, output_mem_config); } @@ -140,33 +159,39 @@ static Tensor index_height( template static Tensor index_all( - const tt::tt_metal::LegacyShape& shape, + const ttnn::SimpleShape& logical_shape, + const ttnn::SimpleShape& padded_shape, DataType data_type, const Layout layout = Layout::ROW_MAJOR, IDevice* device = nullptr, const MemoryConfig& output_mem_config = MemoryConfig{ .memory_layout = tt::tt_metal::TensorMemoryLayout::INTERLEAVED}) { - auto owned_buffer = 
tt::tt_metal::owned_buffer::create(tt::tt_metal::compute_volume(shape)); + auto owned_buffer = tt::tt_metal::owned_buffer::create(padded_shape.volume()); std::fill(owned_buffer.begin(), owned_buffer.end(), -std::numeric_limits::infinity()); - auto& up_shape = shape.without_padding(); auto index = 0; auto value = 0; - auto rank = up_shape.rank(); + auto rank = logical_shape.rank(); auto penultimate = rank - 2; auto ultimate = rank - 1; - for (uint32_t b = 0; b < up_shape[rank - 4]; b++) { - for (uint32_t c = 0; c < up_shape[rank - 3]; c++) { - for (uint32_t y = 0; y < up_shape[penultimate]; y++) { - for (uint32_t x = 0; x < up_shape[ultimate]; x++) { + for (uint32_t b = 0; b < logical_shape[rank - 4]; b++) { + for (uint32_t c = 0; c < logical_shape[rank - 3]; c++) { + for (uint32_t y = 0; y < logical_shape[penultimate]; y++) { + for (uint32_t x = 0; x < logical_shape[ultimate]; x++) { owned_buffer[index++] = T(static_cast(value)); value = value + 1; } // dim W - index = index + (shape[ultimate] - up_shape[ultimate]); + index = index + (padded_shape[ultimate] - logical_shape[ultimate]); } // dim H - index = index + ((shape[penultimate] - up_shape[penultimate]) * tt::constants::TILE_WIDTH); + index = index + ((padded_shape[penultimate] - logical_shape[penultimate]) * tt::constants::TILE_WIDTH); } // dim C } // dim N - auto output = Tensor(OwnedStorage{owned_buffer}, shape, data_type, Layout::ROW_MAJOR).to(layout); + auto output = Tensor( + OwnedStorage{owned_buffer}, + TensorSpec( + logical_shape, + TensorLayout::fromPaddedShape( + data_type, PageConfig(Layout::ROW_MAJOR), MemoryConfig{}, logical_shape, padded_shape))) + .to(layout); if (device != nullptr) { output = output.to(device, output_mem_config); } @@ -175,14 +200,14 @@ static Tensor index_all( template static Tensor mask_padded_input( - const tt::tt_metal::LegacyShape& padded_shape, - const tt::tt_metal::LegacyShape& unpadded_shape, + const ttnn::SimpleShape& logical_shape, + const ttnn::SimpleShape& 
padded_shape, DataType data_type, const Layout layout = Layout::ROW_MAJOR, IDevice* device = nullptr, const MemoryConfig& output_mem_config = MemoryConfig{ .memory_layout = tt::tt_metal::TensorMemoryLayout::INTERLEAVED}) { - auto owned_buffer = tt::tt_metal::owned_buffer::create(tt::tt_metal::compute_volume(padded_shape)); + auto owned_buffer = tt::tt_metal::owned_buffer::create(padded_shape.volume()); auto index = 0; auto rank = padded_shape.rank(); @@ -192,8 +217,8 @@ static Tensor mask_padded_input( for (uint32_t c = 0; c < padded_shape[rank - 3]; c++) { for (uint32_t y = 0; y < padded_shape[penultimate]; y++) { for (uint32_t x = 0; x < padded_shape[ultimate]; x++) { - if (b < unpadded_shape[rank - 4] && c < unpadded_shape[rank - 3] && - y < unpadded_shape[penultimate] && x < unpadded_shape[ultimate]) { + if (b < logical_shape[rank - 4] && c < logical_shape[rank - 3] && y < logical_shape[penultimate] && + x < logical_shape[ultimate]) { owned_buffer[index++] = T(static_cast(1.0)); } else { owned_buffer[index++] = T(static_cast(0.0)); @@ -235,8 +260,17 @@ static Tensor fill_first_val_into_tensor( for (uint32_t i = 0; i < physical_volume; i++) { owned_buffer[i] = input_buffer[0]; } - const tt::tt_metal::LegacyShape& s_a = input_tensor.get_legacy_shape(); - auto output = Tensor(OwnedStorage{owned_buffer}, s_a, data_type, Layout::ROW_MAJOR).to(layout); + auto output = Tensor( + OwnedStorage{owned_buffer}, + TensorSpec( + input_tensor.get_logical_shape(), + TensorLayout::fromPaddedShape( + data_type, + PageConfig(Layout::ROW_MAJOR), + MemoryConfig{}, + input_tensor.get_logical_shape(), + input_tensor.get_padded_shape()))) + .to(layout); if (device != nullptr) { output = output.to(device, output_mem_config); } @@ -251,7 +285,7 @@ static Tensor prod_result_computation_GS( IDevice* device = nullptr, const MemoryConfig& output_mem_config = MemoryConfig{ .memory_layout = tt::tt_metal::TensorMemoryLayout::INTERLEAVED}) { - const tt::tt_metal::LegacyShape& s_a = 
input_tensor.get_legacy_shape(); + const ttnn::SimpleShape& s_a = input_tensor.get_padded_shape(); auto owned_buffer = tt::tt_metal::owned_buffer::create(input_tensor.volume()); // ouput auto device_buffer = input_tensor.device_buffer(); uint32_t size_in_bytes = device_buffer->size(); @@ -285,7 +319,17 @@ static Tensor prod_result_computation_GS( } owned_buffer[0] = result; // store the result at the first position of the tensor,and the rest of the values as // 0.0f - auto output = Tensor(OwnedStorage{owned_buffer}, s_a, data_type, Layout::ROW_MAJOR).to(layout); + auto output = Tensor( + OwnedStorage{owned_buffer}, + TensorSpec( + input_tensor.get_logical_shape(), + TensorLayout::fromPaddedShape( + data_type, + Layout::ROW_MAJOR, + MemoryConfig{}, + input_tensor.get_logical_shape(), + input_tensor.get_padded_shape()))) + .to(layout); if (device != nullptr) { output = output.to(device, output_mem_config); } @@ -300,8 +344,8 @@ static Tensor prod_result_computation_WH_B0( IDevice* device = nullptr, const MemoryConfig& output_mem_config = MemoryConfig{ .memory_layout = tt::tt_metal::TensorMemoryLayout::INTERLEAVED}) { - const tt::tt_metal::LegacyShape& s_a = input_tensor.get_legacy_shape(); - auto owned_buffer = tt::tt_metal::owned_buffer::create(tt::tt_metal::compute_volume(s_a)); // ouput + const auto& s_a = input_tensor.get_padded_shape(); + auto owned_buffer = tt::tt_metal::owned_buffer::create(s_a.volume()); // ouput auto device_buffer = input_tensor.device_buffer(); uint32_t size_in_bytes = device_buffer->size(); std::vector data_vec; @@ -338,7 +382,17 @@ static Tensor prod_result_computation_WH_B0( } owned_buffer[0] = result; // store the result at the first position of the tensor,and the rest of the values as // 0.0f - auto output = Tensor(OwnedStorage{owned_buffer}, s_a, data_type, Layout::ROW_MAJOR).to(layout); + auto output = Tensor( + OwnedStorage{owned_buffer}, + TensorSpec( + input_tensor.get_logical_shape(), + TensorLayout::fromPaddedShape( + data_type, 
+ PageConfig(Layout::ROW_MAJOR), + MemoryConfig{}, + input_tensor.get_logical_shape(), + input_tensor.get_padded_shape()))) + .to(layout); if (device != nullptr) { output = output.to(device, output_mem_config); } @@ -347,34 +401,40 @@ static Tensor prod_result_computation_WH_B0( template static Tensor index_channel( - const tt::tt_metal::LegacyShape& shape, + const ttnn::SimpleShape& logical_shape, + const ttnn::SimpleShape& padded_shape, DataType data_type, const Layout layout = Layout::ROW_MAJOR, IDevice* device = nullptr, const MemoryConfig& output_mem_config = MemoryConfig{ .memory_layout = tt::tt_metal::TensorMemoryLayout::INTERLEAVED}) { - auto owned_buffer = tt::tt_metal::owned_buffer::create(tt::tt_metal::compute_volume(shape)); + auto owned_buffer = tt::tt_metal::owned_buffer::create(padded_shape.volume()); std::fill(owned_buffer.begin(), owned_buffer.end(), -std::numeric_limits::infinity()); - auto& up_shape = shape.without_padding(); auto index = 0; auto value = 0; - auto rank = up_shape.rank(); + auto rank = logical_shape.rank(); auto penultimate = rank - 2; auto ultimate = rank - 1; - for (uint32_t b = 0; b < up_shape[rank - 4]; b++) { - for (uint32_t c = 0; c < up_shape[rank - 3]; c++) { - for (uint32_t y = 0; y < up_shape[penultimate]; y++) { - for (uint32_t x = 0; x < up_shape[ultimate]; x++) { + for (uint32_t b = 0; b < logical_shape[rank - 4]; b++) { + for (uint32_t c = 0; c < logical_shape[rank - 3]; c++) { + for (uint32_t y = 0; y < logical_shape[penultimate]; y++) { + for (uint32_t x = 0; x < logical_shape[ultimate]; x++) { owned_buffer[index++] = T(static_cast(value)); } // dim W - index = index + (shape[ultimate] - up_shape[ultimate]); + index = index + (padded_shape[ultimate] - logical_shape[ultimate]); } // dim H value = value + 1; - index = index + ((shape[penultimate] - up_shape[penultimate]) * tt::constants::TILE_WIDTH); + index = index + ((padded_shape[penultimate] - logical_shape[penultimate]) * tt::constants::TILE_WIDTH); } // dim C 
value = 0; } // dim N - auto output = Tensor(OwnedStorage{owned_buffer}, shape, data_type, Layout::ROW_MAJOR).to(layout); + auto output = Tensor( + OwnedStorage{owned_buffer}, + TensorSpec( + logical_shape, + TensorLayout::fromPaddedShape( + data_type, PageConfig(Layout::ROW_MAJOR), MemoryConfig{}, logical_shape, padded_shape))) + .to(layout); if (device != nullptr) { output = output.to(device, output_mem_config); } @@ -383,33 +443,39 @@ static Tensor index_channel( template static Tensor index_batch( - const tt::tt_metal::LegacyShape& shape, + const ttnn::SimpleShape& logical_shape, + const ttnn::SimpleShape& padded_shape, DataType data_type, const Layout layout = Layout::ROW_MAJOR, IDevice* device = nullptr, const MemoryConfig& output_mem_config = MemoryConfig{ .memory_layout = tt::tt_metal::TensorMemoryLayout::INTERLEAVED}) { - auto owned_buffer = tt::tt_metal::owned_buffer::create(tt::tt_metal::compute_volume(shape)); + auto owned_buffer = tt::tt_metal::owned_buffer::create(padded_shape.volume()); std::fill(owned_buffer.begin(), owned_buffer.end(), -std::numeric_limits::infinity()); - auto& up_shape = shape.without_padding(); auto index = 0; auto value = 0; - auto rank = up_shape.rank(); + auto rank = logical_shape.rank(); auto penultimate = rank - 2; auto ultimate = rank - 1; - for (uint32_t b = 0; b < up_shape[rank - 4]; b++) { - for (uint32_t c = 0; c < up_shape[rank - 3]; c++) { - for (uint32_t y = 0; y < up_shape[penultimate]; y++) { - for (uint32_t x = 0; x < up_shape[ultimate]; x++) { + for (uint32_t b = 0; b < logical_shape[rank - 4]; b++) { + for (uint32_t c = 0; c < logical_shape[rank - 3]; c++) { + for (uint32_t y = 0; y < logical_shape[penultimate]; y++) { + for (uint32_t x = 0; x < logical_shape[ultimate]; x++) { owned_buffer[index++] = T(static_cast(value)); } // dim W - index = index + (shape[ultimate] - up_shape[ultimate]); + index = index + (padded_shape[ultimate] - logical_shape[ultimate]); } // dim H - index = index + ((shape[penultimate] - 
up_shape[penultimate]) * tt::constants::TILE_WIDTH); + index = index + ((padded_shape[penultimate] - logical_shape[penultimate]) * tt::constants::TILE_WIDTH); } // dim C value = value + 1; } // dim N - auto output = Tensor(OwnedStorage{owned_buffer}, shape, data_type, Layout::ROW_MAJOR).to(layout); + auto output = Tensor( + OwnedStorage{owned_buffer}, + TensorSpec( + logical_shape, + TensorLayout::fromPaddedShape( + data_type, PageConfig(Layout::ROW_MAJOR), MemoryConfig{}, logical_shape, padded_shape))) + .to(layout); if (device != nullptr) { output = output.to(device, output_mem_config); } @@ -419,7 +485,8 @@ static Tensor index_batch( template static Tensor manual_insertion( const Tensor& input_tensor, - const tt::tt_metal::LegacyShape& shape, + const ttnn::SimpleShape& logical_shape, + const ttnn::SimpleShape& padded_shape, DataType data_type, const Layout layout = Layout::ROW_MAJOR, IDevice* device = nullptr, @@ -427,7 +494,7 @@ static Tensor manual_insertion( .memory_layout = tt::tt_metal::TensorMemoryLayout::INTERLEAVED}) { TT_ASSERT(input_tensor.get_layout() == Layout::ROW_MAJOR); TT_ASSERT( - shape[0] * shape[1] * shape[2] * shape[3] == input_tensor.volume(), + padded_shape[0] * padded_shape[1] * padded_shape[2] * padded_shape[3] == input_tensor.volume(), "Required shape volume must match old shape volume"); auto device_buffer = input_tensor.device_buffer(); uint32_t size_in_bytes = device_buffer->size(); @@ -441,7 +508,13 @@ static Tensor manual_insertion( tt::tt_metal::tensor_impl::read_data_from_device_buffer(device_buffer, data_vec); } auto owned_buffer = owned_buffer::create(std::move(data_vec)); - auto output = Tensor(OwnedStorage{owned_buffer}, shape, data_type, Layout::ROW_MAJOR).to(layout); + auto output = Tensor( + OwnedStorage{owned_buffer}, + TensorSpec( + logical_shape, + TensorLayout::fromPaddedShape( + data_type, PageConfig(Layout::ROW_MAJOR), MemoryConfig{}, logical_shape, padded_shape))) + .to(layout); if (device != nullptr) { output = 
output.to(device, output_mem_config); } @@ -450,26 +523,28 @@ static Tensor manual_insertion( template static Tensor index_tril( - const tt::tt_metal::LegacyShape& shape, + const ttnn::SimpleShape& logical_shape, + const ttnn::SimpleShape& padded_shape, const int32_t diag, DataType data_type, const Layout layout = Layout::ROW_MAJOR, IDevice* device = nullptr, const MemoryConfig& output_mem_config = MemoryConfig{ .memory_layout = tt::tt_metal::TensorMemoryLayout::INTERLEAVED}) { - return index_trilu(shape, diag, data_type, layout, device, output_mem_config); + return index_trilu(logical_shape, padded_shape, diag, data_type, layout, device, output_mem_config); } template static Tensor index_triu( - const tt::tt_metal::LegacyShape& shape, + const ttnn::SimpleShape& logical_shape, + const ttnn::SimpleShape& padded_shape, const int32_t diag, DataType data_type, const Layout layout = Layout::ROW_MAJOR, IDevice* device = nullptr, const MemoryConfig& output_mem_config = MemoryConfig{ .memory_layout = tt::tt_metal::TensorMemoryLayout::INTERLEAVED}) { - return index_trilu(shape, diag, data_type, layout, device, output_mem_config); + return index_trilu(logical_shape, padded_shape, diag, data_type, layout, device, output_mem_config); } namespace random { diff --git a/ttnn/cpp/ttnn/operations/pool/upsample/device/upsample_program_factory_multicore.cpp b/ttnn/cpp/ttnn/operations/pool/upsample/device/upsample_program_factory_multicore.cpp index dc61eca5bfc..4442921f603 100644 --- a/ttnn/cpp/ttnn/operations/pool/upsample/device/upsample_program_factory_multicore.cpp +++ b/ttnn/cpp/ttnn/operations/pool/upsample/device/upsample_program_factory_multicore.cpp @@ -118,7 +118,7 @@ static Tensor create_config_tensor( */ const uint32_t config_buffer_entry_size = 2; uint32_t elems_per_core = config_buffer_entry_size * scale_factor_h * input_nsticks_per_core; - Shape config_shape({config_vector.size() / elems_per_core, elems_per_core}); + ttnn::SimpleShape config_shape({config_vector.size() 
/ elems_per_core, elems_per_core}); auto config_buffer = owned_buffer::create(std::move(config_vector)); return Tensor(OwnedStorage{config_buffer}, config_shape, DataType::UINT16, Layout::ROW_MAJOR); } diff --git a/ttnn/cpp/ttnn/operations/sliding_window/sliding_window.cpp b/ttnn/cpp/ttnn/operations/sliding_window/sliding_window.cpp index c7af8f4cf44..06e830b779a 100644 --- a/ttnn/cpp/ttnn/operations/sliding_window/sliding_window.cpp +++ b/ttnn/cpp/ttnn/operations/sliding_window/sliding_window.cpp @@ -628,7 +628,7 @@ Tensor construct_on_host_config_tensor( // we need the last dim of tensors to be multiple of 2, pad if needed uint32_t extend_with_zeroes = config[0].size() % 2; extend_with_zeroes = extend_with_zeroes > 0 ? 2 - extend_with_zeroes : 0; - Shape config_shape = Shape({(uint32_t)config.size(), (uint32_t)config[0].size() + extend_with_zeroes}); + ttnn::SimpleShape config_shape({(uint32_t)config.size(), (uint32_t)config[0].size() + extend_with_zeroes}); std::vector config_vector = flatten(config, extend_with_zeroes); if (p_config.shard_scheme == TensorMemoryLayout::HEIGHT_SHARDED) { auto config_buffer = owned_buffer::create(std::move(config_vector)); @@ -641,7 +641,7 @@ Tensor construct_on_host_config_tensor( repeat_config.insert(repeat_config.end(), config_vector.begin(), config_vector.end()); } auto config_buffer = owned_buffer::create(std::move(repeat_config)); - config_shape = Shape({config_shape[0] * repeat_factor, config_shape[1]}); + config_shape = ttnn::SimpleShape({config_shape[0] * repeat_factor, config_shape[1]}); return Tensor(OwnedStorage{config_buffer}, config_shape, DataType::UINT16, Layout::ROW_MAJOR); } else if (p_config.shard_scheme == TensorMemoryLayout::BLOCK_SHARDED) { TT_ASSERT(p_config.grid.ranges().size() == 1, "BLOCK_SHARDED should have just a single core range"); @@ -671,7 +671,7 @@ Tensor construct_on_host_config_tensor( repeat_config.insert(repeat_config.end(), config_vector.begin(), config_vector.end()); } auto config_buffer = 
owned_buffer::create(std::move(repeat_config)); - config_shape = Shape({config_shape[0] * repeat_factor, config_shape[1]}); + config_shape = ttnn::SimpleShape({config_shape[0] * repeat_factor, config_shape[1]}); return Tensor(OwnedStorage{config_buffer}, config_shape, DataType::UINT16, Layout::ROW_MAJOR); } else { TT_ASSERT(false, "Unsupported shard scheme"); diff --git a/ttnn/cpp/ttnn/tensor/serialization.cpp b/ttnn/cpp/ttnn/tensor/serialization.cpp index 3ce42d8dec7..46db89831e7 100644 --- a/ttnn/cpp/ttnn/tensor/serialization.cpp +++ b/ttnn/cpp/ttnn/tensor/serialization.cpp @@ -297,7 +297,12 @@ Tensor load_tensor_helper(const std::string& file_name, T device) { auto storage = load_storage(input_stream, data_type, layout, storage_type, device); - auto tensor = Tensor(std::move(storage), shape, data_type, layout); + auto tensor = Tensor( + std::move(storage), + TensorSpec( + shape.logical_shape(), + TensorLayout::fromPaddedShape( + data_type, layout, MemoryConfig{}, shape.logical_shape(), shape.padded_shape()))); if (device != nullptr) { tensor = tensor.to(device, memory_config); } else if (has_memory_config) { @@ -316,7 +321,12 @@ Tensor load_tensor_helper(const std::string& file_name, T device) { input_stream.read(reinterpret_cast(&layout), sizeof(Layout)); auto storage = load_owned_storage(input_stream, data_type); - auto tensor = Tensor(std::move(storage), shape, data_type, layout); + auto tensor = Tensor( + std::move(storage), + TensorSpec( + shape.logical_shape(), + TensorLayout::fromPaddedShape( + data_type, layout, MemoryConfig{}, shape.logical_shape(), shape.padded_shape()))); if (device != nullptr) { tensor = tensor.to(device); } diff --git a/ttnn/cpp/ttnn/tensor/tensor.cpp b/ttnn/cpp/ttnn/tensor/tensor.cpp index a326f561bbe..f2bbf008ac8 100644 --- a/ttnn/cpp/ttnn/tensor/tensor.cpp +++ b/ttnn/cpp/ttnn/tensor/tensor.cpp @@ -180,10 +180,6 @@ Tensor::Tensor( dtype, PageConfig(layout, tile), memory_config, logical_shape, padded_shape))); } -Tensor::Tensor( - 
Storage storage, const ttnn::Shape& shape, DataType dtype, Layout layout, const std::optional& tile) : - Tensor(std::move(storage), shape.logical_shape(), shape.padded_shape(), dtype, layout, tile) {} - Tensor::Tensor(Storage storage, TensorSpec tensor_spec) { init(std::move(storage), std::move(tensor_spec)); } void Tensor::init(Storage storage, TensorSpec tensor_spec) { @@ -338,7 +334,7 @@ Tensor::~Tensor() { Tensor::Tensor( Storage storage, const ttnn::SimpleShape& shape, DataType dtype, Layout layout, const std::optional& tile) : - Tensor(std::move(storage), ttnn::Shape(shape.view()), dtype, layout, tile) {} + Tensor(std::move(storage), /* logical_shape */ shape, /* padded_shape */ shape, dtype, layout, tile) {} void Tensor::deallocate(bool force) { ZoneScopedN("TensorDeallocate"); diff --git a/ttnn/cpp/ttnn/tensor/tensor.hpp b/ttnn/cpp/ttnn/tensor/tensor.hpp index 9e770aa3613..a032f331157 100644 --- a/ttnn/cpp/ttnn/tensor/tensor.hpp +++ b/ttnn/cpp/ttnn/tensor/tensor.hpp @@ -88,12 +88,6 @@ struct Tensor { // ====================================================================================== explicit Tensor() = default; - Tensor( - Storage storage, - const ttnn::Shape& shape, - DataType dtype, - Layout layout, - const std::optional& tile = std::nullopt); Tensor( Storage storage, const ttnn::SimpleShape& shape, diff --git a/ttnn/cpp/ttnn/tensor/tensor_impl.cpp b/ttnn/cpp/ttnn/tensor/tensor_impl.cpp index 3d8b2d83ddc..4553c55501c 100644 --- a/ttnn/cpp/ttnn/tensor/tensor_impl.cpp +++ b/ttnn/cpp/ttnn/tensor/tensor_impl.cpp @@ -187,10 +187,17 @@ Tensor pad_bfloat8_b( auto input_float_data = unpack_bfp8_tiles_into_float_vec(input_packed_data, /*row_major_output=*/false, /*is_exp_a=*/false, tile); auto input_float_buffer = owned_buffer::create(std::move(input_float_data)); - auto float_tensor = - Tensor( - OwnedStorage{input_float_buffer}, tensor.get_legacy_shape(), DataType::FLOAT32, tensor.get_layout(), tile) - .pad(output_padded_shape, input_tensor_start, 
pad_value); + auto float_tensor = Tensor( + OwnedStorage{input_float_buffer}, + TensorSpec( + tensor.get_logical_shape(), + TensorLayout::fromPaddedShape( + DataType::FLOAT32, + PageConfig(tensor.get_layout(), tile), + MemoryConfig{}, + tensor.get_logical_shape(), + tensor.get_padded_shape()))) + .pad(output_padded_shape, input_tensor_start, pad_value); // Convert back to BFLOAT8_B auto output_float_data = owned_buffer::get_as(float_tensor).get(); @@ -218,10 +225,17 @@ Tensor unpad_bfloat8_b( auto input_float_data = unpack_bfp8_tiles_into_float_vec(input_packed_data, /*row_major_output=*/false, /*is_exp_a=*/false, tile); auto input_float_buffer = owned_buffer::create(std::move(input_float_data)); - auto float_tensor = - Tensor( - OwnedStorage{input_float_buffer}, tensor.get_legacy_shape(), DataType::FLOAT32, tensor.get_layout(), tile) - .unpad(output_tensor_start, output_tensor_end); + auto float_tensor = Tensor( + OwnedStorage{input_float_buffer}, + TensorSpec( + tensor.get_logical_shape(), + TensorLayout::fromPaddedShape( + DataType::FLOAT32, + PageConfig(tensor.get_layout(), tile), + MemoryConfig{}, + tensor.get_logical_shape(), + tensor.get_padded_shape()))) + .unpad(output_tensor_start, output_tensor_end); // Convert back to BFLOAT8_B auto output_float_data = owned_buffer::get_as(float_tensor).get(); @@ -230,10 +244,14 @@ Tensor unpad_bfloat8_b( auto output_uint32_buffer = owned_buffer::create(std::move(output_packed_data)); return Tensor( std::move(OwnedStorage{std::move(output_uint32_buffer)}), - float_tensor.get_legacy_shape(), - DataType::BFLOAT8_B, - tensor.get_layout(), - tile); + TensorSpec( + float_tensor.get_logical_shape(), + TensorLayout::fromPaddedShape( + DataType::BFLOAT8_B, + PageConfig(tensor.get_layout(), tile), + MemoryConfig{}, + float_tensor.get_logical_shape(), + float_tensor.get_padded_shape()))); } Tensor pad_bfloat4_b( @@ -249,10 +267,17 @@ Tensor pad_bfloat4_b( auto input_float_data = unpack_bfp4_tiles_into_float_vec(input_packed_data, 
/*row_major_output=*/false, /*is_exp_a=*/false, tile); auto input_float_buffer = owned_buffer::create(std::move(input_float_data)); - auto float_tensor = - Tensor( - OwnedStorage{input_float_buffer}, tensor.get_legacy_shape(), DataType::FLOAT32, tensor.get_layout(), tile) - .pad(output_padded_shape, input_tensor_start, pad_value); + auto float_tensor = Tensor( + OwnedStorage{input_float_buffer}, + TensorSpec( + tensor.get_logical_shape(), + TensorLayout::fromPaddedShape( + DataType::FLOAT32, + PageConfig(tensor.get_layout(), tile), + MemoryConfig{}, + tensor.get_logical_shape(), + tensor.get_padded_shape()))) + .pad(output_padded_shape, input_tensor_start, pad_value); // Convert back to BFLOAT4_B auto output_float_data = owned_buffer::get_as(float_tensor).get(); @@ -280,10 +305,17 @@ Tensor unpad_bfloat4_b( auto input_float_data = unpack_bfp4_tiles_into_float_vec(input_packed_data, /*row_major_output=*/false, /*is_exp_a=*/false, tile); auto input_float_buffer = owned_buffer::create(std::move(input_float_data)); - auto float_tensor = - Tensor( - OwnedStorage{input_float_buffer}, tensor.get_legacy_shape(), DataType::FLOAT32, tensor.get_layout(), tile) - .unpad(output_tensor_start, output_tensor_end); + auto float_tensor = Tensor( + OwnedStorage{input_float_buffer}, + TensorSpec( + tensor.get_logical_shape(), + TensorLayout::fromPaddedShape( + DataType::FLOAT32, + PageConfig(tensor.get_layout(), tile), + MemoryConfig{}, + tensor.get_logical_shape(), + tensor.get_padded_shape()))) + .unpad(output_tensor_start, output_tensor_end); // Convert back to BFLOAT4_B auto output_float_data = owned_buffer::get_as(float_tensor).get(); @@ -292,10 +324,14 @@ Tensor unpad_bfloat4_b( auto output_uint32_buffer = owned_buffer::create(std::move(output_packed_data)); return Tensor( std::move(OwnedStorage{std::move(output_uint32_buffer)}), - float_tensor.get_legacy_shape(), - DataType::BFLOAT4_B, - tensor.get_layout(), - tile); + TensorSpec( + float_tensor.get_logical_shape(), + 
TensorLayout::fromPaddedShape( + DataType::BFLOAT4_B, + PageConfig(tensor.get_layout(), tile), + MemoryConfig{}, + float_tensor.get_logical_shape(), + float_tensor.get_padded_shape()))); } // ====================================================================================== @@ -1254,7 +1290,7 @@ Tensor unpad( tensor.get_storage()); return Tensor( OwnedStorage{output_buffer}, - output_shape, + ttnn::SimpleShape(output_shape), tensor.get_dtype(), tensor.get_layout(), tensor.get_tensor_spec().tile()); @@ -1293,8 +1329,7 @@ template Tensor extract_shard(const Tensor& tensor, const uint32_t& core_id) { auto buffer = tensor.buffer(); auto buffer_shard_shape = buffer->shard_spec().shape(); - std::array shard_shape_array = {1, 1, buffer_shard_shape[0], buffer_shard_shape[1]}; - tt::tt_metal::LegacyShape shard_shape(shard_shape_array); + ttnn::SimpleShape shard_shape({1, 1, buffer_shard_shape[0], buffer_shard_shape[1]}); std::vector device_data; ::detail::ReadShard(*buffer, device_data, core_id);