Commit

Refactor toMemoryConfigOp (#253)
* #92: Refactor toMemoryConfigOp

* Hard-code the tilize/untilize code path until tile_shape is properly implemented
jnie-TT authored Aug 2, 2024
1 parent e791b09 commit dcb4b51
Showing 6 changed files with 191 additions and 57 deletions.
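
At a high level, the rewritten ToMemoryConfigOp handler (full diff in runtime/lib/ttnn/program.cpp below) dispatches on the output's target memory space and the input tensor's storage type. The sketch below condenses that decision logic into plain, self-contained C++; the enums and the describeToMemoryConfig function are illustrative stand-ins drawn from the diff, not types or APIs from the repository.

#include <stdexcept>
#include <string>

// Illustrative stand-ins for ::tt::target::MemorySpace and
// ::tt::tt_metal::StorageType; not the real types.
enum class MemorySpace { System, SystemMMIO, DeviceDRAM, DeviceL1 };
enum class StorageType { Borrowed, Device };
enum class DataType { Float32, BFloat16 };

// Summarizes the code path chosen by the refactored handler.
std::string describeToMemoryConfig(MemorySpace target, StorageType storage,
                                   DataType dtype) {
  switch (target) {
  case MemorySpace::System:
  case MemorySpace::SystemMMIO:
    // Host output: untilize (pulling the tensor to host first if it lives on
    // device), convert the data type if needed, then memcpy into the output.
    return storage == StorageType::Device
               ? "cpu() -> untilize -> convert dtype -> memcpy"
               : "untilize -> convert dtype -> memcpy";
  case MemorySpace::DeviceDRAM:
  case MemorySpace::DeviceL1:
    if (storage == StorageType::Borrowed) {
      // Host input heading to device: device tilize requires BFLOAT16, so
      // non-BFLOAT16 tensors are tilized on host before to_device.
      return dtype == DataType::BFloat16
                 ? "to_device -> tilize on device -> convert dtype"
                 : "tilize on host -> to_device -> convert dtype";
    }
    // Already on device: adjust dtype/layout, then to_memory_config
    // (DRAM_MEMORY_CONFIG or L1_MEMORY_CONFIG).
    return "convert dtype -> to_memory_config";
  }
  throw std::runtime_error("unknown memory space");
}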
1 change: 1 addition & 0 deletions runtime/include/tt/runtime/detail/ttnn.h
@@ -35,6 +35,7 @@
#define FMT_HEADER_ONLY
#include "ttnn/device.hpp"
#include "ttnn/operations/binary.hpp"
#include "ttnn/operations/copy.hpp"
#include "ttnn/operations/core.hpp"
#include "ttnn/operations/creation.hpp"
#include "ttnn/operations/matmul.hpp"
6 changes: 3 additions & 3 deletions runtime/lib/binary.cpp
@@ -76,7 +76,7 @@ std::vector<TensorDesc> getProgramInputs(Flatbuffer binary,
input->desc()->shape()->end()};
desc.stride = {input->desc()->layout()->stride()->begin(),
input->desc()->layout()->stride()->end()};
- desc.itemsize = utils::dataTypeElementSize(
desc.itemsize = ::tt::runtime::utils::dataTypeElementSize(
input->desc()->layout()->memory_desc()->data_type());
desc.dataType = input->desc()->layout()->memory_desc()->data_type();
inputs.push_back(desc);
@@ -94,7 +94,7 @@ std::vector<TensorDesc> getProgramOutputs(Flatbuffer binary,
output->desc()->shape()->end()};
desc.stride = {output->desc()->layout()->stride()->begin(),
output->desc()->layout()->stride()->end()};
- desc.itemsize = utils::dataTypeElementSize(
desc.itemsize = ::tt::runtime::utils::dataTypeElementSize(
output->desc()->layout()->memory_desc()->data_type());
desc.dataType = output->desc()->layout()->memory_desc()->data_type();
outputs.push_back(desc);
@@ -210,7 +210,7 @@ Flatbuffer Flatbuffer::loadFromPath(char const *path) {

std::streampos size = fbb.tellg();
fbb.seekg(0, std::ios::beg);
- auto buffer = utils::malloc_shared(size);
auto buffer = ::tt::runtime::utils::malloc_shared(size);
fbb.read(static_cast<char *>(buffer.get()), size);
return Flatbuffer(buffer);
}
1 change: 1 addition & 0 deletions runtime/lib/ttnn/CMakeLists.txt
@@ -3,6 +3,7 @@ add_library(TTRuntimeTTNN
runtime.cpp
program.cpp
)
target_compile_options(TTRuntimeTTNN PRIVATE -mavx -mavx2)
target_include_directories(TTRuntimeTTNN PUBLIC
${PROJECT_SOURCE_DIR}/runtime/include
${PROJECT_BINARY_DIR}/include/ttmlir/Target/Common
175 changes: 142 additions & 33 deletions runtime/lib/ttnn/program.cpp
@@ -8,6 +8,7 @@

#include "tt/runtime/detail/ttnn.h"
#include "tt/runtime/runtime.h"
#include "utils.h"

#include "ttmlir/Target/TTNN/Target.h"
#include "ttmlir/Version.h"
@@ -24,50 +25,158 @@
// some reason a static_assert fails when this is called from within our
// namespace.
ttnn::Tensor tilize(ttnn::Tensor const &input) {
- ttnn::Tensor unsqueezeTensor = ttnn::unsqueeze_to_4D(input);
- return ttnn::to_layout(unsqueezeTensor, ttnn::TILE_LAYOUT, std::nullopt,
return ttnn::to_layout(input, ::ttnn::TILE_LAYOUT, std::nullopt, std::nullopt,
(Device *)nullptr);
}

ttnn::Tensor untilize(ttnn::Tensor const &input) {
return ttnn::to_layout(input, ::ttnn::ROW_MAJOR_LAYOUT, std::nullopt,
std::nullopt, (Device *)nullptr);
}

namespace tt::runtime::ttnn {

static ::ttnn::Tensor convertDataType(const ::ttnn::Tensor &input,
const ::ttnn::DataType &targetDataType) {
const ::ttnn::StorageType storageType = input.storage_type();
if (storageType == ::tt::tt_metal::StorageType::BORROWED) {
return ::ttnn::to_dtype(input, targetDataType);
} else if (storageType == ::tt::tt_metal::StorageType::DEVICE) {
if (input.get_layout() != ::ttnn::TILE_LAYOUT) {
// typecast op requires tilized tensor
::ttnn::Tensor converted =
::ttnn::typecast(::tilize(input), targetDataType);
// untilize and return
return ::untilize(converted);
}
return ::ttnn::typecast(input, targetDataType);
} else {
throw std::runtime_error("Unsupported storage type");
}
}

/* TODO: Blocked by issue #272, ideal flow is to determine tilize/untilize with
* tile_shape */
static ::ttnn::Tensor
updateLayoutAndDataType(const ::ttnn::Tensor &inputTensor,
const ::ttnn::DataType targetDataType,
const bool shouldTilize, const bool shouldUntilize) {
::ttnn::Tensor outputTensor = inputTensor;
const bool shouldConvertDataType = inputTensor.get_dtype() != targetDataType;
// const int targetTileX = targetTileShape->x();
// const int targetTileY = targetTileShape->y();
// const bool shouldTilize =
// targetTileX == 32 and targetTileY == 32 and
// inputTensor.get_layout() == ::ttnn::ROW_MAJOR_LAYOUT;
// const bool shouldUntilize = (targetTileX != 32 or targetTileY != 32) and
// inputTensor.get_layout() ==
// ::ttnn::TILE_LAYOUT;
if (shouldTilize) {
outputTensor = ::tilize(outputTensor);
} else if (shouldUntilize) {
outputTensor = ::untilize(outputTensor);
}
if (shouldConvertDataType) {
outputTensor = convertDataType(outputTensor, targetDataType);
}
return outputTensor;
}

// TODO: right now hardcoding tilize/untilize, should determine with tile shape
// blocked by issue #272
static void
run(::tt::target::ttnn::ToMemoryConfigOp const *op, ::ttnn::Device &device,
std::unordered_map<std::uint32_t, ::ttnn::Tensor *> &liveTensors,
std::list<::ttnn::Tensor> &tensorPool) {
- if (op->out()->desc()->layout()->memory_desc()->memory_space() ==
- ::tt::target::MemorySpace::System) {
- auto &inputTensor = *liveTensors.at(op->in0()->global_id());
- auto cpu = inputTensor.cpu();
- ::ttnn::Tensor untilized;
- if (op->out()->desc()->layout()->memory_desc()->data_type() ==
- ::tt::target::DataType::Float32) {
- untilized = ::tt::tt_metal::tensor_impl::to_layout<float>(
- cpu, ::ttnn::ROW_MAJOR_LAYOUT);
- } else if (op->out()->desc()->layout()->memory_desc()->data_type() ==
- ::tt::target::DataType::BFloat16) {
- untilized = ::tt::tt_metal::tensor_impl::to_layout<bfloat16>(
- cpu, ::ttnn::ROW_MAJOR_LAYOUT);
- } else {
- throw std::runtime_error("Unsupported data type");
const ::ttnn::Tensor &inputTensor = *liveTensors.at(op->in0()->global_id());
assert(inputTensor.storage_type() == ::tt::tt_metal::StorageType::BORROWED or
inputTensor.storage_type() == ::tt::tt_metal::StorageType::DEVICE);

const ::tt::target::Dim2d *targetTileShape =
op->out()->desc()->layout()->memory_desc()->tile_shape();
TT_FATAL(utils::isValidTileShape(targetTileShape),
"Invalid tile shape ({}, {})", targetTileShape->x(),
targetTileShape->y());

::tt::target::DataType targetDataType =
op->out()->desc()->layout()->memory_desc()->data_type();
::ttnn::DataType targetDataTypeTTNN = utils::toTTNNDataType(targetDataType);

const ::tt::target::MemorySpace targetMemorySpace =
op->out()->desc()->layout()->memory_desc()->memory_space();

switch (targetMemorySpace) {
case ::tt::target::MemorySpace::System:
case ::tt::target::MemorySpace::SystemMMIO: {
::ttnn::Tensor result;
if (inputTensor.storage_type() == ::tt::tt_metal::StorageType::BORROWED) {
result =
updateLayoutAndDataType(inputTensor, targetDataTypeTTNN, false, true);
} else if (inputTensor.storage_type() ==
::tt::tt_metal::StorageType::DEVICE) {
result = updateLayoutAndDataType(inputTensor.cpu(), targetDataTypeTTNN,
false, true);
}
- auto &outputTensor = *liveTensors.at(op->out()->global_id());
- void *src = ::tt::tt_metal::get_raw_host_data_ptr(untilized);
::ttnn::Tensor &outputTensor = *liveTensors.at(op->out()->global_id());
void *src = ::tt::tt_metal::get_raw_host_data_ptr(result);
void *dst = ::tt::tt_metal::get_raw_host_data_ptr(outputTensor);
- std::uint32_t size = untilized.volume() * untilized.element_size();
std::uint32_t size = result.volume() * result.element_size();
std::memcpy(dst, src, size);
- return;
- }
- bool isL1 = op->in0()->desc()->layout()->memory_desc()->memory_space() ==
- ::tt::target::MemorySpace::DeviceL1;
- const auto memoryConfig =
- isL1 ? ::ttnn::L1_MEMORY_CONFIG : ::ttnn::DRAM_MEMORY_CONFIG;
- auto &inputTensor = *liveTensors.at(op->in0()->global_id());
- ::ttnn::Tensor tilized = ::tilize(inputTensor);
- auto deviceTensor = ::ttnn::to_device(tilized, &device, memoryConfig);
- tensorPool.push_back(deviceTensor);
- // auto [iter, inserted] =
- liveTensors.try_emplace(op->out()->global_id(), &tensorPool.back());
- // assert(inserted && "Duplicate output tensor");
break;
}
case ::tt::target::MemorySpace::DeviceDRAM: {
::tt::tt_metal::MemoryConfig memConfig = ::ttnn::DRAM_MEMORY_CONFIG;
if (inputTensor.storage_type() == ::tt::tt_metal::StorageType::BORROWED) {
::ttnn::Tensor result = inputTensor;
bool shouldTilize = true;
// device tilize requires BFLOAT16, if not then tilize on host
if (result.get_dtype() != ::ttnn::DataType::BFLOAT16) {
result = ::tilize(result);
shouldTilize = false;
}
result = ::ttnn::to_device(result, &device, memConfig);
result = updateLayoutAndDataType(result, targetDataTypeTTNN, shouldTilize,
false);
tensorPool.push_back(result);
liveTensors.try_emplace(op->out()->global_id(), &tensorPool.back());
} else if (inputTensor.storage_type() ==
::tt::tt_metal::StorageType::DEVICE) {
::ttnn::Tensor result = updateLayoutAndDataType(
inputTensor, targetDataTypeTTNN, false, false);
result = ::ttnn::to_memory_config(result, memConfig, std::nullopt);
tensorPool.push_back(result);
liveTensors.try_emplace(op->out()->global_id(), &tensorPool.back());
}
break;
}
// Currently similar to ::tt::target::MemorySpace::DeviceDRAM
// But will need its own code path when we add support for sharding
case ::tt::target::MemorySpace::DeviceL1: {
::tt::tt_metal::MemoryConfig memConfig = ::ttnn::L1_MEMORY_CONFIG;
if (inputTensor.storage_type() == ::tt::tt_metal::StorageType::BORROWED) {
::ttnn::Tensor result = inputTensor;
bool shouldTilize = true;
// device tilize requires BFLOAT16, if not then tilize on host
if (result.get_dtype() != ::ttnn::DataType::BFLOAT16) {
result = ::tilize(result);
shouldTilize = false;
}
result = ::ttnn::to_device(result, &device, memConfig);
result = updateLayoutAndDataType(result, targetDataTypeTTNN, shouldTilize,
false);
tensorPool.push_back(result);
liveTensors.try_emplace(op->out()->global_id(), &tensorPool.back());
} else if (inputTensor.storage_type() ==
::tt::tt_metal::StorageType::DEVICE) {
::ttnn::Tensor result = updateLayoutAndDataType(
inputTensor, targetDataTypeTTNN, false, false);
result = ::ttnn::to_memory_config(result, memConfig, std::nullopt);
tensorPool.push_back(result);
liveTensors.try_emplace(op->out()->global_id(), &tensorPool.back());
}
break;
}
}
}

static void
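
The run() handlers above share a simple ownership convention: tensorPool owns the tensors produced while executing the program (presumably a std::list so that pointers into it stay stable as results are appended), and liveTensors maps flatbuffer global_ids to whichever tensor currently holds each value. Below is a minimal standalone sketch of that bookkeeping, using a placeholder Tensor struct rather than the real ::ttnn::Tensor.

#include <cstdint>
#include <list>
#include <unordered_map>

// Placeholder standing in for ::ttnn::Tensor in this sketch.
struct Tensor { int payload; };

int main() {
  // tensorPool owns tensors created while running the program; std::list
  // pointers remain valid as new results are pushed back.
  std::list<Tensor> tensorPool;
  // liveTensors maps flatbuffer global_ids to whichever tensor currently
  // holds that value (program inputs/outputs or pool entries).
  std::unordered_map<std::uint32_t, Tensor *> liveTensors;

  Tensor input{42};
  liveTensors.try_emplace(/*global_id=*/0, &input);

  // A ToMemoryConfigOp-style handler produces a new tensor, parks it in the
  // pool, and registers the op's output id against the pooled copy.
  tensorPool.push_back(Tensor{input.payload});
  liveTensors.try_emplace(/*global_id=*/1, &tensorPool.back());

  return liveTensors.at(1)->payload == 42 ? 0 : 1;
}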
24 changes: 3 additions & 21 deletions runtime/lib/ttnn/runtime.cpp
@@ -5,6 +5,7 @@
#include "tt/runtime/runtime.h"
#include "tt/runtime/detail/ttnn.h"
#include "tt/runtime/utils.h"
#include "utils.h"

#include "ttmlir/Target/TTNN/Target.h"
#include "ttmlir/Version.h"
@@ -71,7 +72,7 @@ std::pair<SystemDesc, DeviceIds> getCurrentSystemDesc() {
}
uint8_t *buf = fbb.GetBufferPointer();
auto size = fbb.GetSize();
- auto handle = utils::malloc_shared(size);
auto handle = ::tt::runtime::utils::malloc_shared(size);
std::memcpy(handle.get(), buf, size);
::ttnn::close_device(device);
return std::make_pair(SystemDesc(handle), chipIds);
@@ -104,33 +105,14 @@ static BorrowedStorage createStorage(void *ptr, std::uint32_t numElements,
}
}

- static ::ttnn::DataType toTTNNDataType(::tt::target::DataType dataType) {
- switch (dataType) {
- case ::tt::target::DataType::Float32:
- return ::ttnn::DataType::FLOAT32;
- // case ::tt::target::DataType::Float16:
- // return ::ttnn::DataType::FLOAT16;
- case ::tt::target::DataType::BFloat16:
- return ::ttnn::DataType::BFLOAT16;
- case ::tt::target::DataType::UInt32:
- return ::ttnn::DataType::UINT32;
- case ::tt::target::DataType::UInt16:
- return ::ttnn::DataType::UINT16;
- // case ::tt::target::DataType::UInt8:
- // return ::ttnn::DataType::UINT8;
- default:
- throw std::runtime_error("Unsupported data type");
- }
- }

Tensor createTensor(std::shared_ptr<void> data,
std::vector<std::uint32_t> const &shape,
std::vector<std::uint32_t> const &stride,
std::uint32_t itemsize, ::tt::target::DataType dataType) {
std::uint32_t numElements = shape[0] * stride[0];
auto tensor = std::make_shared<::ttnn::Tensor>(
createStorage(data.get(), numElements, dataType), shape,
toTTNNDataType(dataType), ::ttnn::Layout::ROW_MAJOR);
utils::toTTNNDataType(dataType), ::ttnn::Layout::ROW_MAJOR);
return Tensor(tensor, data);
}

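
For orientation, a hedged usage sketch of the createTensor entry point whose body is shown above. Assumptions (not part of this commit): createTensor and Tensor are exposed through tt/runtime/runtime.h in namespace ::tt::runtime, and ::tt::target::DataType is visible via the Target.h header used elsewhere in this diff; the shape and strides are illustrative.

#include <cstdint>
#include <vector>

#include "tt/runtime/runtime.h"        // assumed to declare ::tt::runtime::createTensor
#include "tt/runtime/utils.h"          // ::tt::runtime::utils::malloc_shared
#include "ttmlir/Target/TTNN/Target.h" // ::tt::target::DataType

int main() {
  // A 2x4 row-major float32 host tensor.
  std::vector<std::uint32_t> shape = {2, 4};
  std::vector<std::uint32_t> stride = {4, 1};
  std::uint32_t itemsize = sizeof(float);

  // createTensor sizes its borrowed storage as shape[0] * stride[0] elements.
  std::uint32_t numElements = shape[0] * stride[0];
  auto data = ::tt::runtime::utils::malloc_shared(numElements * itemsize);

  ::tt::runtime::Tensor tensor = ::tt::runtime::createTensor(
      data, shape, stride, itemsize, ::tt::target::DataType::Float32);
  (void)tensor;
  return 0;
}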
41 changes: 41 additions & 0 deletions runtime/lib/ttnn/utils.h
@@ -0,0 +1,41 @@
// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
//
// SPDX-License-Identifier: Apache-2.0

#ifndef TTNN_RUNTIME_UTILS_H
#define TTNN_RUNTIME_UTILS_H

#include "ttmlir/Target/TTNN/Target.h"
#include "ttnn/types.hpp"

namespace tt::runtime::ttnn::utils {

inline bool isValidTileShape(const ::tt::target::Dim2d *shape) {
return (shape->x() == 0 and shape->y() == 0) or
(shape->x() == 1 and shape->y() == 1) or
(shape->x() == 32 and shape->y() == 32);
}

inline ::ttnn::DataType toTTNNDataType(::tt::target::DataType dataType) {
switch (dataType) {
case ::tt::target::DataType::Float32:
return ::ttnn::DataType::FLOAT32;
case ::tt::target::DataType::BFloat16:
return ::ttnn::DataType::BFLOAT16;
case ::tt::target::DataType::BFP_BFloat8:
return ::ttnn::DataType::BFLOAT8_B;
case ::tt::target::DataType::BFP_BFloat4:
return ::ttnn::DataType::BFLOAT4_B;
case ::tt::target::DataType::UInt32:
return ::ttnn::DataType::UINT32;
case ::tt::target::DataType::UInt16:
return ::ttnn::DataType::UINT16;

default:
throw std::runtime_error("Unsupported data type");
}
}

} // namespace tt::runtime::ttnn::utils

#endif
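
To close, a hedged sketch exercising the new toTTNNDataType helper (isValidTileShape is analogous, taking a ::tt::target::Dim2d from the flatbuffer layout). Assumptions: the include of "utils.h" resolves to the new runtime/lib/ttnn/utils.h, and the generated ::tt::target::DataType enum still defines Float16, as the commented-out case removed from runtime.cpp suggests.

#include <cassert>
#include <iostream>
#include <stdexcept>

#include "utils.h"

int main() {
  using ::tt::runtime::ttnn::utils::toTTNNDataType;

  // Supported conversions map 1:1 onto ::ttnn::DataType values.
  assert(toTTNNDataType(::tt::target::DataType::Float32) ==
         ::ttnn::DataType::FLOAT32);
  assert(toTTNNDataType(::tt::target::DataType::BFloat16) ==
         ::ttnn::DataType::BFLOAT16);

  // Unsupported types surface as std::runtime_error.
  try {
    toTTNNDataType(::tt::target::DataType::Float16);
  } catch (const std::runtime_error &e) {
    std::cerr << "unsupported: " << e.what() << "\n";
  }
  return 0;
}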
