Skip to content

Commit

Permalink
Update output tensors in from device op (#776)
Browse files Browse the repository at this point in the history
* Update output tensors in from device op, fix CI

* Add non-zero to silicon test matrix
  • Loading branch information
jnie-TT authored Sep 20, 2024
1 parent f97ec2e commit 04a7441
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 7 deletions.
10 changes: 5 additions & 5 deletions .github/workflows/build-and-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
fail-fast: false
matrix:
build: [
{runs-on: ubuntu-latest, enable_perf: OFF, name: "run", ttrt_flags: "--non-zero"},
{runs-on: ubuntu-latest, enable_perf: OFF, name: "run", ttrt_flags: ""},
{runs-on: ubuntu-latest, enable_perf: ON, name: "perf", ttrt_flags: ""},
]

Expand Down Expand Up @@ -147,10 +147,10 @@ jobs:
fail-fast: false
matrix:
build: [
{runs-on: n150, enable_perf: OFF, name: "run"},
{runs-on: n150, enable_perf: ON, name: "perf"},
{runs-on: n300, enable_perf: OFF, name: "run"},
{runs-on: n300, enable_perf: ON, name: "perf"},
{runs-on: n150, enable_perf: OFF, name: "run", ttrt_flags: "--non-zero"},
{runs-on: n150, enable_perf: ON, name: "perf", ttrt_flags: "--non-zero"},
{runs-on: n300, enable_perf: OFF, name: "run", ttrt_flags: "--non-zero"},
{runs-on: n300, enable_perf: ON, name: "perf", ttrt_flags: "--non-zero"},
]

runs-on:
Expand Down
11 changes: 9 additions & 2 deletions runtime/lib/ttnn/operations/layout/from_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,14 @@ void run(const ::tt::target::ttnn::FromDeviceOp *op, ProgramContext &context) {

::ttnn::Tensor out = ::ttnn::from_device(inputTensor);

tensorPool.try_emplace(op->out()->global_id(), out);
if (tensorPool.contains(op->out()->global_id())) {
::ttnn::Tensor &outputTensor = tensorPool.at(op->out()->global_id());
void *src = ::tt::tt_metal::get_raw_host_data_ptr(out);
void *dst = ::tt::tt_metal::get_raw_host_data_ptr(outputTensor);
std::uint32_t size = out.volume() * out.element_size();
std::memcpy(dst, src, size);
} else {
tensorPool.insert_or_assign(op->out()->global_id(), out);
}
}

} // namespace tt::runtime::ttnn::operations::layout

0 comments on commit 04a7441

Please sign in to comment.