Skip to content

Commit

Permalink
Improve BCS waiting logic to satisfy AUB/TBX mode
Browse files Browse the repository at this point in the history
Change-Id: I52b44959b8bdc1cc66f136a4785233b95870fd0b
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
  • Loading branch information
BartoszDunajski authored and Compute-Runtime-Automation committed Oct 22, 2019
1 parent c6e13fd commit a046de5
Show file tree
Hide file tree
Showing 12 changed files with 80 additions and 30 deletions.
2 changes: 1 addition & 1 deletion runtime/command_queue/command_queue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ void CommandQueue::waitUntilComplete(uint32_t taskCountToWait, FlushStamp flushS
getGpgpuCommandStreamReceiver().waitForTaskCountAndCleanAllocationList(taskCountToWait, TEMPORARY_ALLOCATION);

if (auto bcsCsr = getBcsCommandStreamReceiver()) {
auto bcsTaskCount = *bcsCsr->getTagAddress();
bcsCsr->waitForTaskCountWithKmdNotifyFallback(bcsTaskCount, 0, false, false);
bcsCsr->waitForTaskCountAndCleanAllocationList(bcsTaskCount, TEMPORARY_ALLOCATION);
}

Expand Down
3 changes: 3 additions & 0 deletions runtime/command_queue/command_queue.h
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,8 @@ class CommandQueue : public BaseObject<_cl_command_queue> {

bool isMultiEngineQueue() const { return this->multiEngineQueue; }

void updateBcsTaskCount(uint32_t newBcsTaskCount) { this->bcsTaskCount = newBcsTaskCount; }

// taskCount of last task
uint32_t taskCount = 0;

Expand Down Expand Up @@ -455,6 +457,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
QueuePriority priority = QueuePriority::MEDIUM;
QueueThrottle throttle = QueueThrottle::MEDIUM;
uint64_t sliceCount = QueueSliceCount::defaultSliceCount;
uint32_t bcsTaskCount = 0;

bool perfCountersEnabled = false;

Expand Down
2 changes: 1 addition & 1 deletion runtime/command_queue/enqueue_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -865,7 +865,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
enqueueProperties.blitProperties->csrDependencies.makeResident(*bcsCsr);
previousTimestampPacketNodes->makeResident(*bcsCsr);
timestampPacketContainer->makeResident(*bcsCsr);
bcsCsr->blitBuffer(*enqueueProperties.blitProperties);
this->bcsTaskCount = bcsCsr->blitBuffer(*enqueueProperties.blitProperties);
}

DispatchFlags dispatchFlags(
Expand Down
2 changes: 1 addition & 1 deletion runtime/command_stream/command_stream_receiver.h
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ class CommandStreamReceiver {
this->latestSentTaskCount = latestSentTaskCount;
}

virtual void blitBuffer(const BlitProperties &blitProperites) = 0;
// Performs the blit described by blitProperites and returns a task count value.
// NOTE(review): presumably the returned value is the task count identifying this
// blit submission, so callers can wait on it later - confirm against each override.
virtual uint32_t blitBuffer(const BlitProperties &blitProperites) = 0;

ScratchSpaceController *getScratchSpaceController() const {
return scratchSpaceController.get();
Expand Down
2 changes: 1 addition & 1 deletion runtime/command_stream/command_stream_receiver_hw.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
return CommandStreamReceiverType::CSR_HW;
}

void blitBuffer(const BlitProperties &blitProperites) override;
uint32_t blitBuffer(const BlitProperties &blitProperites) override;

bool isMultiOsContextCapable() const override;

Expand Down
4 changes: 3 additions & 1 deletion runtime/command_stream/command_stream_receiver_hw_base.inl
Original file line number Diff line number Diff line change
Expand Up @@ -795,7 +795,7 @@ bool CommandStreamReceiverHw<GfxFamily>::detectInitProgrammingFlagsRequired(cons
}

template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::blitBuffer(const BlitProperties &blitProperites) {
uint32_t CommandStreamReceiverHw<GfxFamily>::blitBuffer(const BlitProperties &blitProperites) {
using MI_BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END;
using MI_FLUSH_DW = typename GfxFamily::MI_FLUSH_DW;

Expand Down Expand Up @@ -845,6 +845,8 @@ void CommandStreamReceiverHw<GfxFamily>::blitBuffer(const BlitProperties &blitPr
waitForTaskCountWithKmdNotifyFallback(newTaskCount, flushStampToWait, false, false);
internalAllocationStorage->cleanAllocationList(newTaskCount, TEMPORARY_ALLOCATION);
}

return newTaskCount;
}

template <typename GfxFamily>
Expand Down
4 changes: 3 additions & 1 deletion runtime/helpers/task_information.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,9 @@ void CommandWithoutKernel::dispatchBlitOperation() {
blitProperties.csrDependencies.push_back(barrierTimestampPacketNodes.get());
blitProperties.outputTimestampPacket = currentTimestampPacketNodes.get();

bcsCsr->blitBuffer(blitProperties);
auto bcsTaskCount = bcsCsr->blitBuffer(blitProperties);

commandQueue.updateBcsTaskCount(bcsTaskCount);
}

CompletionStamp &CommandWithoutKernel::submit(uint32_t taskLevel, bool terminated) {
Expand Down
2 changes: 1 addition & 1 deletion unit_tests/kernel/kernel_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -489,7 +489,7 @@ class CommandStreamReceiverMock : public CommandStreamReceiver {

void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, bool forcePowerSavingMode) override {
}
void blitBuffer(const BlitProperties &blitProperites) override{};
uint32_t blitBuffer(const BlitProperties &blitProperites) override { return taskCount; };

CompletionStamp flushTask(
LinearStream &commandStream,
Expand Down
4 changes: 2 additions & 2 deletions unit_tests/libult/ult_aub_command_stream_receiver.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,9 @@ class UltAubCommandStreamReceiver : public AUBCommandStreamReceiverHw<GfxFamily>
return csr;
}

void blitBuffer(const BlitProperties &blitProperites) override {
uint32_t blitBuffer(const BlitProperties &blitProperites) override {
blitBufferCalled++;
BaseClass::blitBuffer(blitProperites);
return BaseClass::blitBuffer(blitProperites);
}

uint32_t blitBufferCalled = 0;
Expand Down
6 changes: 3 additions & 3 deletions unit_tests/libult/ult_command_stream_receiver.h
Original file line number Diff line number Diff line change
Expand Up @@ -173,9 +173,9 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
return CommandStreamReceiverHw<GfxFamily>::obtainUniqueOwnership();
}

void blitBuffer(const BlitProperties &blitProperites) override {
uint32_t blitBuffer(const BlitProperties &blitProperites) override {
blitBufferCalled++;
CommandStreamReceiverHw<GfxFamily>::blitBuffer(blitProperites);
return CommandStreamReceiverHw<GfxFamily>::blitBuffer(blitProperites);
}

bool createPerDssBackedBuffer(Device &device) override {
Expand Down Expand Up @@ -210,4 +210,4 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
DispatchFlags recordedDispatchFlags;
bool multiOsContextCapable = false;
};
} // namespace NEO
} // namespace NEO
72 changes: 57 additions & 15 deletions unit_tests/mem_obj/buffer_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -718,6 +718,31 @@ struct BcsBufferTests : public ::testing::Test {
std::unique_ptr<CommandStreamReceiver> bcsCsr;
};

// Test double for a command stream receiver used by the BCS buffer tests.
// Both wait entry points are replaced with argument checks plus call counters;
// neither override forwards to the base class implementation.
template <typename FamilyType>
class MyMockCsr : public UltCommandStreamReceiver<FamilyType> {
  public:
    using UltCommandStreamReceiver<FamilyType>::UltCommandStreamReceiver;

    // Expects a wait on latestFlushedTaskCount with a zero flush stamp and both
    // boolean options disabled, then records that the wait was requested.
    void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait,
                                               bool useQuickKmdSleep, bool forcePowerSavingMode) override {
        EXPECT_EQ(this->latestFlushedTaskCount, taskCountToWait);
        EXPECT_EQ(0u, flushStampToWait);
        EXPECT_FALSE(useQuickKmdSleep);
        EXPECT_FALSE(forcePowerSavingMode);
        this->waitForTaskCountWithKmdNotifyFallbackCalled += 1;
    }

    // Expects that exactly one fallback wait has already been recorded and that
    // the cleanup wait targets latestFlushedTaskCount, then records the call.
    // allocationUsage is not inspected.
    void waitForTaskCountAndCleanAllocationList(uint32_t requiredTaskCount, uint32_t allocationUsage) override {
        EXPECT_EQ(1u, this->waitForTaskCountWithKmdNotifyFallbackCalled);
        EXPECT_EQ(this->latestFlushedTaskCount, requiredTaskCount);
        this->waitForTaskCountAndCleanAllocationListCalled += 1;
    }

    // Number of times each overridden wait method has been invoked.
    uint32_t waitForTaskCountAndCleanAllocationListCalled = 0;
    uint32_t waitForTaskCountWithKmdNotifyFallbackCalled = 0;
    // Assigned by the tests; not read by any method of this class.
    CommandStreamReceiver *gpgpuCsr = nullptr;
};

template <typename FamilyType>
void SetUpT() {
if (is32bit) {
Expand Down Expand Up @@ -1122,21 +1147,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenInputAndOutputTimestampPacketWhenBlitCal
}

HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingEnqueueWhenUsingBcsThenCallWait) {
class MyMockCsr : public UltCommandStreamReceiver<FamilyType> {
public:
using UltCommandStreamReceiver<FamilyType>::UltCommandStreamReceiver;

void waitForTaskCountAndCleanAllocationList(uint32_t requiredTaskCount, uint32_t allocationUsage) override {
EXPECT_TRUE(gpgpuCsr->getTemporaryAllocations().peekIsEmpty());
EXPECT_EQ(*this->getTagAddress(), requiredTaskCount);
waitForTaskCountAndCleanAllocationListCalled++;
}

uint32_t waitForTaskCountAndCleanAllocationListCalled = 0;
CommandStreamReceiver *gpgpuCsr = nullptr;
};

auto myMockCsr = new MyMockCsr(*device->getExecutionEnvironment());
auto myMockCsr = new MyMockCsr<FamilyType>(*device->getExecutionEnvironment());
myMockCsr->taskCount = 1234;
myMockCsr->initializeTagAllocation();
myMockCsr->setupContext(*bcsMockContext->bcsOsContext);
Expand All @@ -1163,6 +1174,37 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingEnqueueWhenUsingBcsThenCallWait)
EXPECT_EQ(1u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled);
}

// A non-blocking write is enqueued behind a user event on a queue whose bcsEngine
// points at a MyMockCsr. Completing the event must not trigger a cleanup wait on
// the mock; the following finish() must trigger exactly one. The task-count values
// passed to the waits are checked inside MyMockCsr itself.
HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockedEnqueueWhenUsingBcsThenWaitForValidTaskCountOnBlockingCall) {
    auto mockBcsCsr = new MyMockCsr<FamilyType>(*device->getExecutionEnvironment());
    mockBcsCsr->taskCount = 1234;
    mockBcsCsr->initializeTagAllocation();
    mockBcsCsr->setupContext(*bcsMockContext->bcsOsContext);
    // The raw pointer is handed to the context via reset() (presumably a smart
    // pointer that now owns it); mockBcsCsr is kept only to read the counters.
    bcsMockContext->bcsCsr.reset(mockBcsCsr);

    EngineControl engineControl = {mockBcsCsr, bcsMockContext->bcsOsContext.get()};

    auto queue = clUniquePtr(new MockCommandQueueHw<FamilyType>(bcsMockContext.get(), device.get(), nullptr));
    queue->bcsEngine = &engineControl;
    mockBcsCsr->gpgpuCsr = &queue->getGpgpuCommandStreamReceiver();

    cl_int status = CL_SUCCESS;
    auto dstBuffer = clUniquePtr<Buffer>(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, status));
    dstBuffer->forceDisallowCPUCopy = true;
    void *srcPtr = reinterpret_cast<void *>(0x12340000);

    // The user event keeps the enqueue blocked until its status is set below.
    UserEvent gateEvent;
    cl_event gateHandle = &gateEvent;

    queue->enqueueWriteBuffer(dstBuffer.get(), false, 0, 1, srcPtr, nullptr, 1, &gateHandle, nullptr);

    // Unblocking alone must not cause a cleanup wait.
    gateEvent.setStatus(CL_COMPLETE);
    EXPECT_EQ(0u, mockBcsCsr->waitForTaskCountAndCleanAllocationListCalled);

    // The blocking call is what performs the wait.
    queue->finish();
    EXPECT_EQ(1u, mockBcsCsr->waitForTaskCountAndCleanAllocationListCalled);
}

TEST_F(RenderCompressedBuffersCopyHostMemoryTests, givenNonRenderCompressedBufferWhenCopyFromHostPtrIsRequiredThenDontCallWriteBuffer) {
hwInfo->capabilityTable.ftrRenderCompressedBuffers = false;

Expand Down
7 changes: 4 additions & 3 deletions unit_tests/mocks/mock_csr.h
Original file line number Diff line number Diff line change
Expand Up @@ -214,10 +214,11 @@ class MockCsrHw2 : public CommandStreamReceiverHw<GfxFamily> {
return completionStamp;
}

void blitBuffer(const BlitProperties &blitProperites) override {
uint32_t blitBuffer(const BlitProperties &blitProperites) override {
if (!skipBlitCalls) {
CommandStreamReceiverHw<GfxFamily>::blitBuffer(blitProperites);
return CommandStreamReceiverHw<GfxFamily>::blitBuffer(blitProperites);
}
return taskCount;
}

bool skipBlitCalls = false;
Expand Down Expand Up @@ -286,7 +287,7 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, bool forcePowerSavingMode) override {
}

void blitBuffer(const BlitProperties &blitProperites) override{};
uint32_t blitBuffer(const BlitProperties &blitProperites) override { return taskCount; };

void setOSInterface(OSInterface *osInterface);

Expand Down

0 comments on commit a046de5

Please sign in to comment.