Skip to content

Commit

Permalink
implement dynamic management of batch parameters (#621)
Browse files Browse the repository at this point in the history
Add an option to manage batch parameters dynamically. The aim is to find
the best parameters for an application. Because many things can happen
in the system, the parameters are continuously updated to take any change
of load into account.
This algorithm has been tested on ChromeOS with various workloads and
produces significant performance improvements.

This new option is disabled by default.
  • Loading branch information
rjodinchr authored Apr 23, 2024
1 parent cde8f36 commit a063f18
Show file tree
Hide file tree
Showing 6 changed files with 226 additions and 4 deletions.
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ add_library(OpenCL-objects OBJECT
printf.cpp
program.cpp
queue.cpp
queue_controller.cpp
semaphore.cpp
sha1.cpp
tracing.cpp
Expand Down
3 changes: 3 additions & 0 deletions src/config.def
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ OPTION(uint32_t, max_first_cmd_batch_size, 10000u)
OPTION(uint32_t, max_cmd_group_size, UINT32_MAX)
OPTION(uint32_t, max_first_cmd_group_size, UINT32_MAX)

// experimental
OPTION(bool, dynamic_batches, false)

OPTION(uint32_t, max_entry_points_instances, 2*1024u) // FIXME find a better definition
OPTION(uint32_t, enqueue_command_retry_sleep_us, UINT32_MAX) // UINT32_MAX meaning no retry

Expand Down
17 changes: 14 additions & 3 deletions src/queue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "init.hpp"
#include "memory.hpp"
#include "queue.hpp"
#include "queue_controller.hpp"
#include "tracing.hpp"
#include "utils.hpp"

Expand Down Expand Up @@ -48,6 +49,11 @@ cvk_command_queue::cvk_command_queue(
cvk_warn_fn("out-of-order execution enabled, will be ignored");
}

if (config.dynamic_batches) {
m_controllers.push_back(
std::make_unique<cvk_queue_controller_batch_parameters>(this));
}

TRACE_CNT_VAR_INIT(batch_in_flight_counter,
"clvk-queue_" + std::to_string((uintptr_t)this) +
"-batches");
Expand Down Expand Up @@ -202,7 +208,7 @@ cl_int cvk_command_queue::enqueue_command(cvk_command* cmd, _cl_event** event) {
}
} else {
// End the current command batch
if ((err = end_current_command_batch()) != CL_SUCCESS) {
if ((err = end_current_command_batch(true)) != CL_SUCCESS) {
return err;
}

Expand Down Expand Up @@ -278,7 +284,7 @@ cl_int cvk_command_queue::enqueue_command_with_deps(
return err;
}

cl_int cvk_command_queue::end_current_command_batch() {
cl_int cvk_command_queue::end_current_command_batch(bool from_flush) {
if (m_command_batch && m_command_batch->batch_size() > 0) {
TRACE_FUNCTION("queue", (uintptr_t)this, "batch_size",
m_command_batch->batch_size());
Expand All @@ -287,6 +293,11 @@ cl_int cvk_command_queue::end_current_command_batch() {
return CL_OUT_OF_RESOURCES;
}
enqueue_command(m_command_batch);

for (auto& controller : m_controllers) {
controller->update_after_end_current_command_batch(from_flush);
}

m_command_batch = nullptr;

batch_enqueued();
Expand Down Expand Up @@ -474,7 +485,7 @@ cl_int cvk_command_queue::flush_no_lock() {
std::unique_ptr<cvk_command_group> group;

// End current command batch
cl_int err = end_current_command_batch();
cl_int err = end_current_command_batch(true);
if (err != CL_SUCCESS) {
return err;
}
Expand Down
9 changes: 8 additions & 1 deletion src/queue.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,13 @@
#include "kernel.hpp"
#include "objects.hpp"
#include "printf.hpp"
#include "queue_controller.hpp"
#include "tracing.hpp"

struct cvk_command;
struct cvk_command_queue;
struct cvk_command_batch;
struct cvk_queue_controller;
using cvk_command_queue_holder = refcounted_holder<cvk_command_queue>;

struct cvk_command_group {
Expand Down Expand Up @@ -240,7 +242,7 @@ struct cvk_command_queue : public _cl_command_queue,
CHECK_RETURN cl_int enqueue_command_with_retry(cvk_command*,
_cl_event** event);
CHECK_RETURN cl_int enqueue_command(cvk_command* cmd, _cl_event** event);
CHECK_RETURN cl_int end_current_command_batch();
CHECK_RETURN cl_int end_current_command_batch(bool from_flush = false);
void executor();

cvk_device* m_device;
Expand Down Expand Up @@ -270,6 +272,11 @@ struct cvk_command_queue : public _cl_command_queue,
TRACE_CNT_VAR(group_in_flight_counter);

std::unique_ptr<cvk_buffer> m_printf_buffer;

std::vector<std::unique_ptr<cvk_queue_controller>> m_controllers;

friend struct cvk_queue_controller;
friend struct cvk_queue_controller_batch_parameters;
};

static inline cvk_command_queue* icd_downcast(cl_command_queue queue) {
Expand Down
151 changes: 151 additions & 0 deletions src/queue_controller.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
// Copyright 2024 The clvk authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "queue_controller.hpp"
#include "queue.hpp"

// Builds the batch-parameter controller attached to `queue`.
//
// The two limits used by update_after_end_current_command_batch() to clamp the
// queue's batch sizes are read once, here, from the queue's device. The
// remaining bookkeeping state starts cleared. Five trace counters are then
// initialised and given their starting values.
//
// NOTE(review): the counter names embed the address of this controller
// (`this`), not the address of `queue` -- confirm that this is the intended
// identifier for a "clvk-queue_"-prefixed counter.
cvk_queue_controller_batch_parameters::cvk_queue_controller_batch_parameters(
cvk_command_queue* queue)
: cvk_queue_controller(queue),
// Upper bound later applied to the queue's m_max_cmd_batch_size.
m_max_cmd_batch_size_limit(queue->device()->get_max_cmd_batch_size()),
// Lower bound later applied to the queue's m_max_first_cmd_batch_size.
m_max_first_cmd_batch_size_limit(
queue->device()->get_max_first_cmd_batch_size()),
m_max_first_cmd_batch_size_limit_hit(0), m_last_batch_size(0),
m_no_batch_in_flight_since_last_flush(false) {
// Set up one trace counter per tracked value.
TRACE_CNT_VAR_INIT(max_cmd_batch_size_counter,
"clvk-queue_" + std::to_string((uintptr_t)this) +
"-max_batch_size");
TRACE_CNT_VAR_INIT(max_first_cmd_batch_size_counter,
"clvk-queue_" + std::to_string((uintptr_t)this) +
"-max_first_batch_size");
TRACE_CNT_VAR_INIT(max_first_cmd_batch_size_limit_counter,
"clvk-queue_" + std::to_string((uintptr_t)this) +
"-max_first_batch_size_limit");
TRACE_CNT_VAR_INIT(max_first_cmd_batch_size_limit_hit_counter,
"clvk-queue_" + std::to_string((uintptr_t)this) +
"-max_first_batch_size_limit_hit");
TRACE_CNT_VAR_INIT(last_batch_size_counter,
"clvk-queue_" + std::to_string((uintptr_t)this) +
"-last_batch_size");

// Emit the starting value of every counter.
TRACE_CNT(max_cmd_batch_size_counter, queue->m_max_cmd_batch_size);
TRACE_CNT(max_first_cmd_batch_size_counter,
queue->m_max_first_cmd_batch_size);
TRACE_CNT(max_first_cmd_batch_size_limit_counter,
m_max_first_cmd_batch_size_limit);
TRACE_CNT(max_first_cmd_batch_size_limit_hit_counter, 0);
TRACE_CNT(last_batch_size_counter, m_last_batch_size);
}

// Re-tunes the queue's m_max_cmd_batch_size and m_max_first_cmd_batch_size
// after a command batch has been ended.
//
// The decision is based on the size of the previously ended batch
// (m_last_batch_size), the number of batches currently in flight on the queue,
// and whether the batch was ended with `from_flush` set. Every exit path
// publishes the current values through the trace counters.
//
// from_flush: value forwarded by cvk_command_queue::end_current_command_batch.
void cvk_queue_controller_batch_parameters::
update_after_end_current_command_batch(bool from_flush) {
TRACE_FUNCTION();
// Bookkeeping performed when the batch was ended by a flush.
auto reset_after_flush = [this]() {
if (m_queue->m_nb_batch_in_flight > 1 &&
!m_no_batch_in_flight_since_last_flush) {
// Increase max_cmd_batch_size if there were always batches in
// flight since the last flush. The increment is the number of batches
// currently in flight.
m_queue->m_max_cmd_batch_size += m_queue->m_nb_batch_in_flight;
}
// Reset after flush
m_last_batch_size = 0;
m_no_batch_in_flight_since_last_flush = false;
};
// Publishes every tracked value to its trace counter.
auto trace = [this]() {
TRACE_CNT(max_cmd_batch_size_counter, m_queue->m_max_cmd_batch_size);
TRACE_CNT(max_first_cmd_batch_size_counter,
m_queue->m_max_first_cmd_batch_size);
TRACE_CNT(max_first_cmd_batch_size_limit_counter,
m_max_first_cmd_batch_size_limit);
TRACE_CNT(max_first_cmd_batch_size_limit_hit_counter,
m_max_first_cmd_batch_size_limit_hit);
TRACE_CNT(last_batch_size_counter, m_last_batch_size);
};
// Without a current batch there is no size to learn from; only the flush
// bookkeeping applies.
if (!m_queue->m_command_batch) {
if (from_flush) {
reset_after_flush();
}
trace();
return;
}
auto batch_size = m_queue->m_command_batch->batch_size();
// No previous batch size recorded (initial state, or cleared by
// reset_after_flush): just remember this one. Note that this path returns
// before from_flush is examined.
if (m_last_batch_size == 0) {
m_last_batch_size = batch_size;
trace();
return;
}

// update m_no_batch_in_flight_since_last_flush
m_no_batch_in_flight_since_last_flush |= m_queue->m_nb_batch_in_flight == 0;

if (m_queue->m_nb_batch_in_flight == 0 &&
m_last_batch_size == m_queue->m_max_first_cmd_batch_size) {
// Nothing in flight and we flush because of first_cmd_batch_size
// reached. It should only happen the first time. Otherwise it means
// that our batches were too small to have the time to enqueue. Let's
// increase the first_cmd_batch_size_limit, reset the
// first_cmd_batch_size_limit_hit, increase first_cmd_batch_size to
// create bigger batch, and reset max_cmd_batch_size to make many small
// (size of first) batches to begin with.
m_max_first_cmd_batch_size_limit =
m_queue->m_max_first_cmd_batch_size + 1;
m_max_first_cmd_batch_size_limit_hit = 0;
m_queue->m_max_first_cmd_batch_size += 5;
m_queue->m_max_cmd_batch_size = m_queue->m_max_first_cmd_batch_size;
} else if (m_queue->m_nb_batch_in_flight == 0 &&
m_queue->m_max_cmd_batch_size >=
m_queue->m_max_first_cmd_batch_size + 2) {
// Nothing in flight and we flush because of either a flush or
// max_cmd_batch_size has been reached. Decrease max_cmd_batch_size if
// it does not go under max_first_cmd_batch_size to try to create batch
// before the end of the first batch.
m_queue->m_max_cmd_batch_size -= 2;
} else if (m_queue->m_nb_batch_in_flight > 0 &&
m_last_batch_size <= m_queue->m_max_first_cmd_batch_size) {
// Batches in flight and the last batch was no larger than
// max_first_cmd_batch_size. Make smaller first batch to try to reduce
// the latency.
m_queue->m_max_first_cmd_batch_size -= 1;
}

// A flush clears the remembered size; otherwise remember this batch's size
// for the next call.
if (from_flush) {
reset_after_flush();
} else {
m_last_batch_size = batch_size;
}

// Do not increase m_max_cmd_batch_size over the initial value
if (m_queue->m_max_cmd_batch_size > m_max_cmd_batch_size_limit) {
m_queue->m_max_cmd_batch_size = m_max_cmd_batch_size_limit;
}
// Do not decrease m_max_first_cmd_batch_size under the limit.
// After 4 tries, reset the limit to 1, allowing any positive value one
// time.
if (m_queue->m_max_first_cmd_batch_size <
m_max_first_cmd_batch_size_limit) {
m_max_first_cmd_batch_size_limit_hit++;
if (m_max_first_cmd_batch_size_limit_hit == 4) {
// Fourth hit: keep the value that fell below the limit and lower the
// limit to 1.
// NOTE(review): if the limit was already 1, the value kept here is 0
// until the next update -- confirm the queue copes with 0.
m_max_first_cmd_batch_size_limit_hit = 0;
m_max_first_cmd_batch_size_limit = 1;
} else {
// Otherwise push the value back up to the limit.
m_queue->m_max_first_cmd_batch_size =
m_max_first_cmd_batch_size_limit;
}
}
// max_first_cmd_batch_size should not get bigger than max_cmd_batch_size.
if (m_queue->m_max_cmd_batch_size < m_queue->m_max_first_cmd_batch_size) {
m_queue->m_max_first_cmd_batch_size = m_queue->m_max_cmd_batch_size;
}
trace();
}
49 changes: 49 additions & 0 deletions src/queue_controller.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
// Copyright 2024 The clvk authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "queue.hpp"

struct cvk_queue_controller {
cvk_queue_controller(cvk_command_queue* queue) : m_queue(queue) {}

virtual ~cvk_queue_controller() {}

virtual void update_after_end_current_command_batch(bool from_flush) {
(void)from_flush;
}

protected:
cvk_command_queue* m_queue;
};

// Controller that adjusts the queue's m_max_cmd_batch_size and
// m_max_first_cmd_batch_size each time a command batch is ended.
struct cvk_queue_controller_batch_parameters : cvk_queue_controller {
    // Reads the batch-size limits from the queue's device and sets up the
    // trace counters.
    cvk_queue_controller_batch_parameters(cvk_command_queue* queue);

    // Re-tunes the queue's batch-size parameters; see the definition for the
    // rules applied.
    void update_after_end_current_command_batch(bool from_flush) final override;

private:
    // Upper bound applied to the queue's m_max_cmd_batch_size.
    cl_uint m_max_cmd_batch_size_limit;
    // Lower bound applied to the queue's m_max_first_cmd_batch_size.
    cl_uint m_max_first_cmd_batch_size_limit;
    // How many times the lower bound has been hit since it was last reset.
    cl_uint m_max_first_cmd_batch_size_limit_hit;
    // Size of the previously ended batch; 0 when none is recorded.
    cl_uint m_last_batch_size;
    // Set once an update has seen zero batches in flight; cleared on flush.
    bool m_no_batch_in_flight_since_last_flush;

    // One trace counter per tracked value.
    TRACE_CNT_VAR(max_cmd_batch_size_counter);
    TRACE_CNT_VAR(max_first_cmd_batch_size_counter);
    TRACE_CNT_VAR(max_first_cmd_batch_size_limit_counter);
    TRACE_CNT_VAR(max_first_cmd_batch_size_limit_hit_counter);
    TRACE_CNT_VAR(last_batch_size_counter);
};

0 comments on commit a063f18

Please sign in to comment.