Skip to content

Commit

Permalink
[OpenGl] Merge Retr SSBO into Args. (#3313)
Browse files Browse the repository at this point in the history
fixes #3278
  • Loading branch information
ailzhang authored Oct 29, 2021
1 parent dc65418 commit a27036f
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 38 deletions.
14 changes: 7 additions & 7 deletions taichi/backends/opengl/codegen_opengl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -167,8 +167,6 @@ class KernelGen : public IRVisitor {
REGISTER_BUFFER(std430, buffer, gtmp, GLBufId::Gtmp);
if (used.buf_args)
REGISTER_BUFFER(std430, buffer, args, GLBufId::Args);
if (used.buf_retr)
REGISTER_BUFFER(std430, writeonly buffer, retr, GLBufId::Retr);

#undef REGISTER_BUFFER
#undef DEFINE_LAYOUT
Expand Down Expand Up @@ -451,7 +449,7 @@ class KernelGen : public IRVisitor {
used.int32 = true;
std::string var_name = fmt::format("_s{}_{}", i, stmt->short_name());
emit("int {} = _args_i32_[{} + {} * {} + {}];", var_name,
taichi_opengl_earg_base / sizeof(int), arg_id,
taichi_opengl_extra_args_base / sizeof(int), arg_id,
taichi_max_num_indices, i);
size_var_names.push_back(std::move(var_name));
}
Expand Down Expand Up @@ -729,10 +727,12 @@ class KernelGen : public IRVisitor {
}

// Emits the GLSL statement for a kernel return. The returned value
// (stmt->value) is written to the typed view of a buffer chosen by
// stmt->element_type(). Diff view: the `buf_retr` / `_retr_` lines are the
// pre-commit version that wrote to a dedicated Retr buffer; the `buf_args` /
// `_args_` lines are the replacement that writes into the Args buffer at
// taichi_opengl_ret_base.
void visit(ReturnStmt *stmt) override {
used.buf_retr = true;
used.buf_args = true;
// TODO: use stmt->ret_id instead of 0 as index
// NOTE(review): the emitted index text is `<ret_base> >> <shifter> + 0`.
// In GLSL, as in C, `+` binds tighter than `>>`, so it parses as
// `<ret_base> >> (<shifter> + 0)`. That is harmless while the addend is 0, but
// substituting ret_id would shift by the wrong amount; emit
// `({} >> {}) + <id>` when resolving the TODO above.
// The shift presumably converts the byte offset taichi_opengl_ret_base into
// an element index for the typed view - confirm against
// opengl_data_address_shifter.
emit("_retr_{}_[0] = {};",
emit("_args_{}_[{} >> {} + 0] = {};",
opengl_data_type_short_name(stmt->element_type()),
taichi_opengl_ret_base,
opengl_data_address_shifter(stmt->element_type()),
stmt->value->short_name());
}

Expand Down Expand Up @@ -787,8 +787,8 @@ class KernelGen : public IRVisitor {
used.buf_args = true;
used.int32 = true;
emit("int {} = _args_i32_[{} + {} * {} + {}];", name,
taichi_opengl_earg_base / sizeof(int), arg_id, taichi_max_num_indices,
axis);
taichi_opengl_extra_args_base / sizeof(int), arg_id,
taichi_max_num_indices, axis);
}

std::string make_kernel_name() {
Expand Down
48 changes: 21 additions & 27 deletions taichi/backends/opengl/opengl_api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ void CompiledProgram::init_args(Kernel *kernel) {

args_buf_size = arg_count * sizeof(uint64_t);
if (ext_arr_map.size()) {
args_buf_size = taichi_opengl_earg_base +
args_buf_size = taichi_opengl_extra_args_base +
arg_count * taichi_max_num_indices * sizeof(int);
}

Expand Down Expand Up @@ -269,27 +269,26 @@ void DeviceCompiledProgram::launch(Context &ctx, OpenGlRuntime *runtime) const {

uint8_t *args_buf_mapped = nullptr;

// clang-format off
// Prepare external array: copy from ctx.args[i] (which is a host pointer
// pointing to the external array) to device, and save the accumulated copied
// size information. Note here we copy external array to Arg buffer in
// runtime. Its layout is shown below:
// | args | shape of ext arr | ext arr |
// | args | shape of ext arr | ret | ext arr |
// baseptr
// |..taichi_opengl_earg_base..|
// |.................ext_arr_offset.........................|
// |.......................ctx.args[i]............................|
// i-th arg (ext arr)
// We save each external array's offset from args_buf_ baseptr back to
// ctx.args[i].
// |..taichi_opengl_extra_args_base..|
// |...............taichi_opengl_ret_base.................|
// |................taichi_opengl_external_arr_base..............|
// |............................ctx.args[i]............................|
// i-th arg (ext arr)
// We save each external array's offset from args_buf_ baseptr back to ctx.args[i].
// clang-format on
if (program_.total_ext_arr_size) {
void *baseptr = device_->map(args_buf_);
size_t ext_arr_offset =
size_t(taichi_opengl_earg_base) +
sizeof(int) * size_t(program_.arg_count * taichi_max_num_indices);
size_t accum_size = 0;
for (const auto &[i, size] : program_.ext_arr_map) {
auto ptr = (void *)ctx.args[i];
ctx.args[i] = accum_size + ext_arr_offset;
ctx.args[i] = accum_size + taichi_opengl_external_arr_base;
ext_arr_host_ptrs[i] = ptr;
if (program_.check_ext_arr_read(i)) {
std::memcpy((char *)baseptr + ctx.args[i], ptr, size);
Expand All @@ -307,7 +306,8 @@ void DeviceCompiledProgram::launch(Context &ctx, OpenGlRuntime *runtime) const {
program_.arg_count * sizeof(uint64_t));
if (program_.ext_arr_map.size()) {
std::memcpy(
args_buf_mapped + size_t(taichi_opengl_earg_base), ctx.extra_args,
args_buf_mapped + size_t(taichi_opengl_extra_args_base),
ctx.extra_args,
size_t(program_.arg_count * taichi_max_num_indices) * sizeof(int));
}
device_->unmap(args_buf_);
Expand All @@ -332,10 +332,9 @@ void DeviceCompiledProgram::launch(Context &ctx, OpenGlRuntime *runtime) const {
binder->buffer(0, int(GLBufId::Runtime), core_bufs.runtime);
binder->buffer(0, int(GLBufId::Root), core_bufs.root);
binder->buffer(0, int(GLBufId::Gtmp), core_bufs.gtmp);
if (program_.args_buf_size)
if (program_.args_buf_size || program_.ret_buf_size ||
program_.total_ext_arr_size)
binder->buffer(0, int(GLBufId::Args), args_buf_);
if (program_.ret_buf_size)
binder->buffer(0, int(GLBufId::Retr), ret_buf_);

cmdlist->bind_pipeline(compiled_pipeline_[i].get());
cmdlist->bind_resources(binder);
Expand Down Expand Up @@ -366,30 +365,25 @@ void DeviceCompiledProgram::launch(Context &ctx, OpenGlRuntime *runtime) const {
}

if (program_.ret_buf_size) {
memcpy(runtime->result_buffer, device_->map(ret_buf_),
uint8_t *baseptr = (uint8_t *)device_->map(args_buf_);
memcpy(runtime->result_buffer, baseptr + taichi_opengl_ret_base,
program_.ret_buf_size);
device_->unmap(ret_buf_);
device_->unmap(args_buf_);
}
}

DeviceCompiledProgram::DeviceCompiledProgram(CompiledProgram &&program,
Device *device)
: program_(std::move(program)), device_(device) {
if (program_.args_buf_size || program_.total_ext_arr_size) {
if (program_.args_buf_size || program_.total_ext_arr_size ||
program_.ret_buf_size) {
args_buf_ = device->allocate_memory(
{program_.args_buf_size + program_.total_ext_arr_size,
{taichi_opengl_external_arr_base + program_.total_ext_arr_size,
/*host_write=*/true,
/*host_read=*/true,
/*export_sharing=*/false});
}

if (program_.ret_buf_size) {
ret_buf_ =
device->allocate_memory({program_.ret_buf_size, /*host_write=*/false,
/*host_read=*/true,
/*export_sharing=*/false});
}

for (auto &k : program_.kernels) {
compiled_pipeline_.push_back(
device->create_pipeline({PipelineSourceType::glsl_src,
Expand Down
12 changes: 8 additions & 4 deletions taichi/backends/opengl/opengl_kernel_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,13 @@ class SNode;

namespace opengl {

constexpr int taichi_opengl_earg_base = taichi_max_num_args * sizeof(uint64_t);
// Byte offsets of the regions inside the OpenGL Args buffer. (Diff view: the
// `earg_base` line above is the pre-commit name of `extra_args_base`.)
//
// Offset where the extra-args region starts: it follows taichi_max_num_args
// slots of sizeof(uint64_t) bytes each.
constexpr int taichi_opengl_extra_args_base =
taichi_max_num_args * sizeof(uint64_t);
// Offset where the return-value region starts: it follows the extra-args
// region, which spans taichi_max_num_args * taichi_max_num_indices ints.
constexpr int taichi_opengl_ret_base =
taichi_opengl_extra_args_base +
taichi_max_num_args * taichi_max_num_indices * sizeof(int);
// Offset where external-array data starts: sizeof(uint64_t) bytes past the
// return base, i.e. the return region is sized as a single 64-bit slot.
constexpr int taichi_opengl_external_arr_base =
taichi_opengl_ret_base + sizeof(uint64_t);

struct UsedFeature {
// types:
Expand All @@ -28,7 +34,6 @@ struct UsedFeature {
// buffers:
bool buf_args{false};
bool buf_gtmp{false};
bool buf_retr{false};

// utilities:
bool fast_pow{false};
Expand All @@ -46,8 +51,7 @@ enum class GLBufId {
Root = 0,
Gtmp = 1,
Args = 2,
Retr = 3,
Runtime = 4,
Runtime = 3,
};

struct IOV {
Expand Down

0 comments on commit a27036f

Please sign in to comment.