Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement printf #565

Merged
merged 13 commits into from
Jul 16, 2023
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ add_library(OpenCL-objects OBJECT
kernel.cpp
log.cpp
memory.cpp
printf.cpp
program.cpp
queue.cpp
semaphore.cpp
Expand Down
3 changes: 3 additions & 0 deletions src/config.def
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,12 @@ OPTION(bool, skip_spirv_capability_check, false)
OPTION(bool, keep_temporaries, false)
OPTION(std::string, spirv_arch, "spir")
OPTION(bool, physical_addressing, false)

OPTION(std::string, clspv_native_builtins, "")
OPTION(std::string, clspv_library_builtins, "")

OPTION(uint32_t, printf_buffer_size, 1024*1024u)

#if COMPILER_AVAILABLE
OPTION(std::string, clspv_options, "")
#if !CLSPV_ONLINE_COMPILER
Expand Down
1 change: 1 addition & 0 deletions src/exports.map
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ CLVK_UNIT_TESTING_FCT {
global:
clvk_override_device_max_compute_work_group_count;
clvk_restore_device_properties;
clvk_override_printf_buffer_size;
local:
*;
};
Expand Down
2 changes: 1 addition & 1 deletion src/kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ bool cvk_kernel_argument_values::setup_descriptor_sets() {
// Setup module-scope variables
if (program->module_constant_data_buffer() != nullptr &&
program->module_constant_data_buffer_info()->type ==
constant_data_buffer_type::storage_buffer) {
module_buffer_type::storage_buffer) {
auto buffer = program->module_constant_data_buffer();
auto info = program->module_constant_data_buffer_info();
cvk_debug_fn(
Expand Down
6 changes: 6 additions & 0 deletions src/kernel.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
#include <unordered_map>
#include <vector>

#include "spirv/unified1/NonSemanticClspvReflection.h"

Comment on lines +22 to +23
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
#include "spirv/unified1/NonSemanticClspvReflection.h"

Should no longer be necessary.

#include "memory.hpp"
#include "objects.hpp"
#include "program.hpp"
Expand Down Expand Up @@ -143,6 +145,10 @@ struct cvk_kernel : public _cl_kernel, api_object<object_magic::kernel> {
return m_args.at(arg_index).info.type_qualifier;
}

// Whether this kernel's entry point reports that it uses printf.
bool uses_printf() const { return m_entry_point->uses_printf(); }

// Whether this kernel requires serialized execution. Currently this is true
// exactly for the kernels that use printf.
bool requires_serialized_execution() const { return uses_printf(); }

private:
friend cvk_kernel_argument_values;

Expand Down
254 changes: 254 additions & 0 deletions src/printf.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,254 @@
// Copyright 2022 The clvk authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <cstring>
#include <sstream>

#include "printf.hpp"

// Return the conversion specifier of a format string, i.e. the first character
// of `fmt` that is one of "diouxXfFeEgGaAcsp". If `fmt` contains none of those
// characters, std::string_view::at throws std::out_of_range.
char get_fmt_conversion(std::string_view fmt) {
    constexpr std::string_view conversion_chars{"diouxXfFeEgGaAcsp"};
    const auto specifier_index = fmt.find_first_of(conversion_chars);
    return fmt.at(specifier_index);
}

// Read a value of type T from the given pointer.
//
// The bytes are copied out with memcpy instead of dereferencing a
// reinterpret_cast'ed pointer: `data` points into a raw byte buffer, so it is
// not guaranteed to be suitably aligned for T, and a direct dereference would
// be undefined behaviour (misaligned access / strict aliasing violation).
template <typename T> T read_buff(const char* data) {
    T value;
    std::memcpy(&value, data, sizeof(T));
    return value;
}

// Read a value of type T from the given pointer, then advance the pointer by
// sizeof(T).
//
// The bytes are copied out with memcpy instead of dereferencing a
// reinterpret_cast'ed pointer: `data` points into a raw byte buffer, so it is
// not guaranteed to be suitably aligned for T, and a direct dereference would
// be undefined behaviour (misaligned access / strict aliasing violation).
template <typename T> T read_inc_buff(char*& data) {
    T value;
    std::memcpy(&value, data, sizeof(T));
    data += sizeof(T);
    return value;
}

// Extract the optional vector flag and return a modified format string suitable
// for calling snprintf on individual vector elements.
//
// `fmt` is a piece of a format string beginning with '%'. If its specifier has
// no vector flag ('v' after the flags, field width and precision), or the
// specifier is incomplete, `vector_size` is set to 1 and `fmt` is returned
// unchanged; `element_size` and `remaining_fmt` are left untouched.
//
// Otherwise:
//  - the return value is the specifier alone, with the "vN" part removed and
//    the OpenCL-only "hl" length modifier stripped,
//  - `vector_size` receives N,
//  - `element_size` receives the element size in bytes implied by the length
//    modifier (hh: 1, h: 2, hl: 4, l: 8); it is left untouched when no length
//    modifier is present,
//  - `remaining_fmt` receives the text that followed the conversion specifier.
std::string get_vector_fmt(std::string fmt, int& vector_size, int& element_size,
                           std::string& remaining_fmt) {
    // Consume flags (skipping initial '%')
    auto pos = fmt.find_first_not_of(" +-#0", 1ul);
    // Consume field width and precision. '0' has to be part of the set: it
    // cannot start a field width (it would be a flag) but it can appear inside
    // one ("10") or in a precision (".0").
    pos = fmt.find_first_not_of("0123456789.", pos);

    // Nothing after the flags/width/precision, or no vector flag: scalar
    if (pos == std::string::npos || fmt[pos] != 'v') {
        vector_size = 1;
        return fmt;
    }

    // Locate the conversion specifier. Without one the specifier is malformed;
    // hand it back untouched as a scalar rather than slicing out of range.
    const auto pos_conversion = fmt.find_first_of("diouxXfFeEgGaAcsp", pos);
    if (pos_conversion == std::string::npos) {
        vector_size = 1;
        return fmt;
    }

    // Trim the data after the conversion specifier and store it in
    // `remaining_fmt`
    remaining_fmt = fmt.substr(pos_conversion + 1);
    fmt.erase(pos_conversion + 1);

    // Parse the vector length that follows 'v'. All decimal digits are
    // accepted so that two-digit lengths such as 16 are read in full.
    const size_t vec_length_pos_start = pos + 1;
    const size_t vec_length_pos_end =
        fmt.find_first_not_of("0123456789", vec_length_pos_start);
    const auto vec_length_str = fmt.substr(
        vec_length_pos_start, vec_length_pos_end - vec_length_pos_start);
    const int vec_length = std::atoi(vec_length_str.c_str());

    // Rebuild the specifier without the "vN" part
    const auto fmt_post_vec_len = fmt.substr(vec_length_pos_end);
    fmt = fmt.substr(0, pos) + fmt_post_vec_len;

    // The length modifier is required with vectors
    if (fmt_post_vec_len.find("hh") != std::string::npos) {
        element_size = 1;
    } else if (fmt_post_vec_len.find("hl") != std::string::npos) {
        element_size = 4;
    } else if (fmt_post_vec_len.find("h") != std::string::npos) {
        element_size = 2;
    } else if (fmt_post_vec_len.find("l") != std::string::npos) {
        element_size = 8;
    }

    // If 'hl' length modifier is present, strip it as snprintf doesn't
    // understand it
    const size_t hl = fmt.find("hl");
    if (hl != std::string::npos) {
        fmt.erase(hl, 2);
    }

    vector_size = vec_length;
    return fmt;
}

// Format a piece of a format string that consumes exactly one argument.
//
// `fmt` is handed to snprintf as is. `data` points at the argument: for a 's'
// conversion the pointer itself is passed to snprintf as the string, otherwise
// a value is read from it. `size` is the size of the argument in bytes and
// selects the type that is read (half/float/double for floating-point
// conversions, 8/16/32/64-bit unsigned integers for everything else).
// Returns the formatted text, or an empty string if snprintf reports an error.
std::string print_part(const std::string& fmt, const char* data, size_t size) {
    // The length of the result is not known up front. With a single argument,
    // the format length plus 1024 bytes is very likely to be enough; when it
    // is not, the buffer is doubled and the conversion is attempted again.
    std::vector<char> buffer(fmt.size() + 1024);
    buffer[0] = '\0';

    const auto spec = std::tolower(get_fmt_conversion(fmt));

    // Run snprintf on the current buffer with a single argument
    auto format_with = [&](auto arg) {
        return snprintf(buffer.data(), buffer.size(), fmt.c_str(), arg);
    };

    for (;;) {
        int written = 0;
        if (spec == 's') {
            written = format_with(data);
        } else if (spec == 'f' || spec == 'e' || spec == 'g' || spec == 'a') {
            switch (size) {
            case 2:
                written =
                    format_with(cl_half_to_float(read_buff<cl_half>(data)));
                break;
            case 4:
                written = format_with(read_buff<float>(data));
                break;
            default:
                written = format_with(read_buff<double>(data));
                break;
            }
        } else {
            switch (size) {
            case 1:
                written = format_with(read_buff<uint8_t>(data));
                break;
            case 2:
                written = format_with(read_buff<uint16_t>(data));
                break;
            case 4:
                written = format_with(read_buff<uint32_t>(data));
                break;
            default:
                written = format_with(read_buff<uint64_t>(data));
                break;
            }
        }

        // snprintf failed: the result is an empty string
        if (written < 0) {
            buffer[0] = '\0';
            break;
        }
        // Everything (including the terminator) fit in the buffer
        if (written < static_cast<long>(buffer.size())) {
            break;
        }
        // Output was truncated: double the buffer and go again
        buffer.resize(buffer.size() * 2);
    }

    return std::string(buffer.data());
}

// Decode one printf record from the printf buffer and print it to stdout.
//
// On entry `data` points at the record: a 32-bit printf id followed by the
// arguments, whose sizes in bytes are listed in the `arg_sizes` of the matching
// descriptor. On return `data` has been advanced past the id and every
// argument that was consumed.
//
// `descs` is used to look up the descriptor of the printf id, and also the
// string literal referenced by the 32-bit id stored for each %s argument.
// std::out_of_range is thrown if either id is missing from `descs`.
//
// NOTE: this function is not told where the buffer ends, so it cannot check
// that a record is complete before reading it.
void process_printf(char*& data, const printf_descriptor_map_t& descs) {

    const uint32_t printf_id = read_inc_buff<uint32_t>(data);
    const auto& desc = descs.at(printf_id);
    const auto& format_string = desc.format_string;
    const auto& arg_sizes = desc.arg_sizes;

    std::stringstream printf_out{};

    // Firstly print the part of the format string up to the first '%'
    size_t next_part = format_string.find_first_of('%');
    printf_out << format_string.substr(0, next_part);

    // Decompose the remaining format string into individual strings with
    // one format specifier each, handle each one individually. `next_part`
    // always designates a '%'; stop when there is none left or when the only
    // one left is the very last character of the format.
    size_t arg_idx = 0;
    while (next_part != std::string::npos &&
           next_part + 1 < format_string.size()) {
        // Get the part of the format string before the next format specifier
        const size_t part_start = next_part;
        const size_t part_end =
            format_string.find_first_of('%', part_start + 1);

        // "%%" prints a literal '%'. Whatever follows it up to the next '%' is
        // plain text: print it here so that it is never mistaken for a format
        // specifier (which would consume an argument) and never dropped.
        if (part_end == part_start + 1) {
            const size_t literal_start = part_end + 1;
            next_part = format_string.find_first_of('%', literal_start);
            printf_out << "%";
            if (literal_start < format_string.size()) {
                printf_out << format_string.substr(literal_start,
                                                   next_part - literal_start);
            }
            continue;
        }

        auto part_fmt = format_string.substr(part_start, part_end - part_start);

        // If there are no remaining arguments, this part of the format is
        // printed verbatim instead of reading past the end of `arg_sizes`
        if (arg_idx >= arg_sizes.size()) {
            printf_out << part_fmt;
            next_part = part_end;
            continue;
        }

        // The size of the argument that this format part will consume
        const size_t size = arg_sizes[arg_idx];

        // Check to see if we have a vector format specifier
        int vec_len = 0;
        int el_size = 0;
        std::string remaining_str;
        part_fmt = get_vector_fmt(part_fmt, vec_len, el_size, remaining_str);

        // Scalar argument
        if (vec_len < 2) {
            // Special case for %s: the buffer holds the id of the string
            // literal, the text itself comes from the descriptor map
            if (get_fmt_conversion(part_fmt) == 's') {
                const uint32_t string_id = read_buff<uint32_t>(data);
                printf_out << print_part(
                    part_fmt, descs.at(string_id).format_string.c_str(), size);
            } else {
                printf_out << print_part(part_fmt, data, size);
            }
        } else {
            // Vector argument: elements are printed one by one, separated by
            // commas. The same element size is used both to read an element
            // and to step to the next one. The size given by the length
            // modifier is preferred; if it is missing or would overrun the
            // argument, fall back to an even split of the argument size.
            const size_t modifier_size = static_cast<size_t>(el_size);
            const size_t lanes = static_cast<size_t>(vec_len);
            const size_t elem_size =
                (el_size > 0 && modifier_size * lanes <= size) ? modifier_size
                                                               : size / lanes;
            const char* element = data;
            for (int i = 0; i < vec_len; i++) {
                if (i > 0) {
                    printf_out << ",";
                }
                printf_out << print_part(part_fmt, element, elem_size);
                element += elem_size;
            }
            printf_out << remaining_str;
        }
        // Skip the whole argument, whatever was actually read from it
        data += size;

        // Move to the next format part and prepare to handle the next arg
        next_part = part_end;
        arg_idx++;
    }

    printf("%s", printf_out.str().c_str());
}

// Process the contents of the printf buffer and print the results to stdout.
//
// The buffer is mapped, every record found in it is handed to process_printf
// in turn, and the buffer is unmapped again. `descriptors` is forwarded to
// process_printf unchanged.
//
// Returns CL_OUT_OF_RESOURCES if the buffer cannot be mapped, CL_SUCCESS
// otherwise.
cl_int cvk_printf(cvk_mem* printf_buffer,
const printf_descriptor_map_t& descriptors) {
CVK_ASSERT(printf_buffer);
if (!printf_buffer->map()) {
cvk_error("Could not map printf buffer");
return CL_OUT_OF_RESOURCES;
}
char* data = static_cast<char*>(printf_buffer->host_va());
auto buffer_size = printf_buffer->size();
// The first 32-bit word of the buffer is a header; the records follow it, so
// the space available for records is the buffer size minus that word.
const auto bytes_written_size = sizeof(uint32_t);
const auto data_size = buffer_size - bytes_written_size;
// The header value is scaled by 4 to obtain a byte count.
// NOTE(review): presumably the header counts 32-bit words written by the
// kernel - confirm against the clspv printf implementation.
auto bytes_written = read_inc_buff<uint32_t>(data) * 4;
auto* data_start = data;

// Keep decoding records while the read position is both below the number of
// bytes reported as written and below the size of the record area.
while (static_cast<size_t>(data - data_start) < bytes_written &&
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this mean that we never print anything if the kernel filled the whole buffer and we wrapped around (I haven't checked the clspv side)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have added a test showing that we print as much as possible.

static_cast<size_t>(data - data_start) < data_size) {
process_printf(data, descriptors);
}

printf_buffer->unmap();

return CL_SUCCESS;
}
31 changes: 31 additions & 0 deletions src/printf.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// Copyright 2022 The clvk authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "memory.hpp"

#include <vector>

// Describes one entry of the printf descriptor map: either a printf call (its
// format string and the sizes of its arguments) or a string literal that a %s
// argument refers to by id.
struct printf_descriptor {
// Id of this descriptor.
// NOTE(review): presumably identical to its key in printf_descriptor_map_t -
// confirm where the map is populated.
uint32_t printf_id;
// Format string of the printf call, or the text of the string literal when
// the descriptor is looked up for a %s argument
std::string format_string;
// Size in bytes of each argument stored in the printf buffer, in the order
// in which the format string consumes them
std::vector<uint32_t> arg_sizes;
};

// Printf descriptors indexed by 32-bit id. Lookups are done with the printf id
// read from the printf buffer and with the string id stored for %s arguments.
using printf_descriptor_map_t = std::unordered_map<uint32_t, printf_descriptor>;

// Process the contents of the printf buffer and print the results to stdout.
// Returns CL_OUT_OF_RESOURCES if the buffer cannot be mapped and CL_SUCCESS
// otherwise.
cl_int cvk_printf(cvk_mem* printf_buffer,
const printf_descriptor_map_t& descriptors);
Loading