Skip to content

Commit

Permalink
xe: ocl: kernel_ctx: add duplication checks
Browse files Browse the repository at this point in the history
  • Loading branch information
echeresh committed Jan 24, 2025
1 parent ddd6e05 commit c8bdcce
Show file tree
Hide file tree
Showing 2 changed files with 293 additions and 142 deletions.
230 changes: 230 additions & 0 deletions src/gpu/intel/compute/kernel_ctx.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,230 @@
/*******************************************************************************
* Copyright 2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/

#include "gpu/intel/compute/kernel_ctx.hpp"

#include <cassert>
#include <iostream>
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <type_traits>
#include <unordered_map>

#include "common/bit_cast.hpp"
#include "gpu/intel/gpu_primitive_attr.hpp"
#include "gpu/intel/utils.hpp"

namespace dnnl {
namespace impl {
namespace gpu {
namespace intel {
namespace compute {

// Writes `opt` to `out` in the textual form used on the compiler command
// line:
//   general      -> <name>
//   macro        -> -D<name>
//   macro_int    -> -D<name>=<value>, with an "L" suffix when the value does
//                   not fit into an int
//   macro_float  -> -D<name>=as_float(0x<bits>), where <bits> is the 32-bit
//                   pattern of the float printed in hex
//   macro_string -> -D<name>=<value>
// Returns `out` to allow chaining.
std::ostream &operator<<(std::ostream &out, const kernel_ctx_t::option_t &opt) {
    using kind_t = kernel_ctx_t::option_kind_t;
    switch (opt.kind) {
        case kind_t::general: out << opt.name; break;
        case kind_t::macro: out << "-D" << opt.name; break;
        case kind_t::macro_int:
            out << "-D" << opt.name << "=" << opt.value.i_value;
            if (opt.value.i_value > INT_MAX || opt.value.i_value < INT_MIN)
                out << "L";
            break;
        case kind_t::macro_float: {
            // Hex output is needed only for the bit pattern. Restore the
            // caller's formatting state afterwards instead of forcing the
            // stream to decimal.
            const std::ios_base::fmtflags saved_flags = out.flags();
            out << "-D" << opt.name << "=as_float(0x" << std::hex
                << utils::bit_cast<uint32_t>(opt.value.f_value) << ")";
            out.flags(saved_flags);
            break;
        }
        case kind_t::macro_string:
            out << "-D" << opt.name << "=" << opt.value.s_value;
            break;
        default: assert(!"Unknown kind"); break;
    }
    return out;
}

// Creates a kernel context pre-populated with the default build options and
// the default macros derived from `attr`. Both helpers check `attr` for null
// before dereferencing it.
kernel_ctx_t::kernel_ctx_t(const primitive_attr_t *attr) {
    this->set_default_options(attr);
    this->set_default_macros(attr);
}

// Renders every registered option into one space-separated string and then
// appends either " -DUSE_INT32_OFFSET" or
// " -cl-intel-greater-than-4GB-buffer-required", depending on
// use_int32_offset_.
std::string kernel_ctx_t::options() const {
    std::ostringstream rendered;
    // Empty before the first option, a single space before each later one.
    const char *separator = "";
    for (auto &entry : options_) {
        rendered << separator << entry.second;
        separator = " ";
    }
    if (!use_int32_offset_) {
        // TODO: Determine if specialization for buffers between 2GB and 4GB
        // is worthwhile
        rendered << " -cl-intel-greater-than-4GB-buffer-required";
    } else {
        rendered << " -DUSE_INT32_OFFSET";
    }
    return rendered.str();
}

// Registers the size of a buffer. A size above INT_MAX switches the int32
// offset mode off; smaller sizes leave the current setting untouched.
void kernel_ctx_t::register_buffer_size(size_t size) {
    const bool within_int_range = !(size > INT_MAX);
    if (within_int_range) return;
    use_int32_offset(false);
}

void kernel_ctx_t::use_int32_offset(bool value) {
use_int32_offset_ = value;
}

void kernel_ctx_t::define_int(const char *name, int64_t value) {
add_option(option_t(name, value));
}

void kernel_ctx_t::define_int(const std::string &name, int64_t value) {
add_option(option_t(name, value));
}

void kernel_ctx_t::define_float(const char *name, float value) {
add_option(option_t(name, value));
}

void kernel_ctx_t::add_option(const char *option) {
add_option(std::string(option));
}

// Splits `option` with gpu_utils::split() and registers every non-empty
// piece as an individual option.
void kernel_ctx_t::add_option(const std::string &option) {
    for (auto &token : gpu_utils::split(option)) {
        // Explicit option_t construction selects the option_t overload.
        if (!token.empty()) add_option(option_t(token));
    }
}

void kernel_ctx_t::add_option(const option_t &option) {
auto it = options_.find(option.name);
if (it != options_.end()) {
if (it->second != option) {
std::cout << "Error: option " << option.name
<< " is already set to a different value.\n";
std::cout << " Old option:" << it->second << "\n";
std::cout << " New option:" << option << "\n";
abort();
}
return;
}
options_[option.name] = option;
}

// Returns true when an option is registered under `name`.
bool kernel_ctx_t::has_macro(const char *name) const {
    return options_.find(name) != options_.end();
}

// std::string overload: forwards to the C-string overload.
bool kernel_ctx_t::has_macro(const std::string &name) const {
    const char *raw_name = name.c_str();
    return has_macro(raw_name);
}

// Defines the DT_* macro (with value 1) that corresponds to `dt`. An
// unsupported data type triggers an assertion and defines nothing.
void kernel_ctx_t::set_data_type(data_type_t dt) {
    const char *macro_name = nullptr;
    switch (dt) {
        case data_type::bf16: macro_name = "DT_BF16"; break;
        case data_type::f16: macro_name = "DT_F16"; break;
        case data_type::f32: macro_name = "DT_F32"; break;
        case data_type::f64: macro_name = "DT_F64"; break;
        case data_type::s8: macro_name = "DT_S8"; break;
        case data_type::u8: macro_name = "DT_U8"; break;
        case data_type::f8_e4m3: macro_name = "DT_HF8"; break;
        case data_type::f8_e5m2: macro_name = "DT_BF8"; break;
        case data_type::f4_e2m1: macro_name = "DT_F4_E2M1"; break;
        case data_type::s32: macro_name = "DT_S32"; break;
        default: assert(!"unknown data type"); return;
    }
    define_int(macro_name, 1);
}

// Returns the short name of the data type whose DT_* macro is registered,
// checking DT_F16, DT_F32, DT_F64 and DT_S8 in that order, or an empty
// string when none of them is present.
// NOTE(review): set_data_type() can also define DT_BF16, DT_U8, DT_HF8,
// DT_BF8, DT_F4_E2M1 and DT_S32, all of which yield "" here - confirm that
// this is intentional.
std::string kernel_ctx_t::data_type() const {
    const char *const known_types[][2] = {
            {"DT_F16", "f16"},
            {"DT_F32", "f32"},
            {"DT_F64", "f64"},
            {"DT_S8", "s8"},
    };
    for (const auto &entry : known_types) {
        if (has_macro(entry[0])) return entry[1];
    }
    return "";
}

// Stores `source` under `header_name`, overwriting any source previously
// stored for that name. The contents of `source` are moved from.
void kernel_ctx_t::add_custom_header(
        const std::string &header_name, std::string &&source) {
    auto &stored_source = custom_headers_[header_name];
    stored_source = std::move(source);
}

// Looks up the source stored for `header_name`. Returns a pointer to the
// stored string's characters, or nullptr when no such header is registered.
const char *kernel_ctx_t::get_custom_header(
        const std::string &header_name) const {
    const auto found = custom_headers_.find(header_name);
    return (found == custom_headers_.end()) ? nullptr
                                            : found->second.c_str();
}

// Returns true when at least one custom header is stored.
bool kernel_ctx_t::has_custom_headers() const {
    const bool no_headers = custom_headers_.empty();
    return !no_headers;
}

// Registers the options every kernel context starts with:
//  - correctly rounded fp32 divide/sqrt (always);
//  - the 256-GRF-per-thread mode when the GPU attribute attached to `attr`
//    reports 4 threads per EU;
//  - debugging macros controlled by the "enable_check_assumptions" and
//    "ocl_debug" values read through gpu_utils::dev_getenv().
// `attr` may be null, in which case the attribute-dependent option is
// skipped.
void kernel_ctx_t::set_default_options(const primitive_attr_t *attr) {
    // By default fp32 division and sqrt are not IEEE-compliant
    add_option("-cl-fp32-correctly-rounded-divide-sqrt");

    if (attr && attr->gpu_attr_) {
        auto *gpu_attr = utils::downcast<gpu_primitive_attr_t *>(
                attr->gpu_attr_.get());
        const bool four_threads_per_eu = (gpu_attr->threads_per_eu() == 4);
        if (four_threads_per_eu) add_option("-cl-intel-256-GRF-per-thread");
    }

    // Set override flag for checking compiler assumptions
    if (gpu_utils::dev_getenv("enable_check_assumptions", 0))
        add_option("-DENABLE_CHECK_ASSUMPTIONS");

    if (gpu_utils::dev_getenv("ocl_debug", 0)) add_option("-DOCL_DEBUG");
}

// Defines the DETERMINISTIC macro from attr->deterministic_. Nothing is
// defined when `attr` is null.
void kernel_ctx_t::set_default_macros(const primitive_attr_t *attr) {
    if (!attr) return;
    define_int("DETERMINISTIC", attr->deterministic_);
}

// Parses a single command-line token into an option.
//  - "-DNAME"       -> kind macro, name "NAME"
//  - "-DNAME=VALUE" -> kind macro_string, name "NAME", value "VALUE"
//  - anything else  -> the whole token becomes the name; `kind` is left at
//                      its default
// Only a leading "-D" marks a macro definition. A "-D" that appears later in
// the token (for example inside a path) is part of a general option and must
// not be stripped.
kernel_ctx_t::option_t::option_t(const std::string &s) {
    const bool is_macro = (s.compare(0, 2, "-D") == 0);
    if (!is_macro) {
        name = s;
        return;
    }
    // Everything after the "-D" prefix: NAME or NAME=VALUE.
    const std::string definition = s.substr(2);
    const auto eq_pos = definition.find('=');
    if (eq_pos == std::string::npos) {
        name = definition;
        kind = option_kind_t::macro;
        return;
    }
    name = definition.substr(0, eq_pos);
    value = definition.substr(eq_pos + 1);
    kind = option_kind_t::macro_string;
}

// Two options are equal when their kind, name and value all match. The
// fields are compared in that order and the first mismatch ends the check.
bool kernel_ctx_t::option_t::operator==(const option_t &other) const {
    if (kind != other.kind) return false;
    if (name != other.name) return false;
    return value == other.value;
}

} // namespace compute
} // namespace intel
} // namespace gpu
} // namespace impl
} // namespace dnnl
Loading

0 comments on commit c8bdcce

Please sign in to comment.