oneapi-src · echeresh · Jan 24, 2025
@@ -0,0 +1,230 @@
+/*******************************************************************************
+* Copyright 2025 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "gpu/intel/compute/kernel_ctx.hpp"
+
+#include <cassert>
+#include <map>
+#include <set>
+#include <sstream>
+#include <string>
+#include <type_traits>
+#include <unordered_map>
+
+#include "common/bit_cast.hpp"
+#include "gpu/intel/gpu_primitive_attr.hpp"
+#include "gpu/intel/utils.hpp"
+
+namespace dnnl {
+namespace impl {
+namespace gpu {
+namespace intel {
+namespace compute {
+
+// Writes the command-line form of a single build option to `out`:
+//   general      -> the option name as is
+//   macro        -> -D<name>
+//   macro_int    -> -D<name>=<value>, followed by "L" when the value lies
+//                   outside of [INT_MIN, INT_MAX]
+//   macro_float  -> -D<name>=as_float(0x<bits>), <bits> being the hexadecimal
+//                   bit pattern of the float value
+//   macro_string -> -D<name>=<value>
+// Any other kind triggers an assertion and nothing is written.
+std::ostream &operator<<(std::ostream &out, const kernel_ctx_t::option_t &opt) {
+    using kind_t = kernel_ctx_t::option_kind_t;
+    const auto opt_kind = opt.kind;
+    if (opt_kind == kind_t::general) {
+        out << opt.name;
+    } else if (opt_kind == kind_t::macro) {
+        out << "-D" << opt.name;
+    } else if (opt_kind == kind_t::macro_int) {
+        const auto &int_value = opt.value.i_value;
+        out << "-D" << opt.name << "=" << int_value;
+        const bool fits_int = (int_value <= INT_MAX) && (int_value >= INT_MIN);
+        if (!fits_int) out << "L";
+    } else if (opt_kind == kind_t::macro_float) {
+        const auto float_bits = utils::bit_cast<uint32_t>(opt.value.f_value);
+        out << "-D" << opt.name << "=as_float(0x" << std::hex << float_bits
+            << ")";
+        // Switch the stream back to decimal output for whatever follows.
+        out << std::dec;
+    } else if (opt_kind == kind_t::macro_string) {
+        out << "-D" << opt.name << "=" << opt.value.s_value;
+    } else {
+        assert(!"Unknown kind");
+    }
+    return out;
+}
+
+// Creates a kernel context and fills it with the default options followed by
+// the default macros derived from `attr`.
+kernel_ctx_t::kernel_ctx_t(const primitive_attr_t *attr) {
+    this->set_default_options(attr);
+    this->set_default_macros(attr);
+}
+
+// Returns the full option string: all registered options joined with single
+// spaces, followed by the flag that reflects the offset width setting
+// (" -DUSE_INT32_OFFSET" or " -cl-intel-greater-than-4GB-buffer-required").
+std::string kernel_ctx_t::options() const {
+    std::ostringstream result;
+    // Empty before the first option, a single space afterwards.
+    const char *separator = "";
+    for (const auto &entry : options_) {
+        result << separator << entry.second;
+        separator = " ";
+    }
+    // TODO: Determine if specialization for buffers between 2GB and 4GB
+    // is worthwhile
+    result << (use_int32_offset_
+                    ? " -DUSE_INT32_OFFSET"
+                    : " -cl-intel-greater-than-4GB-buffer-required");
+    return result.str();
+}
+
+// Takes the size of a buffer into account: a size above INT_MAX turns the
+// 32-bit offset mode off. Smaller sizes leave the current setting untouched.
+void kernel_ctx_t::register_buffer_size(size_t size) {
+    const bool fits_int32_offset = !(size > INT_MAX);
+    if (fits_int32_offset) return;
+    use_int32_offset(false);
+}
+
+// Sets the flag that options() uses to choose between -DUSE_INT32_OFFSET and
+// -cl-intel-greater-than-4GB-buffer-required.
+void kernel_ctx_t::use_int32_offset(bool value) {
+    this->use_int32_offset_ = value;
+}
+
+// Registers an integer macro `name` with the given value.
+void kernel_ctx_t::define_int(const char *name, int64_t value) {
+    const option_t int_macro(name, value);
+    add_option(int_macro);
+}
+
+// Registers an integer macro `name` with the given value (std::string
+// flavor of the name argument).
+void kernel_ctx_t::define_int(const std::string &name, int64_t value) {
+    const option_t int_macro(name, value);
+    add_option(int_macro);
+}
+
+// Registers a floating-point macro `name` with the given value.
+void kernel_ctx_t::define_float(const char *name, float value) {
+    const option_t float_macro(name, value);
+    add_option(float_macro);
+}
+
+// Adds the option(s) contained in a C string by forwarding to the
+// std::string overload.
+void kernel_ctx_t::add_option(const char *option) {
+    const std::string option_str(option);
+    add_option(option_str);
+}
+
+// Splits `option` with gpu_utils::split() and registers every non-empty part
+// as a separate option.
+// NOTE(review): the delimiter is whatever gpu_utils::split() uses by default
+// (presumably whitespace) - confirm against its declaration.
+void kernel_ctx_t::add_option(const std::string &option) {
+    for (const auto &token : gpu_utils::split(option)) {
+        if (!token.empty()) add_option(option_t(token));
+    }
+}
+
+// Registers `option` under its name.
+//   - A name that is not registered yet is simply stored.
+//   - Re-adding an option equal to the stored one is a no-op.
+//   - Adding a different option under an already registered name is a fatal
+//     error: both options are reported and the process is aborted.
+void kernel_ctx_t::add_option(const option_t &option) {
+    auto it = options_.find(option.name);
+    if (it == options_.end()) {
+        options_[option.name] = option;
+        return;
+    }
+    if (it->second != option) {
+        // Report through std::cerr rather than std::cout: abort() is not
+        // required to flush buffered streams, so a message written to
+        // std::cout could be lost before the process dies.
+        std::cerr << "Error: option " << option.name
+                  << " is already set to a different value.\n";
+        std::cerr << "  Old option:" << it->second << "\n";
+        std::cerr << "  New option:" << option << "\n";
+        abort();
+    }
+}
+
+// Returns true when an option is registered under `name`.
+// NOTE(review): the lookup covers every registered option, not just -D
+// macros, since all of them are keyed by name - confirm that is intended.
+bool kernel_ctx_t::has_macro(const char *name) const {
+    return options_.find(name) != options_.end();
+}
+
+// std::string flavor of has_macro(): forwards to the C-string overload.
+bool kernel_ctx_t::has_macro(const std::string &name) const {
+    const char *raw_name = name.c_str();
+    return has_macro(raw_name);
+}
+
+// Defines the DT_* macro (with value 1) that corresponds to `dt`. A data type
+// without a matching macro triggers an assertion and defines nothing.
+void kernel_ctx_t::set_data_type(data_type_t dt) {
+    const char *macro_name = nullptr;
+    switch (dt) {
+        case data_type::bf16: macro_name = "DT_BF16"; break;
+        case data_type::f16: macro_name = "DT_F16"; break;
+        case data_type::f32: macro_name = "DT_F32"; break;
+        case data_type::f64: macro_name = "DT_F64"; break;
+        case data_type::s8: macro_name = "DT_S8"; break;
+        case data_type::u8: macro_name = "DT_U8"; break;
+        case data_type::f8_e4m3: macro_name = "DT_HF8"; break;
+        case data_type::f8_e5m2: macro_name = "DT_BF8"; break;
+        case data_type::f4_e2m1: macro_name = "DT_F4_E2M1"; break;
+        case data_type::s32: macro_name = "DT_S32"; break;
+        default: break;
+    }
+    if (macro_name == nullptr) {
+        assert(!"unknown data type");
+        return;
+    }
+    define_int(macro_name, 1);
+}
+
+// Returns the short name of the data type whose DT_* macro is registered. The
+// macros are probed in a fixed order (F16, F32, F64, S8) and the first hit
+// wins; an empty string is returned when none of them is present.
+// NOTE(review): only these four DT_* macros are recognized here; any other
+// DT_* macro yields an empty string - confirm this is intended.
+std::string kernel_ctx_t::data_type() const {
+    static const struct {
+        const char *macro;
+        const char *tag;
+    } known_types[] = {
+            {"DT_F16", "f16"},
+            {"DT_F32", "f32"},
+            {"DT_F64", "f64"},
+            {"DT_S8", "s8"},
+    };
+    for (const auto &entry : known_types) {
+        if (has_macro(entry.macro)) return entry.tag;
+    }
+    return "";
+}
+
+// Stores `source` as the contents of the custom header `header_name`. The
+// source is moved into the context; an entry already stored under the same
+// name is assigned the new contents.
+void kernel_ctx_t::add_custom_header(
+        const std::string &header_name, std::string &&source) {
+    auto &stored_source = custom_headers_[header_name];
+    stored_source = std::move(source);
+}
+
+// Looks up the custom header `header_name`. Returns a pointer to the stored
+// source text, or nullptr when no header is registered under that name.
+const char *kernel_ctx_t::get_custom_header(
+        const std::string &header_name) const {
+    const auto found = custom_headers_.find(header_name);
+    return (found == custom_headers_.end()) ? nullptr
+                                            : found->second.c_str();
+}
+
+// Returns true when at least one custom header is registered.
+bool kernel_ctx_t::has_custom_headers() const {
+    const bool no_headers = custom_headers_.empty();
+    return !no_headers;
+}
+
+// Registers the options every kernel gets:
+//   - correctly rounded fp32 divide/sqrt (always);
+//   - -cl-intel-256-GRF-per-thread when `attr` carries a GPU attribute whose
+//     threads_per_eu() is 4;
+//   - -DENABLE_CHECK_ASSUMPTIONS / -DOCL_DEBUG when the matching
+//     gpu_utils::dev_getenv() value is non-zero.
+// NOTE(review): dev_getenv() presumably reads a developer environment
+// variable with 0 as the default - confirm against its declaration.
+void kernel_ctx_t::set_default_options(const primitive_attr_t *attr) {
+    // By default fp32 division and sqrt are not IEEE-compliant
+    add_option("-cl-fp32-correctly-rounded-divide-sqrt");
+
+    const bool has_gpu_attr = attr && attr->gpu_attr_;
+    if (has_gpu_attr) {
+        auto *gpu_attr = utils::downcast<gpu_primitive_attr_t *>(
+                attr->gpu_attr_.get());
+        const bool four_threads_per_eu = (gpu_attr->threads_per_eu() == 4);
+        if (four_threads_per_eu) add_option("-cl-intel-256-GRF-per-thread");
+    }
+
+    // Set override flag for checking compiler assumptions
+    const auto check_assumptions
+            = gpu_utils::dev_getenv("enable_check_assumptions", 0);
+    if (check_assumptions) add_option("-DENABLE_CHECK_ASSUMPTIONS");
+
+    const auto ocl_debug = gpu_utils::dev_getenv("ocl_debug", 0);
+    if (ocl_debug) add_option("-DOCL_DEBUG");
+}
+
+// Defines the DETERMINISTIC macro from attr->deterministic_. Nothing is
+// defined when `attr` is null.
+void kernel_ctx_t::set_default_macros(const primitive_attr_t *attr) {
+    if (attr == nullptr) return;
+    define_int("DETERMINISTIC", attr->deterministic_);
+}
+
+// Parses a single command-line token into an option:
+//   "-D<name>=<value>" -> string macro <name> with value <value>
+//   "-D<name>"         -> macro <name> without a value
+//   anything else      -> general option, the whole token becomes the name
+// Only a leading "-D" marks a macro. A "-D" in the middle of a token (e.g.
+// "-I/path/my-Dir") belongs to a general option and must not be treated as a
+// macro definition.
+kernel_ctx_t::option_t::option_t(const std::string &s) {
+    const bool is_macro = (s.compare(0, 2, "-D") == 0);
+    if (!is_macro) {
+        kind = option_kind_t::general;
+        name = s;
+        return;
+    }
+    // Drop the "-D" prefix; what remains is "<name>" or "<name>=<value>".
+    name = s.substr(2);
+    const auto eq_pos = name.find('=');
+    if (eq_pos == std::string::npos) {
+        kind = option_kind_t::macro;
+        return;
+    }
+    // Split at the first '=' so the value itself may contain '='. The value
+    // has to be extracted before the name is truncated.
+    value = name.substr(eq_pos + 1);
+    name = name.substr(0, eq_pos);
+    kind = option_kind_t::macro_string;
+}
+
+// Two options are equal when their kind, name and value all match. The
+// fields are compared in that order and the first mismatch ends the check.
+bool kernel_ctx_t::option_t::operator==(const option_t &other) const {
+    if (kind != other.kind) return false;
+    if (name != other.name) return false;
+    return value == other.value;
+}
+
+} // namespace compute
+} // namespace intel
+} // namespace gpu
+} // namespace impl
+} // namespace dnnl